clearbit 0.2.7 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/CODEOWNERS +3 -0
- data/LICENSE +21 -0
- data/README.md +87 -3
- data/clearbit.gemspec +2 -0
- data/examples/name_domain.rb +4 -0
- data/examples/prospector.rb +5 -4
- data/examples/reveal.rb +11 -0
- data/examples/risk.rb +13 -0
- data/examples/risk_flag.rb +35 -0
- data/lib/clearbit.rb +4 -1
- data/lib/clearbit/analytics.rb +62 -0
- data/lib/clearbit/analytics/LICENSE +10 -0
- data/lib/clearbit/analytics/README.md +114 -0
- data/lib/clearbit/analytics/backoff_policy.rb +49 -0
- data/lib/clearbit/analytics/client.rb +189 -0
- data/lib/clearbit/analytics/defaults.rb +36 -0
- data/lib/clearbit/analytics/field_parser.rb +192 -0
- data/lib/clearbit/analytics/logging.rb +60 -0
- data/lib/clearbit/analytics/message_batch.rb +72 -0
- data/lib/clearbit/analytics/request.rb +134 -0
- data/lib/clearbit/analytics/response.rb +15 -0
- data/lib/clearbit/analytics/utils.rb +91 -0
- data/lib/clearbit/analytics/worker.rb +66 -0
- data/lib/clearbit/audiences.rb +14 -0
- data/lib/clearbit/enrichment.rb +1 -0
- data/lib/clearbit/enrichment/news.rb +18 -0
- data/lib/clearbit/name_domain.rb +17 -0
- data/lib/clearbit/risk.rb +1 -1
- data/lib/clearbit/version.rb +1 -1
- data/lib/clearbit/webhook.rb +3 -1
- data/spec/lib/clearbit/analytics_spec.rb +66 -0
- data/spec/lib/clearbit/prospector_spec.rb +12 -2
- data/spec/support/helpers.rb +3 -1
- metadata +27 -4
require 'logger'

module Clearbit
  class Analytics
    # Wraps an existing logger and adds a prefix to all messages
    class PrefixedLogger
      # logger - any object responding to debug/info/warn/error
      # prefix - String prepended (with a space) to every message
      def initialize(logger, prefix)
        @logger = logger
        @prefix = prefix
      end

      def debug(msg)
        @logger.debug("#{@prefix} #{msg}")
      end

      def info(msg)
        @logger.info("#{@prefix} #{msg}")
      end

      def warn(msg)
        @logger.warn("#{@prefix} #{msg}")
      end

      def error(msg)
        @logger.error("#{@prefix} #{msg}")
      end
    end

    # Mixin that gives including classes (and their instances) a shared,
    # memoized, prefixed logger via `logger`.
    module Logging
      class << self
        # Returns the memoized PrefixedLogger, building it on first use.
        # Prefers the Rails logger when one is actually configured.
        def logger
          return @logger if @logger

          # BUGFIX: Rails.logger can be nil early in boot (or in tests);
          # the previous code wrapped that nil and every subsequent log
          # call raised NoMethodError. Fall back to STDOUT in that case.
          base_logger = defined?(Rails) && Rails.logger

          unless base_logger
            base_logger = Logger.new STDOUT
            base_logger.progname = 'Clearbit::Analytics'
          end

          @logger = PrefixedLogger.new(base_logger, '[clearbit-ruby]')
        end

        # Allow callers to inject their own logger (clears nothing; simply
        # replaces the memoized instance).
        attr_writer :logger
      end

      # Expose `logger` at the class level of any includer.
      def self.included(base)
        class << base
          def logger
            Logging.logger
          end
        end
      end

      # Instance-level accessor delegating to the shared logger.
      def logger
        Logging.logger
      end
    end
  end
end
require 'forwardable'
require 'clearbit/analytics/logging'

module Clearbit
  class Analytics
    # A batch of `Message`s to be sent to the API
    class MessageBatch
      class JSONGenerationError < StandardError; end

      extend Forwardable
      include Clearbit::Analytics::Logging
      include Clearbit::Analytics::Defaults::MessageBatch

      def_delegators :@messages, :to_json, :empty?, :length

      # max_message_count - maximum number of messages this batch may hold
      def initialize(max_message_count)
        @max_message_count = max_message_count
        @messages = []
        @json_size = 0
      end

      # Appends a message. Oversized messages are dropped with an error log
      # rather than raised, so one bad message cannot poison the batch.
      def <<(message)
        serialized =
          begin
            message.to_json
          rescue StandardError => e
            raise JSONGenerationError, "Serialization error: #{e}"
          end

        serialized_size = serialized.bytesize
        if message_too_big?(serialized_size)
          logger.error('a message exceeded the maximum allowed size')
        else
          @messages << message
          @json_size += serialized_size + 1 # One byte for the comma
        end
      end

      # True once no further message is guaranteed to fit.
      def full?
        item_count_exhausted? || size_exhausted?
      end

      # Resets the batch to an empty state.
      def clear
        @messages.clear
        @json_size = 0
      end

      private

      # Reached the configured message-count cap?
      def item_count_exhausted?
        @messages.length >= @max_message_count
      end

      # Does a single serialized message exceed the per-message byte limit?
      def message_too_big?(message_json_size)
        message_json_size > Defaults::Message::MAX_BYTES
      end

      # The batch is considered size-exhausted once there is no longer room
      # for one more message of the largest possible size. This shortcut lets
      # us use a native Ruby `Queue` (which cannot be peeked); the tradeoff
      # is that a batch may end up holding fewer messages than it could.
      #
      # The alternative would be a custom queue that supports peeking, so the
      # next message's actual size could be checked against the remaining room.
      def size_exhausted?
        @json_size >= (MAX_BYTES - Defaults::Message::MAX_BYTES)
      end
    end
  end
end
require 'clearbit/analytics/defaults'
require 'clearbit/analytics/utils'
require 'clearbit/analytics/response'
require 'clearbit/analytics/logging'
require 'clearbit/analytics/backoff_policy'
require 'net/http'
require 'net/https'
require 'json'

module Clearbit
  class Analytics
    class Request
      include Clearbit::Analytics::Defaults::Request
      include Clearbit::Analytics::Utils
      include Clearbit::Analytics::Logging

      # public: Creates a new request object to send analytics batch
      #
      # options - Hash
      #   :host / :port / :ssl  - connection settings (see Defaults::Request)
      #   :headers / :path      - request settings
      #   :retries              - max attempts per batch
      #   :backoff_policy       - object responding to #next_interval (ms)
      def initialize(options = {})
        options[:host] ||= HOST
        options[:port] ||= PORT
        options[:ssl] ||= SSL
        @headers = options[:headers] || HEADERS
        @path = options[:path] || PATH
        @retries = options[:retries] || RETRIES
        @backoff_policy =
          options[:backoff_policy] || Clearbit::Analytics::BackoffPolicy.new

        http = Net::HTTP.new(options[:host], options[:port])
        http.use_ssl = options[:ssl]
        http.read_timeout = 8
        http.open_timeout = 4

        @http = http
      end

      # public: Posts the write key and batch of messages to the API.
      #
      # returns - Response of the status and error if it exists
      def post(write_key, batch)
        logger.debug("Sending request for #{batch.length} items")

        last_response, exception = retry_with_backoff(@retries) do
          status_code, body = send_request(write_key, batch)
          error = parse_error(body)
          should_retry = should_retry_request?(status_code, body)
          logger.debug("Response status code: #{status_code}")
          logger.debug("Response error: #{error}") if error

          [Response.new(status_code, error), should_retry]
        end

        if exception
          logger.error(exception.message)
          exception.backtrace.each { |line| logger.error(line) }
          Response.new(-1, exception.to_s)
        else
          last_response
        end
      end

      private

      # Extracts the 'error' field from a JSON response body.
      #
      # BUGFIX: a non-JSON body (e.g. an HTML error page from a proxy or load
      # balancer) previously raised JSON::ParserError here; the retry loop's
      # blanket rescue swallowed it and the real HTTP status was lost. Treat
      # an unparseable body as "no error message" and let the status code
      # drive the retry decision instead.
      def parse_error(body)
        JSON.parse(body)['error']
      rescue JSON::ParserError
        nil
      end

      # Decides whether a response warrants another attempt.
      def should_retry_request?(status_code, body)
        if status_code >= 500
          true # Server error
        elsif status_code == 429
          true # Rate limited
        elsif status_code >= 400
          logger.error(body)
          false # Client error. Do not retry, but log
        else
          false
        end
      end

      # Takes a block that returns [result, should_retry].
      #
      # Retries up to `retries_remaining` times if `should_retry` is true or
      # an exception is raised. `@backoff_policy` is used to determine the
      # duration to sleep between attempts
      #
      # Returns [last_result, raised_exception]
      def retry_with_backoff(retries_remaining, &block)
        result, caught_exception = nil
        should_retry = false

        begin
          result, should_retry = yield
          return [result, nil] unless should_retry
        rescue StandardError => e
          should_retry = true
          caught_exception = e
        end

        if should_retry && (retries_remaining > 1)
          logger.debug("Retrying request, #{retries_remaining} retries left")
          # next_interval is in milliseconds; sleep takes seconds.
          sleep(@backoff_policy.next_interval.to_f / 1000)
          retry_with_backoff(retries_remaining - 1, &block)
        else
          [result, caught_exception]
        end
      end

      # Sends a request for the batch, returns [status_code, body]
      def send_request(write_key, batch)
        payload = JSON.generate(
          :sentAt => datetime_in_iso8601(Time.now),
          :batch => batch
        )
        request = Net::HTTP::Post.new(@path, @headers)
        # The write key acts as the basic-auth username; no password.
        request.basic_auth(write_key, nil)

        if self.class.stub
          logger.debug "stubbed request to #{@path}: " \
            "write key = #{write_key}, batch = JSON.generate(#{batch})"

          [200, '{}']
        else
          response = @http.request(request, payload)
          [response.code.to_i, response.body]
        end
      end

      class << self
        attr_writer :stub

        # Stubbing can also be forced via the STUB environment variable.
        def stub
          @stub || ENV['STUB']
        end
      end
    end
  end
end
module Clearbit
  class Analytics
    # public: Simple immutable wrapper for API call outcomes.
    class Response
      attr_reader :status, :error

      # status - Integer HTTP-style status code (defaults to 200)
      # error  - error description, or nil when the call succeeded
      def initialize(status = 200, error = nil)
        @status = status
        @error  = error
      end
    end
  end
end
require 'securerandom'

module Clearbit
  class Analytics
    # Stateless helper methods shared across the analytics client.
    module Utils
      extend self

      # public: Return a new hash with keys converted from strings to symbols
      def symbolize_keys(hash)
        hash.map { |key, value| [key.to_sym, value] }.to_h
      end

      # public: Convert hash keys from strings to symbols in place
      def symbolize_keys!(hash)
        hash.replace symbolize_keys hash
      end

      # public: Return a new hash with keys as strings
      def stringify_keys(hash)
        hash.map { |key, value| [key.to_s, value] }.to_h
      end

      # public: Returns a new hash with every date value converted to an
      # iso8601 string; non-date values are left untouched.
      def isoify_dates(hash)
        hash.map { |key, value| [key, datetime_in_iso8601(value)] }.to_h
      end

      # public: Converts all the date values into iso8601 strings in place
      def isoify_dates!(hash)
        hash.replace isoify_dates hash
      end

      # public: Returns a uid string shaped like an RFC 4122 version-4 UUID
      def uid
        parts = SecureRandom.random_bytes(16).unpack('NnnnnN')
        parts[2] = (parts[2] & 0x0fff) | 0x4000 # force version nibble to 4
        parts[3] = (parts[3] & 0x3fff) | 0x8000 # force RFC 4122 variant bits
        '%08x-%04x-%04x-%04x-%04x%08x' % parts
      end

      # Converts Time/DateTime/Date values to iso8601 strings; any other
      # value is passed through unchanged.
      def datetime_in_iso8601(datetime)
        case datetime
        when Time     then time_in_iso8601 datetime
        when DateTime then time_in_iso8601 datetime.to_time
        when Date     then date_in_iso8601 datetime
        else datetime
        end
      end

      # Formats a Time as iso8601 with `fraction_digits` fractional digits
      # (truncated, not rounded) and a numeric or 'Z' offset.
      def time_in_iso8601(time, fraction_digits = 3)
        fraction = ('.%06i' % time.usec)[0, fraction_digits + 1] if fraction_digits > 0

        "#{time.strftime('%Y-%m-%dT%H:%M:%S')}#{fraction}#{formatted_offset(time, true, 'Z')}"
      end

      # Formats a Date as YYYY-MM-DD.
      def date_in_iso8601(date)
        date.strftime('%F')
      end

      # Returns the UTC offset string for `time`, or `alternate_utc_string`
      # (e.g. 'Z') when the time is already UTC.
      def formatted_offset(time, colon = true, alternate_utc_string = nil)
        time.utc? && alternate_utc_string || seconds_to_utc_offset(time.utc_offset, colon)
      end

      # Renders an offset in seconds as +HH:MM / -HH:MM (or +HHMM without colon).
      def seconds_to_utc_offset(seconds, colon = true)
        template = colon ? UTC_OFFSET_WITH_COLON : UTC_OFFSET_WITHOUT_COLON
        sign = seconds < 0 ? '-' : '+'
        template % [sign, seconds.abs / 3600, (seconds.abs % 3600) / 60]
      end

      UTC_OFFSET_WITH_COLON = '%s%02d:%02d'
      UTC_OFFSET_WITHOUT_COLON = UTC_OFFSET_WITH_COLON.sub(':', '')
    end
  end
end
require 'clearbit/analytics/defaults'
require 'clearbit/analytics/message_batch'
require 'clearbit/analytics/request'
require 'clearbit/analytics/utils'

module Clearbit
  class Analytics
    # Background consumer: drains the shared message queue into batches and
    # posts them to the API. Intended to run on a dedicated thread owned by
    # the client.
    class Worker
      include Clearbit::Analytics::Utils
      include Clearbit::Analytics::Defaults
      include Clearbit::Analytics::Logging

      # public: Creates a new worker
      #
      # The worker continuously takes messages off the queue
      # and makes requests to the api
      #
      # queue - Queue synchronized between client and worker
      # write_key - String of the project's Write key
      # options - Hash of worker options
      # batch_size - Fixnum of how many items to send in a batch
      # on_error - Proc of what to do on an error
      #
      def initialize(queue, write_key, options = {})
        symbolize_keys! options
        @queue = queue
        @write_key = write_key
        # Default error handler is a deliberate no-op.
        @on_error = options[:on_error] || proc { |status, error| }
        batch_size = options[:batch_size] || Defaults::MessageBatch::MAX_SIZE
        @batch = MessageBatch.new(batch_size)
        # Guards @batch, which is also read by #is_requesting? from other threads.
        @lock = Mutex.new
      end

      # public: Continuously runs the loop to check for new events
      #
      # NOTE(review): `return if @queue.empty?` exits the loop entirely once
      # the queue drains — presumably the owning client restarts the worker
      # when new messages arrive; verify against the caller.
      def run
        until Thread.current[:should_exit]
          return if @queue.empty?

          # Fill the batch under the lock; the HTTP request below is made
          # outside the lock so is_requesting? stays responsive during I/O.
          @lock.synchronize do
            consume_message_from_queue! until @batch.full? || @queue.empty?
          end

          res = Request.new.post @write_key, @batch
          # Anything other than a clean 200 is reported to the error handler.
          @on_error.call(res.status, res.error) unless res.status == 200

          @lock.synchronize { @batch.clear }
        end
      end

      # public: Check whether we have outstanding requests.
      #
      # True while the current batch still holds messages (i.e. between
      # filling the batch and clearing it after the post).
      def is_requesting?
        @lock.synchronize { !@batch.empty? }
      end

      private

      # Moves one message from the queue into the batch. Serialization
      # failures are reported via the on_error callback (status -1) instead
      # of killing the worker thread.
      def consume_message_from_queue!
        @batch << @queue.pop
      rescue MessageBatch::JSONGenerationError => e
        @on_error.call(-1, e.to_s)
      end
    end
  end
end
module Clearbit
  # Client for the Clearbit Audiences API.
  #
  # NOTE(review): `endpoint`, `path` and `post` are class-level DSL methods
  # inherited from Base (defined elsewhere in this gem); presumably `post`
  # appends its first argument to the configured path — verify against Base.
  class Audiences < Base
    endpoint 'https://audiences.clearbit.com'
    path '/v1/audiences'

    # public: Submits an email record to the audiences endpoint.
    #
    # values - Hash of attributes forwarded as the request payload
    def self.add_email(values = {})
      post('email', values)
    end

    # public: Submits a domain record to the audiences endpoint.
    #
    # values - Hash of attributes forwarded as the request payload
    def self.add_domain(values = {})
      post('domain', values)
    end
  end
end