clearbit 0.2.7 → 0.3.3
This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- checksums.yaml +5 -5
- data/.github/CODEOWNERS +3 -0
- data/LICENSE +21 -0
- data/README.md +87 -3
- data/clearbit.gemspec +2 -0
- data/examples/name_domain.rb +4 -0
- data/examples/prospector.rb +5 -4
- data/examples/reveal.rb +11 -0
- data/examples/risk.rb +13 -0
- data/examples/risk_flag.rb +35 -0
- data/lib/clearbit.rb +4 -1
- data/lib/clearbit/analytics.rb +62 -0
- data/lib/clearbit/analytics/LICENSE +10 -0
- data/lib/clearbit/analytics/README.md +114 -0
- data/lib/clearbit/analytics/backoff_policy.rb +49 -0
- data/lib/clearbit/analytics/client.rb +189 -0
- data/lib/clearbit/analytics/defaults.rb +36 -0
- data/lib/clearbit/analytics/field_parser.rb +192 -0
- data/lib/clearbit/analytics/logging.rb +60 -0
- data/lib/clearbit/analytics/message_batch.rb +72 -0
- data/lib/clearbit/analytics/request.rb +134 -0
- data/lib/clearbit/analytics/response.rb +15 -0
- data/lib/clearbit/analytics/utils.rb +91 -0
- data/lib/clearbit/analytics/worker.rb +66 -0
- data/lib/clearbit/audiences.rb +14 -0
- data/lib/clearbit/enrichment.rb +1 -0
- data/lib/clearbit/enrichment/news.rb +18 -0
- data/lib/clearbit/name_domain.rb +17 -0
- data/lib/clearbit/risk.rb +1 -1
- data/lib/clearbit/version.rb +1 -1
- data/lib/clearbit/webhook.rb +3 -1
- data/spec/lib/clearbit/analytics_spec.rb +66 -0
- data/spec/lib/clearbit/prospector_spec.rb +12 -2
- data/spec/support/helpers.rb +3 -1
- metadata +27 -4
data/lib/clearbit/analytics/logging.rb
@@ -0,0 +1,60 @@
+require 'logger'
+
+module Clearbit
+  class Analytics
+    # Wraps an existing logger and adds a prefix to all messages
+    class PrefixedLogger
+      def initialize(logger, prefix)
+        @logger = logger
+        @prefix = prefix
+      end
+
+      def debug(msg)
+        @logger.debug("#{@prefix} #{msg}")
+      end
+
+      def info(msg)
+        @logger.info("#{@prefix} #{msg}")
+      end
+
+      def warn(msg)
+        @logger.warn("#{@prefix} #{msg}")
+      end
+
+      def error(msg)
+        @logger.error("#{@prefix} #{msg}")
+      end
+    end
+
+    module Logging
+      class << self
+        def logger
+          return @logger if @logger
+
+          base_logger = if defined?(Rails)
+                          Rails.logger
+                        else
+                          logger = Logger.new STDOUT
+                          logger.progname = 'Clearbit::Analytics'
+                          logger
+                        end
+          @logger = PrefixedLogger.new(base_logger, '[clearbit-ruby]')
+        end
+
+        attr_writer :logger
+      end
+
+      def self.included(base)
+        class << base
+          def logger
+            Logging.logger
+          end
+        end
+      end
+
+      def logger
+        Logging.logger
+      end
+    end
+  end
+end
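`Logging.logger` memoizes a single logger (Rails.logger when Rails is defined, otherwise a STDOUT `Logger` wrapped in `PrefixedLogger`), and the `attr_writer :logger` lets callers swap in their own. A minimal sketch, assuming the gem is loaded via `require 'clearbit'`:

```ruby
require 'clearbit'
require 'logger'

# Replace the default STDOUT/Rails logger with a file-backed one.
# The PrefixedLogger wrapper is only applied to the lazily built default,
# so a logger assigned here is used as-is.
Clearbit::Analytics::Logging.logger = Logger.new('log/clearbit_analytics.log')

# Any class that includes Clearbit::Analytics::Logging now writes here, e.g.:
# logger.debug("Sending request for 3 items")
```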
data/lib/clearbit/analytics/message_batch.rb
@@ -0,0 +1,72 @@
+require 'forwardable'
+require 'clearbit/analytics/logging'
+
+module Clearbit
+  class Analytics
+    # A batch of `Message`s to be sent to the API
+    class MessageBatch
+      class JSONGenerationError < StandardError; end
+
+      extend Forwardable
+      include Clearbit::Analytics::Logging
+      include Clearbit::Analytics::Defaults::MessageBatch
+
+      def initialize(max_message_count)
+        @messages = []
+        @max_message_count = max_message_count
+        @json_size = 0
+      end
+
+      def <<(message)
+        begin
+          message_json = message.to_json
+        rescue StandardError => e
+          raise JSONGenerationError, "Serialization error: #{e}"
+        end
+
+        message_json_size = message_json.bytesize
+        if message_too_big?(message_json_size)
+          logger.error('a message exceeded the maximum allowed size')
+        else
+          @messages << message
+          @json_size += message_json_size + 1 # One byte for the comma
+        end
+      end
+
+      def full?
+        item_count_exhausted? || size_exhausted?
+      end
+
+      def clear
+        @messages.clear
+        @json_size = 0
+      end
+
+      def_delegators :@messages, :to_json
+      def_delegators :@messages, :empty?
+      def_delegators :@messages, :length
+
+      private
+
+      def item_count_exhausted?
+        @messages.length >= @max_message_count
+      end
+
+      def message_too_big?(message_json_size)
+        message_json_size > Defaults::Message::MAX_BYTES
+      end
+
+      # We consider the max size here as just enough to leave room for one more
+      # message of the largest size possible. This is a shortcut that allows us
+      # to use a native Ruby `Queue` that doesn't allow peeking. The tradeoff
+      # here is that we might fit in less messages than possible into a batch.
+      #
+      # The alternative is to use our own `Queue` implementation that allows
+      # peeking, and to consider the next message size when calculating whether
+      # the message can be accomodated in this batch.
+      def size_exhausted?
+        @json_size >= (MAX_BYTES - Defaults::Message::MAX_BYTES)
+      end
+    end
+  end
+end
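A rough usage sketch of the batch in isolation. The limits it checks, `Defaults::MessageBatch::MAX_SIZE`/`MAX_BYTES` and `Defaults::Message::MAX_BYTES`, come from `clearbit/analytics/defaults.rb` (added in this release but not shown above), and the message hash below is illustrative:

```ruby
require 'clearbit'

batch = Clearbit::Analytics::MessageBatch.new(100)

# Each message is serialized once on insert so its byte size can be tracked;
# oversized messages are logged and dropped rather than raising.
batch << { userId: 'user_1', event: 'Signed Up' }

batch.length   # => 1, delegated to the underlying Array
batch.full?    # => true once 100 messages or the byte budget is reached
batch.to_json  # JSON array that Request posts as the `batch` field
batch.clear    # resets both the message list and the running byte count
```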
data/lib/clearbit/analytics/request.rb
@@ -0,0 +1,134 @@
+require 'clearbit/analytics/defaults'
+require 'clearbit/analytics/utils'
+require 'clearbit/analytics/response'
+require 'clearbit/analytics/logging'
+require 'clearbit/analytics/backoff_policy'
+require 'net/http'
+require 'net/https'
+require 'json'
+
+module Clearbit
+  class Analytics
+    class Request
+      include Clearbit::Analytics::Defaults::Request
+      include Clearbit::Analytics::Utils
+      include Clearbit::Analytics::Logging
+
+      # public: Creates a new request object to send analytics batch
+      #
+      def initialize(options = {})
+        options[:host] ||= HOST
+        options[:port] ||= PORT
+        options[:ssl] ||= SSL
+        @headers = options[:headers] || HEADERS
+        @path = options[:path] || PATH
+        @retries = options[:retries] || RETRIES
+        @backoff_policy =
+          options[:backoff_policy] || Clearbit::Analytics::BackoffPolicy.new
+
+        http = Net::HTTP.new(options[:host], options[:port])
+        http.use_ssl = options[:ssl]
+        http.read_timeout = 8
+        http.open_timeout = 4
+
+        @http = http
+      end
+
+      # public: Posts the write key and batch of messages to the API.
+      #
+      # returns - Response of the status and error if it exists
+      def post(write_key, batch)
+        logger.debug("Sending request for #{batch.length} items")
+
+        last_response, exception = retry_with_backoff(@retries) do
+          status_code, body = send_request(write_key, batch)
+          error = JSON.parse(body)['error']
+          should_retry = should_retry_request?(status_code, body)
+          logger.debug("Response status code: #{status_code}")
+          logger.debug("Response error: #{error}") if error
+
+          [Response.new(status_code, error), should_retry]
+        end
+
+        if exception
+          logger.error(exception.message)
+          exception.backtrace.each { |line| logger.error(line) }
+          Response.new(-1, exception.to_s)
+        else
+          last_response
+        end
+      end
+
+      private
+
+      def should_retry_request?(status_code, body)
+        if status_code >= 500
+          true # Server error
+        elsif status_code == 429
+          true # Rate limited
+        elsif status_code >= 400
+          logger.error(body)
+          false # Client error. Do not retry, but log
+        else
+          false
+        end
+      end
+
+      # Takes a block that returns [result, should_retry].
+      #
+      # Retries upto `retries_remaining` times, if `should_retry` is false or
+      # an exception is raised. `@backoff_policy` is used to determine the
+      # duration to sleep between attempts
+      #
+      # Returns [last_result, raised_exception]
+      def retry_with_backoff(retries_remaining, &block)
+        result, caught_exception = nil
+        should_retry = false
+
+        begin
+          result, should_retry = yield
+          return [result, nil] unless should_retry
+        rescue StandardError => e
+          should_retry = true
+          caught_exception = e
+        end
+
+        if should_retry && (retries_remaining > 1)
+          logger.debug("Retrying request, #{retries_remaining} retries left")
+          sleep(@backoff_policy.next_interval.to_f / 1000)
+          retry_with_backoff(retries_remaining - 1, &block)
+        else
+          [result, caught_exception]
+        end
+      end
+
+      # Sends a request for the batch, returns [status_code, body]
+      def send_request(write_key, batch)
+        payload = JSON.generate(
+          :sentAt => datetime_in_iso8601(Time.now),
+          :batch => batch
+        )
+        request = Net::HTTP::Post.new(@path, @headers)
+        request.basic_auth(write_key, nil)
+
+        if self.class.stub
+          logger.debug "stubbed request to #{@path}: " \
+            "write key = #{write_key}, batch = JSON.generate(#{batch})"
+
+          [200, '{}']
+        else
+          response = @http.request(request, payload)
+          [response.code.to_i, response.body]
+        end
+      end
+
+      class << self
+        attr_writer :stub
+
+        def stub
+          @stub || ENV['STUB']
+        end
+      end
+    end
+  end
+end
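The `stub` switch (also triggered by `ENV['STUB']`) makes it possible to exercise the request path without touching the network. A hedged sketch; the write key and batch contents are placeholders:

```ruby
require 'clearbit'

# With stubbing on, send_request logs the would-be request and
# returns [200, '{}'] instead of hitting the API.
Clearbit::Analytics::Request.stub = true

batch = Clearbit::Analytics::MessageBatch.new(100)
batch << { userId: 'user_1', event: 'Signed Up' }

response = Clearbit::Analytics::Request.new.post('YOUR_WRITE_KEY', batch)
response.status # => 200
response.error  # => nil, parsed from the 'error' key of the response body
```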
data/lib/clearbit/analytics/response.rb
@@ -0,0 +1,15 @@
+module Clearbit
+  class Analytics
+    class Response
+      attr_reader :status, :error
+
+      # public: Simple class to wrap responses from the API
+      #
+      #
+      def initialize(status = 200, error = nil)
+        @status = status
+        @error = error
+      end
+    end
+  end
+end
data/lib/clearbit/analytics/utils.rb
@@ -0,0 +1,91 @@
+require 'securerandom'
+
+module Clearbit
+  class Analytics
+    module Utils
+      extend self
+
+      # public: Return a new hash with keys converted from strings to symbols
+      #
+      def symbolize_keys(hash)
+        hash.each_with_object({}) do |(k, v), memo|
+          memo[k.to_sym] = v
+        end
+      end
+
+      # public: Convert hash keys from strings to symbols in place
+      #
+      def symbolize_keys!(hash)
+        hash.replace symbolize_keys hash
+      end
+
+      # public: Return a new hash with keys as strings
+      #
+      def stringify_keys(hash)
+        hash.each_with_object({}) do |(k, v), memo|
+          memo[k.to_s] = v
+        end
+      end
+
+      # public: Returns a new hash with all the date values in the into iso8601
+      # strings
+      #
+      def isoify_dates(hash)
+        hash.each_with_object({}) do |(k, v), memo|
+          memo[k] = datetime_in_iso8601(v)
+        end
+      end
+
+      # public: Converts all the date values in the into iso8601 strings in place
+      #
+      def isoify_dates!(hash)
+        hash.replace isoify_dates hash
+      end
+
+      # public: Returns a uid string
+      #
+      def uid
+        arr = SecureRandom.random_bytes(16).unpack('NnnnnN')
+        arr[2] = (arr[2] & 0x0fff) | 0x4000
+        arr[3] = (arr[3] & 0x3fff) | 0x8000
+        '%08x-%04x-%04x-%04x-%04x%08x' % arr
+      end
+
+      def datetime_in_iso8601(datetime)
+        case datetime
+        when Time
+          time_in_iso8601 datetime
+        when DateTime
+          time_in_iso8601 datetime.to_time
+        when Date
+          date_in_iso8601 datetime
+        else
+          datetime
+        end
+      end
+
+      def time_in_iso8601(time, fraction_digits = 3)
+        fraction = if fraction_digits > 0
+                     ('.%06i' % time.usec)[0, fraction_digits + 1]
+                   end
+
+        "#{time.strftime('%Y-%m-%dT%H:%M:%S')}#{fraction}#{formatted_offset(time, true, 'Z')}"
+      end
+
+      def date_in_iso8601(date)
+        date.strftime('%F')
+      end
+
+      def formatted_offset(time, colon = true, alternate_utc_string = nil)
+        time.utc? && alternate_utc_string || seconds_to_utc_offset(time.utc_offset, colon)
+      end
+
+      def seconds_to_utc_offset(seconds, colon = true)
+        (colon ? UTC_OFFSET_WITH_COLON : UTC_OFFSET_WITHOUT_COLON) % [(seconds < 0 ? '-' : '+'), (seconds.abs / 3600), ((seconds.abs % 3600) / 60)]
+      end
+
+      UTC_OFFSET_WITH_COLON = '%s%02d:%02d'
+      UTC_OFFSET_WITHOUT_COLON = UTC_OFFSET_WITH_COLON.sub(':', '')
+    end
+  end
+end
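Because the module uses `extend self`, the helpers are callable directly on `Clearbit::Analytics::Utils`. A few quick examples of the formatting they produce, based on the code above:

```ruby
require 'clearbit'
require 'date'

utils = Clearbit::Analytics::Utils

utils.symbolize_keys('event' => 'Signed Up')     # => { :event => 'Signed Up' }
utils.datetime_in_iso8601(Time.utc(2020, 1, 1))  # => "2020-01-01T00:00:00.000Z"
utils.date_in_iso8601(Date.new(2020, 1, 1))      # => "2020-01-01"
utils.uid  # => a random UUID-v4-formatted string, "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
```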
data/lib/clearbit/analytics/worker.rb
@@ -0,0 +1,66 @@
+require 'clearbit/analytics/defaults'
+require 'clearbit/analytics/message_batch'
+require 'clearbit/analytics/request'
+require 'clearbit/analytics/utils'
+
+module Clearbit
+  class Analytics
+    class Worker
+      include Clearbit::Analytics::Utils
+      include Clearbit::Analytics::Defaults
+      include Clearbit::Analytics::Logging
+
+      # public: Creates a new worker
+      #
+      # The worker continuously takes messages off the queue
+      # and makes requests to the segment.io api
+      #
+      # queue     - Queue synchronized between client and worker
+      # write_key - String of the project's Write key
+      # options   - Hash of worker options
+      #             batch_size - Fixnum of how many items to send in a batch
+      #             on_error   - Proc of what to do on an error
+      #
+      def initialize(queue, write_key, options = {})
+        symbolize_keys! options
+        @queue = queue
+        @write_key = write_key
+        @on_error = options[:on_error] || proc { |status, error| }
+        batch_size = options[:batch_size] || Defaults::MessageBatch::MAX_SIZE
+        @batch = MessageBatch.new(batch_size)
+        @lock = Mutex.new
+      end
+
+      # public: Continuously runs the loop to check for new events
+      #
+      def run
+        until Thread.current[:should_exit]
+          return if @queue.empty?
+
+          @lock.synchronize do
+            consume_message_from_queue! until @batch.full? || @queue.empty?
+          end
+
+          res = Request.new.post @write_key, @batch
+          @on_error.call(res.status, res.error) unless res.status == 200
+
+          @lock.synchronize { @batch.clear }
+        end
+      end
+
+      # public: Check whether we have outstanding requests.
+      #
+      def is_requesting?
+        @lock.synchronize { !@batch.empty? }
+      end
+
+      private
+
+      def consume_message_from_queue!
+        @batch << @queue.pop
+      rescue MessageBatch::JSONGenerationError => e
+        @on_error.call(-1, e.to_s)
+      end
+    end
+  end
+end
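A rough sketch of how the worker is meant to be driven. The real wiring lives in `clearbit/analytics/client.rb` (added in this release but not shown here), and the message hash fields below are illustrative:

```ruby
require 'clearbit'

queue = Queue.new
worker = Clearbit::Analytics::Worker.new(
  queue,
  'YOUR_WRITE_KEY',
  batch_size: 100,
  on_error: proc { |status, error| warn "clearbit analytics error #{status}: #{error}" }
)

# The client pushes pre-built message hashes onto the shared queue...
queue << { userId: 'user_1', event: 'Signed Up', type: 'track' }

# ...and a background thread drains it in batches, POSTing each batch
# and stopping once the queue is empty.
Thread.new { worker.run }
```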
data/lib/clearbit/audiences.rb
@@ -0,0 +1,14 @@
+module Clearbit
+  class Audiences < Base
+    endpoint 'https://audiences.clearbit.com'
+    path '/v1/audiences'
+
+    def self.add_email(values = {})
+      post('email', values)
+    end
+
+    def self.add_domain(values = {})
+      post('domain', values)
+    end
+  end
+end
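The new Audiences resource follows the same `Base` pattern as the other endpoints. The exact keys the `values` hash expects are not visible in this diff, so the ones below are purely illustrative:

```ruby
require 'clearbit'

Clearbit.key = 'YOUR_API_KEY'

# Hypothetical keys — consult the Audiences API docs for the real parameters.
Clearbit::Audiences.add_email(email: 'alex@example.com')
Clearbit::Audiences.add_domain(domain: 'example.com')
```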