apisonator 2.100.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173):
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +317 -0
  3. data/Gemfile +11 -0
  4. data/Gemfile.base +65 -0
  5. data/Gemfile.lock +319 -0
  6. data/Gemfile.on_prem +1 -0
  7. data/Gemfile.on_prem.lock +297 -0
  8. data/LICENSE +202 -0
  9. data/NOTICE +15 -0
  10. data/README.md +230 -0
  11. data/Rakefile +287 -0
  12. data/apisonator.gemspec +47 -0
  13. data/app/api/api.rb +13 -0
  14. data/app/api/internal/alert_limits.rb +32 -0
  15. data/app/api/internal/application_keys.rb +49 -0
  16. data/app/api/internal/application_referrer_filters.rb +43 -0
  17. data/app/api/internal/applications.rb +77 -0
  18. data/app/api/internal/errors.rb +54 -0
  19. data/app/api/internal/events.rb +42 -0
  20. data/app/api/internal/internal.rb +104 -0
  21. data/app/api/internal/metrics.rb +40 -0
  22. data/app/api/internal/service_tokens.rb +46 -0
  23. data/app/api/internal/services.rb +58 -0
  24. data/app/api/internal/stats.rb +42 -0
  25. data/app/api/internal/usagelimits.rb +62 -0
  26. data/app/api/internal/utilization.rb +23 -0
  27. data/bin/3scale_backend +223 -0
  28. data/bin/3scale_backend_worker +26 -0
  29. data/config.ru +4 -0
  30. data/config/puma.rb +192 -0
  31. data/config/schedule.rb +9 -0
  32. data/ext/mkrf_conf.rb +64 -0
  33. data/lib/3scale/backend.rb +67 -0
  34. data/lib/3scale/backend/alert_limit.rb +56 -0
  35. data/lib/3scale/backend/alerts.rb +137 -0
  36. data/lib/3scale/backend/analytics/kinesis.rb +3 -0
  37. data/lib/3scale/backend/analytics/kinesis/adapter.rb +180 -0
  38. data/lib/3scale/backend/analytics/kinesis/exporter.rb +86 -0
  39. data/lib/3scale/backend/analytics/kinesis/job.rb +135 -0
  40. data/lib/3scale/backend/analytics/redshift.rb +3 -0
  41. data/lib/3scale/backend/analytics/redshift/adapter.rb +367 -0
  42. data/lib/3scale/backend/analytics/redshift/importer.rb +83 -0
  43. data/lib/3scale/backend/analytics/redshift/job.rb +33 -0
  44. data/lib/3scale/backend/application.rb +330 -0
  45. data/lib/3scale/backend/application_events.rb +76 -0
  46. data/lib/3scale/backend/background_job.rb +65 -0
  47. data/lib/3scale/backend/configurable.rb +20 -0
  48. data/lib/3scale/backend/configuration.rb +151 -0
  49. data/lib/3scale/backend/configuration/loader.rb +42 -0
  50. data/lib/3scale/backend/constants.rb +19 -0
  51. data/lib/3scale/backend/cors.rb +84 -0
  52. data/lib/3scale/backend/distributed_lock.rb +67 -0
  53. data/lib/3scale/backend/environment.rb +21 -0
  54. data/lib/3scale/backend/error_storage.rb +52 -0
  55. data/lib/3scale/backend/errors.rb +343 -0
  56. data/lib/3scale/backend/event_storage.rb +120 -0
  57. data/lib/3scale/backend/experiment.rb +84 -0
  58. data/lib/3scale/backend/extensions.rb +5 -0
  59. data/lib/3scale/backend/extensions/array.rb +19 -0
  60. data/lib/3scale/backend/extensions/hash.rb +26 -0
  61. data/lib/3scale/backend/extensions/nil_class.rb +13 -0
  62. data/lib/3scale/backend/extensions/redis.rb +44 -0
  63. data/lib/3scale/backend/extensions/string.rb +13 -0
  64. data/lib/3scale/backend/extensions/time.rb +110 -0
  65. data/lib/3scale/backend/failed_jobs_scheduler.rb +141 -0
  66. data/lib/3scale/backend/job_fetcher.rb +122 -0
  67. data/lib/3scale/backend/listener.rb +728 -0
  68. data/lib/3scale/backend/listener_metrics.rb +99 -0
  69. data/lib/3scale/backend/logging.rb +48 -0
  70. data/lib/3scale/backend/logging/external.rb +44 -0
  71. data/lib/3scale/backend/logging/external/impl.rb +93 -0
  72. data/lib/3scale/backend/logging/external/impl/airbrake.rb +66 -0
  73. data/lib/3scale/backend/logging/external/impl/bugsnag.rb +69 -0
  74. data/lib/3scale/backend/logging/external/impl/default.rb +18 -0
  75. data/lib/3scale/backend/logging/external/resque.rb +57 -0
  76. data/lib/3scale/backend/logging/logger.rb +18 -0
  77. data/lib/3scale/backend/logging/middleware.rb +62 -0
  78. data/lib/3scale/backend/logging/middleware/json_writer.rb +21 -0
  79. data/lib/3scale/backend/logging/middleware/text_writer.rb +60 -0
  80. data/lib/3scale/backend/logging/middleware/writer.rb +143 -0
  81. data/lib/3scale/backend/logging/worker.rb +107 -0
  82. data/lib/3scale/backend/manifest.rb +80 -0
  83. data/lib/3scale/backend/memoizer.rb +277 -0
  84. data/lib/3scale/backend/metric.rb +275 -0
  85. data/lib/3scale/backend/metric/collection.rb +91 -0
  86. data/lib/3scale/backend/oauth.rb +4 -0
  87. data/lib/3scale/backend/oauth/token.rb +26 -0
  88. data/lib/3scale/backend/oauth/token_key.rb +30 -0
  89. data/lib/3scale/backend/oauth/token_storage.rb +313 -0
  90. data/lib/3scale/backend/oauth/token_value.rb +25 -0
  91. data/lib/3scale/backend/period.rb +3 -0
  92. data/lib/3scale/backend/period/boundary.rb +107 -0
  93. data/lib/3scale/backend/period/cache.rb +28 -0
  94. data/lib/3scale/backend/period/period.rb +402 -0
  95. data/lib/3scale/backend/queue_storage.rb +16 -0
  96. data/lib/3scale/backend/rack.rb +49 -0
  97. data/lib/3scale/backend/rack/exception_catcher.rb +136 -0
  98. data/lib/3scale/backend/rack/internal_error_catcher.rb +23 -0
  99. data/lib/3scale/backend/rack/prometheus.rb +19 -0
  100. data/lib/3scale/backend/saas.rb +6 -0
  101. data/lib/3scale/backend/saas_analytics.rb +4 -0
  102. data/lib/3scale/backend/server.rb +30 -0
  103. data/lib/3scale/backend/server/falcon.rb +52 -0
  104. data/lib/3scale/backend/server/puma.rb +71 -0
  105. data/lib/3scale/backend/service.rb +317 -0
  106. data/lib/3scale/backend/service_token.rb +97 -0
  107. data/lib/3scale/backend/stats.rb +8 -0
  108. data/lib/3scale/backend/stats/aggregator.rb +170 -0
  109. data/lib/3scale/backend/stats/aggregators/base.rb +72 -0
  110. data/lib/3scale/backend/stats/aggregators/response_code.rb +58 -0
  111. data/lib/3scale/backend/stats/aggregators/usage.rb +34 -0
  112. data/lib/3scale/backend/stats/bucket_reader.rb +135 -0
  113. data/lib/3scale/backend/stats/bucket_storage.rb +108 -0
  114. data/lib/3scale/backend/stats/cleaner.rb +195 -0
  115. data/lib/3scale/backend/stats/codes_commons.rb +14 -0
  116. data/lib/3scale/backend/stats/delete_job_def.rb +60 -0
  117. data/lib/3scale/backend/stats/key_generator.rb +73 -0
  118. data/lib/3scale/backend/stats/keys.rb +104 -0
  119. data/lib/3scale/backend/stats/partition_eraser_job.rb +58 -0
  120. data/lib/3scale/backend/stats/partition_generator_job.rb +46 -0
  121. data/lib/3scale/backend/stats/period_commons.rb +34 -0
  122. data/lib/3scale/backend/stats/stats_parser.rb +141 -0
  123. data/lib/3scale/backend/stats/storage.rb +113 -0
  124. data/lib/3scale/backend/statsd.rb +14 -0
  125. data/lib/3scale/backend/storable.rb +35 -0
  126. data/lib/3scale/backend/storage.rb +40 -0
  127. data/lib/3scale/backend/storage_async.rb +4 -0
  128. data/lib/3scale/backend/storage_async/async_redis.rb +21 -0
  129. data/lib/3scale/backend/storage_async/client.rb +205 -0
  130. data/lib/3scale/backend/storage_async/pipeline.rb +79 -0
  131. data/lib/3scale/backend/storage_async/resque_extensions.rb +30 -0
  132. data/lib/3scale/backend/storage_helpers.rb +278 -0
  133. data/lib/3scale/backend/storage_key_helpers.rb +9 -0
  134. data/lib/3scale/backend/storage_sync.rb +43 -0
  135. data/lib/3scale/backend/transaction.rb +62 -0
  136. data/lib/3scale/backend/transactor.rb +177 -0
  137. data/lib/3scale/backend/transactor/limit_headers.rb +54 -0
  138. data/lib/3scale/backend/transactor/notify_batcher.rb +139 -0
  139. data/lib/3scale/backend/transactor/notify_job.rb +47 -0
  140. data/lib/3scale/backend/transactor/process_job.rb +33 -0
  141. data/lib/3scale/backend/transactor/report_job.rb +84 -0
  142. data/lib/3scale/backend/transactor/status.rb +236 -0
  143. data/lib/3scale/backend/transactor/usage_report.rb +182 -0
  144. data/lib/3scale/backend/usage.rb +63 -0
  145. data/lib/3scale/backend/usage_limit.rb +115 -0
  146. data/lib/3scale/backend/use_cases/provider_key_change_use_case.rb +60 -0
  147. data/lib/3scale/backend/util.rb +17 -0
  148. data/lib/3scale/backend/validators.rb +26 -0
  149. data/lib/3scale/backend/validators/base.rb +36 -0
  150. data/lib/3scale/backend/validators/key.rb +17 -0
  151. data/lib/3scale/backend/validators/limits.rb +57 -0
  152. data/lib/3scale/backend/validators/oauth_key.rb +15 -0
  153. data/lib/3scale/backend/validators/oauth_setting.rb +15 -0
  154. data/lib/3scale/backend/validators/redirect_uri.rb +33 -0
  155. data/lib/3scale/backend/validators/referrer.rb +60 -0
  156. data/lib/3scale/backend/validators/service_state.rb +15 -0
  157. data/lib/3scale/backend/validators/state.rb +15 -0
  158. data/lib/3scale/backend/version.rb +5 -0
  159. data/lib/3scale/backend/views/oauth_access_tokens.builder +14 -0
  160. data/lib/3scale/backend/views/oauth_app_id_by_token.builder +4 -0
  161. data/lib/3scale/backend/worker.rb +87 -0
  162. data/lib/3scale/backend/worker_async.rb +88 -0
  163. data/lib/3scale/backend/worker_metrics.rb +44 -0
  164. data/lib/3scale/backend/worker_sync.rb +32 -0
  165. data/lib/3scale/bundler_shim.rb +17 -0
  166. data/lib/3scale/prometheus_server.rb +10 -0
  167. data/lib/3scale/tasks/connectivity.rake +41 -0
  168. data/lib/3scale/tasks/helpers.rb +3 -0
  169. data/lib/3scale/tasks/helpers/environment.rb +23 -0
  170. data/lib/3scale/tasks/stats.rake +131 -0
  171. data/lib/3scale/tasks/swagger.rake +46 -0
  172. data/licenses.xml +1215 -0
  173. metadata +227 -0
@@ -0,0 +1,3 @@
1
+ require_relative 'kinesis/adapter'
2
+ require_relative 'kinesis/exporter'
3
+ require_relative 'kinesis/job'
@@ -0,0 +1,180 @@
1
require '3scale/backend/logging'

module ThreeScale
  module Backend
    module Analytics
      module Kinesis
        class Adapter
          # Each Kinesis record is rounded to the nearest 5KB to calculate the
          # cost. Each of our events is a hash with a few keys: service,
          # metric, period, time, value, etc. This means that the size of one
          # of our events is nowhere near 5KB. For that reason, we need to make
          # sure that we send many events in each record.
          # The max size for each record is 1000KB. In each record batch, Kinesis
          # accepts a maximum of 4MB.
          #
          # We will try to optimize the batching process later. For now, I will
          # just put 1000 events in each record. And batches of 5 records max.
          #
          # When we receive a number of events not big enough to fill a record,
          # those events are marked as pending events.
          # Kinesis can return errors, when that happens, the events of the
          # records that failed are re-enqueued as pending events.
          # The list of pending events is stored in Redis, so we do not fail to
          # process any events in case of downtime or errors.

          include Logging

          EVENTS_PER_RECORD = 1000
          private_constant :EVENTS_PER_RECORD

          MAX_RECORDS_PER_BATCH = 5
          private_constant :MAX_RECORDS_PER_BATCH

          # 1000 events/record * 5 records/batch => 5000 events per batch.
          EVENTS_PER_BATCH = EVENTS_PER_RECORD*MAX_RECORDS_PER_BATCH
          private_constant :EVENTS_PER_BATCH

          # Redis set holding JSON-serialized events not yet sent to Kinesis.
          KINESIS_PENDING_EVENTS_KEY = 'send_to_kinesis:pending_events'
          private_constant :KINESIS_PENDING_EVENTS_KEY

          # We need to limit the number of pending events stored in Redis.
          # The Redis database can grow very quickly if a few consecutive jobs
          # fail. I am going to limit the number of pending events to 600k
          # (10 jobs approx.). If that limit is reached, we will disable the
          # creation of buckets in the system, but we will continue trying to
          # send the failed events. We will lose data, but that is better than
          # collapsing the whole Redis.
          # We will try to find a better alternative once we cannot afford to
          # miss events. Right now, we are just deleting the stats keys with
          # period = minute, so we can restore everything else.
          MAX_PENDING_EVENTS = 600_000
          private_constant :MAX_PENDING_EVENTS

          MAX_PENDING_EVENTS_REACHED_MSG =
            'Bucket creation has been disabled. Max pending events reached'.freeze
          private_constant :MAX_PENDING_EVENTS_REACHED_MSG

          # @param stream_name [String] Kinesis/Firehose delivery stream name.
          # @param kinesis_client [Object] client responding to #put_record_batch
          #   (presumably an Aws::Firehose client — see rescue below).
          # @param storage [Object] Redis-like client used for pending events.
          def initialize(stream_name, kinesis_client, storage)
            @stream_name = stream_name
            @kinesis_client = kinesis_client
            @storage = storage
          end

          # Sends the given events (together with any previously pending ones)
          # to Kinesis when there are enough to fill at least one record.
          # Otherwise, all of them are stored back as pending.
          def send_events(events)
            pending_events = stored_pending_events + events

            # Only disable indicating emergency if bucket storage is enabled.
            # We do not want to indicate emergency if it was disabled manually.
            if limit_pending_events_reached?(pending_events.size) && Stats::Storage.enabled?
              Stats::Storage.disable!(true)
              log_bucket_creation_disabled
            end

            # Batch events until we can fill at least one record
            if pending_events.size >= EVENTS_PER_RECORD
              failed_events = send_events_in_batches(pending_events)
              store_pending_events(failed_events)
            else
              store_pending_events(pending_events)
            end
          end

          # Sends the pending events to Kinesis, even if there are not enough of
          # them to fill 1 record.
          # Returns the number of events correctly sent to Kinesis
          def flush(limit = nil)
            pending_events = stored_pending_events
            events_to_flush = limit ? pending_events.take(limit) : pending_events
            failed_events = send_events_in_batches(events_to_flush)
            # Keep the non-flushed events plus whatever failed to be sent.
            store_pending_events(pending_events - events_to_flush + failed_events)
            events_to_flush.size - failed_events.size
          end

          # Number of events currently stored as pending in Redis.
          def num_pending_events
            storage.scard(KINESIS_PENDING_EVENTS_KEY)
          end

          private

          attr_reader :stream_name, :kinesis_client, :storage

          # Reads and deserializes the pending-events set from Redis.
          def stored_pending_events
            storage.smembers(KINESIS_PENDING_EVENTS_KEY).map do |pending_event|
              JSON.parse(pending_event, symbolize_names: true)
            end
          end

          def limit_pending_events_reached?(count)
            count > MAX_PENDING_EVENTS
          end

          def log_bucket_creation_disabled
            logger.info(MAX_PENDING_EVENTS_REACHED_MSG)
          end

          # Returns the failed events
          def send_events_in_batches(events)
            failed_events = []

            events.each_slice(EVENTS_PER_BATCH) do |events_slice|
              begin
                kinesis_resp = kinesis_client.put_record_batch(
                  { delivery_stream_name: stream_name,
                    records: events_to_kinesis_records(events_slice) })
                failed_events << failed_events_kinesis_resp(
                  kinesis_resp[:request_responses], events_slice)
              rescue Aws::Firehose::Errors::ServiceError
                # Service-level failure: re-enqueue the whole slice.
                failed_events << events_slice
              end
            end

            failed_events.flatten
          end

          def events_to_kinesis_records(events)
            # Record format expected by Kinesis:
            # [{ data: "data_event_group_1" }, { data: "data_event_group_2" }]
            events.each_slice(EVENTS_PER_RECORD).map do |events_slice|
              { data: events_to_pseudo_json(events_slice) }
            end
          end

          # We want to send to Kinesis events that can be read by Redshift.
          # Redshift expects events in JSON format without the '[]' and
          # without separating them with commas.
          # We put each event in a separated line, that will make their parsing
          # easier, but it is not needed by Redshift.
          def events_to_pseudo_json(events)
            events.map { |event| event.to_json }.join("\n") + "\n"
          end

          # Maps the per-record responses of a batch back to the events that
          # belonged to the records Kinesis reported as failed.
          def failed_events_kinesis_resp(request_responses, events)
            failed_records_indexes = failed_records_indexes(request_responses)
            failed_records_indexes.flat_map do |failed_record_index|
              events_index_start = failed_record_index*EVENTS_PER_RECORD
              events_index_end = events_index_start + EVENTS_PER_RECORD - 1
              events[events_index_start..events_index_end]
            end
          end

          # Indexes of the responses that carry a non-nil :error_code.
          def failed_records_indexes(request_responses)
            result = []
            request_responses.each_with_index do |response, index|
              result << index unless response[:error_code].nil?
            end
            result
          end

          # Replaces the whole pending-events set with the given events.
          # The del + sadd sequence is pipelined to minimize round trips.
          def store_pending_events(events)
            storage.pipelined do
              storage.del(KINESIS_PENDING_EVENTS_KEY)
              events.each do |event|
                storage.sadd(KINESIS_PENDING_EVENTS_KEY, event.to_json)
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,86 @@
1
module ThreeScale
  module Backend
    module Analytics
      module Kinesis

        # Schedules Kinesis export jobs.
        # The distributed locking algorithm that we use guarantees that two
        # jobs will not run at the same time, except in some corner cases such
        # as the failure of one of the Redis masters. That is acceptable here:
        # if two Kinesis jobs run concurrently, they will probably export the
        # same events to Kinesis, but those will not be imported twice into
        # Redshift, because the import method detects that two events are the
        # same and only imports one. That detection relies on the 'time_gen'
        # field attached to each event before it is sent to Kinesis.
        class Exporter
          SEND_TO_KINESIS_ENABLED_KEY = 'send_to_kinesis:enabled'.freeze
          private_constant :SEND_TO_KINESIS_ENABLED_KEY

          TTL_JOB_RUNNING_KEY_SEC = 360
          private_constant :TTL_JOB_RUNNING_KEY_SEC

          class << self
            # Turns on the exporting of events to Kinesis.
            def enable
              storage.set(SEND_TO_KINESIS_ENABLED_KEY, '1')
            end

            # Turns off the exporting of events to Kinesis.
            def disable
              storage.del(SEND_TO_KINESIS_ENABLED_KEY)
            end

            def enabled?
              (storage.get(SEND_TO_KINESIS_ENABLED_KEY) || '0').to_i == 1
            end

            # Enqueues a Kinesis job, provided that exporting is enabled and
            # the distributed lock can be acquired.
            def schedule_job
              return unless enabled?

              key = dist_lock.lock
              Resque.enqueue(Job, Time.now.utc, key, Time.now.utc.to_f) if key
            end

            # Synchronously flushes up to 'limit' pending events (all of them
            # when limit is nil), provided that exporting is enabled and the
            # lock can be acquired. Returns the number of events flushed.
            def flush_pending_events(limit = nil)
              return 0 unless enabled?

              key = dist_lock.lock
              return 0 unless key

              flushed = kinesis_adapter.flush(limit)
              job_finished(key) # flush is not asynchronous
              flushed
            end

            def num_pending_events
              kinesis_adapter.num_pending_events
            end

            # To be called by a kinesis job once it exits so other jobs can run
            def job_finished(lock_key)
              dist_lock.unlock if lock_key == dist_lock.current_lock_key
            end

            private

            def storage
              Backend::Storage.instance
            end

            def kinesis_adapter
              Stats::Storage.kinesis_adapter
            end

            def dist_lock
              @dist_lock ||= DistributedLock.new(self.name,
                                                 TTL_JOB_RUNNING_KEY_SEC,
                                                 storage)
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,135 @@
1
require '3scale/backend/logging'

module ThreeScale
  module Backend
    module Analytics
      module Kinesis
        # This job works as follows:
        # 1) Reads the pending events from the buckets that have not been read.
        # 2) Parses and filters those events.
        # 3) Sends the events to the Kinesis adapter.
        # 4) Updates the latest bucket read, to avoid processing buckets more
        #    than once.
        # The events are sent in batches to Kinesis, but the component that does
        # that batching is the Kinesis adapter.
        #
        # Before sending the events to Kinesis, we attach a 'time_gen' attribute
        # to each of them. This is a timestamp that indicates approximately when
        # the event was generated based on the bucket where it was stored.
        # We need this attribute because we will have repeated event keys in
        # Redis and we will need to know which one contains the most updated
        # value.
        # Notice that we do not send all the events that are in the buckets to
        # Kinesis. This job reads several buckets each time it runs. Some events
        # can be repeated across those buckets. However, the job will only send
        # to Kinesis the latest value (the one in the most recent bucket). This
        # reduces the information that we need to parse, filter, and send.
        # We need the extra field 'time_gen', because we cannot safely assume any
        # order in S3 when sending events to Kinesis.
        class Job < BackgroundJob
          @queue = :stats

          # Periods that are never exported to Kinesis.
          FILTERED_EVENT_PERIODS = %w(week eternity)
          private_constant :FILTERED_EVENT_PERIODS

          # We need to limit the amount of buckets that a job can process.
          # Otherwise, there is the possibility that the job would not finish
          # before its expiration time, and the next one would start processing
          # the same buckets.
          MAX_BUCKETS = 60
          private_constant :MAX_BUCKETS

          # Substrings ('/week', '/eternity') matched against raw stats keys
          # to filter out the periods above.
          FILTERED_EVENT_PERIODS_STR = FILTERED_EVENT_PERIODS.map do |period|
            "/#{period}".freeze
          end.freeze
          private_constant :FILTERED_EVENT_PERIODS_STR

          class << self
            include Logging

            # Entry point invoked by the background-job machinery.
            # Returns [true, status_message].
            def perform_logged(end_time_utc, lock_key, _enqueue_time)
              # end_time_utc will be a string when the worker processes this job.
              # The parameter is passed through Redis as a string. We need to
              # convert it back.
              events_sent = 0

              end_time = DateTime.parse(end_time_utc).to_time.utc
              pending_events = bucket_reader.pending_events_in_buckets(
                end_time_utc: end_time, max_buckets: MAX_BUCKETS)

              unless pending_events[:events].empty?
                events = prepare_events(pending_events[:latest_bucket],
                                        pending_events[:events])
                kinesis_adapter.send_events(events)
                bucket_reader.latest_bucket_read = pending_events[:latest_bucket]
                events_sent = events.size

                # We might use a different strategy to delete buckets in the
                # future, but for now, we are going to delete the buckets as they
                # are read
                bucket_storage.delete_range(pending_events[:latest_bucket])
              end

              # Release the distributed lock so the next job can run.
              Exporter.job_finished(lock_key)
              [true, msg_events_sent(events_sent)]
            end

            private

            # Filters, parses, and timestamps the raw events of a bucket.
            # Note: filter_events mutates 'events' in place; the lazy chain is
            # realized with .force so all work happens here.
            def prepare_events(bucket, events)
              filter_events(events)
              parsed_events = parse_events(events.lazy)
              add_time_gen_to_events(parsed_events, bucket_to_timestamp(bucket)).force
            end

            # Parses the events and discards the invalid ones
            def parse_events(events)
              events.map do |k, v|
                begin
                  Stats::StatsParser.parse(k, v)
                rescue Stats::StatsParser::StatsKeyValueInvalid
                  logger.notify("Invalid stats key-value. k: #{k}. v: #{v}")
                  nil
                end
              end.reject(&:nil?)
            end

            # We do not want to send all the events to Kinesis.
            # This method filters them.
            def filter_events(events)
              events.reject! do |event|
                FILTERED_EVENT_PERIODS_STR.any? do |filtered_period|
                  event.include?(filtered_period)
                end
              end
            end

            # Sets the same 'time_gen' on every event (lazily).
            def add_time_gen_to_events(events, time_gen)
              events.map { |event| event[:time_gen] = time_gen; event }
            end

            # Converts a bucket name into the 'YYYYMMDD HH:MM:SS' timestamp
            # used as the events' time_gen.
            def bucket_to_timestamp(bucket)
              DateTime.parse(bucket).to_time.utc.strftime('%Y%m%d %H:%M:%S')
            end

            def msg_events_sent(n_events)
              "#{n_events} events have been sent to the Kinesis adapter"
            end

            def bucket_storage
              Stats::Storage.bucket_storage
            end

            def bucket_reader
              Stats::Storage.bucket_reader
            end

            def kinesis_adapter
              Stats::Storage.kinesis_adapter
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ require_relative 'redshift/adapter'
2
+ require_relative 'redshift/importer'
3
+ require_relative 'redshift/job'
@@ -0,0 +1,367 @@
1
+ require 'pg'
2
+
3
+ module ThreeScale
4
+ module Backend
5
+ module Analytics
6
+ module Redshift
7
+ # This class imports the events stored by Kinesis in S3 into Redshift.
8
+ # It keeps track of the events that have been imported so it does not
9
+ # read twice the same S3 path.
10
+ #
11
+ # We store 'repeated' events in S3. This means that we can find several
12
+ # times the same {service, instance, uinstance, metric, period, timestamp}
13
+ # combination.
14
+ #
15
+ # In order to avoid storing repeated information in Redshift we need to
16
+ # perform UPSERTs. The algorithm followed is the one explained in the
17
+ # official Redshift documentation:
18
+ # http://docs.aws.amazon.com/redshift/latest/dg/t_updating-inserting-using-staging-tables-.html
19
+ # The process is as follows:
20
+ # 1) Create a temporary table with the data imported from S3, including
21
+ # duplicates.
22
+ # Two attributes can have nulls: cinstance and uinstance. We replace
23
+ # those nulls with ''. I have observed substantial performance gains
24
+ # because of this.
25
+ # 2) Perform the necessary operations in the temp table to remove
26
+ # duplicates. (In our case this basically consists of an inner-join).
27
+ # 3) Inside a transaction, delete all the events that are in the temp
28
+ # table from the final table. Next, insert the ones in the temp
29
+ # table into the final table. Finally, remove the temp table.
30
+ # 4) Last, we perform a vacuum, because Redshift does not automatically
31
+ # reclaim and reuse space that has been freed after deletes or
32
+ # updates. The vacuum operation also leaves the table sorted.
33
+ # More info:
34
+ # http://docs.aws.amazon.com/redshift/latest/dg/t_Reclaiming_storage_space202.html
35
+ # Right now, we are going to vacuum every time we insert new data,
36
+ # we will see if for performance reasons we need to do it less often.
37
+ class Adapter
38
+
39
# SQL statements used by the Redshift importer. Everything lives in the
# 'backend' schema; see TABLES below. Kept as raw strings because the
# exact SQL text matters.
module SQL
  SCHEMA = 'backend'.freeze

  # This importer relies on some tables or views that are created in
  # Redshift to function correctly.
  TABLES = { events: "#{SCHEMA}.events".freeze,
             latest_s3_path_read: "#{SCHEMA}.latest_s3_path_read".freeze,
             temp: "#{SCHEMA}.temp_events".freeze,
             unique_imported_events: "#{SCHEMA}.unique_imported_events".freeze }.freeze

  EVENT_ATTRS = %w(service cinstance uinstance metric period timestamp time_gen).freeze
  # Attributes that identify "the same" event across tables (all but time_gen).
  JOIN_EVENT_ATTRS = (EVENT_ATTRS - ['time_gen']).freeze

  EXISTING_TABLES =
    'SELECT table_name '\
    'FROM information_schema.tables '\
    "WHERE table_schema = '#{SCHEMA}';".freeze

  CREATE_TEMP_TABLES =
    "DROP TABLE IF EXISTS #{TABLES[:temp]} CASCADE; "\
    "CREATE TABLE #{TABLES[:temp]} (LIKE #{TABLES[:events]}); "\
    "DROP TABLE IF EXISTS #{TABLES[:unique_imported_events]} CASCADE; "\
    "CREATE TABLE #{TABLES[:unique_imported_events]} (LIKE #{TABLES[:events]}); "\
    'COMMIT;'.freeze

  CLEAN_TEMP_TABLES =
    "DROP TABLE #{TABLES[:unique_imported_events]}; "\
    "DROP TABLE #{TABLES[:temp]};".freeze

  LATEST_TIMESTAMP_READ = "SELECT s3_path FROM #{TABLES[:latest_s3_path_read]}".freeze

  VACUUM = "VACUUM FULL #{TABLES[:events]}".freeze

  class << self

    # Transactionally replaces outdated rows in the events table with the
    # newer rows gathered in unique_imported_events (delete-then-insert
    # upsert, as recommended by the Redshift docs).
    def insert_imported_events
      'BEGIN TRANSACTION; '\
      "DELETE FROM #{TABLES[:events]} "\
      "USING #{TABLES[:unique_imported_events]} u "\
      "WHERE #{TABLES[:events]}.timestamp >= "\
      "(SELECT MIN(timestamp) FROM #{TABLES[:unique_imported_events]}) "\
      "AND #{join_comparisons(TABLES[:events], 'u', JOIN_EVENT_ATTRS)} "\
      "AND (#{TABLES[:events]}.time_gen < u.time_gen); "\
      "INSERT INTO #{TABLES[:events]} "\
      "SELECT * FROM #{TABLES[:unique_imported_events]};" \
      'END TRANSACTION;'.freeze
    end

    # In order to get unique events, I use an inner-join with the same
    # table. There might be several rows with the same {service, instance,
    # uinstance, metric, period, timestamp} and different time_gen and
    # value. From those rows, we want to get just the one with the highest
    # time_gen. We cannot get the one with the highest value because we
    # support SET operations. That means that a value of '0' can be more
    # recent than '50'.
    #
    # The way to solve this is as follows: find out the max time_gen
    # grouping the 'repeated' events, and then perform an inner-join to
    # select the row with the most recent data.
    #
    # Note that we are only getting events with period != 'minute' and
    # service = master. This is what is required for the dashboard project.
    # We will need to change this when we start importing data to a
    # Redshift cluster used as a source for the stats API.
    def fill_table_unique_imported
      "INSERT INTO #{TABLES[:unique_imported_events]} "\
      'SELECT e.service, e.cinstance, e.uinstance, e.metric, e.period, '\
      'e.timestamp, e.time_gen, e.value '\
      'FROM '\
      '(SELECT service, cinstance, uinstance, metric, period, '\
      'MAX(time_gen) AS max_time_gen, timestamp '\
      "FROM #{TABLES[:temp]} "\
      "WHERE period != 'minute' AND service = '#{master_service}' "\
      'GROUP BY service, cinstance, uinstance, metric, period, timestamp) AS e1 '\
      "INNER JOIN #{TABLES[:temp]} e "\
      "ON #{join_comparisons('e', 'e1', JOIN_EVENT_ATTRS)} "\
      'AND e.time_gen = e1.max_time_gen ' \
      'GROUP BY e.service, e.cinstance, e.uinstance, e.metric, e.period, '\
      'e.timestamp, e.time_gen, e.value'.freeze
    end

    # Once we have imported some events and have made sure that we have
    # selected only the ones that are more recent, we need to delete the
    # ones that do not need to be imported. Those are the ones that have
    # a time_gen older than that of the same event in the events table.
    def delete_outdated_from_unique_imported
      "DELETE FROM #{TABLES[:unique_imported_events]} "\
      'USING (SELECT * '\
      "FROM #{TABLES[:events]} e "\
      'WHERE e.time_gen >= (SELECT MIN(time_gen) '\
      "FROM #{TABLES[:unique_imported_events]})) AS e "\
      "WHERE #{join_comparisons(
        TABLES[:unique_imported_events], 'e', JOIN_EVENT_ATTRS)} "\
      "AND (#{TABLES[:unique_imported_events]}.time_gen <= e.time_gen);".freeze
    end

    # COPY command that imports a given S3 path into the temp table.
    # NOTE(review): credentials are interpolated into the SQL text —
    # make sure this string is never logged.
    def import_s3_path(path, access_key_id, secret_access_key)
      "COPY #{TABLES[:temp]} "\
      "FROM '#{path}' "\
      "CREDENTIALS '#{amazon_credentials(access_key_id,
                                         secret_access_key)}' "\
      "FORMAT AS JSON 'auto' "\
      "TIMEFORMAT 'auto';"
    end

    # Replaces NULL cinstance/uinstance with '' in the temp table
    # (substantial performance gains were observed because of this).
    def delete_nulls_from_imported
      attrs_with_nulls = %w(cinstance uinstance)
      attrs_with_nulls.map do |attr|
        replace_nulls(TABLES[:temp], attr, '')
      end.join(' ')
    end

    # Overwrites the single-row table that records the latest S3 path read.
    def store_timestamp_read(timestamp)
      "DELETE FROM #{TABLES[:latest_s3_path_read]}; "\
      "INSERT INTO #{TABLES[:latest_s3_path_read]} VALUES ('#{timestamp}');"
    end

    # Counts groups of events that appear more than once (sanity check).
    def duplicated_events
      'SELECT COUNT(*) '\
      'FROM (SELECT COUNT(*) AS count '\
      "FROM #{TABLES[:events]} "\
      "GROUP BY #{JOIN_EVENT_ATTRS.join(',')}) AS group_counts "\
      'WHERE group_counts.count > 1;'
    end

    private

    def amazon_credentials(access_key_id, secret_access_key)
      "aws_access_key_id=#{access_key_id};"\
      "aws_secret_access_key=#{secret_access_key}"
    end

    def replace_nulls(table, attr, value)
      "UPDATE #{table} "\
      "SET #{attr} = '#{value}' "\
      "WHERE #{attr} IS NULL;"
    end

    # Given 2 tables and an array of attributes, generates a string
    # like this:
    # table1.attr1 = table2.attr1 AND table1.attr2 = table2.attr2 AND ...
    # This is helpful to build the WHERE clauses of certain JOINs.
    def join_comparisons(table1, table2, attrs)
      attrs.map do |attr|
        "#{table1}.#{attr} = #{table2}.#{attr}"
      end.join(' AND ') + ' '
    end

    def master_service
      Backend.configuration.master_service_id
    end

  end
end
193
+
194
# This private class is responsible for calculating the S3 paths (one per
# hour) that have not been imported into Redshift yet.
class S3EventPaths

  # The events in our S3 bucket are classified in paths.
  # Paths are created every hour.
  DIR_CREATION_INTERVAL = 60*60
  private_constant :DIR_CREATION_INTERVAL

  # When we read a path we want to be sure that no more events will be stored.
  # For that reason, we will wait a few minutes after the hour ends just to
  # be safe. For example, we will not read the path '2016/02/25/00' until
  # 2016-02-25 01:00 + DIR_BACKUP_TIME_S
  DIR_BACKUP_TIME_S = 60*10
  private_constant :DIR_BACKUP_TIME_S

  class << self

    # Returns the hourly UTC times following 'latest_read' whose paths are
    # complete and safe to import, oldest first. Stops at the first hour
    # that is still too recent.
    def pending_paths(latest_read)
      now = Time.now.utc
      first_pending = DateTime.parse(latest_read).to_time.utc + DIR_CREATION_INTERVAL

      paths = []
      first_pending.to_i.step(now.to_i, DIR_CREATION_INTERVAL) do |secs|
        moment = Time.at(secs)
        break unless safe_to_read?(now, moment)
        paths << moment.utc
      end
      paths
    end

    private

    # A path is safe to read once its hour is over and the backup grace
    # period has elapsed.
    def safe_to_read?(now, time)
      now - time > DIR_CREATION_INTERVAL + DIR_BACKUP_TIME_S
    end

  end

end
232
+ private_constant :S3EventPaths
233
+
234
# Name of the S3 bucket where backend events are exported.
S3_BUCKET = 'backend-events'.freeze
private_constant :S3_BUCKET

# Base s3:// URI prepended to every hourly path that gets imported.
S3_EVENTS_BASE_PATH = "s3://#{S3_BUCKET}/".freeze
private_constant :S3_EVENTS_BASE_PATH

# Tables that must already exist in Redshift before a regular
# import (insert_pending_events) can run.
REQUIRED_TABLES = [SQL::TABLES[:events],
                   SQL::TABLES[:latest_s3_path_read]].freeze
private_constant :REQUIRED_TABLES

# Raised when any of REQUIRED_TABLES is missing from Redshift.
MissingRequiredTables = Class.new(ThreeScale::Backend::Error)
# Raised when the 'latest S3 path read' table contains no rows.
MissingLatestS3PathRead = Class.new(ThreeScale::Backend::Error)
246
+
247
+ class << self
248
+
249
# Imports into Redshift every S3 hourly path generated after the last
# one read, oldest first, persisting progress after each hour.
# Returns the UTC Time of the last hour imported, or nil if there was
# nothing pending. Set silent to true to suppress progress output.
def insert_pending_events(silent = false)
  check_redshift_tables

  last_imported = nil
  S3EventPaths.pending_paths(latest_timestamp_read).each do |hour_utc|
    puts "Loading events generated in hour: #{hour_utc}" unless silent
    save_in_redshift(s3_path(hour_utc))
    save_latest_read(hour_utc)
    last_imported = hour_utc
  end
  last_imported
end
260
+
261
# This method import a specific S3 path into Redshift.
# Right now, its main use case consists of uploading past events to
# a path and importing only that path.
def insert_path(path)
  # Need to check that the 'events' table exists. Do not care about
  # 'latest_s3_path_read' in this case.
  events_table = SQL::TABLES[:events]
  unless existing_tables_with_schema.include?(events_table)
    raise MissingRequiredTables, 'Events table is missing'
  end

  save_in_redshift("#{S3_EVENTS_BASE_PATH}#{path}")
end
273
+
274
# Returns a timestamp with format 'YYYYMMDDHH' or nil if the latest
# timestamp read does not exist in the DB.
def latest_timestamp_read
  result = execute_command(SQL::LATEST_TIMESTAMP_READ)
  result.ntuples.zero? ? nil : result.first['s3_path']
end
281
+
282
# Returns whether the data in the DB is consistent. Right now, this
# method only checks if there are duplicated events, but it could be
# extended in the future.
def consistent_data?
  duplicates = execute_command(SQL.duplicated_events).first['count']
  duplicates.to_i == 0
end
288
+
289
+ private
290
+
291
# Shortcut for the global backend configuration object.
def config
  Backend.configuration
end
294
+
295
# Redshift connection parameters as a Hash, taken from the
# `redshift` section of the backend configuration.
def redshift_config
  config.redshift.to_h
end
298
+
299
# Memoized PostgreSQL-protocol connection to Redshift.
# NOTE(review): PGconn is the legacy alias for PG::Connection; the
# connection is cached per-process and never explicitly closed here.
def redshift_connection
  @connection ||= PGconn.new(redshift_config)
end
302
+
303
# Runs a SQL command against Redshift and returns the PG result set.
def execute_command(command)
  redshift_connection.exec(command)
end
306
+
307
# Validates the preconditions for a regular import: all required
# tables exist and the 'latest read' table has a row.
# Raises MissingRequiredTables or MissingLatestS3PathRead otherwise.
def check_redshift_tables
  raise MissingRequiredTables,
        'Some of the required tables are not in Redshift.' unless required_tables_exist?

  raise MissingLatestS3PathRead,
        "The 'latest read' table does not contain any values" unless latest_timestamp_read_exists?
end
317
+
318
# Names (without schema) of the tables currently present in Redshift.
def existing_tables
  rows = execute_command(SQL::EXISTING_TABLES)
  rows.map { |row| row['table_name'] }
end
321
+
322
# Existing table names qualified with the events schema prefix.
def existing_tables_with_schema
  existing_tables.map { |name| "#{SQL::SCHEMA}.#{name}" }
end
325
+
326
# True when every table in REQUIRED_TABLES is present in Redshift.
def required_tables_exist?
  (REQUIRED_TABLES - existing_tables_with_schema).empty?
end
332
+
333
# Copies the given S3 path into the temp tables and then runs the
# dedup/normalize pipeline that moves the events into the final table.
def save_in_redshift(path)
  import_s3_path(path)

  pipeline = [SQL.delete_nulls_from_imported,
              SQL.fill_table_unique_imported,
              SQL.delete_outdated_from_unique_imported,
              SQL.insert_imported_events,
              SQL::CLEAN_TEMP_TABLES,
              SQL::VACUUM]
  pipeline.each { |command| execute_command(command) }
end
342
+
343
# Persists the hour just imported as a 'YYYYMMDDHH' timestamp, the
# same format that latest_timestamp_read returns.
def save_latest_read(time_utc)
  execute_command(SQL.store_timestamp_read(time_utc.strftime('%Y%m%d%H')))
end
346
+
347
# Creates the temp tables and COPYs the given S3 path into them using
# the AWS credentials from the configuration.
def import_s3_path(path)
  execute_command(SQL::CREATE_TEMP_TABLES)
  copy_command = SQL.import_s3_path(path,
                                    config.aws_access_key_id,
                                    config.aws_secret_access_key)
  execute_command(copy_command)
end
352
+
353
# True when the 'latest read' table contains at least one row.
def latest_timestamp_read_exists?
  rows = execute_command(SQL::LATEST_TIMESTAMP_READ)
  !rows.ntuples.zero?
end
356
+
357
# Full s3:// URI of the hourly path ('YYYY/MM/DD/HH') for a UTC time.
def s3_path(time_utc)
  S3_EVENTS_BASE_PATH + time_utc.strftime('%Y/%m/%d/%H')
end
360
+
361
+ end
362
+
363
+ end
364
+ end
365
+ end
366
+ end
367
+ end