apisonator 3.0.1.1 → 3.3.1

@@ -4,14 +4,36 @@ require 'rack'
 module ThreeScale
   module Backend
     class ListenerMetrics
-      REQUEST_TYPES = {
+      AUTH_AND_REPORT_REQUEST_TYPES = {
         '/transactions/authorize.xml' => 'authorize',
         '/transactions/oauth_authorize.xml' => 'authorize_oauth',
         '/transactions/authrep.xml' => 'authrep',
         '/transactions/oauth_authrep.xml' => 'authrep_oauth',
         '/transactions.xml' => 'report'
       }
-      private_constant :REQUEST_TYPES
+      private_constant :AUTH_AND_REPORT_REQUEST_TYPES
+
+      # Only the first match is taken into account, that's why for example,
+      # "/\/services\/.*\/stats/" needs to appear before "/\/services/"
+      INTERNAL_API_PATHS = [
+        [/\/services\/.*\/alert_limits/, 'alerts'.freeze],
+        [/\/services\/.*\/applications\/.*\/keys/, 'application_keys'.freeze],
+        [/\/services\/.*\/applications\/.*\/referrer_filters/, 'application_referrer_filters'.freeze],
+        [/\/services\/.*\/applications\/.*\/utilization/, 'utilization'.freeze],
+        [/\/services\/.*\/applications/, 'applications'.freeze],
+        [/\/services\/.*\/errors/, 'errors'.freeze],
+        [/\/events/, 'events'.freeze],
+        [/\/services\/.*\/metrics/, 'metrics'.freeze],
+        [/\/service_tokens/, 'service_tokens'.freeze],
+        [/\/services\/.*\/stats/, 'stats'.freeze],
+        [/\/services\/.*\/plans\/.*\/usagelimits/, 'usage_limits'.freeze],
+        [/\/services/, 'services'.freeze],
+      ].freeze
+      private_constant :INTERNAL_API_PATHS
+
+      # Most requests will be under 100ms, so use a higher granularity from there
+      TIME_BUCKETS = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.25, 0.5, 0.75, 1]
+      private_constant :TIME_BUCKETS

       class << self
         ERRORS_4XX_TO_TRACK = Set[403, 404, 409].freeze
@@ -27,9 +49,12 @@ module ThreeScale
         end

         def report_resp_code(path, resp_code)
-          Yabeda.apisonator_listener.response_codes.increment(
+          req_type = req_type(path)
+          prometheus_group = prometheus_group(req_type)
+
+          Yabeda.send(prometheus_group).response_codes.increment(
             {
-              request_type: REQUEST_TYPES[path],
+              request_type: req_type,
               resp_code: code_group(resp_code)
             },
             by: 1
@@ -37,8 +62,11 @@ module ThreeScale
         end

         def report_response_time(path, request_time)
-          Yabeda.apisonator_listener.response_times.measure(
-            { request_type: REQUEST_TYPES[path] },
+          req_type = req_type(path)
+          prometheus_group = prometheus_group(req_type)
+
+          Yabeda.send(prometheus_group).response_times.measure(
+            { request_type: req_type },
             request_time
           )
         end
@@ -69,8 +97,21 @@ module ThreeScale
             comment 'Response times'
             unit :seconds
             tags %i[request_type]
-            # Most requests will be under 100ms, so use a higher granularity from there
-            buckets [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.25, 0.5, 0.75, 1]
+            buckets TIME_BUCKETS
+          end
+        end
+
+        group :apisonator_listener_internal_api do
+          counter :response_codes do
+            comment 'Response codes'
+            tags %i[request_type resp_code]
+          end
+
+          histogram :response_times do
+            comment 'Response times'
+            unit :seconds
+            tags %i[request_type]
+            buckets TIME_BUCKETS
           end
         end
       end
@@ -93,6 +134,24 @@ module ThreeScale
             'unknown'.freeze
           end
         end
+
+        def req_type(path)
+          AUTH_AND_REPORT_REQUEST_TYPES[path] || internal_api_req_type(path)
+        end
+
+        def internal_api_req_type(path)
+          (_regex, type) = INTERNAL_API_PATHS.find { |(regex, _)| regex.match path }
+          type
+        end
+
+        # Returns the group as defined in .define_metrics
+        def prometheus_group(request_type)
+          if AUTH_AND_REPORT_REQUEST_TYPES.values.include? request_type
+            :apisonator_listener
+          else
+            :apisonator_listener_internal_api
+          end
+        end
       end
     end
   end
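
The ordering comment above `INTERNAL_API_PATHS` is load-bearing: `Enumerable#find` returns the first pair whose regex matches, so narrower patterns must precede broader ones. A minimal sketch of that lookup, where `PATHS` and `classify` are hypothetical stand-ins for `INTERNAL_API_PATHS` and `internal_api_req_type`:

```ruby
# First-match classification: order decides the winner.
PATHS = [
  [/\/services\/.*\/stats/, 'stats'.freeze],
  [/\/services/, 'services'.freeze],
].freeze

def classify(path)
  _regex, type = PATHS.find { |(regex, _)| regex.match(path) }
  type
end

classify('/services/42/stats') # => "stats"
classify('/services/42')       # => "services"
# If the order were reversed, '/services/42/stats' would match /\/services/
# first and be misclassified as 'services'.
```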
@@ -1,8 +1,4 @@
 require '3scale/backend/stats/codes_commons'
 require '3scale/backend/stats/period_commons'
 require '3scale/backend/stats/aggregator'
-require '3scale/backend/stats/delete_job_def'
-require '3scale/backend/stats/key_generator'
-require '3scale/backend/stats/partition_generator_job'
-require '3scale/backend/stats/partition_eraser_job'
 require '3scale/backend/stats/cleaner'
@@ -20,7 +20,14 @@ module ThreeScale
           key = counter_key(prefix_key, granularity.new(timestamp))
           expire_time = Stats::PeriodCommons.expire_time_for_granularity(granularity)

-          store_key(cmd, key, value, expire_time)
+          # We don't need to store stats keys set to 0. It wastes Redis
+          # memory because for rate-limiting and stats, a key set to 0
+          # is equivalent to a key that does not exist.
+          if cmd == :set && value == 0
+            storage.del(key)
+          else
+            store_key(cmd, key, value, expire_time)
+          end

           unless Stats::PeriodCommons::EXCLUDED_FOR_BUCKETS.include?(granularity)
             keys_for_bucket << key
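
The equivalence the new comment relies on is easy to verify: for counters read back with `.to_i`, a key holding `"0"` and a missing key are indistinguishable, and Redis increments treat a missing key as 0. A quick sketch with redis-rb, assuming a local Redis server on the default port:

```ruby
require 'redis' # redis-rb

redis = Redis.new

redis.set('stats/sample', 0)
redis.get('stats/sample').to_i # => 0

redis.del('stats/sample')
redis.get('stats/sample').to_i # => 0, because GET returns nil and nil.to_i == 0

# INCRBY also treats a missing key as 0 before incrementing.
redis.incrby('stats/sample', 3) # => 3
```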
@@ -45,6 +45,12 @@ module ThreeScale
         STATS_KEY_PREFIX = 'stats/'.freeze
         private_constant :STATS_KEY_PREFIX

+        REDIS_CONN_ERRORS = [Redis::BaseConnectionError, Errno::ECONNREFUSED, Errno::EPIPE].freeze
+        private_constant :REDIS_CONN_ERRORS
+
+        MAX_RETRIES_REDIS_ERRORS = 3
+        private_constant :MAX_RETRIES_REDIS_ERRORS
+
         class << self
           include Logging
           def mark_service_to_be_deleted(service_id)
@@ -77,37 +83,73 @@ module ThreeScale
            logger.info("Going to delete the stats keys for these services: #{services.to_a}")

            unless services.empty?
-             delete_successful = true
-             redis_conns.each do |redis_conn|
+             _ok, failed = redis_conns.partition do |redis_conn|
                begin
                  delete_keys(redis_conn, services, log_deleted_keys)
-               # If it's a connection error, mark as failed and continue
-               # cleaning other shards. If it's another kind of error, it
-               # could be a bug, so better re-raise.
-               rescue Redis::BaseConnectionError, Errno::ECONNREFUSED, Errno::EPIPE => e
-                 logger.error("Error while deleting stats of server #{redis_conn}: #{e}")
-                 delete_successful = false
-               rescue Redis::CommandError => e
-                 # Redis::CommandError from redis-rb can be raised for multiple
-                 # reasons, so we need to check the error message to distinguish
-                 # connection errors from the rest.
-                 if e.message == 'ERR Connection timed out'.freeze
-                   logger.error("Error while deleting stats of server #{redis_conn}: #{e}")
-                   delete_successful = false
-                 else
-                   raise e
-                 end
+                 true
+               rescue => e
+                 handle_redis_exception(e, redis_conn)
+                 false
                end
              end

-             remove_services_from_delete_set(services) if delete_successful
+             with_retries { remove_services_from_delete_set(services) } if failed.empty?
+
+             failed.each do |failed_conn|
+               logger.error("Error while deleting stats of server #{failed_conn}")
+             end
            end

            logger.info("Finished deleting the stats keys for these services: #{services.to_a}")
          end

+         # Deletes all the stats keys set to 0.
+         #
+         # Stats keys set to 0 are useless and occupy Redis memory
+         # unnecessarily. They were generated due to a bug in previous versions
+         # of Apisonator.
+         # Ref: https://github.com/3scale/apisonator/pull/247
+         #
+         # As the .delete function, this one also receives a collection of
+         # instantiated Redis clients and those need to connect to Redis
+         # servers directly.
+         #
+         # @param [Array] redis_conns Instantiated Redis clients.
+         # @param [IO] log_deleted_keys IO where to write the logs. Defaults to
+         #   nil (logs nothing).
+         def delete_stats_keys_set_to_0(redis_conns, log_deleted_keys: nil)
+           _ok, failed = redis_conns.partition do |redis_conn|
+             begin
+               delete_stats_keys_with_val_0(redis_conn, log_deleted_keys)
+               true
+             rescue => e
+               handle_redis_exception(e, redis_conn)
+               false
+             end
+           end
+
+           failed.each do |failed_conn|
+             logger.error("Error while deleting stats of server #{failed_conn}")
+           end
+         end
+
          private

+         def handle_redis_exception(exception, redis_conn)
+           # If it's a connection error, do nothing so we can continue with
+           # other shards. If it's another kind of error, it could be caused by
+           # a bug, so better re-raise.
+
+           case exception
+           when *REDIS_CONN_ERRORS
+             # Do nothing.
+           when Redis::CommandError
+             raise exception if exception.message != 'ERR Connection timed out'.freeze
+           else
+             raise exception
+           end
+         end
+
          # Returns a set with the services included in the
          # SET_WITH_SERVICES_MARKED_FOR_DELETION Redis set.
          def services_to_delete
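
The rewrite swaps the `delete_successful` flag for `Enumerable#partition`: the block returns true for shards that were cleaned and false for shards whose exception `handle_redis_exception` swallowed, which leaves the failed connections available for logging afterwards. A toy illustration of the idiom:

```ruby
conns = %i[shard_a shard_b shard_c]

ok, failed = conns.partition do |conn|
  begin
    raise 'connection refused' if conn == :shard_b # simulated connection error
    true
  rescue
    false # a failure sorts the connection into the second bucket
  end
end

ok     # => [:shard_a, :shard_c]
failed # => [:shard_b]
```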
@@ -133,19 +175,21 @@ module ThreeScale
            cursor = 0

            loop do
-             cursor, keys = redis_conn.scan(cursor, count: SCAN_SLICE)
+             with_retries do
+               cursor, keys = redis_conn.scan(cursor, count: SCAN_SLICE)

-             to_delete = keys.select { |key| delete_key?(key, services) }
+               to_delete = keys.select { |key| delete_key?(key, services) }

-             unless to_delete.empty?
-               if log_deleted_keys
-                 values = redis_conn.mget(*(to_delete.to_a))
-                 to_delete.each_with_index do |k, i|
-                   log_deleted_keys.puts "#{k} #{values[i]}"
+               unless to_delete.empty?
+                 if log_deleted_keys
+                   values = redis_conn.mget(*(to_delete.to_a))
+                   to_delete.each_with_index do |k, i|
+                     log_deleted_keys.puts "#{k} #{values[i]}"
+                   end
                  end
-               end

-               redis_conn.del(to_delete)
+                 redis_conn.del(to_delete)
+               end
              end

              break if cursor.to_i == 0
@@ -188,6 +232,43 @@ module ThreeScale
            # simply ignore those keys.
            nil
          end
+
+         def delete_stats_keys_with_val_0(redis_conn, log_deleted_keys)
+           cursor = 0
+
+           loop do
+             with_retries do
+               cursor, keys = redis_conn.scan(cursor, count: SCAN_SLICE)
+
+               stats_keys = keys.select { |k| is_stats_key?(k) }
+
+               unless stats_keys.empty?
+                 values = redis_conn.mget(*stats_keys)
+                 to_delete = stats_keys.zip(values).select { |_, v| v == '0'.freeze }.map(&:first)
+
+                 unless to_delete.empty?
+                   redis_conn.del(to_delete)
+                   to_delete.each { |k| log_deleted_keys.puts k } if log_deleted_keys
+                 end
+               end
+             end
+
+             break if cursor.to_i == 0
+
+             sleep(SLEEP_BETWEEN_SCANS)
+           end
+         end
+
+         def with_retries(max = MAX_RETRIES_REDIS_ERRORS)
+           retries = 0
+           begin
+             yield
+           rescue Exception => e
+             retries += 1
+             retry if retries < max
+             raise e
+           end
+         end
       end
     end
   end
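
`with_retries` simply re-runs the block until it succeeds or the retry budget (`MAX_RETRIES_REDIS_ERRORS`) is exhausted, then re-raises the last error. A stand-alone sketch of that behavior; note that the shipped helper rescues `Exception` (which also catches interrupts), while this sketch narrows to `StandardError`:

```ruby
def with_retries(max = 3)
  retries = 0
  begin
    yield
  rescue StandardError => e
    retries += 1
    retry if retries < max # re-run the block while attempts remain
    raise e                # budget spent: propagate the last error
  end
end

attempts = 0
with_retries do
  attempts += 1
  raise 'flaky' if attempts < 3 # fails twice, succeeds on the third try
end
attempts # => 3
```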
@@ -70,6 +70,12 @@ module ThreeScale
           key
         end

+        def set_of_apps_with_traffic(service_id)
+          Stats::Keys.applications_key_prefix(
+            Stats::Keys.service_key_prefix(service_id)
+          )
+        end
+
         # We want all the buckets to go to the same Redis shard.
         # The reason is that SUNION support in Twemproxy requires that the
         # supplied keys hash to the same server.
@@ -12,9 +12,6 @@ module ThreeScale
       GRANULARITY_EXPIRATION_TIME = { Period[:minute] => 180 }.freeze
       private_constant :GRANULARITY_EXPIRATION_TIME

-      PERMANENT_SERVICE_GRANULARITIES = (SERVICE_GRANULARITIES - GRANULARITY_EXPIRATION_TIME.keys).freeze
-      PERMANENT_EXPANDED_GRANULARITIES = (EXPANDED_GRANULARITIES - GRANULARITY_EXPIRATION_TIME.keys).freeze
-
       # We are not going to send metrics with granularity 'eternity' or
       # 'week' to Kinesis, so there is no point in storing them in Redis
       # buckets.
@@ -20,8 +20,14 @@ module ThreeScale
       def report(provider_key, service_id, transactions, context_info = {})
         service = Service.load_with_provider_key!(service_id, provider_key)

-        report_enqueue(service.id, transactions, context_info)
-        notify_report(provider_key, transactions.size)
+        # A usage of 0 does not affect rate-limits or stats, so we do not need
+        # to report it.
+        filtered_transactions = filter_usages_with_0(transactions.clone)
+
+        return if filtered_transactions.empty?
+
+        report_enqueue(service.id, filtered_transactions, context_info)
+        notify_report(provider_key, filtered_transactions.size)
       end

       def authorize(provider_key, params, context_info = {})
@@ -137,9 +143,17 @@ module ThreeScale

         usage = params[:usage]

-        if (usage || params[:log]) && status.authorized?
+        filtered_usage = filter_metrics_without_inc(usage.clone) if usage
+
+        if ((filtered_usage && !filtered_usage.empty?) || params[:log]) && status.authorized?
           application_id = status.application.id
-          report_enqueue(status.service_id, { 0 => {"app_id" => application_id, "usage" => usage, "log" => params[:log] } }, request: { extensions: request_info[:extensions] })
+
+          report_enqueue(
+            status.service_id,
+            { 0 => {"app_id" => application_id, "usage" => filtered_usage, "log" => params[:log] } },
+            request: { extensions: request_info[:extensions] }
+          )
+
           notify_authrep(provider_key, usage ? 1 : 0)
         else
           notify_authorize(provider_key)
@@ -182,6 +196,19 @@ module ThreeScale
         end
       end

+      def filter_usages_with_0(transactions)
+        # There are plenty of existing tests using both a string and a symbol
+        # when accessing the usage.
+        transactions.delete_if do |_idx, tx|
+          (usage = tx['usage'.freeze] || tx[:usage]) or next
+          filter_metrics_without_inc(usage).empty?
+        end
+      end
+
+      def filter_metrics_without_inc(usage)
+        usage.delete_if { |_metric, delta| delta.to_s == '0'.freeze }
+      end
+
       def storage
         Storage.instance
       end
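
Both helpers mutate their argument in place via `Hash#delete_if`, which is why the callers above pass `transactions.clone` and `usage.clone`. A worked run of the two filters (definitions copied from the diff, frozen-string suffixes dropped for brevity):

```ruby
def filter_metrics_without_inc(usage)
  usage.delete_if { |_metric, delta| delta.to_s == '0' }
end

def filter_usages_with_0(transactions)
  transactions.delete_if do |_idx, tx|
    (usage = tx['usage'] || tx[:usage]) or next # no usage at all: keep the tx
    filter_metrics_without_inc(usage).empty?    # all deltas were 0: drop the tx
  end
end

transactions = {
  0 => { 'app_id' => 'a', 'usage' => { 'hits' => '0' } },           # all-zero usage
  1 => { 'app_id' => 'b', 'usage' => { 'hits' => 2, 'other' => 0 } } # mixed usage
}

filter_usages_with_0(transactions)
# => { 1 => { 'app_id' => 'b', 'usage' => { 'hits' => 2 } } }
# Transaction 0 is dropped entirely; only the zero-delta metric is stripped from 1.
```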
@@ -30,9 +30,13 @@ module ThreeScale
       end

       def notify(provider_key, usage)
-        # batch several notifications together so that we can process just one
+        # We need the master service ID to report its metrics. If it's not
+        # set, we don't need to notify anything.
+        # Batch several notifications together so that we can process just one
         # job for a group of them.
-        notify_batch(provider_key, usage)
+        unless configuration.master_service_id.to_s.empty?
+          notify_batch(provider_key, usage)
+        end
       end

       def notify_batch(provider_key, usage)
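
The `.to_s.empty?` guard covers both an unset and a blank `master_service_id`, since `nil.to_s` is the empty string:

```ruby
nil.to_s.empty?  # => true  (master_service_id not configured)
''.to_s.empty?   # => true  (configured but blank)
'42'.to_s.empty? # => false (notify proceeds)
```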
@@ -7,8 +7,6 @@ module ThreeScale
       extend Configurable
       @queue = :main

-      InvalidMasterServiceId = Class.new(ThreeScale::Backend::Error)
-
       class << self
         def perform_logged(provider_key, usage, timestamp, _enqueue_time)
           application_id = Application.load_id_by_key(master_service_id, provider_key)
@@ -16,12 +14,42 @@ module ThreeScale
           if application_id && Application.exists?(master_service_id, application_id)
             master_metrics = Metric.load_all(master_service_id)

-            ProcessJob.perform([{
-              service_id: master_service_id,
-              application_id: application_id,
-              timestamp: timestamp,
-              usage: master_metrics.process_usage(usage)
-            }])
+            begin
+              ProcessJob.perform([{
+                service_id: master_service_id,
+                application_id: application_id,
+                timestamp: timestamp,
+                usage: master_metrics.process_usage(usage)
+              }])
+            rescue MetricInvalid => e
+              # This happens when the master account in Porta does not have
+              # the notify metrics defined (by default "transactions" and
+              # "transactions/authorize"). These metrics need to be created in
+              # Porta, Apisonator does not have a way to guarantee that
+              # they're defined.
+              # Notice that this rescue prevents the job from being retried.
+              # Apisonator can't know when the metrics will be created (if
+              # ever) so it's better to log the error rather than retrying
+              # these jobs for an undefined period of time.
+              Worker.logger.notify(e)
+              return [false, "#{e}"]
+            rescue TransactionTimestampNotWithinRange => e
+              # This is very unlikely to happen. The timestamps in a notify
+              # job are not set by users, they are set by the listeners. If
+              # this error happens it might mean that:
+              # a) The worker started processing this job way after the
+              # listener produced it. This can happen for example if we make
+              # some requests to a listener with no workers. The listeners
+              # will enqueue some notify jobs. If we start a worker hours
+              # later, we might see this error.
+              # b) There's some kind of clock skew issue.
+              # c) There's a bug.
+              #
+              # We can't raise here, because then, the job will be retried,
+              # but it's going to fail always if it has an old timestamp.
+              Worker.logger.notify(e)
+              return [false, "#{provider_key} #{application_id} #{e}"]
+            end
           end
           [true, "#{provider_key} #{application_id || '--'}"]
         end
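
The two rescues translate permanent failures into the job's `[ok, message]` result instead of re-raising, which is what keeps the queue from retrying a job that can never succeed. A self-contained sketch of that convention, where `run_job` and its block are hypothetical stand-ins for `perform_logged` and `ProcessJob.perform`:

```ruby
def run_job(&do_work)
  do_work.call
  [true, 'ok'.freeze]
rescue ArgumentError => e # stands in for MetricInvalid: retrying won't fix it
  [false, e.message]      # returning instead of raising prevents endless retries
end

run_job { :done }                         # => [true, "ok"]
run_job { raise ArgumentError, 'metric' } # => [false, "metric"]
```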
@@ -29,15 +57,7 @@ module ThreeScale
         private

         def master_service_id
-          value = configuration.master_service_id
-
-          unless value
-            raise InvalidMasterServiceId,
-                  "Can't find master service id. Make sure the \"master_service_id\" "\
-                  'configuration value is set correctly'
-          end
-
-          value.to_s
+          configuration.master_service_id.to_s
         end
       end
     end