apisonator 3.2.1 → 3.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,32 +6,13 @@ module ThreeScale
  respond_with_404('service not found') unless Service.exists?(params[:service_id])
  end

- # This is very slow and needs to be disabled until the performance
- # issues are solved. In the meanwhile, the job will just return OK.
- =begin
- delete '' do |service_id|
- delete_stats_job_attrs = api_params Stats::DeleteJobDef
- delete_stats_job_attrs[:service_id] = service_id
- delete_stats_job_attrs[:from] = delete_stats_job_attrs[:from].to_i
- delete_stats_job_attrs[:to] = delete_stats_job_attrs[:to].to_i
- begin
- Stats::DeleteJobDef.new(delete_stats_job_attrs).run_async
- rescue DeleteServiceStatsValidationError => e
- [400, headers, { status: :error, error: e.message }.to_json]
- else
- { status: :to_be_deleted }.to_json
- end
- =end
-
- # This is an alternative to the above. It just adds the service to a
- # Redis set to mark it as "to be deleted".
- # Later a script can read that set and actually delete the keys.
- # Read the docs of the Stats::Cleaner class for more details.
+ # This adds the service to a Redis set to mark it as "to be deleted".
+ # Later a script can read that set and actually delete the keys. Read
+ # the docs of the Stats::Cleaner class for more details.
  #
- # Notice that this method ignores the "from" and "to" parameters. When
- # system calls this method, they're always interested in deleting all
- # the keys. They were just passing "from" and "to" to make the
- # implementation of the option above easier.
+ # Notice that this method ignores the "from" and "to" parameters used in
+ # previous versions. When system calls this method, they're always
+ # interested in deleting all the keys.
  delete '' do |service_id|
  Stats::Cleaner.mark_service_to_be_deleted(service_id)
  { status: :to_be_deleted }.to_json
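
For context on the endpoint above: the mark-then-sweep approach amounts to using a Redis set as a lightweight work queue. Below is a minimal sketch of the idea with a plain redis-rb client; the set name and helper names are illustrative, not the actual keys or methods used by Stats::Cleaner.

```ruby
require 'redis'

redis = Redis.new

# Illustrative set name; Stats::Cleaner keeps its own key for this.
TO_DELETE_SET = 'stats_service_ids_to_delete'

# The listener only marks the service (a single SADD), so the API call stays fast.
def mark_service_to_be_deleted(redis, service_id)
  redis.sadd(TO_DELETE_SET, service_id)
end

# A separate maintenance script later drains the set and deletes the actual stats keys.
def services_marked_for_deletion(redis)
  redis.smembers(TO_DELETE_SET)
end

mark_service_to_be_deleted(redis, '1001')
puts services_marked_for_deletion(redis).inspect # => ["1001"]
```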
@@ -6,11 +6,10 @@ module ThreeScale

  # The compacted hour in the params refers to the
  # TimeHacks.to_compact_s method.
- def alert_keys(service_id, app_id, discrete_utilization, compacted_hour_start)
+ def alert_keys(service_id, app_id, discrete_utilization)
  {
  already_notified: key_already_notified(service_id, app_id, discrete_utilization),
  allowed: key_allowed_set(service_id),
- current_max: key_current_max(service_id, app_id, compacted_hour_start),
  current_id: key_current_id
  }
  end
@@ -31,11 +30,6 @@ module ThreeScale
  "#{prefix}allowed_set"
  end

- def key_current_max(service_id, app_id, compacted_hour_start)
- prefix = key_prefix(service_id, app_id)
- "#{prefix}#{compacted_hour_start}/current_max"
- end
-
  def key_current_id
  'alerts/current_id'.freeze
  end
@@ -43,6 +37,7 @@ module ThreeScale

  extend self
  extend KeyHelpers
+ include Memoizer::Decorator

  ALERT_TTL = 24*3600 # 1 day (only one message per day)
  ## zero must be here and sorted, yes or yes
@@ -50,6 +45,16 @@ module ThreeScale
  FIRST_ALERT_BIN = ALERT_BINS.first
  RALERT_BINS = ALERT_BINS.reverse.freeze

+ def can_raise_more_alerts?(service_id, app_id)
+ allowed_bins = allowed_set_for_service(service_id).sort
+
+ return false if allowed_bins.empty?
+
+ # If the bin with the highest value has already been notified, there's
+ # no need to notify anything else.
+ not notified?(service_id, app_id, allowed_bins.last)
+ end
+
  def utilization(app_usage_reports)
  max_utilization = -1.0
  max_record = nil
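
To see why the early return in `can_raise_more_alerts?` saves work, consider the same check with plain in-memory data. This is only an illustration of the rule stated in the comment; the bin values are hypothetical, and the real ones come from ALERT_BINS and the per-service allowed set in Redis.

```ruby
# Hypothetical stand-ins for the Redis-backed helpers.
allowed_bins = [50, 80, 90, 100, 120]   # alert levels enabled for this service
notified     = { 120 => true }          # bins already alerted within ALERT_TTL

can_raise_more_alerts =
  if allowed_bins.empty?
    false
  else
    # Same rule as the comment above: if the bin with the highest value has
    # already been notified, nothing else needs to be notified.
    !notified[allowed_bins.max]
  end

puts can_raise_more_alerts # => false
```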
@@ -77,25 +82,12 @@ module ThreeScale

  def update_utilization(service_id, app_id, max_utilization, max_record, timestamp)
  discrete = utilization_discrete(max_utilization)
- max_utilization_i = (max_utilization * 100.0).round

- beginning_of_day = Period::Boundary.day_start(timestamp)
- period_hour = Period::Boundary.hour_start(timestamp).to_compact_s
- # UNIX timestamp for key expiration - add 1 day + 5 mins
- expire_at = (beginning_of_day + 86700).to_i
+ keys = alert_keys(service_id, app_id, discrete)

- keys = alert_keys(service_id, app_id, discrete, period_hour)
-
- already_alerted, allowed, current_max, _ = storage.pipelined do
+ already_alerted, allowed = storage.pipelined do
  storage.get(keys[:already_notified])
  storage.sismember(keys[:allowed], discrete)
- storage.get(keys[:current_max])
- storage.expireat(keys[:current_max], expire_at)
- end
-
- ## update the status of utilization
- if max_utilization_i > current_max.to_i
- storage.set(keys[:current_max], max_utilization_i)
  end

  if already_alerted.nil? && allowed && discrete.to_i > 0
@@ -129,6 +121,16 @@ module ThreeScale
  "#{record.current_value}/#{record.max_value}"
  end

+ def allowed_set_for_service(service_id)
+ storage.smembers(key_allowed_set(service_id)).map(&:to_i) # Redis returns strings always
+ end
+ memoize :allowed_set_for_service
+
+ def notified?(service_id, app_id, bin)
+ storage.get(key_already_notified(service_id, app_id, bin))
+ end
+ memoize :notified?
+
  def storage
  Storage.instance
  end
@@ -40,10 +40,8 @@ module ThreeScale
  private

  def self.first_traffic(service_id, application_id)
- key = Stats::Keys.applications_key_prefix(
- Stats::Keys.service_key_prefix(service_id)
- )
- if storage.sadd(key, encode_key(application_id))
+ if storage.sadd(Stats::Keys.set_of_apps_with_traffic(service_id),
+ encode_key(application_id))
  EventStorage.store(:first_traffic,
  { service_id: service_id,
  application_id: application_id,
@@ -32,8 +32,6 @@ module ThreeScale

  CONFIG_DELETE_STATS_BATCH_SIZE = 50
  private_constant :CONFIG_DELETE_STATS_BATCH_SIZE
- CONFIG_DELETE_STATS_PARTITION_BATCH_SIZE = 1000
- private_constant :CONFIG_DELETE_STATS_PARTITION_BATCH_SIZE

  @configuration = Configuration::Loader.new

@@ -54,13 +52,12 @@ module ThreeScale
  config.add_section(:analytics_redis, :server,
  :connect_timeout, :read_timeout, :write_timeout)
  config.add_section(:hoptoad, :service, :api_key)
- config.add_section(:stats, :bucket_size, :delete_batch_size, :delete_partition_batch_size)
+ config.add_section(:stats, :bucket_size, :delete_batch_size)
  config.add_section(:redshift, :host, :port, :dbname, :user, :password)
  config.add_section(:statsd, :host, :port)
  config.add_section(:internal_api, :user, :password)
  config.add_section(:master, :metrics)
  config.add_section(:worker_prometheus_metrics, :enabled, :port)
- config.add_section(:listener_prometheus_metrics, :enabled, :port)

  config.add_section(
  :async_worker,
@@ -125,9 +122,6 @@ module ThreeScale
  config.stats.delete_batch_size = parse_int(config.stats.delete_batch_size,
  CONFIG_DELETE_STATS_BATCH_SIZE)

- config.stats.delete_partition_batch_size = parse_int(config.stats.delete_partition_batch_size,
- CONFIG_DELETE_STATS_PARTITION_BATCH_SIZE)
-
  # often we don't have a log_file setting - generate it here from
  # the log_path setting.
  log_file = config.log_file
@@ -292,12 +292,6 @@ module ThreeScale
  end
  end

- class DeleteServiceStatsValidationError < Error
- def initialize(service_id, msg)
- super "Delete stats job context validation error. Service: #{service_id}. Error: #{msg}"
- end
- end
-
  class EndUsersNoLongerSupported < BadRequest
  def initialize
  super 'End-users are no longer supported, do not specify the user_id parameter'.freeze
@@ -32,25 +32,6 @@ module ThreeScale
  DEFAULT_WAIT_BEFORE_FETCHING_MORE_JOBS
  end

- def pop_from_queue
- begin
- encoded_job = @redis.blpop(*@queues, timeout: @fetch_timeout)
- rescue Redis::BaseConnectionError, Errno::ECONNREFUSED, Errno::EPIPE => e
- raise RedisConnectionError.new(e.message)
- rescue Redis::CommandError => e
- # Redis::CommandError from redis-rb can be raised for multiple
- # reasons, so we need to check the error message to distinguish
- # connection errors from the rest.
- if e.message == 'ERR Connection timed out'.freeze
- raise RedisConnectionError.new(e.message)
- else
- raise e
- end
- end
-
- encoded_job
- end
-
  def fetch
  encoded_job = pop_from_queue
  return nil if encoded_job.nil? || encoded_job.empty?
@@ -99,10 +80,11 @@ module ThreeScale

  # Re-instantiate Redis instance. This is needed to recover from
  # Errno::EPIPE, not sure if there are others.
- @redis = ThreeScale::Backend::QueueStorage.connection(
- ThreeScale::Backend.environment,
- ThreeScale::Backend.configuration
+ @redis = Redis::Namespace.new(
+ WorkerAsync.const_get(:RESQUE_REDIS_NAMESPACE),
+ redis: QueueStorage.connection(Backend.environment, Backend.configuration)
  )
+
  # If there is a different kind of error, it's probably a
  # programming error. Like sending an invalid blpop command to
  # Redis. In that case, let the worker crash.
@@ -111,12 +93,36 @@ module ThreeScale
  end
  end

+ rescue Exception => e
+ Worker.logger.notify(e)
+ ensure
  job_queue.close
  end

  def shutdown
  @shutdown = true
  end
+
+ private
+
+ def pop_from_queue
+ begin
+ encoded_job = @redis.blpop(*@queues, timeout: @fetch_timeout)
+ rescue Redis::BaseConnectionError, Errno::ECONNREFUSED, Errno::EPIPE => e
+ raise RedisConnectionError.new(e.message)
+ rescue Redis::CommandError => e
+ # Redis::CommandError from redis-rb can be raised for multiple
+ # reasons, so we need to check the error message to distinguish
+ # connection errors from the rest.
+ if e.message == 'ERR Connection timed out'.freeze
+ raise RedisConnectionError.new(e.message)
+ else
+ raise e
+ end
+ end
+
+ encoded_job
+ end
  end
  end
  end
@@ -17,7 +17,10 @@ module ThreeScale

  Backend::Logging::External.setup_rack self

- if Backend.configuration.listener_prometheus_metrics.enabled
+ # Notice that this cannot be specified via config, it needs to be an
+ # ENV because the metric server is started in Puma/Falcon
+ # "before_fork" and the configuration is not loaded at that point.
+ if ENV['CONFIG_LISTENER_PROMETHEUS_METRICS_ENABLED'].to_s.downcase.freeze == 'true'.freeze
  use Rack::Prometheus
  end

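
Because the flag above is now read straight from the environment, it has to be exported before Puma/Falcon boots rather than set in the config file. A small sketch of the same truthiness check in isolation; only the variable name comes from the diff, the helper is illustrative.

```ruby
# Same check as in the Rack builder: only the literal string "true" enables it.
def listener_prometheus_enabled?
  ENV['CONFIG_LISTENER_PROMETHEUS_METRICS_ENABLED'].to_s.downcase == 'true'
end

# e.g. started with: CONFIG_LISTENER_PROMETHEUS_METRICS_ENABLED=true bundle exec puma ...
puts listener_prometheus_enabled? ? 'Rack::Prometheus enabled' : 'Rack::Prometheus disabled'
```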
@@ -1,8 +1,4 @@
  require '3scale/backend/stats/codes_commons'
  require '3scale/backend/stats/period_commons'
  require '3scale/backend/stats/aggregator'
- require '3scale/backend/stats/delete_job_def'
- require '3scale/backend/stats/key_generator'
- require '3scale/backend/stats/partition_generator_job'
- require '3scale/backend/stats/partition_eraser_job'
  require '3scale/backend/stats/cleaner'
@@ -145,6 +145,16 @@ module ThreeScale
  application = Backend::Application.load(service_id,
  values[:application_id])

+ # The app could have been deleted at some point since the job was
+ # enqueued. No need to update alerts in that case.
+ next unless application
+
+ # The operations below are costly. They load all the usage limits
+ # and current usages to find the current utilization levels.
+ # That's why before that, we check if there are any alerts that
+ # can be raised.
+ next unless Alerts.can_raise_more_alerts?(service_id, values[:application_id])
+
  application.load_metric_names
  usage = Usage.application_usage(application, current_timestamp)
  status = Transactor::Status.new(service_id: service_id,
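
The two guards added above follow a cheap-check-before-expensive-work pattern: skip applications that were deleted, and only load limits and usages when an alert could still fire. A self-contained sketch of that ordering, with hypothetical lambdas standing in for the real Redis-backed lookups:

```ruby
# Hypothetical stand-ins: the real versions hit Redis.
load_application = ->(_service_id, app_id) { app_id == 'deleted' ? nil : { id: app_id } }
can_raise_alerts = ->(_service_id, _app_id) { false }   # cheap: one SMEMBERS + one GET
load_usages      = ->(_app) { raise 'expensive work we wanted to skip' }

service_id, app_id = '1001', 'app_1'

app = load_application.call(service_id, app_id)
if app.nil?
  puts 'app deleted since the job was enqueued; nothing to do'
elsif !can_raise_alerts.call(service_id, app_id)
  puts 'highest allowed bin already notified; skipping the costly usage load'
else
  load_usages.call(app)
end
```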
@@ -20,7 +20,14 @@ module ThreeScale
  key = counter_key(prefix_key, granularity.new(timestamp))
  expire_time = Stats::PeriodCommons.expire_time_for_granularity(granularity)

- store_key(cmd, key, value, expire_time)
+ # We don't need to store stats keys set to 0. It wastes Redis
+ # memory because for rate-limiting and stats, a key of set to 0
+ # is equivalent to a key that does not exist.
+ if cmd == :set && value == 0
+ storage.del(key)
+ else
+ store_key(cmd, key, value, expire_time)
+ end

  unless Stats::PeriodCommons::EXCLUDED_FOR_BUCKETS.include?(granularity)
  keys_for_bucket << key
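
The rationale in the new comment is that, for rate limiting and stats, a missing counter and a counter equal to 0 behave the same, so writing zeros only wastes memory. A minimal sketch of that decision with a plain redis-rb client; the key name and TTL are illustrative.

```ruby
require 'redis'

redis = Redis.new
key   = 'stats/{service:1001}/metric:hits/day:20230101' # illustrative key layout
value = 0

if value.zero?
  redis.del(key)                 # treat "0" the same as "never written"
else
  redis.set(key, value)
  redis.expire(key, 180 * 86400) # the real TTL depends on the granularity
end

puts redis.get(key).to_i # => 0 either way: a missing key and "0" read the same
```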
@@ -45,6 +45,12 @@ module ThreeScale
  STATS_KEY_PREFIX = 'stats/'.freeze
  private_constant :STATS_KEY_PREFIX

+ REDIS_CONN_ERRORS = [Redis::BaseConnectionError, Errno::ECONNREFUSED, Errno::EPIPE].freeze
+ private_constant :REDIS_CONN_ERRORS
+
+ MAX_RETRIES_REDIS_ERRORS = 3
+ private_constant :MAX_RETRIES_REDIS_ERRORS
+
  class << self
  include Logging
  def mark_service_to_be_deleted(service_id)
@@ -77,37 +83,73 @@ module ThreeScale
  logger.info("Going to delete the stats keys for these services: #{services.to_a}")

  unless services.empty?
- delete_successful = true
- redis_conns.each do |redis_conn|
+ _ok, failed = redis_conns.partition do |redis_conn|
  begin
  delete_keys(redis_conn, services, log_deleted_keys)
- # If it's a connection error, mark as failed and continue
- # cleaning other shards. If it's another kind of error, it
- # could be a bug, so better re-raise.
- rescue Redis::BaseConnectionError, Errno::ECONNREFUSED, Errno::EPIPE => e
- logger.error("Error while deleting stats of server #{redis_conn}: #{e}")
- delete_successful = false
- rescue Redis::CommandError => e
- # Redis::CommandError from redis-rb can be raised for multiple
- # reasons, so we need to check the error message to distinguish
- # connection errors from the rest.
- if e.message == 'ERR Connection timed out'.freeze
- logger.error("Error while deleting stats of server #{redis_conn}: #{e}")
- delete_successful = false
- else
- raise e
- end
+ true
+ rescue => e
+ handle_redis_exception(e, redis_conn)
+ false
  end
  end

- remove_services_from_delete_set(services) if delete_successful
+ with_retries { remove_services_from_delete_set(services) } if failed.empty?
+
+ failed.each do |failed_conn|
+ logger.error("Error while deleting stats of server #{failed_conn}")
+ end
  end

  logger.info("Finished deleting the stats keys for these services: #{services.to_a}")
  end

+ # Deletes all the stats keys set to 0.
+ #
+ # Stats keys set to 0 are useless and occupy Redis memory
+ # unnecessarily. They were generated due to a bug in previous versions
+ # of Apisonator.
+ # Ref: https://github.com/3scale/apisonator/pull/247
+ #
+ # As the .delete function, this one also receives a collection of
+ # instantiated Redis clients and those need to connect to Redis
+ # servers directly.
+ #
+ # @param [Array] redis_conns Instantiated Redis clients.
+ # @param [IO] log_deleted_keys IO where to write the logs. Defaults to
+ # nil (logs nothing).
+ def delete_stats_keys_set_to_0(redis_conns, log_deleted_keys: nil)
+ _ok, failed = redis_conns.partition do |redis_conn|
+ begin
+ delete_stats_keys_with_val_0(redis_conn, log_deleted_keys)
+ true
+ rescue => e
+ handle_redis_exception(e, redis_conn)
+ false
+ end
+ end
+
+ failed.each do |failed_conn|
+ logger.error("Error while deleting stats of server #{failed_conn}")
+ end
+ end
+
  private

+ def handle_redis_exception(exception, redis_conn)
+ # If it's a connection error, do nothing so we can continue with
+ # other shards. If it's another kind of error, it could be caused by
+ # a bug, so better re-raise.
+
+ case exception
+ when *REDIS_CONN_ERRORS
+ # Do nothing.
+ when Redis::CommandError
+ raise exception if exception.message != 'ERR Connection timed out'.freeze
+ else
+ raise exception
+ end
+ end
+
  # Returns a set with the services included in the
  # SET_WITH_SERVICES_MARKED_FOR_DELETION Redis set.
  def services_to_delete
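
The new public entry point `delete_stats_keys_set_to_0` is meant to be driven from a maintenance script. A hedged sketch of a possible invocation, assuming the constant path `ThreeScale::Backend::Stats::Cleaner` used elsewhere in this codebase and plain redis-rb clients pointing at each shard (hosts and file name are illustrative):

```ruby
require 'redis'
require '3scale/backend'

# One client per Redis server holding stats keys. As the doc comment notes,
# they must point at the servers directly, not at a proxy.
redis_conns = [
  Redis.new(url: 'redis://stats-shard-0:6379'), # illustrative hosts
  Redis.new(url: 'redis://stats-shard-1:6379')
]

File.open('deleted_zero_keys.log', 'w') do |log|
  ThreeScale::Backend::Stats::Cleaner.delete_stats_keys_set_to_0(
    redis_conns, log_deleted_keys: log
  )
end
```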
@@ -133,19 +175,21 @@ module ThreeScale
  cursor = 0

  loop do
- cursor, keys = redis_conn.scan(cursor, count: SCAN_SLICE)
+ with_retries do
+ cursor, keys = redis_conn.scan(cursor, count: SCAN_SLICE)

- to_delete = keys.select { |key| delete_key?(key, services) }
+ to_delete = keys.select { |key| delete_key?(key, services) }

- unless to_delete.empty?
- if log_deleted_keys
- values = redis_conn.mget(*(to_delete.to_a))
- to_delete.each_with_index do |k, i|
- log_deleted_keys.puts "#{k} #{values[i]}"
+ unless to_delete.empty?
+ if log_deleted_keys
+ values = redis_conn.mget(*(to_delete.to_a))
+ to_delete.each_with_index do |k, i|
+ log_deleted_keys.puts "#{k} #{values[i]}"
+ end
  end
- end

- redis_conn.del(to_delete)
+ redis_conn.del(to_delete)
+ end
  end

  break if cursor.to_i == 0
@@ -188,6 +232,43 @@ module ThreeScale
  # simply ignore those keys.
  nil
  end
+
+ def delete_stats_keys_with_val_0(redis_conn, log_deleted_keys)
+ cursor = 0
+
+ loop do
+ with_retries do
+ cursor, keys = redis_conn.scan(cursor, count: SCAN_SLICE)
+
+ stats_keys = keys.select { |k| is_stats_key?(k) }
+
+ unless stats_keys.empty?
+ values = redis_conn.mget(*stats_keys)
+ to_delete = stats_keys.zip(values).select { |_, v| v == '0'.freeze }.map(&:first)
+
+ unless to_delete.empty?
+ redis_conn.del(to_delete)
+ to_delete.each { |k| log_deleted_keys.puts k } if log_deleted_keys
+ end
+ end
+ end
+
+ break if cursor.to_i == 0
+
+ sleep(SLEEP_BETWEEN_SCANS)
+ end
+ end
+
+ def with_retries(max = MAX_RETRIES_REDIS_ERRORS)
+ retries = 0
+ begin
+ yield
+ rescue Exception => e
+ retries += 1
+ retry if retries < max
+ raise e
+ end
+ end
  end
  end
  end
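
As a closing note on the retry helper introduced above, the intent is simply "try up to MAX_RETRIES_REDIS_ERRORS times, then re-raise". A self-contained sketch of the same behavior outside the Cleaner class:

```ruby
MAX_RETRIES = 3 # mirrors MAX_RETRIES_REDIS_ERRORS in the diff

def with_retries(max = MAX_RETRIES)
  retries = 0
  begin
    yield
  rescue StandardError => e # the original rescues Exception; StandardError is the safer default
    retries += 1
    retry if retries < max
    raise e
  end
end

attempts = 0
begin
  with_retries do
    attempts += 1
    raise 'transient Redis error' # e.g. a SCAN interrupted by a connection blip
  end
rescue RuntimeError
  puts "gave up after #{attempts} attempts" # => gave up after 3 attempts
end
```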