inst-jobs-statsd 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/lib/inst-jobs-statsd.rb +26 -0
  3. data/lib/inst_jobs_statsd/default_tracking.rb +15 -0
  4. data/lib/inst_jobs_statsd/jobs_tracker.rb +25 -0
  5. data/lib/inst_jobs_statsd/naming.rb +49 -0
  6. data/lib/inst_jobs_statsd/stats/counters/failed.rb +28 -0
  7. data/lib/inst_jobs_statsd/stats/counters/orphaned.rb +34 -0
  8. data/lib/inst_jobs_statsd/stats/counters/run.rb +21 -0
  9. data/lib/inst_jobs_statsd/stats/counters.rb +10 -0
  10. data/lib/inst_jobs_statsd/stats/periodic/failed.rb +21 -0
  11. data/lib/inst_jobs_statsd/stats/periodic/queue.rb +49 -0
  12. data/lib/inst_jobs_statsd/stats/periodic/run.rb +38 -0
  13. data/lib/inst_jobs_statsd/stats/periodic.rb +91 -0
  14. data/lib/inst_jobs_statsd/stats/timing/failed.rb +17 -0
  15. data/lib/inst_jobs_statsd/stats/timing/perform.rb +29 -0
  16. data/lib/inst_jobs_statsd/stats/timing/pop.rb +28 -0
  17. data/lib/inst_jobs_statsd/stats/timing.rb +27 -0
  18. data/lib/inst_jobs_statsd/version.rb +3 -0
  19. data/spec/factories/jobs.rb +9 -0
  20. data/spec/factories/workers.rb +9 -0
  21. data/spec/gemfiles/42.gemfile +7 -0
  22. data/spec/gemfiles/42.gemfile.lock +201 -0
  23. data/spec/gemfiles/50.gemfile +7 -0
  24. data/spec/gemfiles/50.gemfile.lock +224 -0
  25. data/spec/gemfiles/51.gemfile +7 -0
  26. data/spec/gemfiles/51.gemfile.lock +224 -0
  27. data/spec/inst_jobs_statsd/jobs_tracker_spec.rb +30 -0
  28. data/spec/inst_jobs_statsd/naming_spec.rb +26 -0
  29. data/spec/inst_jobs_statsd/stats/counters/failed_spec.rb +20 -0
  30. data/spec/inst_jobs_statsd/stats/counters/orphaned_spec.rb +27 -0
  31. data/spec/inst_jobs_statsd/stats/counters/run_spec.rb +27 -0
  32. data/spec/inst_jobs_statsd/stats/periodic/failed_spec.rb +31 -0
  33. data/spec/inst_jobs_statsd/stats/periodic/queue_spec.rb +95 -0
  34. data/spec/inst_jobs_statsd/stats/periodic/run_spec.rb +53 -0
  35. data/spec/inst_jobs_statsd/stats/periodic_spec.rb +63 -0
  36. data/spec/inst_jobs_statsd/stats/timing/failed_spec.rb +25 -0
  37. data/spec/inst_jobs_statsd/stats/timing/perform_spec.rb +35 -0
  38. data/spec/inst_jobs_statsd/stats/timing/pop_spec.rb +34 -0
  39. data/spec/inst_jobs_statsd/stats/timing_spec.rb +35 -0
  40. data/spec/inst_statsd/default_tracking_spec.rb +16 -0
  41. data/spec/matchers.rb +3 -0
  42. data/spec/setup_test_db.rb +41 -0
  43. data/spec/spec_helper.rb +63 -0
  44. metadata +327 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ea3cc2f583c0d355a0ab7440affd123343fa8de1
4
+ data.tar.gz: 7fca023059959468544941e582d9d805caf2f3e1
5
+ SHA512:
6
+ metadata.gz: 116fada163459dab67b7d70a00d0a8191438636bffe7fa5c95e25b06ad45a1ac8a60e28f628b50333821f109cdca3e182d75a1eb145af54b9992b48d1f067a46
7
+ data.tar.gz: fe822cd09c1421b0d694b2ea8255e34f47e98b402275e9ba4c4b262f1b953127a00f122b58376299971c02a2b1fec9305529927db5ad49a817df55ca70749ef7
@@ -0,0 +1,26 @@
1
+ require 'inst-jobs'
2
+ require 'inst_statsd'
3
+
4
+ require_relative 'inst_jobs_statsd/version'
5
+
6
+ require_relative 'inst_jobs_statsd/default_tracking'
7
+ require_relative 'inst_jobs_statsd/jobs_tracker'
8
+
9
+ require_relative 'inst_jobs_statsd/naming'
10
+
11
+ require_relative 'inst_jobs_statsd/stats/counters'
12
+ require_relative 'inst_jobs_statsd/stats/counters/failed'
13
+ require_relative 'inst_jobs_statsd/stats/counters/orphaned'
14
+ require_relative 'inst_jobs_statsd/stats/counters/run'
15
+
16
+ require_relative 'inst_jobs_statsd/stats/periodic'
17
+ require_relative 'inst_jobs_statsd/stats/periodic/failed'
18
+ require_relative 'inst_jobs_statsd/stats/periodic/queue'
19
+ require_relative 'inst_jobs_statsd/stats/periodic/run'
20
+
21
+ require_relative 'inst_jobs_statsd/stats/timing'
22
+ require_relative 'inst_jobs_statsd/stats/timing/failed'
23
+ require_relative 'inst_jobs_statsd/stats/timing/perform'
24
+ require_relative 'inst_jobs_statsd/stats/timing/pop'
25
+
26
+ ::InstStatsd::DefaultTracking.include InstJobsStatsd::DefaultTracking
@@ -0,0 +1,15 @@
1
# Provides InstStatsd::DefaultTracking.track_jobs, so that job tracking is
# switched on in the same manner as InstStatsd::DefaultTracking.track_sql etc.
module InstJobsStatsd
  module DefaultTracking
    # Class-level methods added to whatever includes DefaultTracking.
    module ClassMethods
      # Builds a JobsTracker the first time it is called and returns that
      # same tracker on every later call.
      def track_jobs
        @jobs_tracker = JobsTracker.new unless @jobs_tracker
        @jobs_tracker
      end
    end

    # Include hook: extends the including module/class with ClassMethods
    # so that `track_jobs` is callable directly on it.
    def self.included(base)
      base.extend(ClassMethods)
    end
  end
end
@@ -0,0 +1,25 @@
1
module InstJobsStatsd
  # Enables every counter, periodic gauge and timing stat defined under
  # InstJobsStatsd::Stats. Creating an instance is what turns the stats on.
  class JobsTracker
    # Creates a tracker, runs the given block while that tracker is recorded
    # as the current one, and returns the tracker.
    #
    # The current-tracker marker is cleared in an `ensure` so that an
    # exception raised by the block cannot leave a stale tracker behind.
    #
    # @yield the work to run while tracking is active
    # @return [JobsTracker] the tracker created for this call
    def self.track
      tracking = @current_tracking = new
      yield
      tracking
    ensure
      @current_tracking = nil
    end

    # Enables each stat, in order: counters, periodic gauges, timings.
    def initialize
      [
        Stats::Counters::Failed,
        Stats::Counters::Orphaned,
        Stats::Counters::Run,
        Stats::Periodic::Failed,
        Stats::Periodic::Queue,
        Stats::Periodic::Run,
        Stats::Timing::Failed,
        Stats::Timing::Perform,
        Stats::Timing::Pop
      ].each(&:enable)
    end
  end
end
@@ -0,0 +1,49 @@
1
module InstJobsStatsd
  # Builds the stat names reported by this gem.
  module Naming
    BASENAME = 'delayedjob'.freeze

    # Root prefix shared by every stat name.
    # TODO: Make this configurable
    def self.basename
      BASENAME
    end

    # Returns the list of names a stat should be reported under: always the
    # plain "<basename>.<stat_name>", plus a per-job tagged variant when one
    # can be derived from the job.
    def self.qualified_names(stat_name, job)
      plain = "#{basename}.#{stat_name}"
      per_job = tagged_stat(plain, job)
      per_job.present? ? [plain, per_job] : [plain]
    end

    # Appends a ".tag.<object>[.<method>]" suffix to the stat name so that it
    # is unique per job type. Returns nil when there is no job or no usable
    # object tag.
    def self.tagged_stat(stat_name, job)
      return unless job

      obj_tag, method_tag = job_tags(job)
      return if obj_tag.blank?

      method_suffix = method_tag.present? ? ".#{method_tag}" : ''
      "#{stat_name}.tag.#{obj_tag}#{method_suffix}"
    end

    # Splits a job tag such as "Foo#bar" or "Foo.bar" into "Foo" and "bar",
    # escaping each part so it is valid inside a statsd name.
    # Returns nil when the job or its tag is missing, or when the tag
    # contains "Class:0x".
    def self.job_tags(job)
      raw_tag = job && job.tag
      return unless raw_tag
      return if raw_tag =~ /Class:0x/

      parts = raw_tag.split(/[\.#]/, 2).map do |part|
        InstStatsd::Statsd.escape(part).gsub('::', '-')
      end
      obj_tag, method_tag = parts

      method_tag.present? ? [obj_tag, method_tag] : [obj_tag]
    end
  end
end
@@ -0,0 +1,28 @@
1
module InstJobsStatsd
  module Stats
    module Counters
      # Counts failed jobs: one :failed count is reported each time a
      # Delayed::Job::Failed record is created.
      module Failed
        # Mixed into Delayed::Job::Failed; registers an after_create
        # callback that reports the newly created record.
        module AfterCreateHook
          def self.included(base)
            base.after_create { InstJobsStatsd::Stats::Counters::Failed.report_failed_count(self) }
          end
        end

        def self.enable
          enable_failed_count
        end

        # Installs the hook once; does nothing when Delayed::Job::Failed
        # already has AfterCreateHook among its ancestors.
        def self.enable_failed_count
          failed_jobs = Delayed::Job::Failed
          failed_jobs.include(AfterCreateHook) unless failed_jobs < AfterCreateHook
        end

        def self.report_failed_count(job)
          Counters.report_count(:failed, 1, job: job)
        end
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
module InstJobsStatsd
  module Stats
    module Counters
      # Counts jobs that look orphaned by their worker.
      module Orphaned
        def self.enable
          enable_orphaned_count
        end

        # The idea behind the orphaned count: the work queue is designed so
        # that a worker has only one job locked at a time. If, just before a
        # job is performed, other jobs are locked_by the *same* worker, they
        # must have been orphaned -- that worker is never going to pick them
        # up and run them.
        # This is based on the symptom seen in AMS-447, where multiple rows
        # of the jobs table can be (incorrectly) updated by the same query.
        def self.enable_orphaned_count
          Delayed::Worker.lifecycle.before(:perform) { |_worker, job| report_orphaned_count(job) }
        end

        # Reports how many other jobs share this job's locked_by and
        # locked_at values; nothing is reported when there are none.
        def self.report_orphaned_count(job)
          orphans = Delayed::Job
                    .where('locked_by = ? AND locked_at = ? AND id <> ?', job.locked_by, job.locked_at, job.id)
                    .count
          return if orphans.zero?

          Counters.report_count(:orphaned, orphans, job: job)
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module InstJobsStatsd
  module Stats
    module Counters
      # Counts jobs as they are about to be performed.
      module Run
        def self.enable
          enable_run_count
        end

        # Hooks the worker lifecycle so a :run count is reported before
        # each job is performed.
        def self.enable_run_count
          Delayed::Worker.lifecycle.before(:perform) { |_worker, job| report_run_count(job) }
        end

        # Reports a single :run count for the given job.
        def self.report_run_count(job)
          Counters.report_count(:run, 1, job: job)
        end
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
module InstJobsStatsd
  module Stats
    # Shared entry point for counter-style stats.
    module Counters
      # Sends `value` as a count for every qualified name of `stat`
      # (the plain name, plus the per-job tagged name when `job` is given
      # and yields one).
      def self.report_count(stat, value, job: nil, sample_rate: 1)
        InstStatsd::Statsd.count(Naming.qualified_names(stat, job), value, sample_rate)
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module InstJobsStatsd
  module Stats
    module Periodic
      # Periodic gauge for the number of failed job records.
      module Failed
        def self.enable
          enable_failed_depth
        end

        # Makes sure the periodic callbacks exist, then registers the
        # failed-depth report with them.
        def self.enable_failed_depth
          Periodic.enable_callbacks
          Periodic.add(lambda { report_failed_depth })
        end

        # Reports the current Delayed::Job::Failed row count as :failed_depth.
        def self.report_failed_depth
          Periodic.report_gauge(:failed_depth, Delayed::Job::Failed.count)
        end
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
module InstJobsStatsd
  module Stats
    module Periodic
      # Periodic gauges describing jobs that are queued but not running.
      module Queue
        def self.enable
          enable_queue_depth
          enable_queue_age
        end

        # Registers the queue-depth report with the periodic callbacks.
        def self.enable_queue_depth
          Periodic.enable_callbacks
          Periodic.add(-> { report_queue_depth })
        end

        # Registers the queue-age report with the periodic callbacks.
        def self.enable_queue_age
          Periodic.enable_callbacks
          Periodic.add(-> { report_queue_age })
        end

        # Reports the number of queued jobs as :queue_depth.
        def self.report_queue_depth
          # Delayed::Job.jobs_count(:current) is not used here because it
          # includes running / locked jobs.
          Periodic.report_gauge(:queue_depth, queued_jobs_scope.count)
        end

        # Reports :queue_age_total and :queue_age_max, the summed and the
        # largest difference between the current database time and each
        # queued job's run_at.
        #
        # Limit the jobs included in these gauges to prevent blowing up
        # memory usage while iterating the list. This has the adverse effect
        # of artificially capping the metric, but the limit should be high
        # enough that the metric still has a meaningful range -- and even if
        # the count is capped, the metric will continue to grow if the queue
        # is actually stalled.
        def self.report_queue_age
          # Read the clock once so every age is measured against the same
          # instant instead of re-reading it for each row.
          now = Delayed::Job.db_time_now
          age_secs = queued_jobs_scope.limit(10_000).pluck(:run_at).map { |run_at| now - run_at }

          Periodic.report_gauge(:queue_age_total, age_secs.sum)
          # `max` is nil for an empty queue; report 0 rather than a nil gauge.
          Periodic.report_gauge(:queue_age_max, age_secs.max || 0)
        end

        # Current jobs that are either unlocked or locked by 'on_hold',
        # i.e. not running.
        def self.queued_jobs_scope
          Delayed::Job
            .current
            .where("locked_at IS NULL OR locked_by = 'on_hold'") # not running
        end
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
module InstJobsStatsd
  module Stats
    module Periodic
      # Periodic gauges describing jobs that are currently running.
      module Run
        def self.enable
          enable_run_depth
          enable_run_age
        end

        # Registers the run-depth report with the periodic callbacks.
        def self.enable_run_depth
          Periodic.enable_callbacks
          Periodic.add(-> { report_run_depth })
        end

        # Registers the run-age report with the periodic callbacks.
        def self.enable_run_age
          Periodic.enable_callbacks
          Periodic.add(-> { report_run_age })
        end

        # Reports the number of running jobs as :run_depth.
        def self.report_run_depth
          Periodic.report_gauge(:run_depth, running_jobs_scope.count)
        end

        # Reports :run_age_total and :run_age_max, the summed and the largest
        # difference between the current database time and each running
        # job's run_at. At most 10,000 jobs are sampled.
        def self.report_run_age
          # Read the clock once so every age is measured against the same
          # instant instead of re-reading it for each row.
          now = Delayed::Job.db_time_now
          age_secs = running_jobs_scope.limit(10_000).pluck(:run_at).map { |run_at| now - run_at }

          Periodic.report_gauge(:run_age_total, age_secs.sum)
          # `max` is nil when nothing is running; report 0 rather than a nil gauge.
          Periodic.report_gauge(:run_age_max, age_secs.max || 0)
        end

        def self.running_jobs_scope
          Delayed::Job.running
        end
      end
    end
  end
end
@@ -0,0 +1,91 @@
1
module InstJobsStatsd
  module Stats
    # Gauges that are reported at intervals rather than per job.
    module Periodic
      # Creates the shared Callbacks instance on first use and returns it.
      def self.enable_callbacks
        @instance = Callbacks.new unless @instance
        @instance
      end

      # Registers a callable to be run on each periodic report. Ignored
      # when enable_callbacks has not been called yet.
      def self.add(proc)
        @instance.add(proc) if @instance
      end

      # Sends `value` as a gauge for every qualified name of `stat`.
      def self.report_gauge(stat, value, job: nil, sample_rate: 1)
        InstStatsd::Statsd.gauge(Naming.qualified_names(stat, job), value, sample_rate)
      end

      # Holds the registered report callables and runs them whenever the
      # timer says an interval has elapsed.
      class Callbacks
        # @param min_interval [Numeric] interval passed to the Timer
        #   (compared against Delayed::Job.db_time_now differences)
        def initialize(min_interval = 60)
          @timer = Timer.new(min_interval)
          @procs = []
          register_lifecycle
        end

        # Remembers a callable; a nil/false argument is ignored.
        def add(proc)
          return unless proc

          @procs << proc
        end

        # This hooks into the lifecycle events such that it gets triggered
        # periodically with reasonable certainty -- as long as the rest of
        # the inst-jobs processing system is working, anyway.
        # This allows stats to be reported periodically without having to
        # start a separate sideband process. It means that reporting of
        # those stats runs inline (in the same process and thread) with the
        # regular job processing work, but it also means no need for an
        # additional database connection, and no managing of additional
        # threads or processes.
        #
        # The :work_queue_pop event is used because in production mode the
        # 'parent_process' work queue is typically going to be used, and
        # this callback runs in the parent process -- as opposed to having
        # it run in each of the subordinate worker processes, which would
        # not be ideal. In a dev env with the 'in_process' work queue there
        # is typically only a single worker process anyway, so it works
        # just as well.
        def register_lifecycle
          Delayed::Worker.lifecycle.after(:work_queue_pop) do |_q, _c|
            @timer.tick { run }
          end
        end

        # Runs every registered callable inside one statsd batch.
        def run
          InstStatsd::Statsd.batch do
            @procs.each { |reporter| reporter.call }
          end
        end
      end

      # Decides when enough time has passed for the next periodic report.
      class Timer
        def initialize(min_interval)
          @min_interval = min_interval * 1.0
          @start_time = Delayed::Job.db_time_now
          update_next_run
        end

        # Called as often as possible, driven by the lifecycle callbacks.
        # Once the required interval has passed, schedules the following
        # run and executes the given block.
        def tick
          return if Delayed::Job.db_time_now < @next_run

          update_next_run
          yield
        end

        private

        # Targets the next run time based on the original start time,
        # instead of just adding the interval to the current time, to
        # prevent drift from the target interval as much as possible.
        def update_next_run
          elapsed = Delayed::Job.db_time_now - @start_time
          completed_intervals = (elapsed / @min_interval).floor
          @next_run = @start_time + (completed_intervals + 1) * @min_interval
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module InstJobsStatsd
  module Stats
    module Timing
      # Timing stat for jobs that raise an error.
      module Failed
        def self.enable
          enable_failure_timing
        end

        # Hooks the worker lifecycle so the failure timing is reported
        # before each :error event is handled.
        def self.enable_failure_timing
          Delayed::Worker.lifecycle.before(:error) { |_worker, job, _exception| Timing.report_job_timing_failed(job) }
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
module InstJobsStatsd
  module Stats
    module Timing
      # Timing stats wrapped around the execution of each job.
      module Perform
        def self.enable
          enable_batching
          enable_perform_timing
        end

        # Wraps each job's perform step in a statsd batch.
        def self.enable_batching
          Delayed::Worker.lifecycle.around(:perform) do |worker, job, &perform|
            InstStatsd::Statsd.batch { perform.call(worker, job) }
          end
        end

        # Reports how long the job waited in the queue, then times the
        # perform step itself under the :perform stat.
        def self.enable_perform_timing
          Delayed::Worker.lifecycle.around(:perform) do |worker, job, &perform|
            Timing.report_job_timing_queued(job)
            Timing.report_timing(:perform, job: job) { perform.call(worker, job) }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module InstJobsStatsd
  module Stats
    module Timing
      # Timing stats for popping work off the queue.
      module Pop
        def self.enable
          enable_pop_timing
          enable_workqueue_pop_timing
        end

        # Times the worker's :pop lifecycle event under the :pop stat.
        def self.enable_pop_timing
          Delayed::Worker.lifecycle.around(:pop) do |worker, &pop|
            Timing.report_timing(:pop) { pop.call(worker) }
          end
        end

        # Times the :work_queue_pop lifecycle event under the
        # :workqueuepop stat.
        def self.enable_workqueue_pop_timing
          Delayed::Worker.lifecycle.around(:work_queue_pop) do |worker, config, &pop|
            Timing.report_timing(:workqueuepop) { pop.call(worker, config) }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module InstJobsStatsd
  module Stats
    # Shared entry points for timing-style stats.
    module Timing
      # Reports a timing for every qualified name of `stat`.
      # With a block, the block is timed via InstStatsd::Statsd.time;
      # without one, the explicit `timing` value is sent.
      def self.report_timing(stat, job: nil, timing: nil, sample_rate: 1)
        names = Naming.qualified_names(stat, job)
        return InstStatsd::Statsd.timing(names, timing, sample_rate) unless block_given?

        InstStatsd::Statsd.time(names, sample_rate) { yield }
      end

      # Reports, under :queue, the rounded milliseconds between the job's
      # run_at and the current database time. Does nothing without a job.
      def self.report_job_timing_queued(job)
        return unless job

        report_timing(:queue, job: job, timing: millis_since_run_at(job))
      end

      # Reports, under :failed_after, the rounded milliseconds between the
      # job's run_at and the current database time. Does nothing without a job.
      def self.report_job_timing_failed(job)
        return unless job

        report_timing(:failed_after, job: job, timing: millis_since_run_at(job))
      end

      # Rounded milliseconds from the job's run_at to the database's "now".
      def self.millis_since_run_at(job)
        ((Delayed::Job.db_time_now - job.run_at) * 1000).round
      end
      private_class_method :millis_since_run_at
    end
  end
end
@@ -0,0 +1,3 @@
1
module InstJobsStatsd
  # Version string of the inst-jobs-statsd gem.
  VERSION = '1.0.0'.freeze
end
@@ -0,0 +1,9 @@
1
FactoryGirl.define do
  # Plain stand-in for a job in specs: exposes only the `tag` and `run_at`
  # accessors.
  class JobFixture
    attr_accessor :tag, :run_at
  end

  # Builds a JobFixture (also available as :job) with a default tag.
  factory(:job_fixture, aliases: [:job]) { tag 'Test::Job.perform' }
end
@@ -0,0 +1,9 @@
1
FactoryGirl.define do
  # Plain stand-in for a worker in specs: exposes only a `name` accessor.
  class WorkerFixture
    attr_accessor :name
  end

  # Builds a WorkerFixture (also available as :worker) whose name comes
  # from a sequence: "worker-<n>".
  factory(:worker_fixture, aliases: [:worker]) do
    sequence(:name) { |index| "worker-#{index}" }
  end
end
@@ -0,0 +1,7 @@
1
# Gemfile used to run the specs against Rails 4.2.
source 'https://rubygems.org'

# Pull in the gem's own dependencies from the gemspec two directories up.
gemspec path: '../../'

gem 'rails', '~> 4.2.5'
gem 'after_transaction_commit', '<2'
gem 'test_after_commit', '0.4.1'