rocketjob 3.5.2 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +63 -1
  3. data/bin/rocketjob +1 -0
  4. data/bin/rocketjob_batch_perf +11 -0
  5. data/lib/rocket_job/batch.rb +32 -0
  6. data/lib/rocket_job/batch/callbacks.rb +40 -0
  7. data/lib/rocket_job/batch/io.rb +154 -0
  8. data/lib/rocket_job/batch/logger.rb +57 -0
  9. data/lib/rocket_job/batch/lower_priority.rb +54 -0
  10. data/lib/rocket_job/batch/model.rb +157 -0
  11. data/lib/rocket_job/batch/performance.rb +99 -0
  12. data/lib/rocket_job/batch/result.rb +8 -0
  13. data/lib/rocket_job/batch/results.rb +9 -0
  14. data/lib/rocket_job/batch/state_machine.rb +102 -0
  15. data/lib/rocket_job/batch/statistics.rb +88 -0
  16. data/lib/rocket_job/batch/tabular.rb +56 -0
  17. data/lib/rocket_job/batch/tabular/input.rb +123 -0
  18. data/lib/rocket_job/batch/tabular/output.rb +59 -0
  19. data/lib/rocket_job/batch/throttle.rb +91 -0
  20. data/lib/rocket_job/batch/throttle_running_slices.rb +53 -0
  21. data/lib/rocket_job/batch/worker.rb +288 -0
  22. data/lib/rocket_job/cli.rb +29 -7
  23. data/lib/rocket_job/config.rb +1 -1
  24. data/lib/rocket_job/extensions/mongoid/clients/options.rb +37 -0
  25. data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +17 -0
  26. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -4
  27. data/lib/rocket_job/extensions/mongoid_5/clients/options.rb +38 -0
  28. data/lib/rocket_job/extensions/mongoid_5/contextual/mongo.rb +64 -0
  29. data/lib/rocket_job/extensions/mongoid_5/factory.rb +13 -0
  30. data/lib/rocket_job/jobs/on_demand_batch_job.rb +127 -0
  31. data/lib/rocket_job/jobs/performance_job.rb +18 -0
  32. data/lib/rocket_job/jobs/upload_file_job.rb +2 -5
  33. data/lib/rocket_job/plugins/document.rb +2 -8
  34. data/lib/rocket_job/plugins/job/persistence.rb +6 -4
  35. data/lib/rocket_job/plugins/job/throttle.rb +3 -6
  36. data/lib/rocket_job/plugins/job/worker.rb +2 -2
  37. data/lib/rocket_job/server.rb +14 -3
  38. data/lib/rocket_job/sliced/input.rb +336 -0
  39. data/lib/rocket_job/sliced/output.rb +99 -0
  40. data/lib/rocket_job/sliced/slice.rb +166 -0
  41. data/lib/rocket_job/sliced/slices.rb +166 -0
  42. data/lib/rocket_job/sliced/writer/input.rb +60 -0
  43. data/lib/rocket_job/sliced/writer/output.rb +82 -0
  44. data/lib/rocket_job/version.rb +1 -1
  45. data/lib/rocket_job/worker.rb +2 -2
  46. data/lib/rocketjob.rb +28 -0
  47. metadata +51 -62
  48. data/test/config/database.yml +0 -5
  49. data/test/config/mongoid.yml +0 -88
  50. data/test/config_test.rb +0 -10
  51. data/test/dirmon_entry_test.rb +0 -313
  52. data/test/dirmon_job_test.rb +0 -216
  53. data/test/files/text.txt +0 -3
  54. data/test/job_test.rb +0 -71
  55. data/test/jobs/housekeeping_job_test.rb +0 -102
  56. data/test/jobs/on_demand_job_test.rb +0 -59
  57. data/test/jobs/upload_file_job_test.rb +0 -107
  58. data/test/plugins/cron_test.rb +0 -166
  59. data/test/plugins/job/callbacks_test.rb +0 -166
  60. data/test/plugins/job/defaults_test.rb +0 -53
  61. data/test/plugins/job/logger_test.rb +0 -56
  62. data/test/plugins/job/model_test.rb +0 -94
  63. data/test/plugins/job/persistence_test.rb +0 -94
  64. data/test/plugins/job/state_machine_test.rb +0 -116
  65. data/test/plugins/job/throttle_test.rb +0 -111
  66. data/test/plugins/job/worker_test.rb +0 -199
  67. data/test/plugins/processing_window_test.rb +0 -109
  68. data/test/plugins/restart_test.rb +0 -193
  69. data/test/plugins/retry_test.rb +0 -88
  70. data/test/plugins/singleton_test.rb +0 -92
  71. data/test/plugins/state_machine_event_callbacks_test.rb +0 -102
  72. data/test/plugins/state_machine_test.rb +0 -67
  73. data/test/plugins/transaction_test.rb +0 -84
  74. data/test/test_db.sqlite3 +0 -0
  75. data/test/test_helper.rb +0 -17
@@ -0,0 +1,99 @@
1
require 'tempfile'

module RocketJob
  module Sliced
    class Output < Slices
      # Stream the contents of this output collection to a file, IO stream,
      # or a supplied block.
      #
      # Returns [Integer] the number of records downloaded from the collection.
      #
      # Parameters
      #   file_name_or_io [String|IO]
      #     The file_name of the file to write to, or an IO Stream that
      #     implements #write.
      #     Optional when a block is supplied instead.
      #
      #   header_line: [String]
      #     Line to write out first, before any records.
      #     Default: nil
      #
      # options:
      #   streams [Symbol|Array]
      #     The formats/streams that be used to convert the data whilst it is
      #     being written.
      #     When nil, `file_name_or_io` will be inspected to try and determine what
      #     streams should be applied.
      #     Default: nil
      #
      #   Any other option that can be supplied to IOStreams::Line::Writer
      #
      # Stream types / extensions supported:
      #   .zip       Zip File                                      [ :zip ]
      #   .gz, .gzip GZip File                                     [ :gzip ]
      #   .enc       File Encrypted using symmetric encryption     [ :enc ]
      #
      # When a file is encrypted, it may also be compressed:
      #   .zip.enc  [ :zip, :enc ]
      #   .gz.enc   [ :gz, :enc ]
      #
      # Example: Zip
      #   # Since csv is not known to RocketJob it is ignored
      #   job.output.download('myfile.csv.zip')
      #
      # Example: Encrypted Zip
      #   job.output.download('myfile.csv.zip.enc')
      #
      # Example: Explicitly set the streams
      #   job.output.download('myfile.ze', streams: [:zip, :enc])
      #
      # Example: Supply custom options
      #   job.output.download('myfile.csv.enc', streams: [enc: { compress: true }])
      #
      # Example: Supply custom options
      #   job.output.download('myfile.csv.zip', streams: [ zip: { zip_file_name: 'myfile.csv' } ])
      #
      # Example: Extract streams from filename but write to a temp file
      #   t = Tempfile.new('my_project')
      #   job.output.download(t.to_path, file_name: 'myfile.gz.enc')
      #
      # Example: Add a header and/or trailer record to the downloaded file:
      #   IOStreams.writer('/tmp/file.txt.gz') do |writer|
      #     writer << "Header\n"
      #     job.download do |line|
      #       writer << line
      #     end
      #     writer << "Trailer\n"
      #   end
      #
      # Notes:
      # - The records are returned in '_id' order. Usually this is the order in
      #   which the records were originally loaded.
      def download(file_name_or_io = nil, header_line: nil, **args)
        raise(ArgumentError, 'Either file_name_or_io, or a block must be supplied') unless file_name_or_io || block_given?

        count = 0
        if block_given?
          # Hand the header line, then every record, to the caller's block.
          yield(header_line) if header_line
          count = stream_records { |record| yield(record) }
        else
          # Let IOStreams negotiate compression/encryption from the target name.
          IOStreams.line_writer(file_name_or_io, **args) do |io|
            io << header_line if header_line
            count = stream_records { |record| io << record }
          end
        end
        count
      end

      private

      # Yields every record, slice by slice, in ascending '_id' order.
      # Returns [Integer] the total number of records yielded.
      def stream_records
        count = 0
        each do |slice|
          slice.each do |record|
            count += 1
            yield(record)
          end
        end
        count
      end
    end
  end
end
@@ -0,0 +1,166 @@
1
require 'forwardable'
module RocketJob
  module Sliced
    # A slice is an Array of Records, along with meta-data that is used
    # or set during processing of the individual records
    #
    # Note: Do _not_ create instances of this model directly, go via
    # `Slices#new` so that the correct collection name is used.
    #
    # Example:
    #   slice = RocketJob::Sliced::Slice.new
    #   slice << 'first'
    #   slice << 'second'
    #   second = slice.at(1)
    #
    #   # The [] operator is for retrieving attributes:
    #   slice['state']
    #
    class Slice
      include RocketJob::Plugins::Document
      include RocketJob::Plugins::StateMachine
      extend Forwardable

      # All slices are persisted via the dedicated 'rocketjob_slices' client.
      store_in client: 'rocketjob_slices'

      # The record number of the first record in this slice.
      #
      # Optional: If present the record_number is set while the job
      # is being processed.
      field :first_record_number, type: Integer

      #
      # Read-only attributes
      #

      # Current state, as set by AASM
      field :state, type: Symbol, default: :queued

      # When processing started on this slice
      field :started_at, type: Time

      # Number of times that this slice has failed to process
      field :failure_count, type: Integer

      # The name of the worker that this slice is being processed by, or was processed by
      field :worker_name, type: String

      # The last exception for this slice if any
      embeds_one :exception, class_name: 'RocketJob::JobException'

      # Records are stored as a raw 'records' attribute; move them into
      # @records after the document is loaded (see #parse_records).
      after_find :parse_records

      # State Machine events and transitions
      #
      # Each slice is processed separately:
      #   :queued -> :running -> :completed
      #                       -> :failed     -> :running  ( manual )
      #
      # Slices are processed by ascending _id sort order
      #
      # Note:
      #   Currently all slices are destroyed on completion, so no slices
      #   are available in the completed state
      aasm column: :state, whiny_persistence: true do
        # Job has been created and is queued for processing ( Initial state )
        state :queued, initial: true

        # Job is running
        state :running

        # Job has completed processing ( End state )
        state :completed

        # Job failed to process and needs to be manually re-tried or aborted
        state :failed

        event :start, before: :set_started_at do
          transitions from: :queued, to: :running
        end

        event :complete do
          transitions from: :running, to: :completed
        end

        event :fail, before: :set_exception do
          transitions from: :running, to: :failed
          transitions from: :queued, to: :failed
        end

        event :retry do
          transitions from: :failed, to: :queued
        end
      end

      # `records` array has special handling so that it can be modified in place instead of having
      # to replace the entire array every time. For example, when appending lines with `<<`.
      def records
        @records ||= []
      end

      # Replace the records within this slice
      #
      # Raises [ArgumentError] when `records` is not an Array.
      def records=(records)
        raise(ArgumentError, "Cannot assign type: #{records.class.name} to records") unless records.is_a?(Array)

        @records = records
      end

      # Delegate the Array / Enumerable interface to the records array so that
      # a slice can be treated directly as a collection of records.
      def_instance_delegators :records, :each, :<<, :size, :concat, :at
      def_instance_delegators :records, *(Enumerable.instance_methods - Module.methods)

      # Fail this slice, along with the exception that caused the failure
      #
      # Parameters
      #   exc [Exception]
      #     Optional exception that caused the failure.
      #   record_number [Integer]
      #     Optional number of the record within the slice that raised the exception.
      #
      # Also increments the failure count and clears the worker name, since
      # the slice is no longer being worked on.
      def set_exception(exc = nil, record_number = nil)
        if exc
          self.exception = JobException.from_exception(exc)
          exception.worker_name = worker_name
          exception.record_number = record_number
        end
        self.failure_count = failure_count.to_i + 1
        self.worker_name = nil
      end

      # Returns [Hash] the slice as a Hash for storage purposes
      # Compresses / Encrypts the slice according to the job setting
      #
      # Mongoid 6 serializes documents via #as_attributes, earlier versions
      # via #as_document. Either way the in-memory records are merged in so
      # they are persisted along with the rest of the document.
      if ::Mongoid::VERSION.to_i >= 6
        def as_attributes
          attrs = super
          attrs['records'] = serialize_records if @records
          attrs
        end
      else
        def as_document
          attrs = super
          attrs['records'] = serialize_records if @records
          attrs
        end
      end

      def inspect
        "#{super[0...-1]}, records: #{@records.inspect}, collection_name: #{collection_name.inspect}>"
      end

      private

      # Always add records to any updates.
      #
      # The records are held in a plain instance variable rather than a
      # Mongoid field (see #records), so they must be added to every atomic
      # update explicitly for in-place changes to be persisted.
      def atomic_updates(*args)
        r = super(*args)
        if @records
          (r['$set'] ||= {})['records'] = serialize_records
        end
        r
      end

      # after_find callback: move the raw 'records' attribute into @records.
      def parse_records
        @records = attributes.delete('records')
      end

      # Returns the records converted into their Mongo-serializable form.
      def serialize_records
        records.mongoize
      end

      # AASM before-callback for the :start event.
      def set_started_at
        self.started_at = Time.now
      end
    end
  end
end
@@ -0,0 +1,166 @@
1
module RocketJob
  module Sliced
    # Manages the collection of slices backing a single job: creating,
    # inserting, enumerating and querying slices in the job's dedicated
    # slice collection (exposed as the Mongoid criteria `all`).
    class Slices
      extend Forwardable
      include Enumerable
      include SemanticLogger::Loggable

      attr_accessor :slice_class, :slice_size, :collection_name
      attr_reader :all

      # Parameters
      #   collection_name: [String]
      #     Name of the collection in which to store the slices
      #   slice_size: [Integer]
      #     Number of records to store in each slice
      #     Default: 100
      #   slice_class: [class]
      #     Slice class to use to hold records.
      #     Default: RocketJob::Sliced::Slice
      def initialize(collection_name:, slice_class: Sliced::Slice, slice_size: 100)
        @slice_class = slice_class
        @slice_size = slice_size
        @collection_name = collection_name
        @all = slice_class.with_collection(collection_name)
      end

      # Returns a new, unsaved slice bound to this collection.
      def new(params = {})
        slice_class.new(params.merge(collection_name: collection_name))
      end

      # Creates and saves a new slice. Returns the slice even if the save failed.
      def create(params = {})
        slice = new(params)
        slice.save
        slice
      end

      # Creates and saves a new slice, raising an exception if the save fails.
      def create!(params = {})
        slice = new(params)
        slice.save!
        slice
      end

      # Returns output slices in the order of their id
      # which is usually the order in which they were written.
      #
      # Returns an Enumerator when no block is supplied, so that
      # Enumerable-style chaining (e.g. `each.with_index`) works.
      def each(&block)
        return enum_for(:each) unless block_given?

        all.sort(id: 1).each(&block)
      end

      # Insert a new slice into the collection
      #
      # Returns [RocketJob::Sliced::Slice] the slice that was inserted.
      #
      # Parameters
      #   slice [RocketJob::Sliced::Slice | Array]
      #     The slice to write to the slices collection
      #     If slice is an Array, it will be converted to a Slice before inserting
      #     into the slices collection
      #
      #   input_slice [RocketJob::Sliced::Slice]
      #     The input slice to which this slice corresponds
      #     The id of the input slice is copied across
      #     If the insert results in a duplicate record it is ignored, to support
      #     restarting of jobs that failed in the middle of processing.
      #     A warning is logged that the slice has already been processed.
      #
      # Note:
      #   `slice_size` is not enforced.
      #   However many records are present in the slice will be written as a
      #   single slice to the slices collection
      #
      def insert(slice, input_slice = nil)
        slice = new(records: slice) unless slice.is_a?(Slice)

        # Retain input_slice id in the new output slice
        if input_slice
          slice.id = input_slice.id
          slice.first_record_number = input_slice.first_record_number
        end

        begin
          slice.save!
        rescue Mongo::Error::OperationFailure => exc
          # Ignore duplicate key errors (E11000) since it means the job was restarted
          raise(exc) unless exc.message.include?('E11000')
          logger.warn "Skipped already processed slice ##{slice.id}"
        end
        slice
      end

      alias << insert

      # Index for find_and_modify only if it is not already present
      def create_indexes
        all.collection.indexes.create_one(state: 1, _id: 1) if all.collection.indexes.none? { |i| i['name'] == 'state_1__id_1' }
      rescue Mongo::Error::OperationFailure
        # Listing the indexes can fail (e.g. when the collection does not
        # exist yet); attempt to create the index anyway.
        all.collection.indexes.create_one(state: 1, _id: 1)
      end

      # Note: :first and :last are deliberately not delegated since they are
      # redefined below to apply an explicit sort order.
      def_instance_delegators :@all, :collection, :count, :delete_all, :find, :nor, :not, :or, :to_a, :where

      # Drop this collection when it is no longer needed
      def drop
        all.collection.drop
      end

      # State scopes delegated by hand since Forwardable generates invalid
      # warnings on these methods.
      def completed
        all.completed
      end

      def failed
        all.failed
      end

      def queued
        all.queued
      end

      def running
        all.running
      end

      # Mongoid does not apply ordering by default, so sort explicitly.
      def first
        all.sort('_id' => 1).first
      end

      def last
        all.sort('_id' => -1).first
      end

      # Returns [Array<Struct>] grouped exceptions by class name,
      # and unique exception messages by exception class.
      #
      # Each struct consists of:
      #   class_name: [String]
      #     Exception class name.
      #
      #   count: [Integer]
      #     Number of exceptions with this class.
      #
      #   messages: [Array<String>]
      #     Unique list of error messages.
      def group_exceptions
        result_struct = Struct.new(:class_name, :count, :messages)
        result = all.collection.aggregate(
          [
            {
              '$match' => {state: 'failed'}
            },
            {
              '$group' => {
                _id: {error_class: '$exception.class_name'},
                messages: {'$addToSet' => '$exception.message'},
                count: {'$sum' => 1}
              }
            }
          ]
        )
        result.collect do |errors|
          result_struct.new(errors['_id']['error_class'], errors['count'], errors['messages'])
        end
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
module RocketJob
  module Sliced
    module Writer
      # Internal class for uploading records into input slices
      class Input
        attr_reader :record_count

        # Batch a collection of lines into slices.
        #
        # Yields a writer to the supplied block; every line appended to the
        # writer via #<< is buffered and written to `input` in slices of
        # `input.slice_size` records.
        #
        # Returns [Integer] the number of records written.
        #
        # Parameters
        #   on_first: [Proc]
        #     Block to call on the first line only, instead of storing it in
        #     the slice. Useful for extracting the header row.
        #     Default: nil
        #
        # Note: if any exception is raised the input collection is dropped,
        # since a partially uploaded input cannot be trusted.
        def self.collect(input, **args, &block)
          writer = new(input, **args)
          block.call(writer)
          writer.record_count
        rescue Exception => exc
          # Drop input collection when upload fails
          input.drop
          raise(exc)
        ensure
          # Flush any records still buffered in the final, partial slice.
          writer&.close
        end

        def initialize(input, on_first: nil)
          @on_first      = on_first
          @batch_count   = 0
          @record_count  = 0
          @input         = input
          @record_number = 1
          @slice         = @input.new(first_record_number: @record_number)
        end

        # Append a single line, persisting a full slice to the input
        # collection whenever the batch reaches `input.slice_size` records.
        def <<(line)
          @record_number += 1

          # The first line may be consumed by the on_first callback, e.g. to
          # capture a header row instead of storing it as a record.
          if @on_first
            @on_first.call(line)
            @on_first = nil
            return self
          end

          @slice << line
          @batch_count += 1
          @record_count += 1
          flush if @batch_count >= @input.slice_size
          self
        end

        # Write out any records still buffered in the current slice.
        def close
          @input.insert(@slice) if @slice.size.positive?
        end

        private

        # Persist the current slice and start a new, empty one.
        def flush
          @input.insert(@slice)
          @batch_count = 0
          @slice = @input.new(first_record_number: @record_number)
        end
      end
    end
  end
end