rocketjob 3.5.2 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +4 -4
  2. data/README.md +63 -1
  3. data/bin/rocketjob +1 -0
  4. data/bin/rocketjob_batch_perf +11 -0
  5. data/lib/rocket_job/batch.rb +32 -0
  6. data/lib/rocket_job/batch/callbacks.rb +40 -0
  7. data/lib/rocket_job/batch/io.rb +154 -0
  8. data/lib/rocket_job/batch/logger.rb +57 -0
  9. data/lib/rocket_job/batch/lower_priority.rb +54 -0
  10. data/lib/rocket_job/batch/model.rb +157 -0
  11. data/lib/rocket_job/batch/performance.rb +99 -0
  12. data/lib/rocket_job/batch/result.rb +8 -0
  13. data/lib/rocket_job/batch/results.rb +9 -0
  14. data/lib/rocket_job/batch/state_machine.rb +102 -0
  15. data/lib/rocket_job/batch/statistics.rb +88 -0
  16. data/lib/rocket_job/batch/tabular.rb +56 -0
  17. data/lib/rocket_job/batch/tabular/input.rb +123 -0
  18. data/lib/rocket_job/batch/tabular/output.rb +59 -0
  19. data/lib/rocket_job/batch/throttle.rb +91 -0
  20. data/lib/rocket_job/batch/throttle_running_slices.rb +53 -0
  21. data/lib/rocket_job/batch/worker.rb +288 -0
  22. data/lib/rocket_job/cli.rb +29 -7
  23. data/lib/rocket_job/config.rb +1 -1
  24. data/lib/rocket_job/extensions/mongoid/clients/options.rb +37 -0
  25. data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +17 -0
  26. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -4
  27. data/lib/rocket_job/extensions/mongoid_5/clients/options.rb +38 -0
  28. data/lib/rocket_job/extensions/mongoid_5/contextual/mongo.rb +64 -0
  29. data/lib/rocket_job/extensions/mongoid_5/factory.rb +13 -0
  30. data/lib/rocket_job/jobs/on_demand_batch_job.rb +127 -0
  31. data/lib/rocket_job/jobs/performance_job.rb +18 -0
  32. data/lib/rocket_job/jobs/upload_file_job.rb +2 -5
  33. data/lib/rocket_job/plugins/document.rb +2 -8
  34. data/lib/rocket_job/plugins/job/persistence.rb +6 -4
  35. data/lib/rocket_job/plugins/job/throttle.rb +3 -6
  36. data/lib/rocket_job/plugins/job/worker.rb +2 -2
  37. data/lib/rocket_job/server.rb +14 -3
  38. data/lib/rocket_job/sliced/input.rb +336 -0
  39. data/lib/rocket_job/sliced/output.rb +99 -0
  40. data/lib/rocket_job/sliced/slice.rb +166 -0
  41. data/lib/rocket_job/sliced/slices.rb +166 -0
  42. data/lib/rocket_job/sliced/writer/input.rb +60 -0
  43. data/lib/rocket_job/sliced/writer/output.rb +82 -0
  44. data/lib/rocket_job/version.rb +1 -1
  45. data/lib/rocket_job/worker.rb +2 -2
  46. data/lib/rocketjob.rb +28 -0
  47. metadata +51 -62
  48. data/test/config/database.yml +0 -5
  49. data/test/config/mongoid.yml +0 -88
  50. data/test/config_test.rb +0 -10
  51. data/test/dirmon_entry_test.rb +0 -313
  52. data/test/dirmon_job_test.rb +0 -216
  53. data/test/files/text.txt +0 -3
  54. data/test/job_test.rb +0 -71
  55. data/test/jobs/housekeeping_job_test.rb +0 -102
  56. data/test/jobs/on_demand_job_test.rb +0 -59
  57. data/test/jobs/upload_file_job_test.rb +0 -107
  58. data/test/plugins/cron_test.rb +0 -166
  59. data/test/plugins/job/callbacks_test.rb +0 -166
  60. data/test/plugins/job/defaults_test.rb +0 -53
  61. data/test/plugins/job/logger_test.rb +0 -56
  62. data/test/plugins/job/model_test.rb +0 -94
  63. data/test/plugins/job/persistence_test.rb +0 -94
  64. data/test/plugins/job/state_machine_test.rb +0 -116
  65. data/test/plugins/job/throttle_test.rb +0 -111
  66. data/test/plugins/job/worker_test.rb +0 -199
  67. data/test/plugins/processing_window_test.rb +0 -109
  68. data/test/plugins/restart_test.rb +0 -193
  69. data/test/plugins/retry_test.rb +0 -88
  70. data/test/plugins/singleton_test.rb +0 -92
  71. data/test/plugins/state_machine_event_callbacks_test.rb +0 -102
  72. data/test/plugins/state_machine_test.rb +0 -67
  73. data/test/plugins/transaction_test.rb +0 -84
  74. data/test/test_db.sqlite3 +0 -0
  75. data/test/test_helper.rb +0 -17
require 'tempfile'

module RocketJob
  module Sliced
    class Output < Slices
      # Write this output collection to a file, an IO stream, or a block.
      #
      # Returns [Integer] the number of records written.
      #
      # Parameters
      #   file_name_or_io [String|IO]
      #     The file name to write to, or an IO stream that implements #write.
      #     Optional when a block is supplied instead.
      #
      #   header_line: [String]
      #     Optional line written out before any records (e.g. a CSV header).
      #
      #   streams [Symbol|Array]
      #     The formats/streams used to convert the data whilst it is being
      #     written. When nil, `file_name_or_io` is inspected to determine
      #     which streams to apply. Default: nil
      #
      #   Any other option is passed through to IOStreams::Line::Writer.
      #
      # Stream types / extensions supported:
      #   .zip         Zip File                                       [ :zip ]
      #   .gz, .gzip   GZip File                                      [ :gzip ]
      #   .enc         File Encrypted using symmetric encryption      [ :enc ]
      #
      # An encrypted file may also be compressed:
      #   .zip.enc  [ :zip, :enc ]
      #   .gz.enc   [ :gz,  :enc ]
      #
      # Examples:
      #   # Zip ( the csv extension is not known to RocketJob and is ignored )
      #   job.output.download('myfile.csv.zip')
      #
      #   # Encrypted Zip
      #   job.output.download('myfile.csv.zip.enc')
      #
      #   # Explicitly set the streams
      #   job.output.download('myfile.ze', streams: [:zip, :enc])
      #
      #   # Custom stream options
      #   job.output.download('myfile.csv.enc', streams: [enc: { compress: true }])
      #   job.output.download('myfile.csv.zip', streams: [ zip: { zip_file_name: 'myfile.csv' } ])
      #
      #   # Extract streams from a file name, but write to a temp file
      #   t = Tempfile.new('my_project')
      #   job.output.download(t.to_path, file_name: 'myfile.gz.enc')
      #
      #   # Add a header and/or trailer record to the downloaded file
      #   IOStreams.writer('/tmp/file.txt.gz') do |writer|
      #     writer << "Header\n"
      #     job.download { |line| writer << line }
      #     writer << "Trailer\n"
      #   end
      #
      # Notes:
      # - Records are returned in '_id' order, which is usually the order in
      #   which they were originally loaded.
      def download(file_name_or_io = nil, header_line: nil, **args)
        unless file_name_or_io || block_given?
          raise(ArgumentError, 'Either file_name_or_io, or a block must be supplied')
        end

        count = 0

        if block_given?
          # Emit the header first, then every record from every slice.
          yield(header_line) if header_line
          each do |slice|
            slice.each do |record|
              count += 1
              yield(record)
            end
          end
        else
          IOStreams.line_writer(file_name_or_io, **args) do |io|
            io << header_line if header_line
            each do |slice|
              slice.each do |record|
                count += 1
                io << record
              end
            end
          end
        end
        count
      end
    end
  end
end
require 'forwardable'
module RocketJob
  module Sliced
    # A slice is an Array of records, along with meta-data that is used
    # or set while the individual records are being processed.
    #
    # Note: Do _not_ create instances of this model directly, go via Slice#new
    #   so that the correct collection name is used.
    #
    # Example:
    #   slice = RocketJob::Sliced::Slice.new
    #   slice << 'first'
    #   slice << 'second'
    #   second = slice.at(1)
    #
    #   # The [] operator is reserved for retrieving attributes:
    #   slice['state']
    class Slice
      include RocketJob::Plugins::Document
      include RocketJob::Plugins::StateMachine
      extend Forwardable

      store_in client: 'rocketjob_slices'

      # Record number of the first record in this slice.
      # Optional: set while the job is being processed, when present.
      field :first_record_number, type: Integer

      #
      # Read-only attributes
      #

      # Current state, managed by AASM.
      field :state, type: Symbol, default: :queued

      # When processing started on this slice.
      field :started_at, type: Time

      # Number of times processing of this slice has failed.
      field :failure_count, type: Integer

      # Name of the worker processing (or that last processed) this slice.
      field :worker_name, type: String

      # The most recent exception for this slice, if any.
      embeds_one :exception, class_name: 'RocketJob::JobException'

      # Records are stored raw in the document; re-hydrate them on load.
      after_find :parse_records

      # State Machine events and transitions
      #
      # Each slice is processed separately:
      #   :queued -> :running -> :completed
      #                       -> :failed     -> :running  ( manual )
      #
      # Slices are processed in ascending _id order.
      #
      # Note:
      #   Currently all slices are destroyed on completion, so no slices
      #   remain in the completed state.
      aasm column: :state, whiny_persistence: true do
        # Slice created and queued for processing ( initial state ).
        state :queued, initial: true

        # Slice is being processed.
        state :running

        # Slice finished processing ( end state ).
        state :completed

        # Slice failed and needs a manual retry or abort.
        state :failed

        event :start, before: :set_started_at do
          transitions from: :queued, to: :running
        end

        event :complete do
          transitions from: :running, to: :completed
        end

        event :fail, before: :set_exception do
          transitions from: :running, to: :failed
          transitions from: :queued, to: :failed
        end

        event :retry do
          transitions from: :failed, to: :queued
        end
      end

      # The `records` array is modified in place (for example when appending
      # lines with `<<`) rather than being replaced wholesale on every change.
      def records
        @records ||= []
      end

      # Replace all of the records within this slice.
      def records=(records)
        raise(ArgumentError, "Cannot assign type: #{records.class.name} to records") unless records.is_a?(Array)

        @records = records
      end

      def_instance_delegators :records, :each, :<<, :size, :concat, :at
      def_instance_delegators :records, *(Enumerable.instance_methods - Module.methods)

      # Mark this slice as failed, capturing the exception that caused it.
      def set_exception(exc = nil, record_number = nil)
        if exc
          # Capture the worker name before it is cleared below.
          self.exception          = JobException.from_exception(exc)
          exception.worker_name   = worker_name
          exception.record_number = record_number
        end
        self.failure_count = failure_count.to_i + 1
        self.worker_name   = nil
      end

      # Returns [Hash] the slice serialized for storage, with the in-memory
      # records merged in. The serialization hook differs by Mongoid version.
      if ::Mongoid::VERSION.to_i >= 6
        def as_attributes
          attrs = super
          attrs['records'] = serialize_records if @records
          attrs
        end
      else
        def as_document
          attrs = super
          attrs['records'] = serialize_records if @records
          attrs
        end
      end

      def inspect
        "#{super[0...-1]}, records: #{@records.inspect}, collection_name: #{collection_name.inspect}>"
      end

      private

      # Ensure records are always included in any atomic update.
      def atomic_updates(*args)
        r = super(*args)
        (r['$set'] ||= {})['records'] = serialize_records if @records
        r
      end

      def parse_records
        @records = attributes.delete('records')
      end

      def serialize_records
        records.mongoize
      end

      def set_started_at
        self.started_at = Time.now
      end
    end
  end
end
module RocketJob
  module Sliced
    # Access layer for a collection of slices.
    class Slices
      extend Forwardable
      include Enumerable
      include SemanticLogger::Loggable

      attr_accessor :slice_class, :slice_size, :collection_name
      attr_reader :all

      # Parameters
      #   collection_name: [String]
      #     Name of the collection to create
      #   slice_size: [Integer]
      #     Number of records to store in each slice
      #     Default: 100
      #   slice_class: [class]
      #     Slice class to use to hold records.
      #     Default: RocketJob::Sliced::Slice
      def initialize(collection_name:, slice_class: Sliced::Slice, slice_size: 100)
        @slice_class     = slice_class
        @slice_size      = slice_size
        @collection_name = collection_name
        @all             = slice_class.with_collection(collection_name)
      end

      # Builds a new slice bound to this collection without saving it.
      def new(params = {})
        slice_class.new(params.merge(collection_name: collection_name))
      end

      # Builds and saves a new slice. Returns the slice even if not saved.
      def create(params = {})
        slice = new(params)
        slice.save
        slice
      end

      # Builds and saves a new slice, raising on validation failure.
      def create!(params = {})
        slice = new(params)
        slice.save!
        slice
      end

      # Yields each slice in ascending id order, which is usually the order
      # in which they were written.
      def each
        return enum_for(:each) unless block_given?

        all.sort(id: 1).each { |document| yield(document) }
      end

      # Insert a new slice into the collection
      #
      # Returns [Integer] the number of records uploaded
      #
      # Parameters
      #   slice [RocketJob::Sliced::Slice | Array]
      #     The slice to write to the slices collection
      #     If slice is an Array, it will be converted to a Slice before inserting
      #     into the slices collection
      #
      #   input_slice [RocketJob::Sliced::Slice]
      #     The input slice to which this slice corresponds
      #     The id of the input slice is copied across
      #     If the insert results in a duplicate record it is ignored, to support
      #     restarting of jobs that failed in the middle of processing.
      #     A warning is logged that the slice has already been processed.
      #
      # Note:
      #   `slice_size` is not enforced.
      #   However many records are present in the slice will be written as a
      #   single slice to the slices collection
      def insert(slice, input_slice = nil)
        slice = new(records: slice) unless slice.is_a?(Slice)

        # Retain input_slice id in the new output slice
        if input_slice
          slice.id                  = input_slice.id
          slice.first_record_number = input_slice.first_record_number
        end

        begin
          slice.save!
        rescue Mongo::Error::OperationFailure => exc
          # Ignore duplicates since it means the job was restarted
          raise(exc) unless exc.message.include?('E11000')
          logger.warn "Skipped already processed slice# #{slice.id}"
        end
        slice
      end

      alias << insert

      # Create the index used by find_and_modify, only if not already present.
      def create_indexes
        all.collection.indexes.create_one(state: 1, _id: 1) if all.collection.indexes.none? { |i| i['name'] == 'state_1__id_1' }
      rescue Mongo::Error::OperationFailure
        all.collection.indexes.create_one(state: 1, _id: 1)
      end

      # Note: :first and :last are intentionally NOT delegated here since they
      # are redefined below with an explicit sort; delegating them would only
      # create dead methods and Forwardable redefinition warnings.
      def_instance_delegators :@all, :collection, :count, :delete_all, :find, :nor, :not, :or, :to_a, :where

      # Drop this collection when it is no longer needed
      def drop
        all.collection.drop
      end

      # Forwardable generates invalid warnings on these methods.
      def completed
        all.completed
      end

      def failed
        all.failed
      end

      def queued
        all.queued
      end

      def running
        all.running
      end

      # Mongoid does not apply ordering, add sort
      def first
        all.sort('_id' => 1).first
      end

      def last
        all.sort('_id' => -1).first
      end

      # Returns [Array<Struct>] grouped exceptions by class name,
      # and unique exception messages by exception class.
      #
      # Each struct consists of:
      #   class_name: [String]
      #     Exception class name.
      #
      #   count: [Integer]
      #     Number of exceptions with this class.
      #
      #   messages: [Array<String>]
      #     Unique list of error messages.
      def group_exceptions
        result_struct = Struct.new(:class_name, :count, :messages)
        result = all.collection.aggregate(
          [
            {
              '$match' => {state: 'failed'}
            },
            {
              '$group' => {
                _id:      {error_class: '$exception.class_name'},
                messages: {'$addToSet' => '$exception.message'},
                count:    {'$sum' => 1}
              }
            }
          ]
        )
        result.collect do |errors|
          result_struct.new(errors['_id']['error_class'], errors['count'], errors['messages'])
        end
      end
    end
  end
end
module RocketJob
  module Sliced
    module Writer
      # Internal class for uploading records into input slices.
      #
      # Lines appended via #<< are batched into slices of `input.slice_size`
      # records; each full slice is inserted into the input collection.
      class Input
        # Number of records written so far (the header line is not counted).
        attr_reader :record_count

        # Batch a collection of lines into slices.
        #
        # Yields the writer to the supplied block; the block appends lines
        # with #<<. Returns [Integer] the number of records written.
        #
        # Parameters
        #   on_first: [Proc]
        #     Block to call on the first line only, instead of storing it in
        #     the slice. Useful for extracting the header row.
        #     Default: nil
        def self.collect(input, **args)
          writer = new(input, **args)
          yield(writer)
          writer.record_count
        rescue Exception
          # Deliberately rescues Exception (not just StandardError) so the
          # partially-uploaded input collection is dropped even on signals or
          # system exits; the exception is always re-raised.
          input.drop
          raise
        ensure
          writer&.close
        end

        def initialize(input, on_first: nil)
          @on_first      = on_first
          @batch_count   = 0
          @record_count  = 0
          @input         = input
          @record_number = 1
          @slice         = @input.new(first_record_number: @record_number)
        end

        # Append one line. The first line is consumed by `on_first` when
        # supplied; otherwise the line is added to the current slice, and the
        # slice is flushed once it reaches `input.slice_size` records.
        def <<(line)
          @record_number += 1
          if @on_first
            @on_first.call(line)
            @on_first = nil
            return self
          end
          @slice << line
          @batch_count  += 1
          @record_count += 1
          if @batch_count >= @input.slice_size
            @input.insert(@slice)
            @batch_count = 0
            @slice       = @input.new(first_record_number: @record_number)
          end
          self
        end

        # Flush any partially-filled final slice.
        def close
          @input.insert(@slice) if @slice.size.positive?
        end
      end
    end
  end
end