rocketjob 6.0.0.rc1 → 6.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/README.md +164 -8
  3. data/lib/rocket_job/batch/categories.rb +25 -18
  4. data/lib/rocket_job/batch/io.rb +130 -130
  5. data/lib/rocket_job/batch/performance.rb +2 -2
  6. data/lib/rocket_job/batch/statistics.rb +2 -2
  7. data/lib/rocket_job/batch/throttle_running_workers.rb +1 -1
  8. data/lib/rocket_job/batch/worker.rb +14 -12
  9. data/lib/rocket_job/batch.rb +0 -1
  10. data/lib/rocket_job/category/base.rb +10 -7
  11. data/lib/rocket_job/category/input.rb +61 -1
  12. data/lib/rocket_job/category/output.rb +9 -0
  13. data/lib/rocket_job/cli.rb +1 -1
  14. data/lib/rocket_job/dirmon_entry.rb +1 -1
  15. data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +2 -2
  16. data/lib/rocket_job/extensions/rocket_job_adapter.rb +2 -2
  17. data/lib/rocket_job/job_exception.rb +1 -1
  18. data/lib/rocket_job/jobs/conversion_job.rb +43 -0
  19. data/lib/rocket_job/jobs/dirmon_job.rb +24 -35
  20. data/lib/rocket_job/jobs/housekeeping_job.rb +4 -5
  21. data/lib/rocket_job/jobs/on_demand_batch_job.rb +15 -11
  22. data/lib/rocket_job/jobs/on_demand_job.rb +2 -2
  23. data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +103 -97
  24. data/lib/rocket_job/jobs/upload_file_job.rb +6 -3
  25. data/lib/rocket_job/lookup_collection.rb +4 -3
  26. data/lib/rocket_job/plugins/cron.rb +60 -20
  27. data/lib/rocket_job/plugins/job/persistence.rb +36 -0
  28. data/lib/rocket_job/plugins/job/throttle.rb +2 -2
  29. data/lib/rocket_job/plugins/restart.rb +3 -110
  30. data/lib/rocket_job/plugins/state_machine.rb +2 -2
  31. data/lib/rocket_job/plugins/throttle_dependent_jobs.rb +43 -0
  32. data/lib/rocket_job/sliced/bzip2_output_slice.rb +18 -19
  33. data/lib/rocket_job/sliced/compressed_slice.rb +3 -6
  34. data/lib/rocket_job/sliced/encrypted_bzip2_output_slice.rb +49 -0
  35. data/lib/rocket_job/sliced/encrypted_slice.rb +4 -6
  36. data/lib/rocket_job/sliced/input.rb +42 -54
  37. data/lib/rocket_job/sliced/slice.rb +7 -3
  38. data/lib/rocket_job/sliced/slices.rb +12 -9
  39. data/lib/rocket_job/sliced/writer/input.rb +46 -18
  40. data/lib/rocket_job/sliced/writer/output.rb +0 -1
  41. data/lib/rocket_job/sliced.rb +1 -19
  42. data/lib/rocket_job/throttle_definitions.rb +7 -1
  43. data/lib/rocket_job/version.rb +1 -1
  44. data/lib/rocketjob.rb +4 -5
  45. metadata +12 -12
  46. data/lib/rocket_job/batch/tabular/input.rb +0 -133
  47. data/lib/rocket_job/batch/tabular/output.rb +0 -67
  48. data/lib/rocket_job/batch/tabular.rb +0 -58
@@ -1,133 +0,0 @@
1
- require "active_support/concern"
2
-
3
- module RocketJob
4
- module Batch
5
- class Tabular
6
- # @deprecated
7
- module Input
8
- extend ActiveSupport::Concern
9
-
10
- included do
11
- warn "#{name} is using RocketJob::Batch::Tabular::Input which is deprecated"
12
-
13
- field :tabular_input_header, type: Array, class_attribute: true, user_editable: true
14
- field :tabular_input_format, type: Mongoid::StringifiedSymbol, default: :csv, class_attribute: true, user_editable: true
15
- field :tabular_input_options, type: Hash, class_attribute: true
16
-
17
- # tabular_input_mode: [:line | :array | :hash]
18
- # :line
19
- # Uploads the file a line (String) at a time for processing by workers.
20
- # :array
21
- # Parses each line from the file as an Array and uploads each array for processing by workers.
22
- # :hash
23
- # Parses each line from the file into a Hash and uploads each hash for processing by workers.
24
- # See IOStreams#each.
25
- field :tabular_input_mode, type: Mongoid::StringifiedSymbol, default: :line, class_attribute: true, user_editable: true, copy_on_restart: true
26
-
27
- validates_inclusion_of :tabular_input_format, in: IOStreams::Tabular.registered_formats
28
- validates_inclusion_of :tabular_input_mode, in: %i[line array hash row record]
29
- validate :tabular_input_header_present
30
-
31
- class_attribute :tabular_input_white_list
32
- class_attribute :tabular_input_required
33
- class_attribute :tabular_input_skip_unknown
34
-
35
- # Cleanse all uploaded data by removing non-printable characters
36
- # and any characters that cannot be converted to UTF-8
37
- class_attribute :tabular_input_type
38
-
39
- self.tabular_input_white_list = nil
40
- self.tabular_input_required = nil
41
- self.tabular_input_skip_unknown = true
42
- self.tabular_input_type = :text
43
-
44
- before_perform :tabular_input_render
45
- end
46
-
47
- # Extract the header line during the upload.
48
- #
49
- # Overrides: RocketJob::Batch::IO#upload
50
- #
51
- # Notes:
52
- # - When supplying a block the header must be set manually
53
- def upload(stream = nil, **args, &block)
54
- input_stream = stream.nil? ? nil : IOStreams.new(stream)
55
-
56
- if stream && (tabular_input_type == :text)
57
- # Cannot change the length of fixed width lines
58
- replace = tabular_input_format == :fixed ? " " : ""
59
- input_stream.option_or_stream(:encode, encoding: "UTF-8", cleaner: :printable, replace: replace)
60
- end
61
-
62
- # If an input header is not required, then we don't extract it.
63
- return super(input_stream, stream_mode: tabular_input_mode, **args, &block) unless tabular_input.header?
64
-
65
- # If the header is already set then it is not expected in the file
66
- if tabular_input_header.present?
67
- tabular_input_cleanse_header
68
- return super(input_stream, stream_mode: tabular_input_mode, **args, &block)
69
- end
70
-
71
- case tabular_input_mode
72
- when :line
73
- parse_header = lambda do |line|
74
- tabular_input.parse_header(line)
75
- tabular_input_cleanse_header
76
- self.tabular_input_header = tabular_input.header.columns
77
- end
78
- super(input_stream, on_first: parse_header, stream_mode: :line, **args, &block)
79
- when :array, :row
80
- set_header = lambda do |row|
81
- tabular_input.header.columns = row
82
- tabular_input_cleanse_header
83
- self.tabular_input_header = tabular_input.header.columns
84
- end
85
- super(input_stream, on_first: set_header, stream_mode: :array, **args, &block)
86
- when :hash, :record
87
- super(input_stream, stream_mode: :hash, **args, &block)
88
- else
89
- raise(ArgumentError, "Invalid tabular_input_mode: #{stream_mode.inspect}")
90
- end
91
- end
92
-
93
- private
94
-
95
- # Shared instance used for this slice, by a single worker (thread)
96
- def tabular_input
97
- @tabular_input ||= IOStreams::Tabular.new(
98
- columns: tabular_input_header,
99
- allowed_columns: tabular_input_white_list,
100
- required_columns: tabular_input_required,
101
- skip_unknown: tabular_input_skip_unknown,
102
- format: tabular_input_format,
103
- format_options: tabular_input_options&.deep_symbolize_keys
104
- )
105
- end
106
-
107
- def tabular_input_render
108
- return if tabular_input_header.blank? && tabular_input.header?
109
-
110
- @rocket_job_input = tabular_input.record_parse(@rocket_job_input)
111
- end
112
-
113
- # Cleanse custom input header if supplied.
114
- def tabular_input_cleanse_header
115
- ignored_columns = tabular_input.header.cleanse!
116
- logger.warn("Stripped out invalid columns from custom header", ignored_columns) unless ignored_columns.empty?
117
-
118
- self.tabular_input_header = tabular_input.header.columns
119
- end
120
-
121
- def tabular_input_header_present
122
- if tabular_input_header.present? ||
123
- !tabular_input.header? ||
124
- (tabular_input_mode == :hash || tabular_input_mode == :record)
125
- return
126
- end
127
-
128
- errors.add(:tabular_input_header, "is required when tabular_input_format is #{tabular_input_format.inspect}")
129
- end
130
- end
131
- end
132
- end
133
- end
@@ -1,67 +0,0 @@
1
- require "active_support/concern"
2
-
3
- module RocketJob
4
- module Batch
5
- class Tabular
6
- # Use this plugin for the simple case where all `output_categories` have the same format.
7
- # If multiple output categories are used with different formats, then use IOStreams::Tabular directly
8
- # instead of this plugin.
9
- module Output
10
- extend ActiveSupport::Concern
11
-
12
- included do
13
- warn "#{name} is using RocketJob::Batch::Tabular::Output which is deprecated"
14
-
15
- field :tabular_output_header, type: Array, class_attribute: true, user_editable: true, copy_on_restart: true
16
- field :tabular_output_format, type: Mongoid::StringifiedSymbol, default: :csv, class_attribute: true, user_editable: true, copy_on_restart: true
17
- field :tabular_output_options, type: Hash, class_attribute: true
18
-
19
- validates_inclusion_of :tabular_output_format, in: IOStreams::Tabular.registered_formats
20
-
21
- after_perform :tabular_output_render
22
- end
23
-
24
- # Clear out cached tabular_output any time header or format is changed.
25
- def tabular_output_header=(tabular_output_header)
26
- super(tabular_output_header)
27
- @tabular_output = nil
28
- end
29
-
30
- def tabular_output_format=(tabular_output_format)
31
- super(tabular_output_format)
32
- @tabular_output = nil
33
- end
34
-
35
- # Overrides: `RocketJob::Batch::IO#download` to add the `tabular_output_header`.
36
- def download(file_name_or_io = nil, category: :main, **args, &block)
37
- unless tabular_output.requires_header?(category)
38
- return super(file_name_or_io, category: category, **args, &block)
39
- end
40
-
41
- header = tabular_output.render_header(category)
42
- super(file_name_or_io, header_line: header, category: category, **args, &block)
43
- end
44
-
45
- private
46
-
47
- # Tabular instance used for this slice, by a single worker (thread)
48
- def tabular_output
49
- @tabular_output ||= Tabular.new(
50
- main: IOStreams::Tabular.new(
51
- columns: tabular_output_header,
52
- format: tabular_output_format,
53
- format_options: tabular_output_options&.deep_symbolize_keys
54
- )
55
- )
56
- end
57
-
58
- # Render the output from the perform.
59
- def tabular_output_render
60
- return unless output_categories.present?
61
-
62
- @rocket_job_output = tabular_output.render(@rocket_job_output)
63
- end
64
- end
65
- end
66
- end
67
- end
@@ -1,58 +0,0 @@
1
- module RocketJob
2
- module Batch
3
- # Format output results.
4
- #
5
- # Takes Batch::Results, Batch::Result, Hash, Array, or String and renders it for output.
6
- #
7
- # Example:
8
- #
9
- # tabular = Tabular.new(
10
- # main: IOStreams::Tabular.new(columns: main_file_headers, format: tabular_output_format),
11
- # exceptions: IOStreams::Tabular.new(columns: exception_file_headers, format: tabular_output_format)
12
- # )
13
- #
14
- # tabular.render(row)
15
- #
16
- # @deprecated
17
- class Tabular
18
- autoload :Input, "rocket_job/batch/tabular/input"
19
- autoload :Output, "rocket_job/batch/tabular/output"
20
-
21
- def initialize(map)
22
- @map = map
23
- end
24
-
25
- def [](category = :main)
26
- @map[category] || raise("No tabular map defined for category: #{category.inspect}")
27
- end
28
-
29
- # Iterate over responses and format using Tabular
30
- def render(row, category = :main)
31
- if row.is_a?(Batch::Results)
32
- results = Batch::Results.new
33
- row.each { |result| results << render(result) }
34
- results
35
- elsif row.is_a?(Batch::Result)
36
- row.value = self[row.category].render(row.value)
37
- row
38
- elsif row.blank?
39
- nil
40
- else
41
- self[category].render(row)
42
- end
43
- end
44
-
45
- def render_header(category = :main)
46
- self[category].render_header
47
- end
48
-
49
- def requires_header?(category = :main)
50
- self[category].requires_header?
51
- end
52
-
53
- def header?(category = :main)
54
- self[category].header?
55
- end
56
- end
57
- end
58
- end