jrf 0.1.12 → 0.1.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/jrf.gemspec +2 -0
- data/lib/jrf/cli/runner.rb +336 -34
- data/lib/jrf/cli.rb +17 -27
- data/lib/jrf/version.rb +1 -1
- data/test/cli_parallel_test.rb +195 -0
- data/test/cli_runner_test.rb +951 -0
- data/test/library_api_test.rb +126 -0
- data/test/readme_examples_test.rb +16 -0
- data/test/test_helper.rb +118 -0
- metadata +34 -2
- data/test/jrf_test.rb +0 -1103
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 78c1f6eb54e20d4dffbfe57f89a49d9e8ec9bbb2a9e118d911f2dec3c649f4ac
|
|
4
|
+
data.tar.gz: 63f43701422cfe200b7932a2177132f5e4e74e690960e71b88d6cc7b767e0b3c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 152ebdc2322f9a8b6c0cad2cb303a093a45d5e0ecc17b519904e40e069a747b56e33f1ddd33f7f3efb32031d78808d05e32d93ab151572b973a1324f9e676e0b
|
|
7
|
+
data.tar.gz: 63c189a79b484777c25f5c1a7951d930fc2d110f3547216b2fd099469e57e7a062c0ec64ba2c7b0c3d7e88a6fb5f1f40d3b5ba6d1a0803acfc5253b00f43dfe8
|
data/jrf.gemspec
CHANGED
|
@@ -16,6 +16,8 @@ Gem::Specification.new do |spec|
|
|
|
16
16
|
spec.bindir = "exe"
|
|
17
17
|
spec.executables = ["jrf"]
|
|
18
18
|
spec.add_dependency "oj", ">= 3.16"
|
|
19
|
+
spec.add_development_dependency "minitest", ">= 5.0"
|
|
20
|
+
spec.add_development_dependency "rake", ">= 13.0"
|
|
19
21
|
|
|
20
22
|
spec.files = Dir.glob("{exe,lib,test}/*") + Dir.glob("lib/**/*") + %w[DESIGN.txt jrf.gemspec Gemfile Rakefile].select { |path| File.file?(path) }
|
|
21
23
|
end
|
data/lib/jrf/cli/runner.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "json"
|
|
4
|
+
require "zlib"
|
|
4
5
|
require_relative "../pipeline"
|
|
5
6
|
require_relative "../pipeline_parser"
|
|
6
7
|
|
|
@@ -9,6 +10,7 @@ module Jrf
|
|
|
9
10
|
class Runner
|
|
10
11
|
RS_CHAR = "\x1e"
|
|
11
12
|
DEFAULT_OUTPUT_BUFFER_LIMIT = 4096
|
|
13
|
+
PARALLEL_FRAME_HEADER_BYTES = 4
|
|
12
14
|
|
|
13
15
|
class RsNormalizer
|
|
14
16
|
def initialize(input)
|
|
@@ -28,56 +30,293 @@ module Jrf
|
|
|
28
30
|
end
|
|
29
31
|
end
|
|
30
32
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
+
class ParallelFrameReader
|
|
34
|
+
def initialize
|
|
35
|
+
@buf = +""
|
|
36
|
+
@offset = 0
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def append(chunk)
|
|
40
|
+
@buf << chunk
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def each_payload
|
|
44
|
+
while (payload = next_payload)
|
|
45
|
+
yield payload
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def has_partial?
|
|
50
|
+
@offset != @buf.bytesize
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
private
|
|
54
|
+
|
|
55
|
+
def next_payload
|
|
56
|
+
if @buf.bytesize - @offset < PARALLEL_FRAME_HEADER_BYTES
|
|
57
|
+
compact!
|
|
58
|
+
return nil
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
payload_len = @buf.byteslice(@offset, PARALLEL_FRAME_HEADER_BYTES).unpack1("N")
|
|
62
|
+
frame_len = PARALLEL_FRAME_HEADER_BYTES + payload_len
|
|
63
|
+
if @buf.bytesize - @offset < frame_len
|
|
64
|
+
compact!
|
|
65
|
+
return nil
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
payload = @buf.byteslice(@offset + PARALLEL_FRAME_HEADER_BYTES, payload_len)
|
|
69
|
+
@offset += frame_len
|
|
70
|
+
payload
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def compact!
|
|
74
|
+
if @offset > 0
|
|
75
|
+
@buf = @buf.byteslice(@offset..) || +""
|
|
76
|
+
@offset = 0
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def initialize(input: $stdin, out: $stdout, err: $stderr, lax: false, output_format: :json, atomic_write_bytes: DEFAULT_OUTPUT_BUFFER_LIMIT)
|
|
82
|
+
if input.is_a?(Array)
|
|
83
|
+
@file_paths = input
|
|
84
|
+
@stdin = nil
|
|
85
|
+
else
|
|
86
|
+
@file_paths = []
|
|
87
|
+
@stdin = input
|
|
88
|
+
end
|
|
33
89
|
@out = out
|
|
34
90
|
@err = err
|
|
35
91
|
@lax = lax
|
|
36
|
-
@
|
|
92
|
+
@output_format = output_format
|
|
37
93
|
@atomic_write_bytes = atomic_write_bytes
|
|
38
94
|
@output_buffer = +""
|
|
95
|
+
@input_errors = false
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def input_errors?
|
|
99
|
+
@input_errors
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
def run(expression, parallel: 1, verbose: false)
|
|
103
|
+
blocks = build_stage_blocks(expression, verbose: verbose)
|
|
104
|
+
if @output_format == :tsv
|
|
105
|
+
values = []
|
|
106
|
+
process_values(blocks, parallel: parallel, verbose: verbose) do |value|
|
|
107
|
+
values << value
|
|
108
|
+
end
|
|
109
|
+
emit_tsv(values)
|
|
110
|
+
else
|
|
111
|
+
process_values(blocks, parallel: parallel, verbose: verbose) do |value|
|
|
112
|
+
emit_output(value)
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
ensure
|
|
116
|
+
write_output(@output_buffer)
|
|
39
117
|
end
|
|
40
118
|
|
|
41
|
-
|
|
119
|
+
private
|
|
120
|
+
|
|
121
|
+
def build_stage_blocks(expression, verbose:)
|
|
42
122
|
parsed = PipelineParser.new(expression).parse
|
|
43
123
|
stages = parsed[:stages]
|
|
44
124
|
dump_stages(stages) if verbose
|
|
45
|
-
|
|
46
|
-
blocks = stages.map { |stage|
|
|
125
|
+
stages.map { |stage|
|
|
47
126
|
eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
|
|
48
127
|
}
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
def apply_pipeline(blocks, input_enum)
|
|
49
131
|
pipeline = Pipeline.new(*blocks)
|
|
132
|
+
Enumerator.new do |y|
|
|
133
|
+
pipeline.call(input_enum) { |value| y << value }
|
|
134
|
+
end
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
def each_input_enum
|
|
138
|
+
Enumerator.new { |y| each_input_value { |v| y << v } }
|
|
139
|
+
end
|
|
50
140
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
141
|
+
def process_values(blocks, parallel:, verbose:, &block)
|
|
142
|
+
if parallel <= 1 || @file_paths.length <= 1
|
|
143
|
+
dump_parallel_status("disabled", verbose: verbose)
|
|
144
|
+
return apply_pipeline(blocks, each_input_enum).each(&block)
|
|
54
145
|
end
|
|
55
|
-
|
|
56
|
-
|
|
146
|
+
|
|
147
|
+
# Parallelize the longest map-only prefix; reducers stay in the parent.
|
|
148
|
+
split_index = classify_parallel_stages(blocks)
|
|
149
|
+
if split_index.nil? || split_index == 0
|
|
150
|
+
dump_parallel_status("disabled", verbose: verbose)
|
|
151
|
+
return apply_pipeline(blocks, each_input_enum).each(&block)
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
map_blocks = blocks[0...split_index]
|
|
155
|
+
reduce_blocks = blocks[split_index..]
|
|
156
|
+
dump_parallel_status("enabled workers=#{parallel} files=#{@file_paths.length} split=#{split_index}/#{blocks.length}", verbose: verbose)
|
|
157
|
+
input_enum = parallel_map_enum(map_blocks, parallel)
|
|
158
|
+
(reduce_blocks.empty? ? input_enum : apply_pipeline(reduce_blocks, input_enum)).each(&block)
|
|
57
159
|
end
|
|
58
160
|
|
|
59
|
-
|
|
161
|
+
def dump_parallel_status(status, verbose:)
|
|
162
|
+
@err.puts "parallel: #{status}" if verbose
|
|
163
|
+
end
|
|
60
164
|
|
|
61
|
-
def
|
|
62
|
-
|
|
165
|
+
def classify_parallel_stages(blocks)
|
|
166
|
+
# Read the first row from the first file to probe stage modes
|
|
167
|
+
first_value = nil
|
|
168
|
+
open_file(@file_paths.first) do |stream|
|
|
169
|
+
each_stream_value(stream) do |value|
|
|
170
|
+
first_value = value
|
|
171
|
+
break
|
|
172
|
+
end
|
|
173
|
+
end
|
|
174
|
+
return nil if first_value.nil?
|
|
63
175
|
|
|
64
|
-
|
|
176
|
+
# Run the value through each stage independently to classify
|
|
177
|
+
split_index = nil
|
|
178
|
+
blocks.each_with_index do |block, i|
|
|
179
|
+
probe_pipeline = Pipeline.new(block)
|
|
180
|
+
probe_pipeline.call([first_value]) { |_| }
|
|
181
|
+
stage = probe_pipeline.instance_variable_get(:@stages).first
|
|
182
|
+
if stage.instance_variable_get(:@mode) == :reducer
|
|
183
|
+
split_index = i
|
|
184
|
+
break
|
|
185
|
+
end
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
split_index || blocks.length
|
|
65
189
|
end
|
|
66
190
|
|
|
67
|
-
def
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
191
|
+
def spawn_parallel_worker(blocks, path)
|
|
192
|
+
read_io, write_io = IO.pipe
|
|
193
|
+
pid = fork do
|
|
194
|
+
read_io.close
|
|
195
|
+
@out = write_io
|
|
196
|
+
@output_buffer = +""
|
|
197
|
+
pipeline = Pipeline.new(*blocks)
|
|
198
|
+
input_enum = Enumerator.new do |y|
|
|
199
|
+
open_file(path) { |stream| each_stream_value(stream) { |v| y << v } }
|
|
200
|
+
end
|
|
201
|
+
worker_failed = false
|
|
202
|
+
begin
|
|
203
|
+
pipeline.call(input_enum) { |value| emit_parallel_frame(value) }
|
|
204
|
+
rescue => e
|
|
205
|
+
@err.puts "#{path}: #{e.message} (#{e.class})"
|
|
206
|
+
worker_failed = true
|
|
207
|
+
end
|
|
208
|
+
write_output(@output_buffer)
|
|
209
|
+
write_io.close
|
|
210
|
+
exit!(worker_failed ? 1 : 0)
|
|
211
|
+
end
|
|
212
|
+
write_io.close
|
|
213
|
+
[read_io, pid]
|
|
214
|
+
end
|
|
72
215
|
|
|
73
|
-
|
|
216
|
+
def run_parallel_worker_pool(blocks, num_workers)
|
|
217
|
+
file_queue = @file_paths.dup
|
|
218
|
+
workers = {} # read_io => [reader, pid]
|
|
219
|
+
children = []
|
|
220
|
+
|
|
221
|
+
# Fill initial pool
|
|
222
|
+
while workers.size < num_workers && !file_queue.empty?
|
|
223
|
+
read_io, pid = spawn_parallel_worker(blocks, file_queue.shift)
|
|
224
|
+
workers[read_io] = [ParallelFrameReader.new, pid]
|
|
225
|
+
children << pid
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
read_ios = workers.keys.dup
|
|
229
|
+
|
|
230
|
+
until read_ios.empty?
|
|
231
|
+
ready = IO.select(read_ios)
|
|
232
|
+
ready[0].each do |io|
|
|
233
|
+
reader = workers[io][0]
|
|
234
|
+
chunk = io.read_nonblock(65536, exception: false)
|
|
235
|
+
if chunk == :wait_readable
|
|
236
|
+
next
|
|
237
|
+
elsif chunk.nil?
|
|
238
|
+
raise IOError, "truncated parallel frame from worker" if reader.has_partial?
|
|
239
|
+
read_ios.delete(io)
|
|
240
|
+
io.close
|
|
241
|
+
workers.delete(io)
|
|
242
|
+
|
|
243
|
+
# Spawn next worker if files remain
|
|
244
|
+
unless file_queue.empty?
|
|
245
|
+
read_io, pid = spawn_parallel_worker(blocks, file_queue.shift)
|
|
246
|
+
workers[read_io] = [ParallelFrameReader.new, pid]
|
|
247
|
+
children << pid
|
|
248
|
+
read_ios << read_io
|
|
249
|
+
end
|
|
250
|
+
else
|
|
251
|
+
reader.append(chunk)
|
|
252
|
+
reader.each_payload do |payload|
|
|
253
|
+
yield JSON.parse(payload)
|
|
254
|
+
end
|
|
255
|
+
end
|
|
74
256
|
end
|
|
75
257
|
end
|
|
258
|
+
|
|
259
|
+
children
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
def parallel_map_enum(map_blocks, num_workers)
|
|
263
|
+
children = nil
|
|
264
|
+
Enumerator.new do |y|
|
|
265
|
+
children = run_parallel_worker_pool(map_blocks, num_workers) { |value| y << value }
|
|
266
|
+
ensure
|
|
267
|
+
wait_for_parallel_children(children) if children
|
|
268
|
+
end
|
|
269
|
+
end
|
|
270
|
+
|
|
271
|
+
def wait_for_parallel_children(children)
|
|
272
|
+
failed = false
|
|
273
|
+
children.each do |pid|
|
|
274
|
+
_, status = Process.waitpid2(pid)
|
|
275
|
+
failed = true unless status.success?
|
|
276
|
+
end
|
|
277
|
+
exit(1) if failed
|
|
278
|
+
end
|
|
279
|
+
|
|
280
|
+
def emit_parallel_frame(value)
|
|
281
|
+
payload = JSON.generate(value)
|
|
282
|
+
buffer_output([payload.bytesize].pack("N") << payload)
|
|
283
|
+
end
|
|
284
|
+
|
|
285
|
+
def each_input_value
|
|
286
|
+
each_input do |source|
|
|
287
|
+
each_stream_value(source) { |value| yield value }
|
|
288
|
+
end
|
|
289
|
+
end
|
|
290
|
+
|
|
291
|
+
def each_stream_value(stream)
|
|
292
|
+
return each_stream_value_lax(stream) { |value| yield value } if @lax
|
|
293
|
+
|
|
294
|
+
stream.each_line do |raw_line|
|
|
295
|
+
line = raw_line.strip
|
|
296
|
+
next if line.empty?
|
|
297
|
+
yield JSON.parse(line)
|
|
298
|
+
end
|
|
76
299
|
end
|
|
77
300
|
|
|
78
|
-
def
|
|
301
|
+
def open_file(path)
|
|
302
|
+
if path.end_with?(".gz")
|
|
303
|
+
Zlib::GzipReader.open(path) { |source| yield source }
|
|
304
|
+
else
|
|
305
|
+
File.open(path, "rb") { |source| yield source }
|
|
306
|
+
end
|
|
307
|
+
end
|
|
308
|
+
|
|
309
|
+
def each_stream_value_lax(stream)
|
|
79
310
|
require "oj"
|
|
80
|
-
|
|
311
|
+
Oj.sc_parse(streaming_json_handler_class.new { |value| yield value }, RsNormalizer.new(stream))
|
|
312
|
+
rescue LoadError
|
|
313
|
+
raise "oj is required for --lax mode (gem install oj)"
|
|
314
|
+
rescue Oj::ParseError => e
|
|
315
|
+
raise JSON::ParserError, e.message
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
def streaming_json_handler_class
|
|
319
|
+
@streaming_json_handler_class ||= Class.new(Oj::ScHandler) do
|
|
81
320
|
def initialize(&emit)
|
|
82
321
|
@emit = emit
|
|
83
322
|
end
|
|
@@ -89,13 +328,6 @@ module Jrf
|
|
|
89
328
|
def array_append(array, value) = array << value
|
|
90
329
|
def add_value(value) = @emit.call(value)
|
|
91
330
|
end
|
|
92
|
-
each_input do |source|
|
|
93
|
-
Oj.sc_parse(handler.new { |value| yield value }, RsNormalizer.new(source))
|
|
94
|
-
end
|
|
95
|
-
rescue LoadError
|
|
96
|
-
raise "oj is required for --lax mode (gem install oj)"
|
|
97
|
-
rescue Oj::ParseError => e
|
|
98
|
-
raise JSON::ParserError, e.message
|
|
99
331
|
end
|
|
100
332
|
|
|
101
333
|
def dump_stages(stages)
|
|
@@ -104,12 +336,76 @@ module Jrf
|
|
|
104
336
|
end
|
|
105
337
|
end
|
|
106
338
|
|
|
107
|
-
def each_input
|
|
108
|
-
@
|
|
339
|
+
def each_input(&block)
|
|
340
|
+
if @file_paths.empty?
|
|
341
|
+
with_error_handling("<stdin>") { block.call(@stdin) }
|
|
342
|
+
else
|
|
343
|
+
@file_paths.each do |path|
|
|
344
|
+
if path == "-"
|
|
345
|
+
with_error_handling("<stdin>") { block.call(@stdin) }
|
|
346
|
+
else
|
|
347
|
+
with_error_handling(path) { open_file(path, &block) }
|
|
348
|
+
end
|
|
349
|
+
end
|
|
350
|
+
end
|
|
351
|
+
end
|
|
352
|
+
|
|
353
|
+
def with_error_handling(name)
|
|
354
|
+
yield
|
|
355
|
+
rescue IOError, SystemCallError, Zlib::GzipFile::Error, JSON::ParserError => e
|
|
356
|
+
@err.puts "#{name}: #{e.message} (#{e.class})"
|
|
357
|
+
@input_errors = true
|
|
109
358
|
end
|
|
110
359
|
|
|
111
360
|
def emit_output(value)
|
|
112
|
-
record = (@pretty ? JSON.pretty_generate(value) : JSON.generate(value)) << "\n"
|
|
361
|
+
record = (@output_format == :pretty ? JSON.pretty_generate(value) : JSON.generate(value)) << "\n"
|
|
362
|
+
buffer_output(record)
|
|
363
|
+
end
|
|
364
|
+
|
|
365
|
+
def emit_tsv(values)
|
|
366
|
+
rows = values.flat_map { |value| value_to_rows(value) }
|
|
367
|
+
rows.each do |row|
|
|
368
|
+
buffer_output(row.join("\t") << "\n")
|
|
369
|
+
end
|
|
370
|
+
end
|
|
371
|
+
|
|
372
|
+
def value_to_rows(value)
|
|
373
|
+
case value
|
|
374
|
+
when Hash
|
|
375
|
+
value.map { |k, v|
|
|
376
|
+
case v
|
|
377
|
+
when Array
|
|
378
|
+
[format_cell(k)] + v.map { |e| format_cell(e) }
|
|
379
|
+
else
|
|
380
|
+
[format_cell(k), format_cell(v)]
|
|
381
|
+
end
|
|
382
|
+
}
|
|
383
|
+
when Array
|
|
384
|
+
value.map { |row|
|
|
385
|
+
case row
|
|
386
|
+
when Array
|
|
387
|
+
row.map { |e| format_cell(e) }
|
|
388
|
+
else
|
|
389
|
+
[format_cell(row)]
|
|
390
|
+
end
|
|
391
|
+
}
|
|
392
|
+
else
|
|
393
|
+
[[format_cell(value)]]
|
|
394
|
+
end
|
|
395
|
+
end
|
|
396
|
+
|
|
397
|
+
def format_cell(value)
|
|
398
|
+
case value
|
|
399
|
+
when nil
|
|
400
|
+
"null"
|
|
401
|
+
when Numeric, String, true, false
|
|
402
|
+
value.to_s
|
|
403
|
+
else
|
|
404
|
+
JSON.generate(value)
|
|
405
|
+
end
|
|
406
|
+
end
|
|
407
|
+
|
|
408
|
+
def buffer_output(record)
|
|
113
409
|
if @output_buffer.bytesize + record.bytesize <= @atomic_write_bytes
|
|
114
410
|
@output_buffer << record
|
|
115
411
|
else
|
|
@@ -119,7 +415,13 @@ module Jrf
|
|
|
119
415
|
end
|
|
120
416
|
|
|
121
417
|
def write_output(str)
|
|
122
|
-
|
|
418
|
+
return if str.empty?
|
|
419
|
+
|
|
420
|
+
total = 0
|
|
421
|
+
while total < str.bytesize
|
|
422
|
+
written = @out.syswrite(str.byteslice(total..))
|
|
423
|
+
total += written
|
|
424
|
+
end
|
|
123
425
|
end
|
|
124
426
|
end
|
|
125
427
|
end
|
data/lib/jrf/cli.rb
CHANGED
|
@@ -16,7 +16,9 @@ module Jrf
|
|
|
16
16
|
Options:
|
|
17
17
|
-v, --verbose print parsed stage expressions
|
|
18
18
|
--lax allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
|
|
19
|
-
-
|
|
19
|
+
-o, --output FORMAT
|
|
20
|
+
output format: json (default), pretty, tsv
|
|
21
|
+
-P N opportunistically parallelize the map-prefix across N workers
|
|
20
22
|
-r, --require LIBRARY
|
|
21
23
|
require LIBRARY before evaluating stages
|
|
22
24
|
--no-jit do not enable YJIT, even when supported by the Ruby runtime
|
|
@@ -43,7 +45,8 @@ module Jrf
|
|
|
43
45
|
def self.run(argv = ARGV, input: ARGF, out: $stdout, err: $stderr)
|
|
44
46
|
verbose = false
|
|
45
47
|
lax = false
|
|
46
|
-
|
|
48
|
+
output_format = :json
|
|
49
|
+
parallel = 1
|
|
47
50
|
jit = true
|
|
48
51
|
required_libraries = []
|
|
49
52
|
atomic_write_bytes = Runner::DEFAULT_OUTPUT_BUFFER_LIMIT
|
|
@@ -52,7 +55,8 @@ module Jrf
|
|
|
52
55
|
opts.banner = USAGE
|
|
53
56
|
opts.on("-v", "--verbose", "print parsed stage expressions") { verbose = true }
|
|
54
57
|
opts.on("--lax", "allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)") { lax = true }
|
|
55
|
-
opts.on("-
|
|
58
|
+
opts.on("-o", "--output FORMAT", %w[json pretty tsv], "output format: json, pretty, tsv") { |fmt| output_format = fmt.to_sym }
|
|
59
|
+
opts.on("-P N", Integer, "opportunistically parallelize the map-prefix across N workers") { |n| parallel = n }
|
|
56
60
|
opts.on("-r", "--require LIBRARY", "require LIBRARY before evaluating stages") { |library| required_libraries << library }
|
|
57
61
|
opts.on("--no-jit", "do not enable YJIT, even when supported by the Ruby runtime") { jit = false }
|
|
58
62
|
opts.on("--atomic-write-bytes N", Integer, "group short outputs into atomic writes of up to N bytes") do |value|
|
|
@@ -88,34 +92,20 @@ module Jrf
|
|
|
88
92
|
enable_yjit if jit
|
|
89
93
|
required_libraries.each { |library| require library }
|
|
90
94
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
argv.each do |path|
|
|
96
|
-
if path == "-"
|
|
97
|
-
y << input
|
|
98
|
-
elsif path.end_with?(".gz")
|
|
99
|
-
require "zlib"
|
|
100
|
-
Zlib::GzipReader.open(path) do |source|
|
|
101
|
-
y << source
|
|
102
|
-
end
|
|
103
|
-
else
|
|
104
|
-
File.open(path, "rb") do |source|
|
|
105
|
-
y << source
|
|
106
|
-
end
|
|
107
|
-
end
|
|
108
|
-
end
|
|
109
|
-
end
|
|
110
|
-
end
|
|
111
|
-
Runner.new(
|
|
112
|
-
inputs: inputs,
|
|
95
|
+
file_paths = argv.dup
|
|
96
|
+
|
|
97
|
+
runner = Runner.new(
|
|
98
|
+
input: file_paths.empty? ? input : file_paths,
|
|
113
99
|
out: out,
|
|
114
100
|
err: err,
|
|
115
101
|
lax: lax,
|
|
116
|
-
|
|
102
|
+
output_format: output_format,
|
|
117
103
|
atomic_write_bytes: atomic_write_bytes
|
|
118
|
-
)
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
runner.run(expression, parallel: parallel, verbose: verbose)
|
|
107
|
+
|
|
108
|
+
exit 1 if runner.input_errors?
|
|
119
109
|
end
|
|
120
110
|
|
|
121
111
|
def self.enable_yjit
|
data/lib/jrf/version.rb
CHANGED