jrf 0.1.14 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 78c1f6eb54e20d4dffbfe57f89a49d9e8ec9bbb2a9e118d911f2dec3c649f4ac
4
- data.tar.gz: 63f43701422cfe200b7932a2177132f5e4e74e690960e71b88d6cc7b767e0b3c
3
+ metadata.gz: de85d7a03d58baee4c931d10869a824a1ff5c2eec121cd15e63ec23805203676
4
+ data.tar.gz: ce3c53475e13d41e3a176ef7c9ea840145fbbf826612457386cc0899c28a1af0
5
5
  SHA512:
6
- metadata.gz: 152ebdc2322f9a8b6c0cad2cb303a093a45d5e0ecc17b519904e40e069a747b56e33f1ddd33f7f3efb32031d78808d05e32d93ab151572b973a1324f9e676e0b
7
- data.tar.gz: 63c189a79b484777c25f5c1a7951d930fc2d110f3547216b2fd099469e57e7a062c0ec64ba2c7b0c3d7e88a6fb5f1f40d3b5ba6d1a0803acfc5253b00f43dfe8
6
+ metadata.gz: ded54cff09febe7fe02c585f30a702cd82cd11aeb563f840b9f182cd8a6e94c090ba5e71fbd6cad7f377816c14de41c12f5fed449b2b8f7c1d682513db2f19ee
7
+ data.tar.gz: bdd4f9ee2ff809cc718b497a39783027f9c7322582dfdaebc8cae8b6bf4cb1d56b9639a356f2239be66b6ee39693c1da994b4fa9b9db5ecfb6c10d66ede021d9
@@ -21,7 +21,7 @@ module Jrf
21
21
  chunk = @input.read(length)
22
22
  return nil if chunk.nil?
23
23
 
24
- chunk = chunk.tr(RS_CHAR, "\n")
24
+ chunk.tr!(RS_CHAR, "\n")
25
25
  if outbuf
26
26
  outbuf.replace(chunk)
27
27
  else
@@ -72,7 +72,7 @@ module Jrf
72
72
 
73
73
  def compact!
74
74
  if @offset > 0
75
- @buf = @buf.byteslice(@offset..) || +""
75
+ @buf.slice!(0, @offset)
76
76
  @offset = 0
77
77
  end
78
78
  end
@@ -140,13 +140,30 @@ module Jrf
140
140
 
141
141
  def process_values(blocks, parallel:, verbose:, &block)
142
142
  if parallel <= 1 || @file_paths.length <= 1
143
+ # Single file or no parallelism requested — serial is the only option.
144
+ # This also covers the all-files-empty case: no files means no workers to spawn.
143
145
  dump_parallel_status("disabled", verbose: verbose)
144
146
  return apply_pipeline(blocks, each_input_enum).each(&block)
145
147
  end
146
148
 
147
- # Parallelize the longest map-only prefix; reducers stay in the parent.
148
- split_index = classify_parallel_stages(blocks)
149
- if split_index.nil? || split_index == 0
149
+ split_index, probe_stage = classify_parallel_stages(blocks)
150
+ if split_index.nil?
151
+ dump_parallel_status("disabled", verbose: verbose)
152
+ return apply_pipeline(blocks, each_input_enum).each(&block)
153
+ end
154
+
155
+ # If the first reducer stage is decomposable, workers run everything up to
156
+ # and including it (map prefix + reducer), emit partial accumulators, and the
157
+ # parent merges. This covers both pure reducers (split_index == 0, e.g. `sum(_)`)
158
+ # and map-then-reduce (split_index > 0, e.g. `select(...) >> sum(...)`).
159
+ if probe_stage&.decomposable?
160
+ worker_blocks = blocks[0..split_index]
161
+ rest_blocks = blocks[(split_index + 1)..]
162
+ return process_decomposable_parallel(worker_blocks, rest_blocks, probe_stage,
163
+ parallel: parallel, verbose: verbose, &block)
164
+ end
165
+
166
+ if split_index == 0
150
167
  dump_parallel_status("disabled", verbose: verbose)
151
168
  return apply_pipeline(blocks, each_input_enum).each(&block)
152
169
  end
@@ -162,6 +179,9 @@ module Jrf
162
179
  @err.puts "parallel: #{status}" if verbose
163
180
  end
164
181
 
182
+ # Returns [split_index, probe_stage] where split_index is the index of the
183
+ # first reducer stage (or blocks.length if all are passthrough), and probe_stage
184
+ # is the Stage object of that first reducer (nil if all passthrough or no input).
165
185
  def classify_parallel_stages(blocks)
166
186
  # Read the first row from the first file to probe stage modes
167
187
  first_value = nil
@@ -171,24 +191,63 @@ module Jrf
171
191
  break
172
192
  end
173
193
  end
174
- return nil if first_value.nil?
194
+ return [nil, nil] if first_value.nil?
175
195
 
176
196
  # Run the value through each stage independently to classify
177
197
  split_index = nil
198
+ probe_stage = nil
178
199
  blocks.each_with_index do |block, i|
179
200
  probe_pipeline = Pipeline.new(block)
180
201
  probe_pipeline.call([first_value]) { |_| }
181
202
  stage = probe_pipeline.instance_variable_get(:@stages).first
182
203
  if stage.instance_variable_get(:@mode) == :reducer
183
204
  split_index = i
205
+ probe_stage = stage
184
206
  break
185
207
  end
186
208
  end
187
209
 
188
- split_index || blocks.length
210
+ [split_index || blocks.length, probe_stage]
211
+ end
212
+
213
+ def process_decomposable_parallel(worker_blocks, rest_blocks, probe_stage, parallel:, verbose:, &block)
214
+ dump_parallel_status("enabled workers=#{parallel} files=#{@file_paths.length} decompose=#{worker_blocks.length}/#{worker_blocks.length + rest_blocks.length}", verbose: verbose)
215
+
216
+ # Workers run map prefix + reducer stage per file and emit partial accumulators.
217
+ partials_list = []
218
+ reducer_stage_index = worker_blocks.length - 1
219
+ spawner = ->(path) do
220
+ spawn_worker(worker_blocks, path) do |pipeline, input|
221
+ pipeline.call(input) { |_| }
222
+ # If the file was empty, the stage was never initialized (no reducers),
223
+ # so skip emitting — the parent will simply not receive a partial for this worker.
224
+ stage = pipeline.instance_variable_get(:@stages)[reducer_stage_index]
225
+ partials = stage.partial_accumulators
226
+ emit_parallel_frame(partials) unless partials.empty?
227
+ end
228
+ end
229
+ children = run_parallel_worker_pool(parallel, spawner) { |v| partials_list << v }
230
+ wait_for_parallel_children(children) if children
231
+ return if partials_list.empty?
232
+
233
+ # Reuse the probe stage (already initialized with reducer structure from classify).
234
+ # Replace its accumulators with the first worker's partials, then merge the rest.
235
+ probe_stage.replace_accumulators!(partials_list.first)
236
+ partials_list.drop(1).each { |partials| probe_stage.merge_partials!(partials) }
237
+
238
+ # Finish the reducer stage and pass results through any remaining stages.
239
+ results = probe_stage.finish
240
+ if rest_blocks.empty?
241
+ results.each(&block)
242
+ else
243
+ apply_pipeline(rest_blocks, results.each).each(&block)
244
+ end
189
245
  end
190
246
 
191
- def spawn_parallel_worker(blocks, path)
247
+ # Forks a worker process that reads `path`, builds a pipeline from `blocks`,
248
+ # and yields [pipeline, input_enum] to the caller's block for custom behavior.
249
+ # Returns [read_io, pid].
250
+ def spawn_worker(blocks, path)
192
251
  read_io, write_io = IO.pipe
193
252
  pid = fork do
194
253
  read_io.close
@@ -200,7 +259,7 @@ module Jrf
200
259
  end
201
260
  worker_failed = false
202
261
  begin
203
- pipeline.call(input_enum) { |value| emit_parallel_frame(value) }
262
+ yield pipeline, input_enum
204
263
  rescue => e
205
264
  @err.puts "#{path}: #{e.message} (#{e.class})"
206
265
  worker_failed = true
@@ -213,14 +272,17 @@ module Jrf
213
272
  [read_io, pid]
214
273
  end
215
274
 
216
- def run_parallel_worker_pool(blocks, num_workers)
275
+ # Runs a pool of up to `num_workers` concurrent workers across all input files.
276
+ # `spawner` is called with a file path and must return [read_io, pid].
277
+ # Yields each decoded JSON value from worker output frames.
278
+ def run_parallel_worker_pool(num_workers, spawner)
217
279
  file_queue = @file_paths.dup
218
280
  workers = {} # read_io => [reader, pid]
219
281
  children = []
220
282
 
221
283
  # Fill initial pool
222
284
  while workers.size < num_workers && !file_queue.empty?
223
- read_io, pid = spawn_parallel_worker(blocks, file_queue.shift)
285
+ read_io, pid = spawner.call(file_queue.shift)
224
286
  workers[read_io] = [ParallelFrameReader.new, pid]
225
287
  children << pid
226
288
  end
@@ -242,7 +304,7 @@ module Jrf
242
304
 
243
305
  # Spawn next worker if files remain
244
306
  unless file_queue.empty?
245
- read_io, pid = spawn_parallel_worker(blocks, file_queue.shift)
307
+ read_io, pid = spawner.call(file_queue.shift)
246
308
  workers[read_io] = [ParallelFrameReader.new, pid]
247
309
  children << pid
248
310
  read_ios << read_io
@@ -261,8 +323,13 @@ module Jrf
261
323
 
262
324
  def parallel_map_enum(map_blocks, num_workers)
263
325
  children = nil
326
+ spawner = ->(path) do
327
+ spawn_worker(map_blocks, path) do |pipeline, input|
328
+ pipeline.call(input) { |value| emit_parallel_frame(value) }
329
+ end
330
+ end
264
331
  Enumerator.new do |y|
265
- children = run_parallel_worker_pool(map_blocks, num_workers) { |value| y << value }
332
+ children = run_parallel_worker_pool(num_workers, spawner) { |value| y << value }
266
333
  ensure
267
334
  wait_for_parallel_children(children) if children
268
335
  end
@@ -291,8 +358,8 @@ module Jrf
291
358
  def each_stream_value(stream)
292
359
  return each_stream_value_lax(stream) { |value| yield value } if @lax
293
360
 
294
- stream.each_line do |raw_line|
295
- line = raw_line.strip
361
+ stream.each_line do |line|
362
+ line.strip!
296
363
  next if line.empty?
297
364
  yield JSON.parse(line)
298
365
  end
data/lib/jrf/cli.rb CHANGED
@@ -18,7 +18,7 @@ module Jrf
18
18
  --lax allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
19
19
  -o, --output FORMAT
20
20
  output format: json (default), pretty, tsv
21
- -P N opportunistically parallelize the map-prefix across N workers
21
+ -P N opportunistically parallelize across N workers
22
22
  -r, --require LIBRARY
23
23
  require LIBRARY before evaluating stages
24
24
  --no-jit do not enable YJIT, even when supported by the Ruby runtime
data/lib/jrf/reducers.rb CHANGED
@@ -20,8 +20,37 @@ module Jrf
20
20
  end
21
21
  end
22
22
 
23
+ # A reducer whose partial accumulators can be merged across parallel workers.
24
+ #
25
+ # Contract:
26
+ # - `identity` is the neutral element for `merge_fn`: merge(identity, x) == x
27
+ # - `initial` is always set to `identity` (the accumulator starts from the neutral element)
28
+ # - Any bias (e.g. sum's `initial:` keyword) is applied in `finish_fn`, not in the starting accumulator
29
+ class DecomposableReduce < Reduce
30
+ attr_reader :merge_fn
31
+
32
+ def initialize(identity, merge:, finish_fn: nil, &step_fn)
33
+ super(identity, finish_fn: finish_fn, &step_fn)
34
+ @merge_fn = merge
35
+ end
36
+
37
+ # Returns the raw accumulator without applying finish_fn.
38
+ def partial
39
+ @acc
40
+ end
41
+
42
+ # Merges another partial accumulator into this one.
43
+ def merge_partial(other_acc)
44
+ @acc = @merge_fn.call(@acc, other_acc)
45
+ end
46
+ end
47
+
23
48
  def reduce(initial, finish: nil, &step_fn)
24
49
  Reduce.new(initial, finish_fn: finish, &step_fn)
25
50
  end
51
+
52
+ def decomposable_reduce(identity, merge:, finish: nil, &step_fn)
53
+ DecomposableReduce.new(identity, merge: merge, finish_fn: finish, &step_fn)
54
+ end
26
55
  end
27
56
  end
@@ -17,6 +17,7 @@ module Jrf
17
17
  spec.fetch(:value),
18
18
  initial: reducer_initial_value(spec.fetch(:initial)),
19
19
  finish: spec[:finish],
20
+ merge: spec[:merge],
20
21
  &spec.fetch(:step)
21
22
  )
22
23
  end
@@ -48,27 +49,38 @@ module Jrf
48
49
  end
49
50
 
50
51
  define_reducer(:sum) do |_ctx, value, initial: 0, block: nil|
51
- { value: value, initial: initial, step: ->(acc, v) { v.nil? ? acc : (acc + v) } }
52
+ step = ->(acc, v) { v.nil? ? acc : (acc + v) }
53
+ if initial.is_a?(Numeric)
54
+ # Numeric — decomposable. Bias applied once in finish.
55
+ finish = initial == 0 ? nil : ->(acc) { [acc + initial] }
56
+ { value: value, initial: 0, step: step, finish: finish, merge: ->(a, b) { a + b } }
57
+ else
58
+ # Non-numeric (e.g. string concat) — not decomposable.
59
+ { value: value, initial: initial, step: step }
60
+ end
52
61
  end
53
62
 
54
63
  define_reducer(:count) do |_ctx, value = MISSING, block: nil|
64
+ merge = ->(a, b) { a + b }
55
65
  if value.equal?(MISSING)
56
- { value: nil, initial: 0, step: ->(acc, _v) { acc + 1 } }
66
+ { value: nil, initial: 0, step: ->(acc, _v) { acc + 1 }, merge: merge }
57
67
  else
58
- { value: value, initial: 0, step: ->(acc, v) { v.nil? ? acc : (acc + 1) } }
68
+ { value: value, initial: 0, step: ->(acc, v) { v.nil? ? acc : (acc + 1) }, merge: merge }
59
69
  end
60
70
  end
61
71
 
62
72
  define_reducer(:count_if) do |_ctx, condition, block: nil|
63
- { value: condition, initial: 0, step: ->(acc, v) { v ? (acc + 1) : acc } }
73
+ { value: condition, initial: 0, step: ->(acc, v) { v ? (acc + 1) : acc }, merge: ->(a, b) { a + b } }
64
74
  end
65
75
 
66
76
  define_reducer(:min) do |_ctx, value, block: nil|
67
- { value: value, initial: nil, step: ->(acc, v) { v.nil? ? acc : (acc.nil? || v < acc ? v : acc) } }
77
+ min_merge = ->(a, b) { a.nil? ? b : b.nil? ? a : (a < b ? a : b) }
78
+ { value: value, initial: nil, step: ->(acc, v) { v.nil? ? acc : (acc.nil? || v < acc ? v : acc) }, merge: min_merge }
68
79
  end
69
80
 
70
81
  define_reducer(:max) do |_ctx, value, block: nil|
71
- { value: value, initial: nil, step: ->(acc, v) { v.nil? ? acc : (acc.nil? || v > acc ? v : acc) } }
82
+ max_merge = ->(a, b) { a.nil? ? b : b.nil? ? a : (a > b ? a : b) }
83
+ { value: value, initial: nil, step: ->(acc, v) { v.nil? ? acc : (acc.nil? || v > acc ? v : acc) }, merge: max_merge }
72
84
  end
73
85
 
74
86
  define_reducer(:average) do |_ctx, value, block: nil|
@@ -82,7 +94,8 @@ module Jrf
82
94
  acc[0] += v
83
95
  acc[1] += 1
84
96
  acc
85
- }
97
+ },
98
+ merge: ->(a, b) { [a[0] + b[0], a[1] + b[1]] }
86
99
  }
87
100
  end
88
101
 
@@ -136,7 +149,7 @@ module Jrf
136
149
 
137
150
  define_reducer(:group) do |ctx, value = MISSING, block: nil|
138
151
  resolved_value = value.equal?(MISSING) ? ctx.send(:current_input) : value
139
- { value: resolved_value, initial: -> { [] }, step: ->(acc, v) { acc << v } }
152
+ { value: resolved_value, initial: -> { [] }, step: ->(acc, v) { acc << v }, merge: ->(a, b) { a + b } }
140
153
  end
141
154
 
142
155
  define_reducer(:percentile) do |ctx, value, percentage, block: nil|
data/lib/jrf/stage.rb CHANGED
@@ -51,13 +51,17 @@ module Jrf
51
51
  (@mode == :reducer) ? Control::DROPPED : result
52
52
  end
53
53
 
54
- def step_reduce(value, initial:, finish: nil, step_fn: nil, &step_block)
54
+ def step_reduce(value, initial:, finish: nil, merge: nil, step_fn: nil, &step_block)
55
55
  idx = @cursor
56
56
  step_fn ||= step_block
57
57
 
58
58
  if @reducers[idx].nil?
59
59
  finish_rows = finish || ->(acc) { [acc] }
60
- @reducers[idx] = Reducers.reduce(initial, finish: finish_rows, &step_fn)
60
+ @reducers[idx] = if merge
61
+ Reducers.decomposable_reduce(initial, merge: merge, finish: finish_rows, &step_fn)
62
+ else
63
+ Reducers.reduce(initial, finish: finish_rows, &step_fn)
64
+ end
61
65
  result = ReducerToken.new(idx)
62
66
  else
63
67
  result = Control::DROPPED
@@ -167,6 +171,32 @@ module Jrf
167
171
  end
168
172
  end
169
173
 
174
+ # Returns true if all reducers in this stage are DecomposableReduce instances,
175
+ # meaning partial accumulators from parallel workers can be merged.
176
+ def decomposable?
177
+ @mode == :reducer && @reducers.any? &&
178
+ @reducers.all? { |r| r.is_a?(Reducers::DecomposableReduce) }
179
+ end
180
+
181
+ # Returns an array of raw accumulator values, one per reducer.
182
+ def partial_accumulators
183
+ @reducers.map(&:partial)
184
+ end
185
+
186
+ # Replaces all reducer accumulators with the given values.
187
+ def replace_accumulators!(partials)
188
+ @reducers.each_with_index do |reducer, i|
189
+ reducer.instance_variable_set(:@acc, partials[i])
190
+ end
191
+ end
192
+
193
+ # Merges an array of partial accumulators (from another worker) into this stage's reducers.
194
+ def merge_partials!(other_partials)
195
+ @reducers.each_with_index do |reducer, i|
196
+ reducer.merge_partial(other_partials[i])
197
+ end
198
+ end
199
+
170
200
  private
171
201
 
172
202
  def with_scoped_reducers(reducer_list)
data/lib/jrf/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Jrf
4
- VERSION = "0.1.14"
4
+ VERSION = "0.1.15"
5
5
  end
@@ -53,8 +53,9 @@ class CliParallelTest < JrfTestCase
53
53
  write_ndjson(dir, "a.ndjson", [{"x" => 10}, {"x" => 20}])
54
54
  write_ndjson(dir, "b.ndjson", [{"x" => 30}, {"x" => 40}])
55
55
 
56
- stdout, stderr, status = Open3.capture3("./exe/jrf", "-P", "2", 'select(_["x"] > 10) >> sum(_["x"])', *ndjson_files(dir))
56
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'select(_["x"] > 10) >> sum(_["x"])', *ndjson_files(dir))
57
57
  assert_success(status, stderr, "parallel split map+reduce")
58
+ assert_includes(stderr, "decompose=2/2", "select+sum decomposed")
58
59
  assert_equal(%w[90], lines(stdout), "parallel split map+reduce output")
59
60
  end
60
61
  end
@@ -72,15 +73,16 @@ class CliParallelTest < JrfTestCase
72
73
  end
73
74
  end
74
75
 
75
- def test_parallel_all_reducers_falls_back_to_serial
76
+ def test_parallel_decomposable_reducer
76
77
  Dir.mktmpdir do |dir|
77
78
  write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 2}])
78
79
  write_ndjson(dir, "b.ndjson", [{"x" => 3}])
79
80
 
80
81
  stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'sum(_["x"])', *ndjson_files(dir))
81
- assert_success(status, stderr, "all-reducer serial fallback")
82
- assert_equal(%w[6], lines(stdout), "all-reducer serial fallback output")
83
- assert_includes(stderr, "parallel: disabled", "parallel disabled summary")
82
+ assert_success(status, stderr, "parallel decomposable reducer")
83
+ assert_equal(%w[6], lines(stdout), "parallel decomposable reducer output")
84
+ assert_includes(stderr, "parallel: enabled", "parallel enabled for decomposable reducer")
85
+ assert_includes(stderr, "decompose=", "decompose mode indicated")
84
86
  end
85
87
  end
86
88
 
@@ -153,12 +155,209 @@ class CliParallelTest < JrfTestCase
153
155
  write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 20}, {"x" => 3}])
154
156
  write_ndjson(dir, "b.ndjson", [{"x" => 40}, {"x" => 5}])
155
157
 
156
- stdout, stderr, status = Open3.capture3("./exe/jrf", "-P", "2", 'select(_["x"] > 10) >> sum(_["x"])', *ndjson_files(dir))
158
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'select(_["x"] > 10) >> sum(_["x"])', *ndjson_files(dir))
157
159
  assert_success(status, stderr, "parallel select then sum")
160
+ assert_includes(stderr, "decompose=2/2", "select+sum fully decomposed in workers")
158
161
  assert_equal(%w[60], lines(stdout), "parallel select then sum output")
159
162
  end
160
163
  end
161
164
 
165
+ def test_parallel_decomposable_multi_reducer
166
+ Dir.mktmpdir do |dir|
167
+ write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 2}])
168
+ write_ndjson(dir, "b.ndjson", [{"x" => 3}, {"x" => 4}])
169
+
170
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", '{s: sum(_["x"]), n: count(), mn: min(_["x"]), mx: max(_["x"])}', *ndjson_files(dir))
171
+ assert_success(status, stderr, "parallel multi reducer")
172
+ assert_includes(stderr, "decompose=", "multi reducer decomposed")
173
+ result = JSON.parse(lines(stdout).first)
174
+ assert_equal(10, result["s"], "sum")
175
+ assert_equal(4, result["n"], "count")
176
+ assert_equal(1, result["mn"], "min")
177
+ assert_equal(4, result["mx"], "max")
178
+ end
179
+ end
180
+
181
+ def test_parallel_decomposable_average
182
+ Dir.mktmpdir do |dir|
183
+ write_ndjson(dir, "a.ndjson", [{"x" => 10}, {"x" => 20}])
184
+ write_ndjson(dir, "b.ndjson", [{"x" => 30}, {"x" => 40}])
185
+
186
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'average(_["x"])', *ndjson_files(dir))
187
+ assert_success(status, stderr, "parallel average")
188
+ assert_includes(stderr, "decompose=", "average decomposed")
189
+ assert_equal(["25.0"], lines(stdout), "parallel average output")
190
+ end
191
+ end
192
+
193
+ def test_parallel_decomposable_group
194
+ Dir.mktmpdir do |dir|
195
+ write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 2}])
196
+ write_ndjson(dir, "b.ndjson", [{"x" => 3}])
197
+
198
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'group(_["x"])', *ndjson_files(dir))
199
+ assert_success(status, stderr, "parallel group")
200
+ assert_includes(stderr, "decompose=", "group decomposed")
201
+ result = JSON.parse(lines(stdout).first)
202
+ assert_equal([1, 2, 3], result.sort, "parallel group output")
203
+ end
204
+ end
205
+
206
+ def test_parallel_decomposable_sum_with_initial
207
+ Dir.mktmpdir do |dir|
208
+ write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 2}])
209
+ write_ndjson(dir, "b.ndjson", [{"x" => 3}])
210
+
211
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'sum(_["x"], initial: 100)', *ndjson_files(dir))
212
+ assert_success(status, stderr, "sum with numeric initial")
213
+ assert_includes(stderr, "decompose=", "numeric initial decomposes")
214
+ assert_equal(%w[106], lines(stdout), "sum with initial output")
215
+ end
216
+ end
217
+
218
+ def test_parallel_sum_with_non_numeric_initial_falls_back
219
+ Dir.mktmpdir do |dir|
220
+ write_ndjson(dir, "a.ndjson", [{"x" => "a"}, {"x" => "b"}])
221
+ write_ndjson(dir, "b.ndjson", [{"x" => "c"}])
222
+
223
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'sum(_["x"], initial: "")', *ndjson_files(dir))
224
+ assert_success(status, stderr, "sum with string initial")
225
+ assert_includes(stderr, "parallel: disabled", "non-numeric initial falls back to serial")
226
+ assert_equal(['"abc"'], lines(stdout), "sum with string initial output")
227
+ end
228
+ end
229
+
230
+ def test_sum_with_string_initial
231
+ Dir.mktmpdir do |dir|
232
+ write_ndjson(dir, "a.ndjson", [{"x" => "hello "}, {"x" => "world"}])
233
+
234
+ stdout, stderr, status = Open3.capture3("./exe/jrf", 'sum(_["x"], initial: "")', *ndjson_files(dir))
235
+ assert_success(status, stderr, "sum with string initial")
236
+ assert_equal(['"hello world"'], lines(stdout), "sum with string initial output")
237
+ end
238
+ end
239
+
240
+ def test_parallel_decomposable_reducer_then_passthrough
241
+ Dir.mktmpdir do |dir|
242
+ write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 2}])
243
+ write_ndjson(dir, "b.ndjson", [{"x" => 3}, {"x" => 4}])
244
+
245
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'sum(_["x"]) >> _ * 2', *ndjson_files(dir))
246
+ assert_success(status, stderr, "parallel decomposable then passthrough")
247
+ assert_includes(stderr, "decompose=", "reducer then passthrough decomposed")
248
+ assert_equal(%w[20], lines(stdout), "parallel decomposable then passthrough output")
249
+ end
250
+ end
251
+
252
+ def test_parallel_mixed_decomposable_reducers
253
+ Dir.mktmpdir do |dir|
254
+ write_ndjson(dir, "a.ndjson", [{"x" => 10}, {"x" => 20}])
255
+ write_ndjson(dir, "b.ndjson", [{"x" => 30}, {"x" => 40}])
256
+
257
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", '[sum(_["x"]), average(_["x"]), min(_["x"]), max(_["x"]), count()]', *ndjson_files(dir))
258
+ assert_success(status, stderr, "mixed decomposable")
259
+ assert_includes(stderr, "decompose=", "mixed decomposable used decompose")
260
+ result = JSON.parse(lines(stdout).first)
261
+ assert_equal([100, 25.0, 10, 40, 4], result, "mixed decomposable output")
262
+ end
263
+ end
264
+
265
+ def test_parallel_mixed_decomposable_and_non_decomposable_falls_back
266
+ Dir.mktmpdir do |dir|
267
+ write_ndjson(dir, "a.ndjson", [{"x" => 10}, {"x" => 20}])
268
+ write_ndjson(dir, "b.ndjson", [{"x" => 30}, {"x" => 40}])
269
+
270
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", '[sum(_["x"]), percentile(_["x"], 0.5)]', *ndjson_files(dir))
271
+ assert_success(status, stderr, "mixed with non-decomposable")
272
+ assert_includes(stderr, "parallel: disabled", "mixed with non-decomposable falls back to serial")
273
+ result = JSON.parse(lines(stdout).first)
274
+ assert_equal([100, 20], result, "mixed with non-decomposable output")
275
+ end
276
+ end
277
+
278
+ def test_parallel_select_sum_passthrough_decomposes
279
+ Dir.mktmpdir do |dir|
280
+ write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 20}])
281
+ write_ndjson(dir, "b.ndjson", [{"x" => 40}])
282
+
283
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'select(_["x"] > 10) >> sum(_["x"]) >> _ * 2', *ndjson_files(dir))
284
+ assert_success(status, stderr, "select+sum+passthrough")
285
+ assert_includes(stderr, "decompose=2/3", "select+sum decomposed, passthrough in parent")
286
+ assert_equal(%w[120], lines(stdout), "select+sum+passthrough output")
287
+ end
288
+ end
289
+
290
+ def test_parallel_select_non_decomposable_uses_split
291
+ Dir.mktmpdir do |dir|
292
+ write_ndjson(dir, "a.ndjson", [{"x" => 3}, {"x" => 1}])
293
+ write_ndjson(dir, "b.ndjson", [{"x" => 2}])
294
+
295
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'select(_["x"] > 0) >> sort(_["x"]) >> _["x"]', *ndjson_files(dir))
296
+ assert_success(status, stderr, "select+sort uses split")
297
+ assert_includes(stderr, "split=1/3", "non-decomposable sort uses map-prefix split")
298
+ assert_equal([1, 2, 3], lines(stdout).map { |l| JSON.parse(l) }, "select+sort output")
299
+ end
300
+ end
301
+
302
+ def test_parallel_decomposable_with_empty_file
303
+ Dir.mktmpdir do |dir|
304
+ write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 2}])
305
+ File.write(File.join(dir, "b.ndjson"), "")
306
+
307
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", '{s: sum(_["x"]), n: count(), mn: min(_["x"])}', *ndjson_files(dir))
308
+ assert_success(status, stderr, "decomposable with empty file")
309
+ assert_includes(stderr, "decompose=", "decomposable with empty file used decompose")
310
+ result = JSON.parse(lines(stdout).first)
311
+ assert_equal(3, result["s"], "sum ignores empty file")
312
+ assert_equal(2, result["n"], "count ignores empty file")
313
+ assert_equal(1, result["mn"], "min ignores empty file")
314
+ end
315
+ end
316
+
317
+ def test_parallel_decomposable_all_files_empty
318
+ Dir.mktmpdir do |dir|
319
+ File.write(File.join(dir, "a.ndjson"), "")
320
+ File.write(File.join(dir, "b.ndjson"), "")
321
+
322
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'sum(_["x"])', *ndjson_files(dir))
323
+ assert_success(status, stderr, "all files empty")
324
+ # All files empty means first_value is nil, so classify returns nil → serial fallback
325
+ assert_includes(stderr, "parallel: disabled", "all files empty falls back to serial")
326
+ assert_equal([], lines(stdout), "no output for empty input")
327
+ end
328
+ end
329
+
330
+ def test_parallel_non_decomposable_falls_back_to_serial
331
+ Dir.mktmpdir do |dir|
332
+ write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 2}])
333
+ write_ndjson(dir, "b.ndjson", [{"x" => 3}])
334
+
335
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'sort(_["x"]) >> _["x"]', *ndjson_files(dir))
336
+ assert_success(status, stderr, "non-decomposable serial fallback")
337
+ assert_equal([1, 2, 3], lines(stdout).map { |l| JSON.parse(l) }, "sort output")
338
+ assert_includes(stderr, "parallel: disabled", "non-decomposable falls back to serial")
339
+ end
340
+ end
341
+
342
+ def test_parallel_decomposable_matches_serial
343
+ Dir.mktmpdir do |dir|
344
+ write_ndjson(dir, "a.ndjson", (1..50).map { |i| {"v" => i} })
345
+ write_ndjson(dir, "b.ndjson", (51..100).map { |i| {"v" => i} })
346
+
347
+ files = ndjson_files(dir)
348
+ expr = '{s: sum(_["v"]), n: count(), mn: min(_["v"]), mx: max(_["v"]), avg: average(_["v"])}'
349
+
350
+ serial_stdout, serial_stderr, serial_status = Open3.capture3("./exe/jrf", expr, *files)
351
+ assert_success(serial_status, serial_stderr, "serial baseline")
352
+
353
+ parallel_stdout, parallel_stderr, parallel_status = Open3.capture3("./exe/jrf", "-v", "-P", "2", expr, *files)
354
+ assert_success(parallel_status, parallel_stderr, "parallel run")
355
+ assert_includes(parallel_stderr, "decompose=", "decomposable matches serial used decompose")
356
+
357
+ assert_equal(JSON.parse(serial_stdout), JSON.parse(parallel_stdout), "parallel decomposable matches serial")
358
+ end
359
+ end
360
+
162
361
  def test_serial_error_includes_filename
163
362
  Dir.mktmpdir do |dir|
164
363
  good_path = File.join(dir, "a.ndjson")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jrf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.14
4
+ version: 0.1.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - kazuho