RubyGems - jrf - Versions diffs - 0.1.14 → 0.1.15 - Mend

jrf 0.1.14 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 78c1f6eb54e20d4dffbfe57f89a49d9e8ec9bbb2a9e118d911f2dec3c649f4ac
-  data.tar.gz: 63f43701422cfe200b7932a2177132f5e4e74e690960e71b88d6cc7b767e0b3c
+  metadata.gz: de85d7a03d58baee4c931d10869a824a1ff5c2eec121cd15e63ec23805203676
+  data.tar.gz: ce3c53475e13d41e3a176ef7c9ea840145fbbf826612457386cc0899c28a1af0
 SHA512:
-  metadata.gz: 152ebdc2322f9a8b6c0cad2cb303a093a45d5e0ecc17b519904e40e069a747b56e33f1ddd33f7f3efb32031d78808d05e32d93ab151572b973a1324f9e676e0b
-  data.tar.gz: 63c189a79b484777c25f5c1a7951d930fc2d110f3547216b2fd099469e57e7a062c0ec64ba2c7b0c3d7e88a6fb5f1f40d3b5ba6d1a0803acfc5253b00f43dfe8
+  metadata.gz: ded54cff09febe7fe02c585f30a702cd82cd11aeb563f840b9f182cd8a6e94c090ba5e71fbd6cad7f377816c14de41c12f5fed449b2b8f7c1d682513db2f19ee
+  data.tar.gz: bdd4f9ee2ff809cc718b497a39783027f9c7322582dfdaebc8cae8b6bf4cb1d56b9639a356f2239be66b6ee39693c1da994b4fa9b9db5ecfb6c10d66ede021d9

data/lib/jrf/cli/runner.rb CHANGED Viewed

@@ -21,7 +21,7 @@ module Jrf
           chunk = @input.read(length)
           return nil if chunk.nil?
-          chunk = chunk.tr(RS_CHAR, "\n")
+          chunk.tr!(RS_CHAR, "\n")
           if outbuf
             outbuf.replace(chunk)
           else
@@ -72,7 +72,7 @@ module Jrf
         def compact!
           if @offset > 0
-            @buf = @buf.byteslice(@offset..) || +""
+            @buf.slice!(0, @offset)
             @offset = 0
           end
         end
@@ -140,13 +140,30 @@ module Jrf
       def process_values(blocks, parallel:, verbose:, &block)
         if parallel <= 1 || @file_paths.length <= 1
+          # Single file or no parallelism requested — serial is the only option.
+          # This also covers the zero-input-files case (no workers to spawn); files that exist but are empty fall back below when split_index is nil.
           dump_parallel_status("disabled", verbose: verbose)
           return apply_pipeline(blocks, each_input_enum).each(&block)
         end
-        # Parallelize the longest map-only prefix; reducers stay in the parent.
-        split_index = classify_parallel_stages(blocks)
-        if split_index.nil? || split_index == 0
+        split_index, probe_stage = classify_parallel_stages(blocks)
+        if split_index.nil?
+          dump_parallel_status("disabled", verbose: verbose)
+          return apply_pipeline(blocks, each_input_enum).each(&block)
+        end
+        # If the first reducer stage is decomposable, workers run everything up to
+        # and including it (map prefix + reducer), emit partial accumulators, and the
+        # parent merges. This covers both pure reducers (split_index == 0, e.g. `sum(_)`)
+        # and map-then-reduce (split_index > 0, e.g. `select(...) >> sum(...)`).
+        if probe_stage&.decomposable?
+          worker_blocks = blocks[0..split_index]
+          rest_blocks = blocks[(split_index + 1)..]
+          return process_decomposable_parallel(worker_blocks, rest_blocks, probe_stage,
+                                               parallel: parallel, verbose: verbose, &block)
+        end
+        if split_index == 0
           dump_parallel_status("disabled", verbose: verbose)
           return apply_pipeline(blocks, each_input_enum).each(&block)
         end
@@ -162,6 +179,9 @@ module Jrf
         @err.puts "parallel: #{status}" if verbose
       end
+      # Returns [split_index, probe_stage]: split_index is the index of the first
+      # reducer stage (blocks.length if every stage is passthrough), and probe_stage is
+      # that reducer's Stage object (nil if none). Returns [nil, nil] when no input row is found.
       def classify_parallel_stages(blocks)
         # Read the first row from the first file to probe stage modes
         first_value = nil
@@ -171,24 +191,63 @@ module Jrf
             break
           end
         end
-        return nil if first_value.nil?
+        return [nil, nil] if first_value.nil?
         # Run the value through each stage independently to classify
         split_index = nil
+        probe_stage = nil
         blocks.each_with_index do |block, i|
           probe_pipeline = Pipeline.new(block)
           probe_pipeline.call([first_value]) { |_| }
           stage = probe_pipeline.instance_variable_get(:@stages).first
           if stage.instance_variable_get(:@mode) == :reducer
             split_index = i
+            probe_stage = stage
             break
           end
         end
-        split_index || blocks.length
+        [split_index || blocks.length, probe_stage]
+      end
+      def process_decomposable_parallel(worker_blocks, rest_blocks, probe_stage, parallel:, verbose:, &block)
+        dump_parallel_status("enabled workers=#{parallel} files=#{@file_paths.length} decompose=#{worker_blocks.length}/#{worker_blocks.length + rest_blocks.length}", verbose: verbose)
+        # Workers run map prefix + reducer stage per file and emit partial accumulators.
+        partials_list = []
+        reducer_stage_index = worker_blocks.length - 1
+        spawner = ->(path) do
+          spawn_worker(worker_blocks, path) do |pipeline, input|
+            pipeline.call(input) { |_| }
+            # If the file was empty, the stage was never initialized (no reducers),
+            # so skip emitting — the parent will simply not receive a partial for this worker.
+            stage = pipeline.instance_variable_get(:@stages)[reducer_stage_index]
+            partials = stage.partial_accumulators
+            emit_parallel_frame(partials) unless partials.empty?
+          end
+        end
+        children = run_parallel_worker_pool(parallel, spawner) { |v| partials_list << v }
+        wait_for_parallel_children(children) if children
+        return if partials_list.empty?
+        # Reuse the probe stage (already initialized with reducer structure from classify).
+        # Replace its accumulators with the first worker's partials, then merge the rest.
+        probe_stage.replace_accumulators!(partials_list.first)
+        partials_list.drop(1).each { |partials| probe_stage.merge_partials!(partials) }
+        # Finish the reducer stage and pass results through any remaining stages.
+        results = probe_stage.finish
+        if rest_blocks.empty?
+          results.each(&block)
+        else
+          apply_pipeline(rest_blocks, results.each).each(&block)
+        end
       end
-      def spawn_parallel_worker(blocks, path)
+      # Forks a worker process that reads `path`, builds a pipeline from `blocks`,
+      # and yields [pipeline, input_enum] to the caller's block for custom behavior.
+      # Returns [read_io, pid].
+      def spawn_worker(blocks, path)
         read_io, write_io = IO.pipe
         pid = fork do
           read_io.close
@@ -200,7 +259,7 @@ module Jrf
           end
           worker_failed = false
           begin
-            pipeline.call(input_enum) { |value| emit_parallel_frame(value) }
+            yield pipeline, input_enum
           rescue => e
             @err.puts "#{path}: #{e.message} (#{e.class})"
             worker_failed = true
@@ -213,14 +272,17 @@ module Jrf
         [read_io, pid]
       end
-      def run_parallel_worker_pool(blocks, num_workers)
+      # Runs a pool of up to `num_workers` concurrent workers across all input files.
+      # `spawner` is called with a file path and must return [read_io, pid].
+      # Yields each decoded JSON value from worker output frames.
+      def run_parallel_worker_pool(num_workers, spawner)
         file_queue = @file_paths.dup
         workers = {} # read_io => [reader, pid]
         children = []
         # Fill initial pool
         while workers.size < num_workers && !file_queue.empty?
-          read_io, pid = spawn_parallel_worker(blocks, file_queue.shift)
+          read_io, pid = spawner.call(file_queue.shift)
           workers[read_io] = [ParallelFrameReader.new, pid]
           children << pid
         end
@@ -242,7 +304,7 @@ module Jrf
               # Spawn next worker if files remain
               unless file_queue.empty?
-                read_io, pid = spawn_parallel_worker(blocks, file_queue.shift)
+                read_io, pid = spawner.call(file_queue.shift)
                 workers[read_io] = [ParallelFrameReader.new, pid]
                 children << pid
                 read_ios << read_io
@@ -261,8 +323,13 @@ module Jrf
       def parallel_map_enum(map_blocks, num_workers)
         children = nil
+        spawner = ->(path) do
+          spawn_worker(map_blocks, path) do |pipeline, input|
+            pipeline.call(input) { |value| emit_parallel_frame(value) }
+          end
+        end
         Enumerator.new do |y|
-          children = run_parallel_worker_pool(map_blocks, num_workers) { |value| y << value }
+          children = run_parallel_worker_pool(num_workers, spawner) { |value| y << value }
         ensure
           wait_for_parallel_children(children) if children
         end
@@ -291,8 +358,8 @@ module Jrf
       def each_stream_value(stream)
         return each_stream_value_lax(stream) { |value| yield value } if @lax
-        stream.each_line do |raw_line|
-          line = raw_line.strip
+        stream.each_line do |line|
+          line.strip!
           next if line.empty?
           yield JSON.parse(line)
         end

data/lib/jrf/cli.rb CHANGED Viewed

@@ -18,7 +18,7 @@ module Jrf
         --lax          allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
         -o, --output FORMAT
                        output format: json (default), pretty, tsv
-        -P N           opportunistically parallelize the map-prefix across N workers
+        -P N           opportunistically parallelize across N workers
         -r, --require LIBRARY
                        require LIBRARY before evaluating stages
         --no-jit       do not enable YJIT, even when supported by the Ruby runtime

data/lib/jrf/reducers.rb CHANGED Viewed

@@ -20,8 +20,37 @@ module Jrf
       end
     end
+    # A reducer whose partial accumulators can be merged across parallel workers.
+    #
+    # Contract:
+    # - `identity` is the neutral element for `merge_fn`: merge(identity, x) == x
+    # - `initial` is always set to `identity` (the accumulator starts from the neutral element)
+    # - Any bias (e.g. sum's `initial:` keyword) is applied in `finish_fn`, not in the starting accumulator
+    class DecomposableReduce < Reduce
+      attr_reader :merge_fn
+      def initialize(identity, merge:, finish_fn: nil, &step_fn)
+        super(identity, finish_fn: finish_fn, &step_fn)
+        @merge_fn = merge
+      end
+      # Returns the raw accumulator without applying finish_fn.
+      def partial
+        @acc
+      end
+      # Merges another partial accumulator into this one.
+      def merge_partial(other_acc)
+        @acc = @merge_fn.call(@acc, other_acc)
+      end
+    end
     def reduce(initial, finish: nil, &step_fn)
       Reduce.new(initial, finish_fn: finish, &step_fn)
     end
+    def decomposable_reduce(identity, merge:, finish: nil, &step_fn)
+      DecomposableReduce.new(identity, merge: merge, finish_fn: finish, &step_fn)
+    end
   end
 end

data/lib/jrf/row_context.rb CHANGED Viewed

@@ -17,6 +17,7 @@ module Jrf
             spec.fetch(:value),
             initial: reducer_initial_value(spec.fetch(:initial)),
             finish: spec[:finish],
+            merge: spec[:merge],
             &spec.fetch(:step)
           )
         end
@@ -48,27 +49,38 @@ module Jrf
     end
     define_reducer(:sum) do |_ctx, value, initial: 0, block: nil|
-      { value: value, initial: initial, step: ->(acc, v) { v.nil? ? acc : (acc + v) } }
+      step = ->(acc, v) { v.nil? ? acc : (acc + v) }
+      if initial.is_a?(Numeric)
+        # Numeric — decomposable. Bias applied once in finish.
+        finish = initial == 0 ? nil : ->(acc) { [acc + initial] }
+        { value: value, initial: 0, step: step, finish: finish, merge: ->(a, b) { a + b } }
+      else
+        # Non-numeric (e.g. string concat) — not decomposable.
+        { value: value, initial: initial, step: step }
+      end
     end
     define_reducer(:count) do |_ctx, value = MISSING, block: nil|
+      merge = ->(a, b) { a + b }
       if value.equal?(MISSING)
-        { value: nil, initial: 0, step: ->(acc, _v) { acc + 1 } }
+        { value: nil, initial: 0, step: ->(acc, _v) { acc + 1 }, merge: merge }
       else
-        { value: value, initial: 0, step: ->(acc, v) { v.nil? ? acc : (acc + 1) } }
+        { value: value, initial: 0, step: ->(acc, v) { v.nil? ? acc : (acc + 1) }, merge: merge }
       end
     end
     define_reducer(:count_if) do |_ctx, condition, block: nil|
-      { value: condition, initial: 0, step: ->(acc, v) { v ? (acc + 1) : acc } }
+      { value: condition, initial: 0, step: ->(acc, v) { v ? (acc + 1) : acc }, merge: ->(a, b) { a + b } }
     end
     define_reducer(:min) do |_ctx, value, block: nil|
-      { value: value, initial: nil, step: ->(acc, v) { v.nil? ? acc : (acc.nil? || v < acc ? v : acc) } }
+      min_merge = ->(a, b) { a.nil? ? b : b.nil? ? a : (a < b ? a : b) }
+      { value: value, initial: nil, step: ->(acc, v) { v.nil? ? acc : (acc.nil? || v < acc ? v : acc) }, merge: min_merge }
     end
     define_reducer(:max) do |_ctx, value, block: nil|
-      { value: value, initial: nil, step: ->(acc, v) { v.nil? ? acc : (acc.nil? || v > acc ? v : acc) } }
+      max_merge = ->(a, b) { a.nil? ? b : b.nil? ? a : (a > b ? a : b) }
+      { value: value, initial: nil, step: ->(acc, v) { v.nil? ? acc : (acc.nil? || v > acc ? v : acc) }, merge: max_merge }
     end
     define_reducer(:average) do |_ctx, value, block: nil|
@@ -82,7 +94,8 @@ module Jrf
           acc[0] += v
           acc[1] += 1
           acc
-        }
+        },
+        merge: ->(a, b) { [a[0] + b[0], a[1] + b[1]] }
       }
     end
@@ -136,7 +149,7 @@ module Jrf
     define_reducer(:group) do |ctx, value = MISSING, block: nil|
       resolved_value = value.equal?(MISSING) ? ctx.send(:current_input) : value
-      { value: resolved_value, initial: -> { [] }, step: ->(acc, v) { acc << v } }
+      { value: resolved_value, initial: -> { [] }, step: ->(acc, v) { acc << v }, merge: ->(a, b) { a + b } }
     end
     define_reducer(:percentile) do |ctx, value, percentage, block: nil|

data/lib/jrf/stage.rb CHANGED Viewed

@@ -51,13 +51,17 @@ module Jrf
       (@mode == :reducer) ? Control::DROPPED : result
     end
-    def step_reduce(value, initial:, finish: nil, step_fn: nil, &step_block)
+    def step_reduce(value, initial:, finish: nil, merge: nil, step_fn: nil, &step_block)
       idx = @cursor
       step_fn ||= step_block
       if @reducers[idx].nil?
         finish_rows = finish || ->(acc) { [acc] }
-        @reducers[idx] = Reducers.reduce(initial, finish: finish_rows, &step_fn)
+        @reducers[idx] = if merge
+          Reducers.decomposable_reduce(initial, merge: merge, finish: finish_rows, &step_fn)
+        else
+          Reducers.reduce(initial, finish: finish_rows, &step_fn)
+        end
         result = ReducerToken.new(idx)
       else
         result = Control::DROPPED
@@ -167,6 +171,32 @@ module Jrf
       end
     end
+    # Returns true if this stage is in reducer mode, has at least one reducer, and every
+    # reducer is a DecomposableReduce, so partials from parallel workers can be merged.
+    def decomposable?
+      @mode == :reducer && @reducers.any? &&
+        @reducers.all? { |r| r.is_a?(Reducers::DecomposableReduce) }
+    end
+    # Returns an array of raw accumulator values, one per reducer.
+    def partial_accumulators
+      @reducers.map(&:partial)
+    end
+    # Replaces all reducer accumulators with the given values.
+    def replace_accumulators!(partials)
+      @reducers.each_with_index do |reducer, i|
+        reducer.instance_variable_set(:@acc, partials[i])
+      end
+    end
+    # Merges an array of partial accumulators (from another worker) into this stage's reducers.
+    def merge_partials!(other_partials)
+      @reducers.each_with_index do |reducer, i|
+        reducer.merge_partial(other_partials[i])
+      end
+    end
     private
     def with_scoped_reducers(reducer_list)

data/lib/jrf/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Jrf
-  VERSION = "0.1.14"
+  VERSION = "0.1.15"
 end

data/test/cli_parallel_test.rb CHANGED Viewed

@@ -53,8 +53,9 @@ class CliParallelTest < JrfTestCase
       write_ndjson(dir, "a.ndjson", [{"x" => 10}, {"x" => 20}])
       write_ndjson(dir, "b.ndjson", [{"x" => 30}, {"x" => 40}])
-      stdout, stderr, status = Open3.capture3("./exe/jrf", "-P", "2", 'select(_["x"] > 10) >> sum(_["x"])', *ndjson_files(dir))
+      stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'select(_["x"] > 10) >> sum(_["x"])', *ndjson_files(dir))
       assert_success(status, stderr, "parallel split map+reduce")
+      assert_includes(stderr, "decompose=2/2", "select+sum decomposed")
       assert_equal(%w[90], lines(stdout), "parallel split map+reduce output")
     end
   end
@@ -72,15 +73,16 @@ class CliParallelTest < JrfTestCase
     end
   end
-  def test_parallel_all_reducers_falls_back_to_serial
+  def test_parallel_decomposable_reducer
     Dir.mktmpdir do |dir|
       write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 2}])
       write_ndjson(dir, "b.ndjson", [{"x" => 3}])
       stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'sum(_["x"])', *ndjson_files(dir))
-      assert_success(status, stderr, "all-reducer serial fallback")
-      assert_equal(%w[6], lines(stdout), "all-reducer serial fallback output")
-      assert_includes(stderr, "parallel: disabled", "parallel disabled summary")
+      assert_success(status, stderr, "parallel decomposable reducer")
+      assert_equal(%w[6], lines(stdout), "parallel decomposable reducer output")
+      assert_includes(stderr, "parallel: enabled", "parallel enabled for decomposable reducer")
+      assert_includes(stderr, "decompose=", "decompose mode indicated")
     end
   end
@@ -153,12 +155,209 @@ class CliParallelTest < JrfTestCase
       write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 20}, {"x" => 3}])
       write_ndjson(dir, "b.ndjson", [{"x" => 40}, {"x" => 5}])
-      stdout, stderr, status = Open3.capture3("./exe/jrf", "-P", "2", 'select(_["x"] > 10) >> sum(_["x"])', *ndjson_files(dir))
+      stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'select(_["x"] > 10) >> sum(_["x"])', *ndjson_files(dir))
       assert_success(status, stderr, "parallel select then sum")
+      assert_includes(stderr, "decompose=2/2", "select+sum fully decomposed in workers")
       assert_equal(%w[60], lines(stdout), "parallel select then sum output")
     end
   end
+  def test_parallel_decomposable_multi_reducer
+    Dir.mktmpdir do |dir|
+      write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 2}])
+      write_ndjson(dir, "b.ndjson", [{"x" => 3}, {"x" => 4}])
+      stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", '{s: sum(_["x"]), n: count(), mn: min(_["x"]), mx: max(_["x"])}', *ndjson_files(dir))
+      assert_success(status, stderr, "parallel multi reducer")
+      assert_includes(stderr, "decompose=", "multi reducer decomposed")
+      result = JSON.parse(lines(stdout).first)
+      assert_equal(10, result["s"], "sum")
+      assert_equal(4, result["n"], "count")
+      assert_equal(1, result["mn"], "min")
+      assert_equal(4, result["mx"], "max")
+    end
+  end
+  def test_parallel_decomposable_average
+    Dir.mktmpdir do |dir|
+      write_ndjson(dir, "a.ndjson", [{"x" => 10}, {"x" => 20}])
+      write_ndjson(dir, "b.ndjson", [{"x" => 30}, {"x" => 40}])
+      stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'average(_["x"])', *ndjson_files(dir))
+      assert_success(status, stderr, "parallel average")
+      assert_includes(stderr, "decompose=", "average decomposed")
+      assert_equal(["25.0"], lines(stdout), "parallel average output")
+    end
+  end
+  def test_parallel_decomposable_group
+    Dir.mktmpdir do |dir|
+      write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 2}])
+      write_ndjson(dir, "b.ndjson", [{"x" => 3}])
+      stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'group(_["x"])', *ndjson_files(dir))
+      assert_success(status, stderr, "parallel group")
+      assert_includes(stderr, "decompose=", "group decomposed")
+      result = JSON.parse(lines(stdout).first)
+      assert_equal([1, 2, 3], result.sort, "parallel group output")
+    end
+  end
+  def test_parallel_decomposable_sum_with_initial
+    Dir.mktmpdir do |dir|
+      write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 2}])
+      write_ndjson(dir, "b.ndjson", [{"x" => 3}])
+      stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'sum(_["x"], initial: 100)', *ndjson_files(dir))
+      assert_success(status, stderr, "sum with numeric initial")
+      assert_includes(stderr, "decompose=", "numeric initial decomposes")
+      assert_equal(%w[106], lines(stdout), "sum with initial output")
+    end
+  end
+  def test_parallel_sum_with_non_numeric_initial_falls_back
+    Dir.mktmpdir do |dir|
+      write_ndjson(dir, "a.ndjson", [{"x" => "a"}, {"x" => "b"}])
+      write_ndjson(dir, "b.ndjson", [{"x" => "c"}])
+      stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'sum(_["x"], initial: "")', *ndjson_files(dir))
+      assert_success(status, stderr, "sum with string initial")
+      assert_includes(stderr, "parallel: disabled", "non-numeric initial falls back to serial")
+      assert_equal(['"abc"'], lines(stdout), "sum with string initial output")
+    end
+  end
+  def test_sum_with_string_initial
+    Dir.mktmpdir do |dir|
+      write_ndjson(dir, "a.ndjson", [{"x" => "hello "}, {"x" => "world"}])
+      stdout, stderr, status = Open3.capture3("./exe/jrf", 'sum(_["x"], initial: "")', *ndjson_files(dir))
+      assert_success(status, stderr, "sum with string initial")
+      assert_equal(['"hello world"'], lines(stdout), "sum with string initial output")
+    end
+  end
+  def test_parallel_decomposable_reducer_then_passthrough
+    Dir.mktmpdir do |dir|
+      write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 2}])
+      write_ndjson(dir, "b.ndjson", [{"x" => 3}, {"x" => 4}])
+      stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'sum(_["x"]) >> _ * 2', *ndjson_files(dir))
+      assert_success(status, stderr, "parallel decomposable then passthrough")
+      assert_includes(stderr, "decompose=", "reducer then passthrough decomposed")
+      assert_equal(%w[20], lines(stdout), "parallel decomposable then passthrough output")
+    end
+  end
+  def test_parallel_mixed_decomposable_reducers
+    Dir.mktmpdir do |dir|
+      write_ndjson(dir, "a.ndjson", [{"x" => 10}, {"x" => 20}])
+      write_ndjson(dir, "b.ndjson", [{"x" => 30}, {"x" => 40}])
+      stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", '[sum(_["x"]), average(_["x"]), min(_["x"]), max(_["x"]), count()]', *ndjson_files(dir))
+      assert_success(status, stderr, "mixed decomposable")
+      assert_includes(stderr, "decompose=", "mixed decomposable used decompose")
+      result = JSON.parse(lines(stdout).first)
+      assert_equal([100, 25.0, 10, 40, 4], result, "mixed decomposable output")
+    end
+  end
+  def test_parallel_mixed_decomposable_and_non_decomposable_falls_back
+    Dir.mktmpdir do |dir|
+      write_ndjson(dir, "a.ndjson", [{"x" => 10}, {"x" => 20}])
+      write_ndjson(dir, "b.ndjson", [{"x" => 30}, {"x" => 40}])
+      stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", '[sum(_["x"]), percentile(_["x"], 0.5)]', *ndjson_files(dir))
+      assert_success(status, stderr, "mixed with non-decomposable")
+      assert_includes(stderr, "parallel: disabled", "mixed with non-decomposable falls back to serial")
+      result = JSON.parse(lines(stdout).first)
+      assert_equal([100, 20], result, "mixed with non-decomposable output")
+    end
+  end
+  def test_parallel_select_sum_passthrough_decomposes
+    Dir.mktmpdir do |dir|
+      write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 20}])
+      write_ndjson(dir, "b.ndjson", [{"x" => 40}])
+      stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'select(_["x"] > 10) >> sum(_["x"]) >> _ * 2', *ndjson_files(dir))
+      assert_success(status, stderr, "select+sum+passthrough")
+      assert_includes(stderr, "decompose=2/3", "select+sum decomposed, passthrough in parent")
+      assert_equal(%w[120], lines(stdout), "select+sum+passthrough output")
+    end
+  end
+  def test_parallel_select_non_decomposable_uses_split
+    Dir.mktmpdir do |dir|
+      write_ndjson(dir, "a.ndjson", [{"x" => 3}, {"x" => 1}])
+      write_ndjson(dir, "b.ndjson", [{"x" => 2}])
+      stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'select(_["x"] > 0) >> sort(_["x"]) >> _["x"]', *ndjson_files(dir))
+      assert_success(status, stderr, "select+sort uses split")
+      assert_includes(stderr, "split=1/3", "non-decomposable sort uses map-prefix split")
+      assert_equal([1, 2, 3], lines(stdout).map { |l| JSON.parse(l) }, "select+sort output")
+    end
+  end
+  def test_parallel_decomposable_with_empty_file
+    Dir.mktmpdir do |dir|
+      write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 2}])
+      File.write(File.join(dir, "b.ndjson"), "")
+      stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", '{s: sum(_["x"]), n: count(), mn: min(_["x"])}', *ndjson_files(dir))
+      assert_success(status, stderr, "decomposable with empty file")
+      assert_includes(stderr, "decompose=", "decomposable with empty file used decompose")
+      result = JSON.parse(lines(stdout).first)
+      assert_equal(3, result["s"], "sum ignores empty file")
+      assert_equal(2, result["n"], "count ignores empty file")
+      assert_equal(1, result["mn"], "min ignores empty file")
+    end
+  end
+  def test_parallel_decomposable_all_files_empty
+    Dir.mktmpdir do |dir|
+      File.write(File.join(dir, "a.ndjson"), "")
+      File.write(File.join(dir, "b.ndjson"), "")
+      stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'sum(_["x"])', *ndjson_files(dir))
+      assert_success(status, stderr, "all files empty")
+      # All files empty means first_value is nil, so classify_parallel_stages returns [nil, nil] → serial fallback
+      assert_includes(stderr, "parallel: disabled", "all files empty falls back to serial")
+      assert_equal([], lines(stdout), "no output for empty input")
+    end
+  end
+  def test_parallel_non_decomposable_falls_back_to_serial
+    Dir.mktmpdir do |dir|
+      write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 2}])
+      write_ndjson(dir, "b.ndjson", [{"x" => 3}])
+      stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'sort(_["x"]) >> _["x"]', *ndjson_files(dir))
+      assert_success(status, stderr, "non-decomposable serial fallback")
+      assert_equal([1, 2, 3], lines(stdout).map { |l| JSON.parse(l) }, "sort output")
+      assert_includes(stderr, "parallel: disabled", "non-decomposable falls back to serial")
+    end
+  end
+  def test_parallel_decomposable_matches_serial
+    Dir.mktmpdir do |dir|
+      write_ndjson(dir, "a.ndjson", (1..50).map { |i| {"v" => i} })
+      write_ndjson(dir, "b.ndjson", (51..100).map { |i| {"v" => i} })
+      files = ndjson_files(dir)
+      expr = '{s: sum(_["v"]), n: count(), mn: min(_["v"]), mx: max(_["v"]), avg: average(_["v"])}'
+      serial_stdout, serial_stderr, serial_status = Open3.capture3("./exe/jrf", expr, *files)
+      assert_success(serial_status, serial_stderr, "serial baseline")
+      parallel_stdout, parallel_stderr, parallel_status = Open3.capture3("./exe/jrf", "-v", "-P", "2", expr, *files)
+      assert_success(parallel_status, parallel_stderr, "parallel run")
+      assert_includes(parallel_stderr, "decompose=", "decomposable matches serial used decompose")
+      assert_equal(JSON.parse(serial_stdout), JSON.parse(parallel_stdout), "parallel decomposable matches serial")
+    end
+  end
   def test_serial_error_includes_filename
     Dir.mktmpdir do |dir|
       good_path = File.join(dir, "a.ndjson")

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: jrf
 version: !ruby/object:Gem::Version
-  version: 0.1.14
+  version: 0.1.15
 platform: ruby
 authors:
 - kazuho