RubyGems - jrf - Versions diffs - 0.1.3 → 0.1.5 - Mend

jrf 0.1.3 → 0.1.5

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (11) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 34475ad560159e50a8f6ea6dbfae40dc57173b40df31065f4b222abaafe66012
-  data.tar.gz: 22c046afd9f4fba04788f08796f9ccfe24b21a87522820c8e5873f164de8cc53
+  metadata.gz: 408c1f9706af5efaa1bf0125201d6647b4c108aa4aa28c99a93b59fb9cc94f02
+  data.tar.gz: 702f2fb14dc9d498292b02c41f0cdb4a91c0fa3e093ad9a71435d9a2604532fa
 SHA512:
-  metadata.gz: 7d90e4a754ae7ca9170db6c7221571cb90077bbd48d6cd55cbefd29342afa89996075c86a3bf645dac94b337b91eceefa036968f490c30bacf52744a319d238f
-  data.tar.gz: 3d00c51e46a07f63e1d44b8f2013663dd66d3b2f3393046a00a3c26a5f1cb3dd4eabc2db82eacb12ff874625835f49a63e358baaf44639d38b8a9e01a6c3b06d
+  metadata.gz: 80dfa6d2bb7c9304e779a3e80815efbde9c599d66665708738b833b08daa1918ae54bc5b170c8b90c60399fe18b0df06d576e2c8c3d8b76b74f9daa826efcfa8
+  data.tar.gz: 597b715fd3ebd31a49cb2839f7dda814b845cd5aa87a3ac9a9cf551553792b453af749e287652553903de851ea7b06a9e5940abc7c25fccd319a9e7e72d75840

data/Rakefile CHANGED Viewed

@@ -8,8 +8,3 @@ Rake::TestTask.new do |t|
 end
 task default: :test
-desc "Build man/jrf.1 from README.md"
-task :man do
-  ruby "script/build_man_from_readme.rb"
-end

data/lib/jrf/cli.rb CHANGED Viewed

@@ -4,16 +4,17 @@ require_relative "runner"
 module Jrf
   class CLI
-    USAGE = "usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'"
+    USAGE = "usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'"
     HELP_TEXT = <<~'TEXT'
-      usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'
+      usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'
       JSON filter with the power and speed of Ruby.
       Options:
         -v, --verbose  print parsed stage expressions
         --lax          allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
+        -p, --pretty   pretty-print JSON output instead of compact NDJSON
         -h, --help     show this help and exit
       Pipeline:
@@ -28,13 +29,13 @@ module Jrf
         jrf '_["msg"] >> reduce(nil) { |acc, v| acc ? "#{acc} #{v}" : v }'
       See Also:
-        README.md
-        man jrf
+        https://github.com/kazuho/jrf#readme
     TEXT
     def self.run(argv = ARGV, input: ARGF, out: $stdout, err: $stderr)
       verbose = false
       lax = false
+      pretty = false
       while argv.first&.start_with?("-")
         case argv.first
@@ -44,6 +45,9 @@ module Jrf
         when "--lax"
           lax = true
           argv.shift
+        when "-p", "--pretty"
+          pretty = true
+          argv.shift
         when "-h", "--help"
           out.puts HELP_TEXT
           return 0
@@ -60,7 +64,7 @@ module Jrf
       end
       expression = argv.shift
-      Runner.new(input: input, out: out, err: err, lax: lax).run(expression, verbose: verbose)
+      Runner.new(input: input, out: out, err: err, lax: lax, pretty: pretty).run(expression, verbose: verbose)
       0
     end
   end

data/lib/jrf/pipeline.rb ADDED Viewed

@@ -0,0 +1,85 @@
+# frozen_string_literal: true
+require_relative "control"
+require_relative "row_context"
+require_relative "stage"
+module Jrf
+  class Pipeline
+    def initialize(*blocks)
+      raise ArgumentError, "at least one stage block is required" if blocks.empty?
+      @ctx = RowContext.new
+      @stages = blocks.map { |block| Stage.new(@ctx, block, src: nil) }
+    end
+    # Run the pipeline on an enumerable of input values.
+    #
+    # Without a block, returns an Array of output values.
+    # With a block, streams each output value to the block.
+    #
+    # @param input [Enumerable] input values to process
+    # @yieldparam value output value
+    # @return [Array, nil] output values (without block), or nil (with block)
+    def call(input, &on_output)
+      if on_output
+        call_streaming(input, &on_output)
+      else
+        results = []
+        call_streaming(input) { |v| results << v }
+        results
+      end
+    end
+    private
+    def call_streaming(input, &on_output)
+      error = nil
+      begin
+        input.each { |value| process_value(value, @stages, &on_output) }
+      rescue StandardError => e
+        error = e
+      ensure
+        flush_reducers(@stages, &on_output)
+      end
+      raise error if error
+    end
+    def process_value(input, stages, &on_output)
+      current_values = [input]
+      stages.each do |stage|
+        next_values = []
+        current_values.each do |value|
+          out = stage.call(value)
+          if out.equal?(Control::DROPPED)
+            next
+          elsif out.is_a?(Control::Flat)
+            unless out.value.is_a?(Array)
+              raise TypeError, "flat expects Array, got #{out.value.class}"
+            end
+            next_values.concat(out.value)
+          else
+            next_values << out
+          end
+        end
+        return if next_values.empty?
+        current_values = next_values
+      end
+      current_values.each(&on_output)
+    end
+    def flush_reducers(stages, &on_output)
+      stages.each_with_index do |stage, idx|
+        rows = stage.finish
+        next if rows.empty?
+        rest = stages.drop(idx + 1)
+        rows.each { |value| process_value(value, rest, &on_output) }
+      end
+    end
+  end
+end

data/lib/jrf/row_context.rb CHANGED Viewed

@@ -26,10 +26,12 @@ module Jrf
     def initialize(obj = nil)
       @obj = obj
       @__jrf_current_stage = nil
+      @__jrf_current_input = obj
     end
     def reset(obj)
       @obj = obj
+      @__jrf_current_input = obj
       self
     end
@@ -38,11 +40,11 @@ module Jrf
     end
     def flat
-      Control::Flat.new(@obj)
+      Control::Flat.new(current_input)
     end
     def select(predicate)
-      predicate ? @obj : Control::DROPPED
+      predicate ? current_input : Control::DROPPED
     end
     define_reducer(:sum) do |_ctx, value, initial: 0, block: nil|
@@ -111,15 +113,16 @@ module Jrf
     define_reducer(:sort) do |ctx, key = MISSING, block: nil|
       if block
         {
-          value: ctx._,
+          value: ctx.send(:current_input),
           initial: -> { [] },
           finish: ->(rows) { rows.sort(&block) },
           step: ->(rows, row) { rows << row }
         }
       else
-        resolved_key = key.equal?(MISSING) ? ctx._ : key
+        current = ctx.send(:current_input)
+        resolved_key = key.equal?(MISSING) ? current : key
         {
-          value: [resolved_key, ctx._],
+          value: [resolved_key, current],
           initial: -> { [] },
           finish: ->(pairs) { pairs.sort_by(&:first).map(&:last) },
           step: ->(pairs, pair) { pairs << pair }
@@ -128,7 +131,7 @@ module Jrf
     end
     define_reducer(:group) do |ctx, value = MISSING, block: nil|
-      resolved_value = value.equal?(MISSING) ? ctx._ : value
+      resolved_value = value.equal?(MISSING) ? ctx.send(:current_input) : value
       { value: resolved_value, initial: -> { [] }, step: ->(acc, v) { acc << v } }
     end
@@ -143,9 +146,7 @@ module Jrf
         else
           ->(values) {
             sorted = values.sort
-            percentages.map do |p|
-              { "percentile" => p, "value" => ctx.send(:percentile_value, sorted, p) }
-            end
+            [percentages.map { |p| ctx.send(:percentile_value, sorted, p) }]
           }
         end
@@ -160,7 +161,7 @@ module Jrf
     def reduce(initial, &block)
       raise ArgumentError, "reduce requires a block" unless block
-      @__jrf_current_stage.allocate_reducer(@obj, initial: initial, &block)
+      @__jrf_current_stage.allocate_reducer(current_input, initial: initial, &block)
     end
     def map(&block)
@@ -182,6 +183,18 @@ module Jrf
     private
+    def current_input
+      @__jrf_current_input
+    end
+    def __jrf_with_current_input(value)
+      saved_input = current_input
+      @__jrf_current_input = value
+      yield
+    ensure
+      @__jrf_current_input = saved_input
+    end
     def reducer_initial_value(initial)
       return initial.call if initial.respond_to?(:call)
       return initial.dup if initial.is_a?(Array) || initial.is_a?(Hash)

data/lib/jrf/runner.rb CHANGED Viewed

@@ -1,37 +1,19 @@
 # frozen_string_literal: true
 require "json"
-require_relative "control"
+require_relative "pipeline"
 require_relative "pipeline_parser"
-require_relative "reducers"
-require_relative "row_context"
-require_relative "stage"
 module Jrf
   class Runner
     RS_CHAR = "\x1e"
-    class ProbeValue
-      def [](key)
-        self
-      end
-      def method_missing(name, *args, &block)
-        self
-      end
-      def respond_to_missing?(name, include_private = false)
-        true
-      end
-    end
-    PROBE_VALUE = ProbeValue.new
-    def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false)
+    def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false, pretty: false)
       @input = input
       @out = out
       @err = err
       @lax = lax
+      @pretty = pretty
     end
     def run(expression, verbose: false)
@@ -39,53 +21,19 @@ module Jrf
       stages = parsed[:stages]
       dump_stages(stages) if verbose
-      ctx = RowContext.new
-      compiled = compile_stages(stages, ctx)
-      compiled.each { |stage| stage.call(PROBE_VALUE, probing: true) rescue nil }
-      error = nil
+      blocks = stages.map { |stage|
+        eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
+      }
+      pipeline = Pipeline.new(*blocks)
-      begin
-        each_input_value do |value|
-          process_value(value, compiled)
-        end
-      rescue StandardError => e
-        error = e
-      ensure
-        flush_reducers(compiled)
+      input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
+      pipeline.call(input_enum) do |value|
+        @out.puts(@pretty ? JSON.pretty_generate(value) : JSON.generate(value))
       end
-      raise error if error
     end
     private
-    def process_value(input, stages)
-      current_values = [input]
-      stages.each do |stage|
-        next_values = []
-        current_values.each do |value|
-          out = stage.call(value)
-          if out.equal?(Control::DROPPED)
-            next
-          elsif out.is_a?(Control::Flat)
-            unless out.value.is_a?(Array)
-              raise TypeError, "flat expects Array, got #{out.value.class}"
-            end
-            next_values.concat(out.value)
-          else
-            next_values << out
-          end
-        end
-        return if next_values.empty?
-        current_values = next_values
-      end
-      current_values.each { |value| @out.puts JSON.generate(value) }
-    end
     def each_input_value
       return each_input_value_lax { |value| yield value } if @lax
@@ -124,33 +72,10 @@ module Jrf
       raise JSON::ParserError, e.message
     end
-    def compile_stages(stages, ctx)
-      mod = Module.new
-      stages.each_with_index.map do |stage, i|
-        method_name = :"__jrf_stage_#{i}"
-        mod.module_eval("def #{method_name}; #{stage[:src]}; end", "(jrf stage #{i})", 1)
-        Stage.new(ctx, method_name, src: stage[:src])
-      end.tap { ctx.extend(mod) }
-    end
     def dump_stages(stages)
       stages.each_with_index do |stage, i|
         @err.puts "stage[#{i}]: #{stage[:src]}"
       end
     end
-    def flush_reducers(stages)
-      tail = stages
-      loop do
-        idx = tail.index(&:reducer?)
-        break unless idx
-        rows = tail[idx].finish
-        rest = tail.drop(idx + 1)
-        rows.each { |value| process_value(value, rest) }
-        tail = rest
-      end
-    end
   end
 end

data/lib/jrf/stage.rb CHANGED Viewed

@@ -7,7 +7,7 @@ module Jrf
   class Stage
     ReducerToken = Struct.new(:index)
-    attr_reader :method_name, :src
+    attr_reader :src
     def self.resolve_template(template, reducers)
       if template.is_a?(ReducerToken)
@@ -22,28 +22,27 @@ module Jrf
       end
     end
-    def initialize(ctx, method_name, src: nil)
+    def initialize(ctx, block, src: nil)
       @ctx = ctx
-      @method_name = method_name
+      @block = block
       @src = src
       @reducers = []
       @cursor = 0
       @template = nil
       @mode = nil # nil=unknown, :reducer, :passthrough
-      @probing = false
+      @map_transforms = {}
     end
-    def call(input, probing: false)
+    def call(input)
       @ctx.reset(input)
       @cursor = 0
-      @probing = probing
       @ctx.__jrf_current_stage = self
-      result = @ctx.public_send(@method_name)
+      result = @ctx.instance_eval(&@block)
       if @mode.nil? && @reducers.any?
         @mode = :reducer
         @template = result
-      elsif @mode.nil? && !probing
+      elsif @mode.nil?
         @mode = :passthrough
       end
@@ -54,43 +53,50 @@ module Jrf
       idx = @cursor
       finish_rows = finish || ->(acc) { [acc] }
       @reducers[idx] ||= Reducers.reduce(initial, finish: finish_rows, &step_fn)
-      @reducers[idx].step(value) unless @probing
+      @reducers[idx].step(value)
       @cursor += 1
       ReducerToken.new(idx)
     end
     def allocate_map(type, collection, &block)
       idx = @cursor
-      map_reducer = (@reducers[idx] ||= MapReducer.new(type))
+      @cursor += 1
-      unless @probing
-        saved_obj = @ctx._
+      # Transformation mode (detected on first call)
+      if @map_transforms[idx]
+        return transform_collection(type, collection, &block)
+      end
-        case type
-        when :array
-          raise TypeError, "map expects Array, got #{collection.class}" unless collection.is_a?(Array)
-          collection.each_with_index do |v, i|
-            @ctx.reset(v)
-            with_scoped_reducers(map_reducer.slots[i] ||= []) do
-              result = block.call(v)
-              map_reducer.templates[i] ||= result
-            end
+      map_reducer = (@reducers[idx] ||= MapReducer.new(type))
+      case type
+      when :array
+        raise TypeError, "map expects Array, got #{collection.class}" unless collection.is_a?(Array)
+        collection.each_with_index do |v, i|
+          slot = map_reducer.slot(i)
+          with_scoped_reducers(slot.reducers) do
+            result = @ctx.send(:__jrf_with_current_input, v) { block.call(v) }
+            slot.template ||= result
           end
-        when :hash
-          raise TypeError, "map_values expects Hash, got #{collection.class}" unless collection.is_a?(Hash)
-          collection.each do |k, v|
-            @ctx.reset(v)
-            with_scoped_reducers(map_reducer.slots[k] ||= []) do
-              result = block.call(v)
-              map_reducer.templates[k] ||= result
-            end
+        end
+      when :hash
+        raise TypeError, "map_values expects Hash, got #{collection.class}" unless collection.is_a?(Hash)
+        collection.each do |k, v|
+          slot = map_reducer.slot(k)
+          with_scoped_reducers(slot.reducers) do
+            result = @ctx.send(:__jrf_with_current_input, v) { block.call(v) }
+            slot.template ||= result
           end
         end
+      end
-        @ctx.reset(saved_obj)
+      # Detect transformation: no reducers were allocated in any slot
+      if @mode.nil? && map_reducer.slots.values.all? { |s| s.reducers.empty? }
+        @map_transforms[idx] = true
+        @reducers[idx] = nil
+        return transformed_slots(type, map_reducer)
       end
-      @cursor += 1
       ReducerToken.new(idx)
     end
@@ -98,22 +104,17 @@ module Jrf
       idx = @cursor
       map_reducer = (@reducers[idx] ||= MapReducer.new(:hash))
-      unless @probing
-        slot = (map_reducer.slots[key] ||= [])
-        with_scoped_reducers(slot) do
-          result = block.call
-          map_reducer.templates[key] ||= result
-        end
+      row = @ctx._
+      slot = map_reducer.slot(key)
+      with_scoped_reducers(slot.reducers) do
+        result = @ctx.send(:__jrf_with_current_input, row) { block.call(row) }
+        slot.template ||= result
       end
       @cursor += 1
       ReducerToken.new(idx)
     end
-    def reducer?
-      @mode == :reducer
-    end
     def finish
       return [] unless @mode == :reducer && @reducers.any?
@@ -137,26 +138,93 @@ module Jrf
       @cursor = saved_cursor
     end
+    def transform_collection(type, collection, &block)
+      case type
+      when :array
+        raise TypeError, "map expects Array, got #{collection.class}" unless collection.is_a?(Array)
+        collection.each_with_object([]) do |value, result|
+          mapped = @ctx.send(:__jrf_with_current_input, value) { block.call(value) }
+          append_map_result(result, mapped)
+        end
+      when :hash
+        raise TypeError, "map_values expects Hash, got #{collection.class}" unless collection.is_a?(Hash)
+        collection.each_with_object({}) do |(key, value), result|
+          mapped = @ctx.send(:__jrf_with_current_input, value) { block.call(value) }
+          next if mapped.equal?(Control::DROPPED)
+          raise TypeError, "flat is not supported inside map_values" if mapped.is_a?(Control::Flat)
+          result[key] = mapped
+        end
+      end
+    end
+    def transformed_slots(type, map_reducer)
+      case type
+      when :array
+        map_reducer.slots
+          .sort_by { |k, _| k }
+          .each_with_object([]) do |(_, slot), result|
+            append_map_result(result, slot.template)
+          end
+      when :hash
+        map_reducer.slots.each_with_object({}) do |(key, slot), result|
+          next if slot.template.equal?(Control::DROPPED)
+          raise TypeError, "flat is not supported inside map_values" if slot.template.is_a?(Control::Flat)
+          result[key] = slot.template
+        end
+      end
+    end
+    def append_map_result(result, mapped)
+      return if mapped.equal?(Control::DROPPED)
+      if mapped.is_a?(Control::Flat)
+        unless mapped.value.is_a?(Array)
+          raise TypeError, "flat expects Array, got #{mapped.value.class}"
+        end
+        result.concat(mapped.value)
+      else
+        result << mapped
+      end
+    end
     class MapReducer
-      attr_reader :slots, :templates
+      attr_reader :slots
       def initialize(type)
         @type = type
         @slots = {}
-        @templates = {}
+      end
+      def slot(key)
+        @slots[key] ||= SlotState.new
       end
       def finish
         case @type
         when :array
           keys = @slots.keys.sort
-          [keys.map { |k| Stage.resolve_template(@templates[k], @slots[k]) }]
+          [keys.map { |k| Stage.resolve_template(@slots[k].template, @slots[k].reducers) }]
         when :hash
           result = {}
-          @slots.each { |k, reducers| result[k] = Stage.resolve_template(@templates[k], reducers) }
+          @slots.each { |k, s| result[k] = Stage.resolve_template(s.template, s.reducers) }
           [result]
         end
       end
+      class SlotState
+        attr_reader :reducers
+        attr_accessor :template
+        def initialize
+          @reducers = []
+          @template = nil
+        end
+      end
     end
   end
 end

data/lib/jrf/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Jrf
-  VERSION = "0.1.3"
+  VERSION = "0.1.5"
 end

data/lib/jrf.rb CHANGED Viewed

@@ -2,3 +2,21 @@
 require_relative "jrf/version"
 require_relative "jrf/cli"
+require_relative "jrf/pipeline"
+module Jrf
+  # Create a pipeline from one or more stage blocks.
+  #
+  # Each block is evaluated in a context where +_+ is the current value.
+  # All jrf built-in functions (+select+, +sum+, +map+, +group_by+, etc.)
+  # are available inside blocks. See https://github.com/kazuho/jrf#readme for the full list.
+  #
+  # @param blocks [Array<Proc>] one or more stage procs
+  # @return [Pipeline] a callable pipeline
+  # @example
+  #   j = Jrf.new(proc { select(_["x"] > 10) }, proc { sum(_["x"]) })
+  #   j.call([{"x" => 20}, {"x" => 30}])  # => [50]
+  def self.new(*blocks)
+    Pipeline.new(*blocks)
+  end
+end

data/test/jrf_test.rb CHANGED Viewed

@@ -92,15 +92,15 @@ assert_includes(stderr, 'stage[1]: _["hello"]')
 stdout, stderr, status = Open3.capture3("./exe/jrf", "--help")
 assert_success(status, stderr, "help option")
-assert_includes(stdout, "usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'")
+assert_includes(stdout, "usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'")
 assert_includes(stdout, "JSON filter with the power and speed of Ruby.")
 assert_includes(stdout, "--lax")
+assert_includes(stdout, "--pretty")
 assert_includes(stdout, "Pipeline:")
 assert_includes(stdout, "Connect stages with top-level >>.")
 assert_includes(stdout, "The current value in each stage is available as _.")
 assert_includes(stdout, "See Also:")
-assert_includes(stdout, "README.md")
-assert_includes(stdout, "man jrf")
+assert_includes(stdout, "https://github.com/kazuho/jrf#readme")
 assert_equal([], lines(stderr), "help stderr output")
 stdout, stderr, status = run_jrf('select(_["hello"] == 123) >> _["hello"]', input_hello, "--verbose")
@@ -108,6 +108,21 @@ assert_success(status, stderr, "dump stages verbose alias")
 assert_equal(%w[123], lines(stdout), "dump stages verbose alias output")
 assert_includes(stderr, 'stage[0]: select(_["hello"] == 123)')
+stdout, stderr, status = run_jrf('_', input_hello, "--pretty")
+assert_success(status, stderr, "pretty output")
+assert_equal(
+  [
+    "{",
+    "\"hello\": 123",
+    "}",
+    "{",
+    "\"hello\": 456",
+    "}"
+  ],
+  lines(stdout),
+  "pretty output lines"
+)
 input_regex = <<~NDJSON
   {"foo":{"bar":"ok"},"x":50}
   {"foo":{"bar":"ng"},"x":70}
@@ -159,6 +174,14 @@ stdout, stderr, status = run_jrf('_["items"] >> flat >> group', input_flat)
 assert_success(status, stderr, "flat then group")
 assert_equal(['[1,2,3]'], lines(stdout), "flat then group output")
+stdout, stderr, status = run_jrf('map { |x| flat }', "[[1,2],[3],[4,5,6]]\n")
+assert_success(status, stderr, "flat inside map")
+assert_equal(['[1,2,3,4,5,6]'], lines(stdout), "flat inside map output")
+stdout, stderr, status = run_jrf('map_values { |v| flat }', "{\"a\":[1,2],\"b\":[3]}\n")
+assert_failure(status, "flat inside map_values")
+assert_includes(stderr, "flat is not supported inside map_values")
 stdout, stderr, status = run_jrf('_["foo"] >> flat', input)
 assert_failure(status, "flat requires array")
 assert_includes(stderr, "flat expects Array")
@@ -194,6 +217,10 @@ stdout, stderr, status = run_jrf('select(_["x"] > 10) >> sum(_["foo"])', input_s
 assert_success(status, stderr, "select + sum")
 assert_equal(%w[9], lines(stdout), "select + sum output")
+stdout, stderr, status = run_jrf('{total: sum(_["foo"]), n: count()}', input_sum)
+assert_success(status, stderr, "structured reducer result")
+assert_equal(['{"total":10,"n":4}'], lines(stdout), "structured reducer result output")
 stdout, stderr, status = run_jrf('average(_["foo"])', input_sum)
 assert_success(status, stderr, "average")
 assert_float_close(2.5, lines(stdout).first.to_f, 1e-12, "average output")
@@ -206,33 +233,37 @@ stdout, stderr, status = run_jrf('_["foo"] >> sum(_ * 2)', input_sum)
 assert_success(status, stderr, "extract + sum")
 assert_equal(%w[20], lines(stdout), "extract + sum output")
+stdout, stderr, status = run_jrf('sum(2 * _["foo"])', input_sum)
+assert_success(status, stderr, "sum with literal on left")
+assert_equal(%w[20], lines(stdout), "sum with literal on left output")
 stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> sum(_["foo"])', input_sum)
 assert_success(status, stderr, "sum no matches")
-assert_equal(%w[0], lines(stdout), "sum no matches output")
+assert_equal([], lines(stdout), "sum no matches output")
 stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> count()', input_sum)
 assert_success(status, stderr, "count no matches")
-assert_equal(%w[0], lines(stdout), "count no matches output")
+assert_equal([], lines(stdout), "count no matches output")
 stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> count(_["foo"])', input_sum)
 assert_success(status, stderr, "count(expr) no matches")
-assert_equal(%w[0], lines(stdout), "count(expr) no matches output")
+assert_equal([], lines(stdout), "count(expr) no matches output")
 stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> average(_["foo"])', input_sum)
 assert_success(status, stderr, "average no matches")
-assert_equal(%w[null], lines(stdout), "average no matches output")
+assert_equal([], lines(stdout), "average no matches output")
 stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> stdev(_["foo"])', input_sum)
 assert_success(status, stderr, "stdev no matches")
-assert_equal(%w[null], lines(stdout), "stdev no matches output")
+assert_equal([], lines(stdout), "stdev no matches output")
 stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> min(_["foo"])', input_sum)
 assert_success(status, stderr, "min no matches")
-assert_equal(%w[null], lines(stdout), "min no matches output")
+assert_equal([], lines(stdout), "min no matches output")
 stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> max(_["foo"])', input_sum)
 assert_success(status, stderr, "max no matches")
-assert_equal(%w[null], lines(stdout), "max no matches output")
+assert_equal([], lines(stdout), "max no matches output")
 stdout, stderr, status = run_jrf('sum(_["foo"]) >> _ + 1', input_sum)
 assert_success(status, stderr, "reduce in middle")
@@ -274,7 +305,7 @@ assert_equal([], lines(stdout), "sort no matches output")
 stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> _["foo"] >> group', input_sum)
 assert_success(status, stderr, "group no matches")
-assert_equal(['[]'], lines(stdout), "group no matches output")
+assert_equal([], lines(stdout), "group no matches output")
 input_group_multi = <<~NDJSON
   {"x":1,"y":"a"}
@@ -288,7 +319,7 @@ assert_equal(['{"a":[1,2,3],"b":["a","b","c"]}'], lines(stdout), "group in hash
 stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> {a: group(_["x"]), b: group(_["y"])}', input_group_multi)
 assert_success(status, stderr, "group in hash no matches")
-assert_equal(['{"a":[],"b":[]}'], lines(stdout), "group in hash no-match output")
+assert_equal([], lines(stdout), "group in hash no-match output")
 stdout, stderr, status = run_jrf('percentile(_["foo"], 0.50)', input_sum)
 assert_success(status, stderr, "single percentile")
@@ -297,7 +328,7 @@ assert_equal(%w[2], lines(stdout), "single percentile output")
 stdout, stderr, status = run_jrf('percentile(_["foo"], [0.25, 0.50, 1.0])', input_sum)
 assert_success(status, stderr, "array percentile")
 assert_equal(
-  ['{"percentile":0.25,"value":1}', '{"percentile":0.5,"value":2}', '{"percentile":1.0,"value":4}'],
+  ['[1,2,4]'],
   lines(stdout),
   "array percentile output"
 )
@@ -332,7 +363,7 @@ assert_float_close(1.0, lines(stdout).first.to_f, 1e-12, "stdev ignores nil outp
 stdout, stderr, status = run_jrf('percentile(_["foo"], [0.5, 1.0])', input_with_nil)
 assert_success(status, stderr, "percentile ignores nil")
 assert_equal(
-  ['{"percentile":0.5,"value":1}', '{"percentile":1.0,"value":3}'],
+  ['[1,3]'],
   lines(stdout),
   "percentile ignores nil output"
 )
@@ -388,7 +419,7 @@ NDJSON
 stdout, stderr, status = run_jrf('{a: percentile(_["a"], [0.25, 0.50, 1.0]), b: percentile(_["b"], [0.25, 0.50, 1.0])}', input_multi_cols)
 assert_success(status, stderr, "nested array percentile for multiple columns")
 assert_equal(
-  ['{"a":[{"percentile":0.25,"value":1},{"percentile":0.5,"value":2},{"percentile":1.0,"value":4}],"b":[{"percentile":0.25,"value":10},{"percentile":0.5,"value":20},{"percentile":1.0,"value":40}]}'],
+  ['{"a":[1,2,4],"b":[10,20,40]}'],
   lines(stdout),
   "nested array percentile output"
 )
@@ -513,6 +544,14 @@ stdout, stderr, status = run_jrf('_["values"] >> map { |x| max(x) }', input_map)
 assert_success(status, stderr, "map with max")
 assert_equal(['[3,30,300]'], lines(stdout), "map with max output")
+stdout, stderr, status = run_jrf('_["values"] >> map { |x| sum(_[0] + x) }', input_map)
+assert_success(status, stderr, "map keeps ambient _")
+assert_equal(['[12,66,606]'], lines(stdout), "map ambient _ output")
+stdout, stderr, status = run_jrf('_["values"] >> map { |x| reduce(0) { |acc, v| acc + v } }', input_map)
+assert_success(status, stderr, "map with reduce")
+assert_equal(['[6,60,600]'], lines(stdout), "map with reduce output")
 input_map_varying = <<~NDJSON
   [1,10]
   [2,20,200]
@@ -523,6 +562,20 @@ stdout, stderr, status = run_jrf('map { |x| sum(x) }', input_map_varying)
 assert_success(status, stderr, "map varying lengths")
 assert_equal(['[6,30,200]'], lines(stdout), "map varying lengths output")
+input_map_unsorted = <<~NDJSON
+  {"values":[3,30]}
+  {"values":[1,10]}
+  {"values":[2,20]}
+NDJSON
+stdout, stderr, status = run_jrf('_["values"] >> map { |x| group }', input_map)
+assert_success(status, stderr, "map with group")
+assert_equal(['[[1,2,3],[10,20,30],[100,200,300]]'], lines(stdout), "map with group output")
+stdout, stderr, status = run_jrf('_["values"] >> map { |x| sort }', input_map_unsorted)
+assert_success(status, stderr, "map with sort default key")
+assert_equal(['[[1,2,3],[10,20,30]]'], lines(stdout), "map with sort default key output")
 input_map_values = <<~NDJSON
   {"a":1,"b":10}
   {"a":2,"b":20}
@@ -551,18 +604,51 @@ stdout, stderr, status = run_jrf('map_values { |v| count(v) }', input_map_values
 assert_success(status, stderr, "map_values with count")
 assert_equal(['{"a":3,"b":3}'], lines(stdout), "map_values with count output")
+stdout, stderr, status = run_jrf('map_values { |v| group }', input_map_values)
+assert_success(status, stderr, "map_values with group")
+assert_equal(['{"a":[1,2,3],"b":[10,20,30]}'], lines(stdout), "map_values with group output")
+stdout, stderr, status = run_jrf('map_values { |v| sum(_["a"] + v) }', input_map_values)
+assert_success(status, stderr, "map_values keeps ambient _")
+assert_equal(['{"a":12,"b":66}'], lines(stdout), "map_values ambient _ output")
+stdout, stderr, status = run_jrf('map_values { |v| reduce(0) { |acc, x| acc + x } }', input_map_values)
+assert_success(status, stderr, "map_values with reduce")
+assert_equal(['{"a":6,"b":60}'], lines(stdout), "map_values with reduce output")
 stdout, stderr, status = run_jrf('select(false) >> map { |x| sum(x) }', input_map)
 assert_success(status, stderr, "map no matches")
-assert_equal(['[]'], lines(stdout), "map no matches output")
+assert_equal([], lines(stdout), "map no matches output")
 stdout, stderr, status = run_jrf('select(false) >> map_values { |v| sum(v) }', input_map_values)
 assert_success(status, stderr, "map_values no matches")
-assert_equal(['{}'], lines(stdout), "map_values no matches output")
+assert_equal([], lines(stdout), "map_values no matches output")
 stdout, stderr, status = run_jrf('map_values { |v| sum(v) } >> map_values { |v| v * 10 }', input_map_values)
 assert_success(status, stderr, "map_values piped to map_values passthrough")
 assert_equal(['{"a":60,"b":600}'], lines(stdout), "map_values piped output")
+# map/map_values transformation (no reducers)
+stdout, stderr, status = run_jrf('_["values"] >> map { |x| x + 1 }', input_map)
+assert_success(status, stderr, "map transform")
+assert_equal(['[2,11,101]', '[3,21,201]', '[4,31,301]'], lines(stdout), "map transform output")
+stdout, stderr, status = run_jrf('_["values"] >> map { |x| select(x >= 20) }', input_map)
+assert_success(status, stderr, "map transform with select")
+assert_equal(['[100]', '[20,200]', '[30,300]'], lines(stdout), "map transform with select output")
+stdout, stderr, status = run_jrf('map_values { |v| v * 2 }', input_map_values)
+assert_success(status, stderr, "map_values transform")
+assert_equal(['{"a":2,"b":20}', '{"a":4,"b":40}', '{"a":6,"b":60}'], lines(stdout), "map_values transform output")
+stdout, stderr, status = run_jrf('map_values { |v| select(v >= 10) }', input_map_values)
+assert_success(status, stderr, "map_values transform with select")
+assert_equal(['{"b":10}', '{"b":20}', '{"b":30}'], lines(stdout), "map_values transform with select output")
+stdout, stderr, status = run_jrf('_["values"] >> map { |x| x + 1 } >> map { |x| x * 10 }', input_map)
+assert_success(status, stderr, "chained map transforms")
+assert_equal(['[20,110,1010]', '[30,210,2010]', '[40,310,3010]'], lines(stdout), "chained map transforms output")
 input_gb = <<~NDJSON
   {"status":200,"path":"/a","latency":10}
   {"status":404,"path":"/b","latency":50}
@@ -574,11 +660,11 @@ stdout, stderr, status = run_jrf('group_by(_["status"]) { count() }', input_gb)
 assert_success(status, stderr, "group_by with count")
 assert_equal(['{"200":3,"404":1}'], lines(stdout), "group_by with count output")
-stdout, stderr, status = run_jrf('group_by(_["status"]) { sum(_["latency"]) }', input_gb)
+stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| sum(row["latency"]) }', input_gb)
 assert_success(status, stderr, "group_by with sum")
 assert_equal(['{"200":60,"404":50}'], lines(stdout), "group_by with sum output")
-stdout, stderr, status = run_jrf('group_by(_["status"]) { average(_["latency"]) }', input_gb)
+stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| average(row["latency"]) }', input_gb)
 assert_success(status, stderr, "group_by with average")
 result = JSON.parse(lines(stdout).first)
 assert_float_close(20.0, result["200"], 1e-12, "group_by average 200")
@@ -591,24 +677,97 @@ assert_equal(3, result["200"].length, "group_by default 200 count")
 assert_equal(1, result["404"].length, "group_by default 404 count")
 assert_equal("/a", result["200"][0]["path"], "group_by default first row")
-stdout, stderr, status = run_jrf('group_by(_["status"]) { group(_["path"]) }', input_gb)
+stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| group(row["path"]) }', input_gb)
 assert_success(status, stderr, "group_by with group(expr)")
 assert_equal(['{"200":["/a","/c","/d"],"404":["/b"]}'], lines(stdout), "group_by with group(expr) output")
-stdout, stderr, status = run_jrf('group_by(_["status"]) { min(_["latency"]) }', input_gb)
+stdout, stderr, status = run_jrf('group_by(_["status"]) { group }', input_gb)
+assert_success(status, stderr, "group_by with implicit group")
+result = JSON.parse(lines(stdout).first)
+assert_equal(3, result["200"].length, "group_by implicit group 200 count")
+assert_equal("/a", result["200"][0]["path"], "group_by implicit group first row")
+stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| min(row["latency"]) }', input_gb)
 assert_success(status, stderr, "group_by with min")
 assert_equal(['{"200":10,"404":50}'], lines(stdout), "group_by with min output")
-stdout, stderr, status = run_jrf('group_by(_["status"]) { {total: sum(_["latency"]), n: count()} }', input_gb)
+stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| {total: sum(row["latency"]), n: count()} }', input_gb)
 assert_success(status, stderr, "group_by with multi-reducer")
 assert_equal(['{"200":{"total":60,"n":3},"404":{"total":50,"n":1}}'], lines(stdout), "group_by multi-reducer output")
+stdout, stderr, status = run_jrf('group_by(_["status"]) { reduce(0) { |acc, row| acc + row["latency"] } }', input_gb)
+assert_success(status, stderr, "group_by with reduce")
+assert_equal(['{"200":60,"404":50}'], lines(stdout), "group_by with reduce output")
 stdout, stderr, status = run_jrf('select(false) >> group_by(_["status"]) { count() }', input_gb)
 assert_success(status, stderr, "group_by no matches")
-assert_equal(['{}'], lines(stdout), "group_by no matches output")
+assert_equal([], lines(stdout), "group_by no matches output")
 stdout, stderr, status = run_jrf('group_by(_["status"]) { count() } >> _[200]', input_gb)
 assert_success(status, stderr, "group_by then extract")
 assert_equal(%w[3], lines(stdout), "group_by then extract output")
+# === Library API (Jrf.new) ===
+require_relative "../lib/jrf"
+# passthrough
+j = Jrf.new(proc { _ })
+assert_equal([{"a" => 1}, {"a" => 2}], j.call([{"a" => 1}, {"a" => 2}]), "library passthrough")
+# extract
+j = Jrf.new(proc { _["a"] })
+assert_equal([1, 2], j.call([{"a" => 1}, {"a" => 2}]), "library extract")
+# select + extract (two stages)
+j = Jrf.new(
+  proc { select(_["a"] > 1) },
+  proc { _["a"] }
+)
+assert_equal([2, 3], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library select + extract")
+# sum
+j = Jrf.new(proc { sum(_["a"]) })
+assert_equal([6], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library sum")
+# sum with literal on left
+j = Jrf.new(proc { sum(2 * _["a"]) })
+assert_equal([12], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library sum literal on left")
+# structured reducers
+j = Jrf.new(proc { {total: sum(_["a"]), n: count()} })
+assert_equal([{total: 6, n: 3}], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library structured reducers")
+# map transform
+j = Jrf.new(proc { map { |x| x + 1 } })
+assert_equal([[2, 3], [4, 5]], j.call([[1, 2], [3, 4]]), "library map transform")
+# map reduce
+j = Jrf.new(proc { map { |x| sum(x) } })
+assert_equal([[4, 6]], j.call([[1, 2], [3, 4]]), "library map reduce")
+# map_values transform
+j = Jrf.new(proc { map_values { |v| v * 10 } })
+assert_equal([{"a" => 10, "b" => 20}], j.call([{"a" => 1, "b" => 2}]), "library map_values transform")
+# group_by
+j = Jrf.new(proc { group_by(_["k"]) { count() } })
+assert_equal([{"x" => 2, "y" => 1}], j.call([{"k" => "x"}, {"k" => "x"}, {"k" => "y"}]), "library group_by")
+# reducer then passthrough
+j = Jrf.new(
+  proc { sum(_["a"]) },
+  proc { _ + 1 }
+)
+assert_equal([7], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library reducer then passthrough")
+# closure over local variables
+threshold = 2
+j = Jrf.new(proc { select(_["a"] > threshold) })
+assert_equal([{"a" => 3}], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library closure")
+# empty input
+j = Jrf.new(proc { sum(_) })
+assert_equal([], j.call([]), "library empty input")
 puts "ok"

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: jrf
 version: !ruby/object:Gem::Version
-  version: 0.1.3
+  version: 0.1.5
 platform: ruby
 authors:
 - kazuho
@@ -41,6 +41,7 @@ files:
 - lib/jrf.rb
 - lib/jrf/cli.rb
 - lib/jrf/control.rb
+- lib/jrf/pipeline.rb
 - lib/jrf/pipeline_parser.rb
 - lib/jrf/reducers.rb
 - lib/jrf/row_context.rb