RubyGems - jrf - Versions diffs - 0.1.2 → 0.1.3 - Mend

jrf 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 2776f201f13bf8be05ec4615510f0810ceeff9115dd11ef4d54ed873c9c90030
-  data.tar.gz: 0b996f561536a47067d262122c3b1093fd9adfb499fc0fab7011226cee75f043
+  metadata.gz: 34475ad560159e50a8f6ea6dbfae40dc57173b40df31065f4b222abaafe66012
+  data.tar.gz: 22c046afd9f4fba04788f08796f9ccfe24b21a87522820c8e5873f164de8cc53
 SHA512:
-  metadata.gz: bde0a34fdeb324132084a7621bc2a37767a506261dbafadfdebbeb7fc060a2e8a142c44132ed5228a2604a5dba767871d39303eb3aa941510ba0d3f2694d5e7e
-  data.tar.gz: 725b4611d5659ce994df183950676a7c8ff2582df7b02cbc0bf7ef47682ca2bf2f55507df10c6e907a1032c6b4dcd17625b4db807f28d304a331f0caf4c8a1cd
+  metadata.gz: 7d90e4a754ae7ca9170db6c7221571cb90077bbd48d6cd55cbefd29342afa89996075c86a3bf645dac94b337b91eceefa036968f490c30bacf52744a319d238f
+  data.tar.gz: 3d00c51e46a07f63e1d44b8f2013663dd66d3b2f3393046a00a3c26a5f1cb3dd4eabc2db82eacb12ff874625835f49a63e358baaf44639d38b8a9e01a6c3b06d

data/exe/jrf CHANGED

@@ -1,6 +1,12 @@
 #!/usr/bin/env ruby
 # frozen_string_literal: true
+begin
+  require "bundler/setup"
+rescue LoadError
+  # Allow running without Bundler in plain Ruby environments.
+end
 $LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
 require "jrf"

data/jrf.gemspec CHANGED

@@ -15,6 +15,7 @@ Gem::Specification.new do |spec|
   spec.bindir = "exe"
   spec.executables = ["jrf"]
+  spec.add_dependency "oj", ">= 3.16"
   spec.files = Dir.glob("{exe,lib,test}/*") + Dir.glob("lib/**/*") + %w[DESIGN.txt jrf.gemspec Gemfile Rakefile]
 end

data/lib/jrf/cli.rb CHANGED

@@ -4,15 +4,16 @@ require_relative "runner"
 module Jrf
   class CLI
-    USAGE = "usage: jrf [-v] [--help] 'STAGE >> STAGE >> ...'"
+    USAGE = "usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'"
     HELP_TEXT = <<~'TEXT'
-      usage: jrf [-v] [--help] 'STAGE >> STAGE >> ...'
+      usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'
       JSON filter with the power and speed of Ruby.
       Options:
-        -v, --verbose  print compiled stage Ruby expressions
+        -v, --verbose  print parsed stage expressions
+        --lax          allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
         -h, --help     show this help and exit
       Pipeline:
@@ -33,12 +34,16 @@ module Jrf
     def self.run(argv = ARGV, input: ARGF, out: $stdout, err: $stderr)
       verbose = false
+      lax = false
       while argv.first&.start_with?("-")
         case argv.first
         when "-v", "--verbose"
           verbose = true
           argv.shift
+        when "--lax"
+          lax = true
+          argv.shift
         when "-h", "--help"
           out.puts HELP_TEXT
           return 0
@@ -55,7 +60,7 @@ module Jrf
       end
       expression = argv.shift
-      Runner.new(input: input, out: out, err: err).run(expression, verbose: verbose)
+      Runner.new(input: input, out: out, err: err, lax: lax).run(expression, verbose: verbose)
       0
     end
   end

data/lib/jrf/pipeline_parser.rb CHANGED

@@ -9,51 +9,11 @@ module Jrf
     def parse
       stages = split_top_level_pipeline(@source).map(&:strip).reject(&:empty?)
       raise ArgumentError, "empty expression" if stages.empty?
-      { stages: stages.map { |stage| parse_stage!(stage) } }
+      { stages: stages.map { |stage| { src: stage } } }
     end
     private
-    def parse_stage!(stage)
-      if select_stage?(stage)
-        {
-          kind: :select,
-          original: stage,
-          src: "(#{parse_select!(stage)}) ? _ : ::Jrf::Control::DROPPED"
-        }
-      else
-        reject_unsupported_stage!(stage)
-        {
-          kind: :extract,
-          original: stage,
-          src: validate_extract!(stage)
-        }
-      end
-    end
-    def validate_extract!(stage)
-      reject_unsupported_stage!(stage)
-      stage
-    end
-    def parse_select!(stage)
-      reject_unsupported_stage!(stage)
-      match = /\Aselect\s*\((.*)\)\s*\z/m.match(stage)
-      raise ArgumentError, "first stage must be select(...)" unless match
-      inner = match[1].strip
-      raise ArgumentError, "select(...) must contain an expression" if inner.empty?
-      inner
-    end
-    def select_stage?(stage)
-      /\Aselect\s*\(/.match?(stage)
-    end
-    def reject_unsupported_stage!(stage)
-    end
     def split_top_level_pipeline(source)
       parts = []
       start_idx = 0

data/lib/jrf/row_context.rb CHANGED

@@ -1,21 +1,22 @@
 # frozen_string_literal: true
 require_relative "control"
 require_relative "reducers"
 module Jrf
   class RowContext
     MISSING = Object.new
-    ReducerToken = Struct.new(:index)
+    attr_writer :__jrf_current_stage
     class << self
       def define_reducer(name, &definition)
         define_method(name) do |*args, **kwargs, &block|
           spec = definition.call(self, *args, **kwargs, block: block)
-          create_reducer(
+          @__jrf_current_stage.allocate_reducer(
             spec.fetch(:value),
             initial: reducer_initial_value(spec.fetch(:initial)),
             finish: spec[:finish],
-            emit_many: spec.fetch(:emit_many, false),
             &spec.fetch(:step)
           )
         end
@@ -24,7 +25,7 @@ module Jrf
     def initialize(obj = nil)
       @obj = obj
-      @__jrf_stage = nil
+      @__jrf_current_stage = nil
     end
     def reset(obj)
@@ -40,24 +41,38 @@ module Jrf
       Control::Flat.new(@obj)
     end
+    def select(predicate)
+      predicate ? @obj : Control::DROPPED
+    end
     define_reducer(:sum) do |_ctx, value, initial: 0, block: nil|
-      { value: value, initial: initial, step: ->(acc, v) { acc + v } }
+      { value: value, initial: initial, step: ->(acc, v) { v.nil? ? acc : (acc + v) } }
+    end
+    define_reducer(:count) do |_ctx, value = MISSING, block: nil|
+      if value.equal?(MISSING)
+        { value: nil, initial: 0, step: ->(acc, _v) { acc + 1 } }
+      else
+        { value: value, initial: 0, step: ->(acc, v) { v.nil? ? acc : (acc + 1) } }
+      end
     end
     define_reducer(:min) do |_ctx, value, block: nil|
-      { value: value, initial: nil, step: ->(acc, v) { acc.nil? || v < acc ? v : acc } }
+      { value: value, initial: nil, step: ->(acc, v) { v.nil? ? acc : (acc.nil? || v < acc ? v : acc) } }
     end
     define_reducer(:max) do |_ctx, value, block: nil|
-      { value: value, initial: nil, step: ->(acc, v) { acc.nil? || v > acc ? v : acc } }
+      { value: value, initial: nil, step: ->(acc, v) { v.nil? ? acc : (acc.nil? || v > acc ? v : acc) } }
     end
     define_reducer(:average) do |_ctx, value, block: nil|
       {
         value: value,
         initial: -> { [0.0, 0] },
-        finish: ->((sum, count)) { count.zero? ? nil : (sum / count) },
+        finish: ->((sum, count)) { [count.zero? ? nil : (sum / count)] },
         step: ->(acc, v) {
+          return acc if v.nil?
           acc[0] += v
           acc[1] += 1
           acc
@@ -70,13 +85,15 @@ module Jrf
         value: value,
         initial: [0, 0.0, 0.0],
         finish: ->((count, mean, m2)) {
-          return nil if count.zero?
-          return nil if sample && count < 2
+          return [nil] if count.zero?
+          return [nil] if sample && count < 2
           denom = sample ? (count - 1) : count
-          Math.sqrt(m2 / denom)
+          [Math.sqrt(m2 / denom)]
         },
         step: ->(acc, x) {
+          return acc if x.nil?
           count, mean, m2 = acc
           count += 1
           delta = x - mean
@@ -96,7 +113,6 @@ module Jrf
         {
           value: ctx._,
           initial: -> { [] },
-          emit_many: true,
           finish: ->(rows) { rows.sort(&block) },
           step: ->(rows, row) { rows << row }
         }
@@ -105,7 +121,6 @@ module Jrf
         {
           value: [resolved_key, ctx._],
           initial: -> { [] },
-          emit_many: true,
           finish: ->(pairs) { pairs.sort_by(&:first).map(&:last) },
           step: ->(pairs, pair) { pairs << pair }
         }
@@ -124,7 +139,7 @@ module Jrf
       finish =
         if scalar
-          ->(values) { ctx.send(:percentile_value, values.sort, percentages.first) }
+          ->(values) { [ctx.send(:percentile_value, values.sort, percentages.first)] }
         else
           ->(values) {
             sorted = values.sort
@@ -137,44 +152,36 @@ module Jrf
       {
         value: value,
         initial: -> { [] },
-        emit_many: !scalar,
         finish: finish,
-        step: ->(acc, v) { acc << v }
+        step: ->(acc, v) { v.nil? ? acc : (acc << v) }
       }
     end
     def reduce(initial, &block)
       raise ArgumentError, "reduce requires a block" unless block
-      create_reducer(@obj, initial: initial, &block)
+      @__jrf_current_stage.allocate_reducer(@obj, initial: initial, &block)
     end
-    def __jrf_begin_stage__(stage, probing: false)
-      @__jrf_stage = stage
-      stage[:reducer_cursor] = 0
-      stage[:reducer_called] = false
-      stage[:reducer_probing] = probing
-    end
+    def map(&block)
+      raise ArgumentError, "map requires a block" unless block
-    def __jrf_reducer_called?
-      @__jrf_stage && @__jrf_stage[:reducer_called]
+      @__jrf_current_stage.allocate_map(:array, @obj, &block)
     end
-  private
+    def map_values(&block)
+      raise ArgumentError, "map_values requires a block" unless block
-    def create_reducer(value, initial:, emit_many: false, finish: nil, &step_fn)
-      raise "internal error: reducer used outside stage context" unless @__jrf_stage
+      @__jrf_current_stage.allocate_map(:hash, @obj, &block)
+    end
-      reducers = (@__jrf_stage[:reducers] ||= [])
-      idx = @__jrf_stage[:reducer_cursor] || 0
-      reducers[idx] ||= Reducers.reduce(initial, finish: finish, &step_fn)
-      reducers[idx].step(value) unless @__jrf_stage[:reducer_probing]
-      @__jrf_stage[:reducer_cursor] = idx + 1
-      @__jrf_stage[:reducer_called] = true
-      @__jrf_stage[:reducer_emit_many] = emit_many if @__jrf_stage[:reducer_emit_many].nil?
-      ReducerToken.new(idx)
+    def group_by(key, &block)
+      block ||= proc { group }
+      @__jrf_current_stage.allocate_group_by(key, &block)
     end
+    private
     def reducer_initial_value(initial)
       return initial.call if initial.respond_to?(:call)
       return initial.dup if initial.is_a?(Array) || initial.is_a?(Hash)

data/lib/jrf/runner.rb CHANGED

@@ -5,9 +5,12 @@ require_relative "control"
 require_relative "pipeline_parser"
 require_relative "reducers"
 require_relative "row_context"
+require_relative "stage"
 module Jrf
   class Runner
+    RS_CHAR = "\x1e"
     class ProbeValue
       def [](key)
         self
@@ -24,10 +27,11 @@ module Jrf
     PROBE_VALUE = ProbeValue.new
-    def initialize(input: ARGF, out: $stdout, err: $stderr)
+    def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false)
       @input = input
       @out = out
       @err = err
+      @lax = lax
     end
     def run(expression, verbose: false)
@@ -37,20 +41,17 @@ module Jrf
       ctx = RowContext.new
       compiled = compile_stages(stages, ctx)
-      initialize_reducers(compiled, ctx)
+      compiled.each { |stage| stage.call(PROBE_VALUE, probing: true) rescue nil }
       error = nil
       begin
-        @input.each_line do |line|
-          line = line.strip
-          next if line.empty?
-          process_value(JSON.parse(line), compiled, ctx)
+        each_input_value do |value|
+          process_value(value, compiled)
         end
       rescue StandardError => e
         error = e
       ensure
-        flush_reducers(compiled, ctx)
+        flush_reducers(compiled)
       end
       raise error if error
@@ -58,17 +59,17 @@ module Jrf
     private
-    def process_value(input, stages, ctx)
+    def process_value(input, stages)
       current_values = [input]
       stages.each do |stage|
         next_values = []
         current_values.each do |value|
-          out = apply_stage(stage, value, ctx)
+          out = stage.call(value)
           if out.equal?(Control::DROPPED)
             next
-          elsif flat_event?(out)
+          elsif out.is_a?(Control::Flat)
             unless out.value.is_a?(Array)
               raise TypeError, "flat expects Array, got #{out.value.class}"
             end
@@ -85,94 +86,70 @@ module Jrf
       current_values.each { |value| @out.puts JSON.generate(value) }
     end
-    def apply_stage(stage, input, ctx)
-      value = eval_stage(stage, input, ctx)
-      if value.equal?(Control::DROPPED)
-        Control::DROPPED
-      elsif ctx.__jrf_reducer_called?
-        stage[:reducer_template] ||= value
-        Control::DROPPED
-      else
-        value
-      end
-    end
+    def each_input_value
+      return each_input_value_lax { |value| yield value } if @lax
-    def eval_stage(stage, input, ctx)
-      ctx.reset(input)
-      ctx.__jrf_begin_stage__(stage, probing: input.equal?(PROBE_VALUE))
-      ctx.public_send(stage[:method_name])
+      each_input_value_ndjson { |value| yield value }
     end
-    def flat_event?(value)
-      value.is_a?(Control::Flat)
+    def each_input_value_ndjson
+      @input.each_line do |raw_line|
+        line = raw_line.strip
+        next if line.empty?
+        yield JSON.parse(line)
+      end
     end
-    def flush_reducers(stages, ctx)
-      tail = stages
-      loop do
-        tail = tail.drop_while { |stage| !reducer_stage?(stage) }
-        break if tail.empty?
-        stage = tail.first
-        reducers = stage[:reducers]
-        break unless reducers&.any?
-        out = finish_reducer_template(stage[:reducer_template], reducers)
-        if stage[:reducer_emit_many]
-          out.each { |value| process_value(value, tail.drop(1), ctx) }
-        else
-          process_value(out, tail.drop(1), ctx)
+    def each_input_value_lax
+      require "oj"
+      source = @input.read.to_s
+      source = source.include?(RS_CHAR) ? source.tr(RS_CHAR, "\n") : source
+      handler = Class.new(Oj::ScHandler) do
+        def initialize(&emit)
+          @emit = emit
         end
-        tail = tail.drop(1)
-      end
+        def hash_start = {}
+        def hash_key(key) = key
+        def hash_set(hash, key, value) = hash[key] = value
+        def array_start = []
+        def array_append(array, value) = array << value
+        def add_value(value) = @emit.call(value)
+      end.new { |value| yield value }
+      Oj.sc_parse(handler, source)
+    rescue LoadError
+      raise "oj is required for --lax mode (gem install oj)"
+    rescue Oj::ParseError => e
+      raise JSON::ParserError, e.message
     end
     def compile_stages(stages, ctx)
       mod = Module.new
-      compiled = []
-      stages.each_with_index do |stage, i|
+      stages.each_with_index.map do |stage, i|
         method_name = :"__jrf_stage_#{i}"
         mod.module_eval("def #{method_name}; #{stage[:src]}; end", "(jrf stage #{i})", 1)
-        compiled << stage.merge(method_name: method_name)
-      end
-      ctx.extend(mod)
-      compiled
+        Stage.new(ctx, method_name, src: stage[:src])
+      end.tap { ctx.extend(mod) }
     end
     def dump_stages(stages)
       stages.each_with_index do |stage, i|
-        @err.puts "stage[#{i}] kind=#{stage[:kind]}"
-        @err.puts "  original: #{stage[:original]}"
-        @err.puts "  ruby: #{stage[:src]}"
-      end
-    end
-    def initialize_reducers(stages, ctx)
-      stages.each do |stage|
-        begin
-          value = eval_stage(stage, PROBE_VALUE, ctx)
-          stage[:reducer_template] ||= value if ctx.__jrf_reducer_called?
-        rescue StandardError
-          # Ignore probe-time errors; reducer will be created on first runtime event.
-        end
+        @err.puts "stage[#{i}]: #{stage[:src]}"
       end
     end
-    def reducer_stage?(stage)
-      stage[:reducers]&.any?
-    end
+    def flush_reducers(stages)
+      tail = stages
+      loop do
+        idx = tail.index(&:reducer?)
+        break unless idx
-    def finish_reducer_template(template, reducers)
-      if template.is_a?(RowContext::ReducerToken)
-        reducers.fetch(template.index).finish
-      elsif template.is_a?(Array)
-        template.map { |v| finish_reducer_template(v, reducers) }
-      elsif template.is_a?(Hash)
-        template.transform_values { |v| finish_reducer_template(v, reducers) }
-      else
-        template
+        rows = tail[idx].finish
+        rest = tail.drop(idx + 1)
+        rows.each { |value| process_value(value, rest) }
+        tail = rest
       end
     end
   end

data/lib/jrf/stage.rb ADDED

@@ -0,0 +1,162 @@
+# frozen_string_literal: true
+require_relative "control"
+require_relative "reducers"
+module Jrf
+  class Stage
+    ReducerToken = Struct.new(:index)
+    attr_reader :method_name, :src
+    def self.resolve_template(template, reducers)
+      if template.is_a?(ReducerToken)
+        rows = reducers.fetch(template.index).finish
+        rows.length == 1 ? rows.first : rows
+      elsif template.is_a?(Array)
+        template.map { |v| resolve_template(v, reducers) }
+      elsif template.is_a?(Hash)
+        template.transform_values { |v| resolve_template(v, reducers) }
+      else
+        template
+      end
+    end
+    def initialize(ctx, method_name, src: nil)
+      @ctx = ctx
+      @method_name = method_name
+      @src = src
+      @reducers = []
+      @cursor = 0
+      @template = nil
+      @mode = nil # nil=unknown, :reducer, :passthrough
+      @probing = false
+    end
+    def call(input, probing: false)
+      @ctx.reset(input)
+      @cursor = 0
+      @probing = probing
+      @ctx.__jrf_current_stage = self
+      result = @ctx.public_send(@method_name)
+      if @mode.nil? && @reducers.any?
+        @mode = :reducer
+        @template = result
+      elsif @mode.nil? && !probing
+        @mode = :passthrough
+      end
+      (@mode == :reducer) ? Control::DROPPED : result
+    end
+    def allocate_reducer(value, initial:, finish: nil, &step_fn)
+      idx = @cursor
+      finish_rows = finish || ->(acc) { [acc] }
+      @reducers[idx] ||= Reducers.reduce(initial, finish: finish_rows, &step_fn)
+      @reducers[idx].step(value) unless @probing
+      @cursor += 1
+      ReducerToken.new(idx)
+    end
+    def allocate_map(type, collection, &block)
+      idx = @cursor
+      map_reducer = (@reducers[idx] ||= MapReducer.new(type))
+      unless @probing
+        saved_obj = @ctx._
+        case type
+        when :array
+          raise TypeError, "map expects Array, got #{collection.class}" unless collection.is_a?(Array)
+          collection.each_with_index do |v, i|
+            @ctx.reset(v)
+            with_scoped_reducers(map_reducer.slots[i] ||= []) do
+              result = block.call(v)
+              map_reducer.templates[i] ||= result
+            end
+          end
+        when :hash
+          raise TypeError, "map_values expects Hash, got #{collection.class}" unless collection.is_a?(Hash)
+          collection.each do |k, v|
+            @ctx.reset(v)
+            with_scoped_reducers(map_reducer.slots[k] ||= []) do
+              result = block.call(v)
+              map_reducer.templates[k] ||= result
+            end
+          end
+        end
+        @ctx.reset(saved_obj)
+      end
+      @cursor += 1
+      ReducerToken.new(idx)
+    end
+    def allocate_group_by(key, &block)
+      idx = @cursor
+      map_reducer = (@reducers[idx] ||= MapReducer.new(:hash))
+      unless @probing
+        slot = (map_reducer.slots[key] ||= [])
+        with_scoped_reducers(slot) do
+          result = block.call
+          map_reducer.templates[key] ||= result
+        end
+      end
+      @cursor += 1
+      ReducerToken.new(idx)
+    end
+    def reducer?
+      @mode == :reducer
+    end
+    def finish
+      return [] unless @mode == :reducer && @reducers.any?
+      if @template.is_a?(ReducerToken)
+        @reducers.fetch(@template.index).finish
+      else
+        [self.class.resolve_template(@template, @reducers)]
+      end
+    end
+    private
+    def with_scoped_reducers(reducer_list)
+      saved_reducers = @reducers
+      saved_cursor = @cursor
+      @reducers = reducer_list
+      @cursor = 0
+      yield
+    ensure
+      @reducers = saved_reducers
+      @cursor = saved_cursor
+    end
+    class MapReducer
+      attr_reader :slots, :templates
+      def initialize(type)
+        @type = type
+        @slots = {}
+        @templates = {}
+      end
+      def finish
+        case @type
+        when :array
+          keys = @slots.keys.sort
+          [keys.map { |k| Stage.resolve_template(@templates[k], @slots[k]) }]
+        when :hash
+          result = {}
+          @slots.each { |k, reducers| result[k] = Stage.resolve_template(@templates[k], reducers) }
+          [result]
+        end
+      end
+    end
+  end
+end

data/lib/jrf/version.rb CHANGED

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Jrf
-  VERSION = "0.1.2"
+  VERSION = "0.1.3"
 end

data/test/jrf_test.rb CHANGED

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require "json"
 require "open3"
 def run_jrf(expr, input, *opts)
@@ -86,17 +87,14 @@ assert_equal(['{"hello":123}'], lines(stdout), "select-only hello output")
 stdout, stderr, status = run_jrf('select(_["hello"] == 123) >> _["hello"]', input_hello, "-v")
 assert_success(status, stderr, "dump stages")
 assert_equal(%w[123], lines(stdout), "dump stages output")
-assert_includes(stderr, "stage[0] kind=select")
-assert_includes(stderr, 'original: select(_["hello"] == 123)')
-assert_includes(stderr, 'ruby: (_["hello"] == 123) ? _ : ::Jrf::Control::DROPPED')
-assert_includes(stderr, "stage[1] kind=extract")
-assert_includes(stderr, 'original: _["hello"]')
-assert_includes(stderr, 'ruby: _["hello"]')
+assert_includes(stderr, 'stage[0]: select(_["hello"] == 123)')
+assert_includes(stderr, 'stage[1]: _["hello"]')
 stdout, stderr, status = Open3.capture3("./exe/jrf", "--help")
 assert_success(status, stderr, "help option")
-assert_includes(stdout, "usage: jrf [-v] [--help] 'STAGE >> STAGE >> ...'")
+assert_includes(stdout, "usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'")
 assert_includes(stdout, "JSON filter with the power and speed of Ruby.")
+assert_includes(stdout, "--lax")
 assert_includes(stdout, "Pipeline:")
 assert_includes(stdout, "Connect stages with top-level >>.")
 assert_includes(stdout, "The current value in each stage is available as _.")
@@ -108,7 +106,7 @@ assert_equal([], lines(stderr), "help stderr output")
 stdout, stderr, status = run_jrf('select(_["hello"] == 123) >> _["hello"]', input_hello, "--verbose")
 assert_success(status, stderr, "dump stages verbose alias")
 assert_equal(%w[123], lines(stdout), "dump stages verbose alias output")
-assert_includes(stderr, "stage[0] kind=select")
+assert_includes(stderr, 'stage[0]: select(_["hello"] == 123)')
 input_regex = <<~NDJSON
   {"foo":{"bar":"ok"},"x":50}
@@ -176,6 +174,14 @@ stdout, stderr, status = run_jrf('sum(_["foo"])', input_sum)
 assert_success(status, stderr, "sum only")
 assert_equal(%w[10], lines(stdout), "sum output")
+stdout, stderr, status = run_jrf('count()', input_sum)
+assert_success(status, stderr, "count only")
+assert_equal(%w[4], lines(stdout), "count output")
+stdout, stderr, status = run_jrf('count(_["foo"])', input_sum)
+assert_success(status, stderr, "count(expr) only")
+assert_equal(%w[4], lines(stdout), "count(expr) output")
 stdout, stderr, status = run_jrf('min(_["foo"])', input_sum)
 assert_success(status, stderr, "min only")
 assert_equal(%w[1], lines(stdout), "min output")
@@ -204,6 +210,14 @@ stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> sum(_["foo"])', input
 assert_success(status, stderr, "sum no matches")
 assert_equal(%w[0], lines(stdout), "sum no matches output")
+stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> count()', input_sum)
+assert_success(status, stderr, "count no matches")
+assert_equal(%w[0], lines(stdout), "count no matches output")
+stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> count(_["foo"])', input_sum)
+assert_success(status, stderr, "count(expr) no matches")
+assert_equal(%w[0], lines(stdout), "count(expr) no matches output")
 stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> average(_["foo"])', input_sum)
 assert_success(status, stderr, "average no matches")
 assert_equal(%w[null], lines(stdout), "average no matches output")
@@ -288,6 +302,97 @@ assert_equal(
   "array percentile output"
 )
+input_with_nil = <<~NDJSON
+  {"foo":1}
+  {"foo":null}
+  {"bar":999}
+  {"foo":3}
+NDJSON
+stdout, stderr, status = run_jrf('sum(_["foo"])', input_with_nil)
+assert_success(status, stderr, "sum ignores nil")
+assert_equal(%w[4], lines(stdout), "sum ignores nil output")
+stdout, stderr, status = run_jrf('min(_["foo"])', input_with_nil)
+assert_success(status, stderr, "min ignores nil")
+assert_equal(%w[1], lines(stdout), "min ignores nil output")
+stdout, stderr, status = run_jrf('max(_["foo"])', input_with_nil)
+assert_success(status, stderr, "max ignores nil")
+assert_equal(%w[3], lines(stdout), "max ignores nil output")
+stdout, stderr, status = run_jrf('average(_["foo"])', input_with_nil)
+assert_success(status, stderr, "average ignores nil")
+assert_float_close(2.0, lines(stdout).first.to_f, 1e-12, "average ignores nil output")
+stdout, stderr, status = run_jrf('stdev(_["foo"])', input_with_nil)
+assert_success(status, stderr, "stdev ignores nil")
+assert_float_close(1.0, lines(stdout).first.to_f, 1e-12, "stdev ignores nil output")
+stdout, stderr, status = run_jrf('percentile(_["foo"], [0.5, 1.0])', input_with_nil)
+assert_success(status, stderr, "percentile ignores nil")
+assert_equal(
+  ['{"percentile":0.5,"value":1}', '{"percentile":1.0,"value":3}'],
+  lines(stdout),
+  "percentile ignores nil output"
+)
+stdout, stderr, status = run_jrf('count()', input_with_nil)
+assert_success(status, stderr, "count with nil rows")
+assert_equal(%w[4], lines(stdout), "count with nil rows output")
+stdout, stderr, status = run_jrf('count(_["foo"])', input_with_nil)
+assert_success(status, stderr, "count(expr) ignores nil")
+assert_equal(%w[2], lines(stdout), "count(expr) ignores nil output")
+input_all_nil = <<~NDJSON
+  {"foo":null}
+  {"bar":1}
+NDJSON
+stdout, stderr, status = run_jrf('sum(_["foo"])', input_all_nil)
+assert_success(status, stderr, "sum all nil")
+assert_equal(%w[0], lines(stdout), "sum all nil output")
+stdout, stderr, status = run_jrf('min(_["foo"])', input_all_nil)
+assert_success(status, stderr, "min all nil")
+assert_equal(%w[null], lines(stdout), "min all nil output")
+stdout, stderr, status = run_jrf('max(_["foo"])', input_all_nil)
+assert_success(status, stderr, "max all nil")
+assert_equal(%w[null], lines(stdout), "max all nil output")
+stdout, stderr, status = run_jrf('average(_["foo"])', input_all_nil)
+assert_success(status, stderr, "average all nil")
+assert_equal(%w[null], lines(stdout), "average all nil output")
+stdout, stderr, status = run_jrf('stdev(_["foo"])', input_all_nil)
+assert_success(status, stderr, "stdev all nil")
+assert_equal(%w[null], lines(stdout), "stdev all nil output")
+stdout, stderr, status = run_jrf('percentile(_["foo"], 0.5)', input_all_nil)
+assert_success(status, stderr, "percentile all nil")
+assert_equal(%w[null], lines(stdout), "percentile all nil output")
+stdout, stderr, status = run_jrf('count(_["foo"])', input_all_nil)
+assert_success(status, stderr, "count(expr) all nil")
+assert_equal(%w[0], lines(stdout), "count(expr) all nil output")
+input_multi_cols = <<~NDJSON
+  {"a":1,"b":10}
+  {"a":2,"b":20}
+  {"a":3,"b":30}
+  {"a":4,"b":40}
+NDJSON
+stdout, stderr, status = run_jrf('{a: percentile(_["a"], [0.25, 0.50, 1.0]), b: percentile(_["b"], [0.25, 0.50, 1.0])}', input_multi_cols)
+assert_success(status, stderr, "nested array percentile for multiple columns")
+assert_equal(
+  ['{"a":[{"percentile":0.25,"value":1},{"percentile":0.5,"value":2},{"percentile":1.0,"value":4}],"b":[{"percentile":0.25,"value":10},{"percentile":0.5,"value":20},{"percentile":1.0,"value":40}]}'],
+  lines(stdout),
+  "nested array percentile output"
+)
 input_reduce = <<~NDJSON
   {"s":"hello"}
   {"s":"world"}
@@ -306,6 +411,57 @@ stdout, stderr, status = run_jrf('sum(_["foo"]) >> select(_ > 100)', input_sum)
 assert_success(status, stderr, "post-reduce select drop")
 assert_equal([], lines(stdout), "post-reduce select drop output")
+input_whitespace_stream = "{\"foo\":1} {\"foo\":2}\n\t{\"foo\":3}\n"
+stdout, stderr, status = run_jrf('_["foo"]', input_whitespace_stream)
+assert_failure(status, "default NDJSON should reject same-line multi-values")
+assert_includes(stderr, "JSON::ParserError")
+stdout, stderr, status = run_jrf('_["foo"]', input_whitespace_stream, "--lax")
+assert_success(status, stderr, "whitespace-separated JSON stream with --lax")
+assert_equal(%w[1 2 3], lines(stdout), "whitespace-separated stream output")
+input_json_seq = "\x1e{\"foo\":10}\n\x1e{\"foo\":20}\n"
+stdout, stderr, status = run_jrf('_["foo"]', input_json_seq)
+assert_failure(status, "RS framing requires --lax")
+assert_includes(stderr, "JSON::ParserError")
+stdout, stderr, status = run_jrf('_["foo"]', input_json_seq, "--lax")
+assert_success(status, stderr, "json-seq style RS framing with --lax")
+assert_equal(%w[10 20], lines(stdout), "json-seq style output")
+input_lax_multiline = <<~JSONS
+  {
+    "foo": 101,
+    "bar": {"x": 1}
+  }
+  {
+    "foo": 202,
+    "bar": {"x": 2}
+  }
+JSONS
+stdout, stderr, status = run_jrf('_["foo"]', input_lax_multiline)
+assert_failure(status, "default NDJSON rejects multiline objects")
+assert_includes(stderr, "JSON::ParserError")
+stdout, stderr, status = run_jrf('_["bar"]["x"]', input_lax_multiline, "--lax")
+assert_success(status, stderr, "lax accepts multiline objects")
+assert_equal(%w[1 2], lines(stdout), "lax multiline object output")
+input_lax_mixed_separators = "{\"foo\":1}\n\x1e{\"foo\":2}\t{\"foo\":3}\n"
+stdout, stderr, status = run_jrf('_["foo"]', input_lax_mixed_separators, "--lax")
+assert_success(status, stderr, "lax accepts mixed whitespace and RS separators")
+assert_equal(%w[1 2 3], lines(stdout), "lax mixed separators output")
+input_lax_with_escaped_newline = "{\"s\":\"line1\\nline2\"}\n{\"s\":\"ok\"}\n"
+stdout, stderr, status = run_jrf('_["s"]', input_lax_with_escaped_newline, "--lax")
+assert_success(status, stderr, "lax handles escaped newlines in strings")
+assert_equal(['"line1\nline2"', '"ok"'], lines(stdout), "lax escaped newline string output")
+input_lax_trailing_rs = "\x1e{\"foo\":9}\n\x1e"
+stdout, stderr, status = run_jrf('_["foo"]', input_lax_trailing_rs, "--lax")
+assert_success(status, stderr, "lax ignores trailing separator")
+assert_equal(%w[9], lines(stdout), "lax trailing separator output")
 stdout, stderr, status = run_jrf('select(_["x"] > ) >> _["foo"]', "")
 assert_failure(status, "syntax error should fail before row loop")
 assert_includes(stderr, "syntax error")
@@ -339,4 +495,120 @@ stdout, stderr, status = run_jrf('_["foo"] >> select(_["keep"]) >> _["bar"] >> s
 assert_success(status, stderr, "select/extract chain")
 assert_equal(%w[3], lines(stdout), "chain output")
+input_map = <<~NDJSON
+  {"values":[1,10,100]}
+  {"values":[2,20,200]}
+  {"values":[3,30,300]}
+NDJSON
+stdout, stderr, status = run_jrf('_["values"] >> map { |x| sum(x) }', input_map)
+assert_success(status, stderr, "map with sum")
+assert_equal(['[6,60,600]'], lines(stdout), "map with sum output")
+stdout, stderr, status = run_jrf('_["values"] >> map { |x| min(x) }', input_map)
+assert_success(status, stderr, "map with min")
+assert_equal(['[1,10,100]'], lines(stdout), "map with min output")
+stdout, stderr, status = run_jrf('_["values"] >> map { |x| max(x) }', input_map)
+assert_success(status, stderr, "map with max")
+assert_equal(['[3,30,300]'], lines(stdout), "map with max output")
+input_map_varying = <<~NDJSON
+  [1,10]
+  [2,20,200]
+  [3]
+NDJSON
+stdout, stderr, status = run_jrf('map { |x| sum(x) }', input_map_varying)
+assert_success(status, stderr, "map varying lengths")
+assert_equal(['[6,30,200]'], lines(stdout), "map varying lengths output")
+input_map_values = <<~NDJSON
+  {"a":1,"b":10}
+  {"a":2,"b":20}
+  {"a":3,"b":30}
+NDJSON
+stdout, stderr, status = run_jrf('map_values { |v| sum(v) }', input_map_values)
+assert_success(status, stderr, "map_values with sum")
+assert_equal(['{"a":6,"b":60}'], lines(stdout), "map_values with sum output")
+stdout, stderr, status = run_jrf('map_values { |v| min(v) }', input_map_values)
+assert_success(status, stderr, "map_values with min")
+assert_equal(['{"a":1,"b":10}'], lines(stdout), "map_values with min output")
+input_map_values_varying = <<~NDJSON
+  {"a":1}
+  {"a":2,"b":20}
+  {"a":3,"b":30}
+NDJSON
+stdout, stderr, status = run_jrf('map_values { |v| sum(v) }', input_map_values_varying)
+assert_success(status, stderr, "map_values varying keys")
+assert_equal(['{"a":6,"b":50}'], lines(stdout), "map_values varying keys output")
+stdout, stderr, status = run_jrf('map_values { |v| count(v) }', input_map_values)
+assert_success(status, stderr, "map_values with count")
+assert_equal(['{"a":3,"b":3}'], lines(stdout), "map_values with count output")
+stdout, stderr, status = run_jrf('select(false) >> map { |x| sum(x) }', input_map)
+assert_success(status, stderr, "map no matches")
+assert_equal(['[]'], lines(stdout), "map no matches output")
+stdout, stderr, status = run_jrf('select(false) >> map_values { |v| sum(v) }', input_map_values)
+assert_success(status, stderr, "map_values no matches")
+assert_equal(['{}'], lines(stdout), "map_values no matches output")
+stdout, stderr, status = run_jrf('map_values { |v| sum(v) } >> map_values { |v| v * 10 }', input_map_values)
+assert_success(status, stderr, "map_values piped to map_values passthrough")
+assert_equal(['{"a":60,"b":600}'], lines(stdout), "map_values piped output")
+input_gb = <<~NDJSON
+  {"status":200,"path":"/a","latency":10}
+  {"status":404,"path":"/b","latency":50}
+  {"status":200,"path":"/c","latency":30}
+  {"status":200,"path":"/d","latency":20}
+NDJSON
+stdout, stderr, status = run_jrf('group_by(_["status"]) { count() }', input_gb)
+assert_success(status, stderr, "group_by with count")
+assert_equal(['{"200":3,"404":1}'], lines(stdout), "group_by with count output")
+stdout, stderr, status = run_jrf('group_by(_["status"]) { sum(_["latency"]) }', input_gb)
+assert_success(status, stderr, "group_by with sum")
+assert_equal(['{"200":60,"404":50}'], lines(stdout), "group_by with sum output")
+stdout, stderr, status = run_jrf('group_by(_["status"]) { average(_["latency"]) }', input_gb)
+assert_success(status, stderr, "group_by with average")
+result = JSON.parse(lines(stdout).first)
+assert_float_close(20.0, result["200"], 1e-12, "group_by average 200")
+assert_float_close(50.0, result["404"], 1e-12, "group_by average 404")
+stdout, stderr, status = run_jrf('group_by(_["status"])', input_gb)
+assert_success(status, stderr, "group_by default (collect rows)")
+result = JSON.parse(lines(stdout).first)
+assert_equal(3, result["200"].length, "group_by default 200 count")
+assert_equal(1, result["404"].length, "group_by default 404 count")
+assert_equal("/a", result["200"][0]["path"], "group_by default first row")
+stdout, stderr, status = run_jrf('group_by(_["status"]) { group(_["path"]) }', input_gb)
+assert_success(status, stderr, "group_by with group(expr)")
+assert_equal(['{"200":["/a","/c","/d"],"404":["/b"]}'], lines(stdout), "group_by with group(expr) output")
+stdout, stderr, status = run_jrf('group_by(_["status"]) { min(_["latency"]) }', input_gb)
+assert_success(status, stderr, "group_by with min")
+assert_equal(['{"200":10,"404":50}'], lines(stdout), "group_by with min output")
+stdout, stderr, status = run_jrf('group_by(_["status"]) { {total: sum(_["latency"]), n: count()} }', input_gb)
+assert_success(status, stderr, "group_by with multi-reducer")
+assert_equal(['{"200":{"total":60,"n":3},"404":{"total":50,"n":1}}'], lines(stdout), "group_by multi-reducer output")
+stdout, stderr, status = run_jrf('select(false) >> group_by(_["status"]) { count() }', input_gb)
+assert_success(status, stderr, "group_by no matches")
+assert_equal(['{}'], lines(stdout), "group_by no matches output")
+stdout, stderr, status = run_jrf('group_by(_["status"]) { count() } >> _[200]', input_gb)
+assert_success(status, stderr, "group_by then extract")
+assert_equal(%w[3], lines(stdout), "group_by then extract output")
 puts "ok"

metadata CHANGED Viewed

@@ -1,14 +1,28 @@
 --- !ruby/object:Gem::Specification
 name: jrf
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.1.3
 platform: ruby
 authors:
 - kazuho
 bindir: exe
 cert_chain: []
 date: 1980-01-02 00:00:00.000000000 Z
-dependencies: []
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: oj
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '3.16'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '3.16'
 description: jrf is a JSON filter with the power and speed of Ruby. It lets you write
   transforms as Ruby expressions, so you can use arbitrary Ruby logic. It supports
   extraction, filtering, flattening, sorting, and aggregation in stage pipelines.
@@ -31,6 +45,7 @@ files:
 - lib/jrf/reducers.rb
 - lib/jrf/row_context.rb
 - lib/jrf/runner.rb
+- lib/jrf/stage.rb
 - lib/jrf/version.rb
 - test/jrf_test.rb
 licenses: