RubyGems - jrf - Versions diffs - 0.1.2 → 0.1.4 - Mend

jrf 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 2776f201f13bf8be05ec4615510f0810ceeff9115dd11ef4d54ed873c9c90030
-  data.tar.gz: 0b996f561536a47067d262122c3b1093fd9adfb499fc0fab7011226cee75f043
+  metadata.gz: 9ce648c2afbfe10dc161b08badb05acdb411baf839dde77433927380b6bb7439
+  data.tar.gz: 6be5a0851eecd3cfcbe93aff1cb8fdd163a84dd96a7b12e440fc514db03f67a0
 SHA512:
-  metadata.gz: bde0a34fdeb324132084a7621bc2a37767a506261dbafadfdebbeb7fc060a2e8a142c44132ed5228a2604a5dba767871d39303eb3aa941510ba0d3f2694d5e7e
-  data.tar.gz: 725b4611d5659ce994df183950676a7c8ff2582df7b02cbc0bf7ef47682ca2bf2f55507df10c6e907a1032c6b4dcd17625b4db807f28d304a331f0caf4c8a1cd
+  metadata.gz: aa4dfead95dbe09453ec720cdbcf77ba4c7e3f1047c60f51d4ff54724dfa540bb1dbd5630ecb07d09d745e1e61e4c236f50f4407ff6d4c17dd5431b385679f57
+  data.tar.gz: 03c3f5dd3f36675a2bc31981effc506bb1822bb170e754785ccffe077becdd5af13421b4cbfd18fea1c1262f06feef61561be3e3243ca0379e1e6af21ad003c5

data/Rakefile CHANGED Viewed

@@ -8,8 +8,3 @@ Rake::TestTask.new do |t|
 end
 task default: :test
-desc "Build man/jrf.1 from README.md"
-task :man do
-  ruby "script/build_man_from_readme.rb"
-end

data/exe/jrf CHANGED Viewed

@@ -1,6 +1,12 @@
 #!/usr/bin/env ruby
 # frozen_string_literal: true
+begin
+  require "bundler/setup"
+rescue LoadError
+  # Allow running without Bundler in plain Ruby environments.
+end
 $LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
 require "jrf"

data/jrf.gemspec CHANGED Viewed

@@ -15,6 +15,7 @@ Gem::Specification.new do |spec|
   spec.bindir = "exe"
   spec.executables = ["jrf"]
+  spec.add_dependency "oj", ">= 3.16"
   spec.files = Dir.glob("{exe,lib,test}/*") + Dir.glob("lib/**/*") + %w[DESIGN.txt jrf.gemspec Gemfile Rakefile]
 end

data/lib/jrf/cli.rb CHANGED Viewed

@@ -4,15 +4,17 @@ require_relative "runner"
 module Jrf
   class CLI
-    USAGE = "usage: jrf [-v] [--help] 'STAGE >> STAGE >> ...'"
+    USAGE = "usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'"
     HELP_TEXT = <<~'TEXT'
-      usage: jrf [-v] [--help] 'STAGE >> STAGE >> ...'
+      usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'
       JSON filter with the power and speed of Ruby.
       Options:
-        -v, --verbose  print compiled stage Ruby expressions
+        -v, --verbose  print parsed stage expressions
+        --lax          allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
+        -p, --pretty   pretty-print JSON output instead of compact NDJSON
         -h, --help     show this help and exit
       Pipeline:
@@ -27,18 +29,25 @@ module Jrf
         jrf '_["msg"] >> reduce(nil) { |acc, v| acc ? "#{acc} #{v}" : v }'
       See Also:
-        README.md
-        man jrf
+        https://github.com/kazuho/jrf#readme
     TEXT
     def self.run(argv = ARGV, input: ARGF, out: $stdout, err: $stderr)
       verbose = false
+      lax = false
+      pretty = false
       while argv.first&.start_with?("-")
         case argv.first
         when "-v", "--verbose"
           verbose = true
           argv.shift
+        when "--lax"
+          lax = true
+          argv.shift
+        when "-p", "--pretty"
+          pretty = true
+          argv.shift
         when "-h", "--help"
           out.puts HELP_TEXT
           return 0
@@ -55,7 +64,7 @@ module Jrf
       end
       expression = argv.shift
-      Runner.new(input: input, out: out, err: err).run(expression, verbose: verbose)
+      Runner.new(input: input, out: out, err: err, lax: lax, pretty: pretty).run(expression, verbose: verbose)
       0
     end
   end

data/lib/jrf/pipeline.rb ADDED Viewed

@@ -0,0 +1,85 @@
+# frozen_string_literal: true
+require_relative "control"
+require_relative "row_context"
+require_relative "stage"
+module Jrf
+  class Pipeline
+    def initialize(*blocks)
+      raise ArgumentError, "at least one stage block is required" if blocks.empty?
+      @ctx = RowContext.new
+      @stages = blocks.map { |block| Stage.new(@ctx, block, src: nil) }
+    end
+    # Run the pipeline on an enumerable of input values.
+    #
+    # Without a block, returns an Array of output values.
+    # With a block, streams each output value to the block.
+    #
+    # @param input [Enumerable] input values to process
+    # @yieldparam value output value
+    # @return [Array, nil] output values (without block), or nil (with block)
+    def call(input, &on_output)
+      if on_output
+        call_streaming(input, &on_output)
+      else
+        results = []
+        call_streaming(input) { |v| results << v }
+        results
+      end
+    end
+    private
+    def call_streaming(input, &on_output)
+      error = nil
+      begin
+        input.each { |value| process_value(value, @stages, &on_output) }
+      rescue StandardError => e
+        error = e
+      ensure
+        flush_reducers(@stages, &on_output)
+      end
+      raise error if error
+    end
+    def process_value(input, stages, &on_output)
+      current_values = [input]
+      stages.each do |stage|
+        next_values = []
+        current_values.each do |value|
+          out = stage.call(value)
+          if out.equal?(Control::DROPPED)
+            next
+          elsif out.is_a?(Control::Flat)
+            unless out.value.is_a?(Array)
+              raise TypeError, "flat expects Array, got #{out.value.class}"
+            end
+            next_values.concat(out.value)
+          else
+            next_values << out
+          end
+        end
+        return if next_values.empty?
+        current_values = next_values
+      end
+      current_values.each(&on_output)
+    end
+    def flush_reducers(stages, &on_output)
+      stages.each_with_index do |stage, idx|
+        rows = stage.finish
+        next if rows.empty?
+        rest = stages.drop(idx + 1)
+        rows.each { |value| process_value(value, rest, &on_output) }
+      end
+    end
+  end
+end

data/lib/jrf/pipeline_parser.rb CHANGED Viewed

@@ -9,51 +9,11 @@ module Jrf
     def parse
       stages = split_top_level_pipeline(@source).map(&:strip).reject(&:empty?)
       raise ArgumentError, "empty expression" if stages.empty?
-      { stages: stages.map { |stage| parse_stage!(stage) } }
+      { stages: stages.map { |stage| { src: stage } } }
     end
     private
-    def parse_stage!(stage)
-      if select_stage?(stage)
-        {
-          kind: :select,
-          original: stage,
-          src: "(#{parse_select!(stage)}) ? _ : ::Jrf::Control::DROPPED"
-        }
-      else
-        reject_unsupported_stage!(stage)
-        {
-          kind: :extract,
-          original: stage,
-          src: validate_extract!(stage)
-        }
-      end
-    end
-    def validate_extract!(stage)
-      reject_unsupported_stage!(stage)
-      stage
-    end
-    def parse_select!(stage)
-      reject_unsupported_stage!(stage)
-      match = /\Aselect\s*\((.*)\)\s*\z/m.match(stage)
-      raise ArgumentError, "first stage must be select(...)" unless match
-      inner = match[1].strip
-      raise ArgumentError, "select(...) must contain an expression" if inner.empty?
-      inner
-    end
-    def select_stage?(stage)
-      /\Aselect\s*\(/.match?(stage)
-    end
-    def reject_unsupported_stage!(stage)
-    end
     def split_top_level_pipeline(source)
       parts = []
       start_idx = 0

data/lib/jrf/row_context.rb CHANGED Viewed

@@ -1,21 +1,22 @@
 # frozen_string_literal: true
 require_relative "control"
 require_relative "reducers"
 module Jrf
   class RowContext
     MISSING = Object.new
-    ReducerToken = Struct.new(:index)
+    attr_writer :__jrf_current_stage
     class << self
       def define_reducer(name, &definition)
         define_method(name) do |*args, **kwargs, &block|
           spec = definition.call(self, *args, **kwargs, block: block)
-          create_reducer(
+          @__jrf_current_stage.allocate_reducer(
             spec.fetch(:value),
             initial: reducer_initial_value(spec.fetch(:initial)),
             finish: spec[:finish],
-            emit_many: spec.fetch(:emit_many, false),
             &spec.fetch(:step)
           )
         end
@@ -24,7 +25,7 @@ module Jrf
     def initialize(obj = nil)
       @obj = obj
-      @__jrf_stage = nil
+      @__jrf_current_stage = nil
     end
     def reset(obj)
@@ -40,24 +41,38 @@ module Jrf
       Control::Flat.new(@obj)
     end
+    def select(predicate)
+      predicate ? @obj : Control::DROPPED
+    end
     define_reducer(:sum) do |_ctx, value, initial: 0, block: nil|
-      { value: value, initial: initial, step: ->(acc, v) { acc + v } }
+      { value: value, initial: initial, step: ->(acc, v) { v.nil? ? acc : (acc + v) } }
+    end
+    define_reducer(:count) do |_ctx, value = MISSING, block: nil|
+      if value.equal?(MISSING)
+        { value: nil, initial: 0, step: ->(acc, _v) { acc + 1 } }
+      else
+        { value: value, initial: 0, step: ->(acc, v) { v.nil? ? acc : (acc + 1) } }
+      end
     end
     define_reducer(:min) do |_ctx, value, block: nil|
-      { value: value, initial: nil, step: ->(acc, v) { acc.nil? || v < acc ? v : acc } }
+      { value: value, initial: nil, step: ->(acc, v) { v.nil? ? acc : (acc.nil? || v < acc ? v : acc) } }
     end
     define_reducer(:max) do |_ctx, value, block: nil|
-      { value: value, initial: nil, step: ->(acc, v) { acc.nil? || v > acc ? v : acc } }
+      { value: value, initial: nil, step: ->(acc, v) { v.nil? ? acc : (acc.nil? || v > acc ? v : acc) } }
     end
     define_reducer(:average) do |_ctx, value, block: nil|
       {
         value: value,
         initial: -> { [0.0, 0] },
-        finish: ->((sum, count)) { count.zero? ? nil : (sum / count) },
+        finish: ->((sum, count)) { [count.zero? ? nil : (sum / count)] },
         step: ->(acc, v) {
+          return acc if v.nil?
           acc[0] += v
           acc[1] += 1
           acc
@@ -70,13 +85,15 @@ module Jrf
         value: value,
         initial: [0, 0.0, 0.0],
         finish: ->((count, mean, m2)) {
-          return nil if count.zero?
-          return nil if sample && count < 2
+          return [nil] if count.zero?
+          return [nil] if sample && count < 2
           denom = sample ? (count - 1) : count
-          Math.sqrt(m2 / denom)
+          [Math.sqrt(m2 / denom)]
         },
         step: ->(acc, x) {
+          return acc if x.nil?
           count, mean, m2 = acc
           count += 1
           delta = x - mean
@@ -96,7 +113,6 @@ module Jrf
         {
           value: ctx._,
           initial: -> { [] },
-          emit_many: true,
           finish: ->(rows) { rows.sort(&block) },
           step: ->(rows, row) { rows << row }
         }
@@ -105,7 +121,6 @@ module Jrf
         {
           value: [resolved_key, ctx._],
           initial: -> { [] },
-          emit_many: true,
           finish: ->(pairs) { pairs.sort_by(&:first).map(&:last) },
           step: ->(pairs, pair) { pairs << pair }
         }
@@ -124,57 +139,47 @@ module Jrf
       finish =
         if scalar
-          ->(values) { ctx.send(:percentile_value, values.sort, percentages.first) }
+          ->(values) { [ctx.send(:percentile_value, values.sort, percentages.first)] }
         else
           ->(values) {
             sorted = values.sort
-            percentages.map do |p|
-              { "percentile" => p, "value" => ctx.send(:percentile_value, sorted, p) }
-            end
+            [percentages.map { |p| ctx.send(:percentile_value, sorted, p) }]
           }
         end
       {
         value: value,
         initial: -> { [] },
-        emit_many: !scalar,
         finish: finish,
-        step: ->(acc, v) { acc << v }
+        step: ->(acc, v) { v.nil? ? acc : (acc << v) }
       }
     end
     def reduce(initial, &block)
       raise ArgumentError, "reduce requires a block" unless block
-      create_reducer(@obj, initial: initial, &block)
+      @__jrf_current_stage.allocate_reducer(@obj, initial: initial, &block)
     end
-    def __jrf_begin_stage__(stage, probing: false)
-      @__jrf_stage = stage
-      stage[:reducer_cursor] = 0
-      stage[:reducer_called] = false
-      stage[:reducer_probing] = probing
-    end
+    def map(&block)
+      raise ArgumentError, "map requires a block" unless block
-    def __jrf_reducer_called?
-      @__jrf_stage && @__jrf_stage[:reducer_called]
+      @__jrf_current_stage.allocate_map(:array, @obj, &block)
     end
-  private
+    def map_values(&block)
+      raise ArgumentError, "map_values requires a block" unless block
-    def create_reducer(value, initial:, emit_many: false, finish: nil, &step_fn)
-      raise "internal error: reducer used outside stage context" unless @__jrf_stage
+      @__jrf_current_stage.allocate_map(:hash, @obj, &block)
+    end
-      reducers = (@__jrf_stage[:reducers] ||= [])
-      idx = @__jrf_stage[:reducer_cursor] || 0
-      reducers[idx] ||= Reducers.reduce(initial, finish: finish, &step_fn)
-      reducers[idx].step(value) unless @__jrf_stage[:reducer_probing]
-      @__jrf_stage[:reducer_cursor] = idx + 1
-      @__jrf_stage[:reducer_called] = true
-      @__jrf_stage[:reducer_emit_many] = emit_many if @__jrf_stage[:reducer_emit_many].nil?
-      ReducerToken.new(idx)
+    def group_by(key, &block)
+      block ||= proc { group }
+      @__jrf_current_stage.allocate_group_by(key, &block)
     end
+    private
     def reducer_initial_value(initial)
       return initial.call if initial.respond_to?(:call)
       return initial.dup if initial.is_a?(Array) || initial.is_a?(Hash)

data/lib/jrf/runner.rb CHANGED Viewed

@@ -1,33 +1,19 @@
 # frozen_string_literal: true
 require "json"
-require_relative "control"
+require_relative "pipeline"
 require_relative "pipeline_parser"
-require_relative "reducers"
-require_relative "row_context"
 module Jrf
   class Runner
-    class ProbeValue
-      def [](key)
-        self
-      end
-      def method_missing(name, *args, &block)
-        self
-      end
-      def respond_to_missing?(name, include_private = false)
-        true
-      end
-    end
-    PROBE_VALUE = ProbeValue.new
+    RS_CHAR = "\x1e"
-    def initialize(input: ARGF, out: $stdout, err: $stderr)
+    def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false, pretty: false)
       @input = input
       @out = out
       @err = err
+      @lax = lax
+      @pretty = pretty
     end
     def run(expression, verbose: false)
@@ -35,144 +21,60 @@ module Jrf
       stages = parsed[:stages]
       dump_stages(stages) if verbose
-      ctx = RowContext.new
-      compiled = compile_stages(stages, ctx)
-      initialize_reducers(compiled, ctx)
-      error = nil
-      begin
-        @input.each_line do |line|
-          line = line.strip
-          next if line.empty?
+      blocks = stages.map { |stage|
+        eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
+      }
+      pipeline = Pipeline.new(*blocks)
-          process_value(JSON.parse(line), compiled, ctx)
-        end
-      rescue StandardError => e
-        error = e
-      ensure
-        flush_reducers(compiled, ctx)
+      input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
+      pipeline.call(input_enum) do |value|
+        @out.puts(@pretty ? JSON.pretty_generate(value) : JSON.generate(value))
       end
-      raise error if error
     end
     private
-    def process_value(input, stages, ctx)
-      current_values = [input]
+    def each_input_value
+      return each_input_value_lax { |value| yield value } if @lax
-      stages.each do |stage|
-        next_values = []
-        current_values.each do |value|
-          out = apply_stage(stage, value, ctx)
-          if out.equal?(Control::DROPPED)
-            next
-          elsif flat_event?(out)
-            unless out.value.is_a?(Array)
-              raise TypeError, "flat expects Array, got #{out.value.class}"
-            end
-            next_values.concat(out.value)
-          else
-            next_values << out
-          end
-        end
-        return if next_values.empty?
-        current_values = next_values
-      end
-      current_values.each { |value| @out.puts JSON.generate(value) }
+      each_input_value_ndjson { |value| yield value }
     end
-    def apply_stage(stage, input, ctx)
-      value = eval_stage(stage, input, ctx)
-      if value.equal?(Control::DROPPED)
-        Control::DROPPED
-      elsif ctx.__jrf_reducer_called?
-        stage[:reducer_template] ||= value
-        Control::DROPPED
-      else
-        value
-      end
-    end
-    def eval_stage(stage, input, ctx)
-      ctx.reset(input)
-      ctx.__jrf_begin_stage__(stage, probing: input.equal?(PROBE_VALUE))
-      ctx.public_send(stage[:method_name])
-    end
+    def each_input_value_ndjson
+      @input.each_line do |raw_line|
+        line = raw_line.strip
+        next if line.empty?
-    def flat_event?(value)
-      value.is_a?(Control::Flat)
-    end
-    def flush_reducers(stages, ctx)
-      tail = stages
-      loop do
-        tail = tail.drop_while { |stage| !reducer_stage?(stage) }
-        break if tail.empty?
-        stage = tail.first
-        reducers = stage[:reducers]
-        break unless reducers&.any?
-        out = finish_reducer_template(stage[:reducer_template], reducers)
-        if stage[:reducer_emit_many]
-          out.each { |value| process_value(value, tail.drop(1), ctx) }
-        else
-          process_value(out, tail.drop(1), ctx)
-        end
-        tail = tail.drop(1)
+        yield JSON.parse(line)
       end
     end
-    def compile_stages(stages, ctx)
-      mod = Module.new
-      compiled = []
-      stages.each_with_index do |stage, i|
-        method_name = :"__jrf_stage_#{i}"
-        mod.module_eval("def #{method_name}; #{stage[:src]}; end", "(jrf stage #{i})", 1)
-        compiled << stage.merge(method_name: method_name)
-      end
+    def each_input_value_lax
+      require "oj"
+      source = @input.read.to_s
+      source = source.include?(RS_CHAR) ? source.tr(RS_CHAR, "\n") : source
+      handler = Class.new(Oj::ScHandler) do
+        def initialize(&emit)
+          @emit = emit
+        end
-      ctx.extend(mod)
-      compiled
+        def hash_start = {}
+        def hash_key(key) = key
+        def hash_set(hash, key, value) = hash[key] = value
+        def array_start = []
+        def array_append(array, value) = array << value
+        def add_value(value) = @emit.call(value)
+      end.new { |value| yield value }
+      Oj.sc_parse(handler, source)
+    rescue LoadError
+      raise "oj is required for --lax mode (gem install oj)"
+    rescue Oj::ParseError => e
+      raise JSON::ParserError, e.message
     end
     def dump_stages(stages)
       stages.each_with_index do |stage, i|
-        @err.puts "stage[#{i}] kind=#{stage[:kind]}"
-        @err.puts "  original: #{stage[:original]}"
-        @err.puts "  ruby: #{stage[:src]}"
-      end
-    end
-    def initialize_reducers(stages, ctx)
-      stages.each do |stage|
-        begin
-          value = eval_stage(stage, PROBE_VALUE, ctx)
-          stage[:reducer_template] ||= value if ctx.__jrf_reducer_called?
-        rescue StandardError
-          # Ignore probe-time errors; reducer will be created on first runtime event.
-        end
-      end
-    end
-    def reducer_stage?(stage)
-      stage[:reducers]&.any?
-    end
-    def finish_reducer_template(template, reducers)
-      if template.is_a?(RowContext::ReducerToken)
-        reducers.fetch(template.index).finish
-      elsif template.is_a?(Array)
-        template.map { |v| finish_reducer_template(v, reducers) }
-      elsif template.is_a?(Hash)
-        template.transform_values { |v| finish_reducer_template(v, reducers) }
-      else
-        template
+        @err.puts "stage[#{i}]: #{stage[:src]}"
       end
     end
   end