RubyGems - jrf - Versions diffs - 0.1.5 → 0.1.7 - Mend

jrf 0.1.5 → 0.1.7

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (11)

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 408c1f9706af5efaa1bf0125201d6647b4c108aa4aa28c99a93b59fb9cc94f02
-  data.tar.gz: 702f2fb14dc9d498292b02c41f0cdb4a91c0fa3e093ad9a71435d9a2604532fa
+  metadata.gz: 7ac8b4b0fe2489c04dcba49752df7143f7e218de9f21b0496e2c3fdd2f732088
+  data.tar.gz: 2787cc4714d0e99909c4430fe23aca1fcaae1c25a079f15b2092861b53c4f5ea
 SHA512:
-  metadata.gz: 80dfa6d2bb7c9304e779a3e80815efbde9c599d66665708738b833b08daa1918ae54bc5b170c8b90c60399fe18b0df06d576e2c8c3d8b76b74f9daa826efcfa8
-  data.tar.gz: 597b715fd3ebd31a49cb2839f7dda814b845cd5aa87a3ac9a9cf551553792b453af749e287652553903de851ea7b06a9e5940abc7c25fccd319a9e7e72d75840
+  metadata.gz: 61f498f33e794258ebed00a468aa779ece52eff4c29d0538f7bc1601391d0a6948c32ed5dfbd76439e55a283ad4c59dc8312254711341dae2b7e79bf45b8a0a0
+  data.tar.gz: 92e1c46977cf3d841c8469fcf7e757cfcb4b6c60e800b063771bed3cc88eac7622e7d9a0c4aab906cefd60d046fe77a1f4e2f932d37687c952db3a598a0f3b1c

data/exe/jrf CHANGED Viewed

@@ -10,4 +10,4 @@ end
 $LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
 require "jrf"
-exit Jrf::CLI.run(ARGV)
+Jrf::CLI.run(ARGV)

data/lib/jrf/cli/runner.rb ADDED Viewed

@@ -0,0 +1,126 @@
+# frozen_string_literal: true
+require "json"
+require_relative "../pipeline"
+require_relative "../pipeline_parser"
+module Jrf
+  class CLI
+    class Runner
+      RS_CHAR = "\x1e"
+      DEFAULT_OUTPUT_BUFFER_LIMIT = 4096
+      class RsNormalizer
+        def initialize(input)
+          @input = input
+        end
+        def read(length = nil, outbuf = nil)
+          chunk = @input.read(length)
+          return nil if chunk.nil?
+          chunk = chunk.tr(RS_CHAR, "\n")
+          if outbuf
+            outbuf.replace(chunk)
+          else
+            chunk
+          end
+        end
+      end
+      def initialize(inputs:, out: $stdout, err: $stderr, lax: false, pretty: false, atomic_write_bytes: DEFAULT_OUTPUT_BUFFER_LIMIT)
+        @inputs = inputs
+        @out = out
+        @err = err
+        @lax = lax
+        @pretty = pretty
+        @atomic_write_bytes = atomic_write_bytes
+        @output_buffer = +""
+      end
+      def run(expression, verbose: false)
+        parsed = PipelineParser.new(expression).parse
+        stages = parsed[:stages]
+        dump_stages(stages) if verbose
+        blocks = stages.map { |stage|
+          eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
+        }
+        pipeline = Pipeline.new(*blocks)
+        input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
+        pipeline.call(input_enum) do |value|
+          emit_output(value)
+        end
+      ensure
+        write_output(@output_buffer)
+      end
+      private
+      def each_input_value
+        return each_input_value_lax { |value| yield value } if @lax
+        each_input_value_ndjson { |value| yield value }
+      end
+      def each_input_value_ndjson
+        each_input do |source|
+          source.each_line do |raw_line|
+            line = raw_line.strip
+            next if line.empty?
+            yield JSON.parse(line)
+          end
+        end
+      end
+      def each_input_value_lax
+        require "oj"
+        handler = Class.new(Oj::ScHandler) do
+          def initialize(&emit)
+            @emit = emit
+          end
+          def hash_start = {}
+          def hash_key(key) = key
+          def hash_set(hash, key, value) = hash[key] = value
+          def array_start = []
+          def array_append(array, value) = array << value
+          def add_value(value) = @emit.call(value)
+        end
+        each_input do |source|
+          Oj.sc_parse(handler.new { |value| yield value }, RsNormalizer.new(source))
+        end
+      rescue LoadError
+        raise "oj is required for --lax mode (gem install oj)"
+      rescue Oj::ParseError => e
+        raise JSON::ParserError, e.message
+      end
+      def dump_stages(stages)
+        stages.each_with_index do |stage, i|
+          @err.puts "stage[#{i}]: #{stage[:src]}"
+        end
+      end
+      def each_input
+        @inputs.each { |source| yield source }
+      end
+      def emit_output(value)
+        record = (@pretty ? JSON.pretty_generate(value) : JSON.generate(value)) << "\n"
+        if @output_buffer.bytesize + record.bytesize <= @atomic_write_bytes
+          @output_buffer << record
+        else
+          write_output(@output_buffer)
+          @output_buffer = record
+        end
+      end
+      def write_output(str)
+        @out.syswrite(str)
+      end
+    end
+  end
+end

data/lib/jrf/cli.rb CHANGED Viewed

@@ -1,13 +1,15 @@
 # frozen_string_literal: true
-require_relative "runner"
+require "optparse"
+require_relative "cli/runner"
+require_relative "version"
 module Jrf
   class CLI
-    USAGE = "usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'"
+    USAGE = "usage: jrf [options] 'STAGE >> STAGE >> ...'"
     HELP_TEXT = <<~'TEXT'
-      usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'
+      usage: jrf [options] 'STAGE >> STAGE >> ...'
       JSON filter with the power and speed of Ruby.
@@ -15,6 +17,10 @@ module Jrf
         -v, --verbose  print parsed stage expressions
         --lax          allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
         -p, --pretty   pretty-print JSON output instead of compact NDJSON
+        --no-jit       do not enable YJIT, even when supported by the Ruby runtime
+        --atomic-write-bytes N
+                       group short outputs into atomic writes of up to N bytes
+        -V, --version  show version and exit
         -h, --help     show this help and exit
       Pipeline:
@@ -36,36 +42,81 @@ module Jrf
       verbose = false
       lax = false
       pretty = false
-      while argv.first&.start_with?("-")
-        case argv.first
-        when "-v", "--verbose"
-          verbose = true
-          argv.shift
-        when "--lax"
-          lax = true
-          argv.shift
-        when "-p", "--pretty"
-          pretty = true
-          argv.shift
-        when "-h", "--help"
-          out.puts HELP_TEXT
-          return 0
-        else
-          err.puts "unknown option: #{argv.first}"
-          err.puts USAGE
-          return 1
+      jit = true
+      atomic_write_bytes = Runner::DEFAULT_OUTPUT_BUFFER_LIMIT
+      begin
+        parser = OptionParser.new do |opts|
+          opts.banner = USAGE
+          opts.on("-v", "--verbose", "print parsed stage expressions") { verbose = true }
+          opts.on("--lax", "allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)") { lax = true }
+          opts.on("-p", "--pretty", "pretty-print JSON output instead of compact NDJSON") { pretty = true }
+          opts.on("--no-jit", "do not enable YJIT, even when supported by the Ruby runtime") { jit = false }
+          opts.on("--atomic-write-bytes N", Integer, "group short outputs into atomic writes of up to N bytes") do |value|
+            if value.positive?
+              atomic_write_bytes = value
+            else
+              raise OptionParser::InvalidArgument, "--atomic-write-bytes requires a positive integer"
+            end
+          end
+          opts.on("-V", "--version", "show version and exit") do
+            out.puts Jrf::VERSION
+            exit
+          end
+          opts.on("-h", "--help", "show this help and exit") do
+            out.puts HELP_TEXT
+            exit
+          end
         end
+        parser.order!(argv)
+      rescue OptionParser::ParseError => e
+        err.puts e.message
+        err.puts USAGE
+        exit 1
       end
       if argv.empty?
         err.puts USAGE
-        return 1
+        exit 1
       end
       expression = argv.shift
-      Runner.new(input: input, out: out, err: err, lax: lax, pretty: pretty).run(expression, verbose: verbose)
-      0
+      enable_yjit if jit
+      inputs = Enumerator.new do |y|
+        if argv.empty?
+          y << input
+        else
+          argv.each do |path|
+            if path == "-"
+              y << input
+            elsif path.end_with?(".gz")
+              require "zlib"
+              Zlib::GzipReader.open(path) do |source|
+                y << source
+              end
+            else
+              File.open(path, "rb") do |source|
+                y << source
+              end
+            end
+          end
+        end
+      end
+      Runner.new(
+        inputs: inputs,
+        out: out,
+        err: err,
+        lax: lax,
+        pretty: pretty,
+        atomic_write_bytes: atomic_write_bytes
+      ).run(expression, verbose: verbose)
+    end
+    def self.enable_yjit
+      return unless defined?(RubyVM::YJIT) && RubyVM::YJIT.respond_to?(:enable)
+      RubyVM::YJIT.enable
     end
   end
 end

data/lib/jrf/pipeline.rb CHANGED Viewed

@@ -22,54 +22,43 @@ module Jrf
     # @yieldparam value output value
     # @return [Array, nil] output values (without block), or nil (with block)
     def call(input, &on_output)
-      if on_output
-        call_streaming(input, &on_output)
-      else
+      if on_output.nil?
         results = []
-        call_streaming(input) { |v| results << v }
-        results
+        on_output = proc { |value| results << value }
       end
-    end
-    private
-    def call_streaming(input, &on_output)
-      error = nil
       begin
         input.each { |value| process_value(value, @stages, &on_output) }
-      rescue StandardError => e
-        error = e
       ensure
         flush_reducers(@stages, &on_output)
       end
-      raise error if error
+      results unless results.nil?
     end
-    def process_value(input, stages, &on_output)
-      current_values = [input]
+    private
-      stages.each do |stage|
-        next_values = []
+    def process_value(value, stages, idx = 0, &on_output)
+      while idx < stages.length
+        value = stages[idx].call(value)
-        current_values.each do |value|
-          out = stage.call(value)
-          if out.equal?(Control::DROPPED)
-            next
-          elsif out.is_a?(Control::Flat)
-            unless out.value.is_a?(Array)
-              raise TypeError, "flat expects Array, got #{out.value.class}"
-            end
-            next_values.concat(out.value)
-          else
-            next_values << out
+        if value.equal?(Control::DROPPED)
+          return
+        elsif value.is_a?(Control::Flat)
+          value = value.value
+          unless value.is_a?(Array)
+            raise TypeError, "flat expects Array, got #{value.class}"
+          end
+          value.each do |child|
+            process_value(child, stages, idx + 1, &on_output)
           end
+          return
         end
-        return if next_values.empty?
-        current_values = next_values
+        idx += 1
       end
-      current_values.each(&on_output)
+      on_output.call(value)
     end
     def flush_reducers(stages, &on_output)

data/lib/jrf/row_context.rb CHANGED Viewed

@@ -13,7 +13,7 @@ module Jrf
       def define_reducer(name, &definition)
         define_method(name) do |*args, **kwargs, &block|
           spec = definition.call(self, *args, **kwargs, block: block)
-          @__jrf_current_stage.allocate_reducer(
+          @__jrf_current_stage.step_reduce(
             spec.fetch(:value),
             initial: reducer_initial_value(spec.fetch(:initial)),
             finish: spec[:finish],
@@ -161,24 +161,24 @@ module Jrf
     def reduce(initial, &block)
       raise ArgumentError, "reduce requires a block" unless block
-      @__jrf_current_stage.allocate_reducer(current_input, initial: initial, &block)
+      @__jrf_current_stage.step_reduce(current_input, initial: initial, &block)
     end
     def map(&block)
       raise ArgumentError, "map requires a block" unless block
-      @__jrf_current_stage.allocate_map(:array, @obj, &block)
+      @__jrf_current_stage.step_map(:map, @obj, &block)
     end
     def map_values(&block)
       raise ArgumentError, "map_values requires a block" unless block
-      @__jrf_current_stage.allocate_map(:hash, @obj, &block)
+      @__jrf_current_stage.step_map(:map_values, @obj, &block)
     end
     def group_by(key, &block)
       block ||= proc { group }
-      @__jrf_current_stage.allocate_group_by(key, &block)
+      @__jrf_current_stage.step_group_by(key, &block)
     end
     private

data/lib/jrf/stage.rb CHANGED Viewed

@@ -39,39 +39,52 @@ module Jrf
       @ctx.__jrf_current_stage = self
       result = @ctx.instance_eval(&@block)
-      if @mode.nil? && @reducers.any?
-        @mode = :reducer
-        @template = result
-      elsif @mode.nil?
-        @mode = :passthrough
+      if @mode.nil?
+        if @reducers.any?
+          @mode = :reducer
+          @template = result
+        else
+          @mode = :passthrough
+        end
       end
       (@mode == :reducer) ? Control::DROPPED : result
     end
-    def allocate_reducer(value, initial:, finish: nil, &step_fn)
+    def step_reduce(value, initial:, finish: nil, &step_fn)
       idx = @cursor
-      finish_rows = finish || ->(acc) { [acc] }
-      @reducers[idx] ||= Reducers.reduce(initial, finish: finish_rows, &step_fn)
+      if @reducers[idx].nil?
+        finish_rows = finish || ->(acc) { [acc] }
+        @reducers[idx] = Reducers.reduce(initial, finish: finish_rows, &step_fn)
+        result = ReducerToken.new(idx)
+      else
+        result = Control::DROPPED
+      end
       @reducers[idx].step(value)
-      @cursor += 1
-      ReducerToken.new(idx)
+      @cursor = idx + 1
+      result
     end
-    def allocate_map(type, collection, &block)
+    def step_map(builtin, collection, &block)
       idx = @cursor
       @cursor += 1
+      if collection.is_a?(Array)
+        raise TypeError, "map_values expects Hash, got Array" if builtin == :map_values
+      elsif !collection.is_a?(Hash)
+        raise TypeError, "#{builtin} expects #{builtin == :map_values ? "Hash" : "Array or Hash"}, got #{collection.class}"
+      end
       # Transformation mode (detected on first call)
       if @map_transforms[idx]
-        return transform_collection(type, collection, &block)
+        return transform_collection(builtin, collection, &block)
       end
-      map_reducer = (@reducers[idx] ||= MapReducer.new(type))
+      map_reducer = (@reducers[idx] ||= MapReducer.new(builtin, collection.is_a?(Array)))
-      case type
-      when :array
-        raise TypeError, "map expects Array, got #{collection.class}" unless collection.is_a?(Array)
+      if collection.is_a?(Array)
         collection.each_with_index do |v, i|
           slot = map_reducer.slot(i)
           with_scoped_reducers(slot.reducers) do
@@ -79,12 +92,11 @@ module Jrf
             slot.template ||= result
           end
         end
-      when :hash
-        raise TypeError, "map_values expects Hash, got #{collection.class}" unless collection.is_a?(Hash)
+      else
         collection.each do |k, v|
           slot = map_reducer.slot(k)
           with_scoped_reducers(slot.reducers) do
-            result = @ctx.send(:__jrf_with_current_input, v) { block.call(v) }
+            result = @ctx.send(:__jrf_with_current_input, v) { invoke_block(builtin, block, k, v) }
             slot.template ||= result
           end
         end
@@ -94,15 +106,15 @@ module Jrf
       if @mode.nil? && map_reducer.slots.values.all? { |s| s.reducers.empty? }
         @map_transforms[idx] = true
         @reducers[idx] = nil
-        return transformed_slots(type, map_reducer)
+        return transformed_slots(builtin, map_reducer)
       end
       ReducerToken.new(idx)
     end
-    def allocate_group_by(key, &block)
+    def step_group_by(key, &block)
       idx = @cursor
-      map_reducer = (@reducers[idx] ||= MapReducer.new(:hash))
+      map_reducer = (@reducers[idx] ||= MapReducer.new(:group_by, false))
       row = @ctx._
       slot = map_reducer.slot(key)
@@ -138,55 +150,82 @@ module Jrf
       @cursor = saved_cursor
     end
-    def transform_collection(type, collection, &block)
-      case type
-      when :array
-        raise TypeError, "map expects Array, got #{collection.class}" unless collection.is_a?(Array)
+    def invoke_block(builtin, block, key, value)
+      case builtin
+      when :map then block.call([key, value])
+      when :map_values then block.call(value)
+      else raise ArgumentError, "unexpected builtin: #{builtin}"
+      end
+    end
+    def transform_collection(builtin, collection, &block)
+      if collection.is_a?(Array)
         collection.each_with_object([]) do |value, result|
           mapped = @ctx.send(:__jrf_with_current_input, value) { block.call(value) }
-          append_map_result(result, mapped)
+          append_result(result, mapped, builtin)
         end
-      when :hash
-        raise TypeError, "map_values expects Hash, got #{collection.class}" unless collection.is_a?(Hash)
-        collection.each_with_object({}) do |(key, value), result|
-          mapped = @ctx.send(:__jrf_with_current_input, value) { block.call(value) }
-          next if mapped.equal?(Control::DROPPED)
-          raise TypeError, "flat is not supported inside map_values" if mapped.is_a?(Control::Flat)
+      else
+        case builtin
+        when :map
+          collection.each_with_object([]) do |(key, value), result|
+            mapped = @ctx.send(:__jrf_with_current_input, value) { invoke_block(builtin, block, key, value) }
+            append_result(result, mapped, builtin)
+          end
+        when :map_values
+          collection.each_with_object({}) do |(key, value), result|
+            mapped = @ctx.send(:__jrf_with_current_input, value) { invoke_block(builtin, block, key, value) }
+            next if mapped.equal?(Control::DROPPED)
+            raise TypeError, "flat is not supported inside map_values" if mapped.is_a?(Control::Flat)
-          result[key] = mapped
+            result[key] = mapped
+          end
+        else
+          raise ArgumentError, "unexpected builtin: #{builtin}"
         end
       end
     end
-    def transformed_slots(type, map_reducer)
-      case type
-      when :array
+    def transformed_slots(builtin, map_reducer)
+      if map_reducer.array_input?
         map_reducer.slots
           .sort_by { |k, _| k }
           .each_with_object([]) do |(_, slot), result|
-            append_map_result(result, slot.template)
+            append_result(result, slot.template, builtin)
+          end
+      else
+        case builtin
+        when :map
+          map_reducer.slots.each_with_object([]) do |(_key, slot), result|
+            append_result(result, slot.template, builtin)
           end
-      when :hash
-        map_reducer.slots.each_with_object({}) do |(key, slot), result|
-          next if slot.template.equal?(Control::DROPPED)
-          raise TypeError, "flat is not supported inside map_values" if slot.template.is_a?(Control::Flat)
+        when :map_values
+          map_reducer.slots.each_with_object({}) do |(key, slot), result|
+            next if slot.template.equal?(Control::DROPPED)
+            raise TypeError, "flat is not supported inside map_values" if slot.template.is_a?(Control::Flat)
-          result[key] = slot.template
+            result[key] = slot.template
+          end
+        else
+          raise ArgumentError, "unexpected builtin: #{builtin}"
         end
       end
     end
-    def append_map_result(result, mapped)
+    def append_result(result, mapped, builtin)
       return if mapped.equal?(Control::DROPPED)
       if mapped.is_a?(Control::Flat)
-        unless mapped.value.is_a?(Array)
-          raise TypeError, "flat expects Array, got #{mapped.value.class}"
+        case builtin
+        when :map
+          unless mapped.value.is_a?(Array)
+            raise TypeError, "flat expects Array, got #{mapped.value.class}"
+          end
+          result.concat(mapped.value)
+        when :map_values
+          raise TypeError, "flat is not supported inside map_values"
+        else
+          raise ArgumentError, "unexpected builtin: #{builtin}"
         end
-        result.concat(mapped.value)
       else
         result << mapped
       end
@@ -195,24 +234,35 @@ module Jrf
     class MapReducer
       attr_reader :slots
-      def initialize(type)
-        @type = type
+      def initialize(builtin, array_input)
+        @builtin = builtin
+        @array_input = array_input
         @slots = {}
       end
+      def array_input?
+        @array_input
+      end
       def slot(key)
         @slots[key] ||= SlotState.new
       end
       def finish
-        case @type
-        when :array
+        if @array_input
           keys = @slots.keys.sort
           [keys.map { |k| Stage.resolve_template(@slots[k].template, @slots[k].reducers) }]
-        when :hash
-          result = {}
-          @slots.each { |k, s| result[k] = Stage.resolve_template(s.template, s.reducers) }
-          [result]
+        else
+          case @builtin
+          when :map
+            [@slots.map { |_k, s| Stage.resolve_template(s.template, s.reducers) }]
+          when :map_values, :group_by
+            result = {}
+            @slots.each { |k, s| result[k] = Stage.resolve_template(s.template, s.reducers) }
+            [result]
+          else
+            raise ArgumentError, "unexpected builtin: #{@builtin}"
+          end
         end
       end

data/lib/jrf/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Jrf
-  VERSION = "0.1.5"
+  VERSION = "0.1.7"
 end

data/test/jrf_test.rb CHANGED Viewed

@@ -1,7 +1,17 @@
 # frozen_string_literal: true
+begin
+  require "bundler/setup"
+rescue LoadError
+  # Allow running tests in plain Ruby environments with globally installed gems.
+end
 require "json"
 require "open3"
+require "stringio"
+require "tmpdir"
+require "zlib"
+require_relative "../lib/jrf/cli/runner"
 def run_jrf(expr, input, *opts)
   Open3.capture3("./exe/jrf", *opts, expr, stdin_data: input)
@@ -41,6 +51,45 @@ def lines(str)
   str.lines.map(&:strip).reject(&:empty?)
 end
+class RecordingRunner < Jrf::CLI::Runner
+  attr_reader :writes
+  def initialize(**kwargs)
+    super
+    @writes = []
+  end
+  private
+  def write_output(str)
+    return if str.empty?
+    @writes << str
+  end
+end
+class ChunkedSource
+  def initialize(str, chunk_size: 5)
+    @str = str
+    @chunk_size = chunk_size
+    @offset = 0
+  end
+  def read(length = nil, outbuf = nil)
+    raise "expected chunked reads" if length.nil?
+    chunk = @str.byteslice(@offset, [length, @chunk_size].min)
+    return nil unless chunk
+    @offset += chunk.bytesize
+    if outbuf
+      outbuf.replace(chunk)
+    else
+      chunk
+    end
+  end
+end
 File.chmod(0o755, "./exe/jrf")
 input = <<~NDJSON
@@ -92,10 +141,14 @@ assert_includes(stderr, 'stage[1]: _["hello"]')
 stdout, stderr, status = Open3.capture3("./exe/jrf", "--help")
 assert_success(status, stderr, "help option")
-assert_includes(stdout, "usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'")
+assert_includes(stdout, "usage: jrf [options] 'STAGE >> STAGE >> ...'")
 assert_includes(stdout, "JSON filter with the power and speed of Ruby.")
 assert_includes(stdout, "--lax")
 assert_includes(stdout, "--pretty")
+assert_includes(stdout, "--no-jit")
+assert_includes(stdout, "-V")
+assert_includes(stdout, "--version")
+assert_includes(stdout, "--atomic-write-bytes N")
 assert_includes(stdout, "Pipeline:")
 assert_includes(stdout, "Connect stages with top-level >>.")
 assert_includes(stdout, "The current value in each stage is available as _.")
@@ -103,11 +156,94 @@ assert_includes(stdout, "See Also:")
 assert_includes(stdout, "https://github.com/kazuho/jrf#readme")
 assert_equal([], lines(stderr), "help stderr output")
+stdout, stderr, status = Open3.capture3("./exe/jrf", "--version")
+assert_success(status, stderr, "version long option")
+assert_equal([Jrf::VERSION], lines(stdout), "version long option output")
+assert_equal([], lines(stderr), "version long option stderr")
+stdout, stderr, status = Open3.capture3("./exe/jrf", "-V")
+assert_success(status, stderr, "version short option")
+assert_equal([Jrf::VERSION], lines(stdout), "version short option output")
+assert_equal([], lines(stderr), "version short option stderr")
+threshold_input = StringIO.new((1..4).map { |i| "{\"foo\":\"#{'x' * 1020}\",\"i\":#{i}}\n" }.join)
+buffered_runner = RecordingRunner.new(inputs: [threshold_input], out: StringIO.new, err: StringIO.new)
+buffered_runner.run('_')
+expected_line = JSON.generate({"foo" => "x" * 1020, "i" => 1}) + "\n"
+assert_equal(2, buffered_runner.writes.length, "default atomic write limit buffers records until the configured threshold")
+assert_equal(expected_line.bytesize * 3, buffered_runner.writes.first.bytesize, "default atomic write limit flushes before the next record would exceed the threshold")
+assert_equal(expected_line.bytesize, buffered_runner.writes.last.bytesize, "final buffer flush emits the remaining record")
+small_limit_runner = RecordingRunner.new(inputs: [StringIO.new("{\"foo\":1}\n{\"foo\":2}\n")], out: StringIO.new, err: StringIO.new, atomic_write_bytes: 1)
+small_limit_runner.run('_["foo"]')
+assert_equal(["1\n", "2\n"], small_limit_runner.writes, "small atomic write limit emits oversized records directly")
+error_runner = RecordingRunner.new(inputs: [StringIO.new("{\"foo\":1}\n{\"foo\":")], out: StringIO.new, err: StringIO.new)
+begin
+  error_runner.run('_["foo"]')
+  raise "expected parse error for buffered flush test"
+rescue JSON::ParserError
+  assert_equal(["1\n"], error_runner.writes, "buffer flushes pending output before parse errors escape")
+end
 stdout, stderr, status = run_jrf('select(_["hello"] == 123) >> _["hello"]', input_hello, "--verbose")
 assert_success(status, stderr, "dump stages verbose alias")
 assert_equal(%w[123], lines(stdout), "dump stages verbose alias output")
 assert_includes(stderr, 'stage[0]: select(_["hello"] == 123)')
+stdout, stderr, status = run_jrf('_["hello"]', input_hello, "--atomic-write-bytes", "512")
+assert_success(status, stderr, "atomic write bytes option")
+assert_equal(%w[123 456], lines(stdout), "atomic write bytes option output")
+stdout, stderr, status = run_jrf('_["hello"]', input_hello, "--atomic-write-bytes=512")
+assert_success(status, stderr, "atomic write bytes equals form")
+assert_equal(%w[123 456], lines(stdout), "atomic write bytes equals form output")
+stdout, stderr, status = Open3.capture3("./exe/jrf", "--atomic-write-bytes", "0", '_["hello"]', stdin_data: input_hello)
+assert_failure(status, "atomic write bytes rejects zero")
+assert_includes(stderr, "--atomic-write-bytes requires a positive integer")
+if defined?(RubyVM::YJIT) && RubyVM::YJIT.respond_to?(:enabled?)
+  yjit_probe = "{\"probe\":1}\n"
+  stdout, stderr, status = run_jrf('RubyVM::YJIT.enabled?', yjit_probe)
+  assert_success(status, stderr, "default jit enablement")
+  assert_equal(%w[true], lines(stdout), "default jit enablement output")
+  stdout, stderr, status = run_jrf('RubyVM::YJIT.enabled?', yjit_probe, "--no-jit")
+  assert_success(status, stderr, "no-jit option")
+  assert_equal(%w[false], lines(stdout), "no-jit option output")
+end
+Dir.mktmpdir do |dir|
+  gz_path = File.join(dir, "input.ndjson.gz")
+  Zlib::GzipWriter.open(gz_path) do |io|
+    io.write("{\"foo\":10}\n{\"foo\":20}\n")
+  end
+  stdout, stderr, status = Open3.capture3("./exe/jrf", '_["foo"]', gz_path)
+  assert_success(status, stderr, "compressed input by suffix")
+  assert_equal(%w[10 20], lines(stdout), "compressed input output")
+  lax_gz_path = File.join(dir, "input-lax.json.gz")
+  Zlib::GzipWriter.open(lax_gz_path) do |io|
+    io.write("{\"foo\":30}\n\x1e{\"foo\":40}\n")
+  end
+  stdout, stderr, status = Open3.capture3("./exe/jrf", "--lax", '_["foo"]', lax_gz_path)
+  assert_success(status, stderr, "compressed lax input by suffix")
+  assert_equal(%w[30 40], lines(stdout), "compressed lax input output")
+  second_gz_path = File.join(dir, "input2.ndjson.gz")
+  Zlib::GzipWriter.open(second_gz_path) do |io|
+    io.write("{\"foo\":50}\n")
+  end
+  stdout, stderr, status = Open3.capture3("./exe/jrf", '_["foo"]', gz_path, second_gz_path)
+  assert_success(status, stderr, "multiple compressed inputs by suffix")
+  assert_equal(%w[10 20 50], lines(stdout), "multiple compressed input output")
+end
 stdout, stderr, status = run_jrf('_', input_hello, "--pretty")
 assert_success(status, stderr, "pretty output")
 assert_equal(
@@ -493,6 +629,26 @@ stdout, stderr, status = run_jrf('_["foo"]', input_lax_trailing_rs, "--lax")
 assert_success(status, stderr, "lax ignores trailing separator")
 assert_equal(%w[9], lines(stdout), "lax trailing separator output")
+chunked_lax_out = RecordingRunner.new(
+  inputs: [ChunkedSource.new("{\"foo\":1}\n\x1e{\"foo\":2}\n\t{\"foo\":3}\n")],
+  out: StringIO.new,
+  err: StringIO.new,
+  lax: true
+)
+chunked_lax_out.run('_["foo"]')
+assert_equal(%w[1 2 3], lines(chunked_lax_out.writes.join), "lax mode streams chunked input without whole-input reads")
+Dir.mktmpdir do |dir|
+  one = File.join(dir, "one.json")
+  two = File.join(dir, "two.json")
+  File.write(one, "1")
+  File.write(two, "2")
+  stdout, stderr, status = Open3.capture3("./exe/jrf", "--lax", "_", one, two)
+  assert_success(status, stderr, "lax keeps file boundaries")
+  assert_equal(%w[1 2], lines(stdout), "lax does not merge JSON across file boundaries")
+end
 stdout, stderr, status = run_jrf('select(_["x"] > ) >> _["foo"]', "")
 assert_failure(status, "syntax error should fail before row loop")
 assert_includes(stderr, "syntax error")
@@ -616,6 +772,26 @@ stdout, stderr, status = run_jrf('map_values { |v| reduce(0) { |acc, x| acc + x
 assert_success(status, stderr, "map_values with reduce")
 assert_equal(['{"a":6,"b":60}'], lines(stdout), "map_values with reduce output")
+stdout, stderr, status = run_jrf('map { |k, v| "#{k}:#{v}" }', input_map_values)
+assert_success(status, stderr, "map over hash transform")
+assert_equal(['["a:1","b:10"]', '["a:2","b:20"]', '["a:3","b:30"]'], lines(stdout), "map over hash transform output")
+stdout, stderr, status = run_jrf('map { |pair| pair }', input_map_values)
+assert_success(status, stderr, "map over hash single block arg")
+assert_equal(['[["a",1],["b",10]]', '[["a",2],["b",20]]', '[["a",3],["b",30]]'], lines(stdout), "map over hash single block arg output")
+stdout, stderr, status = run_jrf('map { |k, v| select(v >= 10 && k != "a") }', input_map_values)
+assert_success(status, stderr, "map over hash transform with select")
+assert_equal(['[10]', '[20]', '[30]'], lines(stdout), "map over hash transform with select output")
+stdout, stderr, status = run_jrf('map { |k, v| sum(v + k.length) }', input_map_values)
+assert_success(status, stderr, "map over hash with sum")
+assert_equal(['[9,63]'], lines(stdout), "map over hash with sum output")
+stdout, stderr, status = run_jrf('map { |k, v| sum(_["a"] + v + k.length) }', input_map_values)
+assert_success(status, stderr, "map over hash keeps ambient _")
+assert_equal(['[15,69]'], lines(stdout), "map over hash ambient _ output")
 stdout, stderr, status = run_jrf('select(false) >> map { |x| sum(x) }', input_map)
 assert_success(status, stderr, "map no matches")
 assert_equal([], lines(stdout), "map no matches output")
@@ -750,6 +926,18 @@ assert_equal([[4, 6]], j.call([[1, 2], [3, 4]]), "library map reduce")
 j = Jrf.new(proc { map_values { |v| v * 10 } })
 assert_equal([{"a" => 10, "b" => 20}], j.call([{"a" => 1, "b" => 2}]), "library map_values transform")
+# map hash transform
+j = Jrf.new(proc { map { |k, v| "#{k}=#{v}" } })
+assert_equal([["a=1", "b=2"]], j.call([{"a" => 1, "b" => 2}]), "library map hash transform")
+# map hash single block arg
+j = Jrf.new(proc { map { |pair| pair } })
+assert_equal([[["a", 1], ["b", 2]]], j.call([{"a" => 1, "b" => 2}]), "library map hash single block arg")
+# map hash reduce
+j = Jrf.new(proc { map { |k, v| sum(v + k.length) } })
+assert_equal([[5, 7]], j.call([{"a" => 1, "b" => 2}, {"a" => 2, "b" => 3}]), "library map hash reduce")
 # group_by
 j = Jrf.new(proc { group_by(_["k"]) { count() } })
 assert_equal([{"x" => 2, "y" => 1}], j.call([{"k" => "x"}, {"k" => "x"}, {"k" => "y"}]), "library group_by")
@@ -770,4 +958,13 @@ assert_equal([{"a" => 3}], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "librar
 j = Jrf.new(proc { sum(_) })
 assert_equal([], j.call([]), "library empty input")
+ctx = Jrf::RowContext.new
+stage = Jrf::Stage.new(ctx, proc { })
+first_token = stage.step_reduce(1, initial: 0) { |acc, v| acc + v }
+assert_equal(0, first_token.index, "step_reduce returns token while classifying reducer stage")
+stage.instance_variable_set(:@mode, :reducer)
+stage.instance_variable_set(:@cursor, 0)
+second_token = stage.step_reduce(2, initial: 0) { |acc, v| acc + v }
+raise "expected DROPPED for established reducer slot" unless second_token.equal?(Jrf::Control::DROPPED)
 puts "ok"

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: jrf
 version: !ruby/object:Gem::Version
-  version: 0.1.5
+  version: 0.1.7
 platform: ruby
 authors:
 - kazuho
@@ -40,12 +40,12 @@ files:
 - jrf.gemspec
 - lib/jrf.rb
 - lib/jrf/cli.rb
+- lib/jrf/cli/runner.rb
 - lib/jrf/control.rb
 - lib/jrf/pipeline.rb
 - lib/jrf/pipeline_parser.rb
 - lib/jrf/reducers.rb
 - lib/jrf/row_context.rb
-- lib/jrf/runner.rb
 - lib/jrf/stage.rb
 - lib/jrf/version.rb
 - test/jrf_test.rb

data/lib/jrf/runner.rb DELETED Viewed

@@ -1,81 +0,0 @@
-# frozen_string_literal: true
-require "json"
-require_relative "pipeline"
-require_relative "pipeline_parser"
-module Jrf
-  class Runner
-    RS_CHAR = "\x1e"
-    def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false, pretty: false)
-      @input = input
-      @out = out
-      @err = err
-      @lax = lax
-      @pretty = pretty
-    end
-    def run(expression, verbose: false)
-      parsed = PipelineParser.new(expression).parse
-      stages = parsed[:stages]
-      dump_stages(stages) if verbose
-      blocks = stages.map { |stage|
-        eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
-      }
-      pipeline = Pipeline.new(*blocks)
-      input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
-      pipeline.call(input_enum) do |value|
-        @out.puts(@pretty ? JSON.pretty_generate(value) : JSON.generate(value))
-      end
-    end
-    private
-    def each_input_value
-      return each_input_value_lax { |value| yield value } if @lax
-      each_input_value_ndjson { |value| yield value }
-    end
-    def each_input_value_ndjson
-      @input.each_line do |raw_line|
-        line = raw_line.strip
-        next if line.empty?
-        yield JSON.parse(line)
-      end
-    end
-    def each_input_value_lax
-      require "oj"
-      source = @input.read.to_s
-      source = source.include?(RS_CHAR) ? source.tr(RS_CHAR, "\n") : source
-      handler = Class.new(Oj::ScHandler) do
-        def initialize(&emit)
-          @emit = emit
-        end
-        def hash_start = {}
-        def hash_key(key) = key
-        def hash_set(hash, key, value) = hash[key] = value
-        def array_start = []
-        def array_append(array, value) = array << value
-        def add_value(value) = @emit.call(value)
-      end.new { |value| yield value }
-      Oj.sc_parse(handler, source)
-    rescue LoadError
-      raise "oj is required for --lax mode (gem install oj)"
-    rescue Oj::ParseError => e
-      raise JSON::ParserError, e.message
-    end
-    def dump_stages(stages)
-      stages.each_with_index do |stage, i|
-        @err.puts "stage[#{i}]: #{stage[:src]}"
-      end
-    end
-  end
-end