jrf 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9ce648c2afbfe10dc161b08badb05acdb411baf839dde77433927380b6bb7439
4
- data.tar.gz: 6be5a0851eecd3cfcbe93aff1cb8fdd163a84dd96a7b12e440fc514db03f67a0
3
+ metadata.gz: e9bb2a3a16d2bbe8cfb463267ff74d7d582511d4b4891e56ad3dfa6eee75fceb
4
+ data.tar.gz: a13b2e9c8517c3da997452166556505b24fc4d5f898765ad33495eafd57c3081
5
5
  SHA512:
6
- metadata.gz: aa4dfead95dbe09453ec720cdbcf77ba4c7e3f1047c60f51d4ff54724dfa540bb1dbd5630ecb07d09d745e1e61e4c236f50f4407ff6d4c17dd5431b385679f57
7
- data.tar.gz: 03c3f5dd3f36675a2bc31981effc506bb1822bb170e754785ccffe077becdd5af13421b4cbfd18fea1c1262f06feef61561be3e3243ca0379e1e6af21ad003c5
6
+ metadata.gz: 54b400cdaba584896f2511acfe9a41ef10af25033bf88cfc6e0386eaa840df9395fb0d008c320b3193d55a9c3fad444a7f54bd29f52c34f69bc9a9cf392a7809
7
+ data.tar.gz: 80c72675e179da483316bfeaee7114da6edb49dc66ae179aa072d48907c4c9caf74113c6681b2f4a83f4b97da6faac436f5d6af5bd31e82605b122d85892cede
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require_relative "../pipeline"
5
+ require_relative "../pipeline_parser"
6
+
7
module Jrf
  class CLI
    # Runs one jrf pipeline: parses the expression into stages, feeds every JSON
    # value read from the input sources through the pipeline, and writes each
    # result as one JSON record followed by a newline.
    #
    # Records are collected in an in-memory buffer and handed to the output
    # stream in batches of at most +atomic_write_bytes+ bytes, so several short
    # records leave the process in a single +syswrite+ call.
    class Runner
      # Record separator byte; in --lax mode it is treated like a newline.
      RS_CHAR = "\x1e"
      # Default upper bound (in bytes) for one batched output write.
      DEFAULT_OUTPUT_BUFFER_LIMIT = 4096

      # Read-only wrapper around a source that replaces every RS byte with "\n"
      # in the data it returns, so the lax parser sees RS as plain whitespace.
      class RsNormalizer
        # @param input [#read] the underlying source
        def initialize(input)
          @input = input
        end

        # Reads from the wrapped source and normalizes the chunk.
        #
        # @param length [Integer, nil] forwarded to the source's +read+
        # @param outbuf [String, nil] when given, receives the normalized chunk
        # @return [String, nil] the normalized chunk, or nil when the source returned nil
        def read(length = nil, outbuf = nil)
          chunk = @input.read(length)
          return nil if chunk.nil?

          chunk = chunk.tr(RS_CHAR, "\n")
          if outbuf
            outbuf.replace(chunk)
          else
            chunk
          end
        end
      end

      # @param inputs [#each] yields readable sources (each must support
      #   +each_line+, or +read+ in lax mode)
      # @param out [#syswrite] destination for JSON output
      # @param err [#puts] destination for verbose stage dumps
      # @param lax [Boolean] parse whitespace/RS separated JSON texts with oj
      # @param pretty [Boolean] pretty-print instead of compact output
      # @param atomic_write_bytes [Integer] batch size limit for output writes
      def initialize(inputs:, out: $stdout, err: $stderr, lax: false, pretty: false, atomic_write_bytes: DEFAULT_OUTPUT_BUFFER_LIMIT)
        @inputs = inputs
        @out = out
        @err = err
        @lax = lax
        @pretty = pretty
        @atomic_write_bytes = atomic_write_bytes
        @output_buffer = +""
      end

      # Parses +expression+, runs every input value through the pipeline and
      # emits the results. Buffered output is flushed even when an error
      # escapes, so records produced before the failure are not lost.
      #
      # @param expression [String] pipeline source ('STAGE >> STAGE >> ...')
      # @param verbose [Boolean] print the parsed stage sources to +err+ first
      def run(expression, verbose: false)
        parsed = PipelineParser.new(expression).parse
        stages = parsed[:stages]
        dump_stages(stages) if verbose

        # NOTE: stage sources are evaluated as Ruby code by design; the
        # expression must only ever come from a trusted caller.
        blocks = stages.map { |stage|
          eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
        }
        pipeline = Pipeline.new(*blocks)

        input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
        pipeline.call(input_enum) do |value|
          emit_output(value)
        end
      ensure
        flush_output
      end

      private

      # Yields every JSON value from all inputs, using the lax or the NDJSON reader.
      def each_input_value
        return each_input_value_lax { |value| yield value } if @lax

        each_input_value_ndjson { |value| yield value }
      end

      # Strict mode: one JSON text per non-blank line.
      def each_input_value_ndjson
        each_input do |source|
          source.each_line do |raw_line|
            line = raw_line.strip
            next if line.empty?

            yield JSON.parse(line)
          end
        end
      end

      # Lax mode: streams each source through Oj's SC parser so JSON texts may
      # span lines. Each source is parsed separately, so a JSON text never
      # continues across a source boundary.
      #
      # @raise [RuntimeError] when the oj gem cannot be loaded
      # @raise [JSON::ParserError] when oj reports a parse error
      def each_input_value_lax
        require "oj"
        handler = Class.new(Oj::ScHandler) do
          def initialize(&emit)
            @emit = emit
          end

          def hash_start = {}
          def hash_key(key) = key
          def hash_set(hash, key, value) = hash[key] = value
          def array_start = []
          def array_append(array, value) = array << value
          def add_value(value) = @emit.call(value)
        end
        each_input do |source|
          Oj.sc_parse(handler.new { |value| yield value }, RsNormalizer.new(source))
        end
      rescue LoadError
        raise "oj is required for --lax mode (gem install oj)"
      rescue Oj::ParseError => e
        raise JSON::ParserError, e.message
      end

      # Prints the source of every parsed stage to the error stream.
      def dump_stages(stages)
        stages.each_with_index do |stage, i|
          @err.puts "stage[#{i}]: #{stage[:src]}"
        end
      end

      # Yields each input source in order.
      def each_input
        @inputs.each { |source| yield source }
      end

      # Serializes +value+ and appends it to the pending batch. The batch is
      # flushed first when adding the record would push it past the limit, so
      # a batch only exceeds the limit when a single record does.
      def emit_output(value)
        record = (@pretty ? JSON.pretty_generate(value) : JSON.generate(value)) << "\n"
        flush_output if @output_buffer.bytesize + record.bytesize > @atomic_write_bytes
        @output_buffer << record
      end

      # Writes the pending batch, if any, and starts a fresh one. A new String
      # is assigned (rather than clearing in place) so the String handed to
      # write_output is never mutated afterwards, and the buffer is reset
      # before writing so a failed or repeated flush cannot emit it twice.
      def flush_output
        return if @output_buffer.empty?

        pending = @output_buffer
        @output_buffer = +""
        write_output(pending)
      end

      # Writes +str+ to the output stream with syswrite. syswrite may accept
      # only part of the data, so the remainder is retried until everything
      # has been written. A stream that reports no progress (or returns a
      # non-Integer) ends the loop instead of spinning.
      def write_output(str)
        pending = str
        until pending.empty?
          written = @out.syswrite(pending)
          return unless written.is_a?(Integer) && written.positive? && written < pending.bytesize

          pending = pending.byteslice(written, pending.bytesize - written)
        end
      end
    end
  end
end
data/lib/jrf/cli.rb CHANGED
@@ -1,13 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "runner"
3
+ require_relative "cli/runner"
4
4
 
5
5
  module Jrf
6
6
  class CLI
7
- USAGE = "usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'"
7
+ USAGE = "usage: jrf [options] 'STAGE >> STAGE >> ...'"
8
8
 
9
9
  HELP_TEXT = <<~'TEXT'
10
- usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'
10
+ usage: jrf [options] 'STAGE >> STAGE >> ...'
11
11
 
12
12
  JSON filter with the power and speed of Ruby.
13
13
 
@@ -15,6 +15,8 @@ module Jrf
15
15
  -v, --verbose print parsed stage expressions
16
16
  --lax allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
17
17
  -p, --pretty pretty-print JSON output instead of compact NDJSON
18
+ --atomic-write-bytes N
19
+ group short outputs into atomic writes of up to N bytes
18
20
  -h, --help show this help and exit
19
21
 
20
22
  Pipeline:
@@ -36,6 +38,7 @@ module Jrf
36
38
  verbose = false
37
39
  lax = false
38
40
  pretty = false
41
+ atomic_write_bytes = Runner::DEFAULT_OUTPUT_BUFFER_LIMIT
39
42
 
40
43
  while argv.first&.start_with?("-")
41
44
  case argv.first
@@ -48,6 +51,14 @@ module Jrf
48
51
  when "-p", "--pretty"
49
52
  pretty = true
50
53
  argv.shift
54
+ when /\A--atomic-write-bytes=(.+)\z/
55
+ atomic_write_bytes = parse_atomic_write_bytes(Regexp.last_match(1), err)
56
+ return 1 unless atomic_write_bytes
57
+ argv.shift
58
+ when "--atomic-write-bytes"
59
+ argv.shift
60
+ atomic_write_bytes = parse_atomic_write_bytes(argv.shift, err)
61
+ return 1 unless atomic_write_bytes
51
62
  when "-h", "--help"
52
63
  out.puts HELP_TEXT
53
64
  return 0
@@ -64,8 +75,43 @@ module Jrf
64
75
  end
65
76
 
66
77
  expression = argv.shift
67
- Runner.new(input: input, out: out, err: err, lax: lax, pretty: pretty).run(expression, verbose: verbose)
78
+ inputs = Enumerator.new do |y|
79
+ if argv.empty?
80
+ y << input
81
+ else
82
+ argv.each do |path|
83
+ if path == "-"
84
+ y << input
85
+ elsif path.end_with?(".gz")
86
+ require "zlib"
87
+ Zlib::GzipReader.open(path) do |source|
88
+ y << source
89
+ end
90
+ else
91
+ File.open(path, "rb") do |source|
92
+ y << source
93
+ end
94
+ end
95
+ end
96
+ end
97
+ end
98
+ Runner.new(
99
+ inputs: inputs,
100
+ out: out,
101
+ err: err,
102
+ lax: lax,
103
+ pretty: pretty,
104
+ atomic_write_bytes: atomic_write_bytes
105
+ ).run(expression, verbose: verbose)
68
106
  0
69
107
  end
108
+
109
# Parses the argument of --atomic-write-bytes.
#
# The value is always read as a decimal number: without an explicit base,
# Integer() would auto-detect the radix and read "010" as octal 8 or accept
# "0x10" as 16, which is surprising for a byte count given on the command line.
#
# @param value [String, nil] raw option argument (nil when it is missing)
# @param err [#puts] stream that receives the error message
# @return [Integer, nil] the positive byte count, or nil after reporting an error
def self.parse_atomic_write_bytes(value, err)
  bytes = Integer(value.to_s, 10, exception: false)
  return bytes if bytes && bytes.positive?

  err.puts "--atomic-write-bytes requires a positive integer"
  nil
end
70
116
  end
71
117
  end
@@ -26,10 +26,12 @@ module Jrf
26
26
  def initialize(obj = nil)
27
27
  @obj = obj
28
28
  @__jrf_current_stage = nil
29
+ @__jrf_current_input = obj
29
30
  end
30
31
 
31
32
  def reset(obj)
32
33
  @obj = obj
34
+ @__jrf_current_input = obj
33
35
  self
34
36
  end
35
37
 
@@ -38,11 +40,11 @@ module Jrf
38
40
  end
39
41
 
40
42
  def flat
41
- Control::Flat.new(@obj)
43
+ Control::Flat.new(current_input)
42
44
  end
43
45
 
44
46
  def select(predicate)
45
- predicate ? @obj : Control::DROPPED
47
+ predicate ? current_input : Control::DROPPED
46
48
  end
47
49
 
48
50
  define_reducer(:sum) do |_ctx, value, initial: 0, block: nil|
@@ -111,15 +113,16 @@ module Jrf
111
113
  define_reducer(:sort) do |ctx, key = MISSING, block: nil|
112
114
  if block
113
115
  {
114
- value: ctx._,
116
+ value: ctx.send(:current_input),
115
117
  initial: -> { [] },
116
118
  finish: ->(rows) { rows.sort(&block) },
117
119
  step: ->(rows, row) { rows << row }
118
120
  }
119
121
  else
120
- resolved_key = key.equal?(MISSING) ? ctx._ : key
122
+ current = ctx.send(:current_input)
123
+ resolved_key = key.equal?(MISSING) ? current : key
121
124
  {
122
- value: [resolved_key, ctx._],
125
+ value: [resolved_key, current],
123
126
  initial: -> { [] },
124
127
  finish: ->(pairs) { pairs.sort_by(&:first).map(&:last) },
125
128
  step: ->(pairs, pair) { pairs << pair }
@@ -128,7 +131,7 @@ module Jrf
128
131
  end
129
132
 
130
133
  define_reducer(:group) do |ctx, value = MISSING, block: nil|
131
- resolved_value = value.equal?(MISSING) ? ctx._ : value
134
+ resolved_value = value.equal?(MISSING) ? ctx.send(:current_input) : value
132
135
  { value: resolved_value, initial: -> { [] }, step: ->(acc, v) { acc << v } }
133
136
  end
134
137
 
@@ -158,7 +161,7 @@ module Jrf
158
161
  def reduce(initial, &block)
159
162
  raise ArgumentError, "reduce requires a block" unless block
160
163
 
161
- @__jrf_current_stage.allocate_reducer(@obj, initial: initial, &block)
164
+ @__jrf_current_stage.allocate_reducer(current_input, initial: initial, &block)
162
165
  end
163
166
 
164
167
  def map(&block)
@@ -180,6 +183,18 @@ module Jrf
180
183
 
181
184
  private
182
185
 
186
# The value currently registered as this context's input (may be nil).
def current_input = @__jrf_current_input
189
+
190
# Temporarily installs +value+ as the current input while the given block
# runs, then puts the previous input back, even when the block raises.
#
# @param value [Object] input to expose during the block
# @return [Object] whatever the block returns
def __jrf_with_current_input(value)
  previous, @__jrf_current_input = current_input, value
  yield
ensure
  @__jrf_current_input = previous
end
197
+
183
198
  def reducer_initial_value(initial)
184
199
  return initial.call if initial.respond_to?(:call)
185
200
  return initial.dup if initial.is_a?(Array) || initial.is_a?(Hash)
data/lib/jrf/stage.rb CHANGED
@@ -64,10 +64,7 @@ module Jrf
64
64
 
65
65
  # Transformation mode (detected on first call)
66
66
  if @map_transforms[idx]
67
- case type
68
- when :array then return collection.map(&block)
69
- when :hash then return collection.transform_values(&block)
70
- end
67
+ return transform_collection(type, collection, &block)
71
68
  end
72
69
 
73
70
  map_reducer = (@reducers[idx] ||= MapReducer.new(type))
@@ -78,7 +75,7 @@ module Jrf
78
75
  collection.each_with_index do |v, i|
79
76
  slot = map_reducer.slot(i)
80
77
  with_scoped_reducers(slot.reducers) do
81
- result = block.call(v)
78
+ result = @ctx.send(:__jrf_with_current_input, v) { block.call(v) }
82
79
  slot.template ||= result
83
80
  end
84
81
  end
@@ -87,7 +84,7 @@ module Jrf
87
84
  collection.each do |k, v|
88
85
  slot = map_reducer.slot(k)
89
86
  with_scoped_reducers(slot.reducers) do
90
- result = block.call(v)
87
+ result = @ctx.send(:__jrf_with_current_input, v) { block.call(v) }
91
88
  slot.template ||= result
92
89
  end
93
90
  end
@@ -97,12 +94,7 @@ module Jrf
97
94
  if @mode.nil? && map_reducer.slots.values.all? { |s| s.reducers.empty? }
98
95
  @map_transforms[idx] = true
99
96
  @reducers[idx] = nil
100
- case type
101
- when :array
102
- return map_reducer.slots.sort_by { |k, _| k }.map { |_, s| s.template }
103
- when :hash
104
- return map_reducer.slots.transform_values(&:template)
105
- end
97
+ return transformed_slots(type, map_reducer)
106
98
  end
107
99
 
108
100
  ReducerToken.new(idx)
@@ -115,7 +107,7 @@ module Jrf
115
107
  row = @ctx._
116
108
  slot = map_reducer.slot(key)
117
109
  with_scoped_reducers(slot.reducers) do
118
- result = block.call(row)
110
+ result = @ctx.send(:__jrf_with_current_input, row) { block.call(row) }
119
111
  slot.template ||= result
120
112
  end
121
113
 
@@ -146,6 +138,60 @@ module Jrf
146
138
  @cursor = saved_cursor
147
139
  end
148
140
 
141
# Applies +block+ to every element of +collection+ in plain transformation
# mode and returns the mapped collection.
#
# While the block runs for an element, that element is installed as the
# context's current input via __jrf_with_current_input.
#
# * :array - results are collected with append_map_result.
# * :hash  - entries whose result is Control::DROPPED are omitted; a
#   Control::Flat result is rejected.
#
# @param type [Symbol] :array or :hash
# @param collection [Array, Hash] value to transform
# @return [Array, Hash, nil] mapped collection; nil for any other +type+
# @raise [TypeError] when +collection+ does not match +type+, or when flat
#   is used for a hash value
def transform_collection(type, collection, &block)
  apply = lambda do |element|
    @ctx.send(:__jrf_with_current_input, element) { block.call(element) }
  end

  case type
  when :array
    raise TypeError, "map expects Array, got #{collection.class}" unless collection.is_a?(Array)

    output = []
    collection.each { |element| append_map_result(output, apply.call(element)) }
    output
  when :hash
    raise TypeError, "map_values expects Hash, got #{collection.class}" unless collection.is_a?(Hash)

    output = {}
    collection.each do |name, element|
      outcome = apply.call(element)
      next if outcome.equal?(Control::DROPPED)
      raise TypeError, "flat is not supported inside map_values" if outcome.is_a?(Control::Flat)

      output[name] = outcome
    end
    output
  end
end
162
+
163
# Builds the transformed collection from the templates stored in
# +map_reducer+'s slots.
#
# * :array - slots are visited in ascending key order and their templates
#   are collected with append_map_result.
# * :hash  - each key maps to its slot's template; Control::DROPPED
#   templates are left out and a Control::Flat template is rejected.
#
# @param type [Symbol] :array or :hash
# @param map_reducer [#slots] object whose +slots+ maps keys to slots
#   responding to +template+
# @return [Array, Hash, nil] the rebuilt collection; nil for any other +type+
# @raise [TypeError] when a hash slot's template is a Control::Flat
def transformed_slots(type, map_reducer)
  case type
  when :array
    output = []
    ordered = map_reducer.slots.sort_by { |position, _slot| position }
    ordered.each { |_position, slot| append_map_result(output, slot.template) }
    output
  when :hash
    output = {}
    map_reducer.slots.each do |name, slot|
      template = slot.template
      next if template.equal?(Control::DROPPED)
      raise TypeError, "flat is not supported inside map_values" if template.is_a?(Control::Flat)

      output[name] = template
    end
    output
  end
end
180
+
181
# Adds one mapped value to the +result+ array.
#
# * Control::DROPPED is skipped.
# * A Control::Flat wrapper has its array spliced into +result+.
# * Anything else is appended as a single element.
#
# @param result [Array] accumulator, mutated in place
# @param mapped [Object] value produced for one element
# @return [Array, nil] +result+, or nil when the value was dropped
# @raise [TypeError] when a Control::Flat does not wrap an Array
def append_map_result(result, mapped)
  return if mapped.equal?(Control::DROPPED)
  return result << mapped unless mapped.is_a?(Control::Flat)

  items = mapped.value
  raise TypeError, "flat expects Array, got #{items.class}" unless items.is_a?(Array)

  result.concat(items)
end
194
+
149
195
  class MapReducer
150
196
  attr_reader :slots
151
197
 
data/lib/jrf/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Jrf
4
- VERSION = "0.1.4"
4
+ VERSION = "0.1.6"
5
5
  end
data/test/jrf_test.rb CHANGED
@@ -1,7 +1,17 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ begin
4
+ require "bundler/setup"
5
+ rescue LoadError
6
+ # Allow running tests in plain Ruby environments with globally installed gems.
7
+ end
8
+
3
9
  require "json"
4
10
  require "open3"
11
+ require "stringio"
12
+ require "tmpdir"
13
+ require "zlib"
14
+ require_relative "../lib/jrf/cli/runner"
5
15
 
6
16
  def run_jrf(expr, input, *opts)
7
17
  Open3.capture3("./exe/jrf", *opts, expr, stdin_data: input)
@@ -41,6 +51,45 @@ def lines(str)
41
51
  str.lines.map(&:strip).reject(&:empty?)
42
52
  end
43
53
 
54
# Test double for Jrf::CLI::Runner that records every non-empty batch handed
# to write_output instead of writing it to the output stream.
class RecordingRunner < Jrf::CLI::Runner
  # @return [Array<String>] the recorded batches, in write order
  attr_reader :writes

  def initialize(**kwargs)
    super
    @writes = []
  end

  private

  # Records a snapshot of +str+. The runner passes its live buffer String
  # here, so storing the object itself would let a later in-place mutation of
  # that buffer silently rewrite the recorded history.
  def write_output(str)
    return if str.empty?

    @writes << str.dup
  end
end
70
+
71
# Test double for a readable source that hands out at most +chunk_size+ bytes
# per read call and refuses whole-input reads, to prove that a consumer
# streams its input.
class ChunkedSource
  # @param str [String] the complete data to serve
  # @param chunk_size [Integer] maximum number of bytes returned per read
  def initialize(str, chunk_size: 5)
    @str = str
    @chunk_size = chunk_size
    @offset = 0
  end

  # Returns the next chunk of at most min(length, chunk_size) bytes.
  #
  # Like IO#read with a positive length, this returns nil once the data is
  # exhausted. String#byteslice returns "" (not nil) when the offset sits
  # exactly at the end of the data, so emptiness has to be checked
  # explicitly; otherwise the source would never signal end of input.
  #
  # @param length [Integer] maximum number of bytes wanted (required)
  # @param outbuf [String, nil] when given, receives the chunk
  # @return [String, nil] the chunk, or nil at end of data
  # @raise [RuntimeError] when called without a length
  def read(length = nil, outbuf = nil)
    raise "expected chunked reads" if length.nil?

    chunk = @str.byteslice(@offset, [length, @chunk_size].min)
    return nil if chunk.nil? || (chunk.empty? && length.positive?)

    @offset += chunk.bytesize
    if outbuf
      outbuf.replace(chunk)
    else
      chunk
    end
  end
end
92
+
44
93
  File.chmod(0o755, "./exe/jrf")
45
94
 
46
95
  input = <<~NDJSON
@@ -92,10 +141,11 @@ assert_includes(stderr, 'stage[1]: _["hello"]')
92
141
 
93
142
  stdout, stderr, status = Open3.capture3("./exe/jrf", "--help")
94
143
  assert_success(status, stderr, "help option")
95
- assert_includes(stdout, "usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'")
144
+ assert_includes(stdout, "usage: jrf [options] 'STAGE >> STAGE >> ...'")
96
145
  assert_includes(stdout, "JSON filter with the power and speed of Ruby.")
97
146
  assert_includes(stdout, "--lax")
98
147
  assert_includes(stdout, "--pretty")
148
+ assert_includes(stdout, "--atomic-write-bytes N")
99
149
  assert_includes(stdout, "Pipeline:")
100
150
  assert_includes(stdout, "Connect stages with top-level >>.")
101
151
  assert_includes(stdout, "The current value in each stage is available as _.")
@@ -103,11 +153,72 @@ assert_includes(stdout, "See Also:")
103
153
  assert_includes(stdout, "https://github.com/kazuho/jrf#readme")
104
154
  assert_equal([], lines(stderr), "help stderr output")
105
155
 
156
+ threshold_input = StringIO.new((1..4).map { |i| "{\"foo\":\"#{'x' * 1020}\",\"i\":#{i}}\n" }.join)
157
+ buffered_runner = RecordingRunner.new(inputs: [threshold_input], out: StringIO.new, err: StringIO.new)
158
+ buffered_runner.run('_')
159
+ expected_line = JSON.generate({"foo" => "x" * 1020, "i" => 1}) + "\n"
160
+ assert_equal(2, buffered_runner.writes.length, "default atomic write limit buffers records until the configured threshold")
161
+ assert_equal(expected_line.bytesize * 3, buffered_runner.writes.first.bytesize, "default atomic write limit flushes before the next record would exceed the threshold")
162
+ assert_equal(expected_line.bytesize, buffered_runner.writes.last.bytesize, "final buffer flush emits the remaining record")
163
+
164
+ small_limit_runner = RecordingRunner.new(inputs: [StringIO.new("{\"foo\":1}\n{\"foo\":2}\n")], out: StringIO.new, err: StringIO.new, atomic_write_bytes: 1)
165
+ small_limit_runner.run('_["foo"]')
166
+ assert_equal(["1\n", "2\n"], small_limit_runner.writes, "small atomic write limit emits oversized records directly")
167
+
168
+ error_runner = RecordingRunner.new(inputs: [StringIO.new("{\"foo\":1}\n{\"foo\":")], out: StringIO.new, err: StringIO.new)
169
+ begin
170
+ error_runner.run('_["foo"]')
171
+ raise "expected parse error for buffered flush test"
172
+ rescue JSON::ParserError
173
+ assert_equal(["1\n"], error_runner.writes, "buffer flushes pending output before parse errors escape")
174
+ end
175
+
106
176
  stdout, stderr, status = run_jrf('select(_["hello"] == 123) >> _["hello"]', input_hello, "--verbose")
107
177
  assert_success(status, stderr, "dump stages verbose alias")
108
178
  assert_equal(%w[123], lines(stdout), "dump stages verbose alias output")
109
179
  assert_includes(stderr, 'stage[0]: select(_["hello"] == 123)')
110
180
 
181
+ stdout, stderr, status = run_jrf('_["hello"]', input_hello, "--atomic-write-bytes", "512")
182
+ assert_success(status, stderr, "atomic write bytes option")
183
+ assert_equal(%w[123 456], lines(stdout), "atomic write bytes option output")
184
+
185
+ stdout, stderr, status = run_jrf('_["hello"]', input_hello, "--atomic-write-bytes=512")
186
+ assert_success(status, stderr, "atomic write bytes equals form")
187
+ assert_equal(%w[123 456], lines(stdout), "atomic write bytes equals form output")
188
+
189
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "--atomic-write-bytes", "0", '_["hello"]', stdin_data: input_hello)
190
+ assert_failure(status, "atomic write bytes rejects zero")
191
+ assert_includes(stderr, "--atomic-write-bytes requires a positive integer")
192
+
193
+ Dir.mktmpdir do |dir|
194
+ gz_path = File.join(dir, "input.ndjson.gz")
195
+ Zlib::GzipWriter.open(gz_path) do |io|
196
+ io.write("{\"foo\":10}\n{\"foo\":20}\n")
197
+ end
198
+
199
+ stdout, stderr, status = Open3.capture3("./exe/jrf", '_["foo"]', gz_path)
200
+ assert_success(status, stderr, "compressed input by suffix")
201
+ assert_equal(%w[10 20], lines(stdout), "compressed input output")
202
+
203
+ lax_gz_path = File.join(dir, "input-lax.json.gz")
204
+ Zlib::GzipWriter.open(lax_gz_path) do |io|
205
+ io.write("{\"foo\":30}\n\x1e{\"foo\":40}\n")
206
+ end
207
+
208
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "--lax", '_["foo"]', lax_gz_path)
209
+ assert_success(status, stderr, "compressed lax input by suffix")
210
+ assert_equal(%w[30 40], lines(stdout), "compressed lax input output")
211
+
212
+ second_gz_path = File.join(dir, "input2.ndjson.gz")
213
+ Zlib::GzipWriter.open(second_gz_path) do |io|
214
+ io.write("{\"foo\":50}\n")
215
+ end
216
+
217
+ stdout, stderr, status = Open3.capture3("./exe/jrf", '_["foo"]', gz_path, second_gz_path)
218
+ assert_success(status, stderr, "multiple compressed inputs by suffix")
219
+ assert_equal(%w[10 20 50], lines(stdout), "multiple compressed input output")
220
+ end
221
+
111
222
  stdout, stderr, status = run_jrf('_', input_hello, "--pretty")
112
223
  assert_success(status, stderr, "pretty output")
113
224
  assert_equal(
@@ -174,6 +285,14 @@ stdout, stderr, status = run_jrf('_["items"] >> flat >> group', input_flat)
174
285
  assert_success(status, stderr, "flat then group")
175
286
  assert_equal(['[1,2,3]'], lines(stdout), "flat then group output")
176
287
 
288
+ stdout, stderr, status = run_jrf('map { |x| flat }', "[[1,2],[3],[4,5,6]]\n")
289
+ assert_success(status, stderr, "flat inside map")
290
+ assert_equal(['[1,2,3,4,5,6]'], lines(stdout), "flat inside map output")
291
+
292
+ stdout, stderr, status = run_jrf('map_values { |v| flat }', "{\"a\":[1,2],\"b\":[3]}\n")
293
+ assert_failure(status, "flat inside map_values")
294
+ assert_includes(stderr, "flat is not supported inside map_values")
295
+
177
296
  stdout, stderr, status = run_jrf('_["foo"] >> flat', input)
178
297
  assert_failure(status, "flat requires array")
179
298
  assert_includes(stderr, "flat expects Array")
@@ -485,6 +604,26 @@ stdout, stderr, status = run_jrf('_["foo"]', input_lax_trailing_rs, "--lax")
485
604
  assert_success(status, stderr, "lax ignores trailing separator")
486
605
  assert_equal(%w[9], lines(stdout), "lax trailing separator output")
487
606
 
607
+ chunked_lax_out = RecordingRunner.new(
608
+ inputs: [ChunkedSource.new("{\"foo\":1}\n\x1e{\"foo\":2}\n\t{\"foo\":3}\n")],
609
+ out: StringIO.new,
610
+ err: StringIO.new,
611
+ lax: true
612
+ )
613
+ chunked_lax_out.run('_["foo"]')
614
+ assert_equal(%w[1 2 3], lines(chunked_lax_out.writes.join), "lax mode streams chunked input without whole-input reads")
615
+
616
+ Dir.mktmpdir do |dir|
617
+ one = File.join(dir, "one.json")
618
+ two = File.join(dir, "two.json")
619
+ File.write(one, "1")
620
+ File.write(two, "2")
621
+
622
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "--lax", "_", one, two)
623
+ assert_success(status, stderr, "lax keeps file boundaries")
624
+ assert_equal(%w[1 2], lines(stdout), "lax does not merge JSON across file boundaries")
625
+ end
626
+
488
627
  stdout, stderr, status = run_jrf('select(_["x"] > ) >> _["foo"]', "")
489
628
  assert_failure(status, "syntax error should fail before row loop")
490
629
  assert_includes(stderr, "syntax error")
@@ -540,6 +679,10 @@ stdout, stderr, status = run_jrf('_["values"] >> map { |x| sum(_[0] + x) }', inp
540
679
  assert_success(status, stderr, "map keeps ambient _")
541
680
  assert_equal(['[12,66,606]'], lines(stdout), "map ambient _ output")
542
681
 
682
+ stdout, stderr, status = run_jrf('_["values"] >> map { |x| reduce(0) { |acc, v| acc + v } }', input_map)
683
+ assert_success(status, stderr, "map with reduce")
684
+ assert_equal(['[6,60,600]'], lines(stdout), "map with reduce output")
685
+
543
686
  input_map_varying = <<~NDJSON
544
687
  [1,10]
545
688
  [2,20,200]
@@ -550,6 +693,20 @@ stdout, stderr, status = run_jrf('map { |x| sum(x) }', input_map_varying)
550
693
  assert_success(status, stderr, "map varying lengths")
551
694
  assert_equal(['[6,30,200]'], lines(stdout), "map varying lengths output")
552
695
 
696
+ input_map_unsorted = <<~NDJSON
697
+ {"values":[3,30]}
698
+ {"values":[1,10]}
699
+ {"values":[2,20]}
700
+ NDJSON
701
+
702
+ stdout, stderr, status = run_jrf('_["values"] >> map { |x| group }', input_map)
703
+ assert_success(status, stderr, "map with group")
704
+ assert_equal(['[[1,2,3],[10,20,30],[100,200,300]]'], lines(stdout), "map with group output")
705
+
706
+ stdout, stderr, status = run_jrf('_["values"] >> map { |x| sort }', input_map_unsorted)
707
+ assert_success(status, stderr, "map with sort default key")
708
+ assert_equal(['[[1,2,3],[10,20,30]]'], lines(stdout), "map with sort default key output")
709
+
553
710
  input_map_values = <<~NDJSON
554
711
  {"a":1,"b":10}
555
712
  {"a":2,"b":20}
@@ -578,10 +735,18 @@ stdout, stderr, status = run_jrf('map_values { |v| count(v) }', input_map_values
578
735
  assert_success(status, stderr, "map_values with count")
579
736
  assert_equal(['{"a":3,"b":3}'], lines(stdout), "map_values with count output")
580
737
 
738
+ stdout, stderr, status = run_jrf('map_values { |v| group }', input_map_values)
739
+ assert_success(status, stderr, "map_values with group")
740
+ assert_equal(['{"a":[1,2,3],"b":[10,20,30]}'], lines(stdout), "map_values with group output")
741
+
581
742
  stdout, stderr, status = run_jrf('map_values { |v| sum(_["a"] + v) }', input_map_values)
582
743
  assert_success(status, stderr, "map_values keeps ambient _")
583
744
  assert_equal(['{"a":12,"b":66}'], lines(stdout), "map_values ambient _ output")
584
745
 
746
+ stdout, stderr, status = run_jrf('map_values { |v| reduce(0) { |acc, x| acc + x } }', input_map_values)
747
+ assert_success(status, stderr, "map_values with reduce")
748
+ assert_equal(['{"a":6,"b":60}'], lines(stdout), "map_values with reduce output")
749
+
585
750
  stdout, stderr, status = run_jrf('select(false) >> map { |x| sum(x) }', input_map)
586
751
  assert_success(status, stderr, "map no matches")
587
752
  assert_equal([], lines(stdout), "map no matches output")
@@ -599,10 +764,18 @@ stdout, stderr, status = run_jrf('_["values"] >> map { |x| x + 1 }', input_map)
599
764
  assert_success(status, stderr, "map transform")
600
765
  assert_equal(['[2,11,101]', '[3,21,201]', '[4,31,301]'], lines(stdout), "map transform output")
601
766
 
767
+ stdout, stderr, status = run_jrf('_["values"] >> map { |x| select(x >= 20) }', input_map)
768
+ assert_success(status, stderr, "map transform with select")
769
+ assert_equal(['[100]', '[20,200]', '[30,300]'], lines(stdout), "map transform with select output")
770
+
602
771
  stdout, stderr, status = run_jrf('map_values { |v| v * 2 }', input_map_values)
603
772
  assert_success(status, stderr, "map_values transform")
604
773
  assert_equal(['{"a":2,"b":20}', '{"a":4,"b":40}', '{"a":6,"b":60}'], lines(stdout), "map_values transform output")
605
774
 
775
+ stdout, stderr, status = run_jrf('map_values { |v| select(v >= 10) }', input_map_values)
776
+ assert_success(status, stderr, "map_values transform with select")
777
+ assert_equal(['{"b":10}', '{"b":20}', '{"b":30}'], lines(stdout), "map_values transform with select output")
778
+
606
779
  stdout, stderr, status = run_jrf('_["values"] >> map { |x| x + 1 } >> map { |x| x * 10 }', input_map)
607
780
  assert_success(status, stderr, "chained map transforms")
608
781
  assert_equal(['[20,110,1010]', '[30,210,2010]', '[40,310,3010]'], lines(stdout), "chained map transforms output")
@@ -639,6 +812,12 @@ stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| group(row["path"
639
812
  assert_success(status, stderr, "group_by with group(expr)")
640
813
  assert_equal(['{"200":["/a","/c","/d"],"404":["/b"]}'], lines(stdout), "group_by with group(expr) output")
641
814
 
815
+ stdout, stderr, status = run_jrf('group_by(_["status"]) { group }', input_gb)
816
+ assert_success(status, stderr, "group_by with implicit group")
817
+ result = JSON.parse(lines(stdout).first)
818
+ assert_equal(3, result["200"].length, "group_by implicit group 200 count")
819
+ assert_equal("/a", result["200"][0]["path"], "group_by implicit group first row")
820
+
642
821
  stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| min(row["latency"]) }', input_gb)
643
822
  assert_success(status, stderr, "group_by with min")
644
823
  assert_equal(['{"200":10,"404":50}'], lines(stdout), "group_by with min output")
@@ -647,6 +826,10 @@ stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| {total: sum(row[
647
826
  assert_success(status, stderr, "group_by with multi-reducer")
648
827
  assert_equal(['{"200":{"total":60,"n":3},"404":{"total":50,"n":1}}'], lines(stdout), "group_by multi-reducer output")
649
828
 
829
+ stdout, stderr, status = run_jrf('group_by(_["status"]) { reduce(0) { |acc, row| acc + row["latency"] } }', input_gb)
830
+ assert_success(status, stderr, "group_by with reduce")
831
+ assert_equal(['{"200":60,"404":50}'], lines(stdout), "group_by with reduce output")
832
+
650
833
  stdout, stderr, status = run_jrf('select(false) >> group_by(_["status"]) { count() }', input_gb)
651
834
  assert_success(status, stderr, "group_by no matches")
652
835
  assert_equal([], lines(stdout), "group_by no matches output")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jrf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - kazuho
@@ -40,12 +40,12 @@ files:
40
40
  - jrf.gemspec
41
41
  - lib/jrf.rb
42
42
  - lib/jrf/cli.rb
43
+ - lib/jrf/cli/runner.rb
43
44
  - lib/jrf/control.rb
44
45
  - lib/jrf/pipeline.rb
45
46
  - lib/jrf/pipeline_parser.rb
46
47
  - lib/jrf/reducers.rb
47
48
  - lib/jrf/row_context.rb
48
- - lib/jrf/runner.rb
49
49
  - lib/jrf/stage.rb
50
50
  - lib/jrf/version.rb
51
51
  - test/jrf_test.rb
data/lib/jrf/runner.rb DELETED
@@ -1,81 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "json"
4
- require_relative "pipeline"
5
- require_relative "pipeline_parser"
6
-
7
- module Jrf
8
- class Runner
9
- RS_CHAR = "\x1e"
10
-
11
- def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false, pretty: false)
12
- @input = input
13
- @out = out
14
- @err = err
15
- @lax = lax
16
- @pretty = pretty
17
- end
18
-
19
- def run(expression, verbose: false)
20
- parsed = PipelineParser.new(expression).parse
21
- stages = parsed[:stages]
22
- dump_stages(stages) if verbose
23
-
24
- blocks = stages.map { |stage|
25
- eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
26
- }
27
- pipeline = Pipeline.new(*blocks)
28
-
29
- input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
30
- pipeline.call(input_enum) do |value|
31
- @out.puts(@pretty ? JSON.pretty_generate(value) : JSON.generate(value))
32
- end
33
- end
34
-
35
- private
36
-
37
- def each_input_value
38
- return each_input_value_lax { |value| yield value } if @lax
39
-
40
- each_input_value_ndjson { |value| yield value }
41
- end
42
-
43
- def each_input_value_ndjson
44
- @input.each_line do |raw_line|
45
- line = raw_line.strip
46
- next if line.empty?
47
-
48
- yield JSON.parse(line)
49
- end
50
- end
51
-
52
- def each_input_value_lax
53
- require "oj"
54
- source = @input.read.to_s
55
- source = source.include?(RS_CHAR) ? source.tr(RS_CHAR, "\n") : source
56
- handler = Class.new(Oj::ScHandler) do
57
- def initialize(&emit)
58
- @emit = emit
59
- end
60
-
61
- def hash_start = {}
62
- def hash_key(key) = key
63
- def hash_set(hash, key, value) = hash[key] = value
64
- def array_start = []
65
- def array_append(array, value) = array << value
66
- def add_value(value) = @emit.call(value)
67
- end.new { |value| yield value }
68
- Oj.sc_parse(handler, source)
69
- rescue LoadError
70
- raise "oj is required for --lax mode (gem install oj)"
71
- rescue Oj::ParseError => e
72
- raise JSON::ParserError, e.message
73
- end
74
-
75
- def dump_stages(stages)
76
- stages.each_with_index do |stage, i|
77
- @err.puts "stage[#{i}]: #{stage[:src]}"
78
- end
79
- end
80
- end
81
- end