jrf 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 408c1f9706af5efaa1bf0125201d6647b4c108aa4aa28c99a93b59fb9cc94f02
4
- data.tar.gz: 702f2fb14dc9d498292b02c41f0cdb4a91c0fa3e093ad9a71435d9a2604532fa
3
+ metadata.gz: 7ac8b4b0fe2489c04dcba49752df7143f7e218de9f21b0496e2c3fdd2f732088
4
+ data.tar.gz: 2787cc4714d0e99909c4430fe23aca1fcaae1c25a079f15b2092861b53c4f5ea
5
5
  SHA512:
6
- metadata.gz: 80dfa6d2bb7c9304e779a3e80815efbde9c599d66665708738b833b08daa1918ae54bc5b170c8b90c60399fe18b0df06d576e2c8c3d8b76b74f9daa826efcfa8
7
- data.tar.gz: 597b715fd3ebd31a49cb2839f7dda814b845cd5aa87a3ac9a9cf551553792b453af749e287652553903de851ea7b06a9e5940abc7c25fccd319a9e7e72d75840
6
+ metadata.gz: 61f498f33e794258ebed00a468aa779ece52eff4c29d0538f7bc1601391d0a6948c32ed5dfbd76439e55a283ad4c59dc8312254711341dae2b7e79bf45b8a0a0
7
+ data.tar.gz: 92e1c46977cf3d841c8469fcf7e757cfcb4b6c60e800b063771bed3cc88eac7622e7d9a0c4aab906cefd60d046fe77a1f4e2f932d37687c952db3a598a0f3b1c
data/exe/jrf CHANGED
@@ -10,4 +10,4 @@ end
10
10
  $LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
11
11
  require "jrf"
12
12
 
13
- exit Jrf::CLI.run(ARGV)
13
+ Jrf::CLI.run(ARGV)
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require_relative "../pipeline"
5
+ require_relative "../pipeline_parser"
6
+
7
+ module Jrf
8
+ class CLI
9
+ class Runner
10
+ RS_CHAR = "\x1e"
11
+ DEFAULT_OUTPUT_BUFFER_LIMIT = 4096
12
+
13
+ class RsNormalizer
14
+ def initialize(input)
15
+ @input = input
16
+ end
17
+
18
+ def read(length = nil, outbuf = nil)
19
+ chunk = @input.read(length)
20
+ return nil if chunk.nil?
21
+
22
+ chunk = chunk.tr(RS_CHAR, "\n")
23
+ if outbuf
24
+ outbuf.replace(chunk)
25
+ else
26
+ chunk
27
+ end
28
+ end
29
+ end
30
+
31
+ def initialize(inputs:, out: $stdout, err: $stderr, lax: false, pretty: false, atomic_write_bytes: DEFAULT_OUTPUT_BUFFER_LIMIT)
32
+ @inputs = inputs
33
+ @out = out
34
+ @err = err
35
+ @lax = lax
36
+ @pretty = pretty
37
+ @atomic_write_bytes = atomic_write_bytes
38
+ @output_buffer = +""
39
+ end
40
+
41
+ def run(expression, verbose: false)
42
+ parsed = PipelineParser.new(expression).parse
43
+ stages = parsed[:stages]
44
+ dump_stages(stages) if verbose
45
+
46
+ blocks = stages.map { |stage|
47
+ eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
48
+ }
49
+ pipeline = Pipeline.new(*blocks)
50
+
51
+ input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
52
+ pipeline.call(input_enum) do |value|
53
+ emit_output(value)
54
+ end
55
+ ensure
56
+ write_output(@output_buffer)
57
+ end
58
+
59
+ private
60
+
61
+ def each_input_value
62
+ return each_input_value_lax { |value| yield value } if @lax
63
+
64
+ each_input_value_ndjson { |value| yield value }
65
+ end
66
+
67
+ def each_input_value_ndjson
68
+ each_input do |source|
69
+ source.each_line do |raw_line|
70
+ line = raw_line.strip
71
+ next if line.empty?
72
+
73
+ yield JSON.parse(line)
74
+ end
75
+ end
76
+ end
77
+
78
+ def each_input_value_lax
79
+ require "oj"
80
+ handler = Class.new(Oj::ScHandler) do
81
+ def initialize(&emit)
82
+ @emit = emit
83
+ end
84
+
85
+ def hash_start = {}
86
+ def hash_key(key) = key
87
+ def hash_set(hash, key, value) = hash[key] = value
88
+ def array_start = []
89
+ def array_append(array, value) = array << value
90
+ def add_value(value) = @emit.call(value)
91
+ end
92
+ each_input do |source|
93
+ Oj.sc_parse(handler.new { |value| yield value }, RsNormalizer.new(source))
94
+ end
95
+ rescue LoadError
96
+ raise "oj is required for --lax mode (gem install oj)"
97
+ rescue Oj::ParseError => e
98
+ raise JSON::ParserError, e.message
99
+ end
100
+
101
+ def dump_stages(stages)
102
+ stages.each_with_index do |stage, i|
103
+ @err.puts "stage[#{i}]: #{stage[:src]}"
104
+ end
105
+ end
106
+
107
+ def each_input
108
+ @inputs.each { |source| yield source }
109
+ end
110
+
111
+ def emit_output(value)
112
+ record = (@pretty ? JSON.pretty_generate(value) : JSON.generate(value)) << "\n"
113
+ if @output_buffer.bytesize + record.bytesize <= @atomic_write_bytes
114
+ @output_buffer << record
115
+ else
116
+ write_output(@output_buffer)
117
+ @output_buffer = record
118
+ end
119
+ end
120
+
121
+ def write_output(str)
122
+ @out.syswrite(str)
123
+ end
124
+ end
125
+ end
126
+ end
data/lib/jrf/cli.rb CHANGED
@@ -1,13 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "runner"
3
+ require "optparse"
4
+
5
+ require_relative "cli/runner"
6
+ require_relative "version"
4
7
 
5
8
  module Jrf
6
9
  class CLI
7
- USAGE = "usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'"
8
-
10
+ USAGE = "usage: jrf [options] 'STAGE >> STAGE >> ...'"
9
11
  HELP_TEXT = <<~'TEXT'
10
- usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'
12
+ usage: jrf [options] 'STAGE >> STAGE >> ...'
11
13
 
12
14
  JSON filter with the power and speed of Ruby.
13
15
 
@@ -15,6 +17,10 @@ module Jrf
15
17
  -v, --verbose print parsed stage expressions
16
18
  --lax allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
17
19
  -p, --pretty pretty-print JSON output instead of compact NDJSON
20
+ --no-jit do not enable YJIT, even when supported by the Ruby runtime
21
+ --atomic-write-bytes N
22
+ group short outputs into atomic writes of up to N bytes
23
+ -V, --version show version and exit
18
24
  -h, --help show this help and exit
19
25
 
20
26
  Pipeline:
@@ -36,36 +42,81 @@ module Jrf
36
42
  verbose = false
37
43
  lax = false
38
44
  pretty = false
39
-
40
- while argv.first&.start_with?("-")
41
- case argv.first
42
- when "-v", "--verbose"
43
- verbose = true
44
- argv.shift
45
- when "--lax"
46
- lax = true
47
- argv.shift
48
- when "-p", "--pretty"
49
- pretty = true
50
- argv.shift
51
- when "-h", "--help"
52
- out.puts HELP_TEXT
53
- return 0
54
- else
55
- err.puts "unknown option: #{argv.first}"
56
- err.puts USAGE
57
- return 1
45
+ jit = true
46
+ atomic_write_bytes = Runner::DEFAULT_OUTPUT_BUFFER_LIMIT
47
+ begin
48
+ parser = OptionParser.new do |opts|
49
+ opts.banner = USAGE
50
+ opts.on("-v", "--verbose", "print parsed stage expressions") { verbose = true }
51
+ opts.on("--lax", "allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)") { lax = true }
52
+ opts.on("-p", "--pretty", "pretty-print JSON output instead of compact NDJSON") { pretty = true }
53
+ opts.on("--no-jit", "do not enable YJIT, even when supported by the Ruby runtime") { jit = false }
54
+ opts.on("--atomic-write-bytes N", Integer, "group short outputs into atomic writes of up to N bytes") do |value|
55
+ if value.positive?
56
+ atomic_write_bytes = value
57
+ else
58
+ raise OptionParser::InvalidArgument, "--atomic-write-bytes requires a positive integer"
59
+ end
60
+ end
61
+ opts.on("-V", "--version", "show version and exit") do
62
+ out.puts Jrf::VERSION
63
+ exit
64
+ end
65
+ opts.on("-h", "--help", "show this help and exit") do
66
+ out.puts HELP_TEXT
67
+ exit
68
+ end
58
69
  end
70
+
71
+ parser.order!(argv)
72
+ rescue OptionParser::ParseError => e
73
+ err.puts e.message
74
+ err.puts USAGE
75
+ exit 1
59
76
  end
60
77
 
61
78
  if argv.empty?
62
79
  err.puts USAGE
63
- return 1
80
+ exit 1
64
81
  end
65
82
 
66
83
  expression = argv.shift
67
- Runner.new(input: input, out: out, err: err, lax: lax, pretty: pretty).run(expression, verbose: verbose)
68
- 0
84
+ enable_yjit if jit
85
+
86
+ inputs = Enumerator.new do |y|
87
+ if argv.empty?
88
+ y << input
89
+ else
90
+ argv.each do |path|
91
+ if path == "-"
92
+ y << input
93
+ elsif path.end_with?(".gz")
94
+ require "zlib"
95
+ Zlib::GzipReader.open(path) do |source|
96
+ y << source
97
+ end
98
+ else
99
+ File.open(path, "rb") do |source|
100
+ y << source
101
+ end
102
+ end
103
+ end
104
+ end
105
+ end
106
+ Runner.new(
107
+ inputs: inputs,
108
+ out: out,
109
+ err: err,
110
+ lax: lax,
111
+ pretty: pretty,
112
+ atomic_write_bytes: atomic_write_bytes
113
+ ).run(expression, verbose: verbose)
114
+ end
115
+
116
+ def self.enable_yjit
117
+ return unless defined?(RubyVM::YJIT) && RubyVM::YJIT.respond_to?(:enable)
118
+
119
+ RubyVM::YJIT.enable
69
120
  end
70
121
  end
71
122
  end
data/lib/jrf/pipeline.rb CHANGED
@@ -22,54 +22,43 @@ module Jrf
22
22
  # @yieldparam value output value
23
23
  # @return [Array, nil] output values (without block), or nil (with block)
24
24
  def call(input, &on_output)
25
- if on_output
26
- call_streaming(input, &on_output)
27
- else
25
+ if on_output.nil?
28
26
  results = []
29
- call_streaming(input) { |v| results << v }
30
- results
27
+ on_output = proc { |value| results << value }
31
28
  end
32
- end
33
-
34
- private
35
29
 
36
- def call_streaming(input, &on_output)
37
- error = nil
38
30
  begin
39
31
  input.each { |value| process_value(value, @stages, &on_output) }
40
- rescue StandardError => e
41
- error = e
42
32
  ensure
43
33
  flush_reducers(@stages, &on_output)
44
34
  end
45
- raise error if error
35
+
36
+ results unless results.nil?
46
37
  end
47
38
 
48
- def process_value(input, stages, &on_output)
49
- current_values = [input]
39
+ private
50
40
 
51
- stages.each do |stage|
52
- next_values = []
41
+ def process_value(value, stages, idx = 0, &on_output)
42
+ while idx < stages.length
43
+ value = stages[idx].call(value)
53
44
 
54
- current_values.each do |value|
55
- out = stage.call(value)
56
- if out.equal?(Control::DROPPED)
57
- next
58
- elsif out.is_a?(Control::Flat)
59
- unless out.value.is_a?(Array)
60
- raise TypeError, "flat expects Array, got #{out.value.class}"
61
- end
62
- next_values.concat(out.value)
63
- else
64
- next_values << out
45
+ if value.equal?(Control::DROPPED)
46
+ return
47
+ elsif value.is_a?(Control::Flat)
48
+ value = value.value
49
+ unless value.is_a?(Array)
50
+ raise TypeError, "flat expects Array, got #{value.class}"
51
+ end
52
+ value.each do |child|
53
+ process_value(child, stages, idx + 1, &on_output)
65
54
  end
55
+ return
66
56
  end
67
57
 
68
- return if next_values.empty?
69
- current_values = next_values
58
+ idx += 1
70
59
  end
71
60
 
72
- current_values.each(&on_output)
61
+ on_output.call(value)
73
62
  end
74
63
 
75
64
  def flush_reducers(stages, &on_output)
@@ -13,7 +13,7 @@ module Jrf
13
13
  def define_reducer(name, &definition)
14
14
  define_method(name) do |*args, **kwargs, &block|
15
15
  spec = definition.call(self, *args, **kwargs, block: block)
16
- @__jrf_current_stage.allocate_reducer(
16
+ @__jrf_current_stage.step_reduce(
17
17
  spec.fetch(:value),
18
18
  initial: reducer_initial_value(spec.fetch(:initial)),
19
19
  finish: spec[:finish],
@@ -161,24 +161,24 @@ module Jrf
161
161
  def reduce(initial, &block)
162
162
  raise ArgumentError, "reduce requires a block" unless block
163
163
 
164
- @__jrf_current_stage.allocate_reducer(current_input, initial: initial, &block)
164
+ @__jrf_current_stage.step_reduce(current_input, initial: initial, &block)
165
165
  end
166
166
 
167
167
  def map(&block)
168
168
  raise ArgumentError, "map requires a block" unless block
169
169
 
170
- @__jrf_current_stage.allocate_map(:array, @obj, &block)
170
+ @__jrf_current_stage.step_map(:map, @obj, &block)
171
171
  end
172
172
 
173
173
  def map_values(&block)
174
174
  raise ArgumentError, "map_values requires a block" unless block
175
175
 
176
- @__jrf_current_stage.allocate_map(:hash, @obj, &block)
176
+ @__jrf_current_stage.step_map(:map_values, @obj, &block)
177
177
  end
178
178
 
179
179
  def group_by(key, &block)
180
180
  block ||= proc { group }
181
- @__jrf_current_stage.allocate_group_by(key, &block)
181
+ @__jrf_current_stage.step_group_by(key, &block)
182
182
  end
183
183
 
184
184
  private
data/lib/jrf/stage.rb CHANGED
@@ -39,39 +39,52 @@ module Jrf
39
39
  @ctx.__jrf_current_stage = self
40
40
  result = @ctx.instance_eval(&@block)
41
41
 
42
- if @mode.nil? && @reducers.any?
43
- @mode = :reducer
44
- @template = result
45
- elsif @mode.nil?
46
- @mode = :passthrough
42
+ if @mode.nil?
43
+ if @reducers.any?
44
+ @mode = :reducer
45
+ @template = result
46
+ else
47
+ @mode = :passthrough
48
+ end
47
49
  end
48
50
 
49
51
  (@mode == :reducer) ? Control::DROPPED : result
50
52
  end
51
53
 
52
- def allocate_reducer(value, initial:, finish: nil, &step_fn)
54
+ def step_reduce(value, initial:, finish: nil, &step_fn)
53
55
  idx = @cursor
54
- finish_rows = finish || ->(acc) { [acc] }
55
- @reducers[idx] ||= Reducers.reduce(initial, finish: finish_rows, &step_fn)
56
+
57
+ if @reducers[idx].nil?
58
+ finish_rows = finish || ->(acc) { [acc] }
59
+ @reducers[idx] = Reducers.reduce(initial, finish: finish_rows, &step_fn)
60
+ result = ReducerToken.new(idx)
61
+ else
62
+ result = Control::DROPPED
63
+ end
64
+
56
65
  @reducers[idx].step(value)
57
- @cursor += 1
58
- ReducerToken.new(idx)
66
+ @cursor = idx + 1
67
+ result
59
68
  end
60
69
 
61
- def allocate_map(type, collection, &block)
70
+ def step_map(builtin, collection, &block)
62
71
  idx = @cursor
63
72
  @cursor += 1
64
73
 
74
+ if collection.is_a?(Array)
75
+ raise TypeError, "map_values expects Hash, got Array" if builtin == :map_values
76
+ elsif !collection.is_a?(Hash)
77
+ raise TypeError, "#{builtin} expects #{builtin == :map_values ? "Hash" : "Array or Hash"}, got #{collection.class}"
78
+ end
79
+
65
80
  # Transformation mode (detected on first call)
66
81
  if @map_transforms[idx]
67
- return transform_collection(type, collection, &block)
82
+ return transform_collection(builtin, collection, &block)
68
83
  end
69
84
 
70
- map_reducer = (@reducers[idx] ||= MapReducer.new(type))
85
+ map_reducer = (@reducers[idx] ||= MapReducer.new(builtin, collection.is_a?(Array)))
71
86
 
72
- case type
73
- when :array
74
- raise TypeError, "map expects Array, got #{collection.class}" unless collection.is_a?(Array)
87
+ if collection.is_a?(Array)
75
88
  collection.each_with_index do |v, i|
76
89
  slot = map_reducer.slot(i)
77
90
  with_scoped_reducers(slot.reducers) do
@@ -79,12 +92,11 @@ module Jrf
79
92
  slot.template ||= result
80
93
  end
81
94
  end
82
- when :hash
83
- raise TypeError, "map_values expects Hash, got #{collection.class}" unless collection.is_a?(Hash)
95
+ else
84
96
  collection.each do |k, v|
85
97
  slot = map_reducer.slot(k)
86
98
  with_scoped_reducers(slot.reducers) do
87
- result = @ctx.send(:__jrf_with_current_input, v) { block.call(v) }
99
+ result = @ctx.send(:__jrf_with_current_input, v) { invoke_block(builtin, block, k, v) }
88
100
  slot.template ||= result
89
101
  end
90
102
  end
@@ -94,15 +106,15 @@ module Jrf
94
106
  if @mode.nil? && map_reducer.slots.values.all? { |s| s.reducers.empty? }
95
107
  @map_transforms[idx] = true
96
108
  @reducers[idx] = nil
97
- return transformed_slots(type, map_reducer)
109
+ return transformed_slots(builtin, map_reducer)
98
110
  end
99
111
 
100
112
  ReducerToken.new(idx)
101
113
  end
102
114
 
103
- def allocate_group_by(key, &block)
115
+ def step_group_by(key, &block)
104
116
  idx = @cursor
105
- map_reducer = (@reducers[idx] ||= MapReducer.new(:hash))
117
+ map_reducer = (@reducers[idx] ||= MapReducer.new(:group_by, false))
106
118
 
107
119
  row = @ctx._
108
120
  slot = map_reducer.slot(key)
@@ -138,55 +150,82 @@ module Jrf
138
150
  @cursor = saved_cursor
139
151
  end
140
152
 
141
- def transform_collection(type, collection, &block)
142
- case type
143
- when :array
144
- raise TypeError, "map expects Array, got #{collection.class}" unless collection.is_a?(Array)
153
+ def invoke_block(builtin, block, key, value)
154
+ case builtin
155
+ when :map then block.call([key, value])
156
+ when :map_values then block.call(value)
157
+ else raise ArgumentError, "unexpected builtin: #{builtin}"
158
+ end
159
+ end
145
160
 
161
+ def transform_collection(builtin, collection, &block)
162
+ if collection.is_a?(Array)
146
163
  collection.each_with_object([]) do |value, result|
147
164
  mapped = @ctx.send(:__jrf_with_current_input, value) { block.call(value) }
148
- append_map_result(result, mapped)
165
+ append_result(result, mapped, builtin)
149
166
  end
150
- when :hash
151
- raise TypeError, "map_values expects Hash, got #{collection.class}" unless collection.is_a?(Hash)
152
-
153
- collection.each_with_object({}) do |(key, value), result|
154
- mapped = @ctx.send(:__jrf_with_current_input, value) { block.call(value) }
155
- next if mapped.equal?(Control::DROPPED)
156
- raise TypeError, "flat is not supported inside map_values" if mapped.is_a?(Control::Flat)
167
+ else
168
+ case builtin
169
+ when :map
170
+ collection.each_with_object([]) do |(key, value), result|
171
+ mapped = @ctx.send(:__jrf_with_current_input, value) { invoke_block(builtin, block, key, value) }
172
+ append_result(result, mapped, builtin)
173
+ end
174
+ when :map_values
175
+ collection.each_with_object({}) do |(key, value), result|
176
+ mapped = @ctx.send(:__jrf_with_current_input, value) { invoke_block(builtin, block, key, value) }
177
+ next if mapped.equal?(Control::DROPPED)
178
+ raise TypeError, "flat is not supported inside map_values" if mapped.is_a?(Control::Flat)
157
179
 
158
- result[key] = mapped
180
+ result[key] = mapped
181
+ end
182
+ else
183
+ raise ArgumentError, "unexpected builtin: #{builtin}"
159
184
  end
160
185
  end
161
186
  end
162
187
 
163
- def transformed_slots(type, map_reducer)
164
- case type
165
- when :array
188
+ def transformed_slots(builtin, map_reducer)
189
+ if map_reducer.array_input?
166
190
  map_reducer.slots
167
191
  .sort_by { |k, _| k }
168
192
  .each_with_object([]) do |(_, slot), result|
169
- append_map_result(result, slot.template)
193
+ append_result(result, slot.template, builtin)
194
+ end
195
+ else
196
+ case builtin
197
+ when :map
198
+ map_reducer.slots.each_with_object([]) do |(_key, slot), result|
199
+ append_result(result, slot.template, builtin)
170
200
  end
171
- when :hash
172
- map_reducer.slots.each_with_object({}) do |(key, slot), result|
173
- next if slot.template.equal?(Control::DROPPED)
174
- raise TypeError, "flat is not supported inside map_values" if slot.template.is_a?(Control::Flat)
201
+ when :map_values
202
+ map_reducer.slots.each_with_object({}) do |(key, slot), result|
203
+ next if slot.template.equal?(Control::DROPPED)
204
+ raise TypeError, "flat is not supported inside map_values" if slot.template.is_a?(Control::Flat)
175
205
 
176
- result[key] = slot.template
206
+ result[key] = slot.template
207
+ end
208
+ else
209
+ raise ArgumentError, "unexpected builtin: #{builtin}"
177
210
  end
178
211
  end
179
212
  end
180
213
 
181
- def append_map_result(result, mapped)
214
+ def append_result(result, mapped, builtin)
182
215
  return if mapped.equal?(Control::DROPPED)
183
216
 
184
217
  if mapped.is_a?(Control::Flat)
185
- unless mapped.value.is_a?(Array)
186
- raise TypeError, "flat expects Array, got #{mapped.value.class}"
218
+ case builtin
219
+ when :map
220
+ unless mapped.value.is_a?(Array)
221
+ raise TypeError, "flat expects Array, got #{mapped.value.class}"
222
+ end
223
+ result.concat(mapped.value)
224
+ when :map_values
225
+ raise TypeError, "flat is not supported inside map_values"
226
+ else
227
+ raise ArgumentError, "unexpected builtin: #{builtin}"
187
228
  end
188
-
189
- result.concat(mapped.value)
190
229
  else
191
230
  result << mapped
192
231
  end
@@ -195,24 +234,35 @@ module Jrf
195
234
  class MapReducer
196
235
  attr_reader :slots
197
236
 
198
- def initialize(type)
199
- @type = type
237
+ def initialize(builtin, array_input)
238
+ @builtin = builtin
239
+ @array_input = array_input
200
240
  @slots = {}
201
241
  end
202
242
 
243
+ def array_input?
244
+ @array_input
245
+ end
246
+
203
247
  def slot(key)
204
248
  @slots[key] ||= SlotState.new
205
249
  end
206
250
 
207
251
  def finish
208
- case @type
209
- when :array
252
+ if @array_input
210
253
  keys = @slots.keys.sort
211
254
  [keys.map { |k| Stage.resolve_template(@slots[k].template, @slots[k].reducers) }]
212
- when :hash
213
- result = {}
214
- @slots.each { |k, s| result[k] = Stage.resolve_template(s.template, s.reducers) }
215
- [result]
255
+ else
256
+ case @builtin
257
+ when :map
258
+ [@slots.map { |_k, s| Stage.resolve_template(s.template, s.reducers) }]
259
+ when :map_values, :group_by
260
+ result = {}
261
+ @slots.each { |k, s| result[k] = Stage.resolve_template(s.template, s.reducers) }
262
+ [result]
263
+ else
264
+ raise ArgumentError, "unexpected builtin: #{@builtin}"
265
+ end
216
266
  end
217
267
  end
218
268
 
data/lib/jrf/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Jrf
4
- VERSION = "0.1.5"
4
+ VERSION = "0.1.7"
5
5
  end
data/test/jrf_test.rb CHANGED
@@ -1,7 +1,17 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ begin
4
+ require "bundler/setup"
5
+ rescue LoadError
6
+ # Allow running tests in plain Ruby environments with globally installed gems.
7
+ end
8
+
3
9
  require "json"
4
10
  require "open3"
11
+ require "stringio"
12
+ require "tmpdir"
13
+ require "zlib"
14
+ require_relative "../lib/jrf/cli/runner"
5
15
 
6
16
  def run_jrf(expr, input, *opts)
7
17
  Open3.capture3("./exe/jrf", *opts, expr, stdin_data: input)
@@ -41,6 +51,45 @@ def lines(str)
41
51
  str.lines.map(&:strip).reject(&:empty?)
42
52
  end
43
53
 
54
+ class RecordingRunner < Jrf::CLI::Runner
55
+ attr_reader :writes
56
+
57
+ def initialize(**kwargs)
58
+ super
59
+ @writes = []
60
+ end
61
+
62
+ private
63
+
64
+ def write_output(str)
65
+ return if str.empty?
66
+
67
+ @writes << str
68
+ end
69
+ end
70
+
71
+ class ChunkedSource
72
+ def initialize(str, chunk_size: 5)
73
+ @str = str
74
+ @chunk_size = chunk_size
75
+ @offset = 0
76
+ end
77
+
78
+ def read(length = nil, outbuf = nil)
79
+ raise "expected chunked reads" if length.nil?
80
+
81
+ chunk = @str.byteslice(@offset, [length, @chunk_size].min)
82
+ return nil unless chunk
83
+
84
+ @offset += chunk.bytesize
85
+ if outbuf
86
+ outbuf.replace(chunk)
87
+ else
88
+ chunk
89
+ end
90
+ end
91
+ end
92
+
44
93
  File.chmod(0o755, "./exe/jrf")
45
94
 
46
95
  input = <<~NDJSON
@@ -92,10 +141,14 @@ assert_includes(stderr, 'stage[1]: _["hello"]')
92
141
 
93
142
  stdout, stderr, status = Open3.capture3("./exe/jrf", "--help")
94
143
  assert_success(status, stderr, "help option")
95
- assert_includes(stdout, "usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'")
144
+ assert_includes(stdout, "usage: jrf [options] 'STAGE >> STAGE >> ...'")
96
145
  assert_includes(stdout, "JSON filter with the power and speed of Ruby.")
97
146
  assert_includes(stdout, "--lax")
98
147
  assert_includes(stdout, "--pretty")
148
+ assert_includes(stdout, "--no-jit")
149
+ assert_includes(stdout, "-V")
150
+ assert_includes(stdout, "--version")
151
+ assert_includes(stdout, "--atomic-write-bytes N")
99
152
  assert_includes(stdout, "Pipeline:")
100
153
  assert_includes(stdout, "Connect stages with top-level >>.")
101
154
  assert_includes(stdout, "The current value in each stage is available as _.")
@@ -103,11 +156,94 @@ assert_includes(stdout, "See Also:")
103
156
  assert_includes(stdout, "https://github.com/kazuho/jrf#readme")
104
157
  assert_equal([], lines(stderr), "help stderr output")
105
158
 
159
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "--version")
160
+ assert_success(status, stderr, "version long option")
161
+ assert_equal([Jrf::VERSION], lines(stdout), "version long option output")
162
+ assert_equal([], lines(stderr), "version long option stderr")
163
+
164
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "-V")
165
+ assert_success(status, stderr, "version short option")
166
+ assert_equal([Jrf::VERSION], lines(stdout), "version short option output")
167
+ assert_equal([], lines(stderr), "version short option stderr")
168
+
169
+ threshold_input = StringIO.new((1..4).map { |i| "{\"foo\":\"#{'x' * 1020}\",\"i\":#{i}}\n" }.join)
170
+ buffered_runner = RecordingRunner.new(inputs: [threshold_input], out: StringIO.new, err: StringIO.new)
171
+ buffered_runner.run('_')
172
+ expected_line = JSON.generate({"foo" => "x" * 1020, "i" => 1}) + "\n"
173
+ assert_equal(2, buffered_runner.writes.length, "default atomic write limit buffers records until the configured threshold")
174
+ assert_equal(expected_line.bytesize * 3, buffered_runner.writes.first.bytesize, "default atomic write limit flushes before the next record would exceed the threshold")
175
+ assert_equal(expected_line.bytesize, buffered_runner.writes.last.bytesize, "final buffer flush emits the remaining record")
176
+
177
+ small_limit_runner = RecordingRunner.new(inputs: [StringIO.new("{\"foo\":1}\n{\"foo\":2}\n")], out: StringIO.new, err: StringIO.new, atomic_write_bytes: 1)
178
+ small_limit_runner.run('_["foo"]')
179
+ assert_equal(["1\n", "2\n"], small_limit_runner.writes, "small atomic write limit emits oversized records directly")
180
+
181
+ error_runner = RecordingRunner.new(inputs: [StringIO.new("{\"foo\":1}\n{\"foo\":")], out: StringIO.new, err: StringIO.new)
182
+ begin
183
+ error_runner.run('_["foo"]')
184
+ raise "expected parse error for buffered flush test"
185
+ rescue JSON::ParserError
186
+ assert_equal(["1\n"], error_runner.writes, "buffer flushes pending output before parse errors escape")
187
+ end
188
+
106
189
  stdout, stderr, status = run_jrf('select(_["hello"] == 123) >> _["hello"]', input_hello, "--verbose")
107
190
  assert_success(status, stderr, "dump stages verbose alias")
108
191
  assert_equal(%w[123], lines(stdout), "dump stages verbose alias output")
109
192
  assert_includes(stderr, 'stage[0]: select(_["hello"] == 123)')
110
193
 
194
+ stdout, stderr, status = run_jrf('_["hello"]', input_hello, "--atomic-write-bytes", "512")
195
+ assert_success(status, stderr, "atomic write bytes option")
196
+ assert_equal(%w[123 456], lines(stdout), "atomic write bytes option output")
197
+
198
+ stdout, stderr, status = run_jrf('_["hello"]', input_hello, "--atomic-write-bytes=512")
199
+ assert_success(status, stderr, "atomic write bytes equals form")
200
+ assert_equal(%w[123 456], lines(stdout), "atomic write bytes equals form output")
201
+
202
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "--atomic-write-bytes", "0", '_["hello"]', stdin_data: input_hello)
203
+ assert_failure(status, "atomic write bytes rejects zero")
204
+ assert_includes(stderr, "--atomic-write-bytes requires a positive integer")
205
+
206
+ if defined?(RubyVM::YJIT) && RubyVM::YJIT.respond_to?(:enabled?)
207
+ yjit_probe = "{\"probe\":1}\n"
208
+
209
+ stdout, stderr, status = run_jrf('RubyVM::YJIT.enabled?', yjit_probe)
210
+ assert_success(status, stderr, "default jit enablement")
211
+ assert_equal(%w[true], lines(stdout), "default jit enablement output")
212
+
213
+ stdout, stderr, status = run_jrf('RubyVM::YJIT.enabled?', yjit_probe, "--no-jit")
214
+ assert_success(status, stderr, "no-jit option")
215
+ assert_equal(%w[false], lines(stdout), "no-jit option output")
216
+ end
217
+
218
+ Dir.mktmpdir do |dir|
219
+ gz_path = File.join(dir, "input.ndjson.gz")
220
+ Zlib::GzipWriter.open(gz_path) do |io|
221
+ io.write("{\"foo\":10}\n{\"foo\":20}\n")
222
+ end
223
+
224
+ stdout, stderr, status = Open3.capture3("./exe/jrf", '_["foo"]', gz_path)
225
+ assert_success(status, stderr, "compressed input by suffix")
226
+ assert_equal(%w[10 20], lines(stdout), "compressed input output")
227
+
228
+ lax_gz_path = File.join(dir, "input-lax.json.gz")
229
+ Zlib::GzipWriter.open(lax_gz_path) do |io|
230
+ io.write("{\"foo\":30}\n\x1e{\"foo\":40}\n")
231
+ end
232
+
233
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "--lax", '_["foo"]', lax_gz_path)
234
+ assert_success(status, stderr, "compressed lax input by suffix")
235
+ assert_equal(%w[30 40], lines(stdout), "compressed lax input output")
236
+
237
+ second_gz_path = File.join(dir, "input2.ndjson.gz")
238
+ Zlib::GzipWriter.open(second_gz_path) do |io|
239
+ io.write("{\"foo\":50}\n")
240
+ end
241
+
242
+ stdout, stderr, status = Open3.capture3("./exe/jrf", '_["foo"]', gz_path, second_gz_path)
243
+ assert_success(status, stderr, "multiple compressed inputs by suffix")
244
+ assert_equal(%w[10 20 50], lines(stdout), "multiple compressed input output")
245
+ end
246
+
111
247
  stdout, stderr, status = run_jrf('_', input_hello, "--pretty")
112
248
  assert_success(status, stderr, "pretty output")
113
249
  assert_equal(
@@ -493,6 +629,26 @@ stdout, stderr, status = run_jrf('_["foo"]', input_lax_trailing_rs, "--lax")
493
629
  assert_success(status, stderr, "lax ignores trailing separator")
494
630
  assert_equal(%w[9], lines(stdout), "lax trailing separator output")
495
631
 
632
+ chunked_lax_out = RecordingRunner.new(
633
+ inputs: [ChunkedSource.new("{\"foo\":1}\n\x1e{\"foo\":2}\n\t{\"foo\":3}\n")],
634
+ out: StringIO.new,
635
+ err: StringIO.new,
636
+ lax: true
637
+ )
638
+ chunked_lax_out.run('_["foo"]')
639
+ assert_equal(%w[1 2 3], lines(chunked_lax_out.writes.join), "lax mode streams chunked input without whole-input reads")
640
+
641
+ Dir.mktmpdir do |dir|
642
+ one = File.join(dir, "one.json")
643
+ two = File.join(dir, "two.json")
644
+ File.write(one, "1")
645
+ File.write(two, "2")
646
+
647
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "--lax", "_", one, two)
648
+ assert_success(status, stderr, "lax keeps file boundaries")
649
+ assert_equal(%w[1 2], lines(stdout), "lax does not merge JSON across file boundaries")
650
+ end
651
+
496
652
  stdout, stderr, status = run_jrf('select(_["x"] > ) >> _["foo"]', "")
497
653
  assert_failure(status, "syntax error should fail before row loop")
498
654
  assert_includes(stderr, "syntax error")
@@ -616,6 +772,26 @@ stdout, stderr, status = run_jrf('map_values { |v| reduce(0) { |acc, x| acc + x
616
772
  assert_success(status, stderr, "map_values with reduce")
617
773
  assert_equal(['{"a":6,"b":60}'], lines(stdout), "map_values with reduce output")
618
774
 
775
+ stdout, stderr, status = run_jrf('map { |k, v| "#{k}:#{v}" }', input_map_values)
776
+ assert_success(status, stderr, "map over hash transform")
777
+ assert_equal(['["a:1","b:10"]', '["a:2","b:20"]', '["a:3","b:30"]'], lines(stdout), "map over hash transform output")
778
+
779
+ stdout, stderr, status = run_jrf('map { |pair| pair }', input_map_values)
780
+ assert_success(status, stderr, "map over hash single block arg")
781
+ assert_equal(['[["a",1],["b",10]]', '[["a",2],["b",20]]', '[["a",3],["b",30]]'], lines(stdout), "map over hash single block arg output")
782
+
783
+ stdout, stderr, status = run_jrf('map { |k, v| select(v >= 10 && k != "a") }', input_map_values)
784
+ assert_success(status, stderr, "map over hash transform with select")
785
+ assert_equal(['[10]', '[20]', '[30]'], lines(stdout), "map over hash transform with select output")
786
+
787
+ stdout, stderr, status = run_jrf('map { |k, v| sum(v + k.length) }', input_map_values)
788
+ assert_success(status, stderr, "map over hash with sum")
789
+ assert_equal(['[9,63]'], lines(stdout), "map over hash with sum output")
790
+
791
+ stdout, stderr, status = run_jrf('map { |k, v| sum(_["a"] + v + k.length) }', input_map_values)
792
+ assert_success(status, stderr, "map over hash keeps ambient _")
793
+ assert_equal(['[15,69]'], lines(stdout), "map over hash ambient _ output")
794
+
619
795
  stdout, stderr, status = run_jrf('select(false) >> map { |x| sum(x) }', input_map)
620
796
  assert_success(status, stderr, "map no matches")
621
797
  assert_equal([], lines(stdout), "map no matches output")
@@ -750,6 +926,18 @@ assert_equal([[4, 6]], j.call([[1, 2], [3, 4]]), "library map reduce")
750
926
  j = Jrf.new(proc { map_values { |v| v * 10 } })
751
927
  assert_equal([{"a" => 10, "b" => 20}], j.call([{"a" => 1, "b" => 2}]), "library map_values transform")
752
928
 
929
+ # map hash transform
930
+ j = Jrf.new(proc { map { |k, v| "#{k}=#{v}" } })
931
+ assert_equal([["a=1", "b=2"]], j.call([{"a" => 1, "b" => 2}]), "library map hash transform")
932
+
933
+ # map hash single block arg
934
+ j = Jrf.new(proc { map { |pair| pair } })
935
+ assert_equal([[["a", 1], ["b", 2]]], j.call([{"a" => 1, "b" => 2}]), "library map hash single block arg")
936
+
937
+ # map hash reduce
938
+ j = Jrf.new(proc { map { |k, v| sum(v + k.length) } })
939
+ assert_equal([[5, 7]], j.call([{"a" => 1, "b" => 2}, {"a" => 2, "b" => 3}]), "library map hash reduce")
940
+
753
941
  # group_by
754
942
  j = Jrf.new(proc { group_by(_["k"]) { count() } })
755
943
  assert_equal([{"x" => 2, "y" => 1}], j.call([{"k" => "x"}, {"k" => "x"}, {"k" => "y"}]), "library group_by")
@@ -770,4 +958,13 @@ assert_equal([{"a" => 3}], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "librar
770
958
  j = Jrf.new(proc { sum(_) })
771
959
  assert_equal([], j.call([]), "library empty input")
772
960
 
961
+ ctx = Jrf::RowContext.new
962
+ stage = Jrf::Stage.new(ctx, proc { })
963
+ first_token = stage.step_reduce(1, initial: 0) { |acc, v| acc + v }
964
+ assert_equal(0, first_token.index, "step_reduce returns token while classifying reducer stage")
965
+ stage.instance_variable_set(:@mode, :reducer)
966
+ stage.instance_variable_set(:@cursor, 0)
967
+ second_token = stage.step_reduce(2, initial: 0) { |acc, v| acc + v }
968
+ raise "expected DROPPED for established reducer slot" unless second_token.equal?(Jrf::Control::DROPPED)
969
+
773
970
  puts "ok"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jrf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - kazuho
@@ -40,12 +40,12 @@ files:
40
40
  - jrf.gemspec
41
41
  - lib/jrf.rb
42
42
  - lib/jrf/cli.rb
43
+ - lib/jrf/cli/runner.rb
43
44
  - lib/jrf/control.rb
44
45
  - lib/jrf/pipeline.rb
45
46
  - lib/jrf/pipeline_parser.rb
46
47
  - lib/jrf/reducers.rb
47
48
  - lib/jrf/row_context.rb
48
- - lib/jrf/runner.rb
49
49
  - lib/jrf/stage.rb
50
50
  - lib/jrf/version.rb
51
51
  - test/jrf_test.rb
data/lib/jrf/runner.rb DELETED
@@ -1,81 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "json"
4
- require_relative "pipeline"
5
- require_relative "pipeline_parser"
6
-
7
- module Jrf
8
- class Runner
9
- RS_CHAR = "\x1e"
10
-
11
- def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false, pretty: false)
12
- @input = input
13
- @out = out
14
- @err = err
15
- @lax = lax
16
- @pretty = pretty
17
- end
18
-
19
- def run(expression, verbose: false)
20
- parsed = PipelineParser.new(expression).parse
21
- stages = parsed[:stages]
22
- dump_stages(stages) if verbose
23
-
24
- blocks = stages.map { |stage|
25
- eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
26
- }
27
- pipeline = Pipeline.new(*blocks)
28
-
29
- input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
30
- pipeline.call(input_enum) do |value|
31
- @out.puts(@pretty ? JSON.pretty_generate(value) : JSON.generate(value))
32
- end
33
- end
34
-
35
- private
36
-
37
- def each_input_value
38
- return each_input_value_lax { |value| yield value } if @lax
39
-
40
- each_input_value_ndjson { |value| yield value }
41
- end
42
-
43
- def each_input_value_ndjson
44
- @input.each_line do |raw_line|
45
- line = raw_line.strip
46
- next if line.empty?
47
-
48
- yield JSON.parse(line)
49
- end
50
- end
51
-
52
- def each_input_value_lax
53
- require "oj"
54
- source = @input.read.to_s
55
- source = source.include?(RS_CHAR) ? source.tr(RS_CHAR, "\n") : source
56
- handler = Class.new(Oj::ScHandler) do
57
- def initialize(&emit)
58
- @emit = emit
59
- end
60
-
61
- def hash_start = {}
62
- def hash_key(key) = key
63
- def hash_set(hash, key, value) = hash[key] = value
64
- def array_start = []
65
- def array_append(array, value) = array << value
66
- def add_value(value) = @emit.call(value)
67
- end.new { |value| yield value }
68
- Oj.sc_parse(handler, source)
69
- rescue LoadError
70
- raise "oj is required for --lax mode (gem install oj)"
71
- rescue Oj::ParseError => e
72
- raise JSON::ParserError, e.message
73
- end
74
-
75
- def dump_stages(stages)
76
- stages.each_with_index do |stage, i|
77
- @err.puts "stage[#{i}]: #{stage[:src]}"
78
- end
79
- end
80
- end
81
- end