jrf 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/jrf +1 -1
- data/lib/jrf/cli/runner.rb +126 -0
- data/lib/jrf/cli.rb +77 -26
- data/lib/jrf/pipeline.rb +20 -31
- data/lib/jrf/row_context.rb +5 -5
- data/lib/jrf/stage.rb +107 -57
- data/lib/jrf/version.rb +1 -1
- data/test/jrf_test.rb +198 -1
- metadata +2 -2
- data/lib/jrf/runner.rb +0 -81
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7ac8b4b0fe2489c04dcba49752df7143f7e218de9f21b0496e2c3fdd2f732088
|
|
4
|
+
data.tar.gz: 2787cc4714d0e99909c4430fe23aca1fcaae1c25a079f15b2092861b53c4f5ea
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 61f498f33e794258ebed00a468aa779ece52eff4c29d0538f7bc1601391d0a6948c32ed5dfbd76439e55a283ad4c59dc8312254711341dae2b7e79bf45b8a0a0
|
|
7
|
+
data.tar.gz: 92e1c46977cf3d841c8469fcf7e757cfcb4b6c60e800b063771bed3cc88eac7622e7d9a0c4aab906cefd60d046fe77a1f4e2f932d37687c952db3a598a0f3b1c
|
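data/exe/jrf
CHANGED
(hunk not captured in this extract; the file list above records +1 -1)
data/lib/jrf/cli/runner.rb
ADDED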
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require_relative "../pipeline"
|
|
5
|
+
require_relative "../pipeline_parser"
|
|
6
|
+
|
|
7
|
+
module Jrf
|
|
8
|
+
class CLI
|
|
9
|
+
class Runner
|
|
10
|
+
RS_CHAR = "\x1e"
|
|
11
|
+
DEFAULT_OUTPUT_BUFFER_LIMIT = 4096
|
|
12
|
+
|
|
13
|
+
class RsNormalizer
|
|
14
|
+
def initialize(input)
|
|
15
|
+
@input = input
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def read(length = nil, outbuf = nil)
|
|
19
|
+
chunk = @input.read(length)
|
|
20
|
+
return nil if chunk.nil?
|
|
21
|
+
|
|
22
|
+
chunk = chunk.tr(RS_CHAR, "\n")
|
|
23
|
+
if outbuf
|
|
24
|
+
outbuf.replace(chunk)
|
|
25
|
+
else
|
|
26
|
+
chunk
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def initialize(inputs:, out: $stdout, err: $stderr, lax: false, pretty: false, atomic_write_bytes: DEFAULT_OUTPUT_BUFFER_LIMIT)
|
|
32
|
+
@inputs = inputs
|
|
33
|
+
@out = out
|
|
34
|
+
@err = err
|
|
35
|
+
@lax = lax
|
|
36
|
+
@pretty = pretty
|
|
37
|
+
@atomic_write_bytes = atomic_write_bytes
|
|
38
|
+
@output_buffer = +""
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def run(expression, verbose: false)
|
|
42
|
+
parsed = PipelineParser.new(expression).parse
|
|
43
|
+
stages = parsed[:stages]
|
|
44
|
+
dump_stages(stages) if verbose
|
|
45
|
+
|
|
46
|
+
blocks = stages.map { |stage|
|
|
47
|
+
eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
|
|
48
|
+
}
|
|
49
|
+
pipeline = Pipeline.new(*blocks)
|
|
50
|
+
|
|
51
|
+
input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
|
|
52
|
+
pipeline.call(input_enum) do |value|
|
|
53
|
+
emit_output(value)
|
|
54
|
+
end
|
|
55
|
+
ensure
|
|
56
|
+
write_output(@output_buffer)
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
private
|
|
60
|
+
|
|
61
|
+
def each_input_value
|
|
62
|
+
return each_input_value_lax { |value| yield value } if @lax
|
|
63
|
+
|
|
64
|
+
each_input_value_ndjson { |value| yield value }
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def each_input_value_ndjson
|
|
68
|
+
each_input do |source|
|
|
69
|
+
source.each_line do |raw_line|
|
|
70
|
+
line = raw_line.strip
|
|
71
|
+
next if line.empty?
|
|
72
|
+
|
|
73
|
+
yield JSON.parse(line)
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def each_input_value_lax
|
|
79
|
+
require "oj"
|
|
80
|
+
handler = Class.new(Oj::ScHandler) do
|
|
81
|
+
def initialize(&emit)
|
|
82
|
+
@emit = emit
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def hash_start = {}
|
|
86
|
+
def hash_key(key) = key
|
|
87
|
+
def hash_set(hash, key, value) = hash[key] = value
|
|
88
|
+
def array_start = []
|
|
89
|
+
def array_append(array, value) = array << value
|
|
90
|
+
def add_value(value) = @emit.call(value)
|
|
91
|
+
end
|
|
92
|
+
each_input do |source|
|
|
93
|
+
Oj.sc_parse(handler.new { |value| yield value }, RsNormalizer.new(source))
|
|
94
|
+
end
|
|
95
|
+
rescue LoadError
|
|
96
|
+
raise "oj is required for --lax mode (gem install oj)"
|
|
97
|
+
rescue Oj::ParseError => e
|
|
98
|
+
raise JSON::ParserError, e.message
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
def dump_stages(stages)
|
|
102
|
+
stages.each_with_index do |stage, i|
|
|
103
|
+
@err.puts "stage[#{i}]: #{stage[:src]}"
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
def each_input
|
|
108
|
+
@inputs.each { |source| yield source }
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def emit_output(value)
|
|
112
|
+
record = (@pretty ? JSON.pretty_generate(value) : JSON.generate(value)) << "\n"
|
|
113
|
+
if @output_buffer.bytesize + record.bytesize <= @atomic_write_bytes
|
|
114
|
+
@output_buffer << record
|
|
115
|
+
else
|
|
116
|
+
write_output(@output_buffer)
|
|
117
|
+
@output_buffer = record
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
def write_output(str)
|
|
122
|
+
@out.syswrite(str)
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
end
|
data/lib/jrf/cli.rb
CHANGED
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
require "optparse"
|
|
4
|
+
|
|
5
|
+
require_relative "cli/runner"
|
|
6
|
+
require_relative "version"
|
|
4
7
|
|
|
5
8
|
module Jrf
|
|
6
9
|
class CLI
|
|
7
|
-
USAGE = "usage: jrf [
|
|
8
|
-
|
|
10
|
+
USAGE = "usage: jrf [options] 'STAGE >> STAGE >> ...'"
|
|
9
11
|
HELP_TEXT = <<~'TEXT'
|
|
10
|
-
usage: jrf [
|
|
12
|
+
usage: jrf [options] 'STAGE >> STAGE >> ...'
|
|
11
13
|
|
|
12
14
|
JSON filter with the power and speed of Ruby.
|
|
13
15
|
|
|
@@ -15,6 +17,10 @@ module Jrf
|
|
|
15
17
|
-v, --verbose print parsed stage expressions
|
|
16
18
|
--lax allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
|
|
17
19
|
-p, --pretty pretty-print JSON output instead of compact NDJSON
|
|
20
|
+
--no-jit do not enable YJIT, even when supported by the Ruby runtime
|
|
21
|
+
--atomic-write-bytes N
|
|
22
|
+
group short outputs into atomic writes of up to N bytes
|
|
23
|
+
-V, --version show version and exit
|
|
18
24
|
-h, --help show this help and exit
|
|
19
25
|
|
|
20
26
|
Pipeline:
|
|
@@ -36,36 +42,81 @@ module Jrf
|
|
|
36
42
|
verbose = false
|
|
37
43
|
lax = false
|
|
38
44
|
pretty = false
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
45
|
+
jit = true
|
|
46
|
+
atomic_write_bytes = Runner::DEFAULT_OUTPUT_BUFFER_LIMIT
|
|
47
|
+
begin
|
|
48
|
+
parser = OptionParser.new do |opts|
|
|
49
|
+
opts.banner = USAGE
|
|
50
|
+
opts.on("-v", "--verbose", "print parsed stage expressions") { verbose = true }
|
|
51
|
+
opts.on("--lax", "allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)") { lax = true }
|
|
52
|
+
opts.on("-p", "--pretty", "pretty-print JSON output instead of compact NDJSON") { pretty = true }
|
|
53
|
+
opts.on("--no-jit", "do not enable YJIT, even when supported by the Ruby runtime") { jit = false }
|
|
54
|
+
opts.on("--atomic-write-bytes N", Integer, "group short outputs into atomic writes of up to N bytes") do |value|
|
|
55
|
+
if value.positive?
|
|
56
|
+
atomic_write_bytes = value
|
|
57
|
+
else
|
|
58
|
+
raise OptionParser::InvalidArgument, "--atomic-write-bytes requires a positive integer"
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
opts.on("-V", "--version", "show version and exit") do
|
|
62
|
+
out.puts Jrf::VERSION
|
|
63
|
+
exit
|
|
64
|
+
end
|
|
65
|
+
opts.on("-h", "--help", "show this help and exit") do
|
|
66
|
+
out.puts HELP_TEXT
|
|
67
|
+
exit
|
|
68
|
+
end
|
|
58
69
|
end
|
|
70
|
+
|
|
71
|
+
parser.order!(argv)
|
|
72
|
+
rescue OptionParser::ParseError => e
|
|
73
|
+
err.puts e.message
|
|
74
|
+
err.puts USAGE
|
|
75
|
+
exit 1
|
|
59
76
|
end
|
|
60
77
|
|
|
61
78
|
if argv.empty?
|
|
62
79
|
err.puts USAGE
|
|
63
|
-
|
|
80
|
+
exit 1
|
|
64
81
|
end
|
|
65
82
|
|
|
66
83
|
expression = argv.shift
|
|
67
|
-
|
|
68
|
-
|
|
84
|
+
enable_yjit if jit
|
|
85
|
+
|
|
86
|
+
inputs = Enumerator.new do |y|
|
|
87
|
+
if argv.empty?
|
|
88
|
+
y << input
|
|
89
|
+
else
|
|
90
|
+
argv.each do |path|
|
|
91
|
+
if path == "-"
|
|
92
|
+
y << input
|
|
93
|
+
elsif path.end_with?(".gz")
|
|
94
|
+
require "zlib"
|
|
95
|
+
Zlib::GzipReader.open(path) do |source|
|
|
96
|
+
y << source
|
|
97
|
+
end
|
|
98
|
+
else
|
|
99
|
+
File.open(path, "rb") do |source|
|
|
100
|
+
y << source
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
Runner.new(
|
|
107
|
+
inputs: inputs,
|
|
108
|
+
out: out,
|
|
109
|
+
err: err,
|
|
110
|
+
lax: lax,
|
|
111
|
+
pretty: pretty,
|
|
112
|
+
atomic_write_bytes: atomic_write_bytes
|
|
113
|
+
).run(expression, verbose: verbose)
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
def self.enable_yjit
|
|
117
|
+
return unless defined?(RubyVM::YJIT) && RubyVM::YJIT.respond_to?(:enable)
|
|
118
|
+
|
|
119
|
+
RubyVM::YJIT.enable
|
|
69
120
|
end
|
|
70
121
|
end
|
|
71
122
|
end
|
data/lib/jrf/pipeline.rb
CHANGED
|
@@ -22,54 +22,43 @@ module Jrf
|
|
|
22
22
|
# @yieldparam value output value
|
|
23
23
|
# @return [Array, nil] output values (without block), or nil (with block)
|
|
24
24
|
def call(input, &on_output)
|
|
25
|
-
if on_output
|
|
26
|
-
call_streaming(input, &on_output)
|
|
27
|
-
else
|
|
25
|
+
if on_output.nil?
|
|
28
26
|
results = []
|
|
29
|
-
|
|
30
|
-
results
|
|
27
|
+
on_output = proc { |value| results << value }
|
|
31
28
|
end
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
private
|
|
35
29
|
|
|
36
|
-
def call_streaming(input, &on_output)
|
|
37
|
-
error = nil
|
|
38
30
|
begin
|
|
39
31
|
input.each { |value| process_value(value, @stages, &on_output) }
|
|
40
|
-
rescue StandardError => e
|
|
41
|
-
error = e
|
|
42
32
|
ensure
|
|
43
33
|
flush_reducers(@stages, &on_output)
|
|
44
34
|
end
|
|
45
|
-
|
|
35
|
+
|
|
36
|
+
results unless results.nil?
|
|
46
37
|
end
|
|
47
38
|
|
|
48
|
-
|
|
49
|
-
current_values = [input]
|
|
39
|
+
private
|
|
50
40
|
|
|
51
|
-
|
|
52
|
-
|
|
41
|
+
def process_value(value, stages, idx = 0, &on_output)
|
|
42
|
+
while idx < stages.length
|
|
43
|
+
value = stages[idx].call(value)
|
|
53
44
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
else
|
|
64
|
-
next_values << out
|
|
45
|
+
if value.equal?(Control::DROPPED)
|
|
46
|
+
return
|
|
47
|
+
elsif value.is_a?(Control::Flat)
|
|
48
|
+
value = value.value
|
|
49
|
+
unless value.is_a?(Array)
|
|
50
|
+
raise TypeError, "flat expects Array, got #{value.class}"
|
|
51
|
+
end
|
|
52
|
+
value.each do |child|
|
|
53
|
+
process_value(child, stages, idx + 1, &on_output)
|
|
65
54
|
end
|
|
55
|
+
return
|
|
66
56
|
end
|
|
67
57
|
|
|
68
|
-
|
|
69
|
-
current_values = next_values
|
|
58
|
+
idx += 1
|
|
70
59
|
end
|
|
71
60
|
|
|
72
|
-
|
|
61
|
+
on_output.call(value)
|
|
73
62
|
end
|
|
74
63
|
|
|
75
64
|
def flush_reducers(stages, &on_output)
|
data/lib/jrf/row_context.rb
CHANGED
|
@@ -13,7 +13,7 @@ module Jrf
|
|
|
13
13
|
def define_reducer(name, &definition)
|
|
14
14
|
define_method(name) do |*args, **kwargs, &block|
|
|
15
15
|
spec = definition.call(self, *args, **kwargs, block: block)
|
|
16
|
-
@__jrf_current_stage.
|
|
16
|
+
@__jrf_current_stage.step_reduce(
|
|
17
17
|
spec.fetch(:value),
|
|
18
18
|
initial: reducer_initial_value(spec.fetch(:initial)),
|
|
19
19
|
finish: spec[:finish],
|
|
@@ -161,24 +161,24 @@ module Jrf
|
|
|
161
161
|
def reduce(initial, &block)
|
|
162
162
|
raise ArgumentError, "reduce requires a block" unless block
|
|
163
163
|
|
|
164
|
-
@__jrf_current_stage.
|
|
164
|
+
@__jrf_current_stage.step_reduce(current_input, initial: initial, &block)
|
|
165
165
|
end
|
|
166
166
|
|
|
167
167
|
def map(&block)
|
|
168
168
|
raise ArgumentError, "map requires a block" unless block
|
|
169
169
|
|
|
170
|
-
@__jrf_current_stage.
|
|
170
|
+
@__jrf_current_stage.step_map(:map, @obj, &block)
|
|
171
171
|
end
|
|
172
172
|
|
|
173
173
|
def map_values(&block)
|
|
174
174
|
raise ArgumentError, "map_values requires a block" unless block
|
|
175
175
|
|
|
176
|
-
@__jrf_current_stage.
|
|
176
|
+
@__jrf_current_stage.step_map(:map_values, @obj, &block)
|
|
177
177
|
end
|
|
178
178
|
|
|
179
179
|
def group_by(key, &block)
|
|
180
180
|
block ||= proc { group }
|
|
181
|
-
@__jrf_current_stage.
|
|
181
|
+
@__jrf_current_stage.step_group_by(key, &block)
|
|
182
182
|
end
|
|
183
183
|
|
|
184
184
|
private
|
data/lib/jrf/stage.rb
CHANGED
|
@@ -39,39 +39,52 @@ module Jrf
|
|
|
39
39
|
@ctx.__jrf_current_stage = self
|
|
40
40
|
result = @ctx.instance_eval(&@block)
|
|
41
41
|
|
|
42
|
-
if @mode.nil?
|
|
43
|
-
@
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
42
|
+
if @mode.nil?
|
|
43
|
+
if @reducers.any?
|
|
44
|
+
@mode = :reducer
|
|
45
|
+
@template = result
|
|
46
|
+
else
|
|
47
|
+
@mode = :passthrough
|
|
48
|
+
end
|
|
47
49
|
end
|
|
48
50
|
|
|
49
51
|
(@mode == :reducer) ? Control::DROPPED : result
|
|
50
52
|
end
|
|
51
53
|
|
|
52
|
-
def
|
|
54
|
+
def step_reduce(value, initial:, finish: nil, &step_fn)
|
|
53
55
|
idx = @cursor
|
|
54
|
-
|
|
55
|
-
@reducers[idx]
|
|
56
|
+
|
|
57
|
+
if @reducers[idx].nil?
|
|
58
|
+
finish_rows = finish || ->(acc) { [acc] }
|
|
59
|
+
@reducers[idx] = Reducers.reduce(initial, finish: finish_rows, &step_fn)
|
|
60
|
+
result = ReducerToken.new(idx)
|
|
61
|
+
else
|
|
62
|
+
result = Control::DROPPED
|
|
63
|
+
end
|
|
64
|
+
|
|
56
65
|
@reducers[idx].step(value)
|
|
57
|
-
@cursor
|
|
58
|
-
|
|
66
|
+
@cursor = idx + 1
|
|
67
|
+
result
|
|
59
68
|
end
|
|
60
69
|
|
|
61
|
-
def
|
|
70
|
+
def step_map(builtin, collection, &block)
|
|
62
71
|
idx = @cursor
|
|
63
72
|
@cursor += 1
|
|
64
73
|
|
|
74
|
+
if collection.is_a?(Array)
|
|
75
|
+
raise TypeError, "map_values expects Hash, got Array" if builtin == :map_values
|
|
76
|
+
elsif !collection.is_a?(Hash)
|
|
77
|
+
raise TypeError, "#{builtin} expects #{builtin == :map_values ? "Hash" : "Array or Hash"}, got #{collection.class}"
|
|
78
|
+
end
|
|
79
|
+
|
|
65
80
|
# Transformation mode (detected on first call)
|
|
66
81
|
if @map_transforms[idx]
|
|
67
|
-
return transform_collection(
|
|
82
|
+
return transform_collection(builtin, collection, &block)
|
|
68
83
|
end
|
|
69
84
|
|
|
70
|
-
map_reducer = (@reducers[idx] ||= MapReducer.new(
|
|
85
|
+
map_reducer = (@reducers[idx] ||= MapReducer.new(builtin, collection.is_a?(Array)))
|
|
71
86
|
|
|
72
|
-
|
|
73
|
-
when :array
|
|
74
|
-
raise TypeError, "map expects Array, got #{collection.class}" unless collection.is_a?(Array)
|
|
87
|
+
if collection.is_a?(Array)
|
|
75
88
|
collection.each_with_index do |v, i|
|
|
76
89
|
slot = map_reducer.slot(i)
|
|
77
90
|
with_scoped_reducers(slot.reducers) do
|
|
@@ -79,12 +92,11 @@ module Jrf
|
|
|
79
92
|
slot.template ||= result
|
|
80
93
|
end
|
|
81
94
|
end
|
|
82
|
-
|
|
83
|
-
raise TypeError, "map_values expects Hash, got #{collection.class}" unless collection.is_a?(Hash)
|
|
95
|
+
else
|
|
84
96
|
collection.each do |k, v|
|
|
85
97
|
slot = map_reducer.slot(k)
|
|
86
98
|
with_scoped_reducers(slot.reducers) do
|
|
87
|
-
result = @ctx.send(:__jrf_with_current_input, v) { block
|
|
99
|
+
result = @ctx.send(:__jrf_with_current_input, v) { invoke_block(builtin, block, k, v) }
|
|
88
100
|
slot.template ||= result
|
|
89
101
|
end
|
|
90
102
|
end
|
|
@@ -94,15 +106,15 @@ module Jrf
|
|
|
94
106
|
if @mode.nil? && map_reducer.slots.values.all? { |s| s.reducers.empty? }
|
|
95
107
|
@map_transforms[idx] = true
|
|
96
108
|
@reducers[idx] = nil
|
|
97
|
-
return transformed_slots(
|
|
109
|
+
return transformed_slots(builtin, map_reducer)
|
|
98
110
|
end
|
|
99
111
|
|
|
100
112
|
ReducerToken.new(idx)
|
|
101
113
|
end
|
|
102
114
|
|
|
103
|
-
def
|
|
115
|
+
def step_group_by(key, &block)
|
|
104
116
|
idx = @cursor
|
|
105
|
-
map_reducer = (@reducers[idx] ||= MapReducer.new(:
|
|
117
|
+
map_reducer = (@reducers[idx] ||= MapReducer.new(:group_by, false))
|
|
106
118
|
|
|
107
119
|
row = @ctx._
|
|
108
120
|
slot = map_reducer.slot(key)
|
|
@@ -138,55 +150,82 @@ module Jrf
|
|
|
138
150
|
@cursor = saved_cursor
|
|
139
151
|
end
|
|
140
152
|
|
|
141
|
-
def
|
|
142
|
-
case
|
|
143
|
-
when :
|
|
144
|
-
|
|
153
|
+
def invoke_block(builtin, block, key, value)
|
|
154
|
+
case builtin
|
|
155
|
+
when :map then block.call([key, value])
|
|
156
|
+
when :map_values then block.call(value)
|
|
157
|
+
else raise ArgumentError, "unexpected builtin: #{builtin}"
|
|
158
|
+
end
|
|
159
|
+
end
|
|
145
160
|
|
|
161
|
+
def transform_collection(builtin, collection, &block)
|
|
162
|
+
if collection.is_a?(Array)
|
|
146
163
|
collection.each_with_object([]) do |value, result|
|
|
147
164
|
mapped = @ctx.send(:__jrf_with_current_input, value) { block.call(value) }
|
|
148
|
-
|
|
165
|
+
append_result(result, mapped, builtin)
|
|
149
166
|
end
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
167
|
+
else
|
|
168
|
+
case builtin
|
|
169
|
+
when :map
|
|
170
|
+
collection.each_with_object([]) do |(key, value), result|
|
|
171
|
+
mapped = @ctx.send(:__jrf_with_current_input, value) { invoke_block(builtin, block, key, value) }
|
|
172
|
+
append_result(result, mapped, builtin)
|
|
173
|
+
end
|
|
174
|
+
when :map_values
|
|
175
|
+
collection.each_with_object({}) do |(key, value), result|
|
|
176
|
+
mapped = @ctx.send(:__jrf_with_current_input, value) { invoke_block(builtin, block, key, value) }
|
|
177
|
+
next if mapped.equal?(Control::DROPPED)
|
|
178
|
+
raise TypeError, "flat is not supported inside map_values" if mapped.is_a?(Control::Flat)
|
|
157
179
|
|
|
158
|
-
|
|
180
|
+
result[key] = mapped
|
|
181
|
+
end
|
|
182
|
+
else
|
|
183
|
+
raise ArgumentError, "unexpected builtin: #{builtin}"
|
|
159
184
|
end
|
|
160
185
|
end
|
|
161
186
|
end
|
|
162
187
|
|
|
163
|
-
def transformed_slots(
|
|
164
|
-
|
|
165
|
-
when :array
|
|
188
|
+
def transformed_slots(builtin, map_reducer)
|
|
189
|
+
if map_reducer.array_input?
|
|
166
190
|
map_reducer.slots
|
|
167
191
|
.sort_by { |k, _| k }
|
|
168
192
|
.each_with_object([]) do |(_, slot), result|
|
|
169
|
-
|
|
193
|
+
append_result(result, slot.template, builtin)
|
|
194
|
+
end
|
|
195
|
+
else
|
|
196
|
+
case builtin
|
|
197
|
+
when :map
|
|
198
|
+
map_reducer.slots.each_with_object([]) do |(_key, slot), result|
|
|
199
|
+
append_result(result, slot.template, builtin)
|
|
170
200
|
end
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
201
|
+
when :map_values
|
|
202
|
+
map_reducer.slots.each_with_object({}) do |(key, slot), result|
|
|
203
|
+
next if slot.template.equal?(Control::DROPPED)
|
|
204
|
+
raise TypeError, "flat is not supported inside map_values" if slot.template.is_a?(Control::Flat)
|
|
175
205
|
|
|
176
|
-
|
|
206
|
+
result[key] = slot.template
|
|
207
|
+
end
|
|
208
|
+
else
|
|
209
|
+
raise ArgumentError, "unexpected builtin: #{builtin}"
|
|
177
210
|
end
|
|
178
211
|
end
|
|
179
212
|
end
|
|
180
213
|
|
|
181
|
-
def
|
|
214
|
+
def append_result(result, mapped, builtin)
|
|
182
215
|
return if mapped.equal?(Control::DROPPED)
|
|
183
216
|
|
|
184
217
|
if mapped.is_a?(Control::Flat)
|
|
185
|
-
|
|
186
|
-
|
|
218
|
+
case builtin
|
|
219
|
+
when :map
|
|
220
|
+
unless mapped.value.is_a?(Array)
|
|
221
|
+
raise TypeError, "flat expects Array, got #{mapped.value.class}"
|
|
222
|
+
end
|
|
223
|
+
result.concat(mapped.value)
|
|
224
|
+
when :map_values
|
|
225
|
+
raise TypeError, "flat is not supported inside map_values"
|
|
226
|
+
else
|
|
227
|
+
raise ArgumentError, "unexpected builtin: #{builtin}"
|
|
187
228
|
end
|
|
188
|
-
|
|
189
|
-
result.concat(mapped.value)
|
|
190
229
|
else
|
|
191
230
|
result << mapped
|
|
192
231
|
end
|
|
@@ -195,24 +234,35 @@ module Jrf
|
|
|
195
234
|
class MapReducer
|
|
196
235
|
attr_reader :slots
|
|
197
236
|
|
|
198
|
-
def initialize(
|
|
199
|
-
@
|
|
237
|
+
def initialize(builtin, array_input)
|
|
238
|
+
@builtin = builtin
|
|
239
|
+
@array_input = array_input
|
|
200
240
|
@slots = {}
|
|
201
241
|
end
|
|
202
242
|
|
|
243
|
+
def array_input?
|
|
244
|
+
@array_input
|
|
245
|
+
end
|
|
246
|
+
|
|
203
247
|
def slot(key)
|
|
204
248
|
@slots[key] ||= SlotState.new
|
|
205
249
|
end
|
|
206
250
|
|
|
207
251
|
def finish
|
|
208
|
-
|
|
209
|
-
when :array
|
|
252
|
+
if @array_input
|
|
210
253
|
keys = @slots.keys.sort
|
|
211
254
|
[keys.map { |k| Stage.resolve_template(@slots[k].template, @slots[k].reducers) }]
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
255
|
+
else
|
|
256
|
+
case @builtin
|
|
257
|
+
when :map
|
|
258
|
+
[@slots.map { |_k, s| Stage.resolve_template(s.template, s.reducers) }]
|
|
259
|
+
when :map_values, :group_by
|
|
260
|
+
result = {}
|
|
261
|
+
@slots.each { |k, s| result[k] = Stage.resolve_template(s.template, s.reducers) }
|
|
262
|
+
[result]
|
|
263
|
+
else
|
|
264
|
+
raise ArgumentError, "unexpected builtin: #{@builtin}"
|
|
265
|
+
end
|
|
216
266
|
end
|
|
217
267
|
end
|
|
218
268
|
|
data/lib/jrf/version.rb
CHANGED
data/test/jrf_test.rb
CHANGED
|
@@ -1,7 +1,17 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
begin
|
|
4
|
+
require "bundler/setup"
|
|
5
|
+
rescue LoadError
|
|
6
|
+
# Allow running tests in plain Ruby environments with globally installed gems.
|
|
7
|
+
end
|
|
8
|
+
|
|
3
9
|
require "json"
|
|
4
10
|
require "open3"
|
|
11
|
+
require "stringio"
|
|
12
|
+
require "tmpdir"
|
|
13
|
+
require "zlib"
|
|
14
|
+
require_relative "../lib/jrf/cli/runner"
|
|
5
15
|
|
|
6
16
|
def run_jrf(expr, input, *opts)
|
|
7
17
|
Open3.capture3("./exe/jrf", *opts, expr, stdin_data: input)
|
|
@@ -41,6 +51,45 @@ def lines(str)
|
|
|
41
51
|
str.lines.map(&:strip).reject(&:empty?)
|
|
42
52
|
end
|
|
43
53
|
|
|
54
|
+
class RecordingRunner < Jrf::CLI::Runner
|
|
55
|
+
attr_reader :writes
|
|
56
|
+
|
|
57
|
+
def initialize(**kwargs)
|
|
58
|
+
super
|
|
59
|
+
@writes = []
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
private
|
|
63
|
+
|
|
64
|
+
def write_output(str)
|
|
65
|
+
return if str.empty?
|
|
66
|
+
|
|
67
|
+
@writes << str
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
class ChunkedSource
|
|
72
|
+
def initialize(str, chunk_size: 5)
|
|
73
|
+
@str = str
|
|
74
|
+
@chunk_size = chunk_size
|
|
75
|
+
@offset = 0
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def read(length = nil, outbuf = nil)
|
|
79
|
+
raise "expected chunked reads" if length.nil?
|
|
80
|
+
|
|
81
|
+
chunk = @str.byteslice(@offset, [length, @chunk_size].min)
|
|
82
|
+
return nil unless chunk
|
|
83
|
+
|
|
84
|
+
@offset += chunk.bytesize
|
|
85
|
+
if outbuf
|
|
86
|
+
outbuf.replace(chunk)
|
|
87
|
+
else
|
|
88
|
+
chunk
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
44
93
|
File.chmod(0o755, "./exe/jrf")
|
|
45
94
|
|
|
46
95
|
input = <<~NDJSON
|
|
@@ -92,10 +141,14 @@ assert_includes(stderr, 'stage[1]: _["hello"]')
|
|
|
92
141
|
|
|
93
142
|
stdout, stderr, status = Open3.capture3("./exe/jrf", "--help")
|
|
94
143
|
assert_success(status, stderr, "help option")
|
|
95
|
-
assert_includes(stdout, "usage: jrf [
|
|
144
|
+
assert_includes(stdout, "usage: jrf [options] 'STAGE >> STAGE >> ...'")
|
|
96
145
|
assert_includes(stdout, "JSON filter with the power and speed of Ruby.")
|
|
97
146
|
assert_includes(stdout, "--lax")
|
|
98
147
|
assert_includes(stdout, "--pretty")
|
|
148
|
+
assert_includes(stdout, "--no-jit")
|
|
149
|
+
assert_includes(stdout, "-V")
|
|
150
|
+
assert_includes(stdout, "--version")
|
|
151
|
+
assert_includes(stdout, "--atomic-write-bytes N")
|
|
99
152
|
assert_includes(stdout, "Pipeline:")
|
|
100
153
|
assert_includes(stdout, "Connect stages with top-level >>.")
|
|
101
154
|
assert_includes(stdout, "The current value in each stage is available as _.")
|
|
@@ -103,11 +156,94 @@ assert_includes(stdout, "See Also:")
|
|
|
103
156
|
assert_includes(stdout, "https://github.com/kazuho/jrf#readme")
|
|
104
157
|
assert_equal([], lines(stderr), "help stderr output")
|
|
105
158
|
|
|
159
|
+
stdout, stderr, status = Open3.capture3("./exe/jrf", "--version")
|
|
160
|
+
assert_success(status, stderr, "version long option")
|
|
161
|
+
assert_equal([Jrf::VERSION], lines(stdout), "version long option output")
|
|
162
|
+
assert_equal([], lines(stderr), "version long option stderr")
|
|
163
|
+
|
|
164
|
+
stdout, stderr, status = Open3.capture3("./exe/jrf", "-V")
|
|
165
|
+
assert_success(status, stderr, "version short option")
|
|
166
|
+
assert_equal([Jrf::VERSION], lines(stdout), "version short option output")
|
|
167
|
+
assert_equal([], lines(stderr), "version short option stderr")
|
|
168
|
+
|
|
169
|
+
threshold_input = StringIO.new((1..4).map { |i| "{\"foo\":\"#{'x' * 1020}\",\"i\":#{i}}\n" }.join)
|
|
170
|
+
buffered_runner = RecordingRunner.new(inputs: [threshold_input], out: StringIO.new, err: StringIO.new)
|
|
171
|
+
buffered_runner.run('_')
|
|
172
|
+
expected_line = JSON.generate({"foo" => "x" * 1020, "i" => 1}) + "\n"
|
|
173
|
+
assert_equal(2, buffered_runner.writes.length, "default atomic write limit buffers records until the configured threshold")
|
|
174
|
+
assert_equal(expected_line.bytesize * 3, buffered_runner.writes.first.bytesize, "default atomic write limit flushes before the next record would exceed the threshold")
|
|
175
|
+
assert_equal(expected_line.bytesize, buffered_runner.writes.last.bytesize, "final buffer flush emits the remaining record")
|
|
176
|
+
|
|
177
|
+
small_limit_runner = RecordingRunner.new(inputs: [StringIO.new("{\"foo\":1}\n{\"foo\":2}\n")], out: StringIO.new, err: StringIO.new, atomic_write_bytes: 1)
|
|
178
|
+
small_limit_runner.run('_["foo"]')
|
|
179
|
+
assert_equal(["1\n", "2\n"], small_limit_runner.writes, "small atomic write limit emits oversized records directly")
|
|
180
|
+
|
|
181
|
+
error_runner = RecordingRunner.new(inputs: [StringIO.new("{\"foo\":1}\n{\"foo\":")], out: StringIO.new, err: StringIO.new)
|
|
182
|
+
begin
|
|
183
|
+
error_runner.run('_["foo"]')
|
|
184
|
+
raise "expected parse error for buffered flush test"
|
|
185
|
+
rescue JSON::ParserError
|
|
186
|
+
assert_equal(["1\n"], error_runner.writes, "buffer flushes pending output before parse errors escape")
|
|
187
|
+
end
|
|
188
|
+
|
|
106
189
|
stdout, stderr, status = run_jrf('select(_["hello"] == 123) >> _["hello"]', input_hello, "--verbose")
|
|
107
190
|
assert_success(status, stderr, "dump stages verbose alias")
|
|
108
191
|
assert_equal(%w[123], lines(stdout), "dump stages verbose alias output")
|
|
109
192
|
assert_includes(stderr, 'stage[0]: select(_["hello"] == 123)')
|
|
110
193
|
|
|
194
|
+
stdout, stderr, status = run_jrf('_["hello"]', input_hello, "--atomic-write-bytes", "512")
|
|
195
|
+
assert_success(status, stderr, "atomic write bytes option")
|
|
196
|
+
assert_equal(%w[123 456], lines(stdout), "atomic write bytes option output")
|
|
197
|
+
|
|
198
|
+
stdout, stderr, status = run_jrf('_["hello"]', input_hello, "--atomic-write-bytes=512")
|
|
199
|
+
assert_success(status, stderr, "atomic write bytes equals form")
|
|
200
|
+
assert_equal(%w[123 456], lines(stdout), "atomic write bytes equals form output")
|
|
201
|
+
|
|
202
|
+
stdout, stderr, status = Open3.capture3("./exe/jrf", "--atomic-write-bytes", "0", '_["hello"]', stdin_data: input_hello)
|
|
203
|
+
assert_failure(status, "atomic write bytes rejects zero")
|
|
204
|
+
assert_includes(stderr, "--atomic-write-bytes requires a positive integer")
|
|
205
|
+
|
|
206
|
+
if defined?(RubyVM::YJIT) && RubyVM::YJIT.respond_to?(:enabled?)
|
|
207
|
+
yjit_probe = "{\"probe\":1}\n"
|
|
208
|
+
|
|
209
|
+
stdout, stderr, status = run_jrf('RubyVM::YJIT.enabled?', yjit_probe)
|
|
210
|
+
assert_success(status, stderr, "default jit enablement")
|
|
211
|
+
assert_equal(%w[true], lines(stdout), "default jit enablement output")
|
|
212
|
+
|
|
213
|
+
stdout, stderr, status = run_jrf('RubyVM::YJIT.enabled?', yjit_probe, "--no-jit")
|
|
214
|
+
assert_success(status, stderr, "no-jit option")
|
|
215
|
+
assert_equal(%w[false], lines(stdout), "no-jit option output")
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
Dir.mktmpdir do |dir|
|
|
219
|
+
gz_path = File.join(dir, "input.ndjson.gz")
|
|
220
|
+
Zlib::GzipWriter.open(gz_path) do |io|
|
|
221
|
+
io.write("{\"foo\":10}\n{\"foo\":20}\n")
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
stdout, stderr, status = Open3.capture3("./exe/jrf", '_["foo"]', gz_path)
|
|
225
|
+
assert_success(status, stderr, "compressed input by suffix")
|
|
226
|
+
assert_equal(%w[10 20], lines(stdout), "compressed input output")
|
|
227
|
+
|
|
228
|
+
lax_gz_path = File.join(dir, "input-lax.json.gz")
|
|
229
|
+
Zlib::GzipWriter.open(lax_gz_path) do |io|
|
|
230
|
+
io.write("{\"foo\":30}\n\x1e{\"foo\":40}\n")
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
stdout, stderr, status = Open3.capture3("./exe/jrf", "--lax", '_["foo"]', lax_gz_path)
|
|
234
|
+
assert_success(status, stderr, "compressed lax input by suffix")
|
|
235
|
+
assert_equal(%w[30 40], lines(stdout), "compressed lax input output")
|
|
236
|
+
|
|
237
|
+
second_gz_path = File.join(dir, "input2.ndjson.gz")
|
|
238
|
+
Zlib::GzipWriter.open(second_gz_path) do |io|
|
|
239
|
+
io.write("{\"foo\":50}\n")
|
|
240
|
+
end
|
|
241
|
+
|
|
242
|
+
stdout, stderr, status = Open3.capture3("./exe/jrf", '_["foo"]', gz_path, second_gz_path)
|
|
243
|
+
assert_success(status, stderr, "multiple compressed inputs by suffix")
|
|
244
|
+
assert_equal(%w[10 20 50], lines(stdout), "multiple compressed input output")
|
|
245
|
+
end
|
|
246
|
+
|
|
111
247
|
stdout, stderr, status = run_jrf('_', input_hello, "--pretty")
|
|
112
248
|
assert_success(status, stderr, "pretty output")
|
|
113
249
|
assert_equal(
|
|
@@ -493,6 +629,26 @@ stdout, stderr, status = run_jrf('_["foo"]', input_lax_trailing_rs, "--lax")
|
|
|
493
629
|
assert_success(status, stderr, "lax ignores trailing separator")
|
|
494
630
|
assert_equal(%w[9], lines(stdout), "lax trailing separator output")
|
|
495
631
|
|
|
632
|
+
chunked_lax_out = RecordingRunner.new(
|
|
633
|
+
inputs: [ChunkedSource.new("{\"foo\":1}\n\x1e{\"foo\":2}\n\t{\"foo\":3}\n")],
|
|
634
|
+
out: StringIO.new,
|
|
635
|
+
err: StringIO.new,
|
|
636
|
+
lax: true
|
|
637
|
+
)
|
|
638
|
+
chunked_lax_out.run('_["foo"]')
|
|
639
|
+
assert_equal(%w[1 2 3], lines(chunked_lax_out.writes.join), "lax mode streams chunked input without whole-input reads")
|
|
640
|
+
|
|
641
|
+
Dir.mktmpdir do |dir|
|
|
642
|
+
one = File.join(dir, "one.json")
|
|
643
|
+
two = File.join(dir, "two.json")
|
|
644
|
+
File.write(one, "1")
|
|
645
|
+
File.write(two, "2")
|
|
646
|
+
|
|
647
|
+
stdout, stderr, status = Open3.capture3("./exe/jrf", "--lax", "_", one, two)
|
|
648
|
+
assert_success(status, stderr, "lax keeps file boundaries")
|
|
649
|
+
assert_equal(%w[1 2], lines(stdout), "lax does not merge JSON across file boundaries")
|
|
650
|
+
end
|
|
651
|
+
|
|
496
652
|
stdout, stderr, status = run_jrf('select(_["x"] > ) >> _["foo"]', "")
|
|
497
653
|
assert_failure(status, "syntax error should fail before row loop")
|
|
498
654
|
assert_includes(stderr, "syntax error")
|
|
@@ -616,6 +772,26 @@ stdout, stderr, status = run_jrf('map_values { |v| reduce(0) { |acc, x| acc + x
|
|
|
616
772
|
assert_success(status, stderr, "map_values with reduce")
|
|
617
773
|
assert_equal(['{"a":6,"b":60}'], lines(stdout), "map_values with reduce output")
|
|
618
774
|
|
|
775
|
+
stdout, stderr, status = run_jrf('map { |k, v| "#{k}:#{v}" }', input_map_values)
|
|
776
|
+
assert_success(status, stderr, "map over hash transform")
|
|
777
|
+
assert_equal(['["a:1","b:10"]', '["a:2","b:20"]', '["a:3","b:30"]'], lines(stdout), "map over hash transform output")
|
|
778
|
+
|
|
779
|
+
stdout, stderr, status = run_jrf('map { |pair| pair }', input_map_values)
|
|
780
|
+
assert_success(status, stderr, "map over hash single block arg")
|
|
781
|
+
assert_equal(['[["a",1],["b",10]]', '[["a",2],["b",20]]', '[["a",3],["b",30]]'], lines(stdout), "map over hash single block arg output")
|
|
782
|
+
|
|
783
|
+
stdout, stderr, status = run_jrf('map { |k, v| select(v >= 10 && k != "a") }', input_map_values)
|
|
784
|
+
assert_success(status, stderr, "map over hash transform with select")
|
|
785
|
+
assert_equal(['[10]', '[20]', '[30]'], lines(stdout), "map over hash transform with select output")
|
|
786
|
+
|
|
787
|
+
stdout, stderr, status = run_jrf('map { |k, v| sum(v + k.length) }', input_map_values)
|
|
788
|
+
assert_success(status, stderr, "map over hash with sum")
|
|
789
|
+
assert_equal(['[9,63]'], lines(stdout), "map over hash with sum output")
|
|
790
|
+
|
|
791
|
+
stdout, stderr, status = run_jrf('map { |k, v| sum(_["a"] + v + k.length) }', input_map_values)
|
|
792
|
+
assert_success(status, stderr, "map over hash keeps ambient _")
|
|
793
|
+
assert_equal(['[15,69]'], lines(stdout), "map over hash ambient _ output")
|
|
794
|
+
|
|
619
795
|
stdout, stderr, status = run_jrf('select(false) >> map { |x| sum(x) }', input_map)
|
|
620
796
|
assert_success(status, stderr, "map no matches")
|
|
621
797
|
assert_equal([], lines(stdout), "map no matches output")
|
|
@@ -750,6 +926,18 @@ assert_equal([[4, 6]], j.call([[1, 2], [3, 4]]), "library map reduce")
|
|
|
750
926
|
j = Jrf.new(proc { map_values { |v| v * 10 } })
|
|
751
927
|
assert_equal([{"a" => 10, "b" => 20}], j.call([{"a" => 1, "b" => 2}]), "library map_values transform")
|
|
752
928
|
|
|
929
|
+
# map hash transform
|
|
930
|
+
j = Jrf.new(proc { map { |k, v| "#{k}=#{v}" } })
|
|
931
|
+
assert_equal([["a=1", "b=2"]], j.call([{"a" => 1, "b" => 2}]), "library map hash transform")
|
|
932
|
+
|
|
933
|
+
# map hash single block arg
|
|
934
|
+
j = Jrf.new(proc { map { |pair| pair } })
|
|
935
|
+
assert_equal([[["a", 1], ["b", 2]]], j.call([{"a" => 1, "b" => 2}]), "library map hash single block arg")
|
|
936
|
+
|
|
937
|
+
# map hash reduce
|
|
938
|
+
j = Jrf.new(proc { map { |k, v| sum(v + k.length) } })
|
|
939
|
+
assert_equal([[5, 7]], j.call([{"a" => 1, "b" => 2}, {"a" => 2, "b" => 3}]), "library map hash reduce")
|
|
940
|
+
|
|
753
941
|
# group_by
|
|
754
942
|
j = Jrf.new(proc { group_by(_["k"]) { count() } })
|
|
755
943
|
assert_equal([{"x" => 2, "y" => 1}], j.call([{"k" => "x"}, {"k" => "x"}, {"k" => "y"}]), "library group_by")
|
|
@@ -770,4 +958,13 @@ assert_equal([{"a" => 3}], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "librar
|
|
|
770
958
|
j = Jrf.new(proc { sum(_) })
|
|
771
959
|
assert_equal([], j.call([]), "library empty input")
|
|
772
960
|
|
|
961
|
+
ctx = Jrf::RowContext.new
|
|
962
|
+
stage = Jrf::Stage.new(ctx, proc { })
|
|
963
|
+
first_token = stage.step_reduce(1, initial: 0) { |acc, v| acc + v }
|
|
964
|
+
assert_equal(0, first_token.index, "step_reduce returns token while classifying reducer stage")
|
|
965
|
+
stage.instance_variable_set(:@mode, :reducer)
|
|
966
|
+
stage.instance_variable_set(:@cursor, 0)
|
|
967
|
+
second_token = stage.step_reduce(2, initial: 0) { |acc, v| acc + v }
|
|
968
|
+
raise "expected DROPPED for established reducer slot" unless second_token.equal?(Jrf::Control::DROPPED)
|
|
969
|
+
|
|
773
970
|
puts "ok"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: jrf
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.5
|
|
4
|
+
version: 0.1.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- kazuho
|
|
@@ -40,12 +40,12 @@ files:
|
|
|
40
40
|
- jrf.gemspec
|
|
41
41
|
- lib/jrf.rb
|
|
42
42
|
- lib/jrf/cli.rb
|
|
43
|
+
- lib/jrf/cli/runner.rb
|
|
43
44
|
- lib/jrf/control.rb
|
|
44
45
|
- lib/jrf/pipeline.rb
|
|
45
46
|
- lib/jrf/pipeline_parser.rb
|
|
46
47
|
- lib/jrf/reducers.rb
|
|
47
48
|
- lib/jrf/row_context.rb
|
|
48
|
-
- lib/jrf/runner.rb
|
|
49
49
|
- lib/jrf/stage.rb
|
|
50
50
|
- lib/jrf/version.rb
|
|
51
51
|
- test/jrf_test.rb
|
data/lib/jrf/runner.rb
DELETED
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "json"
|
|
4
|
-
require_relative "pipeline"
|
|
5
|
-
require_relative "pipeline_parser"
|
|
6
|
-
|
|
7
|
-
module Jrf
|
|
8
|
-
class Runner
|
|
9
|
-
RS_CHAR = "\x1e"
|
|
10
|
-
|
|
11
|
-
def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false, pretty: false)
|
|
12
|
-
@input = input
|
|
13
|
-
@out = out
|
|
14
|
-
@err = err
|
|
15
|
-
@lax = lax
|
|
16
|
-
@pretty = pretty
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
def run(expression, verbose: false)
|
|
20
|
-
parsed = PipelineParser.new(expression).parse
|
|
21
|
-
stages = parsed[:stages]
|
|
22
|
-
dump_stages(stages) if verbose
|
|
23
|
-
|
|
24
|
-
blocks = stages.map { |stage|
|
|
25
|
-
eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
|
|
26
|
-
}
|
|
27
|
-
pipeline = Pipeline.new(*blocks)
|
|
28
|
-
|
|
29
|
-
input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
|
|
30
|
-
pipeline.call(input_enum) do |value|
|
|
31
|
-
@out.puts(@pretty ? JSON.pretty_generate(value) : JSON.generate(value))
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
private
|
|
36
|
-
|
|
37
|
-
def each_input_value
|
|
38
|
-
return each_input_value_lax { |value| yield value } if @lax
|
|
39
|
-
|
|
40
|
-
each_input_value_ndjson { |value| yield value }
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
def each_input_value_ndjson
|
|
44
|
-
@input.each_line do |raw_line|
|
|
45
|
-
line = raw_line.strip
|
|
46
|
-
next if line.empty?
|
|
47
|
-
|
|
48
|
-
yield JSON.parse(line)
|
|
49
|
-
end
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
def each_input_value_lax
|
|
53
|
-
require "oj"
|
|
54
|
-
source = @input.read.to_s
|
|
55
|
-
source = source.include?(RS_CHAR) ? source.tr(RS_CHAR, "\n") : source
|
|
56
|
-
handler = Class.new(Oj::ScHandler) do
|
|
57
|
-
def initialize(&emit)
|
|
58
|
-
@emit = emit
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
def hash_start = {}
|
|
62
|
-
def hash_key(key) = key
|
|
63
|
-
def hash_set(hash, key, value) = hash[key] = value
|
|
64
|
-
def array_start = []
|
|
65
|
-
def array_append(array, value) = array << value
|
|
66
|
-
def add_value(value) = @emit.call(value)
|
|
67
|
-
end.new { |value| yield value }
|
|
68
|
-
Oj.sc_parse(handler, source)
|
|
69
|
-
rescue LoadError
|
|
70
|
-
raise "oj is required for --lax mode (gem install oj)"
|
|
71
|
-
rescue Oj::ParseError => e
|
|
72
|
-
raise JSON::ParserError, e.message
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
def dump_stages(stages)
|
|
76
|
-
stages.each_with_index do |stage, i|
|
|
77
|
-
@err.puts "stage[#{i}]: #{stage[:src]}"
|
|
78
|
-
end
|
|
79
|
-
end
|
|
80
|
-
end
|
|
81
|
-
end
|