jrf 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/jrf/cli/runner.rb +126 -0
- data/lib/jrf/cli.rb +50 -4
- data/lib/jrf/version.rb +1 -1
- data/test/jrf_test.rb +132 -1
- metadata +2 -2
- data/lib/jrf/runner.rb +0 -81
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e9bb2a3a16d2bbe8cfb463267ff74d7d582511d4b4891e56ad3dfa6eee75fceb
|
|
4
|
+
data.tar.gz: a13b2e9c8517c3da997452166556505b24fc4d5f898765ad33495eafd57c3081
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 54b400cdaba584896f2511acfe9a41ef10af25033bf88cfc6e0386eaa840df9395fb0d008c320b3193d55a9c3fad444a7f54bd29f52c34f69bc9a9cf392a7809
|
|
7
|
+
data.tar.gz: 80c72675e179da483316bfeaee7114da6edb49dc66ae179aa072d48907c4c9caf74113c6681b2f4a83f4b97da6faac436f5d6af5bd31e82605b122d85892cede
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require_relative "../pipeline"
|
|
5
|
+
require_relative "../pipeline_parser"
|
|
6
|
+
|
|
7
|
+
module Jrf
|
|
8
|
+
class CLI
|
|
9
|
+
class Runner
|
|
10
|
+
RS_CHAR = "\x1e"
|
|
11
|
+
DEFAULT_OUTPUT_BUFFER_LIMIT = 4096
|
|
12
|
+
|
|
13
|
+
class RsNormalizer
|
|
14
|
+
def initialize(input)
|
|
15
|
+
@input = input
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def read(length = nil, outbuf = nil)
|
|
19
|
+
chunk = @input.read(length)
|
|
20
|
+
return nil if chunk.nil?
|
|
21
|
+
|
|
22
|
+
chunk = chunk.tr(RS_CHAR, "\n")
|
|
23
|
+
if outbuf
|
|
24
|
+
outbuf.replace(chunk)
|
|
25
|
+
else
|
|
26
|
+
chunk
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def initialize(inputs:, out: $stdout, err: $stderr, lax: false, pretty: false, atomic_write_bytes: DEFAULT_OUTPUT_BUFFER_LIMIT)
|
|
32
|
+
@inputs = inputs
|
|
33
|
+
@out = out
|
|
34
|
+
@err = err
|
|
35
|
+
@lax = lax
|
|
36
|
+
@pretty = pretty
|
|
37
|
+
@atomic_write_bytes = atomic_write_bytes
|
|
38
|
+
@output_buffer = +""
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def run(expression, verbose: false)
|
|
42
|
+
parsed = PipelineParser.new(expression).parse
|
|
43
|
+
stages = parsed[:stages]
|
|
44
|
+
dump_stages(stages) if verbose
|
|
45
|
+
|
|
46
|
+
blocks = stages.map { |stage|
|
|
47
|
+
eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
|
|
48
|
+
}
|
|
49
|
+
pipeline = Pipeline.new(*blocks)
|
|
50
|
+
|
|
51
|
+
input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
|
|
52
|
+
pipeline.call(input_enum) do |value|
|
|
53
|
+
emit_output(value)
|
|
54
|
+
end
|
|
55
|
+
ensure
|
|
56
|
+
write_output(@output_buffer)
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
private
|
|
60
|
+
|
|
61
|
+
def each_input_value
|
|
62
|
+
return each_input_value_lax { |value| yield value } if @lax
|
|
63
|
+
|
|
64
|
+
each_input_value_ndjson { |value| yield value }
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def each_input_value_ndjson
|
|
68
|
+
each_input do |source|
|
|
69
|
+
source.each_line do |raw_line|
|
|
70
|
+
line = raw_line.strip
|
|
71
|
+
next if line.empty?
|
|
72
|
+
|
|
73
|
+
yield JSON.parse(line)
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def each_input_value_lax
|
|
79
|
+
require "oj"
|
|
80
|
+
handler = Class.new(Oj::ScHandler) do
|
|
81
|
+
def initialize(&emit)
|
|
82
|
+
@emit = emit
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def hash_start = {}
|
|
86
|
+
def hash_key(key) = key
|
|
87
|
+
def hash_set(hash, key, value) = hash[key] = value
|
|
88
|
+
def array_start = []
|
|
89
|
+
def array_append(array, value) = array << value
|
|
90
|
+
def add_value(value) = @emit.call(value)
|
|
91
|
+
end
|
|
92
|
+
each_input do |source|
|
|
93
|
+
Oj.sc_parse(handler.new { |value| yield value }, RsNormalizer.new(source))
|
|
94
|
+
end
|
|
95
|
+
rescue LoadError
|
|
96
|
+
raise "oj is required for --lax mode (gem install oj)"
|
|
97
|
+
rescue Oj::ParseError => e
|
|
98
|
+
raise JSON::ParserError, e.message
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
def dump_stages(stages)
|
|
102
|
+
stages.each_with_index do |stage, i|
|
|
103
|
+
@err.puts "stage[#{i}]: #{stage[:src]}"
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
def each_input
|
|
108
|
+
@inputs.each { |source| yield source }
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def emit_output(value)
|
|
112
|
+
record = (@pretty ? JSON.pretty_generate(value) : JSON.generate(value)) << "\n"
|
|
113
|
+
if @output_buffer.bytesize + record.bytesize <= @atomic_write_bytes
|
|
114
|
+
@output_buffer << record
|
|
115
|
+
else
|
|
116
|
+
write_output(@output_buffer)
|
|
117
|
+
@output_buffer = record
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
def write_output(str)
|
|
122
|
+
@out.syswrite(str)
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
end
|
data/lib/jrf/cli.rb
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "runner"
|
|
3
|
+
require_relative "cli/runner"
|
|
4
4
|
|
|
5
5
|
module Jrf
|
|
6
6
|
class CLI
|
|
7
|
-
USAGE = "usage: jrf [
|
|
7
|
+
USAGE = "usage: jrf [options] 'STAGE >> STAGE >> ...'"
|
|
8
8
|
|
|
9
9
|
HELP_TEXT = <<~'TEXT'
|
|
10
|
-
usage: jrf [
|
|
10
|
+
usage: jrf [options] 'STAGE >> STAGE >> ...'
|
|
11
11
|
|
|
12
12
|
JSON filter with the power and speed of Ruby.
|
|
13
13
|
|
|
@@ -15,6 +15,8 @@ module Jrf
|
|
|
15
15
|
-v, --verbose print parsed stage expressions
|
|
16
16
|
--lax allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
|
|
17
17
|
-p, --pretty pretty-print JSON output instead of compact NDJSON
|
|
18
|
+
--atomic-write-bytes N
|
|
19
|
+
group short outputs into atomic writes of up to N bytes
|
|
18
20
|
-h, --help show this help and exit
|
|
19
21
|
|
|
20
22
|
Pipeline:
|
|
@@ -36,6 +38,7 @@ module Jrf
|
|
|
36
38
|
verbose = false
|
|
37
39
|
lax = false
|
|
38
40
|
pretty = false
|
|
41
|
+
atomic_write_bytes = Runner::DEFAULT_OUTPUT_BUFFER_LIMIT
|
|
39
42
|
|
|
40
43
|
while argv.first&.start_with?("-")
|
|
41
44
|
case argv.first
|
|
@@ -48,6 +51,14 @@ module Jrf
|
|
|
48
51
|
when "-p", "--pretty"
|
|
49
52
|
pretty = true
|
|
50
53
|
argv.shift
|
|
54
|
+
when /\A--atomic-write-bytes=(.+)\z/
|
|
55
|
+
atomic_write_bytes = parse_atomic_write_bytes(Regexp.last_match(1), err)
|
|
56
|
+
return 1 unless atomic_write_bytes
|
|
57
|
+
argv.shift
|
|
58
|
+
when "--atomic-write-bytes"
|
|
59
|
+
argv.shift
|
|
60
|
+
atomic_write_bytes = parse_atomic_write_bytes(argv.shift, err)
|
|
61
|
+
return 1 unless atomic_write_bytes
|
|
51
62
|
when "-h", "--help"
|
|
52
63
|
out.puts HELP_TEXT
|
|
53
64
|
return 0
|
|
@@ -64,8 +75,43 @@ module Jrf
|
|
|
64
75
|
end
|
|
65
76
|
|
|
66
77
|
expression = argv.shift
|
|
67
|
-
|
|
78
|
+
inputs = Enumerator.new do |y|
|
|
79
|
+
if argv.empty?
|
|
80
|
+
y << input
|
|
81
|
+
else
|
|
82
|
+
argv.each do |path|
|
|
83
|
+
if path == "-"
|
|
84
|
+
y << input
|
|
85
|
+
elsif path.end_with?(".gz")
|
|
86
|
+
require "zlib"
|
|
87
|
+
Zlib::GzipReader.open(path) do |source|
|
|
88
|
+
y << source
|
|
89
|
+
end
|
|
90
|
+
else
|
|
91
|
+
File.open(path, "rb") do |source|
|
|
92
|
+
y << source
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
Runner.new(
|
|
99
|
+
inputs: inputs,
|
|
100
|
+
out: out,
|
|
101
|
+
err: err,
|
|
102
|
+
lax: lax,
|
|
103
|
+
pretty: pretty,
|
|
104
|
+
atomic_write_bytes: atomic_write_bytes
|
|
105
|
+
).run(expression, verbose: verbose)
|
|
68
106
|
0
|
|
69
107
|
end
|
|
108
|
+
|
|
109
|
+
def self.parse_atomic_write_bytes(value, err)
|
|
110
|
+
bytes = Integer(value, exception: false)
|
|
111
|
+
return bytes if bytes && bytes.positive?
|
|
112
|
+
|
|
113
|
+
err.puts "--atomic-write-bytes requires a positive integer"
|
|
114
|
+
nil
|
|
115
|
+
end
|
|
70
116
|
end
|
|
71
117
|
end
|
data/lib/jrf/version.rb
CHANGED
data/test/jrf_test.rb
CHANGED
|
@@ -1,7 +1,17 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
begin
|
|
4
|
+
require "bundler/setup"
|
|
5
|
+
rescue LoadError
|
|
6
|
+
# Allow running tests in plain Ruby environments with globally installed gems.
|
|
7
|
+
end
|
|
8
|
+
|
|
3
9
|
require "json"
|
|
4
10
|
require "open3"
|
|
11
|
+
require "stringio"
|
|
12
|
+
require "tmpdir"
|
|
13
|
+
require "zlib"
|
|
14
|
+
require_relative "../lib/jrf/cli/runner"
|
|
5
15
|
|
|
6
16
|
def run_jrf(expr, input, *opts)
|
|
7
17
|
Open3.capture3("./exe/jrf", *opts, expr, stdin_data: input)
|
|
@@ -41,6 +51,45 @@ def lines(str)
|
|
|
41
51
|
str.lines.map(&:strip).reject(&:empty?)
|
|
42
52
|
end
|
|
43
53
|
|
|
54
|
+
class RecordingRunner < Jrf::CLI::Runner
|
|
55
|
+
attr_reader :writes
|
|
56
|
+
|
|
57
|
+
def initialize(**kwargs)
|
|
58
|
+
super
|
|
59
|
+
@writes = []
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
private
|
|
63
|
+
|
|
64
|
+
def write_output(str)
|
|
65
|
+
return if str.empty?
|
|
66
|
+
|
|
67
|
+
@writes << str
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
class ChunkedSource
|
|
72
|
+
def initialize(str, chunk_size: 5)
|
|
73
|
+
@str = str
|
|
74
|
+
@chunk_size = chunk_size
|
|
75
|
+
@offset = 0
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def read(length = nil, outbuf = nil)
|
|
79
|
+
raise "expected chunked reads" if length.nil?
|
|
80
|
+
|
|
81
|
+
chunk = @str.byteslice(@offset, [length, @chunk_size].min)
|
|
82
|
+
return nil unless chunk
|
|
83
|
+
|
|
84
|
+
@offset += chunk.bytesize
|
|
85
|
+
if outbuf
|
|
86
|
+
outbuf.replace(chunk)
|
|
87
|
+
else
|
|
88
|
+
chunk
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
44
93
|
File.chmod(0o755, "./exe/jrf")
|
|
45
94
|
|
|
46
95
|
input = <<~NDJSON
|
|
@@ -92,10 +141,11 @@ assert_includes(stderr, 'stage[1]: _["hello"]')
|
|
|
92
141
|
|
|
93
142
|
stdout, stderr, status = Open3.capture3("./exe/jrf", "--help")
|
|
94
143
|
assert_success(status, stderr, "help option")
|
|
95
|
-
assert_includes(stdout, "usage: jrf [
|
|
144
|
+
assert_includes(stdout, "usage: jrf [options] 'STAGE >> STAGE >> ...'")
|
|
96
145
|
assert_includes(stdout, "JSON filter with the power and speed of Ruby.")
|
|
97
146
|
assert_includes(stdout, "--lax")
|
|
98
147
|
assert_includes(stdout, "--pretty")
|
|
148
|
+
assert_includes(stdout, "--atomic-write-bytes N")
|
|
99
149
|
assert_includes(stdout, "Pipeline:")
|
|
100
150
|
assert_includes(stdout, "Connect stages with top-level >>.")
|
|
101
151
|
assert_includes(stdout, "The current value in each stage is available as _.")
|
|
@@ -103,11 +153,72 @@ assert_includes(stdout, "See Also:")
|
|
|
103
153
|
assert_includes(stdout, "https://github.com/kazuho/jrf#readme")
|
|
104
154
|
assert_equal([], lines(stderr), "help stderr output")
|
|
105
155
|
|
|
156
|
+
threshold_input = StringIO.new((1..4).map { |i| "{\"foo\":\"#{'x' * 1020}\",\"i\":#{i}}\n" }.join)
|
|
157
|
+
buffered_runner = RecordingRunner.new(inputs: [threshold_input], out: StringIO.new, err: StringIO.new)
|
|
158
|
+
buffered_runner.run('_')
|
|
159
|
+
expected_line = JSON.generate({"foo" => "x" * 1020, "i" => 1}) + "\n"
|
|
160
|
+
assert_equal(2, buffered_runner.writes.length, "default atomic write limit buffers records until the configured threshold")
|
|
161
|
+
assert_equal(expected_line.bytesize * 3, buffered_runner.writes.first.bytesize, "default atomic write limit flushes before the next record would exceed the threshold")
|
|
162
|
+
assert_equal(expected_line.bytesize, buffered_runner.writes.last.bytesize, "final buffer flush emits the remaining record")
|
|
163
|
+
|
|
164
|
+
small_limit_runner = RecordingRunner.new(inputs: [StringIO.new("{\"foo\":1}\n{\"foo\":2}\n")], out: StringIO.new, err: StringIO.new, atomic_write_bytes: 1)
|
|
165
|
+
small_limit_runner.run('_["foo"]')
|
|
166
|
+
assert_equal(["1\n", "2\n"], small_limit_runner.writes, "small atomic write limit emits oversized records directly")
|
|
167
|
+
|
|
168
|
+
error_runner = RecordingRunner.new(inputs: [StringIO.new("{\"foo\":1}\n{\"foo\":")], out: StringIO.new, err: StringIO.new)
|
|
169
|
+
begin
|
|
170
|
+
error_runner.run('_["foo"]')
|
|
171
|
+
raise "expected parse error for buffered flush test"
|
|
172
|
+
rescue JSON::ParserError
|
|
173
|
+
assert_equal(["1\n"], error_runner.writes, "buffer flushes pending output before parse errors escape")
|
|
174
|
+
end
|
|
175
|
+
|
|
106
176
|
stdout, stderr, status = run_jrf('select(_["hello"] == 123) >> _["hello"]', input_hello, "--verbose")
|
|
107
177
|
assert_success(status, stderr, "dump stages verbose alias")
|
|
108
178
|
assert_equal(%w[123], lines(stdout), "dump stages verbose alias output")
|
|
109
179
|
assert_includes(stderr, 'stage[0]: select(_["hello"] == 123)')
|
|
110
180
|
|
|
181
|
+
stdout, stderr, status = run_jrf('_["hello"]', input_hello, "--atomic-write-bytes", "512")
|
|
182
|
+
assert_success(status, stderr, "atomic write bytes option")
|
|
183
|
+
assert_equal(%w[123 456], lines(stdout), "atomic write bytes option output")
|
|
184
|
+
|
|
185
|
+
stdout, stderr, status = run_jrf('_["hello"]', input_hello, "--atomic-write-bytes=512")
|
|
186
|
+
assert_success(status, stderr, "atomic write bytes equals form")
|
|
187
|
+
assert_equal(%w[123 456], lines(stdout), "atomic write bytes equals form output")
|
|
188
|
+
|
|
189
|
+
stdout, stderr, status = Open3.capture3("./exe/jrf", "--atomic-write-bytes", "0", '_["hello"]', stdin_data: input_hello)
|
|
190
|
+
assert_failure(status, "atomic write bytes rejects zero")
|
|
191
|
+
assert_includes(stderr, "--atomic-write-bytes requires a positive integer")
|
|
192
|
+
|
|
193
|
+
Dir.mktmpdir do |dir|
|
|
194
|
+
gz_path = File.join(dir, "input.ndjson.gz")
|
|
195
|
+
Zlib::GzipWriter.open(gz_path) do |io|
|
|
196
|
+
io.write("{\"foo\":10}\n{\"foo\":20}\n")
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
stdout, stderr, status = Open3.capture3("./exe/jrf", '_["foo"]', gz_path)
|
|
200
|
+
assert_success(status, stderr, "compressed input by suffix")
|
|
201
|
+
assert_equal(%w[10 20], lines(stdout), "compressed input output")
|
|
202
|
+
|
|
203
|
+
lax_gz_path = File.join(dir, "input-lax.json.gz")
|
|
204
|
+
Zlib::GzipWriter.open(lax_gz_path) do |io|
|
|
205
|
+
io.write("{\"foo\":30}\n\x1e{\"foo\":40}\n")
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
stdout, stderr, status = Open3.capture3("./exe/jrf", "--lax", '_["foo"]', lax_gz_path)
|
|
209
|
+
assert_success(status, stderr, "compressed lax input by suffix")
|
|
210
|
+
assert_equal(%w[30 40], lines(stdout), "compressed lax input output")
|
|
211
|
+
|
|
212
|
+
second_gz_path = File.join(dir, "input2.ndjson.gz")
|
|
213
|
+
Zlib::GzipWriter.open(second_gz_path) do |io|
|
|
214
|
+
io.write("{\"foo\":50}\n")
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
stdout, stderr, status = Open3.capture3("./exe/jrf", '_["foo"]', gz_path, second_gz_path)
|
|
218
|
+
assert_success(status, stderr, "multiple compressed inputs by suffix")
|
|
219
|
+
assert_equal(%w[10 20 50], lines(stdout), "multiple compressed input output")
|
|
220
|
+
end
|
|
221
|
+
|
|
111
222
|
stdout, stderr, status = run_jrf('_', input_hello, "--pretty")
|
|
112
223
|
assert_success(status, stderr, "pretty output")
|
|
113
224
|
assert_equal(
|
|
@@ -493,6 +604,26 @@ stdout, stderr, status = run_jrf('_["foo"]', input_lax_trailing_rs, "--lax")
|
|
|
493
604
|
assert_success(status, stderr, "lax ignores trailing separator")
|
|
494
605
|
assert_equal(%w[9], lines(stdout), "lax trailing separator output")
|
|
495
606
|
|
|
607
|
+
chunked_lax_out = RecordingRunner.new(
|
|
608
|
+
inputs: [ChunkedSource.new("{\"foo\":1}\n\x1e{\"foo\":2}\n\t{\"foo\":3}\n")],
|
|
609
|
+
out: StringIO.new,
|
|
610
|
+
err: StringIO.new,
|
|
611
|
+
lax: true
|
|
612
|
+
)
|
|
613
|
+
chunked_lax_out.run('_["foo"]')
|
|
614
|
+
assert_equal(%w[1 2 3], lines(chunked_lax_out.writes.join), "lax mode streams chunked input without whole-input reads")
|
|
615
|
+
|
|
616
|
+
Dir.mktmpdir do |dir|
|
|
617
|
+
one = File.join(dir, "one.json")
|
|
618
|
+
two = File.join(dir, "two.json")
|
|
619
|
+
File.write(one, "1")
|
|
620
|
+
File.write(two, "2")
|
|
621
|
+
|
|
622
|
+
stdout, stderr, status = Open3.capture3("./exe/jrf", "--lax", "_", one, two)
|
|
623
|
+
assert_success(status, stderr, "lax keeps file boundaries")
|
|
624
|
+
assert_equal(%w[1 2], lines(stdout), "lax does not merge JSON across file boundaries")
|
|
625
|
+
end
|
|
626
|
+
|
|
496
627
|
stdout, stderr, status = run_jrf('select(_["x"] > ) >> _["foo"]', "")
|
|
497
628
|
assert_failure(status, "syntax error should fail before row loop")
|
|
498
629
|
assert_includes(stderr, "syntax error")
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: jrf
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.5
|
|
4
|
+
version: 0.1.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- kazuho
|
|
@@ -40,12 +40,12 @@ files:
|
|
|
40
40
|
- jrf.gemspec
|
|
41
41
|
- lib/jrf.rb
|
|
42
42
|
- lib/jrf/cli.rb
|
|
43
|
+
- lib/jrf/cli/runner.rb
|
|
43
44
|
- lib/jrf/control.rb
|
|
44
45
|
- lib/jrf/pipeline.rb
|
|
45
46
|
- lib/jrf/pipeline_parser.rb
|
|
46
47
|
- lib/jrf/reducers.rb
|
|
47
48
|
- lib/jrf/row_context.rb
|
|
48
|
-
- lib/jrf/runner.rb
|
|
49
49
|
- lib/jrf/stage.rb
|
|
50
50
|
- lib/jrf/version.rb
|
|
51
51
|
- test/jrf_test.rb
|
data/lib/jrf/runner.rb
DELETED
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "json"
|
|
4
|
-
require_relative "pipeline"
|
|
5
|
-
require_relative "pipeline_parser"
|
|
6
|
-
|
|
7
|
-
module Jrf
|
|
8
|
-
class Runner
|
|
9
|
-
RS_CHAR = "\x1e"
|
|
10
|
-
|
|
11
|
-
def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false, pretty: false)
|
|
12
|
-
@input = input
|
|
13
|
-
@out = out
|
|
14
|
-
@err = err
|
|
15
|
-
@lax = lax
|
|
16
|
-
@pretty = pretty
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
def run(expression, verbose: false)
|
|
20
|
-
parsed = PipelineParser.new(expression).parse
|
|
21
|
-
stages = parsed[:stages]
|
|
22
|
-
dump_stages(stages) if verbose
|
|
23
|
-
|
|
24
|
-
blocks = stages.map { |stage|
|
|
25
|
-
eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
|
|
26
|
-
}
|
|
27
|
-
pipeline = Pipeline.new(*blocks)
|
|
28
|
-
|
|
29
|
-
input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
|
|
30
|
-
pipeline.call(input_enum) do |value|
|
|
31
|
-
@out.puts(@pretty ? JSON.pretty_generate(value) : JSON.generate(value))
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
private
|
|
36
|
-
|
|
37
|
-
def each_input_value
|
|
38
|
-
return each_input_value_lax { |value| yield value } if @lax
|
|
39
|
-
|
|
40
|
-
each_input_value_ndjson { |value| yield value }
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
def each_input_value_ndjson
|
|
44
|
-
@input.each_line do |raw_line|
|
|
45
|
-
line = raw_line.strip
|
|
46
|
-
next if line.empty?
|
|
47
|
-
|
|
48
|
-
yield JSON.parse(line)
|
|
49
|
-
end
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
def each_input_value_lax
|
|
53
|
-
require "oj"
|
|
54
|
-
source = @input.read.to_s
|
|
55
|
-
source = source.include?(RS_CHAR) ? source.tr(RS_CHAR, "\n") : source
|
|
56
|
-
handler = Class.new(Oj::ScHandler) do
|
|
57
|
-
def initialize(&emit)
|
|
58
|
-
@emit = emit
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
def hash_start = {}
|
|
62
|
-
def hash_key(key) = key
|
|
63
|
-
def hash_set(hash, key, value) = hash[key] = value
|
|
64
|
-
def array_start = []
|
|
65
|
-
def array_append(array, value) = array << value
|
|
66
|
-
def add_value(value) = @emit.call(value)
|
|
67
|
-
end.new { |value| yield value }
|
|
68
|
-
Oj.sc_parse(handler, source)
|
|
69
|
-
rescue LoadError
|
|
70
|
-
raise "oj is required for --lax mode (gem install oj)"
|
|
71
|
-
rescue Oj::ParseError => e
|
|
72
|
-
raise JSON::ParserError, e.message
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
def dump_stages(stages)
|
|
76
|
-
stages.each_with_index do |stage, i|
|
|
77
|
-
@err.puts "stage[#{i}]: #{stage[:src]}"
|
|
78
|
-
end
|
|
79
|
-
end
|
|
80
|
-
end
|
|
81
|
-
end
|