jrf 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/jrf/cli/runner.rb +126 -0
- data/lib/jrf/cli.rb +50 -4
- data/lib/jrf/row_context.rb +22 -7
- data/lib/jrf/stage.rb +59 -13
- data/lib/jrf/version.rb +1 -1
- data/test/jrf_test.rb +184 -1
- metadata +2 -2
- data/lib/jrf/runner.rb +0 -81
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e9bb2a3a16d2bbe8cfb463267ff74d7d582511d4b4891e56ad3dfa6eee75fceb
|
|
4
|
+
data.tar.gz: a13b2e9c8517c3da997452166556505b24fc4d5f898765ad33495eafd57c3081
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 54b400cdaba584896f2511acfe9a41ef10af25033bf88cfc6e0386eaa840df9395fb0d008c320b3193d55a9c3fad444a7f54bd29f52c34f69bc9a9cf392a7809
|
|
7
|
+
data.tar.gz: 80c72675e179da483316bfeaee7114da6edb49dc66ae179aa072d48907c4c9caf74113c6681b2f4a83f4b97da6faac436f5d6af5bd31e82605b122d85892cede
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require_relative "../pipeline"
|
|
5
|
+
require_relative "../pipeline_parser"
|
|
6
|
+
|
|
7
|
+
module Jrf
|
|
8
|
+
class CLI
|
|
9
|
+
class Runner
|
|
10
|
+
RS_CHAR = "\x1e"
|
|
11
|
+
DEFAULT_OUTPUT_BUFFER_LIMIT = 4096
|
|
12
|
+
|
|
13
|
+
class RsNormalizer
|
|
14
|
+
def initialize(input)
|
|
15
|
+
@input = input
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def read(length = nil, outbuf = nil)
|
|
19
|
+
chunk = @input.read(length)
|
|
20
|
+
return nil if chunk.nil?
|
|
21
|
+
|
|
22
|
+
chunk = chunk.tr(RS_CHAR, "\n")
|
|
23
|
+
if outbuf
|
|
24
|
+
outbuf.replace(chunk)
|
|
25
|
+
else
|
|
26
|
+
chunk
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def initialize(inputs:, out: $stdout, err: $stderr, lax: false, pretty: false, atomic_write_bytes: DEFAULT_OUTPUT_BUFFER_LIMIT)
|
|
32
|
+
@inputs = inputs
|
|
33
|
+
@out = out
|
|
34
|
+
@err = err
|
|
35
|
+
@lax = lax
|
|
36
|
+
@pretty = pretty
|
|
37
|
+
@atomic_write_bytes = atomic_write_bytes
|
|
38
|
+
@output_buffer = +""
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def run(expression, verbose: false)
|
|
42
|
+
parsed = PipelineParser.new(expression).parse
|
|
43
|
+
stages = parsed[:stages]
|
|
44
|
+
dump_stages(stages) if verbose
|
|
45
|
+
|
|
46
|
+
blocks = stages.map { |stage|
|
|
47
|
+
eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
|
|
48
|
+
}
|
|
49
|
+
pipeline = Pipeline.new(*blocks)
|
|
50
|
+
|
|
51
|
+
input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
|
|
52
|
+
pipeline.call(input_enum) do |value|
|
|
53
|
+
emit_output(value)
|
|
54
|
+
end
|
|
55
|
+
ensure
|
|
56
|
+
write_output(@output_buffer)
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
private
|
|
60
|
+
|
|
61
|
+
def each_input_value
|
|
62
|
+
return each_input_value_lax { |value| yield value } if @lax
|
|
63
|
+
|
|
64
|
+
each_input_value_ndjson { |value| yield value }
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def each_input_value_ndjson
|
|
68
|
+
each_input do |source|
|
|
69
|
+
source.each_line do |raw_line|
|
|
70
|
+
line = raw_line.strip
|
|
71
|
+
next if line.empty?
|
|
72
|
+
|
|
73
|
+
yield JSON.parse(line)
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def each_input_value_lax
|
|
79
|
+
require "oj"
|
|
80
|
+
handler = Class.new(Oj::ScHandler) do
|
|
81
|
+
def initialize(&emit)
|
|
82
|
+
@emit = emit
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def hash_start = {}
|
|
86
|
+
def hash_key(key) = key
|
|
87
|
+
def hash_set(hash, key, value) = hash[key] = value
|
|
88
|
+
def array_start = []
|
|
89
|
+
def array_append(array, value) = array << value
|
|
90
|
+
def add_value(value) = @emit.call(value)
|
|
91
|
+
end
|
|
92
|
+
each_input do |source|
|
|
93
|
+
Oj.sc_parse(handler.new { |value| yield value }, RsNormalizer.new(source))
|
|
94
|
+
end
|
|
95
|
+
rescue LoadError
|
|
96
|
+
raise "oj is required for --lax mode (gem install oj)"
|
|
97
|
+
rescue Oj::ParseError => e
|
|
98
|
+
raise JSON::ParserError, e.message
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
def dump_stages(stages)
|
|
102
|
+
stages.each_with_index do |stage, i|
|
|
103
|
+
@err.puts "stage[#{i}]: #{stage[:src]}"
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
def each_input
|
|
108
|
+
@inputs.each { |source| yield source }
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def emit_output(value)
|
|
112
|
+
record = (@pretty ? JSON.pretty_generate(value) : JSON.generate(value)) << "\n"
|
|
113
|
+
if @output_buffer.bytesize + record.bytesize <= @atomic_write_bytes
|
|
114
|
+
@output_buffer << record
|
|
115
|
+
else
|
|
116
|
+
write_output(@output_buffer)
|
|
117
|
+
@output_buffer = record
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
def write_output(str)
|
|
122
|
+
@out.syswrite(str)
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
end
|
data/lib/jrf/cli.rb
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "runner"
|
|
3
|
+
require_relative "cli/runner"
|
|
4
4
|
|
|
5
5
|
module Jrf
|
|
6
6
|
class CLI
|
|
7
|
-
USAGE = "usage: jrf [
|
|
7
|
+
USAGE = "usage: jrf [options] 'STAGE >> STAGE >> ...'"
|
|
8
8
|
|
|
9
9
|
HELP_TEXT = <<~'TEXT'
|
|
10
|
-
usage: jrf [
|
|
10
|
+
usage: jrf [options] 'STAGE >> STAGE >> ...'
|
|
11
11
|
|
|
12
12
|
JSON filter with the power and speed of Ruby.
|
|
13
13
|
|
|
@@ -15,6 +15,8 @@ module Jrf
|
|
|
15
15
|
-v, --verbose print parsed stage expressions
|
|
16
16
|
--lax allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
|
|
17
17
|
-p, --pretty pretty-print JSON output instead of compact NDJSON
|
|
18
|
+
--atomic-write-bytes N
|
|
19
|
+
group short outputs into atomic writes of up to N bytes
|
|
18
20
|
-h, --help show this help and exit
|
|
19
21
|
|
|
20
22
|
Pipeline:
|
|
@@ -36,6 +38,7 @@ module Jrf
|
|
|
36
38
|
verbose = false
|
|
37
39
|
lax = false
|
|
38
40
|
pretty = false
|
|
41
|
+
atomic_write_bytes = Runner::DEFAULT_OUTPUT_BUFFER_LIMIT
|
|
39
42
|
|
|
40
43
|
while argv.first&.start_with?("-")
|
|
41
44
|
case argv.first
|
|
@@ -48,6 +51,14 @@ module Jrf
|
|
|
48
51
|
when "-p", "--pretty"
|
|
49
52
|
pretty = true
|
|
50
53
|
argv.shift
|
|
54
|
+
when /\A--atomic-write-bytes=(.+)\z/
|
|
55
|
+
atomic_write_bytes = parse_atomic_write_bytes(Regexp.last_match(1), err)
|
|
56
|
+
return 1 unless atomic_write_bytes
|
|
57
|
+
argv.shift
|
|
58
|
+
when "--atomic-write-bytes"
|
|
59
|
+
argv.shift
|
|
60
|
+
atomic_write_bytes = parse_atomic_write_bytes(argv.shift, err)
|
|
61
|
+
return 1 unless atomic_write_bytes
|
|
51
62
|
when "-h", "--help"
|
|
52
63
|
out.puts HELP_TEXT
|
|
53
64
|
return 0
|
|
@@ -64,8 +75,43 @@ module Jrf
|
|
|
64
75
|
end
|
|
65
76
|
|
|
66
77
|
expression = argv.shift
|
|
67
|
-
|
|
78
|
+
inputs = Enumerator.new do |y|
|
|
79
|
+
if argv.empty?
|
|
80
|
+
y << input
|
|
81
|
+
else
|
|
82
|
+
argv.each do |path|
|
|
83
|
+
if path == "-"
|
|
84
|
+
y << input
|
|
85
|
+
elsif path.end_with?(".gz")
|
|
86
|
+
require "zlib"
|
|
87
|
+
Zlib::GzipReader.open(path) do |source|
|
|
88
|
+
y << source
|
|
89
|
+
end
|
|
90
|
+
else
|
|
91
|
+
File.open(path, "rb") do |source|
|
|
92
|
+
y << source
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
Runner.new(
|
|
99
|
+
inputs: inputs,
|
|
100
|
+
out: out,
|
|
101
|
+
err: err,
|
|
102
|
+
lax: lax,
|
|
103
|
+
pretty: pretty,
|
|
104
|
+
atomic_write_bytes: atomic_write_bytes
|
|
105
|
+
).run(expression, verbose: verbose)
|
|
68
106
|
0
|
|
69
107
|
end
|
|
108
|
+
|
|
109
|
+
def self.parse_atomic_write_bytes(value, err)
|
|
110
|
+
bytes = Integer(value, exception: false)
|
|
111
|
+
return bytes if bytes && bytes.positive?
|
|
112
|
+
|
|
113
|
+
err.puts "--atomic-write-bytes requires a positive integer"
|
|
114
|
+
nil
|
|
115
|
+
end
|
|
70
116
|
end
|
|
71
117
|
end
|
data/lib/jrf/row_context.rb
CHANGED
|
@@ -26,10 +26,12 @@ module Jrf
|
|
|
26
26
|
def initialize(obj = nil)
|
|
27
27
|
@obj = obj
|
|
28
28
|
@__jrf_current_stage = nil
|
|
29
|
+
@__jrf_current_input = obj
|
|
29
30
|
end
|
|
30
31
|
|
|
31
32
|
def reset(obj)
|
|
32
33
|
@obj = obj
|
|
34
|
+
@__jrf_current_input = obj
|
|
33
35
|
self
|
|
34
36
|
end
|
|
35
37
|
|
|
@@ -38,11 +40,11 @@ module Jrf
|
|
|
38
40
|
end
|
|
39
41
|
|
|
40
42
|
def flat
|
|
41
|
-
Control::Flat.new(
|
|
43
|
+
Control::Flat.new(current_input)
|
|
42
44
|
end
|
|
43
45
|
|
|
44
46
|
def select(predicate)
|
|
45
|
-
predicate ?
|
|
47
|
+
predicate ? current_input : Control::DROPPED
|
|
46
48
|
end
|
|
47
49
|
|
|
48
50
|
define_reducer(:sum) do |_ctx, value, initial: 0, block: nil|
|
|
@@ -111,15 +113,16 @@ module Jrf
|
|
|
111
113
|
define_reducer(:sort) do |ctx, key = MISSING, block: nil|
|
|
112
114
|
if block
|
|
113
115
|
{
|
|
114
|
-
value: ctx.
|
|
116
|
+
value: ctx.send(:current_input),
|
|
115
117
|
initial: -> { [] },
|
|
116
118
|
finish: ->(rows) { rows.sort(&block) },
|
|
117
119
|
step: ->(rows, row) { rows << row }
|
|
118
120
|
}
|
|
119
121
|
else
|
|
120
|
-
|
|
122
|
+
current = ctx.send(:current_input)
|
|
123
|
+
resolved_key = key.equal?(MISSING) ? current : key
|
|
121
124
|
{
|
|
122
|
-
value: [resolved_key,
|
|
125
|
+
value: [resolved_key, current],
|
|
123
126
|
initial: -> { [] },
|
|
124
127
|
finish: ->(pairs) { pairs.sort_by(&:first).map(&:last) },
|
|
125
128
|
step: ->(pairs, pair) { pairs << pair }
|
|
@@ -128,7 +131,7 @@ module Jrf
|
|
|
128
131
|
end
|
|
129
132
|
|
|
130
133
|
define_reducer(:group) do |ctx, value = MISSING, block: nil|
|
|
131
|
-
resolved_value = value.equal?(MISSING) ? ctx.
|
|
134
|
+
resolved_value = value.equal?(MISSING) ? ctx.send(:current_input) : value
|
|
132
135
|
{ value: resolved_value, initial: -> { [] }, step: ->(acc, v) { acc << v } }
|
|
133
136
|
end
|
|
134
137
|
|
|
@@ -158,7 +161,7 @@ module Jrf
|
|
|
158
161
|
def reduce(initial, &block)
|
|
159
162
|
raise ArgumentError, "reduce requires a block" unless block
|
|
160
163
|
|
|
161
|
-
@__jrf_current_stage.allocate_reducer(
|
|
164
|
+
@__jrf_current_stage.allocate_reducer(current_input, initial: initial, &block)
|
|
162
165
|
end
|
|
163
166
|
|
|
164
167
|
def map(&block)
|
|
@@ -180,6 +183,18 @@ module Jrf
|
|
|
180
183
|
|
|
181
184
|
private
|
|
182
185
|
|
|
186
|
+
def current_input
|
|
187
|
+
@__jrf_current_input
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
def __jrf_with_current_input(value)
|
|
191
|
+
saved_input = current_input
|
|
192
|
+
@__jrf_current_input = value
|
|
193
|
+
yield
|
|
194
|
+
ensure
|
|
195
|
+
@__jrf_current_input = saved_input
|
|
196
|
+
end
|
|
197
|
+
|
|
183
198
|
def reducer_initial_value(initial)
|
|
184
199
|
return initial.call if initial.respond_to?(:call)
|
|
185
200
|
return initial.dup if initial.is_a?(Array) || initial.is_a?(Hash)
|
data/lib/jrf/stage.rb
CHANGED
|
@@ -64,10 +64,7 @@ module Jrf
|
|
|
64
64
|
|
|
65
65
|
# Transformation mode (detected on first call)
|
|
66
66
|
if @map_transforms[idx]
|
|
67
|
-
|
|
68
|
-
when :array then return collection.map(&block)
|
|
69
|
-
when :hash then return collection.transform_values(&block)
|
|
70
|
-
end
|
|
67
|
+
return transform_collection(type, collection, &block)
|
|
71
68
|
end
|
|
72
69
|
|
|
73
70
|
map_reducer = (@reducers[idx] ||= MapReducer.new(type))
|
|
@@ -78,7 +75,7 @@ module Jrf
|
|
|
78
75
|
collection.each_with_index do |v, i|
|
|
79
76
|
slot = map_reducer.slot(i)
|
|
80
77
|
with_scoped_reducers(slot.reducers) do
|
|
81
|
-
result = block.call(v)
|
|
78
|
+
result = @ctx.send(:__jrf_with_current_input, v) { block.call(v) }
|
|
82
79
|
slot.template ||= result
|
|
83
80
|
end
|
|
84
81
|
end
|
|
@@ -87,7 +84,7 @@ module Jrf
|
|
|
87
84
|
collection.each do |k, v|
|
|
88
85
|
slot = map_reducer.slot(k)
|
|
89
86
|
with_scoped_reducers(slot.reducers) do
|
|
90
|
-
result = block.call(v)
|
|
87
|
+
result = @ctx.send(:__jrf_with_current_input, v) { block.call(v) }
|
|
91
88
|
slot.template ||= result
|
|
92
89
|
end
|
|
93
90
|
end
|
|
@@ -97,12 +94,7 @@ module Jrf
|
|
|
97
94
|
if @mode.nil? && map_reducer.slots.values.all? { |s| s.reducers.empty? }
|
|
98
95
|
@map_transforms[idx] = true
|
|
99
96
|
@reducers[idx] = nil
|
|
100
|
-
|
|
101
|
-
when :array
|
|
102
|
-
return map_reducer.slots.sort_by { |k, _| k }.map { |_, s| s.template }
|
|
103
|
-
when :hash
|
|
104
|
-
return map_reducer.slots.transform_values(&:template)
|
|
105
|
-
end
|
|
97
|
+
return transformed_slots(type, map_reducer)
|
|
106
98
|
end
|
|
107
99
|
|
|
108
100
|
ReducerToken.new(idx)
|
|
@@ -115,7 +107,7 @@ module Jrf
|
|
|
115
107
|
row = @ctx._
|
|
116
108
|
slot = map_reducer.slot(key)
|
|
117
109
|
with_scoped_reducers(slot.reducers) do
|
|
118
|
-
result = block.call(row)
|
|
110
|
+
result = @ctx.send(:__jrf_with_current_input, row) { block.call(row) }
|
|
119
111
|
slot.template ||= result
|
|
120
112
|
end
|
|
121
113
|
|
|
@@ -146,6 +138,60 @@ module Jrf
|
|
|
146
138
|
@cursor = saved_cursor
|
|
147
139
|
end
|
|
148
140
|
|
|
141
|
+
def transform_collection(type, collection, &block)
|
|
142
|
+
case type
|
|
143
|
+
when :array
|
|
144
|
+
raise TypeError, "map expects Array, got #{collection.class}" unless collection.is_a?(Array)
|
|
145
|
+
|
|
146
|
+
collection.each_with_object([]) do |value, result|
|
|
147
|
+
mapped = @ctx.send(:__jrf_with_current_input, value) { block.call(value) }
|
|
148
|
+
append_map_result(result, mapped)
|
|
149
|
+
end
|
|
150
|
+
when :hash
|
|
151
|
+
raise TypeError, "map_values expects Hash, got #{collection.class}" unless collection.is_a?(Hash)
|
|
152
|
+
|
|
153
|
+
collection.each_with_object({}) do |(key, value), result|
|
|
154
|
+
mapped = @ctx.send(:__jrf_with_current_input, value) { block.call(value) }
|
|
155
|
+
next if mapped.equal?(Control::DROPPED)
|
|
156
|
+
raise TypeError, "flat is not supported inside map_values" if mapped.is_a?(Control::Flat)
|
|
157
|
+
|
|
158
|
+
result[key] = mapped
|
|
159
|
+
end
|
|
160
|
+
end
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
def transformed_slots(type, map_reducer)
|
|
164
|
+
case type
|
|
165
|
+
when :array
|
|
166
|
+
map_reducer.slots
|
|
167
|
+
.sort_by { |k, _| k }
|
|
168
|
+
.each_with_object([]) do |(_, slot), result|
|
|
169
|
+
append_map_result(result, slot.template)
|
|
170
|
+
end
|
|
171
|
+
when :hash
|
|
172
|
+
map_reducer.slots.each_with_object({}) do |(key, slot), result|
|
|
173
|
+
next if slot.template.equal?(Control::DROPPED)
|
|
174
|
+
raise TypeError, "flat is not supported inside map_values" if slot.template.is_a?(Control::Flat)
|
|
175
|
+
|
|
176
|
+
result[key] = slot.template
|
|
177
|
+
end
|
|
178
|
+
end
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
def append_map_result(result, mapped)
|
|
182
|
+
return if mapped.equal?(Control::DROPPED)
|
|
183
|
+
|
|
184
|
+
if mapped.is_a?(Control::Flat)
|
|
185
|
+
unless mapped.value.is_a?(Array)
|
|
186
|
+
raise TypeError, "flat expects Array, got #{mapped.value.class}"
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
result.concat(mapped.value)
|
|
190
|
+
else
|
|
191
|
+
result << mapped
|
|
192
|
+
end
|
|
193
|
+
end
|
|
194
|
+
|
|
149
195
|
class MapReducer
|
|
150
196
|
attr_reader :slots
|
|
151
197
|
|
data/lib/jrf/version.rb
CHANGED
data/test/jrf_test.rb
CHANGED
|
@@ -1,7 +1,17 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
begin
|
|
4
|
+
require "bundler/setup"
|
|
5
|
+
rescue LoadError
|
|
6
|
+
# Allow running tests in plain Ruby environments with globally installed gems.
|
|
7
|
+
end
|
|
8
|
+
|
|
3
9
|
require "json"
|
|
4
10
|
require "open3"
|
|
11
|
+
require "stringio"
|
|
12
|
+
require "tmpdir"
|
|
13
|
+
require "zlib"
|
|
14
|
+
require_relative "../lib/jrf/cli/runner"
|
|
5
15
|
|
|
6
16
|
def run_jrf(expr, input, *opts)
|
|
7
17
|
Open3.capture3("./exe/jrf", *opts, expr, stdin_data: input)
|
|
@@ -41,6 +51,45 @@ def lines(str)
|
|
|
41
51
|
str.lines.map(&:strip).reject(&:empty?)
|
|
42
52
|
end
|
|
43
53
|
|
|
54
|
+
class RecordingRunner < Jrf::CLI::Runner
|
|
55
|
+
attr_reader :writes
|
|
56
|
+
|
|
57
|
+
def initialize(**kwargs)
|
|
58
|
+
super
|
|
59
|
+
@writes = []
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
private
|
|
63
|
+
|
|
64
|
+
def write_output(str)
|
|
65
|
+
return if str.empty?
|
|
66
|
+
|
|
67
|
+
@writes << str
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
class ChunkedSource
|
|
72
|
+
def initialize(str, chunk_size: 5)
|
|
73
|
+
@str = str
|
|
74
|
+
@chunk_size = chunk_size
|
|
75
|
+
@offset = 0
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def read(length = nil, outbuf = nil)
|
|
79
|
+
raise "expected chunked reads" if length.nil?
|
|
80
|
+
|
|
81
|
+
chunk = @str.byteslice(@offset, [length, @chunk_size].min)
|
|
82
|
+
return nil unless chunk
|
|
83
|
+
|
|
84
|
+
@offset += chunk.bytesize
|
|
85
|
+
if outbuf
|
|
86
|
+
outbuf.replace(chunk)
|
|
87
|
+
else
|
|
88
|
+
chunk
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
44
93
|
File.chmod(0o755, "./exe/jrf")
|
|
45
94
|
|
|
46
95
|
input = <<~NDJSON
|
|
@@ -92,10 +141,11 @@ assert_includes(stderr, 'stage[1]: _["hello"]')
|
|
|
92
141
|
|
|
93
142
|
stdout, stderr, status = Open3.capture3("./exe/jrf", "--help")
|
|
94
143
|
assert_success(status, stderr, "help option")
|
|
95
|
-
assert_includes(stdout, "usage: jrf [
|
|
144
|
+
assert_includes(stdout, "usage: jrf [options] 'STAGE >> STAGE >> ...'")
|
|
96
145
|
assert_includes(stdout, "JSON filter with the power and speed of Ruby.")
|
|
97
146
|
assert_includes(stdout, "--lax")
|
|
98
147
|
assert_includes(stdout, "--pretty")
|
|
148
|
+
assert_includes(stdout, "--atomic-write-bytes N")
|
|
99
149
|
assert_includes(stdout, "Pipeline:")
|
|
100
150
|
assert_includes(stdout, "Connect stages with top-level >>.")
|
|
101
151
|
assert_includes(stdout, "The current value in each stage is available as _.")
|
|
@@ -103,11 +153,72 @@ assert_includes(stdout, "See Also:")
|
|
|
103
153
|
assert_includes(stdout, "https://github.com/kazuho/jrf#readme")
|
|
104
154
|
assert_equal([], lines(stderr), "help stderr output")
|
|
105
155
|
|
|
156
|
+
threshold_input = StringIO.new((1..4).map { |i| "{\"foo\":\"#{'x' * 1020}\",\"i\":#{i}}\n" }.join)
|
|
157
|
+
buffered_runner = RecordingRunner.new(inputs: [threshold_input], out: StringIO.new, err: StringIO.new)
|
|
158
|
+
buffered_runner.run('_')
|
|
159
|
+
expected_line = JSON.generate({"foo" => "x" * 1020, "i" => 1}) + "\n"
|
|
160
|
+
assert_equal(2, buffered_runner.writes.length, "default atomic write limit buffers records until the configured threshold")
|
|
161
|
+
assert_equal(expected_line.bytesize * 3, buffered_runner.writes.first.bytesize, "default atomic write limit flushes before the next record would exceed the threshold")
|
|
162
|
+
assert_equal(expected_line.bytesize, buffered_runner.writes.last.bytesize, "final buffer flush emits the remaining record")
|
|
163
|
+
|
|
164
|
+
small_limit_runner = RecordingRunner.new(inputs: [StringIO.new("{\"foo\":1}\n{\"foo\":2}\n")], out: StringIO.new, err: StringIO.new, atomic_write_bytes: 1)
|
|
165
|
+
small_limit_runner.run('_["foo"]')
|
|
166
|
+
assert_equal(["1\n", "2\n"], small_limit_runner.writes, "small atomic write limit emits oversized records directly")
|
|
167
|
+
|
|
168
|
+
error_runner = RecordingRunner.new(inputs: [StringIO.new("{\"foo\":1}\n{\"foo\":")], out: StringIO.new, err: StringIO.new)
|
|
169
|
+
begin
|
|
170
|
+
error_runner.run('_["foo"]')
|
|
171
|
+
raise "expected parse error for buffered flush test"
|
|
172
|
+
rescue JSON::ParserError
|
|
173
|
+
assert_equal(["1\n"], error_runner.writes, "buffer flushes pending output before parse errors escape")
|
|
174
|
+
end
|
|
175
|
+
|
|
106
176
|
stdout, stderr, status = run_jrf('select(_["hello"] == 123) >> _["hello"]', input_hello, "--verbose")
|
|
107
177
|
assert_success(status, stderr, "dump stages verbose alias")
|
|
108
178
|
assert_equal(%w[123], lines(stdout), "dump stages verbose alias output")
|
|
109
179
|
assert_includes(stderr, 'stage[0]: select(_["hello"] == 123)')
|
|
110
180
|
|
|
181
|
+
stdout, stderr, status = run_jrf('_["hello"]', input_hello, "--atomic-write-bytes", "512")
|
|
182
|
+
assert_success(status, stderr, "atomic write bytes option")
|
|
183
|
+
assert_equal(%w[123 456], lines(stdout), "atomic write bytes option output")
|
|
184
|
+
|
|
185
|
+
stdout, stderr, status = run_jrf('_["hello"]', input_hello, "--atomic-write-bytes=512")
|
|
186
|
+
assert_success(status, stderr, "atomic write bytes equals form")
|
|
187
|
+
assert_equal(%w[123 456], lines(stdout), "atomic write bytes equals form output")
|
|
188
|
+
|
|
189
|
+
stdout, stderr, status = Open3.capture3("./exe/jrf", "--atomic-write-bytes", "0", '_["hello"]', stdin_data: input_hello)
|
|
190
|
+
assert_failure(status, "atomic write bytes rejects zero")
|
|
191
|
+
assert_includes(stderr, "--atomic-write-bytes requires a positive integer")
|
|
192
|
+
|
|
193
|
+
Dir.mktmpdir do |dir|
|
|
194
|
+
gz_path = File.join(dir, "input.ndjson.gz")
|
|
195
|
+
Zlib::GzipWriter.open(gz_path) do |io|
|
|
196
|
+
io.write("{\"foo\":10}\n{\"foo\":20}\n")
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
stdout, stderr, status = Open3.capture3("./exe/jrf", '_["foo"]', gz_path)
|
|
200
|
+
assert_success(status, stderr, "compressed input by suffix")
|
|
201
|
+
assert_equal(%w[10 20], lines(stdout), "compressed input output")
|
|
202
|
+
|
|
203
|
+
lax_gz_path = File.join(dir, "input-lax.json.gz")
|
|
204
|
+
Zlib::GzipWriter.open(lax_gz_path) do |io|
|
|
205
|
+
io.write("{\"foo\":30}\n\x1e{\"foo\":40}\n")
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
stdout, stderr, status = Open3.capture3("./exe/jrf", "--lax", '_["foo"]', lax_gz_path)
|
|
209
|
+
assert_success(status, stderr, "compressed lax input by suffix")
|
|
210
|
+
assert_equal(%w[30 40], lines(stdout), "compressed lax input output")
|
|
211
|
+
|
|
212
|
+
second_gz_path = File.join(dir, "input2.ndjson.gz")
|
|
213
|
+
Zlib::GzipWriter.open(second_gz_path) do |io|
|
|
214
|
+
io.write("{\"foo\":50}\n")
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
stdout, stderr, status = Open3.capture3("./exe/jrf", '_["foo"]', gz_path, second_gz_path)
|
|
218
|
+
assert_success(status, stderr, "multiple compressed inputs by suffix")
|
|
219
|
+
assert_equal(%w[10 20 50], lines(stdout), "multiple compressed input output")
|
|
220
|
+
end
|
|
221
|
+
|
|
111
222
|
stdout, stderr, status = run_jrf('_', input_hello, "--pretty")
|
|
112
223
|
assert_success(status, stderr, "pretty output")
|
|
113
224
|
assert_equal(
|
|
@@ -174,6 +285,14 @@ stdout, stderr, status = run_jrf('_["items"] >> flat >> group', input_flat)
|
|
|
174
285
|
assert_success(status, stderr, "flat then group")
|
|
175
286
|
assert_equal(['[1,2,3]'], lines(stdout), "flat then group output")
|
|
176
287
|
|
|
288
|
+
stdout, stderr, status = run_jrf('map { |x| flat }', "[[1,2],[3],[4,5,6]]\n")
|
|
289
|
+
assert_success(status, stderr, "flat inside map")
|
|
290
|
+
assert_equal(['[1,2,3,4,5,6]'], lines(stdout), "flat inside map output")
|
|
291
|
+
|
|
292
|
+
stdout, stderr, status = run_jrf('map_values { |v| flat }', "{\"a\":[1,2],\"b\":[3]}\n")
|
|
293
|
+
assert_failure(status, "flat inside map_values")
|
|
294
|
+
assert_includes(stderr, "flat is not supported inside map_values")
|
|
295
|
+
|
|
177
296
|
stdout, stderr, status = run_jrf('_["foo"] >> flat', input)
|
|
178
297
|
assert_failure(status, "flat requires array")
|
|
179
298
|
assert_includes(stderr, "flat expects Array")
|
|
@@ -485,6 +604,26 @@ stdout, stderr, status = run_jrf('_["foo"]', input_lax_trailing_rs, "--lax")
|
|
|
485
604
|
assert_success(status, stderr, "lax ignores trailing separator")
|
|
486
605
|
assert_equal(%w[9], lines(stdout), "lax trailing separator output")
|
|
487
606
|
|
|
607
|
+
chunked_lax_out = RecordingRunner.new(
|
|
608
|
+
inputs: [ChunkedSource.new("{\"foo\":1}\n\x1e{\"foo\":2}\n\t{\"foo\":3}\n")],
|
|
609
|
+
out: StringIO.new,
|
|
610
|
+
err: StringIO.new,
|
|
611
|
+
lax: true
|
|
612
|
+
)
|
|
613
|
+
chunked_lax_out.run('_["foo"]')
|
|
614
|
+
assert_equal(%w[1 2 3], lines(chunked_lax_out.writes.join), "lax mode streams chunked input without whole-input reads")
|
|
615
|
+
|
|
616
|
+
Dir.mktmpdir do |dir|
|
|
617
|
+
one = File.join(dir, "one.json")
|
|
618
|
+
two = File.join(dir, "two.json")
|
|
619
|
+
File.write(one, "1")
|
|
620
|
+
File.write(two, "2")
|
|
621
|
+
|
|
622
|
+
stdout, stderr, status = Open3.capture3("./exe/jrf", "--lax", "_", one, two)
|
|
623
|
+
assert_success(status, stderr, "lax keeps file boundaries")
|
|
624
|
+
assert_equal(%w[1 2], lines(stdout), "lax does not merge JSON across file boundaries")
|
|
625
|
+
end
|
|
626
|
+
|
|
488
627
|
stdout, stderr, status = run_jrf('select(_["x"] > ) >> _["foo"]', "")
|
|
489
628
|
assert_failure(status, "syntax error should fail before row loop")
|
|
490
629
|
assert_includes(stderr, "syntax error")
|
|
@@ -540,6 +679,10 @@ stdout, stderr, status = run_jrf('_["values"] >> map { |x| sum(_[0] + x) }', inp
|
|
|
540
679
|
assert_success(status, stderr, "map keeps ambient _")
|
|
541
680
|
assert_equal(['[12,66,606]'], lines(stdout), "map ambient _ output")
|
|
542
681
|
|
|
682
|
+
stdout, stderr, status = run_jrf('_["values"] >> map { |x| reduce(0) { |acc, v| acc + v } }', input_map)
|
|
683
|
+
assert_success(status, stderr, "map with reduce")
|
|
684
|
+
assert_equal(['[6,60,600]'], lines(stdout), "map with reduce output")
|
|
685
|
+
|
|
543
686
|
input_map_varying = <<~NDJSON
|
|
544
687
|
[1,10]
|
|
545
688
|
[2,20,200]
|
|
@@ -550,6 +693,20 @@ stdout, stderr, status = run_jrf('map { |x| sum(x) }', input_map_varying)
|
|
|
550
693
|
assert_success(status, stderr, "map varying lengths")
|
|
551
694
|
assert_equal(['[6,30,200]'], lines(stdout), "map varying lengths output")
|
|
552
695
|
|
|
696
|
+
input_map_unsorted = <<~NDJSON
|
|
697
|
+
{"values":[3,30]}
|
|
698
|
+
{"values":[1,10]}
|
|
699
|
+
{"values":[2,20]}
|
|
700
|
+
NDJSON
|
|
701
|
+
|
|
702
|
+
stdout, stderr, status = run_jrf('_["values"] >> map { |x| group }', input_map)
|
|
703
|
+
assert_success(status, stderr, "map with group")
|
|
704
|
+
assert_equal(['[[1,2,3],[10,20,30],[100,200,300]]'], lines(stdout), "map with group output")
|
|
705
|
+
|
|
706
|
+
stdout, stderr, status = run_jrf('_["values"] >> map { |x| sort }', input_map_unsorted)
|
|
707
|
+
assert_success(status, stderr, "map with sort default key")
|
|
708
|
+
assert_equal(['[[1,2,3],[10,20,30]]'], lines(stdout), "map with sort default key output")
|
|
709
|
+
|
|
553
710
|
input_map_values = <<~NDJSON
|
|
554
711
|
{"a":1,"b":10}
|
|
555
712
|
{"a":2,"b":20}
|
|
@@ -578,10 +735,18 @@ stdout, stderr, status = run_jrf('map_values { |v| count(v) }', input_map_values
|
|
|
578
735
|
assert_success(status, stderr, "map_values with count")
|
|
579
736
|
assert_equal(['{"a":3,"b":3}'], lines(stdout), "map_values with count output")
|
|
580
737
|
|
|
738
|
+
stdout, stderr, status = run_jrf('map_values { |v| group }', input_map_values)
|
|
739
|
+
assert_success(status, stderr, "map_values with group")
|
|
740
|
+
assert_equal(['{"a":[1,2,3],"b":[10,20,30]}'], lines(stdout), "map_values with group output")
|
|
741
|
+
|
|
581
742
|
stdout, stderr, status = run_jrf('map_values { |v| sum(_["a"] + v) }', input_map_values)
|
|
582
743
|
assert_success(status, stderr, "map_values keeps ambient _")
|
|
583
744
|
assert_equal(['{"a":12,"b":66}'], lines(stdout), "map_values ambient _ output")
|
|
584
745
|
|
|
746
|
+
stdout, stderr, status = run_jrf('map_values { |v| reduce(0) { |acc, x| acc + x } }', input_map_values)
|
|
747
|
+
assert_success(status, stderr, "map_values with reduce")
|
|
748
|
+
assert_equal(['{"a":6,"b":60}'], lines(stdout), "map_values with reduce output")
|
|
749
|
+
|
|
585
750
|
stdout, stderr, status = run_jrf('select(false) >> map { |x| sum(x) }', input_map)
|
|
586
751
|
assert_success(status, stderr, "map no matches")
|
|
587
752
|
assert_equal([], lines(stdout), "map no matches output")
|
|
@@ -599,10 +764,18 @@ stdout, stderr, status = run_jrf('_["values"] >> map { |x| x + 1 }', input_map)
|
|
|
599
764
|
assert_success(status, stderr, "map transform")
|
|
600
765
|
assert_equal(['[2,11,101]', '[3,21,201]', '[4,31,301]'], lines(stdout), "map transform output")
|
|
601
766
|
|
|
767
|
+
stdout, stderr, status = run_jrf('_["values"] >> map { |x| select(x >= 20) }', input_map)
|
|
768
|
+
assert_success(status, stderr, "map transform with select")
|
|
769
|
+
assert_equal(['[100]', '[20,200]', '[30,300]'], lines(stdout), "map transform with select output")
|
|
770
|
+
|
|
602
771
|
stdout, stderr, status = run_jrf('map_values { |v| v * 2 }', input_map_values)
|
|
603
772
|
assert_success(status, stderr, "map_values transform")
|
|
604
773
|
assert_equal(['{"a":2,"b":20}', '{"a":4,"b":40}', '{"a":6,"b":60}'], lines(stdout), "map_values transform output")
|
|
605
774
|
|
|
775
|
+
stdout, stderr, status = run_jrf('map_values { |v| select(v >= 10) }', input_map_values)
|
|
776
|
+
assert_success(status, stderr, "map_values transform with select")
|
|
777
|
+
assert_equal(['{"b":10}', '{"b":20}', '{"b":30}'], lines(stdout), "map_values transform with select output")
|
|
778
|
+
|
|
606
779
|
stdout, stderr, status = run_jrf('_["values"] >> map { |x| x + 1 } >> map { |x| x * 10 }', input_map)
|
|
607
780
|
assert_success(status, stderr, "chained map transforms")
|
|
608
781
|
assert_equal(['[20,110,1010]', '[30,210,2010]', '[40,310,3010]'], lines(stdout), "chained map transforms output")
|
|
@@ -639,6 +812,12 @@ stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| group(row["path"
|
|
|
639
812
|
assert_success(status, stderr, "group_by with group(expr)")
|
|
640
813
|
assert_equal(['{"200":["/a","/c","/d"],"404":["/b"]}'], lines(stdout), "group_by with group(expr) output")
|
|
641
814
|
|
|
815
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { group }', input_gb)
|
|
816
|
+
assert_success(status, stderr, "group_by with implicit group")
|
|
817
|
+
result = JSON.parse(lines(stdout).first)
|
|
818
|
+
assert_equal(3, result["200"].length, "group_by implicit group 200 count")
|
|
819
|
+
assert_equal("/a", result["200"][0]["path"], "group_by implicit group first row")
|
|
820
|
+
|
|
642
821
|
stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| min(row["latency"]) }', input_gb)
|
|
643
822
|
assert_success(status, stderr, "group_by with min")
|
|
644
823
|
assert_equal(['{"200":10,"404":50}'], lines(stdout), "group_by with min output")
|
|
@@ -647,6 +826,10 @@ stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| {total: sum(row[
|
|
|
647
826
|
assert_success(status, stderr, "group_by with multi-reducer")
|
|
648
827
|
assert_equal(['{"200":{"total":60,"n":3},"404":{"total":50,"n":1}}'], lines(stdout), "group_by multi-reducer output")
|
|
649
828
|
|
|
829
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { reduce(0) { |acc, row| acc + row["latency"] } }', input_gb)
|
|
830
|
+
assert_success(status, stderr, "group_by with reduce")
|
|
831
|
+
assert_equal(['{"200":60,"404":50}'], lines(stdout), "group_by with reduce output")
|
|
832
|
+
|
|
650
833
|
stdout, stderr, status = run_jrf('select(false) >> group_by(_["status"]) { count() }', input_gb)
|
|
651
834
|
assert_success(status, stderr, "group_by no matches")
|
|
652
835
|
assert_equal([], lines(stdout), "group_by no matches output")
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: jrf
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.4
|
|
4
|
+
version: 0.1.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- kazuho
|
|
@@ -40,12 +40,12 @@ files:
|
|
|
40
40
|
- jrf.gemspec
|
|
41
41
|
- lib/jrf.rb
|
|
42
42
|
- lib/jrf/cli.rb
|
|
43
|
+
- lib/jrf/cli/runner.rb
|
|
43
44
|
- lib/jrf/control.rb
|
|
44
45
|
- lib/jrf/pipeline.rb
|
|
45
46
|
- lib/jrf/pipeline_parser.rb
|
|
46
47
|
- lib/jrf/reducers.rb
|
|
47
48
|
- lib/jrf/row_context.rb
|
|
48
|
-
- lib/jrf/runner.rb
|
|
49
49
|
- lib/jrf/stage.rb
|
|
50
50
|
- lib/jrf/version.rb
|
|
51
51
|
- test/jrf_test.rb
|
data/lib/jrf/runner.rb
DELETED
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "json"
|
|
4
|
-
require_relative "pipeline"
|
|
5
|
-
require_relative "pipeline_parser"
|
|
6
|
-
|
|
7
|
-
module Jrf
|
|
8
|
-
class Runner
|
|
9
|
-
RS_CHAR = "\x1e"
|
|
10
|
-
|
|
11
|
-
def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false, pretty: false)
|
|
12
|
-
@input = input
|
|
13
|
-
@out = out
|
|
14
|
-
@err = err
|
|
15
|
-
@lax = lax
|
|
16
|
-
@pretty = pretty
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
def run(expression, verbose: false)
|
|
20
|
-
parsed = PipelineParser.new(expression).parse
|
|
21
|
-
stages = parsed[:stages]
|
|
22
|
-
dump_stages(stages) if verbose
|
|
23
|
-
|
|
24
|
-
blocks = stages.map { |stage|
|
|
25
|
-
eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
|
|
26
|
-
}
|
|
27
|
-
pipeline = Pipeline.new(*blocks)
|
|
28
|
-
|
|
29
|
-
input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
|
|
30
|
-
pipeline.call(input_enum) do |value|
|
|
31
|
-
@out.puts(@pretty ? JSON.pretty_generate(value) : JSON.generate(value))
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
private
|
|
36
|
-
|
|
37
|
-
def each_input_value
|
|
38
|
-
return each_input_value_lax { |value| yield value } if @lax
|
|
39
|
-
|
|
40
|
-
each_input_value_ndjson { |value| yield value }
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
def each_input_value_ndjson
|
|
44
|
-
@input.each_line do |raw_line|
|
|
45
|
-
line = raw_line.strip
|
|
46
|
-
next if line.empty?
|
|
47
|
-
|
|
48
|
-
yield JSON.parse(line)
|
|
49
|
-
end
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
def each_input_value_lax
|
|
53
|
-
require "oj"
|
|
54
|
-
source = @input.read.to_s
|
|
55
|
-
source = source.include?(RS_CHAR) ? source.tr(RS_CHAR, "\n") : source
|
|
56
|
-
handler = Class.new(Oj::ScHandler) do
|
|
57
|
-
def initialize(&emit)
|
|
58
|
-
@emit = emit
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
def hash_start = {}
|
|
62
|
-
def hash_key(key) = key
|
|
63
|
-
def hash_set(hash, key, value) = hash[key] = value
|
|
64
|
-
def array_start = []
|
|
65
|
-
def array_append(array, value) = array << value
|
|
66
|
-
def add_value(value) = @emit.call(value)
|
|
67
|
-
end.new { |value| yield value }
|
|
68
|
-
Oj.sc_parse(handler, source)
|
|
69
|
-
rescue LoadError
|
|
70
|
-
raise "oj is required for --lax mode (gem install oj)"
|
|
71
|
-
rescue Oj::ParseError => e
|
|
72
|
-
raise JSON::ParserError, e.message
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
def dump_stages(stages)
|
|
76
|
-
stages.each_with_index do |stage, i|
|
|
77
|
-
@err.puts "stage[#{i}]: #{stage[:src]}"
|
|
78
|
-
end
|
|
79
|
-
end
|
|
80
|
-
end
|
|
81
|
-
end
|