jrf 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +0 -5
- data/exe/jrf +6 -0
- data/jrf.gemspec +1 -0
- data/lib/jrf/cli.rb +15 -6
- data/lib/jrf/pipeline.rb +85 -0
- data/lib/jrf/pipeline_parser.rb +1 -41
- data/lib/jrf/row_context.rb +44 -39
- data/lib/jrf/runner.rb +41 -139
- data/lib/jrf/stage.rb +184 -0
- data/lib/jrf/version.rb +1 -1
- data/lib/jrf.rb +18 -0
- data/test/jrf_test.rb +397 -18
- metadata +18 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9ce648c2afbfe10dc161b08badb05acdb411baf839dde77433927380b6bb7439
|
|
4
|
+
data.tar.gz: 6be5a0851eecd3cfcbe93aff1cb8fdd163a84dd96a7b12e440fc514db03f67a0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: aa4dfead95dbe09453ec720cdbcf77ba4c7e3f1047c60f51d4ff54724dfa540bb1dbd5630ecb07d09d745e1e61e4c236f50f4407ff6d4c17dd5431b385679f57
|
|
7
|
+
data.tar.gz: 03c3f5dd3f36675a2bc31981effc506bb1822bb170e754785ccffe077becdd5af13421b4cbfd18fea1c1262f06feef61561be3e3243ca0379e1e6af21ad003c5
|
data/Rakefile
CHANGED
data/exe/jrf
CHANGED
data/jrf.gemspec
CHANGED
data/lib/jrf/cli.rb
CHANGED
|
@@ -4,15 +4,17 @@ require_relative "runner"
|
|
|
4
4
|
|
|
5
5
|
module Jrf
|
|
6
6
|
class CLI
|
|
7
|
-
USAGE = "usage: jrf [-v] [--help] 'STAGE >> STAGE >> ...'"
|
|
7
|
+
USAGE = "usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'"
|
|
8
8
|
|
|
9
9
|
HELP_TEXT = <<~'TEXT'
|
|
10
|
-
usage: jrf [-v] [--help] 'STAGE >> STAGE >> ...'
|
|
10
|
+
usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'
|
|
11
11
|
|
|
12
12
|
JSON filter with the power and speed of Ruby.
|
|
13
13
|
|
|
14
14
|
Options:
|
|
15
|
-
-v, --verbose print
|
|
15
|
+
-v, --verbose print parsed stage expressions
|
|
16
|
+
--lax allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
|
|
17
|
+
-p, --pretty pretty-print JSON output instead of compact NDJSON
|
|
16
18
|
-h, --help show this help and exit
|
|
17
19
|
|
|
18
20
|
Pipeline:
|
|
@@ -27,18 +29,25 @@ module Jrf
|
|
|
27
29
|
jrf '_["msg"] >> reduce(nil) { |acc, v| acc ? "#{acc} #{v}" : v }'
|
|
28
30
|
|
|
29
31
|
See Also:
|
|
30
|
-
|
|
31
|
-
man jrf
|
|
32
|
+
https://github.com/kazuho/jrf#readme
|
|
32
33
|
TEXT
|
|
33
34
|
|
|
34
35
|
def self.run(argv = ARGV, input: ARGF, out: $stdout, err: $stderr)
|
|
35
36
|
verbose = false
|
|
37
|
+
lax = false
|
|
38
|
+
pretty = false
|
|
36
39
|
|
|
37
40
|
while argv.first&.start_with?("-")
|
|
38
41
|
case argv.first
|
|
39
42
|
when "-v", "--verbose"
|
|
40
43
|
verbose = true
|
|
41
44
|
argv.shift
|
|
45
|
+
when "--lax"
|
|
46
|
+
lax = true
|
|
47
|
+
argv.shift
|
|
48
|
+
when "-p", "--pretty"
|
|
49
|
+
pretty = true
|
|
50
|
+
argv.shift
|
|
42
51
|
when "-h", "--help"
|
|
43
52
|
out.puts HELP_TEXT
|
|
44
53
|
return 0
|
|
@@ -55,7 +64,7 @@ module Jrf
|
|
|
55
64
|
end
|
|
56
65
|
|
|
57
66
|
expression = argv.shift
|
|
58
|
-
Runner.new(input: input, out: out, err: err).run(expression, verbose: verbose)
|
|
67
|
+
Runner.new(input: input, out: out, err: err, lax: lax, pretty: pretty).run(expression, verbose: verbose)
|
|
59
68
|
0
|
|
60
69
|
end
|
|
61
70
|
end
|
data/lib/jrf/pipeline.rb
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "control"
|
|
4
|
+
require_relative "row_context"
|
|
5
|
+
require_relative "stage"
|
|
6
|
+
|
|
7
|
+
module Jrf
|
|
8
|
+
class Pipeline
|
|
9
|
+
def initialize(*blocks)
|
|
10
|
+
raise ArgumentError, "at least one stage block is required" if blocks.empty?
|
|
11
|
+
|
|
12
|
+
@ctx = RowContext.new
|
|
13
|
+
@stages = blocks.map { |block| Stage.new(@ctx, block, src: nil) }
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
# Run the pipeline on an enumerable of input values.
|
|
17
|
+
#
|
|
18
|
+
# Without a block, returns an Array of output values.
|
|
19
|
+
# With a block, streams each output value to the block.
|
|
20
|
+
#
|
|
21
|
+
# @param input [Enumerable] input values to process
|
|
22
|
+
# @yieldparam value output value
|
|
23
|
+
# @return [Array, nil] output values (without block), or nil (with block)
|
|
24
|
+
def call(input, &on_output)
|
|
25
|
+
if on_output
|
|
26
|
+
call_streaming(input, &on_output)
|
|
27
|
+
else
|
|
28
|
+
results = []
|
|
29
|
+
call_streaming(input) { |v| results << v }
|
|
30
|
+
results
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
private
|
|
35
|
+
|
|
36
|
+
def call_streaming(input, &on_output)
|
|
37
|
+
error = nil
|
|
38
|
+
begin
|
|
39
|
+
input.each { |value| process_value(value, @stages, &on_output) }
|
|
40
|
+
rescue StandardError => e
|
|
41
|
+
error = e
|
|
42
|
+
ensure
|
|
43
|
+
flush_reducers(@stages, &on_output)
|
|
44
|
+
end
|
|
45
|
+
raise error if error
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def process_value(input, stages, &on_output)
|
|
49
|
+
current_values = [input]
|
|
50
|
+
|
|
51
|
+
stages.each do |stage|
|
|
52
|
+
next_values = []
|
|
53
|
+
|
|
54
|
+
current_values.each do |value|
|
|
55
|
+
out = stage.call(value)
|
|
56
|
+
if out.equal?(Control::DROPPED)
|
|
57
|
+
next
|
|
58
|
+
elsif out.is_a?(Control::Flat)
|
|
59
|
+
unless out.value.is_a?(Array)
|
|
60
|
+
raise TypeError, "flat expects Array, got #{out.value.class}"
|
|
61
|
+
end
|
|
62
|
+
next_values.concat(out.value)
|
|
63
|
+
else
|
|
64
|
+
next_values << out
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
return if next_values.empty?
|
|
69
|
+
current_values = next_values
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
current_values.each(&on_output)
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def flush_reducers(stages, &on_output)
|
|
76
|
+
stages.each_with_index do |stage, idx|
|
|
77
|
+
rows = stage.finish
|
|
78
|
+
next if rows.empty?
|
|
79
|
+
|
|
80
|
+
rest = stages.drop(idx + 1)
|
|
81
|
+
rows.each { |value| process_value(value, rest, &on_output) }
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
end
|
data/lib/jrf/pipeline_parser.rb
CHANGED
|
@@ -9,51 +9,11 @@ module Jrf
|
|
|
9
9
|
def parse
|
|
10
10
|
stages = split_top_level_pipeline(@source).map(&:strip).reject(&:empty?)
|
|
11
11
|
raise ArgumentError, "empty expression" if stages.empty?
|
|
12
|
-
{ stages: stages.map { |stage|
|
|
12
|
+
{ stages: stages.map { |stage| { src: stage } } }
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
private
|
|
16
16
|
|
|
17
|
-
def parse_stage!(stage)
|
|
18
|
-
if select_stage?(stage)
|
|
19
|
-
{
|
|
20
|
-
kind: :select,
|
|
21
|
-
original: stage,
|
|
22
|
-
src: "(#{parse_select!(stage)}) ? _ : ::Jrf::Control::DROPPED"
|
|
23
|
-
}
|
|
24
|
-
else
|
|
25
|
-
reject_unsupported_stage!(stage)
|
|
26
|
-
{
|
|
27
|
-
kind: :extract,
|
|
28
|
-
original: stage,
|
|
29
|
-
src: validate_extract!(stage)
|
|
30
|
-
}
|
|
31
|
-
end
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
def validate_extract!(stage)
|
|
35
|
-
reject_unsupported_stage!(stage)
|
|
36
|
-
stage
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
def parse_select!(stage)
|
|
40
|
-
reject_unsupported_stage!(stage)
|
|
41
|
-
match = /\Aselect\s*\((.*)\)\s*\z/m.match(stage)
|
|
42
|
-
raise ArgumentError, "first stage must be select(...)" unless match
|
|
43
|
-
|
|
44
|
-
inner = match[1].strip
|
|
45
|
-
raise ArgumentError, "select(...) must contain an expression" if inner.empty?
|
|
46
|
-
|
|
47
|
-
inner
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
def select_stage?(stage)
|
|
51
|
-
/\Aselect\s*\(/.match?(stage)
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
def reject_unsupported_stage!(stage)
|
|
55
|
-
end
|
|
56
|
-
|
|
57
17
|
def split_top_level_pipeline(source)
|
|
58
18
|
parts = []
|
|
59
19
|
start_idx = 0
|
data/lib/jrf/row_context.rb
CHANGED
|
@@ -1,21 +1,22 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
require_relative "control"
|
|
3
4
|
require_relative "reducers"
|
|
4
5
|
|
|
5
6
|
module Jrf
|
|
6
7
|
class RowContext
|
|
7
8
|
MISSING = Object.new
|
|
8
|
-
|
|
9
|
+
|
|
10
|
+
attr_writer :__jrf_current_stage
|
|
9
11
|
|
|
10
12
|
class << self
|
|
11
13
|
def define_reducer(name, &definition)
|
|
12
14
|
define_method(name) do |*args, **kwargs, &block|
|
|
13
15
|
spec = definition.call(self, *args, **kwargs, block: block)
|
|
14
|
-
|
|
16
|
+
@__jrf_current_stage.allocate_reducer(
|
|
15
17
|
spec.fetch(:value),
|
|
16
18
|
initial: reducer_initial_value(spec.fetch(:initial)),
|
|
17
19
|
finish: spec[:finish],
|
|
18
|
-
emit_many: spec.fetch(:emit_many, false),
|
|
19
20
|
&spec.fetch(:step)
|
|
20
21
|
)
|
|
21
22
|
end
|
|
@@ -24,7 +25,7 @@ module Jrf
|
|
|
24
25
|
|
|
25
26
|
def initialize(obj = nil)
|
|
26
27
|
@obj = obj
|
|
27
|
-
@
|
|
28
|
+
@__jrf_current_stage = nil
|
|
28
29
|
end
|
|
29
30
|
|
|
30
31
|
def reset(obj)
|
|
@@ -40,24 +41,38 @@ module Jrf
|
|
|
40
41
|
Control::Flat.new(@obj)
|
|
41
42
|
end
|
|
42
43
|
|
|
44
|
+
def select(predicate)
|
|
45
|
+
predicate ? @obj : Control::DROPPED
|
|
46
|
+
end
|
|
47
|
+
|
|
43
48
|
define_reducer(:sum) do |_ctx, value, initial: 0, block: nil|
|
|
44
|
-
{ value: value, initial: initial, step: ->(acc, v) { acc + v } }
|
|
49
|
+
{ value: value, initial: initial, step: ->(acc, v) { v.nil? ? acc : (acc + v) } }
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
define_reducer(:count) do |_ctx, value = MISSING, block: nil|
|
|
53
|
+
if value.equal?(MISSING)
|
|
54
|
+
{ value: nil, initial: 0, step: ->(acc, _v) { acc + 1 } }
|
|
55
|
+
else
|
|
56
|
+
{ value: value, initial: 0, step: ->(acc, v) { v.nil? ? acc : (acc + 1) } }
|
|
57
|
+
end
|
|
45
58
|
end
|
|
46
59
|
|
|
47
60
|
define_reducer(:min) do |_ctx, value, block: nil|
|
|
48
|
-
{ value: value, initial: nil, step: ->(acc, v) { acc.nil? || v < acc ? v : acc } }
|
|
61
|
+
{ value: value, initial: nil, step: ->(acc, v) { v.nil? ? acc : (acc.nil? || v < acc ? v : acc) } }
|
|
49
62
|
end
|
|
50
63
|
|
|
51
64
|
define_reducer(:max) do |_ctx, value, block: nil|
|
|
52
|
-
{ value: value, initial: nil, step: ->(acc, v) { acc.nil? || v > acc ? v : acc } }
|
|
65
|
+
{ value: value, initial: nil, step: ->(acc, v) { v.nil? ? acc : (acc.nil? || v > acc ? v : acc) } }
|
|
53
66
|
end
|
|
54
67
|
|
|
55
68
|
define_reducer(:average) do |_ctx, value, block: nil|
|
|
56
69
|
{
|
|
57
70
|
value: value,
|
|
58
71
|
initial: -> { [0.0, 0] },
|
|
59
|
-
finish: ->((sum, count)) { count.zero? ? nil : (sum / count) },
|
|
72
|
+
finish: ->((sum, count)) { [count.zero? ? nil : (sum / count)] },
|
|
60
73
|
step: ->(acc, v) {
|
|
74
|
+
return acc if v.nil?
|
|
75
|
+
|
|
61
76
|
acc[0] += v
|
|
62
77
|
acc[1] += 1
|
|
63
78
|
acc
|
|
@@ -70,13 +85,15 @@ module Jrf
|
|
|
70
85
|
value: value,
|
|
71
86
|
initial: [0, 0.0, 0.0],
|
|
72
87
|
finish: ->((count, mean, m2)) {
|
|
73
|
-
return nil if count.zero?
|
|
74
|
-
return nil if sample && count < 2
|
|
88
|
+
return [nil] if count.zero?
|
|
89
|
+
return [nil] if sample && count < 2
|
|
75
90
|
|
|
76
91
|
denom = sample ? (count - 1) : count
|
|
77
|
-
Math.sqrt(m2 / denom)
|
|
92
|
+
[Math.sqrt(m2 / denom)]
|
|
78
93
|
},
|
|
79
94
|
step: ->(acc, x) {
|
|
95
|
+
return acc if x.nil?
|
|
96
|
+
|
|
80
97
|
count, mean, m2 = acc
|
|
81
98
|
count += 1
|
|
82
99
|
delta = x - mean
|
|
@@ -96,7 +113,6 @@ module Jrf
|
|
|
96
113
|
{
|
|
97
114
|
value: ctx._,
|
|
98
115
|
initial: -> { [] },
|
|
99
|
-
emit_many: true,
|
|
100
116
|
finish: ->(rows) { rows.sort(&block) },
|
|
101
117
|
step: ->(rows, row) { rows << row }
|
|
102
118
|
}
|
|
@@ -105,7 +121,6 @@ module Jrf
|
|
|
105
121
|
{
|
|
106
122
|
value: [resolved_key, ctx._],
|
|
107
123
|
initial: -> { [] },
|
|
108
|
-
emit_many: true,
|
|
109
124
|
finish: ->(pairs) { pairs.sort_by(&:first).map(&:last) },
|
|
110
125
|
step: ->(pairs, pair) { pairs << pair }
|
|
111
126
|
}
|
|
@@ -124,57 +139,47 @@ module Jrf
|
|
|
124
139
|
|
|
125
140
|
finish =
|
|
126
141
|
if scalar
|
|
127
|
-
->(values) { ctx.send(:percentile_value, values.sort, percentages.first) }
|
|
142
|
+
->(values) { [ctx.send(:percentile_value, values.sort, percentages.first)] }
|
|
128
143
|
else
|
|
129
144
|
->(values) {
|
|
130
145
|
sorted = values.sort
|
|
131
|
-
percentages.map
|
|
132
|
-
{ "percentile" => p, "value" => ctx.send(:percentile_value, sorted, p) }
|
|
133
|
-
end
|
|
146
|
+
[percentages.map { |p| ctx.send(:percentile_value, sorted, p) }]
|
|
134
147
|
}
|
|
135
148
|
end
|
|
136
149
|
|
|
137
150
|
{
|
|
138
151
|
value: value,
|
|
139
152
|
initial: -> { [] },
|
|
140
|
-
emit_many: !scalar,
|
|
141
153
|
finish: finish,
|
|
142
|
-
step: ->(acc, v) { acc << v }
|
|
154
|
+
step: ->(acc, v) { v.nil? ? acc : (acc << v) }
|
|
143
155
|
}
|
|
144
156
|
end
|
|
145
157
|
|
|
146
158
|
def reduce(initial, &block)
|
|
147
159
|
raise ArgumentError, "reduce requires a block" unless block
|
|
148
160
|
|
|
149
|
-
|
|
161
|
+
@__jrf_current_stage.allocate_reducer(@obj, initial: initial, &block)
|
|
150
162
|
end
|
|
151
163
|
|
|
152
|
-
def
|
|
153
|
-
|
|
154
|
-
stage[:reducer_cursor] = 0
|
|
155
|
-
stage[:reducer_called] = false
|
|
156
|
-
stage[:reducer_probing] = probing
|
|
157
|
-
end
|
|
164
|
+
def map(&block)
|
|
165
|
+
raise ArgumentError, "map requires a block" unless block
|
|
158
166
|
|
|
159
|
-
|
|
160
|
-
@__jrf_stage && @__jrf_stage[:reducer_called]
|
|
167
|
+
@__jrf_current_stage.allocate_map(:array, @obj, &block)
|
|
161
168
|
end
|
|
162
169
|
|
|
163
|
-
|
|
170
|
+
def map_values(&block)
|
|
171
|
+
raise ArgumentError, "map_values requires a block" unless block
|
|
164
172
|
|
|
165
|
-
|
|
166
|
-
|
|
173
|
+
@__jrf_current_stage.allocate_map(:hash, @obj, &block)
|
|
174
|
+
end
|
|
167
175
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
reducers[idx].step(value) unless @__jrf_stage[:reducer_probing]
|
|
172
|
-
@__jrf_stage[:reducer_cursor] = idx + 1
|
|
173
|
-
@__jrf_stage[:reducer_called] = true
|
|
174
|
-
@__jrf_stage[:reducer_emit_many] = emit_many if @__jrf_stage[:reducer_emit_many].nil?
|
|
175
|
-
ReducerToken.new(idx)
|
|
176
|
+
def group_by(key, &block)
|
|
177
|
+
block ||= proc { group }
|
|
178
|
+
@__jrf_current_stage.allocate_group_by(key, &block)
|
|
176
179
|
end
|
|
177
180
|
|
|
181
|
+
private
|
|
182
|
+
|
|
178
183
|
def reducer_initial_value(initial)
|
|
179
184
|
return initial.call if initial.respond_to?(:call)
|
|
180
185
|
return initial.dup if initial.is_a?(Array) || initial.is_a?(Hash)
|
data/lib/jrf/runner.rb
CHANGED
|
@@ -1,33 +1,19 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "json"
|
|
4
|
-
require_relative "
|
|
4
|
+
require_relative "pipeline"
|
|
5
5
|
require_relative "pipeline_parser"
|
|
6
|
-
require_relative "reducers"
|
|
7
|
-
require_relative "row_context"
|
|
8
6
|
|
|
9
7
|
module Jrf
|
|
10
8
|
class Runner
|
|
11
|
-
|
|
12
|
-
def [](key)
|
|
13
|
-
self
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
def method_missing(name, *args, &block)
|
|
17
|
-
self
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
def respond_to_missing?(name, include_private = false)
|
|
21
|
-
true
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
PROBE_VALUE = ProbeValue.new
|
|
9
|
+
RS_CHAR = "\x1e"
|
|
26
10
|
|
|
27
|
-
def initialize(input: ARGF, out: $stdout, err: $stderr)
|
|
11
|
+
def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false, pretty: false)
|
|
28
12
|
@input = input
|
|
29
13
|
@out = out
|
|
30
14
|
@err = err
|
|
15
|
+
@lax = lax
|
|
16
|
+
@pretty = pretty
|
|
31
17
|
end
|
|
32
18
|
|
|
33
19
|
def run(expression, verbose: false)
|
|
@@ -35,144 +21,60 @@ module Jrf
|
|
|
35
21
|
stages = parsed[:stages]
|
|
36
22
|
dump_stages(stages) if verbose
|
|
37
23
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
begin
|
|
44
|
-
@input.each_line do |line|
|
|
45
|
-
line = line.strip
|
|
46
|
-
next if line.empty?
|
|
24
|
+
blocks = stages.map { |stage|
|
|
25
|
+
eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
|
|
26
|
+
}
|
|
27
|
+
pipeline = Pipeline.new(*blocks)
|
|
47
28
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
error = e
|
|
52
|
-
ensure
|
|
53
|
-
flush_reducers(compiled, ctx)
|
|
29
|
+
input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
|
|
30
|
+
pipeline.call(input_enum) do |value|
|
|
31
|
+
@out.puts(@pretty ? JSON.pretty_generate(value) : JSON.generate(value))
|
|
54
32
|
end
|
|
55
|
-
|
|
56
|
-
raise error if error
|
|
57
33
|
end
|
|
58
34
|
|
|
59
35
|
private
|
|
60
36
|
|
|
61
|
-
def
|
|
62
|
-
|
|
37
|
+
def each_input_value
|
|
38
|
+
return each_input_value_lax { |value| yield value } if @lax
|
|
63
39
|
|
|
64
|
-
|
|
65
|
-
next_values = []
|
|
66
|
-
|
|
67
|
-
current_values.each do |value|
|
|
68
|
-
out = apply_stage(stage, value, ctx)
|
|
69
|
-
if out.equal?(Control::DROPPED)
|
|
70
|
-
next
|
|
71
|
-
elsif flat_event?(out)
|
|
72
|
-
unless out.value.is_a?(Array)
|
|
73
|
-
raise TypeError, "flat expects Array, got #{out.value.class}"
|
|
74
|
-
end
|
|
75
|
-
next_values.concat(out.value)
|
|
76
|
-
else
|
|
77
|
-
next_values << out
|
|
78
|
-
end
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
return if next_values.empty?
|
|
82
|
-
current_values = next_values
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
current_values.each { |value| @out.puts JSON.generate(value) }
|
|
40
|
+
each_input_value_ndjson { |value| yield value }
|
|
86
41
|
end
|
|
87
42
|
|
|
88
|
-
def
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
elsif ctx.__jrf_reducer_called?
|
|
93
|
-
stage[:reducer_template] ||= value
|
|
94
|
-
Control::DROPPED
|
|
95
|
-
else
|
|
96
|
-
value
|
|
97
|
-
end
|
|
98
|
-
end
|
|
99
|
-
|
|
100
|
-
def eval_stage(stage, input, ctx)
|
|
101
|
-
ctx.reset(input)
|
|
102
|
-
ctx.__jrf_begin_stage__(stage, probing: input.equal?(PROBE_VALUE))
|
|
103
|
-
ctx.public_send(stage[:method_name])
|
|
104
|
-
end
|
|
43
|
+
def each_input_value_ndjson
|
|
44
|
+
@input.each_line do |raw_line|
|
|
45
|
+
line = raw_line.strip
|
|
46
|
+
next if line.empty?
|
|
105
47
|
|
|
106
|
-
|
|
107
|
-
value.is_a?(Control::Flat)
|
|
108
|
-
end
|
|
109
|
-
|
|
110
|
-
def flush_reducers(stages, ctx)
|
|
111
|
-
tail = stages
|
|
112
|
-
loop do
|
|
113
|
-
tail = tail.drop_while { |stage| !reducer_stage?(stage) }
|
|
114
|
-
break if tail.empty?
|
|
115
|
-
|
|
116
|
-
stage = tail.first
|
|
117
|
-
reducers = stage[:reducers]
|
|
118
|
-
break unless reducers&.any?
|
|
119
|
-
|
|
120
|
-
out = finish_reducer_template(stage[:reducer_template], reducers)
|
|
121
|
-
if stage[:reducer_emit_many]
|
|
122
|
-
out.each { |value| process_value(value, tail.drop(1), ctx) }
|
|
123
|
-
else
|
|
124
|
-
process_value(out, tail.drop(1), ctx)
|
|
125
|
-
end
|
|
126
|
-
tail = tail.drop(1)
|
|
48
|
+
yield JSON.parse(line)
|
|
127
49
|
end
|
|
128
50
|
end
|
|
129
51
|
|
|
130
|
-
def
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
end
|
|
52
|
+
def each_input_value_lax
|
|
53
|
+
require "oj"
|
|
54
|
+
source = @input.read.to_s
|
|
55
|
+
source = source.include?(RS_CHAR) ? source.tr(RS_CHAR, "\n") : source
|
|
56
|
+
handler = Class.new(Oj::ScHandler) do
|
|
57
|
+
def initialize(&emit)
|
|
58
|
+
@emit = emit
|
|
59
|
+
end
|
|
139
60
|
|
|
140
|
-
|
|
141
|
-
|
|
61
|
+
def hash_start = {}
|
|
62
|
+
def hash_key(key) = key
|
|
63
|
+
def hash_set(hash, key, value) = hash[key] = value
|
|
64
|
+
def array_start = []
|
|
65
|
+
def array_append(array, value) = array << value
|
|
66
|
+
def add_value(value) = @emit.call(value)
|
|
67
|
+
end.new { |value| yield value }
|
|
68
|
+
Oj.sc_parse(handler, source)
|
|
69
|
+
rescue LoadError
|
|
70
|
+
raise "oj is required for --lax mode (gem install oj)"
|
|
71
|
+
rescue Oj::ParseError => e
|
|
72
|
+
raise JSON::ParserError, e.message
|
|
142
73
|
end
|
|
143
74
|
|
|
144
75
|
def dump_stages(stages)
|
|
145
76
|
stages.each_with_index do |stage, i|
|
|
146
|
-
@err.puts "stage[#{i}]
|
|
147
|
-
@err.puts " original: #{stage[:original]}"
|
|
148
|
-
@err.puts " ruby: #{stage[:src]}"
|
|
149
|
-
end
|
|
150
|
-
end
|
|
151
|
-
|
|
152
|
-
def initialize_reducers(stages, ctx)
|
|
153
|
-
stages.each do |stage|
|
|
154
|
-
begin
|
|
155
|
-
value = eval_stage(stage, PROBE_VALUE, ctx)
|
|
156
|
-
stage[:reducer_template] ||= value if ctx.__jrf_reducer_called?
|
|
157
|
-
rescue StandardError
|
|
158
|
-
# Ignore probe-time errors; reducer will be created on first runtime event.
|
|
159
|
-
end
|
|
160
|
-
end
|
|
161
|
-
end
|
|
162
|
-
|
|
163
|
-
def reducer_stage?(stage)
|
|
164
|
-
stage[:reducers]&.any?
|
|
165
|
-
end
|
|
166
|
-
|
|
167
|
-
def finish_reducer_template(template, reducers)
|
|
168
|
-
if template.is_a?(RowContext::ReducerToken)
|
|
169
|
-
reducers.fetch(template.index).finish
|
|
170
|
-
elsif template.is_a?(Array)
|
|
171
|
-
template.map { |v| finish_reducer_template(v, reducers) }
|
|
172
|
-
elsif template.is_a?(Hash)
|
|
173
|
-
template.transform_values { |v| finish_reducer_template(v, reducers) }
|
|
174
|
-
else
|
|
175
|
-
template
|
|
77
|
+
@err.puts "stage[#{i}]: #{stage[:src]}"
|
|
176
78
|
end
|
|
177
79
|
end
|
|
178
80
|
end
|