jrf 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/jrf +6 -0
- data/jrf.gemspec +1 -0
- data/lib/jrf/cli.rb +9 -4
- data/lib/jrf/pipeline_parser.rb +1 -41
- data/lib/jrf/row_context.rb +43 -36
- data/lib/jrf/runner.rb +55 -78
- data/lib/jrf/stage.rb +162 -0
- data/lib/jrf/version.rb +1 -1
- data/test/jrf_test.rb +280 -8
- metadata +17 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 34475ad560159e50a8f6ea6dbfae40dc57173b40df31065f4b222abaafe66012
|
|
4
|
+
data.tar.gz: 22c046afd9f4fba04788f08796f9ccfe24b21a87522820c8e5873f164de8cc53
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7d90e4a754ae7ca9170db6c7221571cb90077bbd48d6cd55cbefd29342afa89996075c86a3bf645dac94b337b91eceefa036968f490c30bacf52744a319d238f
|
|
7
|
+
data.tar.gz: 3d00c51e46a07f63e1d44b8f2013663dd66d3b2f3393046a00a3c26a5f1cb3dd4eabc2db82eacb12ff874625835f49a63e358baaf44639d38b8a9e01a6c3b06d
|
data/exe/jrf
CHANGED
data/jrf.gemspec
CHANGED
data/lib/jrf/cli.rb
CHANGED
|
@@ -4,15 +4,16 @@ require_relative "runner"
|
|
|
4
4
|
|
|
5
5
|
module Jrf
|
|
6
6
|
class CLI
|
|
7
|
-
USAGE = "usage: jrf [-v] [--help] 'STAGE >> STAGE >> ...'"
|
|
7
|
+
USAGE = "usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'"
|
|
8
8
|
|
|
9
9
|
HELP_TEXT = <<~'TEXT'
|
|
10
|
-
usage: jrf [-v] [--help] 'STAGE >> STAGE >> ...'
|
|
10
|
+
usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'
|
|
11
11
|
|
|
12
12
|
JSON filter with the power and speed of Ruby.
|
|
13
13
|
|
|
14
14
|
Options:
|
|
15
|
-
-v, --verbose print
|
|
15
|
+
-v, --verbose print parsed stage expressions
|
|
16
|
+
--lax allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
|
|
16
17
|
-h, --help show this help and exit
|
|
17
18
|
|
|
18
19
|
Pipeline:
|
|
@@ -33,12 +34,16 @@ module Jrf
|
|
|
33
34
|
|
|
34
35
|
def self.run(argv = ARGV, input: ARGF, out: $stdout, err: $stderr)
|
|
35
36
|
verbose = false
|
|
37
|
+
lax = false
|
|
36
38
|
|
|
37
39
|
while argv.first&.start_with?("-")
|
|
38
40
|
case argv.first
|
|
39
41
|
when "-v", "--verbose"
|
|
40
42
|
verbose = true
|
|
41
43
|
argv.shift
|
|
44
|
+
when "--lax"
|
|
45
|
+
lax = true
|
|
46
|
+
argv.shift
|
|
42
47
|
when "-h", "--help"
|
|
43
48
|
out.puts HELP_TEXT
|
|
44
49
|
return 0
|
|
@@ -55,7 +60,7 @@ module Jrf
|
|
|
55
60
|
end
|
|
56
61
|
|
|
57
62
|
expression = argv.shift
|
|
58
|
-
Runner.new(input: input, out: out, err: err).run(expression, verbose: verbose)
|
|
63
|
+
Runner.new(input: input, out: out, err: err, lax: lax).run(expression, verbose: verbose)
|
|
59
64
|
0
|
|
60
65
|
end
|
|
61
66
|
end
|
data/lib/jrf/pipeline_parser.rb
CHANGED
|
@@ -9,51 +9,11 @@ module Jrf
|
|
|
9
9
|
def parse
|
|
10
10
|
stages = split_top_level_pipeline(@source).map(&:strip).reject(&:empty?)
|
|
11
11
|
raise ArgumentError, "empty expression" if stages.empty?
|
|
12
|
-
{ stages: stages.map { |stage|
|
|
12
|
+
{ stages: stages.map { |stage| { src: stage } } }
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
private
|
|
16
16
|
|
|
17
|
-
def parse_stage!(stage)
|
|
18
|
-
if select_stage?(stage)
|
|
19
|
-
{
|
|
20
|
-
kind: :select,
|
|
21
|
-
original: stage,
|
|
22
|
-
src: "(#{parse_select!(stage)}) ? _ : ::Jrf::Control::DROPPED"
|
|
23
|
-
}
|
|
24
|
-
else
|
|
25
|
-
reject_unsupported_stage!(stage)
|
|
26
|
-
{
|
|
27
|
-
kind: :extract,
|
|
28
|
-
original: stage,
|
|
29
|
-
src: validate_extract!(stage)
|
|
30
|
-
}
|
|
31
|
-
end
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
def validate_extract!(stage)
|
|
35
|
-
reject_unsupported_stage!(stage)
|
|
36
|
-
stage
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
def parse_select!(stage)
|
|
40
|
-
reject_unsupported_stage!(stage)
|
|
41
|
-
match = /\Aselect\s*\((.*)\)\s*\z/m.match(stage)
|
|
42
|
-
raise ArgumentError, "first stage must be select(...)" unless match
|
|
43
|
-
|
|
44
|
-
inner = match[1].strip
|
|
45
|
-
raise ArgumentError, "select(...) must contain an expression" if inner.empty?
|
|
46
|
-
|
|
47
|
-
inner
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
def select_stage?(stage)
|
|
51
|
-
/\Aselect\s*\(/.match?(stage)
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
def reject_unsupported_stage!(stage)
|
|
55
|
-
end
|
|
56
|
-
|
|
57
17
|
def split_top_level_pipeline(source)
|
|
58
18
|
parts = []
|
|
59
19
|
start_idx = 0
|
data/lib/jrf/row_context.rb
CHANGED
|
@@ -1,21 +1,22 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
require_relative "control"
|
|
3
4
|
require_relative "reducers"
|
|
4
5
|
|
|
5
6
|
module Jrf
|
|
6
7
|
class RowContext
|
|
7
8
|
MISSING = Object.new
|
|
8
|
-
|
|
9
|
+
|
|
10
|
+
attr_writer :__jrf_current_stage
|
|
9
11
|
|
|
10
12
|
class << self
|
|
11
13
|
def define_reducer(name, &definition)
|
|
12
14
|
define_method(name) do |*args, **kwargs, &block|
|
|
13
15
|
spec = definition.call(self, *args, **kwargs, block: block)
|
|
14
|
-
|
|
16
|
+
@__jrf_current_stage.allocate_reducer(
|
|
15
17
|
spec.fetch(:value),
|
|
16
18
|
initial: reducer_initial_value(spec.fetch(:initial)),
|
|
17
19
|
finish: spec[:finish],
|
|
18
|
-
emit_many: spec.fetch(:emit_many, false),
|
|
19
20
|
&spec.fetch(:step)
|
|
20
21
|
)
|
|
21
22
|
end
|
|
@@ -24,7 +25,7 @@ module Jrf
|
|
|
24
25
|
|
|
25
26
|
def initialize(obj = nil)
|
|
26
27
|
@obj = obj
|
|
27
|
-
@
|
|
28
|
+
@__jrf_current_stage = nil
|
|
28
29
|
end
|
|
29
30
|
|
|
30
31
|
def reset(obj)
|
|
@@ -40,24 +41,38 @@ module Jrf
|
|
|
40
41
|
Control::Flat.new(@obj)
|
|
41
42
|
end
|
|
42
43
|
|
|
44
|
+
def select(predicate)
|
|
45
|
+
predicate ? @obj : Control::DROPPED
|
|
46
|
+
end
|
|
47
|
+
|
|
43
48
|
define_reducer(:sum) do |_ctx, value, initial: 0, block: nil|
|
|
44
|
-
{ value: value, initial: initial, step: ->(acc, v) { acc + v } }
|
|
49
|
+
{ value: value, initial: initial, step: ->(acc, v) { v.nil? ? acc : (acc + v) } }
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
define_reducer(:count) do |_ctx, value = MISSING, block: nil|
|
|
53
|
+
if value.equal?(MISSING)
|
|
54
|
+
{ value: nil, initial: 0, step: ->(acc, _v) { acc + 1 } }
|
|
55
|
+
else
|
|
56
|
+
{ value: value, initial: 0, step: ->(acc, v) { v.nil? ? acc : (acc + 1) } }
|
|
57
|
+
end
|
|
45
58
|
end
|
|
46
59
|
|
|
47
60
|
define_reducer(:min) do |_ctx, value, block: nil|
|
|
48
|
-
{ value: value, initial: nil, step: ->(acc, v) { acc.nil? || v < acc ? v : acc } }
|
|
61
|
+
{ value: value, initial: nil, step: ->(acc, v) { v.nil? ? acc : (acc.nil? || v < acc ? v : acc) } }
|
|
49
62
|
end
|
|
50
63
|
|
|
51
64
|
define_reducer(:max) do |_ctx, value, block: nil|
|
|
52
|
-
{ value: value, initial: nil, step: ->(acc, v) { acc.nil? || v > acc ? v : acc } }
|
|
65
|
+
{ value: value, initial: nil, step: ->(acc, v) { v.nil? ? acc : (acc.nil? || v > acc ? v : acc) } }
|
|
53
66
|
end
|
|
54
67
|
|
|
55
68
|
define_reducer(:average) do |_ctx, value, block: nil|
|
|
56
69
|
{
|
|
57
70
|
value: value,
|
|
58
71
|
initial: -> { [0.0, 0] },
|
|
59
|
-
finish: ->((sum, count)) { count.zero? ? nil : (sum / count) },
|
|
72
|
+
finish: ->((sum, count)) { [count.zero? ? nil : (sum / count)] },
|
|
60
73
|
step: ->(acc, v) {
|
|
74
|
+
return acc if v.nil?
|
|
75
|
+
|
|
61
76
|
acc[0] += v
|
|
62
77
|
acc[1] += 1
|
|
63
78
|
acc
|
|
@@ -70,13 +85,15 @@ module Jrf
|
|
|
70
85
|
value: value,
|
|
71
86
|
initial: [0, 0.0, 0.0],
|
|
72
87
|
finish: ->((count, mean, m2)) {
|
|
73
|
-
return nil if count.zero?
|
|
74
|
-
return nil if sample && count < 2
|
|
88
|
+
return [nil] if count.zero?
|
|
89
|
+
return [nil] if sample && count < 2
|
|
75
90
|
|
|
76
91
|
denom = sample ? (count - 1) : count
|
|
77
|
-
Math.sqrt(m2 / denom)
|
|
92
|
+
[Math.sqrt(m2 / denom)]
|
|
78
93
|
},
|
|
79
94
|
step: ->(acc, x) {
|
|
95
|
+
return acc if x.nil?
|
|
96
|
+
|
|
80
97
|
count, mean, m2 = acc
|
|
81
98
|
count += 1
|
|
82
99
|
delta = x - mean
|
|
@@ -96,7 +113,6 @@ module Jrf
|
|
|
96
113
|
{
|
|
97
114
|
value: ctx._,
|
|
98
115
|
initial: -> { [] },
|
|
99
|
-
emit_many: true,
|
|
100
116
|
finish: ->(rows) { rows.sort(&block) },
|
|
101
117
|
step: ->(rows, row) { rows << row }
|
|
102
118
|
}
|
|
@@ -105,7 +121,6 @@ module Jrf
|
|
|
105
121
|
{
|
|
106
122
|
value: [resolved_key, ctx._],
|
|
107
123
|
initial: -> { [] },
|
|
108
|
-
emit_many: true,
|
|
109
124
|
finish: ->(pairs) { pairs.sort_by(&:first).map(&:last) },
|
|
110
125
|
step: ->(pairs, pair) { pairs << pair }
|
|
111
126
|
}
|
|
@@ -124,7 +139,7 @@ module Jrf
|
|
|
124
139
|
|
|
125
140
|
finish =
|
|
126
141
|
if scalar
|
|
127
|
-
->(values) { ctx.send(:percentile_value, values.sort, percentages.first) }
|
|
142
|
+
->(values) { [ctx.send(:percentile_value, values.sort, percentages.first)] }
|
|
128
143
|
else
|
|
129
144
|
->(values) {
|
|
130
145
|
sorted = values.sort
|
|
@@ -137,44 +152,36 @@ module Jrf
|
|
|
137
152
|
{
|
|
138
153
|
value: value,
|
|
139
154
|
initial: -> { [] },
|
|
140
|
-
emit_many: !scalar,
|
|
141
155
|
finish: finish,
|
|
142
|
-
step: ->(acc, v) { acc << v }
|
|
156
|
+
step: ->(acc, v) { v.nil? ? acc : (acc << v) }
|
|
143
157
|
}
|
|
144
158
|
end
|
|
145
159
|
|
|
146
160
|
def reduce(initial, &block)
|
|
147
161
|
raise ArgumentError, "reduce requires a block" unless block
|
|
148
162
|
|
|
149
|
-
|
|
163
|
+
@__jrf_current_stage.allocate_reducer(@obj, initial: initial, &block)
|
|
150
164
|
end
|
|
151
165
|
|
|
152
|
-
def
|
|
153
|
-
|
|
154
|
-
stage[:reducer_cursor] = 0
|
|
155
|
-
stage[:reducer_called] = false
|
|
156
|
-
stage[:reducer_probing] = probing
|
|
157
|
-
end
|
|
166
|
+
def map(&block)
|
|
167
|
+
raise ArgumentError, "map requires a block" unless block
|
|
158
168
|
|
|
159
|
-
|
|
160
|
-
@__jrf_stage && @__jrf_stage[:reducer_called]
|
|
169
|
+
@__jrf_current_stage.allocate_map(:array, @obj, &block)
|
|
161
170
|
end
|
|
162
171
|
|
|
163
|
-
|
|
172
|
+
def map_values(&block)
|
|
173
|
+
raise ArgumentError, "map_values requires a block" unless block
|
|
164
174
|
|
|
165
|
-
|
|
166
|
-
|
|
175
|
+
@__jrf_current_stage.allocate_map(:hash, @obj, &block)
|
|
176
|
+
end
|
|
167
177
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
reducers[idx].step(value) unless @__jrf_stage[:reducer_probing]
|
|
172
|
-
@__jrf_stage[:reducer_cursor] = idx + 1
|
|
173
|
-
@__jrf_stage[:reducer_called] = true
|
|
174
|
-
@__jrf_stage[:reducer_emit_many] = emit_many if @__jrf_stage[:reducer_emit_many].nil?
|
|
175
|
-
ReducerToken.new(idx)
|
|
178
|
+
def group_by(key, &block)
|
|
179
|
+
block ||= proc { group }
|
|
180
|
+
@__jrf_current_stage.allocate_group_by(key, &block)
|
|
176
181
|
end
|
|
177
182
|
|
|
183
|
+
private
|
|
184
|
+
|
|
178
185
|
def reducer_initial_value(initial)
|
|
179
186
|
return initial.call if initial.respond_to?(:call)
|
|
180
187
|
return initial.dup if initial.is_a?(Array) || initial.is_a?(Hash)
|
data/lib/jrf/runner.rb
CHANGED
|
@@ -5,9 +5,12 @@ require_relative "control"
|
|
|
5
5
|
require_relative "pipeline_parser"
|
|
6
6
|
require_relative "reducers"
|
|
7
7
|
require_relative "row_context"
|
|
8
|
+
require_relative "stage"
|
|
8
9
|
|
|
9
10
|
module Jrf
|
|
10
11
|
class Runner
|
|
12
|
+
RS_CHAR = "\x1e"
|
|
13
|
+
|
|
11
14
|
class ProbeValue
|
|
12
15
|
def [](key)
|
|
13
16
|
self
|
|
@@ -24,10 +27,11 @@ module Jrf
|
|
|
24
27
|
|
|
25
28
|
PROBE_VALUE = ProbeValue.new
|
|
26
29
|
|
|
27
|
-
def initialize(input: ARGF, out: $stdout, err: $stderr)
|
|
30
|
+
def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false)
|
|
28
31
|
@input = input
|
|
29
32
|
@out = out
|
|
30
33
|
@err = err
|
|
34
|
+
@lax = lax
|
|
31
35
|
end
|
|
32
36
|
|
|
33
37
|
def run(expression, verbose: false)
|
|
@@ -37,20 +41,17 @@ module Jrf
|
|
|
37
41
|
|
|
38
42
|
ctx = RowContext.new
|
|
39
43
|
compiled = compile_stages(stages, ctx)
|
|
40
|
-
|
|
44
|
+
compiled.each { |stage| stage.call(PROBE_VALUE, probing: true) rescue nil }
|
|
41
45
|
error = nil
|
|
42
46
|
|
|
43
47
|
begin
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
next if line.empty?
|
|
47
|
-
|
|
48
|
-
process_value(JSON.parse(line), compiled, ctx)
|
|
48
|
+
each_input_value do |value|
|
|
49
|
+
process_value(value, compiled)
|
|
49
50
|
end
|
|
50
51
|
rescue StandardError => e
|
|
51
52
|
error = e
|
|
52
53
|
ensure
|
|
53
|
-
flush_reducers(compiled
|
|
54
|
+
flush_reducers(compiled)
|
|
54
55
|
end
|
|
55
56
|
|
|
56
57
|
raise error if error
|
|
@@ -58,17 +59,17 @@ module Jrf
|
|
|
58
59
|
|
|
59
60
|
private
|
|
60
61
|
|
|
61
|
-
def process_value(input, stages
|
|
62
|
+
def process_value(input, stages)
|
|
62
63
|
current_values = [input]
|
|
63
64
|
|
|
64
65
|
stages.each do |stage|
|
|
65
66
|
next_values = []
|
|
66
67
|
|
|
67
68
|
current_values.each do |value|
|
|
68
|
-
out =
|
|
69
|
+
out = stage.call(value)
|
|
69
70
|
if out.equal?(Control::DROPPED)
|
|
70
71
|
next
|
|
71
|
-
elsif
|
|
72
|
+
elsif out.is_a?(Control::Flat)
|
|
72
73
|
unless out.value.is_a?(Array)
|
|
73
74
|
raise TypeError, "flat expects Array, got #{out.value.class}"
|
|
74
75
|
end
|
|
@@ -85,94 +86,70 @@ module Jrf
|
|
|
85
86
|
current_values.each { |value| @out.puts JSON.generate(value) }
|
|
86
87
|
end
|
|
87
88
|
|
|
88
|
-
def
|
|
89
|
-
value
|
|
90
|
-
if value.equal?(Control::DROPPED)
|
|
91
|
-
Control::DROPPED
|
|
92
|
-
elsif ctx.__jrf_reducer_called?
|
|
93
|
-
stage[:reducer_template] ||= value
|
|
94
|
-
Control::DROPPED
|
|
95
|
-
else
|
|
96
|
-
value
|
|
97
|
-
end
|
|
98
|
-
end
|
|
89
|
+
def each_input_value
|
|
90
|
+
return each_input_value_lax { |value| yield value } if @lax
|
|
99
91
|
|
|
100
|
-
|
|
101
|
-
ctx.reset(input)
|
|
102
|
-
ctx.__jrf_begin_stage__(stage, probing: input.equal?(PROBE_VALUE))
|
|
103
|
-
ctx.public_send(stage[:method_name])
|
|
92
|
+
each_input_value_ndjson { |value| yield value }
|
|
104
93
|
end
|
|
105
94
|
|
|
106
|
-
def
|
|
107
|
-
|
|
95
|
+
def each_input_value_ndjson
|
|
96
|
+
@input.each_line do |raw_line|
|
|
97
|
+
line = raw_line.strip
|
|
98
|
+
next if line.empty?
|
|
99
|
+
|
|
100
|
+
yield JSON.parse(line)
|
|
101
|
+
end
|
|
108
102
|
end
|
|
109
103
|
|
|
110
|
-
def
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
reducers = stage[:reducers]
|
|
118
|
-
break unless reducers&.any?
|
|
119
|
-
|
|
120
|
-
out = finish_reducer_template(stage[:reducer_template], reducers)
|
|
121
|
-
if stage[:reducer_emit_many]
|
|
122
|
-
out.each { |value| process_value(value, tail.drop(1), ctx) }
|
|
123
|
-
else
|
|
124
|
-
process_value(out, tail.drop(1), ctx)
|
|
104
|
+
def each_input_value_lax
|
|
105
|
+
require "oj"
|
|
106
|
+
source = @input.read.to_s
|
|
107
|
+
source = source.include?(RS_CHAR) ? source.tr(RS_CHAR, "\n") : source
|
|
108
|
+
handler = Class.new(Oj::ScHandler) do
|
|
109
|
+
def initialize(&emit)
|
|
110
|
+
@emit = emit
|
|
125
111
|
end
|
|
126
|
-
|
|
127
|
-
|
|
112
|
+
|
|
113
|
+
def hash_start = {}
|
|
114
|
+
def hash_key(key) = key
|
|
115
|
+
def hash_set(hash, key, value) = hash[key] = value
|
|
116
|
+
def array_start = []
|
|
117
|
+
def array_append(array, value) = array << value
|
|
118
|
+
def add_value(value) = @emit.call(value)
|
|
119
|
+
end.new { |value| yield value }
|
|
120
|
+
Oj.sc_parse(handler, source)
|
|
121
|
+
rescue LoadError
|
|
122
|
+
raise "oj is required for --lax mode (gem install oj)"
|
|
123
|
+
rescue Oj::ParseError => e
|
|
124
|
+
raise JSON::ParserError, e.message
|
|
128
125
|
end
|
|
129
126
|
|
|
130
127
|
def compile_stages(stages, ctx)
|
|
131
128
|
mod = Module.new
|
|
132
|
-
compiled = []
|
|
133
129
|
|
|
134
|
-
stages.each_with_index do |stage, i|
|
|
130
|
+
stages.each_with_index.map do |stage, i|
|
|
135
131
|
method_name = :"__jrf_stage_#{i}"
|
|
136
132
|
mod.module_eval("def #{method_name}; #{stage[:src]}; end", "(jrf stage #{i})", 1)
|
|
137
|
-
|
|
138
|
-
end
|
|
139
|
-
|
|
140
|
-
ctx.extend(mod)
|
|
141
|
-
compiled
|
|
133
|
+
Stage.new(ctx, method_name, src: stage[:src])
|
|
134
|
+
end.tap { ctx.extend(mod) }
|
|
142
135
|
end
|
|
143
136
|
|
|
144
137
|
def dump_stages(stages)
|
|
145
138
|
stages.each_with_index do |stage, i|
|
|
146
|
-
@err.puts "stage[#{i}]
|
|
147
|
-
@err.puts " original: #{stage[:original]}"
|
|
148
|
-
@err.puts " ruby: #{stage[:src]}"
|
|
149
|
-
end
|
|
150
|
-
end
|
|
151
|
-
|
|
152
|
-
def initialize_reducers(stages, ctx)
|
|
153
|
-
stages.each do |stage|
|
|
154
|
-
begin
|
|
155
|
-
value = eval_stage(stage, PROBE_VALUE, ctx)
|
|
156
|
-
stage[:reducer_template] ||= value if ctx.__jrf_reducer_called?
|
|
157
|
-
rescue StandardError
|
|
158
|
-
# Ignore probe-time errors; reducer will be created on first runtime event.
|
|
159
|
-
end
|
|
139
|
+
@err.puts "stage[#{i}]: #{stage[:src]}"
|
|
160
140
|
end
|
|
161
141
|
end
|
|
162
142
|
|
|
163
|
-
def
|
|
164
|
-
|
|
165
|
-
|
|
143
|
+
def flush_reducers(stages)
|
|
144
|
+
tail = stages
|
|
145
|
+
loop do
|
|
146
|
+
idx = tail.index(&:reducer?)
|
|
147
|
+
break unless idx
|
|
166
148
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
template.map { |v| finish_reducer_template(v, reducers) }
|
|
172
|
-
elsif template.is_a?(Hash)
|
|
173
|
-
template.transform_values { |v| finish_reducer_template(v, reducers) }
|
|
174
|
-
else
|
|
175
|
-
template
|
|
149
|
+
rows = tail[idx].finish
|
|
150
|
+
rest = tail.drop(idx + 1)
|
|
151
|
+
rows.each { |value| process_value(value, rest) }
|
|
152
|
+
tail = rest
|
|
176
153
|
end
|
|
177
154
|
end
|
|
178
155
|
end
|
data/lib/jrf/stage.rb
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "control"
|
|
4
|
+
require_relative "reducers"
|
|
5
|
+
|
|
6
|
+
module Jrf
|
|
7
|
+
class Stage
|
|
8
|
+
ReducerToken = Struct.new(:index)
|
|
9
|
+
|
|
10
|
+
attr_reader :method_name, :src
|
|
11
|
+
|
|
12
|
+
def self.resolve_template(template, reducers)
|
|
13
|
+
if template.is_a?(ReducerToken)
|
|
14
|
+
rows = reducers.fetch(template.index).finish
|
|
15
|
+
rows.length == 1 ? rows.first : rows
|
|
16
|
+
elsif template.is_a?(Array)
|
|
17
|
+
template.map { |v| resolve_template(v, reducers) }
|
|
18
|
+
elsif template.is_a?(Hash)
|
|
19
|
+
template.transform_values { |v| resolve_template(v, reducers) }
|
|
20
|
+
else
|
|
21
|
+
template
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def initialize(ctx, method_name, src: nil)
|
|
26
|
+
@ctx = ctx
|
|
27
|
+
@method_name = method_name
|
|
28
|
+
@src = src
|
|
29
|
+
@reducers = []
|
|
30
|
+
@cursor = 0
|
|
31
|
+
@template = nil
|
|
32
|
+
@mode = nil # nil=unknown, :reducer, :passthrough
|
|
33
|
+
@probing = false
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def call(input, probing: false)
|
|
37
|
+
@ctx.reset(input)
|
|
38
|
+
@cursor = 0
|
|
39
|
+
@probing = probing
|
|
40
|
+
@ctx.__jrf_current_stage = self
|
|
41
|
+
result = @ctx.public_send(@method_name)
|
|
42
|
+
|
|
43
|
+
if @mode.nil? && @reducers.any?
|
|
44
|
+
@mode = :reducer
|
|
45
|
+
@template = result
|
|
46
|
+
elsif @mode.nil? && !probing
|
|
47
|
+
@mode = :passthrough
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
(@mode == :reducer) ? Control::DROPPED : result
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def allocate_reducer(value, initial:, finish: nil, &step_fn)
|
|
54
|
+
idx = @cursor
|
|
55
|
+
finish_rows = finish || ->(acc) { [acc] }
|
|
56
|
+
@reducers[idx] ||= Reducers.reduce(initial, finish: finish_rows, &step_fn)
|
|
57
|
+
@reducers[idx].step(value) unless @probing
|
|
58
|
+
@cursor += 1
|
|
59
|
+
ReducerToken.new(idx)
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def allocate_map(type, collection, &block)
|
|
63
|
+
idx = @cursor
|
|
64
|
+
map_reducer = (@reducers[idx] ||= MapReducer.new(type))
|
|
65
|
+
|
|
66
|
+
unless @probing
|
|
67
|
+
saved_obj = @ctx._
|
|
68
|
+
|
|
69
|
+
case type
|
|
70
|
+
when :array
|
|
71
|
+
raise TypeError, "map expects Array, got #{collection.class}" unless collection.is_a?(Array)
|
|
72
|
+
collection.each_with_index do |v, i|
|
|
73
|
+
@ctx.reset(v)
|
|
74
|
+
with_scoped_reducers(map_reducer.slots[i] ||= []) do
|
|
75
|
+
result = block.call(v)
|
|
76
|
+
map_reducer.templates[i] ||= result
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
when :hash
|
|
80
|
+
raise TypeError, "map_values expects Hash, got #{collection.class}" unless collection.is_a?(Hash)
|
|
81
|
+
collection.each do |k, v|
|
|
82
|
+
@ctx.reset(v)
|
|
83
|
+
with_scoped_reducers(map_reducer.slots[k] ||= []) do
|
|
84
|
+
result = block.call(v)
|
|
85
|
+
map_reducer.templates[k] ||= result
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
@ctx.reset(saved_obj)
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
@cursor += 1
|
|
94
|
+
ReducerToken.new(idx)
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
def allocate_group_by(key, &block)
|
|
98
|
+
idx = @cursor
|
|
99
|
+
map_reducer = (@reducers[idx] ||= MapReducer.new(:hash))
|
|
100
|
+
|
|
101
|
+
unless @probing
|
|
102
|
+
slot = (map_reducer.slots[key] ||= [])
|
|
103
|
+
with_scoped_reducers(slot) do
|
|
104
|
+
result = block.call
|
|
105
|
+
map_reducer.templates[key] ||= result
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
@cursor += 1
|
|
110
|
+
ReducerToken.new(idx)
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def reducer?
|
|
114
|
+
@mode == :reducer
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
def finish
|
|
118
|
+
return [] unless @mode == :reducer && @reducers.any?
|
|
119
|
+
|
|
120
|
+
if @template.is_a?(ReducerToken)
|
|
121
|
+
@reducers.fetch(@template.index).finish
|
|
122
|
+
else
|
|
123
|
+
[self.class.resolve_template(@template, @reducers)]
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
private
|
|
128
|
+
|
|
129
|
+
def with_scoped_reducers(reducer_list)
|
|
130
|
+
saved_reducers = @reducers
|
|
131
|
+
saved_cursor = @cursor
|
|
132
|
+
@reducers = reducer_list
|
|
133
|
+
@cursor = 0
|
|
134
|
+
yield
|
|
135
|
+
ensure
|
|
136
|
+
@reducers = saved_reducers
|
|
137
|
+
@cursor = saved_cursor
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
class MapReducer
|
|
141
|
+
attr_reader :slots, :templates
|
|
142
|
+
|
|
143
|
+
def initialize(type)
|
|
144
|
+
@type = type
|
|
145
|
+
@slots = {}
|
|
146
|
+
@templates = {}
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
def finish
|
|
150
|
+
case @type
|
|
151
|
+
when :array
|
|
152
|
+
keys = @slots.keys.sort
|
|
153
|
+
[keys.map { |k| Stage.resolve_template(@templates[k], @slots[k]) }]
|
|
154
|
+
when :hash
|
|
155
|
+
result = {}
|
|
156
|
+
@slots.each { |k, reducers| result[k] = Stage.resolve_template(@templates[k], reducers) }
|
|
157
|
+
[result]
|
|
158
|
+
end
|
|
159
|
+
end
|
|
160
|
+
end
|
|
161
|
+
end
|
|
162
|
+
end
|
data/lib/jrf/version.rb
CHANGED
data/test/jrf_test.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "json"
|
|
3
4
|
require "open3"
|
|
4
5
|
|
|
5
6
|
def run_jrf(expr, input, *opts)
|
|
@@ -86,17 +87,14 @@ assert_equal(['{"hello":123}'], lines(stdout), "select-only hello output")
|
|
|
86
87
|
stdout, stderr, status = run_jrf('select(_["hello"] == 123) >> _["hello"]', input_hello, "-v")
|
|
87
88
|
assert_success(status, stderr, "dump stages")
|
|
88
89
|
assert_equal(%w[123], lines(stdout), "dump stages output")
|
|
89
|
-
assert_includes(stderr,
|
|
90
|
-
assert_includes(stderr, '
|
|
91
|
-
assert_includes(stderr, 'ruby: (_["hello"] == 123) ? _ : ::Jrf::Control::DROPPED')
|
|
92
|
-
assert_includes(stderr, "stage[1] kind=extract")
|
|
93
|
-
assert_includes(stderr, 'original: _["hello"]')
|
|
94
|
-
assert_includes(stderr, 'ruby: _["hello"]')
|
|
90
|
+
assert_includes(stderr, 'stage[0]: select(_["hello"] == 123)')
|
|
91
|
+
assert_includes(stderr, 'stage[1]: _["hello"]')
|
|
95
92
|
|
|
96
93
|
stdout, stderr, status = Open3.capture3("./exe/jrf", "--help")
|
|
97
94
|
assert_success(status, stderr, "help option")
|
|
98
|
-
assert_includes(stdout, "usage: jrf [-v] [--help] 'STAGE >> STAGE >> ...'")
|
|
95
|
+
assert_includes(stdout, "usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'")
|
|
99
96
|
assert_includes(stdout, "JSON filter with the power and speed of Ruby.")
|
|
97
|
+
assert_includes(stdout, "--lax")
|
|
100
98
|
assert_includes(stdout, "Pipeline:")
|
|
101
99
|
assert_includes(stdout, "Connect stages with top-level >>.")
|
|
102
100
|
assert_includes(stdout, "The current value in each stage is available as _.")
|
|
@@ -108,7 +106,7 @@ assert_equal([], lines(stderr), "help stderr output")
|
|
|
108
106
|
stdout, stderr, status = run_jrf('select(_["hello"] == 123) >> _["hello"]', input_hello, "--verbose")
|
|
109
107
|
assert_success(status, stderr, "dump stages verbose alias")
|
|
110
108
|
assert_equal(%w[123], lines(stdout), "dump stages verbose alias output")
|
|
111
|
-
assert_includes(stderr,
|
|
109
|
+
assert_includes(stderr, 'stage[0]: select(_["hello"] == 123)')
|
|
112
110
|
|
|
113
111
|
input_regex = <<~NDJSON
|
|
114
112
|
{"foo":{"bar":"ok"},"x":50}
|
|
@@ -176,6 +174,14 @@ stdout, stderr, status = run_jrf('sum(_["foo"])', input_sum)
|
|
|
176
174
|
assert_success(status, stderr, "sum only")
|
|
177
175
|
assert_equal(%w[10], lines(stdout), "sum output")
|
|
178
176
|
|
|
177
|
+
stdout, stderr, status = run_jrf('count()', input_sum)
|
|
178
|
+
assert_success(status, stderr, "count only")
|
|
179
|
+
assert_equal(%w[4], lines(stdout), "count output")
|
|
180
|
+
|
|
181
|
+
stdout, stderr, status = run_jrf('count(_["foo"])', input_sum)
|
|
182
|
+
assert_success(status, stderr, "count(expr) only")
|
|
183
|
+
assert_equal(%w[4], lines(stdout), "count(expr) output")
|
|
184
|
+
|
|
179
185
|
stdout, stderr, status = run_jrf('min(_["foo"])', input_sum)
|
|
180
186
|
assert_success(status, stderr, "min only")
|
|
181
187
|
assert_equal(%w[1], lines(stdout), "min output")
|
|
@@ -204,6 +210,14 @@ stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> sum(_["foo"])', input
|
|
|
204
210
|
assert_success(status, stderr, "sum no matches")
|
|
205
211
|
assert_equal(%w[0], lines(stdout), "sum no matches output")
|
|
206
212
|
|
|
213
|
+
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> count()', input_sum)
|
|
214
|
+
assert_success(status, stderr, "count no matches")
|
|
215
|
+
assert_equal(%w[0], lines(stdout), "count no matches output")
|
|
216
|
+
|
|
217
|
+
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> count(_["foo"])', input_sum)
|
|
218
|
+
assert_success(status, stderr, "count(expr) no matches")
|
|
219
|
+
assert_equal(%w[0], lines(stdout), "count(expr) no matches output")
|
|
220
|
+
|
|
207
221
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> average(_["foo"])', input_sum)
|
|
208
222
|
assert_success(status, stderr, "average no matches")
|
|
209
223
|
assert_equal(%w[null], lines(stdout), "average no matches output")
|
|
@@ -288,6 +302,97 @@ assert_equal(
|
|
|
288
302
|
"array percentile output"
|
|
289
303
|
)
|
|
290
304
|
|
|
305
|
+
input_with_nil = <<~NDJSON
|
|
306
|
+
{"foo":1}
|
|
307
|
+
{"foo":null}
|
|
308
|
+
{"bar":999}
|
|
309
|
+
{"foo":3}
|
|
310
|
+
NDJSON
|
|
311
|
+
|
|
312
|
+
stdout, stderr, status = run_jrf('sum(_["foo"])', input_with_nil)
|
|
313
|
+
assert_success(status, stderr, "sum ignores nil")
|
|
314
|
+
assert_equal(%w[4], lines(stdout), "sum ignores nil output")
|
|
315
|
+
|
|
316
|
+
stdout, stderr, status = run_jrf('min(_["foo"])', input_with_nil)
|
|
317
|
+
assert_success(status, stderr, "min ignores nil")
|
|
318
|
+
assert_equal(%w[1], lines(stdout), "min ignores nil output")
|
|
319
|
+
|
|
320
|
+
stdout, stderr, status = run_jrf('max(_["foo"])', input_with_nil)
|
|
321
|
+
assert_success(status, stderr, "max ignores nil")
|
|
322
|
+
assert_equal(%w[3], lines(stdout), "max ignores nil output")
|
|
323
|
+
|
|
324
|
+
stdout, stderr, status = run_jrf('average(_["foo"])', input_with_nil)
|
|
325
|
+
assert_success(status, stderr, "average ignores nil")
|
|
326
|
+
assert_float_close(2.0, lines(stdout).first.to_f, 1e-12, "average ignores nil output")
|
|
327
|
+
|
|
328
|
+
stdout, stderr, status = run_jrf('stdev(_["foo"])', input_with_nil)
|
|
329
|
+
assert_success(status, stderr, "stdev ignores nil")
|
|
330
|
+
assert_float_close(1.0, lines(stdout).first.to_f, 1e-12, "stdev ignores nil output")
|
|
331
|
+
|
|
332
|
+
stdout, stderr, status = run_jrf('percentile(_["foo"], [0.5, 1.0])', input_with_nil)
|
|
333
|
+
assert_success(status, stderr, "percentile ignores nil")
|
|
334
|
+
assert_equal(
|
|
335
|
+
['{"percentile":0.5,"value":1}', '{"percentile":1.0,"value":3}'],
|
|
336
|
+
lines(stdout),
|
|
337
|
+
"percentile ignores nil output"
|
|
338
|
+
)
|
|
339
|
+
|
|
340
|
+
stdout, stderr, status = run_jrf('count()', input_with_nil)
|
|
341
|
+
assert_success(status, stderr, "count with nil rows")
|
|
342
|
+
assert_equal(%w[4], lines(stdout), "count with nil rows output")
|
|
343
|
+
|
|
344
|
+
stdout, stderr, status = run_jrf('count(_["foo"])', input_with_nil)
|
|
345
|
+
assert_success(status, stderr, "count(expr) ignores nil")
|
|
346
|
+
assert_equal(%w[2], lines(stdout), "count(expr) ignores nil output")
|
|
347
|
+
|
|
348
|
+
input_all_nil = <<~NDJSON
|
|
349
|
+
{"foo":null}
|
|
350
|
+
{"bar":1}
|
|
351
|
+
NDJSON
|
|
352
|
+
|
|
353
|
+
stdout, stderr, status = run_jrf('sum(_["foo"])', input_all_nil)
|
|
354
|
+
assert_success(status, stderr, "sum all nil")
|
|
355
|
+
assert_equal(%w[0], lines(stdout), "sum all nil output")
|
|
356
|
+
|
|
357
|
+
stdout, stderr, status = run_jrf('min(_["foo"])', input_all_nil)
|
|
358
|
+
assert_success(status, stderr, "min all nil")
|
|
359
|
+
assert_equal(%w[null], lines(stdout), "min all nil output")
|
|
360
|
+
|
|
361
|
+
stdout, stderr, status = run_jrf('max(_["foo"])', input_all_nil)
|
|
362
|
+
assert_success(status, stderr, "max all nil")
|
|
363
|
+
assert_equal(%w[null], lines(stdout), "max all nil output")
|
|
364
|
+
|
|
365
|
+
stdout, stderr, status = run_jrf('average(_["foo"])', input_all_nil)
|
|
366
|
+
assert_success(status, stderr, "average all nil")
|
|
367
|
+
assert_equal(%w[null], lines(stdout), "average all nil output")
|
|
368
|
+
|
|
369
|
+
stdout, stderr, status = run_jrf('stdev(_["foo"])', input_all_nil)
|
|
370
|
+
assert_success(status, stderr, "stdev all nil")
|
|
371
|
+
assert_equal(%w[null], lines(stdout), "stdev all nil output")
|
|
372
|
+
|
|
373
|
+
stdout, stderr, status = run_jrf('percentile(_["foo"], 0.5)', input_all_nil)
|
|
374
|
+
assert_success(status, stderr, "percentile all nil")
|
|
375
|
+
assert_equal(%w[null], lines(stdout), "percentile all nil output")
|
|
376
|
+
|
|
377
|
+
stdout, stderr, status = run_jrf('count(_["foo"])', input_all_nil)
|
|
378
|
+
assert_success(status, stderr, "count(expr) all nil")
|
|
379
|
+
assert_equal(%w[0], lines(stdout), "count(expr) all nil output")
|
|
380
|
+
|
|
381
|
+
input_multi_cols = <<~NDJSON
|
|
382
|
+
{"a":1,"b":10}
|
|
383
|
+
{"a":2,"b":20}
|
|
384
|
+
{"a":3,"b":30}
|
|
385
|
+
{"a":4,"b":40}
|
|
386
|
+
NDJSON
|
|
387
|
+
|
|
388
|
+
stdout, stderr, status = run_jrf('{a: percentile(_["a"], [0.25, 0.50, 1.0]), b: percentile(_["b"], [0.25, 0.50, 1.0])}', input_multi_cols)
|
|
389
|
+
assert_success(status, stderr, "nested array percentile for multiple columns")
|
|
390
|
+
assert_equal(
|
|
391
|
+
['{"a":[{"percentile":0.25,"value":1},{"percentile":0.5,"value":2},{"percentile":1.0,"value":4}],"b":[{"percentile":0.25,"value":10},{"percentile":0.5,"value":20},{"percentile":1.0,"value":40}]}'],
|
|
392
|
+
lines(stdout),
|
|
393
|
+
"nested array percentile output"
|
|
394
|
+
)
|
|
395
|
+
|
|
291
396
|
input_reduce = <<~NDJSON
|
|
292
397
|
{"s":"hello"}
|
|
293
398
|
{"s":"world"}
|
|
@@ -306,6 +411,57 @@ stdout, stderr, status = run_jrf('sum(_["foo"]) >> select(_ > 100)', input_sum)
|
|
|
306
411
|
assert_success(status, stderr, "post-reduce select drop")
|
|
307
412
|
assert_equal([], lines(stdout), "post-reduce select drop output")
|
|
308
413
|
|
|
414
|
+
input_whitespace_stream = "{\"foo\":1} {\"foo\":2}\n\t{\"foo\":3}\n"
|
|
415
|
+
stdout, stderr, status = run_jrf('_["foo"]', input_whitespace_stream)
|
|
416
|
+
assert_failure(status, "default NDJSON should reject same-line multi-values")
|
|
417
|
+
assert_includes(stderr, "JSON::ParserError")
|
|
418
|
+
|
|
419
|
+
stdout, stderr, status = run_jrf('_["foo"]', input_whitespace_stream, "--lax")
|
|
420
|
+
assert_success(status, stderr, "whitespace-separated JSON stream with --lax")
|
|
421
|
+
assert_equal(%w[1 2 3], lines(stdout), "whitespace-separated stream output")
|
|
422
|
+
|
|
423
|
+
input_json_seq = "\x1e{\"foo\":10}\n\x1e{\"foo\":20}\n"
|
|
424
|
+
stdout, stderr, status = run_jrf('_["foo"]', input_json_seq)
|
|
425
|
+
assert_failure(status, "RS framing requires --lax")
|
|
426
|
+
assert_includes(stderr, "JSON::ParserError")
|
|
427
|
+
|
|
428
|
+
stdout, stderr, status = run_jrf('_["foo"]', input_json_seq, "--lax")
|
|
429
|
+
assert_success(status, stderr, "json-seq style RS framing with --lax")
|
|
430
|
+
assert_equal(%w[10 20], lines(stdout), "json-seq style output")
|
|
431
|
+
|
|
432
|
+
input_lax_multiline = <<~JSONS
|
|
433
|
+
{
|
|
434
|
+
"foo": 101,
|
|
435
|
+
"bar": {"x": 1}
|
|
436
|
+
}
|
|
437
|
+
{
|
|
438
|
+
"foo": 202,
|
|
439
|
+
"bar": {"x": 2}
|
|
440
|
+
}
|
|
441
|
+
JSONS
|
|
442
|
+
stdout, stderr, status = run_jrf('_["foo"]', input_lax_multiline)
|
|
443
|
+
assert_failure(status, "default NDJSON rejects multiline objects")
|
|
444
|
+
assert_includes(stderr, "JSON::ParserError")
|
|
445
|
+
|
|
446
|
+
stdout, stderr, status = run_jrf('_["bar"]["x"]', input_lax_multiline, "--lax")
|
|
447
|
+
assert_success(status, stderr, "lax accepts multiline objects")
|
|
448
|
+
assert_equal(%w[1 2], lines(stdout), "lax multiline object output")
|
|
449
|
+
|
|
450
|
+
input_lax_mixed_separators = "{\"foo\":1}\n\x1e{\"foo\":2}\t{\"foo\":3}\n"
|
|
451
|
+
stdout, stderr, status = run_jrf('_["foo"]', input_lax_mixed_separators, "--lax")
|
|
452
|
+
assert_success(status, stderr, "lax accepts mixed whitespace and RS separators")
|
|
453
|
+
assert_equal(%w[1 2 3], lines(stdout), "lax mixed separators output")
|
|
454
|
+
|
|
455
|
+
input_lax_with_escaped_newline = "{\"s\":\"line1\\nline2\"}\n{\"s\":\"ok\"}\n"
|
|
456
|
+
stdout, stderr, status = run_jrf('_["s"]', input_lax_with_escaped_newline, "--lax")
|
|
457
|
+
assert_success(status, stderr, "lax handles escaped newlines in strings")
|
|
458
|
+
assert_equal(['"line1\nline2"', '"ok"'], lines(stdout), "lax escaped newline string output")
|
|
459
|
+
|
|
460
|
+
input_lax_trailing_rs = "\x1e{\"foo\":9}\n\x1e"
|
|
461
|
+
stdout, stderr, status = run_jrf('_["foo"]', input_lax_trailing_rs, "--lax")
|
|
462
|
+
assert_success(status, stderr, "lax ignores trailing separator")
|
|
463
|
+
assert_equal(%w[9], lines(stdout), "lax trailing separator output")
|
|
464
|
+
|
|
309
465
|
stdout, stderr, status = run_jrf('select(_["x"] > ) >> _["foo"]', "")
|
|
310
466
|
assert_failure(status, "syntax error should fail before row loop")
|
|
311
467
|
assert_includes(stderr, "syntax error")
|
|
@@ -339,4 +495,120 @@ stdout, stderr, status = run_jrf('_["foo"] >> select(_["keep"]) >> _["bar"] >> s
|
|
|
339
495
|
assert_success(status, stderr, "select/extract chain")
|
|
340
496
|
assert_equal(%w[3], lines(stdout), "chain output")
|
|
341
497
|
|
|
498
|
+
input_map = <<~NDJSON
|
|
499
|
+
{"values":[1,10,100]}
|
|
500
|
+
{"values":[2,20,200]}
|
|
501
|
+
{"values":[3,30,300]}
|
|
502
|
+
NDJSON
|
|
503
|
+
|
|
504
|
+
stdout, stderr, status = run_jrf('_["values"] >> map { |x| sum(x) }', input_map)
|
|
505
|
+
assert_success(status, stderr, "map with sum")
|
|
506
|
+
assert_equal(['[6,60,600]'], lines(stdout), "map with sum output")
|
|
507
|
+
|
|
508
|
+
stdout, stderr, status = run_jrf('_["values"] >> map { |x| min(x) }', input_map)
|
|
509
|
+
assert_success(status, stderr, "map with min")
|
|
510
|
+
assert_equal(['[1,10,100]'], lines(stdout), "map with min output")
|
|
511
|
+
|
|
512
|
+
stdout, stderr, status = run_jrf('_["values"] >> map { |x| max(x) }', input_map)
|
|
513
|
+
assert_success(status, stderr, "map with max")
|
|
514
|
+
assert_equal(['[3,30,300]'], lines(stdout), "map with max output")
|
|
515
|
+
|
|
516
|
+
input_map_varying = <<~NDJSON
|
|
517
|
+
[1,10]
|
|
518
|
+
[2,20,200]
|
|
519
|
+
[3]
|
|
520
|
+
NDJSON
|
|
521
|
+
|
|
522
|
+
stdout, stderr, status = run_jrf('map { |x| sum(x) }', input_map_varying)
|
|
523
|
+
assert_success(status, stderr, "map varying lengths")
|
|
524
|
+
assert_equal(['[6,30,200]'], lines(stdout), "map varying lengths output")
|
|
525
|
+
|
|
526
|
+
input_map_values = <<~NDJSON
|
|
527
|
+
{"a":1,"b":10}
|
|
528
|
+
{"a":2,"b":20}
|
|
529
|
+
{"a":3,"b":30}
|
|
530
|
+
NDJSON
|
|
531
|
+
|
|
532
|
+
stdout, stderr, status = run_jrf('map_values { |v| sum(v) }', input_map_values)
|
|
533
|
+
assert_success(status, stderr, "map_values with sum")
|
|
534
|
+
assert_equal(['{"a":6,"b":60}'], lines(stdout), "map_values with sum output")
|
|
535
|
+
|
|
536
|
+
stdout, stderr, status = run_jrf('map_values { |v| min(v) }', input_map_values)
|
|
537
|
+
assert_success(status, stderr, "map_values with min")
|
|
538
|
+
assert_equal(['{"a":1,"b":10}'], lines(stdout), "map_values with min output")
|
|
539
|
+
|
|
540
|
+
input_map_values_varying = <<~NDJSON
|
|
541
|
+
{"a":1}
|
|
542
|
+
{"a":2,"b":20}
|
|
543
|
+
{"a":3,"b":30}
|
|
544
|
+
NDJSON
|
|
545
|
+
|
|
546
|
+
stdout, stderr, status = run_jrf('map_values { |v| sum(v) }', input_map_values_varying)
|
|
547
|
+
assert_success(status, stderr, "map_values varying keys")
|
|
548
|
+
assert_equal(['{"a":6,"b":50}'], lines(stdout), "map_values varying keys output")
|
|
549
|
+
|
|
550
|
+
stdout, stderr, status = run_jrf('map_values { |v| count(v) }', input_map_values)
|
|
551
|
+
assert_success(status, stderr, "map_values with count")
|
|
552
|
+
assert_equal(['{"a":3,"b":3}'], lines(stdout), "map_values with count output")
|
|
553
|
+
|
|
554
|
+
stdout, stderr, status = run_jrf('select(false) >> map { |x| sum(x) }', input_map)
|
|
555
|
+
assert_success(status, stderr, "map no matches")
|
|
556
|
+
assert_equal(['[]'], lines(stdout), "map no matches output")
|
|
557
|
+
|
|
558
|
+
stdout, stderr, status = run_jrf('select(false) >> map_values { |v| sum(v) }', input_map_values)
|
|
559
|
+
assert_success(status, stderr, "map_values no matches")
|
|
560
|
+
assert_equal(['{}'], lines(stdout), "map_values no matches output")
|
|
561
|
+
|
|
562
|
+
stdout, stderr, status = run_jrf('map_values { |v| sum(v) } >> map_values { |v| v * 10 }', input_map_values)
|
|
563
|
+
assert_success(status, stderr, "map_values piped to map_values passthrough")
|
|
564
|
+
assert_equal(['{"a":60,"b":600}'], lines(stdout), "map_values piped output")
|
|
565
|
+
|
|
566
|
+
input_gb = <<~NDJSON
|
|
567
|
+
{"status":200,"path":"/a","latency":10}
|
|
568
|
+
{"status":404,"path":"/b","latency":50}
|
|
569
|
+
{"status":200,"path":"/c","latency":30}
|
|
570
|
+
{"status":200,"path":"/d","latency":20}
|
|
571
|
+
NDJSON
|
|
572
|
+
|
|
573
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { count() }', input_gb)
|
|
574
|
+
assert_success(status, stderr, "group_by with count")
|
|
575
|
+
assert_equal(['{"200":3,"404":1}'], lines(stdout), "group_by with count output")
|
|
576
|
+
|
|
577
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { sum(_["latency"]) }', input_gb)
|
|
578
|
+
assert_success(status, stderr, "group_by with sum")
|
|
579
|
+
assert_equal(['{"200":60,"404":50}'], lines(stdout), "group_by with sum output")
|
|
580
|
+
|
|
581
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { average(_["latency"]) }', input_gb)
|
|
582
|
+
assert_success(status, stderr, "group_by with average")
|
|
583
|
+
result = JSON.parse(lines(stdout).first)
|
|
584
|
+
assert_float_close(20.0, result["200"], 1e-12, "group_by average 200")
|
|
585
|
+
assert_float_close(50.0, result["404"], 1e-12, "group_by average 404")
|
|
586
|
+
|
|
587
|
+
stdout, stderr, status = run_jrf('group_by(_["status"])', input_gb)
|
|
588
|
+
assert_success(status, stderr, "group_by default (collect rows)")
|
|
589
|
+
result = JSON.parse(lines(stdout).first)
|
|
590
|
+
assert_equal(3, result["200"].length, "group_by default 200 count")
|
|
591
|
+
assert_equal(1, result["404"].length, "group_by default 404 count")
|
|
592
|
+
assert_equal("/a", result["200"][0]["path"], "group_by default first row")
|
|
593
|
+
|
|
594
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { group(_["path"]) }', input_gb)
|
|
595
|
+
assert_success(status, stderr, "group_by with group(expr)")
|
|
596
|
+
assert_equal(['{"200":["/a","/c","/d"],"404":["/b"]}'], lines(stdout), "group_by with group(expr) output")
|
|
597
|
+
|
|
598
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { min(_["latency"]) }', input_gb)
|
|
599
|
+
assert_success(status, stderr, "group_by with min")
|
|
600
|
+
assert_equal(['{"200":10,"404":50}'], lines(stdout), "group_by with min output")
|
|
601
|
+
|
|
602
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { {total: sum(_["latency"]), n: count()} }', input_gb)
|
|
603
|
+
assert_success(status, stderr, "group_by with multi-reducer")
|
|
604
|
+
assert_equal(['{"200":{"total":60,"n":3},"404":{"total":50,"n":1}}'], lines(stdout), "group_by multi-reducer output")
|
|
605
|
+
|
|
606
|
+
stdout, stderr, status = run_jrf('select(false) >> group_by(_["status"]) { count() }', input_gb)
|
|
607
|
+
assert_success(status, stderr, "group_by no matches")
|
|
608
|
+
assert_equal(['{}'], lines(stdout), "group_by no matches output")
|
|
609
|
+
|
|
610
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { count() } >> _[200]', input_gb)
|
|
611
|
+
assert_success(status, stderr, "group_by then extract")
|
|
612
|
+
assert_equal(%w[3], lines(stdout), "group_by then extract output")
|
|
613
|
+
|
|
342
614
|
puts "ok"
|
metadata
CHANGED
|
@@ -1,14 +1,28 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: jrf
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.2
|
|
4
|
+
version: 0.1.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- kazuho
|
|
8
8
|
bindir: exe
|
|
9
9
|
cert_chain: []
|
|
10
10
|
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
-
dependencies: []
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: oj
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '3.16'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - ">="
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '3.16'
|
|
12
26
|
description: jrf is a JSON filter with the power and speed of Ruby. It lets you write
|
|
13
27
|
transforms as Ruby expressions, so you can use arbitrary Ruby logic. It supports
|
|
14
28
|
extraction, filtering, flattening, sorting, and aggregation in stage pipelines.
|
|
@@ -31,6 +45,7 @@ files:
|
|
|
31
45
|
- lib/jrf/reducers.rb
|
|
32
46
|
- lib/jrf/row_context.rb
|
|
33
47
|
- lib/jrf/runner.rb
|
|
48
|
+
- lib/jrf/stage.rb
|
|
34
49
|
- lib/jrf/version.rb
|
|
35
50
|
- test/jrf_test.rb
|
|
36
51
|
licenses:
|