jrf 0.1.3 → 0.1.5
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +0 -5
- data/lib/jrf/cli.rb +9 -5
- data/lib/jrf/pipeline.rb +85 -0
- data/lib/jrf/row_context.rb +23 -10
- data/lib/jrf/runner.rb +10 -85
- data/lib/jrf/stage.rb +113 -45
- data/lib/jrf/version.rb +1 -1
- data/lib/jrf.rb +18 -0
- data/test/jrf_test.rb +182 -23
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 408c1f9706af5efaa1bf0125201d6647b4c108aa4aa28c99a93b59fb9cc94f02
|
|
4
|
+
data.tar.gz: 702f2fb14dc9d498292b02c41f0cdb4a91c0fa3e093ad9a71435d9a2604532fa
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 80dfa6d2bb7c9304e779a3e80815efbde9c599d66665708738b833b08daa1918ae54bc5b170c8b90c60399fe18b0df06d576e2c8c3d8b76b74f9daa826efcfa8
|
|
7
|
+
data.tar.gz: 597b715fd3ebd31a49cb2839f7dda814b845cd5aa87a3ac9a9cf551553792b453af749e287652553903de851ea7b06a9e5940abc7c25fccd319a9e7e72d75840
|
data/Rakefile
CHANGED
data/lib/jrf/cli.rb
CHANGED
|
@@ -4,16 +4,17 @@ require_relative "runner"
|
|
|
4
4
|
|
|
5
5
|
module Jrf
|
|
6
6
|
class CLI
|
|
7
|
-
USAGE = "usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'"
|
|
7
|
+
USAGE = "usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'"
|
|
8
8
|
|
|
9
9
|
HELP_TEXT = <<~'TEXT'
|
|
10
|
-
usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'
|
|
10
|
+
usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'
|
|
11
11
|
|
|
12
12
|
JSON filter with the power and speed of Ruby.
|
|
13
13
|
|
|
14
14
|
Options:
|
|
15
15
|
-v, --verbose print parsed stage expressions
|
|
16
16
|
--lax allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
|
|
17
|
+
-p, --pretty pretty-print JSON output instead of compact NDJSON
|
|
17
18
|
-h, --help show this help and exit
|
|
18
19
|
|
|
19
20
|
Pipeline:
|
|
@@ -28,13 +29,13 @@ module Jrf
|
|
|
28
29
|
jrf '_["msg"] >> reduce(nil) { |acc, v| acc ? "#{acc} #{v}" : v }'
|
|
29
30
|
|
|
30
31
|
See Also:
|
|
31
|
-
|
|
32
|
-
man jrf
|
|
32
|
+
https://github.com/kazuho/jrf#readme
|
|
33
33
|
TEXT
|
|
34
34
|
|
|
35
35
|
def self.run(argv = ARGV, input: ARGF, out: $stdout, err: $stderr)
|
|
36
36
|
verbose = false
|
|
37
37
|
lax = false
|
|
38
|
+
pretty = false
|
|
38
39
|
|
|
39
40
|
while argv.first&.start_with?("-")
|
|
40
41
|
case argv.first
|
|
@@ -44,6 +45,9 @@ module Jrf
|
|
|
44
45
|
when "--lax"
|
|
45
46
|
lax = true
|
|
46
47
|
argv.shift
|
|
48
|
+
when "-p", "--pretty"
|
|
49
|
+
pretty = true
|
|
50
|
+
argv.shift
|
|
47
51
|
when "-h", "--help"
|
|
48
52
|
out.puts HELP_TEXT
|
|
49
53
|
return 0
|
|
@@ -60,7 +64,7 @@ module Jrf
|
|
|
60
64
|
end
|
|
61
65
|
|
|
62
66
|
expression = argv.shift
|
|
63
|
-
Runner.new(input: input, out: out, err: err, lax: lax).run(expression, verbose: verbose)
|
|
67
|
+
Runner.new(input: input, out: out, err: err, lax: lax, pretty: pretty).run(expression, verbose: verbose)
|
|
64
68
|
0
|
|
65
69
|
end
|
|
66
70
|
end
|
data/lib/jrf/pipeline.rb
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "control"
|
|
4
|
+
require_relative "row_context"
|
|
5
|
+
require_relative "stage"
|
|
6
|
+
|
|
7
|
+
module Jrf
|
|
8
|
+
class Pipeline
|
|
9
|
+
def initialize(*blocks)
|
|
10
|
+
raise ArgumentError, "at least one stage block is required" if blocks.empty?
|
|
11
|
+
|
|
12
|
+
@ctx = RowContext.new
|
|
13
|
+
@stages = blocks.map { |block| Stage.new(@ctx, block, src: nil) }
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
# Run the pipeline on an enumerable of input values.
|
|
17
|
+
#
|
|
18
|
+
# Without a block, returns an Array of output values.
|
|
19
|
+
# With a block, streams each output value to the block.
|
|
20
|
+
#
|
|
21
|
+
# @param input [Enumerable] input values to process
|
|
22
|
+
# @yieldparam value output value
|
|
23
|
+
# @return [Array, nil] output values (without block), or nil (with block)
|
|
24
|
+
def call(input, &on_output)
|
|
25
|
+
if on_output
|
|
26
|
+
call_streaming(input, &on_output)
|
|
27
|
+
else
|
|
28
|
+
results = []
|
|
29
|
+
call_streaming(input) { |v| results << v }
|
|
30
|
+
results
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
private
|
|
35
|
+
|
|
36
|
+
def call_streaming(input, &on_output)
|
|
37
|
+
error = nil
|
|
38
|
+
begin
|
|
39
|
+
input.each { |value| process_value(value, @stages, &on_output) }
|
|
40
|
+
rescue StandardError => e
|
|
41
|
+
error = e
|
|
42
|
+
ensure
|
|
43
|
+
flush_reducers(@stages, &on_output)
|
|
44
|
+
end
|
|
45
|
+
raise error if error
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def process_value(input, stages, &on_output)
|
|
49
|
+
current_values = [input]
|
|
50
|
+
|
|
51
|
+
stages.each do |stage|
|
|
52
|
+
next_values = []
|
|
53
|
+
|
|
54
|
+
current_values.each do |value|
|
|
55
|
+
out = stage.call(value)
|
|
56
|
+
if out.equal?(Control::DROPPED)
|
|
57
|
+
next
|
|
58
|
+
elsif out.is_a?(Control::Flat)
|
|
59
|
+
unless out.value.is_a?(Array)
|
|
60
|
+
raise TypeError, "flat expects Array, got #{out.value.class}"
|
|
61
|
+
end
|
|
62
|
+
next_values.concat(out.value)
|
|
63
|
+
else
|
|
64
|
+
next_values << out
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
return if next_values.empty?
|
|
69
|
+
current_values = next_values
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
current_values.each(&on_output)
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def flush_reducers(stages, &on_output)
|
|
76
|
+
stages.each_with_index do |stage, idx|
|
|
77
|
+
rows = stage.finish
|
|
78
|
+
next if rows.empty?
|
|
79
|
+
|
|
80
|
+
rest = stages.drop(idx + 1)
|
|
81
|
+
rows.each { |value| process_value(value, rest, &on_output) }
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
end
|
data/lib/jrf/row_context.rb
CHANGED
|
@@ -26,10 +26,12 @@ module Jrf
|
|
|
26
26
|
def initialize(obj = nil)
|
|
27
27
|
@obj = obj
|
|
28
28
|
@__jrf_current_stage = nil
|
|
29
|
+
@__jrf_current_input = obj
|
|
29
30
|
end
|
|
30
31
|
|
|
31
32
|
def reset(obj)
|
|
32
33
|
@obj = obj
|
|
34
|
+
@__jrf_current_input = obj
|
|
33
35
|
self
|
|
34
36
|
end
|
|
35
37
|
|
|
@@ -38,11 +40,11 @@ module Jrf
|
|
|
38
40
|
end
|
|
39
41
|
|
|
40
42
|
def flat
|
|
41
|
-
Control::Flat.new(
|
|
43
|
+
Control::Flat.new(current_input)
|
|
42
44
|
end
|
|
43
45
|
|
|
44
46
|
def select(predicate)
|
|
45
|
-
predicate ?
|
|
47
|
+
predicate ? current_input : Control::DROPPED
|
|
46
48
|
end
|
|
47
49
|
|
|
48
50
|
define_reducer(:sum) do |_ctx, value, initial: 0, block: nil|
|
|
@@ -111,15 +113,16 @@ module Jrf
|
|
|
111
113
|
define_reducer(:sort) do |ctx, key = MISSING, block: nil|
|
|
112
114
|
if block
|
|
113
115
|
{
|
|
114
|
-
value: ctx.
|
|
116
|
+
value: ctx.send(:current_input),
|
|
115
117
|
initial: -> { [] },
|
|
116
118
|
finish: ->(rows) { rows.sort(&block) },
|
|
117
119
|
step: ->(rows, row) { rows << row }
|
|
118
120
|
}
|
|
119
121
|
else
|
|
120
|
-
|
|
122
|
+
current = ctx.send(:current_input)
|
|
123
|
+
resolved_key = key.equal?(MISSING) ? current : key
|
|
121
124
|
{
|
|
122
|
-
value: [resolved_key,
|
|
125
|
+
value: [resolved_key, current],
|
|
123
126
|
initial: -> { [] },
|
|
124
127
|
finish: ->(pairs) { pairs.sort_by(&:first).map(&:last) },
|
|
125
128
|
step: ->(pairs, pair) { pairs << pair }
|
|
@@ -128,7 +131,7 @@ module Jrf
|
|
|
128
131
|
end
|
|
129
132
|
|
|
130
133
|
define_reducer(:group) do |ctx, value = MISSING, block: nil|
|
|
131
|
-
resolved_value = value.equal?(MISSING) ? ctx.
|
|
134
|
+
resolved_value = value.equal?(MISSING) ? ctx.send(:current_input) : value
|
|
132
135
|
{ value: resolved_value, initial: -> { [] }, step: ->(acc, v) { acc << v } }
|
|
133
136
|
end
|
|
134
137
|
|
|
@@ -143,9 +146,7 @@ module Jrf
|
|
|
143
146
|
else
|
|
144
147
|
->(values) {
|
|
145
148
|
sorted = values.sort
|
|
146
|
-
percentages.map
|
|
147
|
-
{ "percentile" => p, "value" => ctx.send(:percentile_value, sorted, p) }
|
|
148
|
-
end
|
|
149
|
+
[percentages.map { |p| ctx.send(:percentile_value, sorted, p) }]
|
|
149
150
|
}
|
|
150
151
|
end
|
|
151
152
|
|
|
@@ -160,7 +161,7 @@ module Jrf
|
|
|
160
161
|
def reduce(initial, &block)
|
|
161
162
|
raise ArgumentError, "reduce requires a block" unless block
|
|
162
163
|
|
|
163
|
-
@__jrf_current_stage.allocate_reducer(
|
|
164
|
+
@__jrf_current_stage.allocate_reducer(current_input, initial: initial, &block)
|
|
164
165
|
end
|
|
165
166
|
|
|
166
167
|
def map(&block)
|
|
@@ -182,6 +183,18 @@ module Jrf
|
|
|
182
183
|
|
|
183
184
|
private
|
|
184
185
|
|
|
186
|
+
def current_input
|
|
187
|
+
@__jrf_current_input
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
def __jrf_with_current_input(value)
|
|
191
|
+
saved_input = current_input
|
|
192
|
+
@__jrf_current_input = value
|
|
193
|
+
yield
|
|
194
|
+
ensure
|
|
195
|
+
@__jrf_current_input = saved_input
|
|
196
|
+
end
|
|
197
|
+
|
|
185
198
|
def reducer_initial_value(initial)
|
|
186
199
|
return initial.call if initial.respond_to?(:call)
|
|
187
200
|
return initial.dup if initial.is_a?(Array) || initial.is_a?(Hash)
|
data/lib/jrf/runner.rb
CHANGED
|
@@ -1,37 +1,19 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "json"
|
|
4
|
-
require_relative "
|
|
4
|
+
require_relative "pipeline"
|
|
5
5
|
require_relative "pipeline_parser"
|
|
6
|
-
require_relative "reducers"
|
|
7
|
-
require_relative "row_context"
|
|
8
|
-
require_relative "stage"
|
|
9
6
|
|
|
10
7
|
module Jrf
|
|
11
8
|
class Runner
|
|
12
9
|
RS_CHAR = "\x1e"
|
|
13
10
|
|
|
14
|
-
|
|
15
|
-
def [](key)
|
|
16
|
-
self
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
def method_missing(name, *args, &block)
|
|
20
|
-
self
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
def respond_to_missing?(name, include_private = false)
|
|
24
|
-
true
|
|
25
|
-
end
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
PROBE_VALUE = ProbeValue.new
|
|
29
|
-
|
|
30
|
-
def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false)
|
|
11
|
+
def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false, pretty: false)
|
|
31
12
|
@input = input
|
|
32
13
|
@out = out
|
|
33
14
|
@err = err
|
|
34
15
|
@lax = lax
|
|
16
|
+
@pretty = pretty
|
|
35
17
|
end
|
|
36
18
|
|
|
37
19
|
def run(expression, verbose: false)
|
|
@@ -39,53 +21,19 @@ module Jrf
|
|
|
39
21
|
stages = parsed[:stages]
|
|
40
22
|
dump_stages(stages) if verbose
|
|
41
23
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
24
|
+
blocks = stages.map { |stage|
|
|
25
|
+
eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
|
|
26
|
+
}
|
|
27
|
+
pipeline = Pipeline.new(*blocks)
|
|
46
28
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
end
|
|
51
|
-
rescue StandardError => e
|
|
52
|
-
error = e
|
|
53
|
-
ensure
|
|
54
|
-
flush_reducers(compiled)
|
|
29
|
+
input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
|
|
30
|
+
pipeline.call(input_enum) do |value|
|
|
31
|
+
@out.puts(@pretty ? JSON.pretty_generate(value) : JSON.generate(value))
|
|
55
32
|
end
|
|
56
|
-
|
|
57
|
-
raise error if error
|
|
58
33
|
end
|
|
59
34
|
|
|
60
35
|
private
|
|
61
36
|
|
|
62
|
-
def process_value(input, stages)
|
|
63
|
-
current_values = [input]
|
|
64
|
-
|
|
65
|
-
stages.each do |stage|
|
|
66
|
-
next_values = []
|
|
67
|
-
|
|
68
|
-
current_values.each do |value|
|
|
69
|
-
out = stage.call(value)
|
|
70
|
-
if out.equal?(Control::DROPPED)
|
|
71
|
-
next
|
|
72
|
-
elsif out.is_a?(Control::Flat)
|
|
73
|
-
unless out.value.is_a?(Array)
|
|
74
|
-
raise TypeError, "flat expects Array, got #{out.value.class}"
|
|
75
|
-
end
|
|
76
|
-
next_values.concat(out.value)
|
|
77
|
-
else
|
|
78
|
-
next_values << out
|
|
79
|
-
end
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
return if next_values.empty?
|
|
83
|
-
current_values = next_values
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
current_values.each { |value| @out.puts JSON.generate(value) }
|
|
87
|
-
end
|
|
88
|
-
|
|
89
37
|
def each_input_value
|
|
90
38
|
return each_input_value_lax { |value| yield value } if @lax
|
|
91
39
|
|
|
@@ -124,33 +72,10 @@ module Jrf
|
|
|
124
72
|
raise JSON::ParserError, e.message
|
|
125
73
|
end
|
|
126
74
|
|
|
127
|
-
def compile_stages(stages, ctx)
|
|
128
|
-
mod = Module.new
|
|
129
|
-
|
|
130
|
-
stages.each_with_index.map do |stage, i|
|
|
131
|
-
method_name = :"__jrf_stage_#{i}"
|
|
132
|
-
mod.module_eval("def #{method_name}; #{stage[:src]}; end", "(jrf stage #{i})", 1)
|
|
133
|
-
Stage.new(ctx, method_name, src: stage[:src])
|
|
134
|
-
end.tap { ctx.extend(mod) }
|
|
135
|
-
end
|
|
136
|
-
|
|
137
75
|
def dump_stages(stages)
|
|
138
76
|
stages.each_with_index do |stage, i|
|
|
139
77
|
@err.puts "stage[#{i}]: #{stage[:src]}"
|
|
140
78
|
end
|
|
141
79
|
end
|
|
142
|
-
|
|
143
|
-
def flush_reducers(stages)
|
|
144
|
-
tail = stages
|
|
145
|
-
loop do
|
|
146
|
-
idx = tail.index(&:reducer?)
|
|
147
|
-
break unless idx
|
|
148
|
-
|
|
149
|
-
rows = tail[idx].finish
|
|
150
|
-
rest = tail.drop(idx + 1)
|
|
151
|
-
rows.each { |value| process_value(value, rest) }
|
|
152
|
-
tail = rest
|
|
153
|
-
end
|
|
154
|
-
end
|
|
155
80
|
end
|
|
156
81
|
end
|
data/lib/jrf/stage.rb
CHANGED
|
@@ -7,7 +7,7 @@ module Jrf
|
|
|
7
7
|
class Stage
|
|
8
8
|
ReducerToken = Struct.new(:index)
|
|
9
9
|
|
|
10
|
-
attr_reader :
|
|
10
|
+
attr_reader :src
|
|
11
11
|
|
|
12
12
|
def self.resolve_template(template, reducers)
|
|
13
13
|
if template.is_a?(ReducerToken)
|
|
@@ -22,28 +22,27 @@ module Jrf
|
|
|
22
22
|
end
|
|
23
23
|
end
|
|
24
24
|
|
|
25
|
-
def initialize(ctx,
|
|
25
|
+
def initialize(ctx, block, src: nil)
|
|
26
26
|
@ctx = ctx
|
|
27
|
-
@
|
|
27
|
+
@block = block
|
|
28
28
|
@src = src
|
|
29
29
|
@reducers = []
|
|
30
30
|
@cursor = 0
|
|
31
31
|
@template = nil
|
|
32
32
|
@mode = nil # nil=unknown, :reducer, :passthrough
|
|
33
|
-
@
|
|
33
|
+
@map_transforms = {}
|
|
34
34
|
end
|
|
35
35
|
|
|
36
|
-
def call(input
|
|
36
|
+
def call(input)
|
|
37
37
|
@ctx.reset(input)
|
|
38
38
|
@cursor = 0
|
|
39
|
-
@probing = probing
|
|
40
39
|
@ctx.__jrf_current_stage = self
|
|
41
|
-
result = @ctx.
|
|
40
|
+
result = @ctx.instance_eval(&@block)
|
|
42
41
|
|
|
43
42
|
if @mode.nil? && @reducers.any?
|
|
44
43
|
@mode = :reducer
|
|
45
44
|
@template = result
|
|
46
|
-
elsif @mode.nil?
|
|
45
|
+
elsif @mode.nil?
|
|
47
46
|
@mode = :passthrough
|
|
48
47
|
end
|
|
49
48
|
|
|
@@ -54,43 +53,50 @@ module Jrf
|
|
|
54
53
|
idx = @cursor
|
|
55
54
|
finish_rows = finish || ->(acc) { [acc] }
|
|
56
55
|
@reducers[idx] ||= Reducers.reduce(initial, finish: finish_rows, &step_fn)
|
|
57
|
-
@reducers[idx].step(value)
|
|
56
|
+
@reducers[idx].step(value)
|
|
58
57
|
@cursor += 1
|
|
59
58
|
ReducerToken.new(idx)
|
|
60
59
|
end
|
|
61
60
|
|
|
62
61
|
def allocate_map(type, collection, &block)
|
|
63
62
|
idx = @cursor
|
|
64
|
-
|
|
63
|
+
@cursor += 1
|
|
65
64
|
|
|
66
|
-
|
|
67
|
-
|
|
65
|
+
# Transformation mode (detected on first call)
|
|
66
|
+
if @map_transforms[idx]
|
|
67
|
+
return transform_collection(type, collection, &block)
|
|
68
|
+
end
|
|
68
69
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
70
|
+
map_reducer = (@reducers[idx] ||= MapReducer.new(type))
|
|
71
|
+
|
|
72
|
+
case type
|
|
73
|
+
when :array
|
|
74
|
+
raise TypeError, "map expects Array, got #{collection.class}" unless collection.is_a?(Array)
|
|
75
|
+
collection.each_with_index do |v, i|
|
|
76
|
+
slot = map_reducer.slot(i)
|
|
77
|
+
with_scoped_reducers(slot.reducers) do
|
|
78
|
+
result = @ctx.send(:__jrf_with_current_input, v) { block.call(v) }
|
|
79
|
+
slot.template ||= result
|
|
78
80
|
end
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
81
|
+
end
|
|
82
|
+
when :hash
|
|
83
|
+
raise TypeError, "map_values expects Hash, got #{collection.class}" unless collection.is_a?(Hash)
|
|
84
|
+
collection.each do |k, v|
|
|
85
|
+
slot = map_reducer.slot(k)
|
|
86
|
+
with_scoped_reducers(slot.reducers) do
|
|
87
|
+
result = @ctx.send(:__jrf_with_current_input, v) { block.call(v) }
|
|
88
|
+
slot.template ||= result
|
|
87
89
|
end
|
|
88
90
|
end
|
|
91
|
+
end
|
|
89
92
|
|
|
90
|
-
|
|
93
|
+
# Detect transformation: no reducers were allocated in any slot
|
|
94
|
+
if @mode.nil? && map_reducer.slots.values.all? { |s| s.reducers.empty? }
|
|
95
|
+
@map_transforms[idx] = true
|
|
96
|
+
@reducers[idx] = nil
|
|
97
|
+
return transformed_slots(type, map_reducer)
|
|
91
98
|
end
|
|
92
99
|
|
|
93
|
-
@cursor += 1
|
|
94
100
|
ReducerToken.new(idx)
|
|
95
101
|
end
|
|
96
102
|
|
|
@@ -98,22 +104,17 @@ module Jrf
|
|
|
98
104
|
idx = @cursor
|
|
99
105
|
map_reducer = (@reducers[idx] ||= MapReducer.new(:hash))
|
|
100
106
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
end
|
|
107
|
+
row = @ctx._
|
|
108
|
+
slot = map_reducer.slot(key)
|
|
109
|
+
with_scoped_reducers(slot.reducers) do
|
|
110
|
+
result = @ctx.send(:__jrf_with_current_input, row) { block.call(row) }
|
|
111
|
+
slot.template ||= result
|
|
107
112
|
end
|
|
108
113
|
|
|
109
114
|
@cursor += 1
|
|
110
115
|
ReducerToken.new(idx)
|
|
111
116
|
end
|
|
112
117
|
|
|
113
|
-
def reducer?
|
|
114
|
-
@mode == :reducer
|
|
115
|
-
end
|
|
116
|
-
|
|
117
118
|
def finish
|
|
118
119
|
return [] unless @mode == :reducer && @reducers.any?
|
|
119
120
|
|
|
@@ -137,26 +138,93 @@ module Jrf
|
|
|
137
138
|
@cursor = saved_cursor
|
|
138
139
|
end
|
|
139
140
|
|
|
141
|
+
def transform_collection(type, collection, &block)
|
|
142
|
+
case type
|
|
143
|
+
when :array
|
|
144
|
+
raise TypeError, "map expects Array, got #{collection.class}" unless collection.is_a?(Array)
|
|
145
|
+
|
|
146
|
+
collection.each_with_object([]) do |value, result|
|
|
147
|
+
mapped = @ctx.send(:__jrf_with_current_input, value) { block.call(value) }
|
|
148
|
+
append_map_result(result, mapped)
|
|
149
|
+
end
|
|
150
|
+
when :hash
|
|
151
|
+
raise TypeError, "map_values expects Hash, got #{collection.class}" unless collection.is_a?(Hash)
|
|
152
|
+
|
|
153
|
+
collection.each_with_object({}) do |(key, value), result|
|
|
154
|
+
mapped = @ctx.send(:__jrf_with_current_input, value) { block.call(value) }
|
|
155
|
+
next if mapped.equal?(Control::DROPPED)
|
|
156
|
+
raise TypeError, "flat is not supported inside map_values" if mapped.is_a?(Control::Flat)
|
|
157
|
+
|
|
158
|
+
result[key] = mapped
|
|
159
|
+
end
|
|
160
|
+
end
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
def transformed_slots(type, map_reducer)
|
|
164
|
+
case type
|
|
165
|
+
when :array
|
|
166
|
+
map_reducer.slots
|
|
167
|
+
.sort_by { |k, _| k }
|
|
168
|
+
.each_with_object([]) do |(_, slot), result|
|
|
169
|
+
append_map_result(result, slot.template)
|
|
170
|
+
end
|
|
171
|
+
when :hash
|
|
172
|
+
map_reducer.slots.each_with_object({}) do |(key, slot), result|
|
|
173
|
+
next if slot.template.equal?(Control::DROPPED)
|
|
174
|
+
raise TypeError, "flat is not supported inside map_values" if slot.template.is_a?(Control::Flat)
|
|
175
|
+
|
|
176
|
+
result[key] = slot.template
|
|
177
|
+
end
|
|
178
|
+
end
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
def append_map_result(result, mapped)
|
|
182
|
+
return if mapped.equal?(Control::DROPPED)
|
|
183
|
+
|
|
184
|
+
if mapped.is_a?(Control::Flat)
|
|
185
|
+
unless mapped.value.is_a?(Array)
|
|
186
|
+
raise TypeError, "flat expects Array, got #{mapped.value.class}"
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
result.concat(mapped.value)
|
|
190
|
+
else
|
|
191
|
+
result << mapped
|
|
192
|
+
end
|
|
193
|
+
end
|
|
194
|
+
|
|
140
195
|
class MapReducer
|
|
141
|
-
attr_reader :slots
|
|
196
|
+
attr_reader :slots
|
|
142
197
|
|
|
143
198
|
def initialize(type)
|
|
144
199
|
@type = type
|
|
145
200
|
@slots = {}
|
|
146
|
-
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
def slot(key)
|
|
204
|
+
@slots[key] ||= SlotState.new
|
|
147
205
|
end
|
|
148
206
|
|
|
149
207
|
def finish
|
|
150
208
|
case @type
|
|
151
209
|
when :array
|
|
152
210
|
keys = @slots.keys.sort
|
|
153
|
-
[keys.map { |k| Stage.resolve_template(@
|
|
211
|
+
[keys.map { |k| Stage.resolve_template(@slots[k].template, @slots[k].reducers) }]
|
|
154
212
|
when :hash
|
|
155
213
|
result = {}
|
|
156
|
-
@slots.each { |k,
|
|
214
|
+
@slots.each { |k, s| result[k] = Stage.resolve_template(s.template, s.reducers) }
|
|
157
215
|
[result]
|
|
158
216
|
end
|
|
159
217
|
end
|
|
218
|
+
|
|
219
|
+
class SlotState
|
|
220
|
+
attr_reader :reducers
|
|
221
|
+
attr_accessor :template
|
|
222
|
+
|
|
223
|
+
def initialize
|
|
224
|
+
@reducers = []
|
|
225
|
+
@template = nil
|
|
226
|
+
end
|
|
227
|
+
end
|
|
160
228
|
end
|
|
161
229
|
end
|
|
162
230
|
end
|
data/lib/jrf/version.rb
CHANGED
data/lib/jrf.rb
CHANGED
|
@@ -2,3 +2,21 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "jrf/version"
|
|
4
4
|
require_relative "jrf/cli"
|
|
5
|
+
require_relative "jrf/pipeline"
|
|
6
|
+
|
|
7
|
+
module Jrf
|
|
8
|
+
# Create a pipeline from one or more stage blocks.
|
|
9
|
+
#
|
|
10
|
+
# Each block is evaluated in a context where +_+ is the current value.
|
|
11
|
+
# All jrf built-in functions (+select+, +sum+, +map+, +group_by+, etc.)
|
|
12
|
+
# are available inside blocks. See https://github.com/kazuho/jrf#readme for the full list.
|
|
13
|
+
#
|
|
14
|
+
# @param blocks [Array<Proc>] one or more stage procs
|
|
15
|
+
# @return [Pipeline] a callable pipeline
|
|
16
|
+
# @example
|
|
17
|
+
# j = Jrf.new(proc { select(_["x"] > 10) }, proc { sum(_["x"]) })
|
|
18
|
+
# j.call([{"x" => 20}, {"x" => 30}]) # => [50]
|
|
19
|
+
def self.new(*blocks)
|
|
20
|
+
Pipeline.new(*blocks)
|
|
21
|
+
end
|
|
22
|
+
end
|
data/test/jrf_test.rb
CHANGED
|
@@ -92,15 +92,15 @@ assert_includes(stderr, 'stage[1]: _["hello"]')
|
|
|
92
92
|
|
|
93
93
|
stdout, stderr, status = Open3.capture3("./exe/jrf", "--help")
|
|
94
94
|
assert_success(status, stderr, "help option")
|
|
95
|
-
assert_includes(stdout, "usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'")
|
|
95
|
+
assert_includes(stdout, "usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'")
|
|
96
96
|
assert_includes(stdout, "JSON filter with the power and speed of Ruby.")
|
|
97
97
|
assert_includes(stdout, "--lax")
|
|
98
|
+
assert_includes(stdout, "--pretty")
|
|
98
99
|
assert_includes(stdout, "Pipeline:")
|
|
99
100
|
assert_includes(stdout, "Connect stages with top-level >>.")
|
|
100
101
|
assert_includes(stdout, "The current value in each stage is available as _.")
|
|
101
102
|
assert_includes(stdout, "See Also:")
|
|
102
|
-
assert_includes(stdout, "
|
|
103
|
-
assert_includes(stdout, "man jrf")
|
|
103
|
+
assert_includes(stdout, "https://github.com/kazuho/jrf#readme")
|
|
104
104
|
assert_equal([], lines(stderr), "help stderr output")
|
|
105
105
|
|
|
106
106
|
stdout, stderr, status = run_jrf('select(_["hello"] == 123) >> _["hello"]', input_hello, "--verbose")
|
|
@@ -108,6 +108,21 @@ assert_success(status, stderr, "dump stages verbose alias")
|
|
|
108
108
|
assert_equal(%w[123], lines(stdout), "dump stages verbose alias output")
|
|
109
109
|
assert_includes(stderr, 'stage[0]: select(_["hello"] == 123)')
|
|
110
110
|
|
|
111
|
+
stdout, stderr, status = run_jrf('_', input_hello, "--pretty")
|
|
112
|
+
assert_success(status, stderr, "pretty output")
|
|
113
|
+
assert_equal(
|
|
114
|
+
[
|
|
115
|
+
"{",
|
|
116
|
+
"\"hello\": 123",
|
|
117
|
+
"}",
|
|
118
|
+
"{",
|
|
119
|
+
"\"hello\": 456",
|
|
120
|
+
"}"
|
|
121
|
+
],
|
|
122
|
+
lines(stdout),
|
|
123
|
+
"pretty output lines"
|
|
124
|
+
)
|
|
125
|
+
|
|
111
126
|
input_regex = <<~NDJSON
|
|
112
127
|
{"foo":{"bar":"ok"},"x":50}
|
|
113
128
|
{"foo":{"bar":"ng"},"x":70}
|
|
@@ -159,6 +174,14 @@ stdout, stderr, status = run_jrf('_["items"] >> flat >> group', input_flat)
|
|
|
159
174
|
assert_success(status, stderr, "flat then group")
|
|
160
175
|
assert_equal(['[1,2,3]'], lines(stdout), "flat then group output")
|
|
161
176
|
|
|
177
|
+
stdout, stderr, status = run_jrf('map { |x| flat }', "[[1,2],[3],[4,5,6]]\n")
|
|
178
|
+
assert_success(status, stderr, "flat inside map")
|
|
179
|
+
assert_equal(['[1,2,3,4,5,6]'], lines(stdout), "flat inside map output")
|
|
180
|
+
|
|
181
|
+
stdout, stderr, status = run_jrf('map_values { |v| flat }', "{\"a\":[1,2],\"b\":[3]}\n")
|
|
182
|
+
assert_failure(status, "flat inside map_values")
|
|
183
|
+
assert_includes(stderr, "flat is not supported inside map_values")
|
|
184
|
+
|
|
162
185
|
stdout, stderr, status = run_jrf('_["foo"] >> flat', input)
|
|
163
186
|
assert_failure(status, "flat requires array")
|
|
164
187
|
assert_includes(stderr, "flat expects Array")
|
|
@@ -194,6 +217,10 @@ stdout, stderr, status = run_jrf('select(_["x"] > 10) >> sum(_["foo"])', input_s
|
|
|
194
217
|
assert_success(status, stderr, "select + sum")
|
|
195
218
|
assert_equal(%w[9], lines(stdout), "select + sum output")
|
|
196
219
|
|
|
220
|
+
stdout, stderr, status = run_jrf('{total: sum(_["foo"]), n: count()}', input_sum)
|
|
221
|
+
assert_success(status, stderr, "structured reducer result")
|
|
222
|
+
assert_equal(['{"total":10,"n":4}'], lines(stdout), "structured reducer result output")
|
|
223
|
+
|
|
197
224
|
stdout, stderr, status = run_jrf('average(_["foo"])', input_sum)
|
|
198
225
|
assert_success(status, stderr, "average")
|
|
199
226
|
assert_float_close(2.5, lines(stdout).first.to_f, 1e-12, "average output")
|
|
@@ -206,33 +233,37 @@ stdout, stderr, status = run_jrf('_["foo"] >> sum(_ * 2)', input_sum)
|
|
|
206
233
|
assert_success(status, stderr, "extract + sum")
|
|
207
234
|
assert_equal(%w[20], lines(stdout), "extract + sum output")
|
|
208
235
|
|
|
236
|
+
stdout, stderr, status = run_jrf('sum(2 * _["foo"])', input_sum)
|
|
237
|
+
assert_success(status, stderr, "sum with literal on left")
|
|
238
|
+
assert_equal(%w[20], lines(stdout), "sum with literal on left output")
|
|
239
|
+
|
|
209
240
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> sum(_["foo"])', input_sum)
|
|
210
241
|
assert_success(status, stderr, "sum no matches")
|
|
211
|
-
assert_equal(
|
|
242
|
+
assert_equal([], lines(stdout), "sum no matches output")
|
|
212
243
|
|
|
213
244
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> count()', input_sum)
|
|
214
245
|
assert_success(status, stderr, "count no matches")
|
|
215
|
-
assert_equal(
|
|
246
|
+
assert_equal([], lines(stdout), "count no matches output")
|
|
216
247
|
|
|
217
248
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> count(_["foo"])', input_sum)
|
|
218
249
|
assert_success(status, stderr, "count(expr) no matches")
|
|
219
|
-
assert_equal(
|
|
250
|
+
assert_equal([], lines(stdout), "count(expr) no matches output")
|
|
220
251
|
|
|
221
252
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> average(_["foo"])', input_sum)
|
|
222
253
|
assert_success(status, stderr, "average no matches")
|
|
223
|
-
assert_equal(
|
|
254
|
+
assert_equal([], lines(stdout), "average no matches output")
|
|
224
255
|
|
|
225
256
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> stdev(_["foo"])', input_sum)
|
|
226
257
|
assert_success(status, stderr, "stdev no matches")
|
|
227
|
-
assert_equal(
|
|
258
|
+
assert_equal([], lines(stdout), "stdev no matches output")
|
|
228
259
|
|
|
229
260
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> min(_["foo"])', input_sum)
|
|
230
261
|
assert_success(status, stderr, "min no matches")
|
|
231
|
-
assert_equal(
|
|
262
|
+
assert_equal([], lines(stdout), "min no matches output")
|
|
232
263
|
|
|
233
264
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> max(_["foo"])', input_sum)
|
|
234
265
|
assert_success(status, stderr, "max no matches")
|
|
235
|
-
assert_equal(
|
|
266
|
+
assert_equal([], lines(stdout), "max no matches output")
|
|
236
267
|
|
|
237
268
|
stdout, stderr, status = run_jrf('sum(_["foo"]) >> _ + 1', input_sum)
|
|
238
269
|
assert_success(status, stderr, "reduce in middle")
|
|
@@ -274,7 +305,7 @@ assert_equal([], lines(stdout), "sort no matches output")
|
|
|
274
305
|
|
|
275
306
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> _["foo"] >> group', input_sum)
|
|
276
307
|
assert_success(status, stderr, "group no matches")
|
|
277
|
-
assert_equal([
|
|
308
|
+
assert_equal([], lines(stdout), "group no matches output")
|
|
278
309
|
|
|
279
310
|
input_group_multi = <<~NDJSON
|
|
280
311
|
{"x":1,"y":"a"}
|
|
@@ -288,7 +319,7 @@ assert_equal(['{"a":[1,2,3],"b":["a","b","c"]}'], lines(stdout), "group in hash
|
|
|
288
319
|
|
|
289
320
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> {a: group(_["x"]), b: group(_["y"])}', input_group_multi)
|
|
290
321
|
assert_success(status, stderr, "group in hash no matches")
|
|
291
|
-
assert_equal([
|
|
322
|
+
assert_equal([], lines(stdout), "group in hash no-match output")
|
|
292
323
|
|
|
293
324
|
stdout, stderr, status = run_jrf('percentile(_["foo"], 0.50)', input_sum)
|
|
294
325
|
assert_success(status, stderr, "single percentile")
|
|
@@ -297,7 +328,7 @@ assert_equal(%w[2], lines(stdout), "single percentile output")
|
|
|
297
328
|
stdout, stderr, status = run_jrf('percentile(_["foo"], [0.25, 0.50, 1.0])', input_sum)
|
|
298
329
|
assert_success(status, stderr, "array percentile")
|
|
299
330
|
assert_equal(
|
|
300
|
-
['
|
|
331
|
+
['[1,2,4]'],
|
|
301
332
|
lines(stdout),
|
|
302
333
|
"array percentile output"
|
|
303
334
|
)
|
|
@@ -332,7 +363,7 @@ assert_float_close(1.0, lines(stdout).first.to_f, 1e-12, "stdev ignores nil outp
|
|
|
332
363
|
stdout, stderr, status = run_jrf('percentile(_["foo"], [0.5, 1.0])', input_with_nil)
|
|
333
364
|
assert_success(status, stderr, "percentile ignores nil")
|
|
334
365
|
assert_equal(
|
|
335
|
-
['
|
|
366
|
+
['[1,3]'],
|
|
336
367
|
lines(stdout),
|
|
337
368
|
"percentile ignores nil output"
|
|
338
369
|
)
|
|
@@ -388,7 +419,7 @@ NDJSON
|
|
|
388
419
|
stdout, stderr, status = run_jrf('{a: percentile(_["a"], [0.25, 0.50, 1.0]), b: percentile(_["b"], [0.25, 0.50, 1.0])}', input_multi_cols)
|
|
389
420
|
assert_success(status, stderr, "nested array percentile for multiple columns")
|
|
390
421
|
assert_equal(
|
|
391
|
-
['{"a":[
|
|
422
|
+
['{"a":[1,2,4],"b":[10,20,40]}'],
|
|
392
423
|
lines(stdout),
|
|
393
424
|
"nested array percentile output"
|
|
394
425
|
)
|
|
@@ -513,6 +544,14 @@ stdout, stderr, status = run_jrf('_["values"] >> map { |x| max(x) }', input_map)
|
|
|
513
544
|
assert_success(status, stderr, "map with max")
|
|
514
545
|
assert_equal(['[3,30,300]'], lines(stdout), "map with max output")
|
|
515
546
|
|
|
547
|
+
stdout, stderr, status = run_jrf('_["values"] >> map { |x| sum(_[0] + x) }', input_map)
|
|
548
|
+
assert_success(status, stderr, "map keeps ambient _")
|
|
549
|
+
assert_equal(['[12,66,606]'], lines(stdout), "map ambient _ output")
|
|
550
|
+
|
|
551
|
+
stdout, stderr, status = run_jrf('_["values"] >> map { |x| reduce(0) { |acc, v| acc + v } }', input_map)
|
|
552
|
+
assert_success(status, stderr, "map with reduce")
|
|
553
|
+
assert_equal(['[6,60,600]'], lines(stdout), "map with reduce output")
|
|
554
|
+
|
|
516
555
|
input_map_varying = <<~NDJSON
|
|
517
556
|
[1,10]
|
|
518
557
|
[2,20,200]
|
|
@@ -523,6 +562,20 @@ stdout, stderr, status = run_jrf('map { |x| sum(x) }', input_map_varying)
|
|
|
523
562
|
assert_success(status, stderr, "map varying lengths")
|
|
524
563
|
assert_equal(['[6,30,200]'], lines(stdout), "map varying lengths output")
|
|
525
564
|
|
|
565
|
+
input_map_unsorted = <<~NDJSON
|
|
566
|
+
{"values":[3,30]}
|
|
567
|
+
{"values":[1,10]}
|
|
568
|
+
{"values":[2,20]}
|
|
569
|
+
NDJSON
|
|
570
|
+
|
|
571
|
+
stdout, stderr, status = run_jrf('_["values"] >> map { |x| group }', input_map)
|
|
572
|
+
assert_success(status, stderr, "map with group")
|
|
573
|
+
assert_equal(['[[1,2,3],[10,20,30],[100,200,300]]'], lines(stdout), "map with group output")
|
|
574
|
+
|
|
575
|
+
stdout, stderr, status = run_jrf('_["values"] >> map { |x| sort }', input_map_unsorted)
|
|
576
|
+
assert_success(status, stderr, "map with sort default key")
|
|
577
|
+
assert_equal(['[[1,2,3],[10,20,30]]'], lines(stdout), "map with sort default key output")
|
|
578
|
+
|
|
526
579
|
input_map_values = <<~NDJSON
|
|
527
580
|
{"a":1,"b":10}
|
|
528
581
|
{"a":2,"b":20}
|
|
@@ -551,18 +604,51 @@ stdout, stderr, status = run_jrf('map_values { |v| count(v) }', input_map_values
|
|
|
551
604
|
assert_success(status, stderr, "map_values with count")
|
|
552
605
|
assert_equal(['{"a":3,"b":3}'], lines(stdout), "map_values with count output")
|
|
553
606
|
|
|
607
|
+
stdout, stderr, status = run_jrf('map_values { |v| group }', input_map_values)
|
|
608
|
+
assert_success(status, stderr, "map_values with group")
|
|
609
|
+
assert_equal(['{"a":[1,2,3],"b":[10,20,30]}'], lines(stdout), "map_values with group output")
|
|
610
|
+
|
|
611
|
+
stdout, stderr, status = run_jrf('map_values { |v| sum(_["a"] + v) }', input_map_values)
|
|
612
|
+
assert_success(status, stderr, "map_values keeps ambient _")
|
|
613
|
+
assert_equal(['{"a":12,"b":66}'], lines(stdout), "map_values ambient _ output")
|
|
614
|
+
|
|
615
|
+
stdout, stderr, status = run_jrf('map_values { |v| reduce(0) { |acc, x| acc + x } }', input_map_values)
|
|
616
|
+
assert_success(status, stderr, "map_values with reduce")
|
|
617
|
+
assert_equal(['{"a":6,"b":60}'], lines(stdout), "map_values with reduce output")
|
|
618
|
+
|
|
554
619
|
stdout, stderr, status = run_jrf('select(false) >> map { |x| sum(x) }', input_map)
|
|
555
620
|
assert_success(status, stderr, "map no matches")
|
|
556
|
-
assert_equal([
|
|
621
|
+
assert_equal([], lines(stdout), "map no matches output")
|
|
557
622
|
|
|
558
623
|
stdout, stderr, status = run_jrf('select(false) >> map_values { |v| sum(v) }', input_map_values)
|
|
559
624
|
assert_success(status, stderr, "map_values no matches")
|
|
560
|
-
assert_equal([
|
|
625
|
+
assert_equal([], lines(stdout), "map_values no matches output")
|
|
561
626
|
|
|
562
627
|
stdout, stderr, status = run_jrf('map_values { |v| sum(v) } >> map_values { |v| v * 10 }', input_map_values)
|
|
563
628
|
assert_success(status, stderr, "map_values piped to map_values passthrough")
|
|
564
629
|
assert_equal(['{"a":60,"b":600}'], lines(stdout), "map_values piped output")
|
|
565
630
|
|
|
631
|
+
# map/map_values transformation (no reducers)
|
|
632
|
+
stdout, stderr, status = run_jrf('_["values"] >> map { |x| x + 1 }', input_map)
|
|
633
|
+
assert_success(status, stderr, "map transform")
|
|
634
|
+
assert_equal(['[2,11,101]', '[3,21,201]', '[4,31,301]'], lines(stdout), "map transform output")
|
|
635
|
+
|
|
636
|
+
stdout, stderr, status = run_jrf('_["values"] >> map { |x| select(x >= 20) }', input_map)
|
|
637
|
+
assert_success(status, stderr, "map transform with select")
|
|
638
|
+
assert_equal(['[100]', '[20,200]', '[30,300]'], lines(stdout), "map transform with select output")
|
|
639
|
+
|
|
640
|
+
stdout, stderr, status = run_jrf('map_values { |v| v * 2 }', input_map_values)
|
|
641
|
+
assert_success(status, stderr, "map_values transform")
|
|
642
|
+
assert_equal(['{"a":2,"b":20}', '{"a":4,"b":40}', '{"a":6,"b":60}'], lines(stdout), "map_values transform output")
|
|
643
|
+
|
|
644
|
+
stdout, stderr, status = run_jrf('map_values { |v| select(v >= 10) }', input_map_values)
|
|
645
|
+
assert_success(status, stderr, "map_values transform with select")
|
|
646
|
+
assert_equal(['{"b":10}', '{"b":20}', '{"b":30}'], lines(stdout), "map_values transform with select output")
|
|
647
|
+
|
|
648
|
+
stdout, stderr, status = run_jrf('_["values"] >> map { |x| x + 1 } >> map { |x| x * 10 }', input_map)
|
|
649
|
+
assert_success(status, stderr, "chained map transforms")
|
|
650
|
+
assert_equal(['[20,110,1010]', '[30,210,2010]', '[40,310,3010]'], lines(stdout), "chained map transforms output")
|
|
651
|
+
|
|
566
652
|
input_gb = <<~NDJSON
|
|
567
653
|
{"status":200,"path":"/a","latency":10}
|
|
568
654
|
{"status":404,"path":"/b","latency":50}
|
|
@@ -574,11 +660,11 @@ stdout, stderr, status = run_jrf('group_by(_["status"]) { count() }', input_gb)
|
|
|
574
660
|
assert_success(status, stderr, "group_by with count")
|
|
575
661
|
assert_equal(['{"200":3,"404":1}'], lines(stdout), "group_by with count output")
|
|
576
662
|
|
|
577
|
-
stdout, stderr, status = run_jrf('group_by(_["status"]) { sum(
|
|
663
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| sum(row["latency"]) }', input_gb)
|
|
578
664
|
assert_success(status, stderr, "group_by with sum")
|
|
579
665
|
assert_equal(['{"200":60,"404":50}'], lines(stdout), "group_by with sum output")
|
|
580
666
|
|
|
581
|
-
stdout, stderr, status = run_jrf('group_by(_["status"]) { average(
|
|
667
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| average(row["latency"]) }', input_gb)
|
|
582
668
|
assert_success(status, stderr, "group_by with average")
|
|
583
669
|
result = JSON.parse(lines(stdout).first)
|
|
584
670
|
assert_float_close(20.0, result["200"], 1e-12, "group_by average 200")
|
|
@@ -591,24 +677,97 @@ assert_equal(3, result["200"].length, "group_by default 200 count")
|
|
|
591
677
|
assert_equal(1, result["404"].length, "group_by default 404 count")
|
|
592
678
|
assert_equal("/a", result["200"][0]["path"], "group_by default first row")
|
|
593
679
|
|
|
594
|
-
stdout, stderr, status = run_jrf('group_by(_["status"]) { group(
|
|
680
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| group(row["path"]) }', input_gb)
|
|
595
681
|
assert_success(status, stderr, "group_by with group(expr)")
|
|
596
682
|
assert_equal(['{"200":["/a","/c","/d"],"404":["/b"]}'], lines(stdout), "group_by with group(expr) output")
|
|
597
683
|
|
|
598
|
-
stdout, stderr, status = run_jrf('group_by(_["status"]) {
|
|
684
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { group }', input_gb)
|
|
685
|
+
assert_success(status, stderr, "group_by with implicit group")
|
|
686
|
+
result = JSON.parse(lines(stdout).first)
|
|
687
|
+
assert_equal(3, result["200"].length, "group_by implicit group 200 count")
|
|
688
|
+
assert_equal("/a", result["200"][0]["path"], "group_by implicit group first row")
|
|
689
|
+
|
|
690
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| min(row["latency"]) }', input_gb)
|
|
599
691
|
assert_success(status, stderr, "group_by with min")
|
|
600
692
|
assert_equal(['{"200":10,"404":50}'], lines(stdout), "group_by with min output")
|
|
601
693
|
|
|
602
|
-
stdout, stderr, status = run_jrf('group_by(_["status"]) { {total: sum(
|
|
694
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| {total: sum(row["latency"]), n: count()} }', input_gb)
|
|
603
695
|
assert_success(status, stderr, "group_by with multi-reducer")
|
|
604
696
|
assert_equal(['{"200":{"total":60,"n":3},"404":{"total":50,"n":1}}'], lines(stdout), "group_by multi-reducer output")
|
|
605
697
|
|
|
698
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { reduce(0) { |acc, row| acc + row["latency"] } }', input_gb)
|
|
699
|
+
assert_success(status, stderr, "group_by with reduce")
|
|
700
|
+
assert_equal(['{"200":60,"404":50}'], lines(stdout), "group_by with reduce output")
|
|
701
|
+
|
|
606
702
|
stdout, stderr, status = run_jrf('select(false) >> group_by(_["status"]) { count() }', input_gb)
|
|
607
703
|
assert_success(status, stderr, "group_by no matches")
|
|
608
|
-
assert_equal([
|
|
704
|
+
assert_equal([], lines(stdout), "group_by no matches output")
|
|
609
705
|
|
|
610
706
|
stdout, stderr, status = run_jrf('group_by(_["status"]) { count() } >> _[200]', input_gb)
|
|
611
707
|
assert_success(status, stderr, "group_by then extract")
|
|
612
708
|
assert_equal(%w[3], lines(stdout), "group_by then extract output")
|
|
613
709
|
|
|
710
|
+
# === Library API (Jrf.new) ===
|
|
711
|
+
|
|
712
|
+
require_relative "../lib/jrf"
|
|
713
|
+
|
|
714
|
+
# passthrough
|
|
715
|
+
j = Jrf.new(proc { _ })
|
|
716
|
+
assert_equal([{"a" => 1}, {"a" => 2}], j.call([{"a" => 1}, {"a" => 2}]), "library passthrough")
|
|
717
|
+
|
|
718
|
+
# extract
|
|
719
|
+
j = Jrf.new(proc { _["a"] })
|
|
720
|
+
assert_equal([1, 2], j.call([{"a" => 1}, {"a" => 2}]), "library extract")
|
|
721
|
+
|
|
722
|
+
# select + extract (two stages)
|
|
723
|
+
j = Jrf.new(
|
|
724
|
+
proc { select(_["a"] > 1) },
|
|
725
|
+
proc { _["a"] }
|
|
726
|
+
)
|
|
727
|
+
assert_equal([2, 3], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library select + extract")
|
|
728
|
+
|
|
729
|
+
# sum
|
|
730
|
+
j = Jrf.new(proc { sum(_["a"]) })
|
|
731
|
+
assert_equal([6], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library sum")
|
|
732
|
+
|
|
733
|
+
# sum with literal on left
|
|
734
|
+
j = Jrf.new(proc { sum(2 * _["a"]) })
|
|
735
|
+
assert_equal([12], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library sum literal on left")
|
|
736
|
+
|
|
737
|
+
# structured reducers
|
|
738
|
+
j = Jrf.new(proc { {total: sum(_["a"]), n: count()} })
|
|
739
|
+
assert_equal([{total: 6, n: 3}], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library structured reducers")
|
|
740
|
+
|
|
741
|
+
# map transform
|
|
742
|
+
j = Jrf.new(proc { map { |x| x + 1 } })
|
|
743
|
+
assert_equal([[2, 3], [4, 5]], j.call([[1, 2], [3, 4]]), "library map transform")
|
|
744
|
+
|
|
745
|
+
# map reduce
|
|
746
|
+
j = Jrf.new(proc { map { |x| sum(x) } })
|
|
747
|
+
assert_equal([[4, 6]], j.call([[1, 2], [3, 4]]), "library map reduce")
|
|
748
|
+
|
|
749
|
+
# map_values transform
|
|
750
|
+
j = Jrf.new(proc { map_values { |v| v * 10 } })
|
|
751
|
+
assert_equal([{"a" => 10, "b" => 20}], j.call([{"a" => 1, "b" => 2}]), "library map_values transform")
|
|
752
|
+
|
|
753
|
+
# group_by
|
|
754
|
+
j = Jrf.new(proc { group_by(_["k"]) { count() } })
|
|
755
|
+
assert_equal([{"x" => 2, "y" => 1}], j.call([{"k" => "x"}, {"k" => "x"}, {"k" => "y"}]), "library group_by")
|
|
756
|
+
|
|
757
|
+
# reducer then passthrough
|
|
758
|
+
j = Jrf.new(
|
|
759
|
+
proc { sum(_["a"]) },
|
|
760
|
+
proc { _ + 1 }
|
|
761
|
+
)
|
|
762
|
+
assert_equal([7], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library reducer then passthrough")
|
|
763
|
+
|
|
764
|
+
# closure over local variables
|
|
765
|
+
threshold = 2
|
|
766
|
+
j = Jrf.new(proc { select(_["a"] > threshold) })
|
|
767
|
+
assert_equal([{"a" => 3}], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library closure")
|
|
768
|
+
|
|
769
|
+
# empty input
|
|
770
|
+
j = Jrf.new(proc { sum(_) })
|
|
771
|
+
assert_equal([], j.call([]), "library empty input")
|
|
772
|
+
|
|
614
773
|
puts "ok"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: jrf
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.3
|
|
4
|
+
version: 0.1.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- kazuho
|
|
@@ -41,6 +41,7 @@ files:
|
|
|
41
41
|
- lib/jrf.rb
|
|
42
42
|
- lib/jrf/cli.rb
|
|
43
43
|
- lib/jrf/control.rb
|
|
44
|
+
- lib/jrf/pipeline.rb
|
|
44
45
|
- lib/jrf/pipeline_parser.rb
|
|
45
46
|
- lib/jrf/reducers.rb
|
|
46
47
|
- lib/jrf/row_context.rb
|