jrf 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +0 -5
- data/lib/jrf/cli.rb +9 -5
- data/lib/jrf/pipeline.rb +85 -0
- data/lib/jrf/row_context.rb +1 -3
- data/lib/jrf/runner.rb +10 -85
- data/lib/jrf/stage.rb +66 -44
- data/lib/jrf/version.rb +1 -1
- data/lib/jrf.rb +18 -0
- data/test/jrf_test.rb +130 -23
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9ce648c2afbfe10dc161b08badb05acdb411baf839dde77433927380b6bb7439
|
|
4
|
+
data.tar.gz: 6be5a0851eecd3cfcbe93aff1cb8fdd163a84dd96a7b12e440fc514db03f67a0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: aa4dfead95dbe09453ec720cdbcf77ba4c7e3f1047c60f51d4ff54724dfa540bb1dbd5630ecb07d09d745e1e61e4c236f50f4407ff6d4c17dd5431b385679f57
|
|
7
|
+
data.tar.gz: 03c3f5dd3f36675a2bc31981effc506bb1822bb170e754785ccffe077becdd5af13421b4cbfd18fea1c1262f06feef61561be3e3243ca0379e1e6af21ad003c5
|
data/Rakefile
CHANGED
data/lib/jrf/cli.rb
CHANGED
|
@@ -4,16 +4,17 @@ require_relative "runner"
|
|
|
4
4
|
|
|
5
5
|
module Jrf
|
|
6
6
|
class CLI
|
|
7
|
-
USAGE = "usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'"
|
|
7
|
+
USAGE = "usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'"
|
|
8
8
|
|
|
9
9
|
HELP_TEXT = <<~'TEXT'
|
|
10
|
-
usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'
|
|
10
|
+
usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'
|
|
11
11
|
|
|
12
12
|
JSON filter with the power and speed of Ruby.
|
|
13
13
|
|
|
14
14
|
Options:
|
|
15
15
|
-v, --verbose print parsed stage expressions
|
|
16
16
|
--lax allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
|
|
17
|
+
-p, --pretty pretty-print JSON output instead of compact NDJSON
|
|
17
18
|
-h, --help show this help and exit
|
|
18
19
|
|
|
19
20
|
Pipeline:
|
|
@@ -28,13 +29,13 @@ module Jrf
|
|
|
28
29
|
jrf '_["msg"] >> reduce(nil) { |acc, v| acc ? "#{acc} #{v}" : v }'
|
|
29
30
|
|
|
30
31
|
See Also:
|
|
31
|
-
|
|
32
|
-
man jrf
|
|
32
|
+
https://github.com/kazuho/jrf#readme
|
|
33
33
|
TEXT
|
|
34
34
|
|
|
35
35
|
def self.run(argv = ARGV, input: ARGF, out: $stdout, err: $stderr)
|
|
36
36
|
verbose = false
|
|
37
37
|
lax = false
|
|
38
|
+
pretty = false
|
|
38
39
|
|
|
39
40
|
while argv.first&.start_with?("-")
|
|
40
41
|
case argv.first
|
|
@@ -44,6 +45,9 @@ module Jrf
|
|
|
44
45
|
when "--lax"
|
|
45
46
|
lax = true
|
|
46
47
|
argv.shift
|
|
48
|
+
when "-p", "--pretty"
|
|
49
|
+
pretty = true
|
|
50
|
+
argv.shift
|
|
47
51
|
when "-h", "--help"
|
|
48
52
|
out.puts HELP_TEXT
|
|
49
53
|
return 0
|
|
@@ -60,7 +64,7 @@ module Jrf
|
|
|
60
64
|
end
|
|
61
65
|
|
|
62
66
|
expression = argv.shift
|
|
63
|
-
Runner.new(input: input, out: out, err: err, lax: lax).run(expression, verbose: verbose)
|
|
67
|
+
Runner.new(input: input, out: out, err: err, lax: lax, pretty: pretty).run(expression, verbose: verbose)
|
|
64
68
|
0
|
|
65
69
|
end
|
|
66
70
|
end
|
data/lib/jrf/pipeline.rb
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "control"
|
|
4
|
+
require_relative "row_context"
|
|
5
|
+
require_relative "stage"
|
|
6
|
+
|
|
7
|
+
module Jrf
|
|
8
|
+
class Pipeline
|
|
9
|
+
def initialize(*blocks)
|
|
10
|
+
raise ArgumentError, "at least one stage block is required" if blocks.empty?
|
|
11
|
+
|
|
12
|
+
@ctx = RowContext.new
|
|
13
|
+
@stages = blocks.map { |block| Stage.new(@ctx, block, src: nil) }
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
# Run the pipeline on an enumerable of input values.
|
|
17
|
+
#
|
|
18
|
+
# Without a block, returns an Array of output values.
|
|
19
|
+
# With a block, streams each output value to the block.
|
|
20
|
+
#
|
|
21
|
+
# @param input [Enumerable] input values to process
|
|
22
|
+
# @yieldparam value output value
|
|
23
|
+
# @return [Array, nil] output values (without block), or nil (with block)
|
|
24
|
+
def call(input, &on_output)
|
|
25
|
+
if on_output
|
|
26
|
+
call_streaming(input, &on_output)
|
|
27
|
+
else
|
|
28
|
+
results = []
|
|
29
|
+
call_streaming(input) { |v| results << v }
|
|
30
|
+
results
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
private
|
|
35
|
+
|
|
36
|
+
def call_streaming(input, &on_output)
|
|
37
|
+
error = nil
|
|
38
|
+
begin
|
|
39
|
+
input.each { |value| process_value(value, @stages, &on_output) }
|
|
40
|
+
rescue StandardError => e
|
|
41
|
+
error = e
|
|
42
|
+
ensure
|
|
43
|
+
flush_reducers(@stages, &on_output)
|
|
44
|
+
end
|
|
45
|
+
raise error if error
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def process_value(input, stages, &on_output)
|
|
49
|
+
current_values = [input]
|
|
50
|
+
|
|
51
|
+
stages.each do |stage|
|
|
52
|
+
next_values = []
|
|
53
|
+
|
|
54
|
+
current_values.each do |value|
|
|
55
|
+
out = stage.call(value)
|
|
56
|
+
if out.equal?(Control::DROPPED)
|
|
57
|
+
next
|
|
58
|
+
elsif out.is_a?(Control::Flat)
|
|
59
|
+
unless out.value.is_a?(Array)
|
|
60
|
+
raise TypeError, "flat expects Array, got #{out.value.class}"
|
|
61
|
+
end
|
|
62
|
+
next_values.concat(out.value)
|
|
63
|
+
else
|
|
64
|
+
next_values << out
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
return if next_values.empty?
|
|
69
|
+
current_values = next_values
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
current_values.each(&on_output)
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def flush_reducers(stages, &on_output)
|
|
76
|
+
stages.each_with_index do |stage, idx|
|
|
77
|
+
rows = stage.finish
|
|
78
|
+
next if rows.empty?
|
|
79
|
+
|
|
80
|
+
rest = stages.drop(idx + 1)
|
|
81
|
+
rows.each { |value| process_value(value, rest, &on_output) }
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
end
|
data/lib/jrf/row_context.rb
CHANGED
|
@@ -143,9 +143,7 @@ module Jrf
|
|
|
143
143
|
else
|
|
144
144
|
->(values) {
|
|
145
145
|
sorted = values.sort
|
|
146
|
-
percentages.map
|
|
147
|
-
{ "percentile" => p, "value" => ctx.send(:percentile_value, sorted, p) }
|
|
148
|
-
end
|
|
146
|
+
[percentages.map { |p| ctx.send(:percentile_value, sorted, p) }]
|
|
149
147
|
}
|
|
150
148
|
end
|
|
151
149
|
|
data/lib/jrf/runner.rb
CHANGED
|
@@ -1,37 +1,19 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "json"
|
|
4
|
-
require_relative "
|
|
4
|
+
require_relative "pipeline"
|
|
5
5
|
require_relative "pipeline_parser"
|
|
6
|
-
require_relative "reducers"
|
|
7
|
-
require_relative "row_context"
|
|
8
|
-
require_relative "stage"
|
|
9
6
|
|
|
10
7
|
module Jrf
|
|
11
8
|
class Runner
|
|
12
9
|
RS_CHAR = "\x1e"
|
|
13
10
|
|
|
14
|
-
|
|
15
|
-
def [](key)
|
|
16
|
-
self
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
def method_missing(name, *args, &block)
|
|
20
|
-
self
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
def respond_to_missing?(name, include_private = false)
|
|
24
|
-
true
|
|
25
|
-
end
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
PROBE_VALUE = ProbeValue.new
|
|
29
|
-
|
|
30
|
-
def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false)
|
|
11
|
+
def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false, pretty: false)
|
|
31
12
|
@input = input
|
|
32
13
|
@out = out
|
|
33
14
|
@err = err
|
|
34
15
|
@lax = lax
|
|
16
|
+
@pretty = pretty
|
|
35
17
|
end
|
|
36
18
|
|
|
37
19
|
def run(expression, verbose: false)
|
|
@@ -39,53 +21,19 @@ module Jrf
|
|
|
39
21
|
stages = parsed[:stages]
|
|
40
22
|
dump_stages(stages) if verbose
|
|
41
23
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
24
|
+
blocks = stages.map { |stage|
|
|
25
|
+
eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
|
|
26
|
+
}
|
|
27
|
+
pipeline = Pipeline.new(*blocks)
|
|
46
28
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
end
|
|
51
|
-
rescue StandardError => e
|
|
52
|
-
error = e
|
|
53
|
-
ensure
|
|
54
|
-
flush_reducers(compiled)
|
|
29
|
+
input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
|
|
30
|
+
pipeline.call(input_enum) do |value|
|
|
31
|
+
@out.puts(@pretty ? JSON.pretty_generate(value) : JSON.generate(value))
|
|
55
32
|
end
|
|
56
|
-
|
|
57
|
-
raise error if error
|
|
58
33
|
end
|
|
59
34
|
|
|
60
35
|
private
|
|
61
36
|
|
|
62
|
-
def process_value(input, stages)
|
|
63
|
-
current_values = [input]
|
|
64
|
-
|
|
65
|
-
stages.each do |stage|
|
|
66
|
-
next_values = []
|
|
67
|
-
|
|
68
|
-
current_values.each do |value|
|
|
69
|
-
out = stage.call(value)
|
|
70
|
-
if out.equal?(Control::DROPPED)
|
|
71
|
-
next
|
|
72
|
-
elsif out.is_a?(Control::Flat)
|
|
73
|
-
unless out.value.is_a?(Array)
|
|
74
|
-
raise TypeError, "flat expects Array, got #{out.value.class}"
|
|
75
|
-
end
|
|
76
|
-
next_values.concat(out.value)
|
|
77
|
-
else
|
|
78
|
-
next_values << out
|
|
79
|
-
end
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
return if next_values.empty?
|
|
83
|
-
current_values = next_values
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
current_values.each { |value| @out.puts JSON.generate(value) }
|
|
87
|
-
end
|
|
88
|
-
|
|
89
37
|
def each_input_value
|
|
90
38
|
return each_input_value_lax { |value| yield value } if @lax
|
|
91
39
|
|
|
@@ -124,33 +72,10 @@ module Jrf
|
|
|
124
72
|
raise JSON::ParserError, e.message
|
|
125
73
|
end
|
|
126
74
|
|
|
127
|
-
def compile_stages(stages, ctx)
|
|
128
|
-
mod = Module.new
|
|
129
|
-
|
|
130
|
-
stages.each_with_index.map do |stage, i|
|
|
131
|
-
method_name = :"__jrf_stage_#{i}"
|
|
132
|
-
mod.module_eval("def #{method_name}; #{stage[:src]}; end", "(jrf stage #{i})", 1)
|
|
133
|
-
Stage.new(ctx, method_name, src: stage[:src])
|
|
134
|
-
end.tap { ctx.extend(mod) }
|
|
135
|
-
end
|
|
136
|
-
|
|
137
75
|
def dump_stages(stages)
|
|
138
76
|
stages.each_with_index do |stage, i|
|
|
139
77
|
@err.puts "stage[#{i}]: #{stage[:src]}"
|
|
140
78
|
end
|
|
141
79
|
end
|
|
142
|
-
|
|
143
|
-
def flush_reducers(stages)
|
|
144
|
-
tail = stages
|
|
145
|
-
loop do
|
|
146
|
-
idx = tail.index(&:reducer?)
|
|
147
|
-
break unless idx
|
|
148
|
-
|
|
149
|
-
rows = tail[idx].finish
|
|
150
|
-
rest = tail.drop(idx + 1)
|
|
151
|
-
rows.each { |value| process_value(value, rest) }
|
|
152
|
-
tail = rest
|
|
153
|
-
end
|
|
154
|
-
end
|
|
155
80
|
end
|
|
156
81
|
end
|
data/lib/jrf/stage.rb
CHANGED
|
@@ -7,7 +7,7 @@ module Jrf
|
|
|
7
7
|
class Stage
|
|
8
8
|
ReducerToken = Struct.new(:index)
|
|
9
9
|
|
|
10
|
-
attr_reader :
|
|
10
|
+
attr_reader :src
|
|
11
11
|
|
|
12
12
|
def self.resolve_template(template, reducers)
|
|
13
13
|
if template.is_a?(ReducerToken)
|
|
@@ -22,28 +22,27 @@ module Jrf
|
|
|
22
22
|
end
|
|
23
23
|
end
|
|
24
24
|
|
|
25
|
-
def initialize(ctx,
|
|
25
|
+
def initialize(ctx, block, src: nil)
|
|
26
26
|
@ctx = ctx
|
|
27
|
-
@
|
|
27
|
+
@block = block
|
|
28
28
|
@src = src
|
|
29
29
|
@reducers = []
|
|
30
30
|
@cursor = 0
|
|
31
31
|
@template = nil
|
|
32
32
|
@mode = nil # nil=unknown, :reducer, :passthrough
|
|
33
|
-
@
|
|
33
|
+
@map_transforms = {}
|
|
34
34
|
end
|
|
35
35
|
|
|
36
|
-
def call(input
|
|
36
|
+
def call(input)
|
|
37
37
|
@ctx.reset(input)
|
|
38
38
|
@cursor = 0
|
|
39
|
-
@probing = probing
|
|
40
39
|
@ctx.__jrf_current_stage = self
|
|
41
|
-
result = @ctx.
|
|
40
|
+
result = @ctx.instance_eval(&@block)
|
|
42
41
|
|
|
43
42
|
if @mode.nil? && @reducers.any?
|
|
44
43
|
@mode = :reducer
|
|
45
44
|
@template = result
|
|
46
|
-
elsif @mode.nil?
|
|
45
|
+
elsif @mode.nil?
|
|
47
46
|
@mode = :passthrough
|
|
48
47
|
end
|
|
49
48
|
|
|
@@ -54,43 +53,58 @@ module Jrf
|
|
|
54
53
|
idx = @cursor
|
|
55
54
|
finish_rows = finish || ->(acc) { [acc] }
|
|
56
55
|
@reducers[idx] ||= Reducers.reduce(initial, finish: finish_rows, &step_fn)
|
|
57
|
-
@reducers[idx].step(value)
|
|
56
|
+
@reducers[idx].step(value)
|
|
58
57
|
@cursor += 1
|
|
59
58
|
ReducerToken.new(idx)
|
|
60
59
|
end
|
|
61
60
|
|
|
62
61
|
def allocate_map(type, collection, &block)
|
|
63
62
|
idx = @cursor
|
|
63
|
+
@cursor += 1
|
|
64
|
+
|
|
65
|
+
# Transformation mode (detected on first call)
|
|
66
|
+
if @map_transforms[idx]
|
|
67
|
+
case type
|
|
68
|
+
when :array then return collection.map(&block)
|
|
69
|
+
when :hash then return collection.transform_values(&block)
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
|
|
64
73
|
map_reducer = (@reducers[idx] ||= MapReducer.new(type))
|
|
65
74
|
|
|
66
|
-
|
|
67
|
-
|
|
75
|
+
case type
|
|
76
|
+
when :array
|
|
77
|
+
raise TypeError, "map expects Array, got #{collection.class}" unless collection.is_a?(Array)
|
|
78
|
+
collection.each_with_index do |v, i|
|
|
79
|
+
slot = map_reducer.slot(i)
|
|
80
|
+
with_scoped_reducers(slot.reducers) do
|
|
81
|
+
result = block.call(v)
|
|
82
|
+
slot.template ||= result
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
when :hash
|
|
86
|
+
raise TypeError, "map_values expects Hash, got #{collection.class}" unless collection.is_a?(Hash)
|
|
87
|
+
collection.each do |k, v|
|
|
88
|
+
slot = map_reducer.slot(k)
|
|
89
|
+
with_scoped_reducers(slot.reducers) do
|
|
90
|
+
result = block.call(v)
|
|
91
|
+
slot.template ||= result
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|
|
68
95
|
|
|
96
|
+
# Detect transformation: no reducers were allocated in any slot
|
|
97
|
+
if @mode.nil? && map_reducer.slots.values.all? { |s| s.reducers.empty? }
|
|
98
|
+
@map_transforms[idx] = true
|
|
99
|
+
@reducers[idx] = nil
|
|
69
100
|
case type
|
|
70
101
|
when :array
|
|
71
|
-
|
|
72
|
-
collection.each_with_index do |v, i|
|
|
73
|
-
@ctx.reset(v)
|
|
74
|
-
with_scoped_reducers(map_reducer.slots[i] ||= []) do
|
|
75
|
-
result = block.call(v)
|
|
76
|
-
map_reducer.templates[i] ||= result
|
|
77
|
-
end
|
|
78
|
-
end
|
|
102
|
+
return map_reducer.slots.sort_by { |k, _| k }.map { |_, s| s.template }
|
|
79
103
|
when :hash
|
|
80
|
-
|
|
81
|
-
collection.each do |k, v|
|
|
82
|
-
@ctx.reset(v)
|
|
83
|
-
with_scoped_reducers(map_reducer.slots[k] ||= []) do
|
|
84
|
-
result = block.call(v)
|
|
85
|
-
map_reducer.templates[k] ||= result
|
|
86
|
-
end
|
|
87
|
-
end
|
|
104
|
+
return map_reducer.slots.transform_values(&:template)
|
|
88
105
|
end
|
|
89
|
-
|
|
90
|
-
@ctx.reset(saved_obj)
|
|
91
106
|
end
|
|
92
107
|
|
|
93
|
-
@cursor += 1
|
|
94
108
|
ReducerToken.new(idx)
|
|
95
109
|
end
|
|
96
110
|
|
|
@@ -98,22 +112,17 @@ module Jrf
|
|
|
98
112
|
idx = @cursor
|
|
99
113
|
map_reducer = (@reducers[idx] ||= MapReducer.new(:hash))
|
|
100
114
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
end
|
|
115
|
+
row = @ctx._
|
|
116
|
+
slot = map_reducer.slot(key)
|
|
117
|
+
with_scoped_reducers(slot.reducers) do
|
|
118
|
+
result = block.call(row)
|
|
119
|
+
slot.template ||= result
|
|
107
120
|
end
|
|
108
121
|
|
|
109
122
|
@cursor += 1
|
|
110
123
|
ReducerToken.new(idx)
|
|
111
124
|
end
|
|
112
125
|
|
|
113
|
-
def reducer?
|
|
114
|
-
@mode == :reducer
|
|
115
|
-
end
|
|
116
|
-
|
|
117
126
|
def finish
|
|
118
127
|
return [] unless @mode == :reducer && @reducers.any?
|
|
119
128
|
|
|
@@ -138,25 +147,38 @@ module Jrf
|
|
|
138
147
|
end
|
|
139
148
|
|
|
140
149
|
class MapReducer
|
|
141
|
-
attr_reader :slots, :templates
|
|
150
|
+
attr_reader :slots
|
|
142
151
|
|
|
143
152
|
def initialize(type)
|
|
144
153
|
@type = type
|
|
145
154
|
@slots = {}
|
|
146
|
-
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
def slot(key)
|
|
158
|
+
@slots[key] ||= SlotState.new
|
|
147
159
|
end
|
|
148
160
|
|
|
149
161
|
def finish
|
|
150
162
|
case @type
|
|
151
163
|
when :array
|
|
152
164
|
keys = @slots.keys.sort
|
|
153
|
-
[keys.map { |k| Stage.resolve_template(@
|
|
165
|
+
[keys.map { |k| Stage.resolve_template(@slots[k].template, @slots[k].reducers) }]
|
|
154
166
|
when :hash
|
|
155
167
|
result = {}
|
|
156
|
-
@slots.each { |k,
|
|
168
|
+
@slots.each { |k, s| result[k] = Stage.resolve_template(s.template, s.reducers) }
|
|
157
169
|
[result]
|
|
158
170
|
end
|
|
159
171
|
end
|
|
172
|
+
|
|
173
|
+
class SlotState
|
|
174
|
+
attr_reader :reducers
|
|
175
|
+
attr_accessor :template
|
|
176
|
+
|
|
177
|
+
def initialize
|
|
178
|
+
@reducers = []
|
|
179
|
+
@template = nil
|
|
180
|
+
end
|
|
181
|
+
end
|
|
160
182
|
end
|
|
161
183
|
end
|
|
162
184
|
end
|
data/lib/jrf/version.rb
CHANGED
data/lib/jrf.rb
CHANGED
|
@@ -2,3 +2,21 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "jrf/version"
|
|
4
4
|
require_relative "jrf/cli"
|
|
5
|
+
require_relative "jrf/pipeline"
|
|
6
|
+
|
|
7
|
+
module Jrf
|
|
8
|
+
# Create a pipeline from one or more stage blocks.
|
|
9
|
+
#
|
|
10
|
+
# Each block is evaluated in a context where +_+ is the current value.
|
|
11
|
+
# All jrf built-in functions (+select+, +sum+, +map+, +group_by+, etc.)
|
|
12
|
+
# are available inside blocks. See https://github.com/kazuho/jrf#readme for the full list.
|
|
13
|
+
#
|
|
14
|
+
# @param blocks [Array<Proc>] one or more stage procs
|
|
15
|
+
# @return [Pipeline] a callable pipeline
|
|
16
|
+
# @example
|
|
17
|
+
# j = Jrf.new(proc { select(_["x"] > 10) }, proc { sum(_["x"]) })
|
|
18
|
+
# j.call([{"x" => 20}, {"x" => 30}]) # => [50]
|
|
19
|
+
def self.new(*blocks)
|
|
20
|
+
Pipeline.new(*blocks)
|
|
21
|
+
end
|
|
22
|
+
end
|
data/test/jrf_test.rb
CHANGED
|
@@ -92,15 +92,15 @@ assert_includes(stderr, 'stage[1]: _["hello"]')
|
|
|
92
92
|
|
|
93
93
|
stdout, stderr, status = Open3.capture3("./exe/jrf", "--help")
|
|
94
94
|
assert_success(status, stderr, "help option")
|
|
95
|
-
assert_includes(stdout, "usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'")
|
|
95
|
+
assert_includes(stdout, "usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'")
|
|
96
96
|
assert_includes(stdout, "JSON filter with the power and speed of Ruby.")
|
|
97
97
|
assert_includes(stdout, "--lax")
|
|
98
|
+
assert_includes(stdout, "--pretty")
|
|
98
99
|
assert_includes(stdout, "Pipeline:")
|
|
99
100
|
assert_includes(stdout, "Connect stages with top-level >>.")
|
|
100
101
|
assert_includes(stdout, "The current value in each stage is available as _.")
|
|
101
102
|
assert_includes(stdout, "See Also:")
|
|
102
|
-
assert_includes(stdout, "
|
|
103
|
-
assert_includes(stdout, "man jrf")
|
|
103
|
+
assert_includes(stdout, "https://github.com/kazuho/jrf#readme")
|
|
104
104
|
assert_equal([], lines(stderr), "help stderr output")
|
|
105
105
|
|
|
106
106
|
stdout, stderr, status = run_jrf('select(_["hello"] == 123) >> _["hello"]', input_hello, "--verbose")
|
|
@@ -108,6 +108,21 @@ assert_success(status, stderr, "dump stages verbose alias")
|
|
|
108
108
|
assert_equal(%w[123], lines(stdout), "dump stages verbose alias output")
|
|
109
109
|
assert_includes(stderr, 'stage[0]: select(_["hello"] == 123)')
|
|
110
110
|
|
|
111
|
+
stdout, stderr, status = run_jrf('_', input_hello, "--pretty")
|
|
112
|
+
assert_success(status, stderr, "pretty output")
|
|
113
|
+
assert_equal(
|
|
114
|
+
[
|
|
115
|
+
"{",
|
|
116
|
+
"\"hello\": 123",
|
|
117
|
+
"}",
|
|
118
|
+
"{",
|
|
119
|
+
"\"hello\": 456",
|
|
120
|
+
"}"
|
|
121
|
+
],
|
|
122
|
+
lines(stdout),
|
|
123
|
+
"pretty output lines"
|
|
124
|
+
)
|
|
125
|
+
|
|
111
126
|
input_regex = <<~NDJSON
|
|
112
127
|
{"foo":{"bar":"ok"},"x":50}
|
|
113
128
|
{"foo":{"bar":"ng"},"x":70}
|
|
@@ -194,6 +209,10 @@ stdout, stderr, status = run_jrf('select(_["x"] > 10) >> sum(_["foo"])', input_s
|
|
|
194
209
|
assert_success(status, stderr, "select + sum")
|
|
195
210
|
assert_equal(%w[9], lines(stdout), "select + sum output")
|
|
196
211
|
|
|
212
|
+
stdout, stderr, status = run_jrf('{total: sum(_["foo"]), n: count()}', input_sum)
|
|
213
|
+
assert_success(status, stderr, "structured reducer result")
|
|
214
|
+
assert_equal(['{"total":10,"n":4}'], lines(stdout), "structured reducer result output")
|
|
215
|
+
|
|
197
216
|
stdout, stderr, status = run_jrf('average(_["foo"])', input_sum)
|
|
198
217
|
assert_success(status, stderr, "average")
|
|
199
218
|
assert_float_close(2.5, lines(stdout).first.to_f, 1e-12, "average output")
|
|
@@ -206,33 +225,37 @@ stdout, stderr, status = run_jrf('_["foo"] >> sum(_ * 2)', input_sum)
|
|
|
206
225
|
assert_success(status, stderr, "extract + sum")
|
|
207
226
|
assert_equal(%w[20], lines(stdout), "extract + sum output")
|
|
208
227
|
|
|
228
|
+
stdout, stderr, status = run_jrf('sum(2 * _["foo"])', input_sum)
|
|
229
|
+
assert_success(status, stderr, "sum with literal on left")
|
|
230
|
+
assert_equal(%w[20], lines(stdout), "sum with literal on left output")
|
|
231
|
+
|
|
209
232
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> sum(_["foo"])', input_sum)
|
|
210
233
|
assert_success(status, stderr, "sum no matches")
|
|
211
|
-
assert_equal(
|
|
234
|
+
assert_equal([], lines(stdout), "sum no matches output")
|
|
212
235
|
|
|
213
236
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> count()', input_sum)
|
|
214
237
|
assert_success(status, stderr, "count no matches")
|
|
215
|
-
assert_equal(
|
|
238
|
+
assert_equal([], lines(stdout), "count no matches output")
|
|
216
239
|
|
|
217
240
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> count(_["foo"])', input_sum)
|
|
218
241
|
assert_success(status, stderr, "count(expr) no matches")
|
|
219
|
-
assert_equal(
|
|
242
|
+
assert_equal([], lines(stdout), "count(expr) no matches output")
|
|
220
243
|
|
|
221
244
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> average(_["foo"])', input_sum)
|
|
222
245
|
assert_success(status, stderr, "average no matches")
|
|
223
|
-
assert_equal(
|
|
246
|
+
assert_equal([], lines(stdout), "average no matches output")
|
|
224
247
|
|
|
225
248
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> stdev(_["foo"])', input_sum)
|
|
226
249
|
assert_success(status, stderr, "stdev no matches")
|
|
227
|
-
assert_equal(
|
|
250
|
+
assert_equal([], lines(stdout), "stdev no matches output")
|
|
228
251
|
|
|
229
252
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> min(_["foo"])', input_sum)
|
|
230
253
|
assert_success(status, stderr, "min no matches")
|
|
231
|
-
assert_equal(
|
|
254
|
+
assert_equal([], lines(stdout), "min no matches output")
|
|
232
255
|
|
|
233
256
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> max(_["foo"])', input_sum)
|
|
234
257
|
assert_success(status, stderr, "max no matches")
|
|
235
|
-
assert_equal(
|
|
258
|
+
assert_equal([], lines(stdout), "max no matches output")
|
|
236
259
|
|
|
237
260
|
stdout, stderr, status = run_jrf('sum(_["foo"]) >> _ + 1', input_sum)
|
|
238
261
|
assert_success(status, stderr, "reduce in middle")
|
|
@@ -274,7 +297,7 @@ assert_equal([], lines(stdout), "sort no matches output")
|
|
|
274
297
|
|
|
275
298
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> _["foo"] >> group', input_sum)
|
|
276
299
|
assert_success(status, stderr, "group no matches")
|
|
277
|
-
assert_equal([
|
|
300
|
+
assert_equal([], lines(stdout), "group no matches output")
|
|
278
301
|
|
|
279
302
|
input_group_multi = <<~NDJSON
|
|
280
303
|
{"x":1,"y":"a"}
|
|
@@ -288,7 +311,7 @@ assert_equal(['{"a":[1,2,3],"b":["a","b","c"]}'], lines(stdout), "group in hash
|
|
|
288
311
|
|
|
289
312
|
stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> {a: group(_["x"]), b: group(_["y"])}', input_group_multi)
|
|
290
313
|
assert_success(status, stderr, "group in hash no matches")
|
|
291
|
-
assert_equal([
|
|
314
|
+
assert_equal([], lines(stdout), "group in hash no-match output")
|
|
292
315
|
|
|
293
316
|
stdout, stderr, status = run_jrf('percentile(_["foo"], 0.50)', input_sum)
|
|
294
317
|
assert_success(status, stderr, "single percentile")
|
|
@@ -297,7 +320,7 @@ assert_equal(%w[2], lines(stdout), "single percentile output")
|
|
|
297
320
|
stdout, stderr, status = run_jrf('percentile(_["foo"], [0.25, 0.50, 1.0])', input_sum)
|
|
298
321
|
assert_success(status, stderr, "array percentile")
|
|
299
322
|
assert_equal(
|
|
300
|
-
['
|
|
323
|
+
['[1,2,4]'],
|
|
301
324
|
lines(stdout),
|
|
302
325
|
"array percentile output"
|
|
303
326
|
)
|
|
@@ -332,7 +355,7 @@ assert_float_close(1.0, lines(stdout).first.to_f, 1e-12, "stdev ignores nil outp
|
|
|
332
355
|
stdout, stderr, status = run_jrf('percentile(_["foo"], [0.5, 1.0])', input_with_nil)
|
|
333
356
|
assert_success(status, stderr, "percentile ignores nil")
|
|
334
357
|
assert_equal(
|
|
335
|
-
['
|
|
358
|
+
['[1,3]'],
|
|
336
359
|
lines(stdout),
|
|
337
360
|
"percentile ignores nil output"
|
|
338
361
|
)
|
|
@@ -388,7 +411,7 @@ NDJSON
|
|
|
388
411
|
stdout, stderr, status = run_jrf('{a: percentile(_["a"], [0.25, 0.50, 1.0]), b: percentile(_["b"], [0.25, 0.50, 1.0])}', input_multi_cols)
|
|
389
412
|
assert_success(status, stderr, "nested array percentile for multiple columns")
|
|
390
413
|
assert_equal(
|
|
391
|
-
['{"a":[
|
|
414
|
+
['{"a":[1,2,4],"b":[10,20,40]}'],
|
|
392
415
|
lines(stdout),
|
|
393
416
|
"nested array percentile output"
|
|
394
417
|
)
|
|
@@ -513,6 +536,10 @@ stdout, stderr, status = run_jrf('_["values"] >> map { |x| max(x) }', input_map)
|
|
|
513
536
|
assert_success(status, stderr, "map with max")
|
|
514
537
|
assert_equal(['[3,30,300]'], lines(stdout), "map with max output")
|
|
515
538
|
|
|
539
|
+
stdout, stderr, status = run_jrf('_["values"] >> map { |x| sum(_[0] + x) }', input_map)
|
|
540
|
+
assert_success(status, stderr, "map keeps ambient _")
|
|
541
|
+
assert_equal(['[12,66,606]'], lines(stdout), "map ambient _ output")
|
|
542
|
+
|
|
516
543
|
input_map_varying = <<~NDJSON
|
|
517
544
|
[1,10]
|
|
518
545
|
[2,20,200]
|
|
@@ -551,18 +578,35 @@ stdout, stderr, status = run_jrf('map_values { |v| count(v) }', input_map_values
|
|
|
551
578
|
assert_success(status, stderr, "map_values with count")
|
|
552
579
|
assert_equal(['{"a":3,"b":3}'], lines(stdout), "map_values with count output")
|
|
553
580
|
|
|
581
|
+
stdout, stderr, status = run_jrf('map_values { |v| sum(_["a"] + v) }', input_map_values)
|
|
582
|
+
assert_success(status, stderr, "map_values keeps ambient _")
|
|
583
|
+
assert_equal(['{"a":12,"b":66}'], lines(stdout), "map_values ambient _ output")
|
|
584
|
+
|
|
554
585
|
stdout, stderr, status = run_jrf('select(false) >> map { |x| sum(x) }', input_map)
|
|
555
586
|
assert_success(status, stderr, "map no matches")
|
|
556
|
-
assert_equal([
|
|
587
|
+
assert_equal([], lines(stdout), "map no matches output")
|
|
557
588
|
|
|
558
589
|
stdout, stderr, status = run_jrf('select(false) >> map_values { |v| sum(v) }', input_map_values)
|
|
559
590
|
assert_success(status, stderr, "map_values no matches")
|
|
560
|
-
assert_equal([
|
|
591
|
+
assert_equal([], lines(stdout), "map_values no matches output")
|
|
561
592
|
|
|
562
593
|
stdout, stderr, status = run_jrf('map_values { |v| sum(v) } >> map_values { |v| v * 10 }', input_map_values)
|
|
563
594
|
assert_success(status, stderr, "map_values piped to map_values passthrough")
|
|
564
595
|
assert_equal(['{"a":60,"b":600}'], lines(stdout), "map_values piped output")
|
|
565
596
|
|
|
597
|
+
# map/map_values transformation (no reducers)
|
|
598
|
+
stdout, stderr, status = run_jrf('_["values"] >> map { |x| x + 1 }', input_map)
|
|
599
|
+
assert_success(status, stderr, "map transform")
|
|
600
|
+
assert_equal(['[2,11,101]', '[3,21,201]', '[4,31,301]'], lines(stdout), "map transform output")
|
|
601
|
+
|
|
602
|
+
stdout, stderr, status = run_jrf('map_values { |v| v * 2 }', input_map_values)
|
|
603
|
+
assert_success(status, stderr, "map_values transform")
|
|
604
|
+
assert_equal(['{"a":2,"b":20}', '{"a":4,"b":40}', '{"a":6,"b":60}'], lines(stdout), "map_values transform output")
|
|
605
|
+
|
|
606
|
+
stdout, stderr, status = run_jrf('_["values"] >> map { |x| x + 1 } >> map { |x| x * 10 }', input_map)
|
|
607
|
+
assert_success(status, stderr, "chained map transforms")
|
|
608
|
+
assert_equal(['[20,110,1010]', '[30,210,2010]', '[40,310,3010]'], lines(stdout), "chained map transforms output")
|
|
609
|
+
|
|
566
610
|
input_gb = <<~NDJSON
|
|
567
611
|
{"status":200,"path":"/a","latency":10}
|
|
568
612
|
{"status":404,"path":"/b","latency":50}
|
|
@@ -574,11 +618,11 @@ stdout, stderr, status = run_jrf('group_by(_["status"]) { count() }', input_gb)
|
|
|
574
618
|
assert_success(status, stderr, "group_by with count")
|
|
575
619
|
assert_equal(['{"200":3,"404":1}'], lines(stdout), "group_by with count output")
|
|
576
620
|
|
|
577
|
-
stdout, stderr, status = run_jrf('group_by(_["status"]) { sum(
|
|
621
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| sum(row["latency"]) }', input_gb)
|
|
578
622
|
assert_success(status, stderr, "group_by with sum")
|
|
579
623
|
assert_equal(['{"200":60,"404":50}'], lines(stdout), "group_by with sum output")
|
|
580
624
|
|
|
581
|
-
stdout, stderr, status = run_jrf('group_by(_["status"]) { average(
|
|
625
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| average(row["latency"]) }', input_gb)
|
|
582
626
|
assert_success(status, stderr, "group_by with average")
|
|
583
627
|
result = JSON.parse(lines(stdout).first)
|
|
584
628
|
assert_float_close(20.0, result["200"], 1e-12, "group_by average 200")
|
|
@@ -591,24 +635,87 @@ assert_equal(3, result["200"].length, "group_by default 200 count")
|
|
|
591
635
|
assert_equal(1, result["404"].length, "group_by default 404 count")
|
|
592
636
|
assert_equal("/a", result["200"][0]["path"], "group_by default first row")
|
|
593
637
|
|
|
594
|
-
stdout, stderr, status = run_jrf('group_by(_["status"]) { group(
|
|
638
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| group(row["path"]) }', input_gb)
|
|
595
639
|
assert_success(status, stderr, "group_by with group(expr)")
|
|
596
640
|
assert_equal(['{"200":["/a","/c","/d"],"404":["/b"]}'], lines(stdout), "group_by with group(expr) output")
|
|
597
641
|
|
|
598
|
-
stdout, stderr, status = run_jrf('group_by(_["status"]) { min(
|
|
642
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| min(row["latency"]) }', input_gb)
|
|
599
643
|
assert_success(status, stderr, "group_by with min")
|
|
600
644
|
assert_equal(['{"200":10,"404":50}'], lines(stdout), "group_by with min output")
|
|
601
645
|
|
|
602
|
-
stdout, stderr, status = run_jrf('group_by(_["status"]) { {total: sum(
|
|
646
|
+
stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| {total: sum(row["latency"]), n: count()} }', input_gb)
|
|
603
647
|
assert_success(status, stderr, "group_by with multi-reducer")
|
|
604
648
|
assert_equal(['{"200":{"total":60,"n":3},"404":{"total":50,"n":1}}'], lines(stdout), "group_by multi-reducer output")
|
|
605
649
|
|
|
606
650
|
stdout, stderr, status = run_jrf('select(false) >> group_by(_["status"]) { count() }', input_gb)
|
|
607
651
|
assert_success(status, stderr, "group_by no matches")
|
|
608
|
-
assert_equal([
|
|
652
|
+
assert_equal([], lines(stdout), "group_by no matches output")
|
|
609
653
|
|
|
610
654
|
stdout, stderr, status = run_jrf('group_by(_["status"]) { count() } >> _[200]', input_gb)
|
|
611
655
|
assert_success(status, stderr, "group_by then extract")
|
|
612
656
|
assert_equal(%w[3], lines(stdout), "group_by then extract output")
|
|
613
657
|
|
|
658
|
+
# === Library API (Jrf.new) ===
|
|
659
|
+
|
|
660
|
+
require_relative "../lib/jrf"
|
|
661
|
+
|
|
662
|
+
# passthrough
|
|
663
|
+
j = Jrf.new(proc { _ })
|
|
664
|
+
assert_equal([{"a" => 1}, {"a" => 2}], j.call([{"a" => 1}, {"a" => 2}]), "library passthrough")
|
|
665
|
+
|
|
666
|
+
# extract
|
|
667
|
+
j = Jrf.new(proc { _["a"] })
|
|
668
|
+
assert_equal([1, 2], j.call([{"a" => 1}, {"a" => 2}]), "library extract")
|
|
669
|
+
|
|
670
|
+
# select + extract (two stages)
|
|
671
|
+
j = Jrf.new(
|
|
672
|
+
proc { select(_["a"] > 1) },
|
|
673
|
+
proc { _["a"] }
|
|
674
|
+
)
|
|
675
|
+
assert_equal([2, 3], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library select + extract")
|
|
676
|
+
|
|
677
|
+
# sum
|
|
678
|
+
j = Jrf.new(proc { sum(_["a"]) })
|
|
679
|
+
assert_equal([6], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library sum")
|
|
680
|
+
|
|
681
|
+
# sum with literal on left
|
|
682
|
+
j = Jrf.new(proc { sum(2 * _["a"]) })
|
|
683
|
+
assert_equal([12], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library sum literal on left")
|
|
684
|
+
|
|
685
|
+
# structured reducers
|
|
686
|
+
j = Jrf.new(proc { {total: sum(_["a"]), n: count()} })
|
|
687
|
+
assert_equal([{total: 6, n: 3}], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library structured reducers")
|
|
688
|
+
|
|
689
|
+
# map transform
|
|
690
|
+
j = Jrf.new(proc { map { |x| x + 1 } })
|
|
691
|
+
assert_equal([[2, 3], [4, 5]], j.call([[1, 2], [3, 4]]), "library map transform")
|
|
692
|
+
|
|
693
|
+
# map reduce
|
|
694
|
+
j = Jrf.new(proc { map { |x| sum(x) } })
|
|
695
|
+
assert_equal([[4, 6]], j.call([[1, 2], [3, 4]]), "library map reduce")
|
|
696
|
+
|
|
697
|
+
# map_values transform
|
|
698
|
+
j = Jrf.new(proc { map_values { |v| v * 10 } })
|
|
699
|
+
assert_equal([{"a" => 10, "b" => 20}], j.call([{"a" => 1, "b" => 2}]), "library map_values transform")
|
|
700
|
+
|
|
701
|
+
# group_by
|
|
702
|
+
j = Jrf.new(proc { group_by(_["k"]) { count() } })
|
|
703
|
+
assert_equal([{"x" => 2, "y" => 1}], j.call([{"k" => "x"}, {"k" => "x"}, {"k" => "y"}]), "library group_by")
|
|
704
|
+
|
|
705
|
+
# reducer then passthrough
|
|
706
|
+
j = Jrf.new(
|
|
707
|
+
proc { sum(_["a"]) },
|
|
708
|
+
proc { _ + 1 }
|
|
709
|
+
)
|
|
710
|
+
assert_equal([7], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library reducer then passthrough")
|
|
711
|
+
|
|
712
|
+
# closure over local variables
|
|
713
|
+
threshold = 2
|
|
714
|
+
j = Jrf.new(proc { select(_["a"] > threshold) })
|
|
715
|
+
assert_equal([{"a" => 3}], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library closure")
|
|
716
|
+
|
|
717
|
+
# empty input
|
|
718
|
+
j = Jrf.new(proc { sum(_) })
|
|
719
|
+
assert_equal([], j.call([]), "library empty input")
|
|
720
|
+
|
|
614
721
|
puts "ok"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: jrf
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.3
|
|
4
|
+
version: 0.1.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- kazuho
|
|
@@ -41,6 +41,7 @@ files:
|
|
|
41
41
|
- lib/jrf.rb
|
|
42
42
|
- lib/jrf/cli.rb
|
|
43
43
|
- lib/jrf/control.rb
|
|
44
|
+
- lib/jrf/pipeline.rb
|
|
44
45
|
- lib/jrf/pipeline_parser.rb
|
|
45
46
|
- lib/jrf/reducers.rb
|
|
46
47
|
- lib/jrf/row_context.rb
|