jrf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/DESIGN.txt +455 -0
- data/Gemfile +5 -0
- data/Rakefile +10 -0
- data/exe/jrf +7 -0
- data/jrf.gemspec +20 -0
- data/lib/jrf/cli.rb +32 -0
- data/lib/jrf/control.rb +8 -0
- data/lib/jrf/pipeline_parser.rb +147 -0
- data/lib/jrf/reducers.rb +27 -0
- data/lib/jrf/row_context.rb +200 -0
- data/lib/jrf/runner.rb +179 -0
- data/lib/jrf/version.rb +5 -0
- data/lib/jrf.rb +4 -0
- data/test/jrf_test.rb +325 -0
- metadata +54 -0
data/lib/jrf/reducers.rb
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Jrf
  # Primitives for streaming aggregations: values are folded one at a time
  # into an accumulator, and the final result is produced on demand.
  module Reducers
    module_function

    # A single running fold: an accumulator, a step function that combines
    # the accumulator with each incoming value, and an optional finishing
    # function applied to the accumulator when the result is requested.
    class Reduce
      # @param initial [Object] starting accumulator value
      # @param finish_fn [#call, nil] maps the final accumulator to the
      #   result; when nil the accumulator itself is the result
      # @yieldparam acc [Object] current accumulator
      # @yieldparam value [Object] incoming value
      # @yieldreturn [Object] the next accumulator
      # @raise [ArgumentError] when no step block is given
      def initialize(initial, finish_fn: nil, &step_fn)
        # Fail at construction instead of with a NoMethodError on nil at the
        # first #step call.
        raise ArgumentError, "reduce requires a block" unless step_fn

        @acc = initial
        @step_fn = step_fn
        @finish_fn = finish_fn || ->(acc) { acc }
      end

      # Folds +value+ into the accumulator.
      #
      # @return [Object] the updated accumulator
      def step(value)
        @acc = @step_fn.call(@acc, value)
      end

      # @return [Object] the finishing function applied to the accumulator
      def finish
        @finish_fn.call(@acc)
      end
    end

    # Convenience constructor for {Reduce}.
    #
    # @param initial [Object] starting accumulator value
    # @param finish [#call, nil] optional finishing function
    # @return [Reduce]
    # @raise [ArgumentError] when no step block is given
    def reduce(initial, finish: nil, &step_fn)
      Reduce.new(initial, finish_fn: finish, &step_fn)
    end
  end
end
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
require_relative "control"
|
|
3
|
+
require_relative "reducers"
|
|
4
|
+
|
|
5
|
+
module Jrf
  # The object stage expressions are evaluated against. It exposes the
  # current row as `_` and provides the reducer helpers (sum, min, sort, ...).
  # Reducer helpers do not return a result directly: they register or advance
  # a reducer in the active stage's state hash and return a ReducerToken that
  # marks where the finished value belongs.
  class RowContext
    # Sentinel that distinguishes "argument omitted" from an explicit nil.
    MISSING = Object.new
    # Placeholder returned by reducer helpers; +index+ is the reducer's slot
    # in the stage's :reducers array.
    ReducerToken = Struct.new(:index)

    class << self
      # Defines an instance method +name+ backed by a reducer specification.
      #
      # The definition block receives the context, the caller's positional
      # and keyword arguments, and the caller's block as the +block:+
      # keyword. It must return a Hash with :value, :initial and :step, and
      # optionally :finish and :emit_many.
      def define_reducer(name, &definition)
        define_method(name) do |*args, **kwargs, &block|
          spec = definition.call(self, *args, **kwargs, block: block)
          create_reducer(
            spec.fetch(:value),
            initial: reducer_initial_value(spec.fetch(:initial)),
            finish: spec[:finish],
            emit_many: spec.fetch(:emit_many, false),
            &spec.fetch(:step)
          )
        end
      end
    end

    # @param obj [Object, nil] the initial current row
    def initialize(obj = nil)
      @obj = obj
      @__jrf_stage = nil
    end

    # Replaces the current row.
    #
    # @return [RowContext] self
    def reset(obj)
      @obj = obj
      self
    end

    # @return [Object] the current row
    def _
      @obj
    end

    # Wraps the current row in a Control::Flat marker.
    def flat
      Control::Flat.new(@obj)
    end

    # Running total of +value+, starting from +initial+.
    define_reducer(:sum) do |_ctx, value, initial: 0, block: nil|
      { value: value, initial: initial, step: ->(acc, v) { acc + v } }
    end

    # Smallest value seen; nil until a value has been stepped.
    define_reducer(:min) do |_ctx, value, block: nil|
      { value: value, initial: nil, step: ->(acc, v) { acc.nil? || v < acc ? v : acc } }
    end

    # Largest value seen; nil until a value has been stepped.
    define_reducer(:max) do |_ctx, value, block: nil|
      { value: value, initial: nil, step: ->(acc, v) { acc.nil? || v > acc ? v : acc } }
    end

    # Mean of the values; nil when no value was stepped. The sum starts at
    # 0.0, so the division is floating point.
    define_reducer(:average) do |_ctx, value, block: nil|
      {
        value: value,
        initial: -> { [0.0, 0] },
        finish: ->((sum, count)) { count.zero? ? nil : (sum / count) },
        step: ->(acc, v) {
          acc[0] += v
          acc[1] += 1
          acc
        }
      }
    end

    # Standard deviation from a running [count, mean, m2] triple that is
    # updated in a single pass. With +sample: true+ the divisor is count - 1
    # and fewer than two values yield nil; otherwise the divisor is count.
    define_reducer(:stdev) do |_ctx, value, sample: false, block: nil|
      {
        value: value,
        initial: [0, 0.0, 0.0],
        finish: ->((count, _mean, m2)) {
          return nil if count.zero?
          return nil if sample && count < 2

          denom = sample ? (count - 1) : count
          Math.sqrt(m2 / denom)
        },
        step: ->(acc, x) {
          count, mean, m2 = acc
          count += 1
          delta = x - mean
          mean += delta / count
          delta2 = x - mean
          m2 += delta * delta2
          acc[0] = count
          acc[1] = mean
          acc[2] = m2
          acc
        }
      }
    end

    # Collects every row and emits them sorted (one output per row). With a
    # block, the block is the comparator passed to Array#sort; otherwise rows
    # are ordered by +key+, which defaults to the row itself.
    define_reducer(:sort) do |ctx, key = MISSING, block: nil|
      if block
        {
          value: ctx._,
          initial: -> { [] },
          emit_many: true,
          finish: ->(rows) { rows.sort(&block) },
          step: ->(rows, row) { rows << row }
        }
      else
        resolved_key = key.equal?(MISSING) ? ctx._ : key
        {
          value: [resolved_key, ctx._],
          initial: -> { [] },
          emit_many: true,
          finish: ->(pairs) { pairs.sort_by(&:first).map(&:last) },
          step: ->(pairs, pair) { pairs << pair }
        }
      end
    end

    # Collects +value+ (default: the current row) into a single Array.
    define_reducer(:group) do |ctx, value = MISSING, block: nil|
      resolved_value = value.equal?(MISSING) ? ctx._ : value
      { value: resolved_value, initial: -> { [] }, step: ->(acc, v) { acc << v } }
    end

    # Nearest-rank percentile(s) of +value+. A scalar +percentage+ yields the
    # bare value; an Array yields one {"percentile", "value"} Hash per entry
    # and marks the stage as emitting many outputs.
    define_reducer(:percentile) do |ctx, value, percentage, block: nil|
      percentages = percentage.is_a?(Array) ? percentage : [percentage]
      percentages.each { |p| ctx.send(:validate_percentile!, p) }
      scalar = !percentage.is_a?(Array)

      finish =
        if scalar
          ->(values) { ctx.send(:percentile_value, values.sort, percentages.first) }
        else
          ->(values) {
            sorted = values.sort
            percentages.map do |p|
              { "percentile" => p, "value" => ctx.send(:percentile_value, sorted, p) }
            end
          }
        end

      {
        value: value,
        initial: -> { [] },
        emit_many: !scalar,
        finish: finish,
        step: ->(acc, v) { acc << v }
      }
    end

    # User-defined fold over the current row.
    #
    # @param initial [Object] starting accumulator
    # @yieldparam acc [Object] accumulator
    # @yieldparam row [Object] current row
    # @raise [ArgumentError] when no block is given
    # @return [ReducerToken]
    def reduce(initial, &block)
      raise ArgumentError, "reduce requires a block" unless block

      create_reducer(@obj, initial: initial, &block)
    end

    # Binds this context to +stage+ (a mutable Hash holding per-stage reducer
    # state) and resets the per-evaluation bookkeeping. When +probing+ is
    # true, reducers are registered but not advanced.
    def __jrf_begin_stage__(stage, probing: false)
      @__jrf_stage = stage
      stage[:reducer_cursor] = 0
      stage[:reducer_called] = false
      stage[:reducer_probing] = probing
    end

    # @return [Boolean, nil] truthy when a reducer helper ran since the last
    #   __jrf_begin_stage__
    def __jrf_reducer_called?
      @__jrf_stage && @__jrf_stage[:reducer_called]
    end

    private

    # Registers (on first use) and advances the reducer at the current cursor
    # position of the active stage, then moves the cursor forward.
    #
    # @return [ReducerToken] token pointing at the reducer's slot
    def create_reducer(value, initial:, emit_many: false, finish: nil, &step_fn)
      raise "internal error: reducer used outside stage context" unless @__jrf_stage

      reducers = (@__jrf_stage[:reducers] ||= [])
      idx = @__jrf_stage[:reducer_cursor] || 0
      reducers[idx] ||= Reducers.reduce(initial, finish: finish, &step_fn)
      reducers[idx].step(value) unless @__jrf_stage[:reducer_probing]
      @__jrf_stage[:reducer_cursor] = idx + 1
      @__jrf_stage[:reducer_called] = true
      # The first reducer registered in a stage decides whether it emits many.
      @__jrf_stage[:reducer_emit_many] = emit_many if @__jrf_stage[:reducer_emit_many].nil?
      ReducerToken.new(idx)
    end

    # Produces a fresh accumulator: callables are invoked, Arrays and Hashes
    # are shallow-copied, and anything else is used as-is.
    def reducer_initial_value(initial)
      return initial.call if initial.respond_to?(:call)
      return initial.dup if initial.is_a?(Array) || initial.is_a?(Hash)

      initial
    end

    # @raise [ArgumentError] unless +value+ is a Numeric within [0, 1]
    def validate_percentile!(value)
      unless value.is_a?(Numeric) && value >= 0 && value <= 1
        raise ArgumentError, "percentile must be numeric in [0, 1]"
      end
    end

    # Nearest-rank percentile: the element at rank ceil(p * n), 1-based,
    # clamped to the valid range.
    #
    # @param sorted_values [Array] values in ascending order
    # @param percentile [Numeric] fraction in [0, 1]
    # @return [Object, nil] the selected element, or nil for an empty array
    def percentile_value(sorted_values, percentile)
      return nil if sorted_values.empty?

      # Compute the rank with exact rational arithmetic. In binary floating
      # point, 0.07 * 100 is 7.000000000000001, whose ceiling (8) would select
      # the element after the correct one.
      rank = (percentile.to_f.rationalize * sorted_values.length).ceil
      sorted_values[(rank - 1).clamp(0, sorted_values.length - 1)]
    end
  end
end
|
data/lib/jrf/runner.rb
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require_relative "control"
|
|
5
|
+
require_relative "pipeline_parser"
|
|
6
|
+
require_relative "reducers"
|
|
7
|
+
require_relative "row_context"
|
|
8
|
+
|
|
9
|
+
module Jrf
  # Drives a parsed pipeline: reads one JSON document per input line, pushes
  # it through the compiled stages, and writes one JSON document per result.
  class Runner
    # Stand-in row used to evaluate a stage before any input exists. Indexing
    # it or calling any unknown method on it returns the object itself, so
    # arbitrary chained accessors evaluate without raising.
    class ProbeValue
      def [](key)
        self
      end

      def method_missing(name, *args, &block)
        self
      end

      def respond_to_missing?(name, include_private = false)
        true
      end
    end

    PROBE_VALUE = ProbeValue.new

    # @param input [#each_line] source of newline-delimited JSON
    # @param out [#puts] destination for results
    # @param err [#puts] destination for verbose diagnostics
    def initialize(input: ARGF, out: $stdout, err: $stderr)
      @input = input
      @out = out
      @err = err
    end

    # Parses +expression+, compiles its stages and processes every non-blank
    # input line. Reducers are flushed even when processing raises, and the
    # error then propagates to the caller.
    #
    # @param expression [String] pipeline source handed to PipelineParser
    # @param verbose [Boolean] dump the parsed stages to +err+ first
    # @return [nil]
    def run(expression, verbose: false)
      parsed = PipelineParser.new(expression).parse
      stages = parsed[:stages]
      dump_stages(stages) if verbose

      ctx = RowContext.new
      compiled = compile_stages(stages, ctx)
      initialize_reducers(compiled, ctx)

      begin
        @input.each_line do |line|
          line = line.strip
          next if line.empty?

          process_value(JSON.parse(line), compiled, ctx)
        end
      ensure
        # Runs on success and on failure; a pending exception keeps
        # propagating after the flush.
        flush_reducers(compiled, ctx)
      end

      nil
    end

    private

    # Pushes +input+ through +stages+ in order. A stage may drop a value,
    # expand it (flat) or map it; values that survive the last stage are
    # written to the output as JSON, one per line.
    def process_value(input, stages, ctx)
      current_values = [input]

      stages.each do |stage|
        next_values = []

        current_values.each do |value|
          out = apply_stage(stage, value, ctx)
          if out.equal?(Control::DROPPED)
            next
          elsif flat_event?(out)
            unless out.value.is_a?(Array)
              raise TypeError, "flat expects Array, got #{out.value.class}"
            end
            next_values.concat(out.value)
          else
            next_values << out
          end
        end

        return if next_values.empty?
        current_values = next_values
      end

      current_values.each { |value| @out.puts JSON.generate(value) }
    end

    # Evaluates one stage for one value. When the stage used a reducer, its
    # return value is kept as the template for the eventual aggregate and the
    # row is dropped, because reducer stages only emit at flush time.
    def apply_stage(stage, input, ctx)
      value = eval_stage(stage, input, ctx)
      if value.equal?(Control::DROPPED)
        Control::DROPPED
      elsif ctx.__jrf_reducer_called?
        # A template captured while probing may embed PROBE_VALUE in its
        # non-reducer parts; discard it in favour of the first real row's.
        stage[:reducer_template] = nil if stage.delete(:reducer_template_probed)
        stage[:reducer_template] ||= value
        Control::DROPPED
      else
        value
      end
    end

    # Points +ctx+ at +input+ and +stage+, then calls the stage's compiled
    # method. Reducers are not advanced when +input+ is the probe value.
    def eval_stage(stage, input, ctx)
      ctx.reset(input)
      ctx.__jrf_begin_stage__(stage, probing: input.equal?(PROBE_VALUE))
      ctx.public_send(stage[:method_name])
    end

    def flat_event?(value)
      value.is_a?(Control::Flat)
    end

    # Emits the aggregates of every reducer stage, front to back. Each
    # aggregate is fed through the stages after it, which may advance later
    # reducer stages; those are flushed on subsequent iterations.
    def flush_reducers(stages, ctx)
      tail = stages
      loop do
        tail = tail.drop_while { |stage| !reducer_stage?(stage) }
        break if tail.empty?

        stage = tail.first
        rest = tail.drop(1)

        out = finish_reducer_template(stage[:reducer_template], stage[:reducers])
        if stage[:reducer_emit_many]
          out.each { |value| process_value(value, rest, ctx) }
        else
          process_value(out, rest, ctx)
        end
        tail = rest
      end
    end

    # Compiles each stage's Ruby source into a method on a fresh module and
    # mixes that module into +ctx+.
    #
    # NOTE(review): stage[:src] is evaluated as Ruby code; this is only safe
    # while pipeline expressions come from a trusted user.
    #
    # @return [Array<Hash>] copies of the stages with :method_name added
    def compile_stages(stages, ctx)
      mod = Module.new
      compiled = []

      stages.each_with_index do |stage, i|
        method_name = :"__jrf_stage_#{i}"
        mod.module_eval("def #{method_name}; #{stage[:src]}; end", "(jrf stage #{i})", 1)
        compiled << stage.merge(method_name: method_name)
      end

      ctx.extend(mod)
      compiled
    end

    # Writes a description of each parsed stage to the error stream.
    def dump_stages(stages)
      stages.each_with_index do |stage, i|
        @err.puts "stage[#{i}] kind=#{stage[:kind]}"
        @err.puts "  original: #{stage[:original]}"
        @err.puts "  ruby: #{stage[:src]}"
      end
    end

    # Evaluates every stage once against PROBE_VALUE so reducer stages have
    # their reducers and a template before any input is read.
    def initialize_reducers(stages, ctx)
      stages.each do |stage|
        begin
          value = eval_stage(stage, PROBE_VALUE, ctx)
          if ctx.__jrf_reducer_called?
            stage[:reducer_template] ||= value
            # Mark the template as provisional so the first real row
            # replaces it (see apply_stage).
            stage[:reducer_template_probed] = true
          end
        rescue StandardError
          # Ignore probe-time errors; reducer will be created on first runtime event.
        end
      end
    end

    def reducer_stage?(stage)
      stage[:reducers]&.any?
    end

    # Rebuilds +template+ with every ReducerToken replaced by the finished
    # value of the reducer it points at, recursing through Arrays and Hash
    # values. Probe placeholders left over from a run without input become
    # nil instead of leaking into the output.
    def finish_reducer_template(template, reducers)
      case template
      when RowContext::ReducerToken
        reducers.fetch(template.index).finish
      when Array
        template.map { |v| finish_reducer_template(v, reducers) }
      when Hash
        template.transform_values { |v| finish_reducer_template(v, reducers) }
      when ProbeValue
        nil
      else
        template
      end
    end
  end
end
|
data/lib/jrf/version.rb
ADDED
data/lib/jrf.rb
ADDED