jrf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
module Jrf
  # Building blocks for streaming, fold-style aggregations.
  module Reducers
    # A stateful fold. It keeps an accumulator, feeds each incoming value to
    # the step block together with the current accumulator, and applies an
    # optional finisher to the accumulator when the result is requested.
    class Reduce
      # Finisher used when none is given: returns the accumulator unchanged.
      IDENTITY = ->(acc) { acc }

      # @param initial [Object] starting accumulator
      # @param finish_fn [#call, nil] maps the final accumulator to the result
      # @yield [acc, value] step block; its return value becomes the new accumulator
      # @raise [ArgumentError] when no step block is given (otherwise the
      #   failure would only surface as a NoMethodError on the first #step)
      def initialize(initial, finish_fn: nil, &step_fn)
        raise ArgumentError, "reducer requires a step block" unless step_fn

        @acc = initial
        @step_fn = step_fn
        @finish_fn = finish_fn || IDENTITY
      end

      # Folds +value+ into the accumulator.
      #
      # @return [Object] the updated accumulator
      def step(value)
        @acc = @step_fn.call(@acc, value)
      end

      # @return [Object] the finisher applied to the current accumulator
      def finish
        @finish_fn.call(@acc)
      end
    end

    module_function

    # Convenience constructor for {Reduce}.
    #
    # @param initial [Object] starting accumulator
    # @param finish [#call, nil] optional finisher
    # @yield [acc, value] step block
    # @return [Reduce]
    def reduce(initial, finish: nil, &step_fn)
      Reduce.new(initial, finish_fn: finish, &step_fn)
    end
  end
end
@@ -0,0 +1,200 @@
1
+ # frozen_string_literal: true
2
+ require_relative "control"
3
+ require_relative "reducers"
4
+
module Jrf
  # Object on which stage code is evaluated. It exposes the current row as
  # `_` and provides reducer helpers (sum, min, max, average, stdev, sort,
  # group, percentile, reduce) that record their state on the stage Hash
  # installed by #__jrf_begin_stage__.
  class RowContext
    # Sentinel for "argument not supplied", so that nil stays a usable value.
    MISSING = Object.new
    # Placeholder returned by reducer helpers; +index+ is the reducer's slot
    # in the stage's :reducers array.
    ReducerToken = Struct.new(:index)

    class << self
      # Defines an instance method +name+ backed by a reducer.
      #
      # The +definition+ block is called with the context, the caller's
      # positional and keyword arguments, and `block:` (the caller's block).
      # It must return a Hash with :value, :initial and :step, and may also
      # carry :finish and :emit_many.
      def define_reducer(name, &definition)
        define_method(name) do |*args, **kwargs, &block|
          spec = definition.call(self, *args, **kwargs, block: block)
          create_reducer(
            spec.fetch(:value),
            initial: spec.fetch(:initial),
            finish: spec[:finish],
            emit_many: spec.fetch(:emit_many, false),
            # The seed is only needed when the slot is first filled, so it is
            # resolved lazily instead of being rebuilt for every row.
            resolve_initial: true,
            &spec.fetch(:step)
          )
        end
      end
    end

    # @param obj [Object, nil] the initial current row
    def initialize(obj = nil)
      @obj = obj
      @__jrf_stage = nil
    end

    # Replaces the current row.
    #
    # @return [RowContext] self
    def reset(obj)
      @obj = obj
      self
    end

    # @return [Object] the current row
    def _
      @obj
    end

    # Wraps the current row in a Control::Flat marker.
    def flat
      Control::Flat.new(@obj)
    end

    # Running total of +value+, starting from +initial+.
    define_reducer(:sum) do |_ctx, value, initial: 0, block: nil|
      { value: value, initial: initial, step: ->(acc, v) { acc + v } }
    end

    # Smallest +value+ seen (nil until the first value arrives).
    define_reducer(:min) do |_ctx, value, block: nil|
      { value: value, initial: nil, step: ->(acc, v) { acc.nil? || v < acc ? v : acc } }
    end

    # Largest +value+ seen (nil until the first value arrives).
    define_reducer(:max) do |_ctx, value, block: nil|
      { value: value, initial: nil, step: ->(acc, v) { acc.nil? || v > acc ? v : acc } }
    end

    # Arithmetic mean; the accumulator is [sum, count]. Finishes to nil when
    # no value was seen.
    define_reducer(:average) do |_ctx, value, block: nil|
      {
        value: value,
        initial: -> { [0.0, 0] },
        finish: ->((sum, count)) { count.zero? ? nil : (sum / count) },
        step: ->(acc, v) {
          acc[0] += v
          acc[1] += 1
          acc
        }
      }
    end

    # Standard deviation using a running [count, mean, m2] accumulator.
    # With `sample: true` the divisor is count - 1 and fewer than two values
    # finish to nil; otherwise the divisor is count and zero values finish to nil.
    define_reducer(:stdev) do |_ctx, value, sample: false, block: nil|
      {
        value: value,
        initial: [0, 0.0, 0.0],
        finish: ->((count, _mean, m2)) {
          return nil if count.zero?
          return nil if sample && count < 2

          denom = sample ? (count - 1) : count
          Math.sqrt(m2 / denom)
        },
        step: ->(acc, x) {
          count, mean, m2 = acc
          count += 1
          delta = x - mean
          mean += delta / count
          # Second delta uses the updated mean.
          m2 += delta * (x - mean)
          acc[0] = count
          acc[1] = mean
          acc[2] = m2
          acc
        }
      }
    end

    # Collects rows and emits them sorted: by the caller's comparison block
    # when one is given, otherwise by +key+ (the row itself when omitted).
    define_reducer(:sort) do |ctx, key = MISSING, block: nil|
      if block
        {
          value: ctx._,
          initial: -> { [] },
          emit_many: true,
          finish: ->(rows) { rows.sort(&block) },
          step: ->(rows, row) { rows << row }
        }
      else
        resolved_key = key.equal?(MISSING) ? ctx._ : key
        {
          value: [resolved_key, ctx._],
          initial: -> { [] },
          emit_many: true,
          finish: ->(pairs) { pairs.sort_by(&:first).map(&:last) },
          step: ->(pairs, pair) { pairs << pair }
        }
      end
    end

    # Collects +value+ (the row itself when omitted) into an Array.
    define_reducer(:group) do |ctx, value = MISSING, block: nil|
      resolved_value = value.equal?(MISSING) ? ctx._ : value
      { value: resolved_value, initial: -> { [] }, step: ->(acc, v) { acc << v } }
    end

    # Percentile(s) of the collected values. A single percentage finishes to
    # one value; an Array of percentages finishes to an Array of
    # {"percentile" => p, "value" => v} Hashes and marks the stage emit_many.
    # Each percentage must be Numeric within [0, 1].
    define_reducer(:percentile) do |ctx, value, percentage, block: nil|
      percentages = percentage.is_a?(Array) ? percentage : [percentage]
      percentages.each { |p| ctx.send(:validate_percentile!, p) }
      scalar = !percentage.is_a?(Array)

      finish =
        if scalar
          ->(values) { ctx.send(:percentile_value, values.sort, percentages.first) }
        else
          ->(values) {
            sorted = values.sort
            percentages.map do |p|
              { "percentile" => p, "value" => ctx.send(:percentile_value, sorted, p) }
            end
          }
        end

      {
        value: value,
        initial: -> { [] },
        emit_many: !scalar,
        finish: finish,
        step: ->(acc, v) { acc << v }
      }
    end

    # Generic fold over the current row.
    #
    # @param initial [Object] starting accumulator (used as given)
    # @yield [acc, row] step block
    # @raise [ArgumentError] when no block is given
    # @return [ReducerToken]
    def reduce(initial, &block)
      raise ArgumentError, "reduce requires a block" unless block

      create_reducer(@obj, initial: initial, &block)
    end

    # Installs +stage+ as the current stage and resets its per-evaluation
    # bookkeeping (:reducer_cursor, :reducer_called, :reducer_probing).
    def __jrf_begin_stage__(stage, probing: false)
      @__jrf_stage = stage
      stage[:reducer_cursor] = 0
      stage[:reducer_called] = false
      stage[:reducer_probing] = probing
    end

    # @return [Boolean, nil] truthy when a reducer helper ran since the last
    #   #__jrf_begin_stage__
    def __jrf_reducer_called?
      @__jrf_stage && @__jrf_stage[:reducer_called]
    end

    private

    # Registers (on first use) and steps the reducer at the current cursor
    # position of the active stage.
    #
    # @param value [Object] value fed to the reducer for this row
    # @param initial [Object] seed, or a seed spec when +resolve_initial+ is true
    # @param emit_many [Boolean] recorded on the stage the first time any
    #   reducer registers there
    # @param finish [#call, nil] finisher handed to the reducer
    # @param resolve_initial [Boolean] run +initial+ through
    #   #reducer_initial_value when the reducer is created
    # @return [ReducerToken] token pointing at the reducer's slot
    def create_reducer(value, initial:, emit_many: false, finish: nil, resolve_initial: false, &step_fn)
      stage = @__jrf_stage
      raise "internal error: reducer used outside stage context" unless stage

      reducers = (stage[:reducers] ||= [])
      idx = stage[:reducer_cursor] || 0
      reducers[idx] ||= begin
        seed = resolve_initial ? reducer_initial_value(initial) : initial
        Reducers.reduce(seed, finish: finish, &step_fn)
      end
      # Probing only registers the reducer; it must not consume the probe value.
      reducers[idx].step(value) unless stage[:reducer_probing]
      stage[:reducer_cursor] = idx + 1
      stage[:reducer_called] = true
      stage[:reducer_emit_many] = emit_many if stage[:reducer_emit_many].nil?
      ReducerToken.new(idx)
    end

    # Produces a fresh seed: callables are invoked, Arrays and Hashes are
    # duplicated, anything else is returned as is.
    def reducer_initial_value(initial)
      return initial.call if initial.respond_to?(:call)
      return initial.dup if initial.is_a?(Array) || initial.is_a?(Hash)

      initial
    end

    # @raise [ArgumentError] unless +value+ is Numeric within [0, 1]
    def validate_percentile!(value)
      return if value.is_a?(Numeric) && value >= 0 && value <= 1

      raise ArgumentError, "percentile must be numeric in [0, 1]"
    end

    # Picks the element at rank ceil(percentile * n) - 1 of +sorted_values+,
    # bounded to the valid index range; nil for an empty list.
    def percentile_value(sorted_values, percentile)
      return nil if sorted_values.empty?

      last = sorted_values.length - 1
      rank = (percentile.to_f * sorted_values.length).ceil - 1
      sorted_values[rank.clamp(0, last)]
    end
  end
end
data/lib/jrf/runner.rb ADDED
@@ -0,0 +1,179 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require_relative "control"
5
+ require_relative "pipeline_parser"
6
+ require_relative "reducers"
7
+ require_relative "row_context"
8
+
module Jrf
  # Drives a pipeline: parses the expression into stages, compiles each stage
  # into a method on a RowContext, feeds every non-blank input line (parsed as
  # JSON) through the stages, and finally flushes reducer stages.
  class Runner
    # Stand-in row used to evaluate stages once before any input is read.
    # Indexing it or calling any unknown method on it returns the probe itself.
    class ProbeValue
      def [](_key)
        self
      end

      def method_missing(_name, *_args, &_block)
        self
      end

      def respond_to_missing?(_name, _include_private = false)
        true
      end
    end

    PROBE_VALUE = ProbeValue.new

    # @param input [#each_line] source of newline-delimited JSON
    # @param out [#puts] receives one JSON document per emitted value
    # @param err [#puts] receives the verbose stage dump
    def initialize(input: ARGF, out: $stdout, err: $stderr)
      @input = input
      @out = out
      @err = err
    end

    # Runs +expression+ over the input.
    #
    # Reducers are flushed even when processing fails. Because the flush
    # happens in +ensure+ while the failure is still pending, an exception
    # raised by the flush itself carries the original failure as its +cause+.
    #
    # @param expression [Object] passed to PipelineParser.new
    # @param verbose [Boolean] dump the parsed stages to +err+ first
    # @return [nil]
    def run(expression, verbose: false)
      stages = PipelineParser.new(expression).parse[:stages]
      dump_stages(stages) if verbose

      ctx = RowContext.new
      compiled = compile_stages(stages, ctx)
      initialize_reducers(compiled, ctx)

      begin
        @input.each_line do |raw|
          text = raw.strip
          process_value(JSON.parse(text), compiled, ctx) unless text.empty?
        end
      ensure
        flush_reducers(compiled, ctx)
      end

      nil
    end

    private

    # Pushes +input+ through +stages+ in order and prints whatever survives
    # the last stage. A stage may drop a value, pass one value on, or (via
    # Control::Flat) fan an Array out into several values.
    #
    # @raise [TypeError] when a Flat result does not wrap an Array
    def process_value(input, stages, ctx)
      values = [input]

      stages.each do |stage|
        values = values.each_with_object([]) do |value, emitted|
          result = apply_stage(stage, value, ctx)
          next if result.equal?(Control::DROPPED)

          if flat_event?(result)
            unless result.value.is_a?(Array)
              raise TypeError, "flat expects Array, got #{result.value.class}"
            end

            emitted.concat(result.value)
          else
            emitted << result
          end
        end

        # Nothing left to feed the remaining stages.
        return if values.empty?
      end

      values.each { |value| @out.puts JSON.generate(value) }
    end

    # Evaluates one stage. When the stage invoked a reducer, its return value
    # is remembered as the stage's template (first time only) and the row is
    # dropped; the aggregate is emitted later by #flush_reducers.
    def apply_stage(stage, input, ctx)
      value = eval_stage(stage, input, ctx)
      return Control::DROPPED if value.equal?(Control::DROPPED)
      return value unless ctx.__jrf_reducer_called?

      stage[:reducer_template] ||= value
      Control::DROPPED
    end

    # Points the context at +input+ and +stage+, then calls the stage's
    # compiled method. Probing mode is on exactly when +input+ is PROBE_VALUE.
    def eval_stage(stage, input, ctx)
      ctx.reset(input)
      ctx.__jrf_begin_stage__(stage, probing: input.equal?(PROBE_VALUE))
      ctx.public_send(stage[:method_name])
    end

    def flat_event?(value)
      value.is_a?(Control::Flat)
    end

    # Walks the stages front to back. Each reducer stage is finished and its
    # result is sent through the stages after it, which lets later reducer
    # stages accumulate before they are flushed in turn.
    def flush_reducers(stages, ctx)
      remaining = stages

      loop do
        remaining = remaining.drop_while { |stage| !reducer_stage?(stage) }
        break if remaining.empty?

        stage, *downstream = remaining
        result = finish_reducer_template(stage[:reducer_template], stage[:reducers])
        if stage[:reducer_emit_many]
          result.each { |value| process_value(value, downstream, ctx) }
        else
          process_value(result, downstream, ctx)
        end
        remaining = downstream
      end
    end

    # Compiles each stage's :src into a method named __jrf_stage_<i> on a
    # fresh module, extends +ctx+ with it, and returns copies of the stage
    # Hashes with :method_name added.
    def compile_stages(stages, ctx)
      mod = Module.new

      compiled = stages.each_with_index.map do |stage, i|
        method_name = :"__jrf_stage_#{i}"
        mod.module_eval("def #{method_name}; #{stage[:src]}; end", "(jrf stage #{i})", 1)
        stage.merge(method_name: method_name)
      end

      ctx.extend(mod)
      compiled
    end

    # Writes each stage's kind, original text and generated Ruby to +err+.
    def dump_stages(stages)
      stages.each_with_index do |stage, i|
        @err.puts "stage[#{i}] kind=#{stage[:kind]}"
        @err.puts "  original: #{stage[:original]}"
        @err.puts "  ruby: #{stage[:src]}"
      end
    end

    # Evaluates every stage once against PROBE_VALUE so that stages using
    # reducers get their reducers and template registered before input arrives.
    def initialize_reducers(stages, ctx)
      stages.each do |stage|
        value = eval_stage(stage, PROBE_VALUE, ctx)
        stage[:reducer_template] ||= value if ctx.__jrf_reducer_called?
      rescue StandardError
        # Ignore probe-time errors; reducer will be created on first runtime event.
      end
    end

    def reducer_stage?(stage)
      stage[:reducers]&.any?
    end

    # Rebuilds +template+ with every ReducerToken replaced by the finished
    # value of the reducer it points at; Arrays and Hash values are walked
    # recursively, anything else is returned unchanged.
    def finish_reducer_template(template, reducers)
      case template
      when RowContext::ReducerToken
        reducers.fetch(template.index).finish
      when Array
        template.map { |item| finish_reducer_template(item, reducers) }
      when Hash
        template.transform_values { |item| finish_reducer_template(item, reducers) }
      else
        template
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
module Jrf
  # Version string of the jrf gem.
  VERSION = "0.1.0"
end
data/lib/jrf.rb ADDED
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "jrf/version"
4
+ require_relative "jrf/cli"