jrf 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2776f201f13bf8be05ec4615510f0810ceeff9115dd11ef4d54ed873c9c90030
4
- data.tar.gz: 0b996f561536a47067d262122c3b1093fd9adfb499fc0fab7011226cee75f043
3
+ metadata.gz: 34475ad560159e50a8f6ea6dbfae40dc57173b40df31065f4b222abaafe66012
4
+ data.tar.gz: 22c046afd9f4fba04788f08796f9ccfe24b21a87522820c8e5873f164de8cc53
5
5
  SHA512:
6
- metadata.gz: bde0a34fdeb324132084a7621bc2a37767a506261dbafadfdebbeb7fc060a2e8a142c44132ed5228a2604a5dba767871d39303eb3aa941510ba0d3f2694d5e7e
7
- data.tar.gz: 725b4611d5659ce994df183950676a7c8ff2582df7b02cbc0bf7ef47682ca2bf2f55507df10c6e907a1032c6b4dcd17625b4db807f28d304a331f0caf4c8a1cd
6
+ metadata.gz: 7d90e4a754ae7ca9170db6c7221571cb90077bbd48d6cd55cbefd29342afa89996075c86a3bf645dac94b337b91eceefa036968f490c30bacf52744a319d238f
7
+ data.tar.gz: 3d00c51e46a07f63e1d44b8f2013663dd66d3b2f3393046a00a3c26a5f1cb3dd4eabc2db82eacb12ff874625835f49a63e358baaf44639d38b8a9e01a6c3b06d
data/exe/jrf CHANGED
@@ -1,6 +1,12 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
3
 
4
+ begin
5
+ require "bundler/setup"
6
+ rescue LoadError
7
+ # Allow running without Bundler in plain Ruby environments.
8
+ end
9
+
4
10
  $LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
5
11
  require "jrf"
6
12
 
data/jrf.gemspec CHANGED
@@ -15,6 +15,7 @@ Gem::Specification.new do |spec|
15
15
 
16
16
  spec.bindir = "exe"
17
17
  spec.executables = ["jrf"]
18
+ spec.add_dependency "oj", ">= 3.16"
18
19
 
19
20
  spec.files = Dir.glob("{exe,lib,test}/*") + Dir.glob("lib/**/*") + %w[DESIGN.txt jrf.gemspec Gemfile Rakefile]
20
21
  end
data/lib/jrf/cli.rb CHANGED
@@ -4,15 +4,16 @@ require_relative "runner"
4
4
 
5
5
  module Jrf
6
6
  class CLI
7
- USAGE = "usage: jrf [-v] [--help] 'STAGE >> STAGE >> ...'"
7
+ USAGE = "usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'"
8
8
 
9
9
  HELP_TEXT = <<~'TEXT'
10
- usage: jrf [-v] [--help] 'STAGE >> STAGE >> ...'
10
+ usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'
11
11
 
12
12
  JSON filter with the power and speed of Ruby.
13
13
 
14
14
  Options:
15
- -v, --verbose print compiled stage Ruby expressions
15
+ -v, --verbose print parsed stage expressions
16
+ --lax allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
16
17
  -h, --help show this help and exit
17
18
 
18
19
  Pipeline:
@@ -33,12 +34,16 @@ module Jrf
33
34
 
34
35
  def self.run(argv = ARGV, input: ARGF, out: $stdout, err: $stderr)
35
36
  verbose = false
37
+ lax = false
36
38
 
37
39
  while argv.first&.start_with?("-")
38
40
  case argv.first
39
41
  when "-v", "--verbose"
40
42
  verbose = true
41
43
  argv.shift
44
+ when "--lax"
45
+ lax = true
46
+ argv.shift
42
47
  when "-h", "--help"
43
48
  out.puts HELP_TEXT
44
49
  return 0
@@ -55,7 +60,7 @@ module Jrf
55
60
  end
56
61
 
57
62
  expression = argv.shift
58
- Runner.new(input: input, out: out, err: err).run(expression, verbose: verbose)
63
+ Runner.new(input: input, out: out, err: err, lax: lax).run(expression, verbose: verbose)
59
64
  0
60
65
  end
61
66
  end
@@ -9,51 +9,11 @@ module Jrf
9
9
  def parse
10
10
  stages = split_top_level_pipeline(@source).map(&:strip).reject(&:empty?)
11
11
  raise ArgumentError, "empty expression" if stages.empty?
12
- { stages: stages.map { |stage| parse_stage!(stage) } }
12
+ { stages: stages.map { |stage| { src: stage } } }
13
13
  end
14
14
 
15
15
  private
16
16
 
17
- def parse_stage!(stage)
18
- if select_stage?(stage)
19
- {
20
- kind: :select,
21
- original: stage,
22
- src: "(#{parse_select!(stage)}) ? _ : ::Jrf::Control::DROPPED"
23
- }
24
- else
25
- reject_unsupported_stage!(stage)
26
- {
27
- kind: :extract,
28
- original: stage,
29
- src: validate_extract!(stage)
30
- }
31
- end
32
- end
33
-
34
- def validate_extract!(stage)
35
- reject_unsupported_stage!(stage)
36
- stage
37
- end
38
-
39
- def parse_select!(stage)
40
- reject_unsupported_stage!(stage)
41
- match = /\Aselect\s*\((.*)\)\s*\z/m.match(stage)
42
- raise ArgumentError, "first stage must be select(...)" unless match
43
-
44
- inner = match[1].strip
45
- raise ArgumentError, "select(...) must contain an expression" if inner.empty?
46
-
47
- inner
48
- end
49
-
50
- def select_stage?(stage)
51
- /\Aselect\s*\(/.match?(stage)
52
- end
53
-
54
- def reject_unsupported_stage!(stage)
55
- end
56
-
57
17
  def split_top_level_pipeline(source)
58
18
  parts = []
59
19
  start_idx = 0
@@ -1,21 +1,22 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require_relative "control"
3
4
  require_relative "reducers"
4
5
 
5
6
  module Jrf
6
7
  class RowContext
7
8
  MISSING = Object.new
8
- ReducerToken = Struct.new(:index)
9
+
10
+ attr_writer :__jrf_current_stage
9
11
 
10
12
  class << self
11
13
  def define_reducer(name, &definition)
12
14
  define_method(name) do |*args, **kwargs, &block|
13
15
  spec = definition.call(self, *args, **kwargs, block: block)
14
- create_reducer(
16
+ @__jrf_current_stage.allocate_reducer(
15
17
  spec.fetch(:value),
16
18
  initial: reducer_initial_value(spec.fetch(:initial)),
17
19
  finish: spec[:finish],
18
- emit_many: spec.fetch(:emit_many, false),
19
20
  &spec.fetch(:step)
20
21
  )
21
22
  end
@@ -24,7 +25,7 @@ module Jrf
24
25
 
25
26
  def initialize(obj = nil)
26
27
  @obj = obj
27
- @__jrf_stage = nil
28
+ @__jrf_current_stage = nil
28
29
  end
29
30
 
30
31
  def reset(obj)
@@ -40,24 +41,38 @@ module Jrf
40
41
  Control::Flat.new(@obj)
41
42
  end
42
43
 
44
+ def select(predicate)
45
+ predicate ? @obj : Control::DROPPED
46
+ end
47
+
43
48
  define_reducer(:sum) do |_ctx, value, initial: 0, block: nil|
44
- { value: value, initial: initial, step: ->(acc, v) { acc + v } }
49
+ { value: value, initial: initial, step: ->(acc, v) { v.nil? ? acc : (acc + v) } }
50
+ end
51
+
52
+ define_reducer(:count) do |_ctx, value = MISSING, block: nil|
53
+ if value.equal?(MISSING)
54
+ { value: nil, initial: 0, step: ->(acc, _v) { acc + 1 } }
55
+ else
56
+ { value: value, initial: 0, step: ->(acc, v) { v.nil? ? acc : (acc + 1) } }
57
+ end
45
58
  end
46
59
 
47
60
  define_reducer(:min) do |_ctx, value, block: nil|
48
- { value: value, initial: nil, step: ->(acc, v) { acc.nil? || v < acc ? v : acc } }
61
+ { value: value, initial: nil, step: ->(acc, v) { v.nil? ? acc : (acc.nil? || v < acc ? v : acc) } }
49
62
  end
50
63
 
51
64
  define_reducer(:max) do |_ctx, value, block: nil|
52
- { value: value, initial: nil, step: ->(acc, v) { acc.nil? || v > acc ? v : acc } }
65
+ { value: value, initial: nil, step: ->(acc, v) { v.nil? ? acc : (acc.nil? || v > acc ? v : acc) } }
53
66
  end
54
67
 
55
68
  define_reducer(:average) do |_ctx, value, block: nil|
56
69
  {
57
70
  value: value,
58
71
  initial: -> { [0.0, 0] },
59
- finish: ->((sum, count)) { count.zero? ? nil : (sum / count) },
72
+ finish: ->((sum, count)) { [count.zero? ? nil : (sum / count)] },
60
73
  step: ->(acc, v) {
74
+ return acc if v.nil?
75
+
61
76
  acc[0] += v
62
77
  acc[1] += 1
63
78
  acc
@@ -70,13 +85,15 @@ module Jrf
70
85
  value: value,
71
86
  initial: [0, 0.0, 0.0],
72
87
  finish: ->((count, mean, m2)) {
73
- return nil if count.zero?
74
- return nil if sample && count < 2
88
+ return [nil] if count.zero?
89
+ return [nil] if sample && count < 2
75
90
 
76
91
  denom = sample ? (count - 1) : count
77
- Math.sqrt(m2 / denom)
92
+ [Math.sqrt(m2 / denom)]
78
93
  },
79
94
  step: ->(acc, x) {
95
+ return acc if x.nil?
96
+
80
97
  count, mean, m2 = acc
81
98
  count += 1
82
99
  delta = x - mean
@@ -96,7 +113,6 @@ module Jrf
96
113
  {
97
114
  value: ctx._,
98
115
  initial: -> { [] },
99
- emit_many: true,
100
116
  finish: ->(rows) { rows.sort(&block) },
101
117
  step: ->(rows, row) { rows << row }
102
118
  }
@@ -105,7 +121,6 @@ module Jrf
105
121
  {
106
122
  value: [resolved_key, ctx._],
107
123
  initial: -> { [] },
108
- emit_many: true,
109
124
  finish: ->(pairs) { pairs.sort_by(&:first).map(&:last) },
110
125
  step: ->(pairs, pair) { pairs << pair }
111
126
  }
@@ -124,7 +139,7 @@ module Jrf
124
139
 
125
140
  finish =
126
141
  if scalar
127
- ->(values) { ctx.send(:percentile_value, values.sort, percentages.first) }
142
+ ->(values) { [ctx.send(:percentile_value, values.sort, percentages.first)] }
128
143
  else
129
144
  ->(values) {
130
145
  sorted = values.sort
@@ -137,44 +152,36 @@ module Jrf
137
152
  {
138
153
  value: value,
139
154
  initial: -> { [] },
140
- emit_many: !scalar,
141
155
  finish: finish,
142
- step: ->(acc, v) { acc << v }
156
+ step: ->(acc, v) { v.nil? ? acc : (acc << v) }
143
157
  }
144
158
  end
145
159
 
146
160
  def reduce(initial, &block)
147
161
  raise ArgumentError, "reduce requires a block" unless block
148
162
 
149
- create_reducer(@obj, initial: initial, &block)
163
+ @__jrf_current_stage.allocate_reducer(@obj, initial: initial, &block)
150
164
  end
151
165
 
152
- def __jrf_begin_stage__(stage, probing: false)
153
- @__jrf_stage = stage
154
- stage[:reducer_cursor] = 0
155
- stage[:reducer_called] = false
156
- stage[:reducer_probing] = probing
157
- end
166
+ def map(&block)
167
+ raise ArgumentError, "map requires a block" unless block
158
168
 
159
- def __jrf_reducer_called?
160
- @__jrf_stage && @__jrf_stage[:reducer_called]
169
+ @__jrf_current_stage.allocate_map(:array, @obj, &block)
161
170
  end
162
171
 
163
- private
172
+ def map_values(&block)
173
+ raise ArgumentError, "map_values requires a block" unless block
164
174
 
165
- def create_reducer(value, initial:, emit_many: false, finish: nil, &step_fn)
166
- raise "internal error: reducer used outside stage context" unless @__jrf_stage
175
+ @__jrf_current_stage.allocate_map(:hash, @obj, &block)
176
+ end
167
177
 
168
- reducers = (@__jrf_stage[:reducers] ||= [])
169
- idx = @__jrf_stage[:reducer_cursor] || 0
170
- reducers[idx] ||= Reducers.reduce(initial, finish: finish, &step_fn)
171
- reducers[idx].step(value) unless @__jrf_stage[:reducer_probing]
172
- @__jrf_stage[:reducer_cursor] = idx + 1
173
- @__jrf_stage[:reducer_called] = true
174
- @__jrf_stage[:reducer_emit_many] = emit_many if @__jrf_stage[:reducer_emit_many].nil?
175
- ReducerToken.new(idx)
178
+ def group_by(key, &block)
179
+ block ||= proc { group }
180
+ @__jrf_current_stage.allocate_group_by(key, &block)
176
181
  end
177
182
 
183
+ private
184
+
178
185
  def reducer_initial_value(initial)
179
186
  return initial.call if initial.respond_to?(:call)
180
187
  return initial.dup if initial.is_a?(Array) || initial.is_a?(Hash)
data/lib/jrf/runner.rb CHANGED
@@ -5,9 +5,12 @@ require_relative "control"
5
5
  require_relative "pipeline_parser"
6
6
  require_relative "reducers"
7
7
  require_relative "row_context"
8
+ require_relative "stage"
8
9
 
9
10
  module Jrf
10
11
  class Runner
12
+ RS_CHAR = "\x1e"
13
+
11
14
  class ProbeValue
12
15
  def [](key)
13
16
  self
@@ -24,10 +27,11 @@ module Jrf
24
27
 
25
28
  PROBE_VALUE = ProbeValue.new
26
29
 
27
- def initialize(input: ARGF, out: $stdout, err: $stderr)
30
+ def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false)
28
31
  @input = input
29
32
  @out = out
30
33
  @err = err
34
+ @lax = lax
31
35
  end
32
36
 
33
37
  def run(expression, verbose: false)
@@ -37,20 +41,17 @@ module Jrf
37
41
 
38
42
  ctx = RowContext.new
39
43
  compiled = compile_stages(stages, ctx)
40
- initialize_reducers(compiled, ctx)
44
+ compiled.each { |stage| stage.call(PROBE_VALUE, probing: true) rescue nil }
41
45
  error = nil
42
46
 
43
47
  begin
44
- @input.each_line do |line|
45
- line = line.strip
46
- next if line.empty?
47
-
48
- process_value(JSON.parse(line), compiled, ctx)
48
+ each_input_value do |value|
49
+ process_value(value, compiled)
49
50
  end
50
51
  rescue StandardError => e
51
52
  error = e
52
53
  ensure
53
- flush_reducers(compiled, ctx)
54
+ flush_reducers(compiled)
54
55
  end
55
56
 
56
57
  raise error if error
@@ -58,17 +59,17 @@ module Jrf
58
59
 
59
60
  private
60
61
 
61
- def process_value(input, stages, ctx)
62
+ def process_value(input, stages)
62
63
  current_values = [input]
63
64
 
64
65
  stages.each do |stage|
65
66
  next_values = []
66
67
 
67
68
  current_values.each do |value|
68
- out = apply_stage(stage, value, ctx)
69
+ out = stage.call(value)
69
70
  if out.equal?(Control::DROPPED)
70
71
  next
71
- elsif flat_event?(out)
72
+ elsif out.is_a?(Control::Flat)
72
73
  unless out.value.is_a?(Array)
73
74
  raise TypeError, "flat expects Array, got #{out.value.class}"
74
75
  end
@@ -85,94 +86,70 @@ module Jrf
85
86
  current_values.each { |value| @out.puts JSON.generate(value) }
86
87
  end
87
88
 
88
- def apply_stage(stage, input, ctx)
89
- value = eval_stage(stage, input, ctx)
90
- if value.equal?(Control::DROPPED)
91
- Control::DROPPED
92
- elsif ctx.__jrf_reducer_called?
93
- stage[:reducer_template] ||= value
94
- Control::DROPPED
95
- else
96
- value
97
- end
98
- end
89
+ def each_input_value
90
+ return each_input_value_lax { |value| yield value } if @lax
99
91
 
100
- def eval_stage(stage, input, ctx)
101
- ctx.reset(input)
102
- ctx.__jrf_begin_stage__(stage, probing: input.equal?(PROBE_VALUE))
103
- ctx.public_send(stage[:method_name])
92
+ each_input_value_ndjson { |value| yield value }
104
93
  end
105
94
 
106
- def flat_event?(value)
107
- value.is_a?(Control::Flat)
95
+ def each_input_value_ndjson
96
+ @input.each_line do |raw_line|
97
+ line = raw_line.strip
98
+ next if line.empty?
99
+
100
+ yield JSON.parse(line)
101
+ end
108
102
  end
109
103
 
110
- def flush_reducers(stages, ctx)
111
- tail = stages
112
- loop do
113
- tail = tail.drop_while { |stage| !reducer_stage?(stage) }
114
- break if tail.empty?
115
-
116
- stage = tail.first
117
- reducers = stage[:reducers]
118
- break unless reducers&.any?
119
-
120
- out = finish_reducer_template(stage[:reducer_template], reducers)
121
- if stage[:reducer_emit_many]
122
- out.each { |value| process_value(value, tail.drop(1), ctx) }
123
- else
124
- process_value(out, tail.drop(1), ctx)
104
+ def each_input_value_lax
105
+ require "oj"
106
+ source = @input.read.to_s
107
+ source = source.include?(RS_CHAR) ? source.tr(RS_CHAR, "\n") : source
108
+ handler = Class.new(Oj::ScHandler) do
109
+ def initialize(&emit)
110
+ @emit = emit
125
111
  end
126
- tail = tail.drop(1)
127
- end
112
+
113
+ def hash_start = {}
114
+ def hash_key(key) = key
115
+ def hash_set(hash, key, value) = hash[key] = value
116
+ def array_start = []
117
+ def array_append(array, value) = array << value
118
+ def add_value(value) = @emit.call(value)
119
+ end.new { |value| yield value }
120
+ Oj.sc_parse(handler, source)
121
+ rescue LoadError
122
+ raise "oj is required for --lax mode (gem install oj)"
123
+ rescue Oj::ParseError => e
124
+ raise JSON::ParserError, e.message
128
125
  end
129
126
 
130
127
  def compile_stages(stages, ctx)
131
128
  mod = Module.new
132
- compiled = []
133
129
 
134
- stages.each_with_index do |stage, i|
130
+ stages.each_with_index.map do |stage, i|
135
131
  method_name = :"__jrf_stage_#{i}"
136
132
  mod.module_eval("def #{method_name}; #{stage[:src]}; end", "(jrf stage #{i})", 1)
137
- compiled << stage.merge(method_name: method_name)
138
- end
139
-
140
- ctx.extend(mod)
141
- compiled
133
+ Stage.new(ctx, method_name, src: stage[:src])
134
+ end.tap { ctx.extend(mod) }
142
135
  end
143
136
 
144
137
  def dump_stages(stages)
145
138
  stages.each_with_index do |stage, i|
146
- @err.puts "stage[#{i}] kind=#{stage[:kind]}"
147
- @err.puts " original: #{stage[:original]}"
148
- @err.puts " ruby: #{stage[:src]}"
149
- end
150
- end
151
-
152
- def initialize_reducers(stages, ctx)
153
- stages.each do |stage|
154
- begin
155
- value = eval_stage(stage, PROBE_VALUE, ctx)
156
- stage[:reducer_template] ||= value if ctx.__jrf_reducer_called?
157
- rescue StandardError
158
- # Ignore probe-time errors; reducer will be created on first runtime event.
159
- end
139
+ @err.puts "stage[#{i}]: #{stage[:src]}"
160
140
  end
161
141
  end
162
142
 
163
- def reducer_stage?(stage)
164
- stage[:reducers]&.any?
165
- end
143
+ def flush_reducers(stages)
144
+ tail = stages
145
+ loop do
146
+ idx = tail.index(&:reducer?)
147
+ break unless idx
166
148
 
167
- def finish_reducer_template(template, reducers)
168
- if template.is_a?(RowContext::ReducerToken)
169
- reducers.fetch(template.index).finish
170
- elsif template.is_a?(Array)
171
- template.map { |v| finish_reducer_template(v, reducers) }
172
- elsif template.is_a?(Hash)
173
- template.transform_values { |v| finish_reducer_template(v, reducers) }
174
- else
175
- template
149
+ rows = tail[idx].finish
150
+ rest = tail.drop(idx + 1)
151
+ rows.each { |value| process_value(value, rest) }
152
+ tail = rest
176
153
  end
177
154
  end
178
155
  end
data/lib/jrf/stage.rb ADDED
@@ -0,0 +1,162 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "control"
4
+ require_relative "reducers"
5
+
6
+ module Jrf
7
+ class Stage
8
+ ReducerToken = Struct.new(:index)
9
+
10
+ attr_reader :method_name, :src
11
+
12
+ def self.resolve_template(template, reducers)
13
+ if template.is_a?(ReducerToken)
14
+ rows = reducers.fetch(template.index).finish
15
+ rows.length == 1 ? rows.first : rows
16
+ elsif template.is_a?(Array)
17
+ template.map { |v| resolve_template(v, reducers) }
18
+ elsif template.is_a?(Hash)
19
+ template.transform_values { |v| resolve_template(v, reducers) }
20
+ else
21
+ template
22
+ end
23
+ end
24
+
25
+ def initialize(ctx, method_name, src: nil)
26
+ @ctx = ctx
27
+ @method_name = method_name
28
+ @src = src
29
+ @reducers = []
30
+ @cursor = 0
31
+ @template = nil
32
+ @mode = nil # nil=unknown, :reducer, :passthrough
33
+ @probing = false
34
+ end
35
+
36
+ def call(input, probing: false)
37
+ @ctx.reset(input)
38
+ @cursor = 0
39
+ @probing = probing
40
+ @ctx.__jrf_current_stage = self
41
+ result = @ctx.public_send(@method_name)
42
+
43
+ if @mode.nil? && @reducers.any?
44
+ @mode = :reducer
45
+ @template = result
46
+ elsif @mode.nil? && !probing
47
+ @mode = :passthrough
48
+ end
49
+
50
+ (@mode == :reducer) ? Control::DROPPED : result
51
+ end
52
+
53
+ def allocate_reducer(value, initial:, finish: nil, &step_fn)
54
+ idx = @cursor
55
+ finish_rows = finish || ->(acc) { [acc] }
56
+ @reducers[idx] ||= Reducers.reduce(initial, finish: finish_rows, &step_fn)
57
+ @reducers[idx].step(value) unless @probing
58
+ @cursor += 1
59
+ ReducerToken.new(idx)
60
+ end
61
+
62
+ def allocate_map(type, collection, &block)
63
+ idx = @cursor
64
+ map_reducer = (@reducers[idx] ||= MapReducer.new(type))
65
+
66
+ unless @probing
67
+ saved_obj = @ctx._
68
+
69
+ case type
70
+ when :array
71
+ raise TypeError, "map expects Array, got #{collection.class}" unless collection.is_a?(Array)
72
+ collection.each_with_index do |v, i|
73
+ @ctx.reset(v)
74
+ with_scoped_reducers(map_reducer.slots[i] ||= []) do
75
+ result = block.call(v)
76
+ map_reducer.templates[i] ||= result
77
+ end
78
+ end
79
+ when :hash
80
+ raise TypeError, "map_values expects Hash, got #{collection.class}" unless collection.is_a?(Hash)
81
+ collection.each do |k, v|
82
+ @ctx.reset(v)
83
+ with_scoped_reducers(map_reducer.slots[k] ||= []) do
84
+ result = block.call(v)
85
+ map_reducer.templates[k] ||= result
86
+ end
87
+ end
88
+ end
89
+
90
+ @ctx.reset(saved_obj)
91
+ end
92
+
93
+ @cursor += 1
94
+ ReducerToken.new(idx)
95
+ end
96
+
97
+ def allocate_group_by(key, &block)
98
+ idx = @cursor
99
+ map_reducer = (@reducers[idx] ||= MapReducer.new(:hash))
100
+
101
+ unless @probing
102
+ slot = (map_reducer.slots[key] ||= [])
103
+ with_scoped_reducers(slot) do
104
+ result = block.call
105
+ map_reducer.templates[key] ||= result
106
+ end
107
+ end
108
+
109
+ @cursor += 1
110
+ ReducerToken.new(idx)
111
+ end
112
+
113
+ def reducer?
114
+ @mode == :reducer
115
+ end
116
+
117
+ def finish
118
+ return [] unless @mode == :reducer && @reducers.any?
119
+
120
+ if @template.is_a?(ReducerToken)
121
+ @reducers.fetch(@template.index).finish
122
+ else
123
+ [self.class.resolve_template(@template, @reducers)]
124
+ end
125
+ end
126
+
127
+ private
128
+
129
+ def with_scoped_reducers(reducer_list)
130
+ saved_reducers = @reducers
131
+ saved_cursor = @cursor
132
+ @reducers = reducer_list
133
+ @cursor = 0
134
+ yield
135
+ ensure
136
+ @reducers = saved_reducers
137
+ @cursor = saved_cursor
138
+ end
139
+
140
+ class MapReducer
141
+ attr_reader :slots, :templates
142
+
143
+ def initialize(type)
144
+ @type = type
145
+ @slots = {}
146
+ @templates = {}
147
+ end
148
+
149
+ def finish
150
+ case @type
151
+ when :array
152
+ keys = @slots.keys.sort
153
+ [keys.map { |k| Stage.resolve_template(@templates[k], @slots[k]) }]
154
+ when :hash
155
+ result = {}
156
+ @slots.each { |k, reducers| result[k] = Stage.resolve_template(@templates[k], reducers) }
157
+ [result]
158
+ end
159
+ end
160
+ end
161
+ end
162
+ end
data/lib/jrf/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Jrf
4
- VERSION = "0.1.2"
4
+ VERSION = "0.1.3"
5
5
  end
data/test/jrf_test.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "json"
3
4
  require "open3"
4
5
 
5
6
  def run_jrf(expr, input, *opts)
@@ -86,17 +87,14 @@ assert_equal(['{"hello":123}'], lines(stdout), "select-only hello output")
86
87
  stdout, stderr, status = run_jrf('select(_["hello"] == 123) >> _["hello"]', input_hello, "-v")
87
88
  assert_success(status, stderr, "dump stages")
88
89
  assert_equal(%w[123], lines(stdout), "dump stages output")
89
- assert_includes(stderr, "stage[0] kind=select")
90
- assert_includes(stderr, 'original: select(_["hello"] == 123)')
91
- assert_includes(stderr, 'ruby: (_["hello"] == 123) ? _ : ::Jrf::Control::DROPPED')
92
- assert_includes(stderr, "stage[1] kind=extract")
93
- assert_includes(stderr, 'original: _["hello"]')
94
- assert_includes(stderr, 'ruby: _["hello"]')
90
+ assert_includes(stderr, 'stage[0]: select(_["hello"] == 123)')
91
+ assert_includes(stderr, 'stage[1]: _["hello"]')
95
92
 
96
93
  stdout, stderr, status = Open3.capture3("./exe/jrf", "--help")
97
94
  assert_success(status, stderr, "help option")
98
- assert_includes(stdout, "usage: jrf [-v] [--help] 'STAGE >> STAGE >> ...'")
95
+ assert_includes(stdout, "usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'")
99
96
  assert_includes(stdout, "JSON filter with the power and speed of Ruby.")
97
+ assert_includes(stdout, "--lax")
100
98
  assert_includes(stdout, "Pipeline:")
101
99
  assert_includes(stdout, "Connect stages with top-level >>.")
102
100
  assert_includes(stdout, "The current value in each stage is available as _.")
@@ -108,7 +106,7 @@ assert_equal([], lines(stderr), "help stderr output")
108
106
  stdout, stderr, status = run_jrf('select(_["hello"] == 123) >> _["hello"]', input_hello, "--verbose")
109
107
  assert_success(status, stderr, "dump stages verbose alias")
110
108
  assert_equal(%w[123], lines(stdout), "dump stages verbose alias output")
111
- assert_includes(stderr, "stage[0] kind=select")
109
+ assert_includes(stderr, 'stage[0]: select(_["hello"] == 123)')
112
110
 
113
111
  input_regex = <<~NDJSON
114
112
  {"foo":{"bar":"ok"},"x":50}
@@ -176,6 +174,14 @@ stdout, stderr, status = run_jrf('sum(_["foo"])', input_sum)
176
174
  assert_success(status, stderr, "sum only")
177
175
  assert_equal(%w[10], lines(stdout), "sum output")
178
176
 
177
+ stdout, stderr, status = run_jrf('count()', input_sum)
178
+ assert_success(status, stderr, "count only")
179
+ assert_equal(%w[4], lines(stdout), "count output")
180
+
181
+ stdout, stderr, status = run_jrf('count(_["foo"])', input_sum)
182
+ assert_success(status, stderr, "count(expr) only")
183
+ assert_equal(%w[4], lines(stdout), "count(expr) output")
184
+
179
185
  stdout, stderr, status = run_jrf('min(_["foo"])', input_sum)
180
186
  assert_success(status, stderr, "min only")
181
187
  assert_equal(%w[1], lines(stdout), "min output")
@@ -204,6 +210,14 @@ stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> sum(_["foo"])', input
204
210
  assert_success(status, stderr, "sum no matches")
205
211
  assert_equal(%w[0], lines(stdout), "sum no matches output")
206
212
 
213
+ stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> count()', input_sum)
214
+ assert_success(status, stderr, "count no matches")
215
+ assert_equal(%w[0], lines(stdout), "count no matches output")
216
+
217
+ stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> count(_["foo"])', input_sum)
218
+ assert_success(status, stderr, "count(expr) no matches")
219
+ assert_equal(%w[0], lines(stdout), "count(expr) no matches output")
220
+
207
221
  stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> average(_["foo"])', input_sum)
208
222
  assert_success(status, stderr, "average no matches")
209
223
  assert_equal(%w[null], lines(stdout), "average no matches output")
@@ -288,6 +302,97 @@ assert_equal(
288
302
  "array percentile output"
289
303
  )
290
304
 
305
+ input_with_nil = <<~NDJSON
306
+ {"foo":1}
307
+ {"foo":null}
308
+ {"bar":999}
309
+ {"foo":3}
310
+ NDJSON
311
+
312
+ stdout, stderr, status = run_jrf('sum(_["foo"])', input_with_nil)
313
+ assert_success(status, stderr, "sum ignores nil")
314
+ assert_equal(%w[4], lines(stdout), "sum ignores nil output")
315
+
316
+ stdout, stderr, status = run_jrf('min(_["foo"])', input_with_nil)
317
+ assert_success(status, stderr, "min ignores nil")
318
+ assert_equal(%w[1], lines(stdout), "min ignores nil output")
319
+
320
+ stdout, stderr, status = run_jrf('max(_["foo"])', input_with_nil)
321
+ assert_success(status, stderr, "max ignores nil")
322
+ assert_equal(%w[3], lines(stdout), "max ignores nil output")
323
+
324
+ stdout, stderr, status = run_jrf('average(_["foo"])', input_with_nil)
325
+ assert_success(status, stderr, "average ignores nil")
326
+ assert_float_close(2.0, lines(stdout).first.to_f, 1e-12, "average ignores nil output")
327
+
328
+ stdout, stderr, status = run_jrf('stdev(_["foo"])', input_with_nil)
329
+ assert_success(status, stderr, "stdev ignores nil")
330
+ assert_float_close(1.0, lines(stdout).first.to_f, 1e-12, "stdev ignores nil output")
331
+
332
+ stdout, stderr, status = run_jrf('percentile(_["foo"], [0.5, 1.0])', input_with_nil)
333
+ assert_success(status, stderr, "percentile ignores nil")
334
+ assert_equal(
335
+ ['{"percentile":0.5,"value":1}', '{"percentile":1.0,"value":3}'],
336
+ lines(stdout),
337
+ "percentile ignores nil output"
338
+ )
339
+
340
+ stdout, stderr, status = run_jrf('count()', input_with_nil)
341
+ assert_success(status, stderr, "count with nil rows")
342
+ assert_equal(%w[4], lines(stdout), "count with nil rows output")
343
+
344
+ stdout, stderr, status = run_jrf('count(_["foo"])', input_with_nil)
345
+ assert_success(status, stderr, "count(expr) ignores nil")
346
+ assert_equal(%w[2], lines(stdout), "count(expr) ignores nil output")
347
+
348
+ input_all_nil = <<~NDJSON
349
+ {"foo":null}
350
+ {"bar":1}
351
+ NDJSON
352
+
353
+ stdout, stderr, status = run_jrf('sum(_["foo"])', input_all_nil)
354
+ assert_success(status, stderr, "sum all nil")
355
+ assert_equal(%w[0], lines(stdout), "sum all nil output")
356
+
357
+ stdout, stderr, status = run_jrf('min(_["foo"])', input_all_nil)
358
+ assert_success(status, stderr, "min all nil")
359
+ assert_equal(%w[null], lines(stdout), "min all nil output")
360
+
361
+ stdout, stderr, status = run_jrf('max(_["foo"])', input_all_nil)
362
+ assert_success(status, stderr, "max all nil")
363
+ assert_equal(%w[null], lines(stdout), "max all nil output")
364
+
365
+ stdout, stderr, status = run_jrf('average(_["foo"])', input_all_nil)
366
+ assert_success(status, stderr, "average all nil")
367
+ assert_equal(%w[null], lines(stdout), "average all nil output")
368
+
369
+ stdout, stderr, status = run_jrf('stdev(_["foo"])', input_all_nil)
370
+ assert_success(status, stderr, "stdev all nil")
371
+ assert_equal(%w[null], lines(stdout), "stdev all nil output")
372
+
373
+ stdout, stderr, status = run_jrf('percentile(_["foo"], 0.5)', input_all_nil)
374
+ assert_success(status, stderr, "percentile all nil")
375
+ assert_equal(%w[null], lines(stdout), "percentile all nil output")
376
+
377
+ stdout, stderr, status = run_jrf('count(_["foo"])', input_all_nil)
378
+ assert_success(status, stderr, "count(expr) all nil")
379
+ assert_equal(%w[0], lines(stdout), "count(expr) all nil output")
380
+
381
+ input_multi_cols = <<~NDJSON
382
+ {"a":1,"b":10}
383
+ {"a":2,"b":20}
384
+ {"a":3,"b":30}
385
+ {"a":4,"b":40}
386
+ NDJSON
387
+
388
+ stdout, stderr, status = run_jrf('{a: percentile(_["a"], [0.25, 0.50, 1.0]), b: percentile(_["b"], [0.25, 0.50, 1.0])}', input_multi_cols)
389
+ assert_success(status, stderr, "nested array percentile for multiple columns")
390
+ assert_equal(
391
+ ['{"a":[{"percentile":0.25,"value":1},{"percentile":0.5,"value":2},{"percentile":1.0,"value":4}],"b":[{"percentile":0.25,"value":10},{"percentile":0.5,"value":20},{"percentile":1.0,"value":40}]}'],
392
+ lines(stdout),
393
+ "nested array percentile output"
394
+ )
395
+
291
396
  input_reduce = <<~NDJSON
292
397
  {"s":"hello"}
293
398
  {"s":"world"}
@@ -306,6 +411,57 @@ stdout, stderr, status = run_jrf('sum(_["foo"]) >> select(_ > 100)', input_sum)
306
411
  assert_success(status, stderr, "post-reduce select drop")
307
412
  assert_equal([], lines(stdout), "post-reduce select drop output")
308
413
 
414
+ input_whitespace_stream = "{\"foo\":1} {\"foo\":2}\n\t{\"foo\":3}\n"
415
+ stdout, stderr, status = run_jrf('_["foo"]', input_whitespace_stream)
416
+ assert_failure(status, "default NDJSON should reject same-line multi-values")
417
+ assert_includes(stderr, "JSON::ParserError")
418
+
419
+ stdout, stderr, status = run_jrf('_["foo"]', input_whitespace_stream, "--lax")
420
+ assert_success(status, stderr, "whitespace-separated JSON stream with --lax")
421
+ assert_equal(%w[1 2 3], lines(stdout), "whitespace-separated stream output")
422
+
423
+ input_json_seq = "\x1e{\"foo\":10}\n\x1e{\"foo\":20}\n"
424
+ stdout, stderr, status = run_jrf('_["foo"]', input_json_seq)
425
+ assert_failure(status, "RS framing requires --lax")
426
+ assert_includes(stderr, "JSON::ParserError")
427
+
428
+ stdout, stderr, status = run_jrf('_["foo"]', input_json_seq, "--lax")
429
+ assert_success(status, stderr, "json-seq style RS framing with --lax")
430
+ assert_equal(%w[10 20], lines(stdout), "json-seq style output")
431
+
432
+ input_lax_multiline = <<~JSONS
433
+ {
434
+ "foo": 101,
435
+ "bar": {"x": 1}
436
+ }
437
+ {
438
+ "foo": 202,
439
+ "bar": {"x": 2}
440
+ }
441
+ JSONS
442
+ stdout, stderr, status = run_jrf('_["foo"]', input_lax_multiline)
443
+ assert_failure(status, "default NDJSON rejects multiline objects")
444
+ assert_includes(stderr, "JSON::ParserError")
445
+
446
+ stdout, stderr, status = run_jrf('_["bar"]["x"]', input_lax_multiline, "--lax")
447
+ assert_success(status, stderr, "lax accepts multiline objects")
448
+ assert_equal(%w[1 2], lines(stdout), "lax multiline object output")
449
+
450
+ input_lax_mixed_separators = "{\"foo\":1}\n\x1e{\"foo\":2}\t{\"foo\":3}\n"
451
+ stdout, stderr, status = run_jrf('_["foo"]', input_lax_mixed_separators, "--lax")
452
+ assert_success(status, stderr, "lax accepts mixed whitespace and RS separators")
453
+ assert_equal(%w[1 2 3], lines(stdout), "lax mixed separators output")
454
+
455
+ input_lax_with_escaped_newline = "{\"s\":\"line1\\nline2\"}\n{\"s\":\"ok\"}\n"
456
+ stdout, stderr, status = run_jrf('_["s"]', input_lax_with_escaped_newline, "--lax")
457
+ assert_success(status, stderr, "lax handles escaped newlines in strings")
458
+ assert_equal(['"line1\nline2"', '"ok"'], lines(stdout), "lax escaped newline string output")
459
+
460
+ input_lax_trailing_rs = "\x1e{\"foo\":9}\n\x1e"
461
+ stdout, stderr, status = run_jrf('_["foo"]', input_lax_trailing_rs, "--lax")
462
+ assert_success(status, stderr, "lax ignores trailing separator")
463
+ assert_equal(%w[9], lines(stdout), "lax trailing separator output")
464
+
309
465
  stdout, stderr, status = run_jrf('select(_["x"] > ) >> _["foo"]', "")
310
466
  assert_failure(status, "syntax error should fail before row loop")
311
467
  assert_includes(stderr, "syntax error")
@@ -339,4 +495,120 @@ stdout, stderr, status = run_jrf('_["foo"] >> select(_["keep"]) >> _["bar"] >> s
339
495
  assert_success(status, stderr, "select/extract chain")
340
496
  assert_equal(%w[3], lines(stdout), "chain output")
341
497
 
498
+ input_map = <<~NDJSON
499
+ {"values":[1,10,100]}
500
+ {"values":[2,20,200]}
501
+ {"values":[3,30,300]}
502
+ NDJSON
503
+
504
+ stdout, stderr, status = run_jrf('_["values"] >> map { |x| sum(x) }', input_map)
505
+ assert_success(status, stderr, "map with sum")
506
+ assert_equal(['[6,60,600]'], lines(stdout), "map with sum output")
507
+
508
+ stdout, stderr, status = run_jrf('_["values"] >> map { |x| min(x) }', input_map)
509
+ assert_success(status, stderr, "map with min")
510
+ assert_equal(['[1,10,100]'], lines(stdout), "map with min output")
511
+
512
+ stdout, stderr, status = run_jrf('_["values"] >> map { |x| max(x) }', input_map)
513
+ assert_success(status, stderr, "map with max")
514
+ assert_equal(['[3,30,300]'], lines(stdout), "map with max output")
515
+
516
+ input_map_varying = <<~NDJSON
517
+ [1,10]
518
+ [2,20,200]
519
+ [3]
520
+ NDJSON
521
+
522
+ stdout, stderr, status = run_jrf('map { |x| sum(x) }', input_map_varying)
523
+ assert_success(status, stderr, "map varying lengths")
524
+ assert_equal(['[6,30,200]'], lines(stdout), "map varying lengths output")
525
+
526
+ input_map_values = <<~NDJSON
527
+ {"a":1,"b":10}
528
+ {"a":2,"b":20}
529
+ {"a":3,"b":30}
530
+ NDJSON
531
+
532
+ stdout, stderr, status = run_jrf('map_values { |v| sum(v) }', input_map_values)
533
+ assert_success(status, stderr, "map_values with sum")
534
+ assert_equal(['{"a":6,"b":60}'], lines(stdout), "map_values with sum output")
535
+
536
+ stdout, stderr, status = run_jrf('map_values { |v| min(v) }', input_map_values)
537
+ assert_success(status, stderr, "map_values with min")
538
+ assert_equal(['{"a":1,"b":10}'], lines(stdout), "map_values with min output")
539
+
540
+ input_map_values_varying = <<~NDJSON
541
+ {"a":1}
542
+ {"a":2,"b":20}
543
+ {"a":3,"b":30}
544
+ NDJSON
545
+
546
+ stdout, stderr, status = run_jrf('map_values { |v| sum(v) }', input_map_values_varying)
547
+ assert_success(status, stderr, "map_values varying keys")
548
+ assert_equal(['{"a":6,"b":50}'], lines(stdout), "map_values varying keys output")
549
+
550
+ stdout, stderr, status = run_jrf('map_values { |v| count(v) }', input_map_values)
551
+ assert_success(status, stderr, "map_values with count")
552
+ assert_equal(['{"a":3,"b":3}'], lines(stdout), "map_values with count output")
553
+
554
+ stdout, stderr, status = run_jrf('select(false) >> map { |x| sum(x) }', input_map)
555
+ assert_success(status, stderr, "map no matches")
556
+ assert_equal(['[]'], lines(stdout), "map no matches output")
557
+
558
+ stdout, stderr, status = run_jrf('select(false) >> map_values { |v| sum(v) }', input_map_values)
559
+ assert_success(status, stderr, "map_values no matches")
560
+ assert_equal(['{}'], lines(stdout), "map_values no matches output")
561
+
562
+ stdout, stderr, status = run_jrf('map_values { |v| sum(v) } >> map_values { |v| v * 10 }', input_map_values)
563
+ assert_success(status, stderr, "map_values piped to map_values passthrough")
564
+ assert_equal(['{"a":60,"b":600}'], lines(stdout), "map_values piped output")
565
+
566
+ input_gb = <<~NDJSON
567
+ {"status":200,"path":"/a","latency":10}
568
+ {"status":404,"path":"/b","latency":50}
569
+ {"status":200,"path":"/c","latency":30}
570
+ {"status":200,"path":"/d","latency":20}
571
+ NDJSON
572
+
573
+ stdout, stderr, status = run_jrf('group_by(_["status"]) { count() }', input_gb)
574
+ assert_success(status, stderr, "group_by with count")
575
+ assert_equal(['{"200":3,"404":1}'], lines(stdout), "group_by with count output")
576
+
577
+ stdout, stderr, status = run_jrf('group_by(_["status"]) { sum(_["latency"]) }', input_gb)
578
+ assert_success(status, stderr, "group_by with sum")
579
+ assert_equal(['{"200":60,"404":50}'], lines(stdout), "group_by with sum output")
580
+
581
+ stdout, stderr, status = run_jrf('group_by(_["status"]) { average(_["latency"]) }', input_gb)
582
+ assert_success(status, stderr, "group_by with average")
583
+ result = JSON.parse(lines(stdout).first)
584
+ assert_float_close(20.0, result["200"], 1e-12, "group_by average 200")
585
+ assert_float_close(50.0, result["404"], 1e-12, "group_by average 404")
586
+
587
+ stdout, stderr, status = run_jrf('group_by(_["status"])', input_gb)
588
+ assert_success(status, stderr, "group_by default (collect rows)")
589
+ result = JSON.parse(lines(stdout).first)
590
+ assert_equal(3, result["200"].length, "group_by default 200 count")
591
+ assert_equal(1, result["404"].length, "group_by default 404 count")
592
+ assert_equal("/a", result["200"][0]["path"], "group_by default first row")
593
+
594
+ stdout, stderr, status = run_jrf('group_by(_["status"]) { group(_["path"]) }', input_gb)
595
+ assert_success(status, stderr, "group_by with group(expr)")
596
+ assert_equal(['{"200":["/a","/c","/d"],"404":["/b"]}'], lines(stdout), "group_by with group(expr) output")
597
+
598
+ stdout, stderr, status = run_jrf('group_by(_["status"]) { min(_["latency"]) }', input_gb)
599
+ assert_success(status, stderr, "group_by with min")
600
+ assert_equal(['{"200":10,"404":50}'], lines(stdout), "group_by with min output")
601
+
602
+ stdout, stderr, status = run_jrf('group_by(_["status"]) { {total: sum(_["latency"]), n: count()} }', input_gb)
603
+ assert_success(status, stderr, "group_by with multi-reducer")
604
+ assert_equal(['{"200":{"total":60,"n":3},"404":{"total":50,"n":1}}'], lines(stdout), "group_by multi-reducer output")
605
+
606
+ stdout, stderr, status = run_jrf('select(false) >> group_by(_["status"]) { count() }', input_gb)
607
+ assert_success(status, stderr, "group_by no matches")
608
+ assert_equal(['{}'], lines(stdout), "group_by no matches output")
609
+
610
+ stdout, stderr, status = run_jrf('group_by(_["status"]) { count() } >> _[200]', input_gb)
611
+ assert_success(status, stderr, "group_by then extract")
612
+ assert_equal(%w[3], lines(stdout), "group_by then extract output")
613
+
342
614
  puts "ok"
metadata CHANGED
@@ -1,14 +1,28 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jrf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - kazuho
8
8
  bindir: exe
9
9
  cert_chain: []
10
10
  date: 1980-01-02 00:00:00.000000000 Z
11
- dependencies: []
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: oj
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '3.16'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '3.16'
12
26
  description: jrf is a JSON filter with the power and speed of Ruby. It lets you write
13
27
  transforms as Ruby expressions, so you can use arbitrary Ruby logic. It supports
14
28
  extraction, filtering, flattening, sorting, and aggregation in stage pipelines.
@@ -31,6 +45,7 @@ files:
31
45
  - lib/jrf/reducers.rb
32
46
  - lib/jrf/row_context.rb
33
47
  - lib/jrf/runner.rb
48
+ - lib/jrf/stage.rb
34
49
  - lib/jrf/version.rb
35
50
  - test/jrf_test.rb
36
51
  licenses: