jrf 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 34475ad560159e50a8f6ea6dbfae40dc57173b40df31065f4b222abaafe66012
4
- data.tar.gz: 22c046afd9f4fba04788f08796f9ccfe24b21a87522820c8e5873f164de8cc53
3
+ metadata.gz: 9ce648c2afbfe10dc161b08badb05acdb411baf839dde77433927380b6bb7439
4
+ data.tar.gz: 6be5a0851eecd3cfcbe93aff1cb8fdd163a84dd96a7b12e440fc514db03f67a0
5
5
  SHA512:
6
- metadata.gz: 7d90e4a754ae7ca9170db6c7221571cb90077bbd48d6cd55cbefd29342afa89996075c86a3bf645dac94b337b91eceefa036968f490c30bacf52744a319d238f
7
- data.tar.gz: 3d00c51e46a07f63e1d44b8f2013663dd66d3b2f3393046a00a3c26a5f1cb3dd4eabc2db82eacb12ff874625835f49a63e358baaf44639d38b8a9e01a6c3b06d
6
+ metadata.gz: aa4dfead95dbe09453ec720cdbcf77ba4c7e3f1047c60f51d4ff54724dfa540bb1dbd5630ecb07d09d745e1e61e4c236f50f4407ff6d4c17dd5431b385679f57
7
+ data.tar.gz: 03c3f5dd3f36675a2bc31981effc506bb1822bb170e754785ccffe077becdd5af13421b4cbfd18fea1c1262f06feef61561be3e3243ca0379e1e6af21ad003c5
data/Rakefile CHANGED
@@ -8,8 +8,3 @@ Rake::TestTask.new do |t|
8
8
  end
9
9
 
10
10
  task default: :test
11
-
12
- desc "Build man/jrf.1 from README.md"
13
- task :man do
14
- ruby "script/build_man_from_readme.rb"
15
- end
data/lib/jrf/cli.rb CHANGED
@@ -4,16 +4,17 @@ require_relative "runner"
4
4
 
5
5
  module Jrf
6
6
  class CLI
7
- USAGE = "usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'"
7
+ USAGE = "usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'"
8
8
 
9
9
  HELP_TEXT = <<~'TEXT'
10
- usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'
10
+ usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'
11
11
 
12
12
  JSON filter with the power and speed of Ruby.
13
13
 
14
14
  Options:
15
15
  -v, --verbose print parsed stage expressions
16
16
  --lax allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
17
+ -p, --pretty pretty-print JSON output instead of compact NDJSON
17
18
  -h, --help show this help and exit
18
19
 
19
20
  Pipeline:
@@ -28,13 +29,13 @@ module Jrf
28
29
  jrf '_["msg"] >> reduce(nil) { |acc, v| acc ? "#{acc} #{v}" : v }'
29
30
 
30
31
  See Also:
31
- README.md
32
- man jrf
32
+ https://github.com/kazuho/jrf#readme
33
33
  TEXT
34
34
 
35
35
  def self.run(argv = ARGV, input: ARGF, out: $stdout, err: $stderr)
36
36
  verbose = false
37
37
  lax = false
38
+ pretty = false
38
39
 
39
40
  while argv.first&.start_with?("-")
40
41
  case argv.first
@@ -44,6 +45,9 @@ module Jrf
44
45
  when "--lax"
45
46
  lax = true
46
47
  argv.shift
48
+ when "-p", "--pretty"
49
+ pretty = true
50
+ argv.shift
47
51
  when "-h", "--help"
48
52
  out.puts HELP_TEXT
49
53
  return 0
@@ -60,7 +64,7 @@ module Jrf
60
64
  end
61
65
 
62
66
  expression = argv.shift
63
- Runner.new(input: input, out: out, err: err, lax: lax).run(expression, verbose: verbose)
67
+ Runner.new(input: input, out: out, err: err, lax: lax, pretty: pretty).run(expression, verbose: verbose)
64
68
  0
65
69
  end
66
70
  end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "control"
4
+ require_relative "row_context"
5
+ require_relative "stage"
6
+
7
+ module Jrf
8
+ class Pipeline
9
+ def initialize(*blocks)
10
+ raise ArgumentError, "at least one stage block is required" if blocks.empty?
11
+
12
+ @ctx = RowContext.new
13
+ @stages = blocks.map { |block| Stage.new(@ctx, block, src: nil) }
14
+ end
15
+
16
+ # Run the pipeline on an enumerable of input values.
17
+ #
18
+ # Without a block, returns an Array of output values.
19
+ # With a block, streams each output value to the block.
20
+ #
21
+ # @param input [Enumerable] input values to process
22
+ # @yieldparam value output value
23
+ # @return [Array, nil] output values (without block), or nil (with block)
24
+ def call(input, &on_output)
25
+ if on_output
26
+ call_streaming(input, &on_output)
27
+ else
28
+ results = []
29
+ call_streaming(input) { |v| results << v }
30
+ results
31
+ end
32
+ end
33
+
34
+ private
35
+
36
+ def call_streaming(input, &on_output)
37
+ error = nil
38
+ begin
39
+ input.each { |value| process_value(value, @stages, &on_output) }
40
+ rescue StandardError => e
41
+ error = e
42
+ ensure
43
+ flush_reducers(@stages, &on_output)
44
+ end
45
+ raise error if error
46
+ end
47
+
48
+ def process_value(input, stages, &on_output)
49
+ current_values = [input]
50
+
51
+ stages.each do |stage|
52
+ next_values = []
53
+
54
+ current_values.each do |value|
55
+ out = stage.call(value)
56
+ if out.equal?(Control::DROPPED)
57
+ next
58
+ elsif out.is_a?(Control::Flat)
59
+ unless out.value.is_a?(Array)
60
+ raise TypeError, "flat expects Array, got #{out.value.class}"
61
+ end
62
+ next_values.concat(out.value)
63
+ else
64
+ next_values << out
65
+ end
66
+ end
67
+
68
+ return if next_values.empty?
69
+ current_values = next_values
70
+ end
71
+
72
+ current_values.each(&on_output)
73
+ end
74
+
75
+ def flush_reducers(stages, &on_output)
76
+ stages.each_with_index do |stage, idx|
77
+ rows = stage.finish
78
+ next if rows.empty?
79
+
80
+ rest = stages.drop(idx + 1)
81
+ rows.each { |value| process_value(value, rest, &on_output) }
82
+ end
83
+ end
84
+ end
85
+ end
@@ -143,9 +143,7 @@ module Jrf
143
143
  else
144
144
  ->(values) {
145
145
  sorted = values.sort
146
- percentages.map do |p|
147
- { "percentile" => p, "value" => ctx.send(:percentile_value, sorted, p) }
148
- end
146
+ [percentages.map { |p| ctx.send(:percentile_value, sorted, p) }]
149
147
  }
150
148
  end
151
149
 
data/lib/jrf/runner.rb CHANGED
@@ -1,37 +1,19 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "json"
4
- require_relative "control"
4
+ require_relative "pipeline"
5
5
  require_relative "pipeline_parser"
6
- require_relative "reducers"
7
- require_relative "row_context"
8
- require_relative "stage"
9
6
 
10
7
  module Jrf
11
8
  class Runner
12
9
  RS_CHAR = "\x1e"
13
10
 
14
- class ProbeValue
15
- def [](key)
16
- self
17
- end
18
-
19
- def method_missing(name, *args, &block)
20
- self
21
- end
22
-
23
- def respond_to_missing?(name, include_private = false)
24
- true
25
- end
26
- end
27
-
28
- PROBE_VALUE = ProbeValue.new
29
-
30
- def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false)
11
+ def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false, pretty: false)
31
12
  @input = input
32
13
  @out = out
33
14
  @err = err
34
15
  @lax = lax
16
+ @pretty = pretty
35
17
  end
36
18
 
37
19
  def run(expression, verbose: false)
@@ -39,53 +21,19 @@ module Jrf
39
21
  stages = parsed[:stages]
40
22
  dump_stages(stages) if verbose
41
23
 
42
- ctx = RowContext.new
43
- compiled = compile_stages(stages, ctx)
44
- compiled.each { |stage| stage.call(PROBE_VALUE, probing: true) rescue nil }
45
- error = nil
24
+ blocks = stages.map { |stage|
25
+ eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
26
+ }
27
+ pipeline = Pipeline.new(*blocks)
46
28
 
47
- begin
48
- each_input_value do |value|
49
- process_value(value, compiled)
50
- end
51
- rescue StandardError => e
52
- error = e
53
- ensure
54
- flush_reducers(compiled)
29
+ input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
30
+ pipeline.call(input_enum) do |value|
31
+ @out.puts(@pretty ? JSON.pretty_generate(value) : JSON.generate(value))
55
32
  end
56
-
57
- raise error if error
58
33
  end
59
34
 
60
35
  private
61
36
 
62
- def process_value(input, stages)
63
- current_values = [input]
64
-
65
- stages.each do |stage|
66
- next_values = []
67
-
68
- current_values.each do |value|
69
- out = stage.call(value)
70
- if out.equal?(Control::DROPPED)
71
- next
72
- elsif out.is_a?(Control::Flat)
73
- unless out.value.is_a?(Array)
74
- raise TypeError, "flat expects Array, got #{out.value.class}"
75
- end
76
- next_values.concat(out.value)
77
- else
78
- next_values << out
79
- end
80
- end
81
-
82
- return if next_values.empty?
83
- current_values = next_values
84
- end
85
-
86
- current_values.each { |value| @out.puts JSON.generate(value) }
87
- end
88
-
89
37
  def each_input_value
90
38
  return each_input_value_lax { |value| yield value } if @lax
91
39
 
@@ -124,33 +72,10 @@ module Jrf
124
72
  raise JSON::ParserError, e.message
125
73
  end
126
74
 
127
- def compile_stages(stages, ctx)
128
- mod = Module.new
129
-
130
- stages.each_with_index.map do |stage, i|
131
- method_name = :"__jrf_stage_#{i}"
132
- mod.module_eval("def #{method_name}; #{stage[:src]}; end", "(jrf stage #{i})", 1)
133
- Stage.new(ctx, method_name, src: stage[:src])
134
- end.tap { ctx.extend(mod) }
135
- end
136
-
137
75
  def dump_stages(stages)
138
76
  stages.each_with_index do |stage, i|
139
77
  @err.puts "stage[#{i}]: #{stage[:src]}"
140
78
  end
141
79
  end
142
-
143
- def flush_reducers(stages)
144
- tail = stages
145
- loop do
146
- idx = tail.index(&:reducer?)
147
- break unless idx
148
-
149
- rows = tail[idx].finish
150
- rest = tail.drop(idx + 1)
151
- rows.each { |value| process_value(value, rest) }
152
- tail = rest
153
- end
154
- end
155
80
  end
156
81
  end
data/lib/jrf/stage.rb CHANGED
@@ -7,7 +7,7 @@ module Jrf
7
7
  class Stage
8
8
  ReducerToken = Struct.new(:index)
9
9
 
10
- attr_reader :method_name, :src
10
+ attr_reader :src
11
11
 
12
12
  def self.resolve_template(template, reducers)
13
13
  if template.is_a?(ReducerToken)
@@ -22,28 +22,27 @@ module Jrf
22
22
  end
23
23
  end
24
24
 
25
- def initialize(ctx, method_name, src: nil)
25
+ def initialize(ctx, block, src: nil)
26
26
  @ctx = ctx
27
- @method_name = method_name
27
+ @block = block
28
28
  @src = src
29
29
  @reducers = []
30
30
  @cursor = 0
31
31
  @template = nil
32
32
  @mode = nil # nil=unknown, :reducer, :passthrough
33
- @probing = false
33
+ @map_transforms = {}
34
34
  end
35
35
 
36
- def call(input, probing: false)
36
+ def call(input)
37
37
  @ctx.reset(input)
38
38
  @cursor = 0
39
- @probing = probing
40
39
  @ctx.__jrf_current_stage = self
41
- result = @ctx.public_send(@method_name)
40
+ result = @ctx.instance_eval(&@block)
42
41
 
43
42
  if @mode.nil? && @reducers.any?
44
43
  @mode = :reducer
45
44
  @template = result
46
- elsif @mode.nil? && !probing
45
+ elsif @mode.nil?
47
46
  @mode = :passthrough
48
47
  end
49
48
 
@@ -54,43 +53,58 @@ module Jrf
54
53
  idx = @cursor
55
54
  finish_rows = finish || ->(acc) { [acc] }
56
55
  @reducers[idx] ||= Reducers.reduce(initial, finish: finish_rows, &step_fn)
57
- @reducers[idx].step(value) unless @probing
56
+ @reducers[idx].step(value)
58
57
  @cursor += 1
59
58
  ReducerToken.new(idx)
60
59
  end
61
60
 
62
61
  def allocate_map(type, collection, &block)
63
62
  idx = @cursor
63
+ @cursor += 1
64
+
65
+ # Transformation mode (detected on first call)
66
+ if @map_transforms[idx]
67
+ case type
68
+ when :array then return collection.map(&block)
69
+ when :hash then return collection.transform_values(&block)
70
+ end
71
+ end
72
+
64
73
  map_reducer = (@reducers[idx] ||= MapReducer.new(type))
65
74
 
66
- unless @probing
67
- saved_obj = @ctx._
75
+ case type
76
+ when :array
77
+ raise TypeError, "map expects Array, got #{collection.class}" unless collection.is_a?(Array)
78
+ collection.each_with_index do |v, i|
79
+ slot = map_reducer.slot(i)
80
+ with_scoped_reducers(slot.reducers) do
81
+ result = block.call(v)
82
+ slot.template ||= result
83
+ end
84
+ end
85
+ when :hash
86
+ raise TypeError, "map_values expects Hash, got #{collection.class}" unless collection.is_a?(Hash)
87
+ collection.each do |k, v|
88
+ slot = map_reducer.slot(k)
89
+ with_scoped_reducers(slot.reducers) do
90
+ result = block.call(v)
91
+ slot.template ||= result
92
+ end
93
+ end
94
+ end
68
95
 
96
+ # Detect transformation: no reducers were allocated in any slot
97
+ if @mode.nil? && map_reducer.slots.values.all? { |s| s.reducers.empty? }
98
+ @map_transforms[idx] = true
99
+ @reducers[idx] = nil
69
100
  case type
70
101
  when :array
71
- raise TypeError, "map expects Array, got #{collection.class}" unless collection.is_a?(Array)
72
- collection.each_with_index do |v, i|
73
- @ctx.reset(v)
74
- with_scoped_reducers(map_reducer.slots[i] ||= []) do
75
- result = block.call(v)
76
- map_reducer.templates[i] ||= result
77
- end
78
- end
102
+ return map_reducer.slots.sort_by { |k, _| k }.map { |_, s| s.template }
79
103
  when :hash
80
- raise TypeError, "map_values expects Hash, got #{collection.class}" unless collection.is_a?(Hash)
81
- collection.each do |k, v|
82
- @ctx.reset(v)
83
- with_scoped_reducers(map_reducer.slots[k] ||= []) do
84
- result = block.call(v)
85
- map_reducer.templates[k] ||= result
86
- end
87
- end
104
+ return map_reducer.slots.transform_values(&:template)
88
105
  end
89
-
90
- @ctx.reset(saved_obj)
91
106
  end
92
107
 
93
- @cursor += 1
94
108
  ReducerToken.new(idx)
95
109
  end
96
110
 
@@ -98,22 +112,17 @@ module Jrf
98
112
  idx = @cursor
99
113
  map_reducer = (@reducers[idx] ||= MapReducer.new(:hash))
100
114
 
101
- unless @probing
102
- slot = (map_reducer.slots[key] ||= [])
103
- with_scoped_reducers(slot) do
104
- result = block.call
105
- map_reducer.templates[key] ||= result
106
- end
115
+ row = @ctx._
116
+ slot = map_reducer.slot(key)
117
+ with_scoped_reducers(slot.reducers) do
118
+ result = block.call(row)
119
+ slot.template ||= result
107
120
  end
108
121
 
109
122
  @cursor += 1
110
123
  ReducerToken.new(idx)
111
124
  end
112
125
 
113
- def reducer?
114
- @mode == :reducer
115
- end
116
-
117
126
  def finish
118
127
  return [] unless @mode == :reducer && @reducers.any?
119
128
 
@@ -138,25 +147,38 @@ module Jrf
138
147
  end
139
148
 
140
149
  class MapReducer
141
- attr_reader :slots, :templates
150
+ attr_reader :slots
142
151
 
143
152
  def initialize(type)
144
153
  @type = type
145
154
  @slots = {}
146
- @templates = {}
155
+ end
156
+
157
+ def slot(key)
158
+ @slots[key] ||= SlotState.new
147
159
  end
148
160
 
149
161
  def finish
150
162
  case @type
151
163
  when :array
152
164
  keys = @slots.keys.sort
153
- [keys.map { |k| Stage.resolve_template(@templates[k], @slots[k]) }]
165
+ [keys.map { |k| Stage.resolve_template(@slots[k].template, @slots[k].reducers) }]
154
166
  when :hash
155
167
  result = {}
156
- @slots.each { |k, reducers| result[k] = Stage.resolve_template(@templates[k], reducers) }
168
+ @slots.each { |k, s| result[k] = Stage.resolve_template(s.template, s.reducers) }
157
169
  [result]
158
170
  end
159
171
  end
172
+
173
+ class SlotState
174
+ attr_reader :reducers
175
+ attr_accessor :template
176
+
177
+ def initialize
178
+ @reducers = []
179
+ @template = nil
180
+ end
181
+ end
160
182
  end
161
183
  end
162
184
  end
data/lib/jrf/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Jrf
4
- VERSION = "0.1.3"
4
+ VERSION = "0.1.4"
5
5
  end
data/lib/jrf.rb CHANGED
@@ -2,3 +2,21 @@
2
2
 
3
3
  require_relative "jrf/version"
4
4
  require_relative "jrf/cli"
5
+ require_relative "jrf/pipeline"
6
+
7
+ module Jrf
8
+ # Create a pipeline from one or more stage blocks.
9
+ #
10
+ # Each block is evaluated in a context where +_+ is the current value.
11
+ # All jrf built-in functions (+select+, +sum+, +map+, +group_by+, etc.)
12
+ # are available inside blocks. See https://github.com/kazuho/jrf#readme for the full list.
13
+ #
14
+ # @param blocks [Array<Proc>] one or more stage procs
15
+ # @return [Pipeline] a callable pipeline
16
+ # @example
17
+ # j = Jrf.new(proc { select(_["x"] > 10) }, proc { sum(_["x"]) })
18
+ # j.call([{"x" => 20}, {"x" => 30}]) # => [50]
19
+ def self.new(*blocks)
20
+ Pipeline.new(*blocks)
21
+ end
22
+ end
data/test/jrf_test.rb CHANGED
@@ -92,15 +92,15 @@ assert_includes(stderr, 'stage[1]: _["hello"]')
92
92
 
93
93
  stdout, stderr, status = Open3.capture3("./exe/jrf", "--help")
94
94
  assert_success(status, stderr, "help option")
95
- assert_includes(stdout, "usage: jrf [-v] [--lax] [--help] 'STAGE >> STAGE >> ...'")
95
+ assert_includes(stdout, "usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'")
96
96
  assert_includes(stdout, "JSON filter with the power and speed of Ruby.")
97
97
  assert_includes(stdout, "--lax")
98
+ assert_includes(stdout, "--pretty")
98
99
  assert_includes(stdout, "Pipeline:")
99
100
  assert_includes(stdout, "Connect stages with top-level >>.")
100
101
  assert_includes(stdout, "The current value in each stage is available as _.")
101
102
  assert_includes(stdout, "See Also:")
102
- assert_includes(stdout, "README.md")
103
- assert_includes(stdout, "man jrf")
103
+ assert_includes(stdout, "https://github.com/kazuho/jrf#readme")
104
104
  assert_equal([], lines(stderr), "help stderr output")
105
105
 
106
106
  stdout, stderr, status = run_jrf('select(_["hello"] == 123) >> _["hello"]', input_hello, "--verbose")
@@ -108,6 +108,21 @@ assert_success(status, stderr, "dump stages verbose alias")
108
108
  assert_equal(%w[123], lines(stdout), "dump stages verbose alias output")
109
109
  assert_includes(stderr, 'stage[0]: select(_["hello"] == 123)')
110
110
 
111
+ stdout, stderr, status = run_jrf('_', input_hello, "--pretty")
112
+ assert_success(status, stderr, "pretty output")
113
+ assert_equal(
114
+ [
115
+ "{",
116
+ "\"hello\": 123",
117
+ "}",
118
+ "{",
119
+ "\"hello\": 456",
120
+ "}"
121
+ ],
122
+ lines(stdout),
123
+ "pretty output lines"
124
+ )
125
+
111
126
  input_regex = <<~NDJSON
112
127
  {"foo":{"bar":"ok"},"x":50}
113
128
  {"foo":{"bar":"ng"},"x":70}
@@ -194,6 +209,10 @@ stdout, stderr, status = run_jrf('select(_["x"] > 10) >> sum(_["foo"])', input_s
194
209
  assert_success(status, stderr, "select + sum")
195
210
  assert_equal(%w[9], lines(stdout), "select + sum output")
196
211
 
212
+ stdout, stderr, status = run_jrf('{total: sum(_["foo"]), n: count()}', input_sum)
213
+ assert_success(status, stderr, "structured reducer result")
214
+ assert_equal(['{"total":10,"n":4}'], lines(stdout), "structured reducer result output")
215
+
197
216
  stdout, stderr, status = run_jrf('average(_["foo"])', input_sum)
198
217
  assert_success(status, stderr, "average")
199
218
  assert_float_close(2.5, lines(stdout).first.to_f, 1e-12, "average output")
@@ -206,33 +225,37 @@ stdout, stderr, status = run_jrf('_["foo"] >> sum(_ * 2)', input_sum)
206
225
  assert_success(status, stderr, "extract + sum")
207
226
  assert_equal(%w[20], lines(stdout), "extract + sum output")
208
227
 
228
+ stdout, stderr, status = run_jrf('sum(2 * _["foo"])', input_sum)
229
+ assert_success(status, stderr, "sum with literal on left")
230
+ assert_equal(%w[20], lines(stdout), "sum with literal on left output")
231
+
209
232
  stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> sum(_["foo"])', input_sum)
210
233
  assert_success(status, stderr, "sum no matches")
211
- assert_equal(%w[0], lines(stdout), "sum no matches output")
234
+ assert_equal([], lines(stdout), "sum no matches output")
212
235
 
213
236
  stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> count()', input_sum)
214
237
  assert_success(status, stderr, "count no matches")
215
- assert_equal(%w[0], lines(stdout), "count no matches output")
238
+ assert_equal([], lines(stdout), "count no matches output")
216
239
 
217
240
  stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> count(_["foo"])', input_sum)
218
241
  assert_success(status, stderr, "count(expr) no matches")
219
- assert_equal(%w[0], lines(stdout), "count(expr) no matches output")
242
+ assert_equal([], lines(stdout), "count(expr) no matches output")
220
243
 
221
244
  stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> average(_["foo"])', input_sum)
222
245
  assert_success(status, stderr, "average no matches")
223
- assert_equal(%w[null], lines(stdout), "average no matches output")
246
+ assert_equal([], lines(stdout), "average no matches output")
224
247
 
225
248
  stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> stdev(_["foo"])', input_sum)
226
249
  assert_success(status, stderr, "stdev no matches")
227
- assert_equal(%w[null], lines(stdout), "stdev no matches output")
250
+ assert_equal([], lines(stdout), "stdev no matches output")
228
251
 
229
252
  stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> min(_["foo"])', input_sum)
230
253
  assert_success(status, stderr, "min no matches")
231
- assert_equal(%w[null], lines(stdout), "min no matches output")
254
+ assert_equal([], lines(stdout), "min no matches output")
232
255
 
233
256
  stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> max(_["foo"])', input_sum)
234
257
  assert_success(status, stderr, "max no matches")
235
- assert_equal(%w[null], lines(stdout), "max no matches output")
258
+ assert_equal([], lines(stdout), "max no matches output")
236
259
 
237
260
  stdout, stderr, status = run_jrf('sum(_["foo"]) >> _ + 1', input_sum)
238
261
  assert_success(status, stderr, "reduce in middle")
@@ -274,7 +297,7 @@ assert_equal([], lines(stdout), "sort no matches output")
274
297
 
275
298
  stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> _["foo"] >> group', input_sum)
276
299
  assert_success(status, stderr, "group no matches")
277
- assert_equal(['[]'], lines(stdout), "group no matches output")
300
+ assert_equal([], lines(stdout), "group no matches output")
278
301
 
279
302
  input_group_multi = <<~NDJSON
280
303
  {"x":1,"y":"a"}
@@ -288,7 +311,7 @@ assert_equal(['{"a":[1,2,3],"b":["a","b","c"]}'], lines(stdout), "group in hash
288
311
 
289
312
  stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> {a: group(_["x"]), b: group(_["y"])}', input_group_multi)
290
313
  assert_success(status, stderr, "group in hash no matches")
291
- assert_equal(['{"a":[],"b":[]}'], lines(stdout), "group in hash no-match output")
314
+ assert_equal([], lines(stdout), "group in hash no-match output")
292
315
 
293
316
  stdout, stderr, status = run_jrf('percentile(_["foo"], 0.50)', input_sum)
294
317
  assert_success(status, stderr, "single percentile")
@@ -297,7 +320,7 @@ assert_equal(%w[2], lines(stdout), "single percentile output")
297
320
  stdout, stderr, status = run_jrf('percentile(_["foo"], [0.25, 0.50, 1.0])', input_sum)
298
321
  assert_success(status, stderr, "array percentile")
299
322
  assert_equal(
300
- ['{"percentile":0.25,"value":1}', '{"percentile":0.5,"value":2}', '{"percentile":1.0,"value":4}'],
323
+ ['[1,2,4]'],
301
324
  lines(stdout),
302
325
  "array percentile output"
303
326
  )
@@ -332,7 +355,7 @@ assert_float_close(1.0, lines(stdout).first.to_f, 1e-12, "stdev ignores nil outp
332
355
  stdout, stderr, status = run_jrf('percentile(_["foo"], [0.5, 1.0])', input_with_nil)
333
356
  assert_success(status, stderr, "percentile ignores nil")
334
357
  assert_equal(
335
- ['{"percentile":0.5,"value":1}', '{"percentile":1.0,"value":3}'],
358
+ ['[1,3]'],
336
359
  lines(stdout),
337
360
  "percentile ignores nil output"
338
361
  )
@@ -388,7 +411,7 @@ NDJSON
388
411
  stdout, stderr, status = run_jrf('{a: percentile(_["a"], [0.25, 0.50, 1.0]), b: percentile(_["b"], [0.25, 0.50, 1.0])}', input_multi_cols)
389
412
  assert_success(status, stderr, "nested array percentile for multiple columns")
390
413
  assert_equal(
391
- ['{"a":[{"percentile":0.25,"value":1},{"percentile":0.5,"value":2},{"percentile":1.0,"value":4}],"b":[{"percentile":0.25,"value":10},{"percentile":0.5,"value":20},{"percentile":1.0,"value":40}]}'],
414
+ ['{"a":[1,2,4],"b":[10,20,40]}'],
392
415
  lines(stdout),
393
416
  "nested array percentile output"
394
417
  )
@@ -513,6 +536,10 @@ stdout, stderr, status = run_jrf('_["values"] >> map { |x| max(x) }', input_map)
513
536
  assert_success(status, stderr, "map with max")
514
537
  assert_equal(['[3,30,300]'], lines(stdout), "map with max output")
515
538
 
539
+ stdout, stderr, status = run_jrf('_["values"] >> map { |x| sum(_[0] + x) }', input_map)
540
+ assert_success(status, stderr, "map keeps ambient _")
541
+ assert_equal(['[12,66,606]'], lines(stdout), "map ambient _ output")
542
+
516
543
  input_map_varying = <<~NDJSON
517
544
  [1,10]
518
545
  [2,20,200]
@@ -551,18 +578,35 @@ stdout, stderr, status = run_jrf('map_values { |v| count(v) }', input_map_values
551
578
  assert_success(status, stderr, "map_values with count")
552
579
  assert_equal(['{"a":3,"b":3}'], lines(stdout), "map_values with count output")
553
580
 
581
+ stdout, stderr, status = run_jrf('map_values { |v| sum(_["a"] + v) }', input_map_values)
582
+ assert_success(status, stderr, "map_values keeps ambient _")
583
+ assert_equal(['{"a":12,"b":66}'], lines(stdout), "map_values ambient _ output")
584
+
554
585
  stdout, stderr, status = run_jrf('select(false) >> map { |x| sum(x) }', input_map)
555
586
  assert_success(status, stderr, "map no matches")
556
- assert_equal(['[]'], lines(stdout), "map no matches output")
587
+ assert_equal([], lines(stdout), "map no matches output")
557
588
 
558
589
  stdout, stderr, status = run_jrf('select(false) >> map_values { |v| sum(v) }', input_map_values)
559
590
  assert_success(status, stderr, "map_values no matches")
560
- assert_equal(['{}'], lines(stdout), "map_values no matches output")
591
+ assert_equal([], lines(stdout), "map_values no matches output")
561
592
 
562
593
  stdout, stderr, status = run_jrf('map_values { |v| sum(v) } >> map_values { |v| v * 10 }', input_map_values)
563
594
  assert_success(status, stderr, "map_values piped to map_values passthrough")
564
595
  assert_equal(['{"a":60,"b":600}'], lines(stdout), "map_values piped output")
565
596
 
597
+ # map/map_values transformation (no reducers)
598
+ stdout, stderr, status = run_jrf('_["values"] >> map { |x| x + 1 }', input_map)
599
+ assert_success(status, stderr, "map transform")
600
+ assert_equal(['[2,11,101]', '[3,21,201]', '[4,31,301]'], lines(stdout), "map transform output")
601
+
602
+ stdout, stderr, status = run_jrf('map_values { |v| v * 2 }', input_map_values)
603
+ assert_success(status, stderr, "map_values transform")
604
+ assert_equal(['{"a":2,"b":20}', '{"a":4,"b":40}', '{"a":6,"b":60}'], lines(stdout), "map_values transform output")
605
+
606
+ stdout, stderr, status = run_jrf('_["values"] >> map { |x| x + 1 } >> map { |x| x * 10 }', input_map)
607
+ assert_success(status, stderr, "chained map transforms")
608
+ assert_equal(['[20,110,1010]', '[30,210,2010]', '[40,310,3010]'], lines(stdout), "chained map transforms output")
609
+
566
610
  input_gb = <<~NDJSON
567
611
  {"status":200,"path":"/a","latency":10}
568
612
  {"status":404,"path":"/b","latency":50}
@@ -574,11 +618,11 @@ stdout, stderr, status = run_jrf('group_by(_["status"]) { count() }', input_gb)
574
618
  assert_success(status, stderr, "group_by with count")
575
619
  assert_equal(['{"200":3,"404":1}'], lines(stdout), "group_by with count output")
576
620
 
577
- stdout, stderr, status = run_jrf('group_by(_["status"]) { sum(_["latency"]) }', input_gb)
621
+ stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| sum(row["latency"]) }', input_gb)
578
622
  assert_success(status, stderr, "group_by with sum")
579
623
  assert_equal(['{"200":60,"404":50}'], lines(stdout), "group_by with sum output")
580
624
 
581
- stdout, stderr, status = run_jrf('group_by(_["status"]) { average(_["latency"]) }', input_gb)
625
+ stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| average(row["latency"]) }', input_gb)
582
626
  assert_success(status, stderr, "group_by with average")
583
627
  result = JSON.parse(lines(stdout).first)
584
628
  assert_float_close(20.0, result["200"], 1e-12, "group_by average 200")
@@ -591,24 +635,87 @@ assert_equal(3, result["200"].length, "group_by default 200 count")
591
635
  assert_equal(1, result["404"].length, "group_by default 404 count")
592
636
  assert_equal("/a", result["200"][0]["path"], "group_by default first row")
593
637
 
594
- stdout, stderr, status = run_jrf('group_by(_["status"]) { group(_["path"]) }', input_gb)
638
+ stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| group(row["path"]) }', input_gb)
595
639
  assert_success(status, stderr, "group_by with group(expr)")
596
640
  assert_equal(['{"200":["/a","/c","/d"],"404":["/b"]}'], lines(stdout), "group_by with group(expr) output")
597
641
 
598
- stdout, stderr, status = run_jrf('group_by(_["status"]) { min(_["latency"]) }', input_gb)
642
+ stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| min(row["latency"]) }', input_gb)
599
643
  assert_success(status, stderr, "group_by with min")
600
644
  assert_equal(['{"200":10,"404":50}'], lines(stdout), "group_by with min output")
601
645
 
602
- stdout, stderr, status = run_jrf('group_by(_["status"]) { {total: sum(_["latency"]), n: count()} }', input_gb)
646
+ stdout, stderr, status = run_jrf('group_by(_["status"]) { |row| {total: sum(row["latency"]), n: count()} }', input_gb)
603
647
  assert_success(status, stderr, "group_by with multi-reducer")
604
648
  assert_equal(['{"200":{"total":60,"n":3},"404":{"total":50,"n":1}}'], lines(stdout), "group_by multi-reducer output")
605
649
 
606
650
  stdout, stderr, status = run_jrf('select(false) >> group_by(_["status"]) { count() }', input_gb)
607
651
  assert_success(status, stderr, "group_by no matches")
608
- assert_equal(['{}'], lines(stdout), "group_by no matches output")
652
+ assert_equal([], lines(stdout), "group_by no matches output")
609
653
 
610
654
  stdout, stderr, status = run_jrf('group_by(_["status"]) { count() } >> _[200]', input_gb)
611
655
  assert_success(status, stderr, "group_by then extract")
612
656
  assert_equal(%w[3], lines(stdout), "group_by then extract output")
613
657
 
658
+ # === Library API (Jrf.new) ===
659
+
660
+ require_relative "../lib/jrf"
661
+
662
+ # passthrough
663
+ j = Jrf.new(proc { _ })
664
+ assert_equal([{"a" => 1}, {"a" => 2}], j.call([{"a" => 1}, {"a" => 2}]), "library passthrough")
665
+
666
+ # extract
667
+ j = Jrf.new(proc { _["a"] })
668
+ assert_equal([1, 2], j.call([{"a" => 1}, {"a" => 2}]), "library extract")
669
+
670
+ # select + extract (two stages)
671
+ j = Jrf.new(
672
+ proc { select(_["a"] > 1) },
673
+ proc { _["a"] }
674
+ )
675
+ assert_equal([2, 3], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library select + extract")
676
+
677
+ # sum
678
+ j = Jrf.new(proc { sum(_["a"]) })
679
+ assert_equal([6], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library sum")
680
+
681
+ # sum with literal on left
682
+ j = Jrf.new(proc { sum(2 * _["a"]) })
683
+ assert_equal([12], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library sum literal on left")
684
+
685
+ # structured reducers
686
+ j = Jrf.new(proc { {total: sum(_["a"]), n: count()} })
687
+ assert_equal([{total: 6, n: 3}], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library structured reducers")
688
+
689
+ # map transform
690
+ j = Jrf.new(proc { map { |x| x + 1 } })
691
+ assert_equal([[2, 3], [4, 5]], j.call([[1, 2], [3, 4]]), "library map transform")
692
+
693
+ # map reduce
694
+ j = Jrf.new(proc { map { |x| sum(x) } })
695
+ assert_equal([[4, 6]], j.call([[1, 2], [3, 4]]), "library map reduce")
696
+
697
+ # map_values transform
698
+ j = Jrf.new(proc { map_values { |v| v * 10 } })
699
+ assert_equal([{"a" => 10, "b" => 20}], j.call([{"a" => 1, "b" => 2}]), "library map_values transform")
700
+
701
+ # group_by
702
+ j = Jrf.new(proc { group_by(_["k"]) { count() } })
703
+ assert_equal([{"x" => 2, "y" => 1}], j.call([{"k" => "x"}, {"k" => "x"}, {"k" => "y"}]), "library group_by")
704
+
705
+ # reducer then passthrough
706
+ j = Jrf.new(
707
+ proc { sum(_["a"]) },
708
+ proc { _ + 1 }
709
+ )
710
+ assert_equal([7], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library reducer then passthrough")
711
+
712
+ # closure over local variables
713
+ threshold = 2
714
+ j = Jrf.new(proc { select(_["a"] > threshold) })
715
+ assert_equal([{"a" => 3}], j.call([{"a" => 1}, {"a" => 2}, {"a" => 3}]), "library closure")
716
+
717
+ # empty input
718
+ j = Jrf.new(proc { sum(_) })
719
+ assert_equal([], j.call([]), "library empty input")
720
+
614
721
  puts "ok"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jrf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - kazuho
@@ -41,6 +41,7 @@ files:
41
41
  - lib/jrf.rb
42
42
  - lib/jrf/cli.rb
43
43
  - lib/jrf/control.rb
44
+ - lib/jrf/pipeline.rb
44
45
  - lib/jrf/pipeline_parser.rb
45
46
  - lib/jrf/reducers.rb
46
47
  - lib/jrf/row_context.rb