jrf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/test/jrf_test.rb ADDED
@@ -0,0 +1,325 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "open3"
4
+
5
# Spawns the jrf executable with the given pipeline expression and feeds it
# +input+ on standard input.
#
# @param expr [String] pipeline expression, passed as the last argument
# @param input [String] text written to the child's stdin
# @param opts [Array<String>] extra command-line flags, placed before +expr+
# @return [Array] whatever Open3.capture3 returns: stdout, stderr, exit status
def run_jrf(expr, input, *opts)
  # The path is relative, so it is resolved against the current working directory.
  argv = ["./exe/jrf", *opts, expr]
  Open3.capture3(*argv, stdin_data: input)
end
8
+
9
# Raises a RuntimeError unless +expected+ == +actual+.
#
# @param expected [Object] value the test expects
# @param actual [Object] value the test observed
# @param msg [String, nil] optional context appended to the failure message
# @return [nil] when the values compare equal
def assert_equal(expected, actual, msg = nil)
  unless expected == actual
    context = msg ? " (#{msg})" : ""
    raise "assert_equal failed#{context}\nexpected: #{expected.inspect}\nactual: #{actual.inspect}"
  end
end
14
+
15
# Raises a RuntimeError unless +text+ reports that it includes +fragment+.
#
# @param text [#include?] container to search (the tests pass stderr strings)
# @param fragment [Object] item that must be present
# @param msg [String, nil] optional context appended to the failure message
# @return [nil] when the fragment is present
def assert_includes(text, fragment, msg = nil)
  unless text.include?(fragment)
    context = msg ? " (#{msg})" : ""
    raise "assert_includes failed#{context}\ntext: #{text.inspect}\nfragment: #{fragment.inspect}"
  end
end
20
+
21
# Raises a RuntimeError unless +status.success?+ is truthy; the captured
# stderr text is embedded in the failure message to aid debugging.
#
# @param status [#success?] exit status of the child process
# @param stderr [String] captured standard error of the child process
# @param msg [String, nil] optional context appended to the failure message
# @return [nil] when the status reports success
def assert_success(status, stderr, msg = nil)
  unless status.success?
    context = msg ? " (#{msg})" : ""
    raise "expected success#{context}, got failure\nstderr: #{stderr}"
  end
end
26
+
27
# Raises a RuntimeError when +status.success?+ is truthy, i.e. when a command
# that was supposed to fail exited successfully.
#
# @param status [#success?] exit status of the child process
# @param msg [String, nil] optional context appended to the failure message
# @return [nil] when the status does not report success
def assert_failure(status, msg = nil)
  if status.success?
    context = msg ? " (#{msg})" : ""
    raise "expected failure#{context}, got success"
  end
end
32
+
33
# Raises a RuntimeError unless +actual+ lies within +epsilon+ of +expected+
# (absolute difference, inclusive bound).
#
# @param expected [Numeric] reference value
# @param actual [Numeric] observed value
# @param epsilon [Numeric] maximum allowed absolute difference
# @param msg [String, nil] optional context appended to the failure message
# @return [nil] when the values are close enough
def assert_float_close(expected, actual, epsilon = 1e-9, msg = nil)
  delta = (expected - actual).abs
  # Keep the "<=" form: a NaN delta compares false and therefore still fails.
  unless delta <= epsilon
    context = msg ? " (#{msg})" : ""
    raise "assert_float_close failed#{context}\nexpected: #{expected}\nactual: #{actual}\nepsilon: #{epsilon}"
  end
end
38
+
39
# Splits +str+ into lines, strips surrounding whitespace from each one and
# discards the lines that end up empty.
#
# @param str [String] multi-line text (the tests pass captured stdout)
# @return [Array<String>] the non-blank, stripped lines in original order
def lines(str)
  str.each_line.filter_map do |raw|
    trimmed = raw.strip
    trimmed unless trimmed.empty?
  end
end
42
+
43
# Set the executable bit on the CLI script before any test spawns it
# (the path is relative to the current working directory).
+ File.chmod(0o755, "./exe/jrf")
44
+
45
# Shared fixture, reused by later tests: four NDJSON rows; the third has an
# object-valued "foo" and the last has no "foo" key at all.
+ input = <<~NDJSON
46
+ {"foo":1,"x":5}
47
+ {"foo":2,"x":11}
48
+ {"foo":{"bar":"ok"},"x":50}
49
+ {"x":70}
50
+ NDJSON
51
+
52
# Plain extraction: one output line per input row; the row without "foo"
# is expected to print null.
+ stdout, stderr, status = run_jrf('_["foo"]', input)
53
+ assert_success(status, stderr, "simple extract")
54
+ assert_equal(%w[1 2 {"bar":"ok"} null], lines(stdout), "extract output")
55
+
56
+ input_nested = <<~NDJSON
57
+ {"foo":{"bar":"a"}}
58
+ {"foo":{"bar":"b"}}
59
+ NDJSON
60
+
61
# Chained indexing; string results are expected as JSON (with their quotes).
+ stdout, stderr, status = run_jrf('_["foo"]["bar"]', input_nested)
62
+ assert_success(status, stderr, "nested extract")
63
+ assert_equal(%w["a" "b"], lines(stdout), "nested output")
64
+
65
# select(...) drops rows whose condition is false; the stage after ">>" then
# runs only on the surviving rows.
+ stdout, stderr, status = run_jrf('select(_["x"] > 10) >> _["foo"]', input)
66
+ assert_success(status, stderr, "select + extract")
67
+ assert_equal(%w[2 {"bar":"ok"} null], lines(stdout), "filtered output")
68
+
69
# A lone select is expected to print the surviving rows unchanged.
+ stdout, stderr, status = run_jrf('select(_["x"] > 10)', input)
70
+ assert_success(status, stderr, "select only")
71
+ assert_equal(
72
+ ['{"foo":2,"x":11}', '{"foo":{"bar":"ok"},"x":50}', '{"x":70}'],
73
+ lines(stdout),
74
+ "select-only output"
75
+ )
76
+
77
# Fixture for the equality-based select and the -v stage dump below.
+ input_hello = <<~NDJSON
78
+ {"hello":123}
79
+ {"hello":456}
80
+ NDJSON
81
+
82
+ stdout, stderr, status = run_jrf('select(_["hello"] == 123)', input_hello)
83
+ assert_success(status, stderr, "select-only hello")
84
+ assert_equal(['{"hello":123}'], lines(stdout), "select-only hello output")
85
+
86
# With -v the results still go to stdout, while stderr is expected to describe
# every parsed stage: its kind, the original text and the generated Ruby.
+ stdout, stderr, status = run_jrf('select(_["hello"] == 123) >> _["hello"]', input_hello, "-v")
87
+ assert_success(status, stderr, "dump stages")
88
+ assert_equal(%w[123], lines(stdout), "dump stages output")
89
+ assert_includes(stderr, "stage[0] kind=select")
90
+ assert_includes(stderr, 'original: select(_["hello"] == 123)')
91
+ assert_includes(stderr, 'ruby: (_["hello"] == 123) ? _ : ::Jrf::Control::DROPPED')
92
+ assert_includes(stderr, "stage[1] kind=extract")
93
+ assert_includes(stderr, 'original: _["hello"]')
94
+ assert_includes(stderr, 'ruby: _["hello"]')
95
+
96
# A regular-expression match works as a select condition: only the row whose
# nested "bar" matches /ok/ is expected to survive.
+ input_regex = <<~NDJSON
97
+ {"foo":{"bar":"ok"},"x":50}
98
+ {"foo":{"bar":"ng"},"x":70}
99
+ NDJSON
100
+
101
+ stdout, stderr, status = run_jrf('select(/ok/.match(_["foo"]["bar"])) >> _["x"]', input_regex)
102
+ assert_success(status, stderr, "regex in select")
103
+ assert_equal(%w[50], lines(stdout), "regex filter output")
104
+
105
# A ">>" nested inside [], {} or a lambda body must remain Ruby's Integer shift
# operator (1 >> 2 == 0) rather than being treated as a stage separator; only
# the top-level ">>" splits the pipeline.
+ input_split = <<~NDJSON
106
+ {"x":1}
107
+ NDJSON
108
+
109
+ stdout, stderr, status = run_jrf('[1 >> 2] >> _', input_split)
110
+ assert_success(status, stderr, "no split inside []")
111
+ assert_equal(['[0]'], lines(stdout), "no split inside [] output")
112
+
113
+ stdout, stderr, status = run_jrf('{a: 1 >> 2} >> _[:a]', input_split)
114
+ assert_success(status, stderr, "no split inside {}")
115
+ assert_equal(%w[0], lines(stdout), "no split inside {} output")
116
+
117
# The lambda yields 0 (1 >> 2); the second stage adds 1.
+ stdout, stderr, status = run_jrf('(-> { 1 >> 2 }).call >> _ + 1', input_split)
118
+ assert_success(status, stderr, "no split inside block")
119
+ assert_equal(%w[1], lines(stdout), "no split inside block output")
120
+
121
# flat: each element of an array value is expected to become its own row;
# the empty array in the last fixture row contributes nothing.
+ input_flat = <<~NDJSON
122
+ {"items":[1,2]}
123
+ {"items":[3]}
124
+ {"items":[]}
125
+ NDJSON
126
+
127
+ stdout, stderr, status = run_jrf('_["items"] >> flat', input_flat)
128
+ assert_success(status, stderr, "flat basic")
129
+ assert_equal(%w[1 2 3], lines(stdout), "flat basic output")
130
+
131
+ input_flat_hash = <<~NDJSON
132
+ {"items":[{"x":1},{"x":2}]}
133
+ NDJSON
134
+
135
# Stages after flat see the individual elements.
+ stdout, stderr, status = run_jrf('_["items"] >> flat >> _["x"]', input_flat_hash)
136
+ assert_success(status, stderr, "flat then extract")
137
+ assert_equal(%w[1 2], lines(stdout), "flat then extract output")
138
+
139
+ stdout, stderr, status = run_jrf('_["items"] >> flat >> sum(_)', input_flat)
140
+ assert_success(status, stderr, "flat then sum")
141
+ assert_equal(%w[6], lines(stdout), "flat then sum output")
142
+
143
+ stdout, stderr, status = run_jrf('_["items"] >> flat >> group', input_flat)
144
+ assert_success(status, stderr, "flat then group")
145
+ assert_equal(['[1,2,3]'], lines(stdout), "flat then group output")
146
+
147
# Negative case: the "foo" values of the shared `input` fixture are not arrays,
# so the command must exit unsuccessfully and mention "flat expects Array".
+ stdout, stderr, status = run_jrf('_["foo"] >> flat', input)
148
+ assert_failure(status, "flat requires array")
149
+ assert_includes(stderr, "flat expects Array")
150
+
151
# Numeric fixture shared by the reducer, sort, percentile and post-reduce tests:
# "foo" runs 1..4, and "x" exceeds 10 on the last three rows.
+ input_sum = <<~NDJSON
152
+ {"foo":1,"x":5}
153
+ {"foo":2,"x":11}
154
+ {"foo":3,"x":50}
155
+ {"foo":4,"x":70}
156
+ NDJSON
157
+
158
# Each reducer is expected to emit a single line for the whole input.
+ stdout, stderr, status = run_jrf('sum(_["foo"])', input_sum)
159
+ assert_success(status, stderr, "sum only")
160
+ assert_equal(%w[10], lines(stdout), "sum output")
161
+
162
+ stdout, stderr, status = run_jrf('min(_["foo"])', input_sum)
163
+ assert_success(status, stderr, "min only")
164
+ assert_equal(%w[1], lines(stdout), "min output")
165
+
166
+ stdout, stderr, status = run_jrf('max(_["foo"])', input_sum)
167
+ assert_success(status, stderr, "max only")
168
+ assert_equal(%w[4], lines(stdout), "max output")
169
+
170
# Only the rows with x > 10 are summed: 2 + 3 + 4.
+ stdout, stderr, status = run_jrf('select(_["x"] > 10) >> sum(_["foo"])', input_sum)
171
+ assert_success(status, stderr, "select + sum")
172
+ assert_equal(%w[9], lines(stdout), "select + sum output")
173
+
174
# Float results are compared with a tolerance instead of by exact text.
+ stdout, stderr, status = run_jrf('average(_["foo"])', input_sum)
175
+ assert_success(status, stderr, "average")
176
+ assert_float_close(2.5, lines(stdout).first.to_f, 1e-12, "average output")
177
+
178
# The expected value equals sqrt(1.25), the population standard deviation of 1..4.
+ stdout, stderr, status = run_jrf('stdev(_["foo"])', input_sum)
179
+ assert_success(status, stderr, "stdev")
180
+ assert_float_close(1.118033988749895, lines(stdout).first.to_f, 1e-12, "stdev output")
181
+
182
# The reducer argument is an expression evaluated per row: (1 + 2 + 3 + 4) * 2.
+ stdout, stderr, status = run_jrf('_["foo"] >> sum(_ * 2)', input_sum)
183
+ assert_success(status, stderr, "extract + sum")
184
+ assert_equal(%w[20], lines(stdout), "extract + sum output")
185
+
186
# Reducers fed zero rows (no "x" in input_sum exceeds 1000) must still succeed:
# sum is expected to print 0, while average, stdev, min and max print null.
+ stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> sum(_["foo"])', input_sum)
187
+ assert_success(status, stderr, "sum no matches")
188
+ assert_equal(%w[0], lines(stdout), "sum no matches output")
189
+
190
+ stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> average(_["foo"])', input_sum)
191
+ assert_success(status, stderr, "average no matches")
192
+ assert_equal(%w[null], lines(stdout), "average no matches output")
193
+
194
+ stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> stdev(_["foo"])', input_sum)
195
+ assert_success(status, stderr, "stdev no matches")
196
+ assert_equal(%w[null], lines(stdout), "stdev no matches output")
197
+
198
+ stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> min(_["foo"])', input_sum)
199
+ assert_success(status, stderr, "min no matches")
200
+ assert_equal(%w[null], lines(stdout), "min no matches output")
201
+
202
+ stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> max(_["foo"])', input_sum)
203
+ assert_success(status, stderr, "max no matches")
204
+ assert_equal(%w[null], lines(stdout), "max no matches output")
205
+
206
# A reducer may sit in the middle of a pipeline; the stages after it run on the
# reducer's single output value (10 + 1 here).
+ stdout, stderr, status = run_jrf('sum(_["foo"]) >> _ + 1', input_sum)
207
+ assert_success(status, stderr, "reduce in middle")
208
+ assert_equal(%w[11], lines(stdout), "reduce in middle output")
209
+
210
# Rows with x > 10 give foo 2, 3, 4; doubled and summed that is 18, which
# passes select(_ > 10) and becomes 19.
+ stdout, stderr, status = run_jrf('select(_["x"] > 10) >> _["foo"] >> sum(_ * 2) >> select(_ > 10) >> _ + 1', input_sum)
211
+ assert_success(status, stderr, "reduce mixed with select/extract")
212
+ assert_equal(%w[19], lines(stdout), "reduce mixed output")
213
+
214
# Two reducers in one pipeline: sum gives 10, times 10 is 100, and the second
# sum sees that single value.
+ stdout, stderr, status = run_jrf('_["foo"] >> sum(_) >> _ * 10 >> sum(_)', input_sum)
215
+ assert_success(status, stderr, "multiple reducers")
216
+ assert_equal(%w[100], lines(stdout), "multiple reducers output")
217
+
218
# min gives 1, times 10 is 10, and max of that single value is 10.
+ stdout, stderr, status = run_jrf('_["foo"] >> min(_) >> _ * 10 >> max(_)', input_sum)
219
+ assert_success(status, stderr, "min/max mixed reducers")
220
+ assert_equal(%w[10], lines(stdout), "min/max mixed reducers output")
221
+
222
# Rows are deliberately out of order with respect to "at" so sorting is observable.
+ input_sort_rows = <<~NDJSON
223
+ {"foo":"b","at":2}
224
+ {"foo":"c","at":3}
225
+ {"foo":"a","at":1}
226
+ NDJSON
227
+
228
# sort(key) is expected to order rows ascending by the key expression.
+ stdout, stderr, status = run_jrf('sort(_["at"]) >> _["foo"]', input_sort_rows)
229
+ assert_success(status, stderr, "sort rows by field")
230
+ assert_equal(%w["a" "b" "c"], lines(stdout), "sort rows by field output")
231
+
232
# sort with a two-argument comparator block; here it reverses the order.
+ stdout, stderr, status = run_jrf('sort { |a, b| b["at"] <=> a["at"] } >> _["foo"]', input_sort_rows)
233
+ assert_success(status, stderr, "sort rows by comparator")
234
+ assert_equal(%w["c" "b" "a"], lines(stdout), "sort rows by comparator output")
235
+
236
# group without arguments is expected to collect every incoming row into a
# single JSON array.
+ stdout, stderr, status = run_jrf('sort(_["at"]) >> _["foo"] >> group', input_sort_rows)
237
+ assert_success(status, stderr, "sort then group")
238
+ assert_equal(['["a","b","c"]'], lines(stdout), "sort then group output")
239
+
240
# With no rows reaching them, sort is expected to print nothing, while group
# prints an empty array.
+ stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> sort(_["x"]) >> _["foo"]', input_sum)
241
+ assert_success(status, stderr, "sort no matches")
242
+ assert_equal([], lines(stdout), "sort no matches output")
243
+
244
+ stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> _["foo"] >> group', input_sum)
245
+ assert_success(status, stderr, "group no matches")
246
+ assert_equal(['[]'], lines(stdout), "group no matches output")
247
+
248
# Several reducers can be embedded in one hash literal; a single object holding
# every reducer's result is expected.
+ input_group_multi = <<~NDJSON
249
+ {"x":1,"y":"a"}
250
+ {"x":2,"y":"b"}
251
+ {"x":3,"y":"c"}
252
+ NDJSON
253
+
254
+ stdout, stderr, status = run_jrf('{a: group(_["x"]), b: group(_["y"])}', input_group_multi)
255
+ assert_success(status, stderr, "group in hash")
256
+ assert_equal(['{"a":[1,2,3],"b":["a","b","c"]}'], lines(stdout), "group in hash output")
257
+
258
# Even when no row reaches the stage, the object is still expected, with empty arrays.
+ stdout, stderr, status = run_jrf('select(_["x"] > 1000) >> {a: group(_["x"]), b: group(_["y"])}', input_group_multi)
259
+ assert_success(status, stderr, "group in hash no matches")
260
+ assert_equal(['{"a":[],"b":[]}'], lines(stdout), "group in hash no-match output")
261
+
262
# percentile with a scalar fraction is expected to print a bare value.
# NOTE(review): the expected values (0.25 -> 1, 0.5 -> 2, 1.0 -> 4 over 1..4) look
# like a nearest-rank definition - confirm against the reducer implementation.
+ stdout, stderr, status = run_jrf('percentile(_["foo"], 0.50)', input_sum)
263
+ assert_success(status, stderr, "single percentile")
264
+ assert_equal(%w[2], lines(stdout), "single percentile output")
265
+
266
# With an array of fractions, one {"percentile", "value"} object per requested
# fraction is expected, each on its own line.
+ stdout, stderr, status = run_jrf('percentile(_["foo"], [0.25, 0.50, 1.0])', input_sum)
267
+ assert_success(status, stderr, "array percentile")
268
+ assert_equal(
269
+ ['{"percentile":0.25,"value":1}', '{"percentile":0.5,"value":2}', '{"percentile":1.0,"value":4}'],
270
+ lines(stdout),
271
+ "array percentile output"
272
+ )
273
+
274
# Custom fold via reduce(initial) { |acc, v| ... }. The expression is written in a
# single-quoted Ruby string, so "#{acc} #{v}" reaches jrf literally and is
# interpolated there, not in this script.
+ input_reduce = <<~NDJSON
275
+ {"s":"hello"}
276
+ {"s":"world"}
277
+ {"s":"jrf"}
278
+ NDJSON
279
+
280
+ stdout, stderr, status = run_jrf('_["s"] >> reduce("") { |acc, v| acc.empty? ? v : "#{acc} #{v}" }', input_reduce)
281
+ assert_success(status, stderr, "reduce with implicit value")
282
+ assert_equal(['"hello world jrf"'], lines(stdout), "reduce implicit value output")
283
+
284
# NOTE(review): this expression is byte-identical to the "implicit value" case
# above, so the two tests exercise the same path - confirm whether one of them
# was meant to use a different reduce form.
+ stdout, stderr, status = run_jrf('_["s"] >> reduce("") { |acc, v| acc.empty? ? v : "#{acc} #{v}" }', input_reduce)
285
+ assert_success(status, stderr, "reduce in two-stage form")
286
+ assert_equal(['"hello world jrf"'], lines(stdout), "reduce in two-stage form output")
287
+
288
# A select placed after a reducer can drop the reducer's single result
# (the sum is 10, not > 100), leaving no output at all.
+ stdout, stderr, status = run_jrf('sum(_["foo"]) >> select(_ > 100)', input_sum)
289
+ assert_success(status, stderr, "post-reduce select drop")
290
+ assert_equal([], lines(stdout), "post-reduce select drop output")
291
+
292
# Malformed expressions must make the command fail even with empty stdin, i.e.
# the error cannot depend on any row being processed.
+ stdout, stderr, status = run_jrf('select(_["x"] > ) >> _["foo"]', "")
293
+ assert_failure(status, "syntax error should fail before row loop")
294
+ assert_includes(stderr, "syntax error")
295
+
296
# Delimiters closed in the wrong order.
+ stdout, stderr, status = run_jrf('([)] >> _', "")
297
+ assert_failure(status, "mismatched delimiter should fail")
298
+ assert_includes(stderr, "mismatched delimiter")
299
+
300
# An opening parenthesis that is never closed.
+ stdout, stderr, status = run_jrf('(_["x"] >> _["y"]', "")
301
+ assert_failure(status, "unclosed delimiter should fail")
302
+ assert_includes(stderr, "unclosed delimiter")
303
+
304
# The last row is truncated JSON. The command must exit unsuccessfully and name
# JSON::ParserError on stderr, yet the reducer is still expected to print the
# sum of the rows parsed before the error (1 + 2).
+ input_broken_tail = <<~NDJSON
305
+ {"foo":1}
306
+ {"foo":2}
307
+ {"foo":
308
+ NDJSON
309
+
310
+ stdout, stderr, status = run_jrf('sum(_["foo"])', input_broken_tail)
311
+ assert_failure(status, "broken input should fail")
312
+ assert_equal(%w[3], lines(stdout), "reducers flush before parse error")
313
+ assert_includes(stderr, "JSON::ParserError")
314
+
315
# Long alternating extract/select chain: "keep" removes the second row, then
# z > 1 removes the first, so only z == 3 is expected.
+ input_chain = <<~NDJSON
316
+ {"foo":{"bar":{"z":1},"keep":true}}
317
+ {"foo":{"bar":{"z":2},"keep":false}}
318
+ {"foo":{"bar":{"z":3},"keep":true}}
319
+ NDJSON
320
+
321
+ stdout, stderr, status = run_jrf('_["foo"] >> select(_["keep"]) >> _["bar"] >> select(_["z"] > 1) >> _["z"]', input_chain)
322
+ assert_success(status, stderr, "select/extract chain")
323
+ assert_equal(%w[3], lines(stdout), "chain output")
324
+
325
# Reached only when no assertion above raised.
+ puts "ok"
metadata ADDED
@@ -0,0 +1,54 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jrf
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - kazuho
8
+ bindir: exe
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies: []
12
+ description: A small, lightweight NDJSON transformer with Ruby-like expressions.
13
+ email:
14
+ - n/a@example.com
15
+ executables:
16
+ - jrf
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - DESIGN.txt
21
+ - Gemfile
22
+ - Rakefile
23
+ - exe/jrf
24
+ - jrf.gemspec
25
+ - lib/jrf.rb
26
+ - lib/jrf/cli.rb
27
+ - lib/jrf/control.rb
28
+ - lib/jrf/pipeline_parser.rb
29
+ - lib/jrf/reducers.rb
30
+ - lib/jrf/row_context.rb
31
+ - lib/jrf/runner.rb
32
+ - lib/jrf/version.rb
33
+ - test/jrf_test.rb
34
+ licenses:
35
+ - MIT
36
+ metadata: {}
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: '3.0'
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ requirements: []
51
+ rubygems_version: 4.0.3
52
+ specification_version: 4
53
+ summary: Small NDJSON transformer with Ruby expressions
54
+ test_files: []