jrf 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e826b340d1d48d7e8cdd3de9b619e5b019f1b568794d0d306b4001f93b5478b8
4
- data.tar.gz: 061102212643ccfd8654c31e3c8b7e48b6f1bc0131dcac77badb0295a7074d84
3
+ metadata.gz: edaeb729eecf63449c68fce92740acb8221885ff28a5ed26fe7bea32eb0a9bfa
4
+ data.tar.gz: 30364ac5e6eee46506da8c4d3ff2d8a05f12cd91b2b5cd248a12e612ea333891
5
5
  SHA512:
6
- metadata.gz: '09883c9cd3ba7f52190df336553cfd45156a90f400a4b87099a86c7e753dab2e07dbe73cfea1c45fb37ca88cb06ebee7f15e286988c0ec42fe526599d897b355'
7
- data.tar.gz: 5d8d0cc2d749c647a3587d2cf8659876a55e8fe8a41e38a9183ad3551b6cca2f16f99ba7c806aebbe0f82afcd5b2c4e026f81fb781f338a6898d4d807f15c347
6
+ metadata.gz: 22e73015ffb398d376c3636f8a9e324c01bd56ae5dc1170b39a8ad5c600c10273fadec1de5385706798e3aff19449b446744e608f1437ca73113871c33aa9b1b
7
+ data.tar.gz: 4c803ad64d65d6ae1d671426c2e81a4741ebcebd66ca4fb288574336e0a1088b8be2aa78f4235213ffc5794c92877410c88cbce7dae1f88bfb6369f6f37e3970
data/jrf.gemspec CHANGED
@@ -17,5 +17,5 @@ Gem::Specification.new do |spec|
17
17
  spec.executables = ["jrf"]
18
18
  spec.add_dependency "oj", ">= 3.16"
19
19
 
20
- spec.files = Dir.glob("{exe,lib,test}/*") + Dir.glob("lib/**/*") + %w[DESIGN.txt jrf.gemspec Gemfile Rakefile]
20
+ spec.files = Dir.glob("{exe,lib,test}/*") + Dir.glob("lib/**/*") + %w[DESIGN.txt jrf.gemspec Gemfile Rakefile].select { |path| File.file?(path) }
21
21
  end
data/lib/jrf/cli.rb CHANGED
@@ -17,6 +17,8 @@ module Jrf
17
17
  -v, --verbose print parsed stage expressions
18
18
  --lax allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
19
19
  -p, --pretty pretty-print JSON output instead of compact NDJSON
20
+ -r, --require LIBRARY
21
+ require LIBRARY before evaluating stages
20
22
  --no-jit do not enable YJIT, even when supported by the Ruby runtime
21
23
  --atomic-write-bytes N
22
24
  group short outputs into atomic writes of up to N bytes
@@ -43,6 +45,7 @@ module Jrf
43
45
  lax = false
44
46
  pretty = false
45
47
  jit = true
48
+ required_libraries = []
46
49
  atomic_write_bytes = Runner::DEFAULT_OUTPUT_BUFFER_LIMIT
47
50
  begin
48
51
  parser = OptionParser.new do |opts|
@@ -50,6 +53,7 @@ module Jrf
50
53
  opts.on("-v", "--verbose", "print parsed stage expressions") { verbose = true }
51
54
  opts.on("--lax", "allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)") { lax = true }
52
55
  opts.on("-p", "--pretty", "pretty-print JSON output instead of compact NDJSON") { pretty = true }
56
+ opts.on("-r", "--require LIBRARY", "require LIBRARY before evaluating stages") { |library| required_libraries << library }
53
57
  opts.on("--no-jit", "do not enable YJIT, even when supported by the Ruby runtime") { jit = false }
54
58
  opts.on("--atomic-write-bytes N", Integer, "group short outputs into atomic writes of up to N bytes") do |value|
55
59
  if value.positive?
@@ -82,6 +86,7 @@ module Jrf
82
86
 
83
87
  expression = argv.shift
84
88
  enable_yjit if jit
89
+ required_libraries.each { |library| require library }
85
90
 
86
91
  inputs = Enumerator.new do |y|
87
92
  if argv.empty?
@@ -173,13 +173,13 @@ module Jrf
173
173
  def map(&block)
174
174
  raise ArgumentError, "map requires a block" unless block
175
175
 
176
- @__jrf_current_stage.step_map(:map, @obj, &block)
176
+ @__jrf_current_stage.step_map(:map, current_input, &block)
177
177
  end
178
178
 
179
179
  def map_values(&block)
180
180
  raise ArgumentError, "map_values requires a block" unless block
181
181
 
182
- @__jrf_current_stage.step_map(:map_values, @obj, &block)
182
+ @__jrf_current_stage.step_map(:map_values, current_input, &block)
183
183
  end
184
184
 
185
185
  def group_by(key, &block)
data/lib/jrf/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Jrf
4
- VERSION = "0.1.9"
4
+ VERSION = "0.1.11"
5
5
  end
data/test/jrf_test.rb CHANGED
@@ -145,6 +145,7 @@ assert_includes(stdout, "usage: jrf [options] 'STAGE >> STAGE >> ...'")
145
145
  assert_includes(stdout, "JSON filter with the power and speed of Ruby.")
146
146
  assert_includes(stdout, "--lax")
147
147
  assert_includes(stdout, "--pretty")
148
+ assert_includes(stdout, "--require LIBRARY")
148
149
  assert_includes(stdout, "--no-jit")
149
150
  assert_includes(stdout, "-V")
150
151
  assert_includes(stdout, "--version")
@@ -203,6 +204,19 @@ stdout, stderr, status = Open3.capture3("./exe/jrf", "--atomic-write-bytes", "0"
203
204
  assert_failure(status, "atomic write bytes rejects zero")
204
205
  assert_includes(stderr, "--atomic-write-bytes requires a positive integer")
205
206
 
207
+ Dir.mktmpdir do |dir|
208
+ helper = File.join(dir, "helpers.rb")
209
+ File.write(helper, <<~RUBY)
210
+ def double(value)
211
+ value * 2
212
+ end
213
+ RUBY
214
+
215
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "-r", helper, 'double(_["hello"])', stdin_data: input_hello)
216
+ assert_success(status, stderr, "require helper option")
217
+ assert_equal(%w[246 912], lines(stdout), "require helper option output")
218
+ end
219
+
206
220
  if defined?(RubyVM::YJIT) && RubyVM::YJIT.respond_to?(:enabled?)
207
221
  yjit_probe = "{\"probe\":1}\n"
208
222
 
@@ -833,6 +847,14 @@ stdout, stderr, status = run_jrf('_["values"] >> map { |x| x + 1 } >> map { |x|
833
847
  assert_success(status, stderr, "chained map transforms")
834
848
  assert_equal(['[20,110,1010]', '[30,210,2010]', '[40,310,3010]'], lines(stdout), "chained map transforms output")
835
849
 
850
+ stdout, stderr, status = run_jrf('map { map { |y| [ sum(y[0]), sum(y[1]) ] } }', "[[[1,2]]]\n[[[3,4]]]\n")
851
+ assert_success(status, stderr, "nested map reducer binds to current target")
852
+ assert_equal(['[[[4,6]]]'], lines(stdout), "nested map reducer output")
853
+
854
+ stdout, stderr, status = run_jrf('map_values { |obj| map_values { |v| sum(v) } }', "{\"a\":{\"x\":1,\"y\":2},\"b\":{\"x\":10,\"y\":20}}\n{\"a\":{\"x\":3,\"y\":4},\"b\":{\"x\":30,\"y\":40}}\n")
855
+ assert_success(status, stderr, "nested map_values reducer binds to current target")
856
+ assert_equal(['{"a":{"x":4,"y":6},"b":{"x":40,"y":60}}'], lines(stdout), "nested map_values reducer output")
857
+
836
858
  input_gb = <<~NDJSON
837
859
  {"status":200,"path":"/a","latency":10}
838
860
  {"status":404,"path":"/b","latency":50}
@@ -930,10 +952,18 @@ assert_equal([[2, 3], [4, 5]], j.call([[1, 2], [3, 4]]), "library map transform"
930
952
  j = Jrf.new(proc { map { |x| sum(x) } })
931
953
  assert_equal([[4, 6]], j.call([[1, 2], [3, 4]]), "library map reduce")
932
954
 
955
+ # nested map reduce binds to current target
956
+ j = Jrf.new(proc { map { map { |y| [sum(y[0]), sum(y[1])] } } })
957
+ assert_equal([[[[4, 6]]]], j.call([[[[1, 2]]], [[[3, 4]]]]), "library nested map reduce")
958
+
933
959
  # map_values transform
934
960
  j = Jrf.new(proc { map_values { |v| v * 10 } })
935
961
  assert_equal([{"a" => 10, "b" => 20}], j.call([{"a" => 1, "b" => 2}]), "library map_values transform")
936
962
 
963
+ # nested map_values reduce binds to current target
964
+ j = Jrf.new(proc { map_values { |obj| map_values { |v| sum(v) } } })
965
+ assert_equal([{"a" => {"x" => 4, "y" => 6}, "b" => {"x" => 40, "y" => 60}}], j.call([{"a" => {"x" => 1, "y" => 2}, "b" => {"x" => 10, "y" => 20}}, {"a" => {"x" => 3, "y" => 4}, "b" => {"x" => 30, "y" => 40}}]), "library nested map_values reduce")
966
+
937
967
  # map hash transform
938
968
  j = Jrf.new(proc { map { |k, v| "#{k}=#{v}" } })
939
969
  assert_equal([["a=1", "b=2"]], j.call([{"a" => 1, "b" => 2}]), "library map hash transform")
metadata CHANGED
@@ -1,13 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jrf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.1.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - kazuho
8
+ autorequire:
8
9
  bindir: exe
9
10
  cert_chain: []
10
- date: 1980-01-02 00:00:00.000000000 Z
11
+ date: 2026-03-12 00:00:00.000000000 Z
11
12
  dependencies:
12
13
  - !ruby/object:Gem::Dependency
13
14
  name: oj
@@ -33,7 +34,6 @@ executables:
33
34
  extensions: []
34
35
  extra_rdoc_files: []
35
36
  files:
36
- - DESIGN.txt
37
37
  - Gemfile
38
38
  - Rakefile
39
39
  - exe/jrf
@@ -49,9 +49,11 @@ files:
49
49
  - lib/jrf/stage.rb
50
50
  - lib/jrf/version.rb
51
51
  - test/jrf_test.rb
52
+ homepage:
52
53
  licenses:
53
54
  - MIT
54
55
  metadata: {}
56
+ post_install_message:
55
57
  rdoc_options: []
56
58
  require_paths:
57
59
  - lib
@@ -66,7 +68,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
66
68
  - !ruby/object:Gem::Version
67
69
  version: '0'
68
70
  requirements: []
69
- rubygems_version: 4.0.3
71
+ rubygems_version: 3.0.3.1
72
+ signing_key:
70
73
  specification_version: 4
71
74
  summary: JSON filter with the power and speed of Ruby
72
75
  test_files: []
data/DESIGN.txt DELETED
@@ -1,455 +0,0 @@
1
- NAME
2
- jr - a small, lightweight NDJSON transformer with Ruby-like expressions
3
-
4
- OVERVIEW
5
- jr is a command-line tool for transforming NDJSON using Ruby-like
6
- expressions.
7
-
8
- It is intentionally not a jq-compatible general-purpose JSON language.
9
- Its value comes from a much narrower scope and from being implementable
10
- in a very simple way.
11
-
12
- The goal is to support expressions like:
13
-
14
- jr '["foo"]'
15
- jr 'select(/abc/.match(["aaa"])) >> ["foo"]'
16
- jr '["items"] >> flat'
17
- jr 'sum(["foo"])'
18
- jr 'select(["x"] > 10) >> ["foo"] >> sum(["bar"])'
19
-
20
- That is:
21
-
22
- * extract a value from each JSON line
23
-
24
- * filter lines by a predicate
25
-
26
- * flatten arrays into multiple output lines
27
-
28
- * aggregate values, such as summing them
29
-
30
- This document is not just a user-facing description. It is a design
31
- constraint document for implementors. The point is to preserve the
32
- simplicity we agreed on, so that jr does not drift into a heavy
33
- implementation.
34
-
35
- DESIGN PRINCIPLE
36
- jr must be implemented in a way that keeps the runtime model extremely
37
- simple.
38
-
39
- The implementation must not drift into:
40
-
41
- * AST construction and optimization
42
-
43
- * wrapping child objects in DSL wrapper objects
44
-
45
- * a large generic streaming-stage framework
46
-
47
- * per-line allocation of many intermediate DSL objects
48
-
49
- * jq-like general stream semantics
50
-
51
- Instead, jr should be implemented under the following constraints.
52
-
53
- CORE MODEL
54
- Input model
55
- Input is NDJSON.
56
-
57
- Each line is parsed as one JSON value.
58
-
59
- The primary execution model is line-by-line processing.
60
-
61
- A simple conceptual loop is sufficient:
62
-
63
- ARGF.each_line do |line|
64
- row = JSON.parse(line)
65
- ...
66
- end
67
-
68
- Evaluation context
69
- Expressions are evaluated with the current row bound as "self".
70
-
71
- That means the basic field access syntax is:
72
-
73
- ["foo"]
74
- ["foo"]["bar"]
75
-
76
- No "_" or "_." prefix is required.
77
-
78
- Root-only DSL
79
- The DSL exists only at the root context.
80
-
81
- This is a mandatory design rule.
82
-
83
- The expression context object only needs to represent the current row.
84
- Child values are not wrapped.
85
-
86
- Return value of "[]"
87
- "["foo"]" returns the underlying Ruby value directly.
88
-
89
- That means:
90
-
91
- * Hash values remain Hash
92
-
93
- * Array values remain Array
94
-
95
- * String values remain String
96
-
97
- * Numeric values remain Numeric
98
-
99
- * "nil" remains "nil"
100
-
101
- This is critical.
102
-
103
- For example:
104
-
105
- ["foo"]["bar"]
106
-
107
- must work simply because "["foo"]" returned a normal Ruby "Hash", and
108
- the next "["bar"]" is just Ruby's normal "Hash#[]".
109
-
110
- Child wrappers must not exist.
111
-
112
- Reuse of the root context
113
- The root row context must be reused across all input lines.
114
-
115
- A minimal model is:
116
-
117
- class RowContext
118
- def initialize(obj = nil)
119
- @obj = obj
120
- end
121
-
122
- def reset(obj)
123
- @obj = obj
124
- self
125
- end
126
-
127
- def [](key)
128
- @obj[key]
129
- end
130
- end
131
-
132
- The per-line execution model should be conceptually as simple as:
133
-
134
- ctx.reset(row)
135
- ctx.instance_eval(expr_source)
136
-
137
- The implementation should not allocate a new root DSL object for every
138
- line.
139
-
140
- PIPELINE SYNTAX
141
- Multiple stages are connected using top-level ">>".
142
-
143
- Example:
144
-
145
- jr 'select(["x"] > 10) >> ["foo"] >> sum(["bar"])'
146
-
147
- This ">>" is not Ruby's shift operator in the execution model.
148
-
149
- Instead, jr splits the top-level source string on top-level occurrences
150
- of ">>" before evaluating the individual stage expressions as Ruby.
151
-
152
- So the above is treated internally as three stages:
153
-
154
- select(["x"] > 10)
155
- ["foo"]
156
- sum(["bar"])
157
-
158
- This design choice is intentional and important.
159
-
160
- It allows jr to have pipeline syntax without requiring a
161
- delayed-expression DSL, operator overloading, or AST construction.
162
-
163
- Consequence of reserving top-level ">>"
164
- At top level, ">>" belongs to jr.
165
-
166
- If users need Ruby's actual ">>" operator inside a stage expression,
167
- they must use an alternative spelling such as "send(:>>, ...)", or some
168
- other escape/alternative mechanism chosen by the implementation.
169
-
170
- That tradeoff is acceptable because the primary value of jr is
171
- simplicity.
172
-
173
- STAGE KINDS
174
- Each pipeline segment is interpreted according to a small set of
175
- explicit rules.
176
-
177
- The stage kinds are:
178
-
179
- * "select(...)" - filter stage
180
-
181
- * plain expression - extract stage
182
-
183
- * "flat" - flatten stage
184
-
185
- * "sum(...)" - reduce/aggregate stage
186
-
187
- These roles must remain separate. Their responsibilities must not be
188
- mixed.
189
-
190
- Filter stage
191
- "select(...)" denotes a filter stage.
192
-
193
- Examples:
194
-
195
- select(["x"] > 10)
196
- select(/abc/.match(["aaa"]))
197
-
198
- A filter stage decides whether the current value passes to the next
199
- stage.
200
-
201
- It should not also act as an extractor.
202
-
203
- Extract stage
204
- Any stage expression that is not one of the explicit special forms is an
205
- extract stage.
206
-
207
- Examples:
208
-
209
- ["foo"]
210
- ["foo"]["bar"]
211
- ["items"]
212
-
213
- An extract stage computes a value from the current input and passes it
214
- forward.
215
-
216
- It should not also act as flattening or aggregation.
217
-
218
- Flat stage
219
- "flat" is a stage with no argument.
220
-
221
- Example:
222
-
223
- ["items"] >> flat
224
-
225
- It means that the result of the previous stage should be expanded into
226
- multiple output lines.
227
-
228
- Without "flat", an array is emitted as one JSON array value.
229
-
230
- With "flat", each element is emitted separately.
231
-
232
- "flat" must not also be used as a filter or aggregator.
233
-
234
- Reduce stage
235
- "sum(...)" denotes an aggregate stage.
236
-
237
- Examples:
238
-
239
- sum(["foo"])
240
- sum(["foo"]["bar"])
241
-
242
- A reduce stage consumes values across all matching rows and emits one
243
- final value at the end.
244
-
245
- For the first implementation, "sum(...)" is sufficient as the only
246
- required aggregate.
247
-
248
- IMPLEMENTATION DISCIPLINE
249
- This section is the most important part of the document.
250
-
251
- The implementation should stay close to the following simple execution
252
- shapes.
253
-
254
- Filter + extract only
255
- Conceptually:
256
-
257
- ctx = RowContext.new
258
-
259
- ARGF.each_line do |line|
260
- row = JSON.parse(line)
261
- ctx.reset(row)
262
-
263
- next unless ctx.instance_eval(filter_src)
264
- out = ctx.instance_eval(extract_src)
265
-
266
- emit(out)
267
- end
268
-
269
- This is the target level of simplicity.
270
-
271
- Filter + extract + flat
272
- Conceptually:
273
-
274
- ctx = RowContext.new
275
-
276
- ARGF.each_line do |line|
277
- row = JSON.parse(line)
278
- ctx.reset(row)
279
-
280
- next unless ctx.instance_eval(filter_src)
281
- out = ctx.instance_eval(extract_src)
282
-
283
- if flat
284
- out.each { |v| emit(v) }
285
- else
286
- emit(out)
287
- end
288
- end
289
-
290
- Again, this is intentionally simple.
291
-
292
- Filter + extract + sum
293
- Conceptually:
294
-
295
- ctx = RowContext.new
296
- acc = 0
297
-
298
- ARGF.each_line do |line|
299
- row = JSON.parse(line)
300
- ctx.reset(row)
301
-
302
- next unless ctx.instance_eval(filter_src)
303
- value = ctx.instance_eval(extract_src)
304
-
305
- acc += value
306
- end
307
-
308
- emit(acc)
309
-
310
- This is the intended model.
311
-
312
- The implementation must not introduce a heavyweight generic framework
313
- unless a clear need arises later.
314
-
315
- Meaning of "sum(...)"
316
- "sum(expr)" should be treated as syntactic sugar for:
317
-
318
- * evaluate "expr" for each matching input row
319
-
320
- * add the result to an accumulator
321
-
322
- * emit the accumulator once, at the end
323
-
324
- The important thing is not the internal abstraction but preserving the
325
- simple runtime shape.
326
-
327
- REQUIRED CONSTRAINTS
328
- An implementation that follows this design must satisfy all of the
329
- following.
330
-
331
- 1. NDJSON only
332
- The initial implementation targets NDJSON line-by-line processing.
333
-
334
- General stream semantics are out of scope.
335
-
336
- 2. Current row is "self"
337
- Expressions run with the current row context bound as "self".
338
-
339
- 3. "["foo"]" is the primary field access syntax
340
- This is the only required syntax for the first implementation.
341
-
342
- Bareword sugar such as "foo" or dotted syntax such as "_.foo" is out of
343
- scope.
344
-
345
- 4. "[]" returns raw Ruby values
346
- No child wrapper objects are allowed.
347
-
348
- 5. Only one root context object is reused
349
- A fresh DSL context object per row is not allowed.
350
-
351
- The current row object inside the root context should simply be
352
- replaced.
353
-
354
- 6. Pipeline parsing happens before Ruby evaluation
355
- Top-level ">>" is split by jr itself before stage evaluation.
356
-
357
- The implementation does not need to make ">>" work as a Ruby operator.
358
-
359
- 7. Stage responsibilities must stay separate
360
- * "select(...)" filters
361
-
362
- * plain expressions extract
363
-
364
- * "flat" flattens
365
-
366
- * "sum(...)" aggregates
367
-
368
- Do not overload one stage kind with multiple semantics.
369
-
370
- 8. No "nil means skip" rule in extract
371
- Skipping rows belongs to filtering.
372
-
373
- Extract stages return values.
374
-
375
- Do not make extract return-value conventions more complicated than
376
- necessary.
377
-
378
- 9. No child DSL wrappers
379
- This is worth repeating.
380
-
381
- If a child value is a Hash, then further indexing is just normal Ruby
382
- indexing. If a child value is an Array, then array access is just normal
383
- Ruby array access.
384
-
385
- 10. Avoid heavyweight abstraction
386
- Do not introduce any of the following in the first implementation unless
387
- they are absolutely necessary:
388
-
389
- * AST nodes
390
-
391
- * delayed expression objects
392
-
393
- * generic stage graphs
394
-
395
- * EOF-marker-based general reducer pipelines
396
-
397
- * jq-style multi-valued stream semantics
398
-
399
- * child wrapper chains
400
-
401
- WHAT IS EXPLICITLY OUT OF SCOPE FOR NOW
402
- The following are intentionally deferred.
403
-
404
- * jq compatibility
405
-
406
- * bareword field access such as "foo"
407
-
408
- * dotted field access such as "_.foo"
409
-
410
- * child wrappers
411
-
412
- * general reducer framework
413
-
414
- * EOF-marker stage propagation
415
-
416
- * general delayed-expression DSL
417
-
418
- * AST optimization
419
-
420
- * complicated "nil" output rules
421
-
422
- * advanced aggregate families beyond the initial "sum(...)"
423
-
424
- SUMMARY
425
- jr is valuable only if it stays small and simple.
426
-
427
- That means the implementation should follow these core rules:
428
-
429
- * NDJSON input, processed line by line
430
-
431
- * current row bound as "self"
432
-
433
- * field access through "["foo"]"
434
-
435
- * "[]" returns raw Ruby values
436
-
437
- * no child wrappers
438
-
439
- * one reusable root context object
440
-
441
- * top-level pipeline split on ">>"
442
-
443
- * "select(...)" for filter
444
-
445
- * plain expressions for extract
446
-
447
- * "flat" for flattening
448
-
449
- * "sum(...)" for aggregation
450
-
451
- * simple loops instead of heavyweight framework
452
-
453
- If an implementation stops looking this simple, it has probably drifted
454
- away from the intended design.
455
-