jrf 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 408c1f9706af5efaa1bf0125201d6647b4c108aa4aa28c99a93b59fb9cc94f02
4
- data.tar.gz: 702f2fb14dc9d498292b02c41f0cdb4a91c0fa3e093ad9a71435d9a2604532fa
3
+ metadata.gz: e9bb2a3a16d2bbe8cfb463267ff74d7d582511d4b4891e56ad3dfa6eee75fceb
4
+ data.tar.gz: a13b2e9c8517c3da997452166556505b24fc4d5f898765ad33495eafd57c3081
5
5
  SHA512:
6
- metadata.gz: 80dfa6d2bb7c9304e779a3e80815efbde9c599d66665708738b833b08daa1918ae54bc5b170c8b90c60399fe18b0df06d576e2c8c3d8b76b74f9daa826efcfa8
7
- data.tar.gz: 597b715fd3ebd31a49cb2839f7dda814b845cd5aa87a3ac9a9cf551553792b453af749e287652553903de851ea7b06a9e5940abc7c25fccd319a9e7e72d75840
6
+ metadata.gz: 54b400cdaba584896f2511acfe9a41ef10af25033bf88cfc6e0386eaa840df9395fb0d008c320b3193d55a9c3fad444a7f54bd29f52c34f69bc9a9cf392a7809
7
+ data.tar.gz: 80c72675e179da483316bfeaee7114da6edb49dc66ae179aa072d48907c4c9caf74113c6681b2f4a83f4b97da6faac436f5d6af5bd31e82605b122d85892cede
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require_relative "../pipeline"
5
+ require_relative "../pipeline_parser"
6
+
7
+ module Jrf
8
+ class CLI
9
+ class Runner
10
+ RS_CHAR = "\x1e"
11
+ DEFAULT_OUTPUT_BUFFER_LIMIT = 4096
12
+
13
+ class RsNormalizer
14
+ def initialize(input)
15
+ @input = input
16
+ end
17
+
18
+ def read(length = nil, outbuf = nil)
19
+ chunk = @input.read(length)
20
+ return nil if chunk.nil?
21
+
22
+ chunk = chunk.tr(RS_CHAR, "\n")
23
+ if outbuf
24
+ outbuf.replace(chunk)
25
+ else
26
+ chunk
27
+ end
28
+ end
29
+ end
30
+
31
+ def initialize(inputs:, out: $stdout, err: $stderr, lax: false, pretty: false, atomic_write_bytes: DEFAULT_OUTPUT_BUFFER_LIMIT)
32
+ @inputs = inputs
33
+ @out = out
34
+ @err = err
35
+ @lax = lax
36
+ @pretty = pretty
37
+ @atomic_write_bytes = atomic_write_bytes
38
+ @output_buffer = +""
39
+ end
40
+
41
+ def run(expression, verbose: false)
42
+ parsed = PipelineParser.new(expression).parse
43
+ stages = parsed[:stages]
44
+ dump_stages(stages) if verbose
45
+
46
+ blocks = stages.map { |stage|
47
+ eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
48
+ }
49
+ pipeline = Pipeline.new(*blocks)
50
+
51
+ input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
52
+ pipeline.call(input_enum) do |value|
53
+ emit_output(value)
54
+ end
55
+ ensure
56
+ write_output(@output_buffer)
57
+ end
58
+
59
+ private
60
+
61
+ def each_input_value
62
+ return each_input_value_lax { |value| yield value } if @lax
63
+
64
+ each_input_value_ndjson { |value| yield value }
65
+ end
66
+
67
+ def each_input_value_ndjson
68
+ each_input do |source|
69
+ source.each_line do |raw_line|
70
+ line = raw_line.strip
71
+ next if line.empty?
72
+
73
+ yield JSON.parse(line)
74
+ end
75
+ end
76
+ end
77
+
78
+ def each_input_value_lax
79
+ require "oj"
80
+ handler = Class.new(Oj::ScHandler) do
81
+ def initialize(&emit)
82
+ @emit = emit
83
+ end
84
+
85
+ def hash_start = {}
86
+ def hash_key(key) = key
87
+ def hash_set(hash, key, value) = hash[key] = value
88
+ def array_start = []
89
+ def array_append(array, value) = array << value
90
+ def add_value(value) = @emit.call(value)
91
+ end
92
+ each_input do |source|
93
+ Oj.sc_parse(handler.new { |value| yield value }, RsNormalizer.new(source))
94
+ end
95
+ rescue LoadError
96
+ raise "oj is required for --lax mode (gem install oj)"
97
+ rescue Oj::ParseError => e
98
+ raise JSON::ParserError, e.message
99
+ end
100
+
101
+ def dump_stages(stages)
102
+ stages.each_with_index do |stage, i|
103
+ @err.puts "stage[#{i}]: #{stage[:src]}"
104
+ end
105
+ end
106
+
107
+ def each_input
108
+ @inputs.each { |source| yield source }
109
+ end
110
+
111
+ def emit_output(value)
112
+ record = (@pretty ? JSON.pretty_generate(value) : JSON.generate(value)) << "\n"
113
+ if @output_buffer.bytesize + record.bytesize <= @atomic_write_bytes
114
+ @output_buffer << record
115
+ else
116
+ write_output(@output_buffer)
117
+ @output_buffer = record
118
+ end
119
+ end
120
+
121
+ def write_output(str)
122
+ @out.syswrite(str)
123
+ end
124
+ end
125
+ end
126
+ end
data/lib/jrf/cli.rb CHANGED
@@ -1,13 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "runner"
3
+ require_relative "cli/runner"
4
4
 
5
5
  module Jrf
6
6
  class CLI
7
- USAGE = "usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'"
7
+ USAGE = "usage: jrf [options] 'STAGE >> STAGE >> ...'"
8
8
 
9
9
  HELP_TEXT = <<~'TEXT'
10
- usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'
10
+ usage: jrf [options] 'STAGE >> STAGE >> ...'
11
11
 
12
12
  JSON filter with the power and speed of Ruby.
13
13
 
@@ -15,6 +15,8 @@ module Jrf
15
15
  -v, --verbose print parsed stage expressions
16
16
  --lax allow multiline JSON texts; split inputs by whitespace (also detects JSON-SEQ RS 0x1e)
17
17
  -p, --pretty pretty-print JSON output instead of compact NDJSON
18
+ --atomic-write-bytes N
19
+ group short outputs into atomic writes of up to N bytes
18
20
  -h, --help show this help and exit
19
21
 
20
22
  Pipeline:
@@ -36,6 +38,7 @@ module Jrf
36
38
  verbose = false
37
39
  lax = false
38
40
  pretty = false
41
+ atomic_write_bytes = Runner::DEFAULT_OUTPUT_BUFFER_LIMIT
39
42
 
40
43
  while argv.first&.start_with?("-")
41
44
  case argv.first
@@ -48,6 +51,14 @@ module Jrf
48
51
  when "-p", "--pretty"
49
52
  pretty = true
50
53
  argv.shift
54
+ when /\A--atomic-write-bytes=(.+)\z/
55
+ atomic_write_bytes = parse_atomic_write_bytes(Regexp.last_match(1), err)
56
+ return 1 unless atomic_write_bytes
57
+ argv.shift
58
+ when "--atomic-write-bytes"
59
+ argv.shift
60
+ atomic_write_bytes = parse_atomic_write_bytes(argv.shift, err)
61
+ return 1 unless atomic_write_bytes
51
62
  when "-h", "--help"
52
63
  out.puts HELP_TEXT
53
64
  return 0
@@ -64,8 +75,43 @@ module Jrf
64
75
  end
65
76
 
66
77
  expression = argv.shift
67
- Runner.new(input: input, out: out, err: err, lax: lax, pretty: pretty).run(expression, verbose: verbose)
78
+ inputs = Enumerator.new do |y|
79
+ if argv.empty?
80
+ y << input
81
+ else
82
+ argv.each do |path|
83
+ if path == "-"
84
+ y << input
85
+ elsif path.end_with?(".gz")
86
+ require "zlib"
87
+ Zlib::GzipReader.open(path) do |source|
88
+ y << source
89
+ end
90
+ else
91
+ File.open(path, "rb") do |source|
92
+ y << source
93
+ end
94
+ end
95
+ end
96
+ end
97
+ end
98
+ Runner.new(
99
+ inputs: inputs,
100
+ out: out,
101
+ err: err,
102
+ lax: lax,
103
+ pretty: pretty,
104
+ atomic_write_bytes: atomic_write_bytes
105
+ ).run(expression, verbose: verbose)
68
106
  0
69
107
  end
108
+
109
+ def self.parse_atomic_write_bytes(value, err)
110
+ bytes = Integer(value, exception: false)
111
+ return bytes if bytes && bytes.positive?
112
+
113
+ err.puts "--atomic-write-bytes requires a positive integer"
114
+ nil
115
+ end
70
116
  end
71
117
  end
data/lib/jrf/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Jrf
4
- VERSION = "0.1.5"
4
+ VERSION = "0.1.6"
5
5
  end
data/test/jrf_test.rb CHANGED
@@ -1,7 +1,17 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ begin
4
+ require "bundler/setup"
5
+ rescue LoadError
6
+ # Allow running tests in plain Ruby environments with globally installed gems.
7
+ end
8
+
3
9
  require "json"
4
10
  require "open3"
11
+ require "stringio"
12
+ require "tmpdir"
13
+ require "zlib"
14
+ require_relative "../lib/jrf/cli/runner"
5
15
 
6
16
  def run_jrf(expr, input, *opts)
7
17
  Open3.capture3("./exe/jrf", *opts, expr, stdin_data: input)
@@ -41,6 +51,45 @@ def lines(str)
41
51
  str.lines.map(&:strip).reject(&:empty?)
42
52
  end
43
53
 
54
+ class RecordingRunner < Jrf::CLI::Runner
55
+ attr_reader :writes
56
+
57
+ def initialize(**kwargs)
58
+ super
59
+ @writes = []
60
+ end
61
+
62
+ private
63
+
64
+ def write_output(str)
65
+ return if str.empty?
66
+
67
+ @writes << str
68
+ end
69
+ end
70
+
71
+ class ChunkedSource
72
+ def initialize(str, chunk_size: 5)
73
+ @str = str
74
+ @chunk_size = chunk_size
75
+ @offset = 0
76
+ end
77
+
78
+ def read(length = nil, outbuf = nil)
79
+ raise "expected chunked reads" if length.nil?
80
+
81
+ chunk = @str.byteslice(@offset, [length, @chunk_size].min)
82
+ return nil unless chunk
83
+
84
+ @offset += chunk.bytesize
85
+ if outbuf
86
+ outbuf.replace(chunk)
87
+ else
88
+ chunk
89
+ end
90
+ end
91
+ end
92
+
44
93
  File.chmod(0o755, "./exe/jrf")
45
94
 
46
95
  input = <<~NDJSON
@@ -92,10 +141,11 @@ assert_includes(stderr, 'stage[1]: _["hello"]')
92
141
 
93
142
  stdout, stderr, status = Open3.capture3("./exe/jrf", "--help")
94
143
  assert_success(status, stderr, "help option")
95
- assert_includes(stdout, "usage: jrf [-v] [--lax] [--pretty] [--help] 'STAGE >> STAGE >> ...'")
144
+ assert_includes(stdout, "usage: jrf [options] 'STAGE >> STAGE >> ...'")
96
145
  assert_includes(stdout, "JSON filter with the power and speed of Ruby.")
97
146
  assert_includes(stdout, "--lax")
98
147
  assert_includes(stdout, "--pretty")
148
+ assert_includes(stdout, "--atomic-write-bytes N")
99
149
  assert_includes(stdout, "Pipeline:")
100
150
  assert_includes(stdout, "Connect stages with top-level >>.")
101
151
  assert_includes(stdout, "The current value in each stage is available as _.")
@@ -103,11 +153,72 @@ assert_includes(stdout, "See Also:")
103
153
  assert_includes(stdout, "https://github.com/kazuho/jrf#readme")
104
154
  assert_equal([], lines(stderr), "help stderr output")
105
155
 
156
+ threshold_input = StringIO.new((1..4).map { |i| "{\"foo\":\"#{'x' * 1020}\",\"i\":#{i}}\n" }.join)
157
+ buffered_runner = RecordingRunner.new(inputs: [threshold_input], out: StringIO.new, err: StringIO.new)
158
+ buffered_runner.run('_')
159
+ expected_line = JSON.generate({"foo" => "x" * 1020, "i" => 1}) + "\n"
160
+ assert_equal(2, buffered_runner.writes.length, "default atomic write limit buffers records until the configured threshold")
161
+ assert_equal(expected_line.bytesize * 3, buffered_runner.writes.first.bytesize, "default atomic write limit flushes before the next record would exceed the threshold")
162
+ assert_equal(expected_line.bytesize, buffered_runner.writes.last.bytesize, "final buffer flush emits the remaining record")
163
+
164
+ small_limit_runner = RecordingRunner.new(inputs: [StringIO.new("{\"foo\":1}\n{\"foo\":2}\n")], out: StringIO.new, err: StringIO.new, atomic_write_bytes: 1)
165
+ small_limit_runner.run('_["foo"]')
166
+ assert_equal(["1\n", "2\n"], small_limit_runner.writes, "small atomic write limit emits oversized records directly")
167
+
168
+ error_runner = RecordingRunner.new(inputs: [StringIO.new("{\"foo\":1}\n{\"foo\":")], out: StringIO.new, err: StringIO.new)
169
+ begin
170
+ error_runner.run('_["foo"]')
171
+ raise "expected parse error for buffered flush test"
172
+ rescue JSON::ParserError
173
+ assert_equal(["1\n"], error_runner.writes, "buffer flushes pending output before parse errors escape")
174
+ end
175
+
106
176
  stdout, stderr, status = run_jrf('select(_["hello"] == 123) >> _["hello"]', input_hello, "--verbose")
107
177
  assert_success(status, stderr, "dump stages verbose alias")
108
178
  assert_equal(%w[123], lines(stdout), "dump stages verbose alias output")
109
179
  assert_includes(stderr, 'stage[0]: select(_["hello"] == 123)')
110
180
 
181
+ stdout, stderr, status = run_jrf('_["hello"]', input_hello, "--atomic-write-bytes", "512")
182
+ assert_success(status, stderr, "atomic write bytes option")
183
+ assert_equal(%w[123 456], lines(stdout), "atomic write bytes option output")
184
+
185
+ stdout, stderr, status = run_jrf('_["hello"]', input_hello, "--atomic-write-bytes=512")
186
+ assert_success(status, stderr, "atomic write bytes equals form")
187
+ assert_equal(%w[123 456], lines(stdout), "atomic write bytes equals form output")
188
+
189
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "--atomic-write-bytes", "0", '_["hello"]', stdin_data: input_hello)
190
+ assert_failure(status, "atomic write bytes rejects zero")
191
+ assert_includes(stderr, "--atomic-write-bytes requires a positive integer")
192
+
193
+ Dir.mktmpdir do |dir|
194
+ gz_path = File.join(dir, "input.ndjson.gz")
195
+ Zlib::GzipWriter.open(gz_path) do |io|
196
+ io.write("{\"foo\":10}\n{\"foo\":20}\n")
197
+ end
198
+
199
+ stdout, stderr, status = Open3.capture3("./exe/jrf", '_["foo"]', gz_path)
200
+ assert_success(status, stderr, "compressed input by suffix")
201
+ assert_equal(%w[10 20], lines(stdout), "compressed input output")
202
+
203
+ lax_gz_path = File.join(dir, "input-lax.json.gz")
204
+ Zlib::GzipWriter.open(lax_gz_path) do |io|
205
+ io.write("{\"foo\":30}\n\x1e{\"foo\":40}\n")
206
+ end
207
+
208
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "--lax", '_["foo"]', lax_gz_path)
209
+ assert_success(status, stderr, "compressed lax input by suffix")
210
+ assert_equal(%w[30 40], lines(stdout), "compressed lax input output")
211
+
212
+ second_gz_path = File.join(dir, "input2.ndjson.gz")
213
+ Zlib::GzipWriter.open(second_gz_path) do |io|
214
+ io.write("{\"foo\":50}\n")
215
+ end
216
+
217
+ stdout, stderr, status = Open3.capture3("./exe/jrf", '_["foo"]', gz_path, second_gz_path)
218
+ assert_success(status, stderr, "multiple compressed inputs by suffix")
219
+ assert_equal(%w[10 20 50], lines(stdout), "multiple compressed input output")
220
+ end
221
+
111
222
  stdout, stderr, status = run_jrf('_', input_hello, "--pretty")
112
223
  assert_success(status, stderr, "pretty output")
113
224
  assert_equal(
@@ -493,6 +604,26 @@ stdout, stderr, status = run_jrf('_["foo"]', input_lax_trailing_rs, "--lax")
493
604
  assert_success(status, stderr, "lax ignores trailing separator")
494
605
  assert_equal(%w[9], lines(stdout), "lax trailing separator output")
495
606
 
607
+ chunked_lax_out = RecordingRunner.new(
608
+ inputs: [ChunkedSource.new("{\"foo\":1}\n\x1e{\"foo\":2}\n\t{\"foo\":3}\n")],
609
+ out: StringIO.new,
610
+ err: StringIO.new,
611
+ lax: true
612
+ )
613
+ chunked_lax_out.run('_["foo"]')
614
+ assert_equal(%w[1 2 3], lines(chunked_lax_out.writes.join), "lax mode streams chunked input without whole-input reads")
615
+
616
+ Dir.mktmpdir do |dir|
617
+ one = File.join(dir, "one.json")
618
+ two = File.join(dir, "two.json")
619
+ File.write(one, "1")
620
+ File.write(two, "2")
621
+
622
+ stdout, stderr, status = Open3.capture3("./exe/jrf", "--lax", "_", one, two)
623
+ assert_success(status, stderr, "lax keeps file boundaries")
624
+ assert_equal(%w[1 2], lines(stdout), "lax does not merge JSON across file boundaries")
625
+ end
626
+
496
627
  stdout, stderr, status = run_jrf('select(_["x"] > ) >> _["foo"]', "")
497
628
  assert_failure(status, "syntax error should fail before row loop")
498
629
  assert_includes(stderr, "syntax error")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jrf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - kazuho
@@ -40,12 +40,12 @@ files:
40
40
  - jrf.gemspec
41
41
  - lib/jrf.rb
42
42
  - lib/jrf/cli.rb
43
+ - lib/jrf/cli/runner.rb
43
44
  - lib/jrf/control.rb
44
45
  - lib/jrf/pipeline.rb
45
46
  - lib/jrf/pipeline_parser.rb
46
47
  - lib/jrf/reducers.rb
47
48
  - lib/jrf/row_context.rb
48
- - lib/jrf/runner.rb
49
49
  - lib/jrf/stage.rb
50
50
  - lib/jrf/version.rb
51
51
  - test/jrf_test.rb
data/lib/jrf/runner.rb DELETED
@@ -1,81 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "json"
4
- require_relative "pipeline"
5
- require_relative "pipeline_parser"
6
-
7
- module Jrf
8
- class Runner
9
- RS_CHAR = "\x1e"
10
-
11
- def initialize(input: ARGF, out: $stdout, err: $stderr, lax: false, pretty: false)
12
- @input = input
13
- @out = out
14
- @err = err
15
- @lax = lax
16
- @pretty = pretty
17
- end
18
-
19
- def run(expression, verbose: false)
20
- parsed = PipelineParser.new(expression).parse
21
- stages = parsed[:stages]
22
- dump_stages(stages) if verbose
23
-
24
- blocks = stages.map { |stage|
25
- eval("proc { #{stage[:src]} }", nil, "(jrf stage)", 1) # rubocop:disable Security/Eval
26
- }
27
- pipeline = Pipeline.new(*blocks)
28
-
29
- input_enum = Enumerator.new { |y| each_input_value { |v| y << v } }
30
- pipeline.call(input_enum) do |value|
31
- @out.puts(@pretty ? JSON.pretty_generate(value) : JSON.generate(value))
32
- end
33
- end
34
-
35
- private
36
-
37
- def each_input_value
38
- return each_input_value_lax { |value| yield value } if @lax
39
-
40
- each_input_value_ndjson { |value| yield value }
41
- end
42
-
43
- def each_input_value_ndjson
44
- @input.each_line do |raw_line|
45
- line = raw_line.strip
46
- next if line.empty?
47
-
48
- yield JSON.parse(line)
49
- end
50
- end
51
-
52
- def each_input_value_lax
53
- require "oj"
54
- source = @input.read.to_s
55
- source = source.include?(RS_CHAR) ? source.tr(RS_CHAR, "\n") : source
56
- handler = Class.new(Oj::ScHandler) do
57
- def initialize(&emit)
58
- @emit = emit
59
- end
60
-
61
- def hash_start = {}
62
- def hash_key(key) = key
63
- def hash_set(hash, key, value) = hash[key] = value
64
- def array_start = []
65
- def array_append(array, value) = array << value
66
- def add_value(value) = @emit.call(value)
67
- end.new { |value| yield value }
68
- Oj.sc_parse(handler, source)
69
- rescue LoadError
70
- raise "oj is required for --lax mode (gem install oj)"
71
- rescue Oj::ParseError => e
72
- raise JSON::ParserError, e.message
73
- end
74
-
75
- def dump_stages(stages)
76
- stages.each_with_index do |stage, i|
77
- @err.puts "stage[#{i}]: #{stage[:src]}"
78
- end
79
- end
80
- end
81
- end