jrf 0.1.12 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,195 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "test_helper"
4
+
5
# End-to-end tests for the `-P` option of the jrf executable.
#
# Every test writes its input files into a temporary directory, runs
# `./exe/jrf` as a subprocess through Open3.capture3, and asserts on the
# captured stdout, stderr and exit status. The executable path is relative,
# so it is resolved against the working directory of the test run.
#
# assert_success, assert_failure and lines are not defined in this class;
# presumably they are inherited from JrfTestCase.
class CliParallelTest < JrfTestCase
  # A per-record expression over two files with `-v -P 2` emits every value
  # and prints the "parallel: enabled" summary on stderr. Values are sorted
  # before comparison, so the test does not depend on output order.
  def test_parallel_map_only
    Dir.mktmpdir do |dir|
      write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 2}])
      write_ndjson(dir, "b.ndjson", [{"x" => 3}, {"x" => 4}])

      stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", '_["x"]', *ndjson_files(dir))
      assert_success(status, stderr, "parallel map only")
      assert_equal([1, 2, 3, 4], lines(stdout).map(&:to_i).sort, "parallel map only output")
      assert_includes(stderr, "parallel: enabled workers=2 files=2 split=1/1", "parallel verbose summary")
    end
  end

  # Same shape as the map-only test, but with `-o pretty`. Blank lines are
  # dropped and the remaining lines are stripped and sorted before comparison.
  def test_parallel_map_only_pretty_output
    Dir.mktmpdir do |dir|
      write_ndjson(dir, "a.ndjson", [{"x" => 1}])
      write_ndjson(dir, "b.ndjson", [{"x" => 2}])

      stdout, stderr, status = Open3.capture3("./exe/jrf", "-P", "2", "-o", "pretty", '_["x"]', *ndjson_files(dir))
      assert_success(status, stderr, "parallel pretty map only")
      assert_equal(["1", "2"], stdout.lines.map(&:strip).reject(&:empty?).sort, "parallel pretty map only output")
    end
  end

  # With `-o tsv` and the identity expression, each input record is expected
  # to appear as one "key<TAB>value" line per field.
  def test_parallel_map_only_tsv_output
    Dir.mktmpdir do |dir|
      write_ndjson(dir, "a.ndjson", [{"a" => 1, "b" => 2}])
      write_ndjson(dir, "b.ndjson", [{"a" => 3, "b" => 4}])

      stdout, stderr, status = Open3.capture3("./exe/jrf", "-P", "2", "-o", "tsv", "_", *ndjson_files(dir))
      assert_success(status, stderr, "parallel tsv map only")
      assert_equal(["a\t1", "a\t3", "b\t2", "b\t4"], lines(stdout).sort, "parallel tsv map only output")
    end
  end

  # A single sum over two files yields one combined total.
  def test_parallel_map_reduce
    Dir.mktmpdir do |dir|
      write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 2}])
      write_ndjson(dir, "b.ndjson", [{"x" => 3}, {"x" => 4}])

      stdout, stderr, status = Open3.capture3("./exe/jrf", "-P", "2", 'sum(_["x"])', *ndjson_files(dir))
      assert_success(status, stderr, "parallel map reduce")
      assert_equal(%w[10], lines(stdout), "parallel sum output")
    end
  end

  # A select stage followed by a sum stage: only values above 10 are summed
  # (20 + 30 + 40).
  def test_parallel_split_map_and_reduce
    Dir.mktmpdir do |dir|
      write_ndjson(dir, "a.ndjson", [{"x" => 10}, {"x" => 20}])
      write_ndjson(dir, "b.ndjson", [{"x" => 30}, {"x" => 40}])

      stdout, stderr, status = Open3.capture3("./exe/jrf", "-P", "2", 'select(_["x"] > 10) >> sum(_["x"])', *ndjson_files(dir))
      assert_success(status, stderr, "parallel split map+reduce")
      assert_equal(%w[90], lines(stdout), "parallel split map+reduce output")
    end
  end

  # group_by with a per-group sum combines rows that share a key across both
  # input files. The first stdout line is parsed as a JSON object.
  def test_parallel_group_by
    Dir.mktmpdir do |dir|
      write_ndjson(dir, "a.ndjson", [{"k" => "a", "v" => 1}, {"k" => "b", "v" => 2}])
      write_ndjson(dir, "b.ndjson", [{"k" => "a", "v" => 3}, {"k" => "b", "v" => 4}])

      stdout, stderr, status = Open3.capture3("./exe/jrf", "-P", "2", 'group_by(_["k"]) { |r| sum(r["v"]) }', *ndjson_files(dir))
      assert_success(status, stderr, "parallel group_by")
      result = JSON.parse(lines(stdout).first)
      assert_equal(4, result["a"], "parallel group_by a")
      assert_equal(6, result["b"], "parallel group_by b")
    end
  end

  # With `-v`, a pipeline consisting only of a reducer is expected to report
  # "parallel: disabled" on stderr while still producing the correct total.
  def test_parallel_all_reducers_falls_back_to_serial
    Dir.mktmpdir do |dir|
      write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 2}])
      write_ndjson(dir, "b.ndjson", [{"x" => 3}])

      stdout, stderr, status = Open3.capture3("./exe/jrf", "-v", "-P", "2", 'sum(_["x"])', *ndjson_files(dir))
      assert_success(status, stderr, "all-reducer serial fallback")
      assert_equal(%w[6], lines(stdout), "all-reducer serial fallback output")
      assert_includes(stderr, "parallel: disabled", "parallel disabled summary")
    end
  end

  # Gzip-compressed inputs are accepted together with `-P`. The paths are
  # passed explicitly because ndjson_files only matches "*.ndjson".
  def test_parallel_with_gz_files
    Dir.mktmpdir do |dir|
      gz_path_a = File.join(dir, "a.ndjson.gz")
      Zlib::GzipWriter.open(gz_path_a) { |io| io.write("{\"x\":10}\n{\"x\":20}\n") }
      gz_path_b = File.join(dir, "b.ndjson.gz")
      Zlib::GzipWriter.open(gz_path_b) { |io| io.write("{\"x\":30}\n") }

      stdout, stderr, status = Open3.capture3("./exe/jrf", "-P", "2", 'sum(_["x"])', gz_path_a, gz_path_b)
      assert_success(status, stderr, "parallel with gz")
      assert_equal(%w[60], lines(stdout), "parallel with gz output")
    end
  end

  # Runs the same group_by expression with and without `-P 2` over 100 rows
  # and requires the parsed JSON output of both runs to be equal.
  def test_parallel_matches_serial_output
    Dir.mktmpdir do |dir|
      write_ndjson(dir, "a.ndjson", (1..50).map { |i| {"v" => i, "g" => i % 3} })
      write_ndjson(dir, "b.ndjson", (51..100).map { |i| {"v" => i, "g" => i % 3} })

      files = ndjson_files(dir)
      expr = 'group_by(_["g"]) { |r| sum(r["v"]) }'

      serial_stdout, serial_stderr, serial_status = Open3.capture3("./exe/jrf", expr, *files)
      assert_success(serial_status, serial_stderr, "serial baseline")

      parallel_stdout, parallel_stderr, parallel_status = Open3.capture3("./exe/jrf", "-P", "2", expr, *files)
      assert_success(parallel_status, parallel_stderr, "parallel run")

      assert_equal(JSON.parse(serial_stdout), JSON.parse(parallel_stdout), "parallel matches serial")
    end
  end

  # One readable file plus one truncated gzip file under `-P 2`: the run must
  # exit non-zero, name the bad file on stderr, and still emit the values
  # read from the good file.
  def test_parallel_worker_error_handling
    Dir.mktmpdir do |dir|
      good_path = File.join(dir, "a.ndjson")
      File.write(good_path, "{\"x\":1}\n{\"x\":2}\n")

      bad_gz_path = File.join(dir, "b.ndjson.gz")
      write_truncated_gz(bad_gz_path)

      stdout, stderr, status = Open3.capture3("./exe/jrf", "-P", "2", '_["x"]', good_path, bad_gz_path)
      assert_failure(status, "worker error causes non-zero exit")
      assert_includes(stderr, bad_gz_path, "error message includes filename")
      # Good file data should still be present
      output_values = lines(stdout).map(&:to_i)
      assert_includes(output_values, 1, "good file data preserved")
      assert_includes(output_values, 2, "good file data preserved")
    end
  end

  # A single input file combined with `-P 2` still succeeds and prints the
  # correct sum. Only the result is checked; the test does not assert which
  # execution mode (parallel or serial) was used.
  def test_parallel_requires_multiple_files
    Dir.mktmpdir do |dir|
      write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 2}])

      stdout, stderr, status = Open3.capture3("./exe/jrf", "-P", "2", 'sum(_["x"])', *ndjson_files(dir))
      assert_success(status, stderr, "single file with -P")
      assert_equal(%w[3], lines(stdout), "single file with -P output")
    end
  end

  # select followed by sum where the matching rows are spread unevenly over
  # the two files (20 from the first, 40 from the second).
  def test_parallel_select_then_sum
    Dir.mktmpdir do |dir|
      write_ndjson(dir, "a.ndjson", [{"x" => 1}, {"x" => 20}, {"x" => 3}])
      write_ndjson(dir, "b.ndjson", [{"x" => 40}, {"x" => 5}])

      stdout, stderr, status = Open3.capture3("./exe/jrf", "-P", "2", 'select(_["x"] > 10) >> sum(_["x"])', *ndjson_files(dir))
      assert_success(status, stderr, "parallel select then sum")
      assert_equal(%w[60], lines(stdout), "parallel select then sum output")
    end
  end

  # Without `-P`, a truncated gzip file placed between two good files must
  # produce a non-zero exit and an stderr message that names the bad file and
  # contains no "from " text (used here as the marker of a Ruby backtrace),
  # while values from the files before and after it are still emitted.
  def test_serial_error_includes_filename
    Dir.mktmpdir do |dir|
      good_path = File.join(dir, "a.ndjson")
      File.write(good_path, "{\"x\":1}\n{\"x\":2}\n")

      bad_gz_path = File.join(dir, "b.ndjson.gz")
      write_truncated_gz(bad_gz_path)

      good_path2 = File.join(dir, "c.ndjson")
      File.write(good_path2, "{\"x\":3}\n")

      stdout, stderr, status = Open3.capture3("./exe/jrf", '_["x"]', good_path, bad_gz_path, good_path2)
      assert_failure(status, "serial error causes non-zero exit")
      assert_includes(stderr, bad_gz_path, "serial error message includes filename")
      refute_includes(stderr, "from ", "serial error does not include stacktrace")
      # Data from good files should still be present
      output_values = lines(stdout).map(&:to_i)
      assert_includes(output_values, 1, "data before bad file preserved")
      assert_includes(output_values, 3, "data after bad file preserved")
    end
  end

  private

  # Writes +rows+ to +name+ inside +dir+ as newline-delimited JSON: one
  # JSON.generate'd document per line, each terminated by "\n".
  def write_ndjson(dir, name, rows)
    File.write(File.join(dir, name), rows.map { |row| "#{JSON.generate(row)}\n" }.join)
  end

  # Returns the sorted paths of the "*.ndjson" files directly inside +dir+.
  # Files ending in ".ndjson.gz" do not match this pattern.
  def ndjson_files(dir)
    Dir.glob(File.join(dir, "*.ndjson")).sort
  end

  # Writes a deliberately corrupt gzip file to +path+: 100 NDJSON rows are
  # compressed in memory and only the first half of the resulting bytes is
  # written, so the header is intact but the body is cut short.
  #
  # byteslice is used because the cut point is computed with bytesize;
  # String#[] counts characters and could keep more than half of the bytes
  # if the buffer is not tagged as binary.
  def write_truncated_gz(path)
    buffer = StringIO.new
    Zlib::GzipWriter.wrap(buffer) { |gz| gz.write("{\"x\":10}\n" * 100) }
    compressed = buffer.string
    File.binwrite(path, compressed.byteslice(0, compressed.bytesize / 2))
  end
end