csvops 0.3.0.alpha → 0.4.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +56 -142
- data/docs/architecture.md +266 -0
- data/docs/release-v0.4.0-alpha.md +87 -0
- data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +93 -0
- data/lib/csvtool/application/use_cases/run_extraction.rb +3 -3
- data/lib/csvtool/application/use_cases/run_row_extraction.rb +3 -3
- data/lib/csvtool/application/use_cases/run_row_randomization.rb +3 -3
- data/lib/csvtool/cli.rb +5 -1
- data/lib/csvtool/domain/cross_csv_dedupe_session/column_selector.rb +44 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session.rb +46 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/csv_profile.rb +24 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/key_mapping.rb +22 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/match_options.rb +29 -0
- data/lib/csvtool/domain/row_randomization_session/randomization_source.rb +1 -0
- data/lib/csvtool/domain/row_session/row_source.rb +3 -0
- data/lib/csvtool/domain/{column_session → shared}/output_destination.rb +1 -1
- data/lib/csvtool/infrastructure/csv/cross_csv_deduper.rb +85 -0
- data/lib/csvtool/infrastructure/csv/selector_validator.rb +30 -0
- data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
- data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +163 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +113 -0
- data/test/csvtool/cli_test.rb +130 -16
- data/test/csvtool/cli_unit_test.rb +16 -3
- data/test/csvtool/domain/column_session/column_session_test.rb +2 -2
- data/test/csvtool/domain/column_session/csv_source_test.rb +10 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/column_selector_test.rb +42 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session_test.rb +75 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/csv_profile_test.rb +26 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/key_mapping_test.rb +31 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/match_options_test.rb +52 -0
- data/test/csvtool/domain/row_randomization_session/randomization_session_test.rb +2 -2
- data/test/csvtool/domain/row_randomization_session/randomization_source_test.rb +15 -1
- data/test/csvtool/domain/row_session/row_session_test.rb +2 -2
- data/test/csvtool/domain/row_session/row_source_test.rb +16 -0
- data/test/csvtool/domain/shared/output_destination_test.rb +24 -0
- data/test/csvtool/infrastructure/csv/cross_csv_deduper_test.rb +155 -0
- data/test/csvtool/infrastructure/csv/selector_validator_test.rb +72 -0
- data/test/csvtool/interface/cli/menu_loop_test.rb +50 -13
- data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +246 -0
- data/test/fixtures/dedupe_reference.csv +3 -0
- data/test/fixtures/dedupe_reference.tsv +3 -0
- data/test/fixtures/dedupe_reference_all.csv +5 -0
- data/test/fixtures/dedupe_reference_no_headers.csv +2 -0
- data/test/fixtures/dedupe_reference_none.csv +2 -0
- data/test/fixtures/dedupe_reference_normalization.csv +3 -0
- data/test/fixtures/dedupe_source.csv +6 -0
- data/test/fixtures/dedupe_source.tsv +6 -0
- data/test/fixtures/dedupe_source_no_headers.csv +5 -0
- data/test/fixtures/dedupe_source_normalization.csv +4 -0
- metadata +34 -8
- data/lib/csvtool/domain/row_randomization_session/randomization_output_destination.rb +0 -31
- data/lib/csvtool/domain/row_session/row_output_destination.rb +0 -31
- data/test/csvtool/domain/column_session/output_destination_test.rb +0 -18
- data/test/csvtool/domain/row_randomization_session/randomization_output_destination_test.rb +0 -21
- data/test/csvtool/domain/row_session/row_output_destination_test.rb +0 -23
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/application/use_cases/run_cross_csv_dedupe"
|
|
5
|
+
require "csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session"
|
|
6
|
+
require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
|
|
7
|
+
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
8
|
+
require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
|
|
9
|
+
require "csvtool/domain/cross_csv_dedupe_session/match_options"
|
|
10
|
+
require "csvtool/domain/shared/output_destination"
|
|
11
|
+
require "tmpdir"
|
|
12
|
+
|
|
13
|
+
# Tests for the RunCrossCsvDedupe use case, driven by the dedupe_source.csv and
# dedupe_reference.csv fixtures.
class RunCrossCsvDedupeTest < Minitest::Test
  # Returns the absolute path of a fixture file, resolved relative to the
  # directory containing this test file.
  def fixture_path(name)
    relative = "../../../fixtures/#{name}"
    File.expand_path(relative, __dir__)
  end

  # With a console destination, the header and the retained rows are expected
  # to arrive through the on_header / on_row callbacks, together with stats.
  def test_streams_retained_rows_to_callbacks
    runner = Csvtool::Application::UseCases::RunCrossCsvDedupe.new
    seen_header = nil
    seen_rows = []
    session = session_for(
      source_selector_input: "customer_id",
      output_destination: Csvtool::Domain::Shared::OutputDestination.console
    )

    outcome = runner.call(
      session: session,
      on_header: ->(header) { seen_header = header },
      on_row: ->(row) { seen_rows << row }
    )

    assert_equal true, outcome.ok?
    assert_equal %w[customer_id name], seen_header
    assert_equal [["1", "Alice"], ["3", "Cara"]], seen_rows
    { source_rows: 5, removed_rows: 3, kept_rows_count: 2 }.each do |stat, expected|
      assert_equal expected, outcome.data[:stats][stat]
    end
  end

  # With a file destination, the result is expected to report the output path
  # and the file is expected to contain the header plus the retained rows.
  def test_writes_to_file_output_destination
    runner = Csvtool::Application::UseCases::RunCrossCsvDedupe.new

    Dir.mktmpdir do |tmp|
      target = File.join(tmp, "deduped.csv")
      session = session_for(
        source_selector_input: "customer_id",
        output_destination: Csvtool::Domain::Shared::OutputDestination.file(path: target)
      )

      outcome = runner.call(session: session)

      assert_equal true, outcome.ok?
      assert_equal target, outcome.data[:output_path]
      assert_equal "customer_id,name\n1,Alice\n3,Cara\n", File.read(target)
    end
  end

  # A source selector that names no existing column is expected to produce a
  # failed result carrying the :column_not_found error.
  def test_returns_column_not_found_when_selector_invalid
    runner = Csvtool::Application::UseCases::RunCrossCsvDedupe.new
    session = session_for(
      source_selector_input: "missing",
      output_destination: Csvtool::Domain::Shared::OutputDestination.console
    )

    outcome = runner.call(session: session)

    assert_equal false, outcome.ok?
    assert_equal :column_not_found, outcome.error
  end

  private

  # Session over the standard source/reference fixtures, keyed on
  # "external_id" on the reference side.
  def session_for(source_selector_input:, output_destination:)
    build_session(
      source_path: fixture_path("dedupe_source.csv"),
      reference_path: fixture_path("dedupe_reference.csv"),
      source_selector_input: source_selector_input,
      reference_selector_input: "external_id",
      output_destination: output_destination
    )
  end

  # Assembles a dedupe session for two comma-separated files with headers,
  # matching with whitespace trimming on and case-insensitivity off, and
  # attaches the given output destination.
  def build_session(source_path:, reference_path:, source_selector_input:, reference_selector_input:, output_destination:)
    dedupe = Csvtool::Domain::CrossCsvDedupeSession

    source, reference = [source_path, reference_path].map do |path|
      dedupe::CsvProfile.new(path: path, separator: ",", headers_present: true)
    end
    source_selector, reference_selector = [source_selector_input, reference_selector_input].map do |input|
      dedupe::ColumnSelector.from_input(headers_present: true, input: input)
    end
    key_mapping = dedupe::KeyMapping.new(
      source_selector: source_selector,
      reference_selector: reference_selector
    )
    match_options = dedupe::MatchOptions.new(trim_whitespace: true, case_insensitive: false)

    started = dedupe::CrossCsvDedupeSession.start(
      source: source,
      reference: reference,
      key_mapping: key_mapping,
      match_options: match_options
    )
    started.with_output_destination(output_destination)
  end
end
|
data/test/csvtool/cli_test.rb
CHANGED
|
@@ -11,7 +11,7 @@ class TestCli < Minitest::Test
|
|
|
11
11
|
|
|
12
12
|
def test_menu_can_exit_cleanly
|
|
13
13
|
output = StringIO.new
|
|
14
|
-
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("
|
|
14
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("5\n"), stdout: output, stderr: StringIO.new)
|
|
15
15
|
assert_equal 0, status
|
|
16
16
|
assert_includes output.string, "CSV Tool Menu"
|
|
17
17
|
end
|
|
@@ -26,7 +26,7 @@ class TestCli < Minitest::Test
|
|
|
26
26
|
"",
|
|
27
27
|
"y",
|
|
28
28
|
"",
|
|
29
|
-
"
|
|
29
|
+
"5"
|
|
30
30
|
].join("\n") + "\n"
|
|
31
31
|
|
|
32
32
|
output = StringIO.new
|
|
@@ -58,7 +58,7 @@ class TestCli < Minitest::Test
|
|
|
58
58
|
"2",
|
|
59
59
|
"3",
|
|
60
60
|
"",
|
|
61
|
-
"
|
|
61
|
+
"5"
|
|
62
62
|
].join("\n") + "\n"
|
|
63
63
|
|
|
64
64
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -79,7 +79,7 @@ class TestCli < Minitest::Test
|
|
|
79
79
|
"0",
|
|
80
80
|
"3",
|
|
81
81
|
"",
|
|
82
|
-
"
|
|
82
|
+
"5"
|
|
83
83
|
].join("\n") + "\n"
|
|
84
84
|
|
|
85
85
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -98,7 +98,7 @@ class TestCli < Minitest::Test
|
|
|
98
98
|
"2",
|
|
99
99
|
"3",
|
|
100
100
|
"",
|
|
101
|
-
"
|
|
101
|
+
"5"
|
|
102
102
|
].join("\n") + "\n"
|
|
103
103
|
|
|
104
104
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -119,7 +119,7 @@ class TestCli < Minitest::Test
|
|
|
119
119
|
"2",
|
|
120
120
|
"3",
|
|
121
121
|
"",
|
|
122
|
-
"
|
|
122
|
+
"5"
|
|
123
123
|
].join("\n") + "\n"
|
|
124
124
|
|
|
125
125
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -144,7 +144,7 @@ class TestCli < Minitest::Test
|
|
|
144
144
|
"3",
|
|
145
145
|
"2",
|
|
146
146
|
output_path,
|
|
147
|
-
"
|
|
147
|
+
"5"
|
|
148
148
|
].join("\n") + "\n"
|
|
149
149
|
|
|
150
150
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -164,7 +164,7 @@ class TestCli < Minitest::Test
|
|
|
164
164
|
"1",
|
|
165
165
|
"2",
|
|
166
166
|
"",
|
|
167
|
-
"
|
|
167
|
+
"5"
|
|
168
168
|
].join("\n") + "\n"
|
|
169
169
|
|
|
170
170
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -184,7 +184,7 @@ class TestCli < Minitest::Test
|
|
|
184
184
|
"",
|
|
185
185
|
"",
|
|
186
186
|
"",
|
|
187
|
-
"
|
|
187
|
+
"5"
|
|
188
188
|
].join("\n") + "\n"
|
|
189
189
|
|
|
190
190
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -209,7 +209,7 @@ class TestCli < Minitest::Test
|
|
|
209
209
|
"",
|
|
210
210
|
"2",
|
|
211
211
|
output_path,
|
|
212
|
-
"
|
|
212
|
+
"5"
|
|
213
213
|
].join("\n") + "\n"
|
|
214
214
|
|
|
215
215
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -231,7 +231,7 @@ class TestCli < Minitest::Test
|
|
|
231
231
|
"",
|
|
232
232
|
"",
|
|
233
233
|
"",
|
|
234
|
-
"
|
|
234
|
+
"5"
|
|
235
235
|
].join("\n") + "\n"
|
|
236
236
|
|
|
237
237
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -250,7 +250,7 @@ class TestCli < Minitest::Test
|
|
|
250
250
|
"n",
|
|
251
251
|
"",
|
|
252
252
|
"",
|
|
253
|
-
"
|
|
253
|
+
"5"
|
|
254
254
|
].join("\n") + "\n"
|
|
255
255
|
|
|
256
256
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -270,7 +270,7 @@ class TestCli < Minitest::Test
|
|
|
270
270
|
"",
|
|
271
271
|
"",
|
|
272
272
|
"abc",
|
|
273
|
-
"
|
|
273
|
+
"5"
|
|
274
274
|
].join("\n") + "\n"
|
|
275
275
|
|
|
276
276
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -280,6 +280,120 @@ class TestCli < Minitest::Test
|
|
|
280
280
|
assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
|
|
281
281
|
end
|
|
282
282
|
|
|
283
|
+
# Drives the menu through the dedupe workflow with console output and checks
# the prompts, the retained rows and the summary line.
def test_dedupe_workflow_shell_prompts_and_returns_to_menu
  output = StringIO.new
  # Scripted stdin, one line per answer.
  # NOTE(review): blank answers presumably accept a prompt's default — confirm
  # against the workflow implementation.
  answers = [
    "4",
    fixture_path("dedupe_source.csv"),
    "", "",
    fixture_path("dedupe_reference.csv"),
    "", "",
    "customer_id",
    "external_id",
    "", "", "",
    "5"
  ]
  stdin = StringIO.new(answers.map { |line| "#{line}\n" }.join)

  exit_status = Csvtool::CLI.start(["menu"], stdin: stdin, stdout: output, stderr: StringIO.new)

  assert_equal 0, exit_status
  [
    "Reference CSV file path:",
    "Source key column name:",
    "Reference key column name:",
    "customer_id,name",
    "1,Alice",
    "3,Cara",
    "Summary: source_rows=5 removed_rows=3 kept_rows=2"
  ].each { |fragment| assert_includes output.string, fragment }
end
|
|
312
|
+
|
|
313
|
+
# Drives the dedupe workflow with a file destination inside a temporary
# directory and checks the confirmation message, file contents and summary.
def test_dedupe_workflow_can_write_to_file
  output = StringIO.new

  Dir.mktmpdir do |tmp|
    output_path = File.join(tmp, "deduped.csv")
    # Scripted stdin, one line per answer; "2" followed by the path is how
    # this script supplies the output file.
    answers = [
      "4",
      fixture_path("dedupe_source.csv"),
      "", "",
      fixture_path("dedupe_reference.csv"),
      "", "",
      "customer_id",
      "external_id",
      "", "",
      "2",
      output_path,
      "5"
    ]
    stdin = StringIO.new(answers.map { |line| "#{line}\n" }.join)

    exit_status = Csvtool::CLI.start(["menu"], stdin: stdin, stdout: output, stderr: StringIO.new)

    assert_equal 0, exit_status
    assert_includes output.string, "Wrote output to #{output_path}"
    assert_equal "customer_id,name\n1,Alice\n3,Cara\n", File.read(output_path)
    assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
  end
end
|
|
343
|
+
|
|
344
|
+
# Runs the dedupe workflow over the .tsv fixtures and checks that the output
# rows are tab-separated.
def test_dedupe_workflow_supports_tsv_separators
  output = StringIO.new
  # Scripted stdin, one line per answer.
  # NOTE(review): the "2" after each file path presumably selects the tab
  # separator — confirm against the workflow prompts.
  answers = [
    "4",
    fixture_path("dedupe_source.tsv"),
    "2",
    "",
    fixture_path("dedupe_reference.tsv"),
    "2",
    "",
    "customer_id",
    "external_id",
    "", "", "",
    "5"
  ]
  stdin = StringIO.new(answers.map { |line| "#{line}\n" }.join)

  exit_status = Csvtool::CLI.start(["menu"], stdin: stdin, stdout: output, stderr: StringIO.new)

  assert_equal 0, exit_status
  ["customer_id\tname", "1\tAlice", "3\tCara"].each do |fragment|
    assert_includes output.string, fragment
  end
end
|
|
369
|
+
|
|
370
|
+
# Runs the dedupe workflow over the header-less fixtures, selecting the key
# column by index, and checks that no header line is printed.
def test_dedupe_workflow_headerless_mode_supports_index
  output = StringIO.new
  # Scripted stdin, one line per answer.
  # NOTE(review): "n" presumably answers the headers prompt and "1" is the
  # key column index for each file — confirm against the workflow prompts.
  answers = [
    "4",
    fixture_path("dedupe_source_no_headers.csv"),
    "",
    "n",
    fixture_path("dedupe_reference_no_headers.csv"),
    "",
    "n",
    "1",
    "1",
    "", "", "",
    "5"
  ]
  stdin = StringIO.new(answers.map { |line| "#{line}\n" }.join)

  exit_status = Csvtool::CLI.start(["menu"], stdin: stdin, stdout: output, stderr: StringIO.new)

  assert_equal 0, exit_status
  refute_includes output.string, "customer_id,name"
  [
    "1,Alice",
    "3,Cara",
    "Summary: source_rows=5 removed_rows=3 kept_rows=2"
  ].each { |fragment| assert_includes output.string, fragment }
end
|
|
396
|
+
|
|
283
397
|
def test_end_to_end_file_output_writes_expected_csv
|
|
284
398
|
output = StringIO.new
|
|
285
399
|
output_path = nil
|
|
@@ -296,7 +410,7 @@ class TestCli < Minitest::Test
|
|
|
296
410
|
"y",
|
|
297
411
|
"2",
|
|
298
412
|
output_path,
|
|
299
|
-
"
|
|
413
|
+
"5"
|
|
300
414
|
].join("\n") + "\n"
|
|
301
415
|
|
|
302
416
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -316,7 +430,7 @@ class TestCli < Minitest::Test
|
|
|
316
430
|
"1",
|
|
317
431
|
"",
|
|
318
432
|
"n",
|
|
319
|
-
"
|
|
433
|
+
"5"
|
|
320
434
|
].join("\n") + "\n"
|
|
321
435
|
|
|
322
436
|
output = StringIO.new
|
|
@@ -352,7 +466,7 @@ class TestCli < Minitest::Test
|
|
|
352
466
|
"y",
|
|
353
467
|
"2",
|
|
354
468
|
"/tmp/not-a-dir/out.csv",
|
|
355
|
-
"
|
|
469
|
+
"5"
|
|
356
470
|
].join("\n") + "\n"
|
|
357
471
|
|
|
358
472
|
output = StringIO.new
|
|
@@ -16,7 +16,7 @@ class CliUnitTest < Minitest::Test
|
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
def test_menu_command_can_exit_zero
|
|
19
|
-
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("
|
|
19
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("5\n"), stdout: StringIO.new, stderr: StringIO.new)
|
|
20
20
|
assert_equal 0, status
|
|
21
21
|
end
|
|
22
22
|
|
|
@@ -28,7 +28,7 @@ class CliUnitTest < Minitest::Test
|
|
|
28
28
|
def test_menu_routes_to_row_range_shell
|
|
29
29
|
stdout = StringIO.new
|
|
30
30
|
fixture = File.expand_path("../fixtures/sample_people.csv", __dir__)
|
|
31
|
-
input = ["2", fixture, "", "2", "3", "", "
|
|
31
|
+
input = ["2", fixture, "", "2", "3", "", "5"].join("\n") + "\n"
|
|
32
32
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
|
|
33
33
|
assert_equal 0, status
|
|
34
34
|
assert_includes stdout.string, "name,city"
|
|
@@ -39,7 +39,7 @@ class CliUnitTest < Minitest::Test
|
|
|
39
39
|
def test_menu_routes_to_randomize_rows_shell
|
|
40
40
|
stdout = StringIO.new
|
|
41
41
|
fixture = File.expand_path("../fixtures/sample_people.csv", __dir__)
|
|
42
|
-
input = ["3", fixture, "", "", "", "", "
|
|
42
|
+
input = ["3", fixture, "", "", "", "", "5"].join("\n") + "\n"
|
|
43
43
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
|
|
44
44
|
assert_equal 0, status
|
|
45
45
|
assert_includes stdout.string, "name,city"
|
|
@@ -47,4 +47,17 @@ class CliUnitTest < Minitest::Test
|
|
|
47
47
|
assert_includes stdout.string, "Bob,Paris"
|
|
48
48
|
assert_includes stdout.string, "Cara,Berlin"
|
|
49
49
|
end
|
|
50
|
+
|
|
51
|
+
# Checks that menu choice "4" reaches the dedupe workflow: the retained rows
# and the summary line are expected on stdout.
def test_menu_routes_to_dedupe_shell
  stdout = StringIO.new
  source_fixture, reference_fixture = %w[dedupe_source.csv dedupe_reference.csv].map do |name|
    File.expand_path("../fixtures/#{name}", __dir__)
  end
  # Scripted stdin, one line per answer.
  answers = [
    "4",
    source_fixture, "", "",
    reference_fixture, "", "",
    "customer_id",
    "external_id",
    "", "", "",
    "5"
  ]
  stdin = StringIO.new(answers.map { |line| "#{line}\n" }.join)

  exit_status = Csvtool::CLI.start(["menu"], stdin: stdin, stdout: stdout, stderr: StringIO.new)

  assert_equal 0, exit_status
  [
    "customer_id,name",
    "1,Alice",
    "3,Cara",
    "Summary: source_rows=5 removed_rows=3 kept_rows=2"
  ].each { |fragment| assert_includes stdout.string, fragment }
end
|
|
50
63
|
end
|
|
@@ -8,7 +8,7 @@ require "csvtool/domain/column_session/column_selection"
|
|
|
8
8
|
require "csvtool/domain/column_session/extraction_options"
|
|
9
9
|
require "csvtool/domain/column_session/preview"
|
|
10
10
|
require "csvtool/domain/column_session/extraction_value"
|
|
11
|
-
require "csvtool/domain/
|
|
11
|
+
require "csvtool/domain/shared/output_destination"
|
|
12
12
|
|
|
13
13
|
class ColumnSessionTest < Minitest::Test
|
|
14
14
|
def test_state_transitions
|
|
@@ -25,7 +25,7 @@ class ColumnSessionTest < Minitest::Test
|
|
|
25
25
|
values: [Csvtool::Domain::ColumnSession::ExtractionValue.new("Alice")]
|
|
26
26
|
)
|
|
27
27
|
session = session.with_preview(preview).confirm!.with_output_destination(
|
|
28
|
-
Csvtool::Domain::
|
|
28
|
+
Csvtool::Domain::Shared::OutputDestination.console
|
|
29
29
|
)
|
|
30
30
|
|
|
31
31
|
assert_equal true, session.confirmed?
|
|
@@ -11,4 +11,14 @@ class CsvSourceTest < Minitest::Test
|
|
|
11
11
|
assert_equal "/tmp/a.csv", source.path
|
|
12
12
|
assert_equal separator, source.separator
|
|
13
13
|
end
|
|
14
|
+
|
|
15
|
+
# Building a CsvSource with an empty path is expected to raise ArgumentError
# with the message "path cannot be empty".
def test_rejects_empty_path
  column_session = Csvtool::Domain::ColumnSession
  # Built outside the assert_raises block so that only CsvSource.new is
  # checked for the error.
  comma = column_session::Separator.new(",")

  raised = assert_raises(ArgumentError) { column_session::CsvSource.new(path: "", separator: comma) }

  assert_equal "path cannot be empty", raised.message
end
|
|
14
24
|
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
5
|
+
|
|
6
|
+
# Tests for ColumnSelector: construction from user input (header name or
# 1-based index) and value extraction from a row.
class CrossCsvDedupeColumnSelectorTest < Minitest::Test
  # Header mode keeps the input string as the selector value.
  def test_builds_header_selector_from_input
    by_name = selector(headers_present: true, input: "customer_id")

    assert_equal "customer_id", by_name.value
    assert_equal true, by_name.headers_present?
  end

  # Header-less mode converts the input to an integer index.
  def test_builds_index_selector_from_input
    by_index = selector(headers_present: false, input: "2")

    assert_equal 2, by_index.value
    assert_equal true, by_index.index?
  end

  # An index of "0" is rejected with an ArgumentError.
  def test_rejects_invalid_index_input
    raised = assert_raises(ArgumentError) { selector(headers_present: false, input: "0") }

    assert_equal "column index must be a positive integer", raised.message
  end

  # A header selector reads the value stored under its column name.
  def test_extracts_from_headered_row
    by_name = selector(headers_present: true, input: "customer_id")
    record = { "customer_id" => "42" }

    assert_equal "42", by_name.extract_from(record)
  end

  # An index selector of 2 reads the second field of an array row.
  def test_extracts_from_headerless_row_by_index
    by_index = selector(headers_present: false, input: "2")
    fields = %w[a b c]

    assert_equal "b", by_index.extract_from(fields)
  end

  private

  # Shorthand for ColumnSelector.from_input.
  def selector(headers_present:, input:)
    Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(
      headers_present: headers_present,
      input: input
    )
  end
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session"
|
|
5
|
+
require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
|
|
6
|
+
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
7
|
+
require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
|
|
8
|
+
require "csvtool/domain/cross_csv_dedupe_session/match_options"
|
|
9
|
+
require "csvtool/domain/shared/output_destination"
|
|
10
|
+
|
|
11
|
+
# Tests for CrossCsvDedupeSession: starting a session, attaching an output
# destination, and argument validation.
class CrossCsvDedupeSessionTest < Minitest::Test
  # A started session with an output destination attached exposes every
  # collaborator it was given.
  def test_start_and_with_output_destination
    source = profile_for("/tmp/source.csv")
    reference = profile_for("/tmp/reference.csv")
    key_mapping = header_key_mapping
    match_options = trimming_match_options

    started = Csvtool::Domain::CrossCsvDedupeSession::CrossCsvDedupeSession.start(
      source: source,
      reference: reference,
      key_mapping: key_mapping,
      match_options: match_options
    )
    destination = Csvtool::Domain::Shared::OutputDestination.console
    updated = started.with_output_destination(destination)

    {
      source: source,
      reference: reference,
      key_mapping: key_mapping,
      match_options: match_options,
      output_destination: destination
    }.each do |reader, expected|
      assert_equal expected, updated.public_send(reader)
    end
  end

  # Passing something other than a CsvProfile as the source is rejected.
  def test_rejects_invalid_source_type
    # Valid collaborators are built outside the block so that only the bad
    # source can trigger the error.
    reference = profile_for("/tmp/reference.csv")
    key_mapping = header_key_mapping
    match_options = trimming_match_options

    raised = assert_raises(ArgumentError) do
      Csvtool::Domain::CrossCsvDedupeSession::CrossCsvDedupeSession.start(
        source: "bad",
        reference: reference,
        key_mapping: key_mapping,
        match_options: match_options
      )
    end

    assert_equal "source must be CsvProfile", raised.message
  end

  private

  # Comma-separated profile with headers for the given path.
  def profile_for(path)
    Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
      path: path,
      separator: ",",
      headers_present: true
    )
  end

  # Key mapping from header "source_id" to header "reference_id".
  def header_key_mapping
    source_selector, reference_selector = %w[source_id reference_id].map do |column|
      Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: column)
    end

    Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
      source_selector: source_selector,
      reference_selector: reference_selector
    )
  end

  # Match options with whitespace trimming on and case-insensitivity off.
  def trimming_match_options
    Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
      trim_whitespace: true,
      case_insensitive: false
    )
  end
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
|
|
5
|
+
|
|
6
|
+
# Tests for CsvProfile: attribute readers and path validation.
class CrossCsvDedupeCsvProfileTest < Minitest::Test
  # The values given at construction are returned by the readers.
  def test_initializes_with_expected_fields
    built = build_profile("/tmp/source.csv")

    assert_equal "/tmp/source.csv", built.path
    assert_equal ",", built.separator
    assert_equal true, built.headers_present?
  end

  # An empty path is rejected with an ArgumentError.
  def test_requires_path
    raised = assert_raises(ArgumentError) { build_profile("") }

    assert_equal "path cannot be empty", raised.message
  end

  private

  # Comma-separated profile with headers for the given path.
  def build_profile(path)
    Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
      path: path,
      separator: ",",
      headers_present: true
    )
  end
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
|
|
5
|
+
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
6
|
+
|
|
7
|
+
# Tests for KeyMapping: it stores the two selectors and validates their type.
class CrossCsvDedupeKeyMappingTest < Minitest::Test
  # The selectors given at construction are returned by the readers.
  def test_holds_source_and_reference_selectors
    dedupe = Csvtool::Domain::CrossCsvDedupeSession
    from_source, from_reference = %w[source_id ref_id].map do |column|
      dedupe::ColumnSelector.from_input(headers_present: true, input: column)
    end

    built = dedupe::KeyMapping.new(source_selector: from_source, reference_selector: from_reference)

    assert_equal from_source, built.source_selector
    assert_equal from_reference, built.reference_selector
  end

  # Plain strings in place of ColumnSelector instances are rejected.
  def test_rejects_non_selector_inputs
    raised = assert_raises(ArgumentError) do
      Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(source_selector: "id", reference_selector: "external_id")
    end

    assert_equal "selectors must be ColumnSelector", raised.message
  end
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/cross_csv_dedupe_session/match_options"
|
|
5
|
+
|
|
6
|
+
# Tests for MatchOptions: the boolean predicates and the four combinations of
# whitespace trimming and case folding applied by #normalize.
class CrossCsvDedupeMatchOptionsTest < Minitest::Test
  # The predicates return the flags given at construction.
  def test_predicates_return_boolean_flags
    flags = build_options(trim_whitespace: true, case_insensitive: false)

    assert_equal true, flags.trim_whitespace?
    assert_equal false, flags.case_insensitive?
  end

  # Trimming only: surrounding spaces removed, case kept.
  def test_normalize_trim_on_case_off
    assert_equal "AbC", normalized_sample(trim_whitespace: true, case_insensitive: false)
  end

  # Trimming and case folding: spaces removed and value lower-cased.
  def test_normalize_trim_on_case_on
    assert_equal "abc", normalized_sample(trim_whitespace: true, case_insensitive: true)
  end

  # Case folding only: spaces kept, value lower-cased.
  def test_normalize_trim_off_case_on
    assert_equal " abc ", normalized_sample(trim_whitespace: false, case_insensitive: true)
  end

  # Neither option: the value comes back unchanged.
  def test_normalize_trim_off_case_off
    assert_equal " AbC ", normalized_sample(trim_whitespace: false, case_insensitive: false)
  end

  private

  # Shorthand for MatchOptions.new.
  def build_options(trim_whitespace:, case_insensitive:)
    Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
      trim_whitespace: trim_whitespace,
      case_insensitive: case_insensitive
    )
  end

  # Normalizes the shared sample " AbC " under the given flags.
  def normalized_sample(trim_whitespace:, case_insensitive:)
    build_options(trim_whitespace: trim_whitespace, case_insensitive: case_insensitive).normalize(" AbC ")
  end
end
|