csvops 0.7.0.alpha → 0.9.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +80 -20
- data/docs/architecture.md +67 -4
- data/docs/cli-output-conventions.md +49 -0
- data/docs/release-v0.8.0-alpha.md +88 -0
- data/docs/release-v0.9.0-alpha.md +80 -0
- data/lib/csvtool/application/use_cases/run_csv_stats.rb +64 -0
- data/lib/csvtool/cli.rb +136 -12
- data/lib/csvtool/domain/csv_stats_session/stats_options.rb +11 -0
- data/lib/csvtool/domain/csv_stats_session/stats_session.rb +25 -0
- data/lib/csvtool/domain/csv_stats_session/stats_source.rb +17 -0
- data/lib/csvtool/infrastructure/csv/csv_stats_scanner.rb +67 -0
- data/lib/csvtool/infrastructure/output/csv_stats_file_writer.rb +26 -0
- data/lib/csvtool/interface/cli/menu_loop.rb +9 -5
- data/lib/csvtool/interface/cli/output/color_policy.rb +25 -0
- data/lib/csvtool/interface/cli/output/colorizer.rb +27 -0
- data/lib/csvtool/interface/cli/output/formatters/csv_row_formatter.rb +19 -0
- data/lib/csvtool/interface/cli/output/formatters/stats_formatter.rb +57 -0
- data/lib/csvtool/interface/cli/output/streams.rb +22 -0
- data/lib/csvtool/interface/cli/output/table_renderer.rb +70 -0
- data/lib/csvtool/interface/cli/workflows/builders/csv_stats_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +17 -5
- data/lib/csvtool/interface/cli/workflows/presenters/csv_parity_presenter.rb +15 -4
- data/lib/csvtool/interface/cli/workflows/presenters/csv_split_presenter.rb +15 -6
- data/lib/csvtool/interface/cli/workflows/presenters/csv_stats_presenter.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +5 -4
- data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +5 -4
- data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +9 -8
- data/lib/csvtool/interface/cli/workflows/run_csv_parity_workflow.rb +6 -5
- data/lib/csvtool/interface/cli/workflows/run_csv_split_workflow.rb +11 -10
- data/lib/csvtool/interface/cli/workflows/run_csv_stats_workflow.rb +78 -0
- data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +9 -8
- data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +7 -6
- data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +8 -7
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step.rb +27 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step.rb +31 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/execute_step.rb +27 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/run_csv_stats_test.rb +165 -0
- data/test/csvtool/cli_test.rb +376 -68
- data/test/csvtool/cli_unit_test.rb +5 -5
- data/test/csvtool/infrastructure/csv/csv_stats_scanner_test.rb +68 -0
- data/test/csvtool/infrastructure/output/csv_stats_file_writer_test.rb +38 -0
- data/test/csvtool/interface/cli/menu_loop_test.rb +34 -11
- data/test/csvtool/interface/cli/output/color_policy_test.rb +40 -0
- data/test/csvtool/interface/cli/output/colorizer_test.rb +28 -0
- data/test/csvtool/interface/cli/output/formatters/csv_row_formatter_test.rb +22 -0
- data/test/csvtool/interface/cli/output/formatters/stats_formatter_test.rb +51 -0
- data/test/csvtool/interface/cli/output/streams_test.rb +25 -0
- data/test/csvtool/interface/cli/output/table_renderer_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/builders/csv_stats_session_builder_test.rb +19 -0
- data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +4 -1
- data/test/csvtool/interface/cli/workflows/presenters/csv_parity_presenter_test.rb +5 -1
- data/test/csvtool/interface/cli/workflows/presenters/csv_split_presenter_test.rb +22 -4
- data/test/csvtool/interface/cli/workflows/presenters/csv_stats_presenter_test.rb +39 -0
- data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +10 -7
- data/test/csvtool/interface/cli/workflows/run_csv_parity_workflow_test.rb +3 -1
- data/test/csvtool/interface/cli/workflows/run_csv_split_workflow_test.rb +5 -3
- data/test/csvtool/interface/cli/workflows/run_csv_stats_workflow_test.rb +151 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step_test.rb +49 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step_test.rb +61 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/execute_step_test.rb +65 -0
- metadata +39 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 940c2492f5bea33d56ad0a47ebf6933cb2e43817530aa04d4e02950affe9d493
|
|
4
|
+
data.tar.gz: 9c32a162b4393f25e99b55e908df2b9dcb55099ce0d820aa3255c310fc09d983
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3babceb657e3e3c366c19daa305fcffe18fca1e145dafa8d8f66f022531e17fc21f0788da7b27a0d76e17e07c88fa098105d20116353cced5e4b207870a0f88e
|
|
7
|
+
data.tar.gz: a8efe4b0a86dd3e303ca8946a0b25b3dbcffb1bbeb76823910d43ab8ff1ba4fc7670f1a2c16e135941caf74a3a216f988010bd0178b892a60eb9bd82963c4596
|
data/README.md
CHANGED
|
@@ -39,30 +39,28 @@ CSV Tool Menu
|
|
|
39
39
|
4. Dedupe using another CSV
|
|
40
40
|
5. Validate parity
|
|
41
41
|
6. Split CSV into chunks
|
|
42
|
-
7.
|
|
42
|
+
7. CSV stats summary
|
|
43
|
+
8. Exit
|
|
43
44
|
>
|
|
44
45
|
```
|
|
45
46
|
|
|
46
|
-
Select `1` for column extraction, `2` for row-range extraction, `3` for row randomization, `4` for cross-CSV dedupe, `5` for parity validation, or `
|
|
47
|
+
Select `1` for column extraction, `2` for row-range extraction, `3` for row randomization, `4` for cross-CSV dedupe, `5` for parity validation, `6` for CSV splitting, or `7` for CSV stats.
|
|
47
48
|
|
|
48
49
|
### 3. Follow prompts
|
|
49
50
|
|
|
50
|
-
Each
|
|
51
|
+
Each action asks only for what it needs (file path, separator, and any action-specific options), then prints results to the console or writes to a file when selected.
|
|
51
52
|
|
|
52
|
-
|
|
53
|
+
Typical prompt pattern:
|
|
53
54
|
|
|
54
|
-
-
|
|
55
|
-
-
|
|
56
|
-
-
|
|
55
|
+
- choose source file(s)
|
|
56
|
+
- choose separator/header options when relevant
|
|
57
|
+
- choose action-specific options
|
|
58
|
+
- choose output destination (console or file)
|
|
57
59
|
|
|
58
|
-
|
|
60
|
+
For architecture and internal design details, see:
|
|
59
61
|
|
|
60
|
-
- `
|
|
61
|
-
- `
|
|
62
|
-
- `Randomize rows`: file path, separator, headers present, optional seed, output destination.
|
|
63
|
-
- `Dedupe using another CSV`: source/reference files, separators, header modes, key selectors, match options, output destination.
|
|
64
|
-
- `Validate parity`: left/right files, separator, header mode, parity summary, mismatch samples.
|
|
65
|
-
- `Split CSV into chunks`: source file, separator, header mode, chunk size, output directory/prefix, overwrite policy, optional manifest.
|
|
62
|
+
- [`docs/architecture.md`](docs/architecture.md)
|
|
63
|
+
- [`docs/cli-output-conventions.md`](docs/cli-output-conventions.md)
|
|
66
64
|
|
|
67
65
|
### 4. Example interaction (console output)
|
|
68
66
|
|
|
@@ -123,6 +121,22 @@ With Bundler:
|
|
|
123
121
|
bundle exec csvtool column /path/to/file.csv column_name
|
|
124
122
|
```
|
|
125
123
|
|
|
124
|
+
Get CSV stats directly (default text output):
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
csvtool stats /path/to/file.csv
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Optional output format and color mode:
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
csvtool stats /path/to/file.csv --format json
|
|
134
|
+
csvtool stats /path/to/file.csv --format csv
|
|
135
|
+
csvtool stats /path/to/file.csv --color auto
|
|
136
|
+
csvtool stats /path/to/file.csv --color always
|
|
137
|
+
csvtool stats /path/to/file.csv --color never
|
|
138
|
+
```
|
|
139
|
+
|
|
126
140
|
### 7. Dedupe interaction example
|
|
127
141
|
|
|
128
142
|
Legend: ` ` = prompt/menu, `+` = user input, `-` = tool output
|
|
@@ -134,8 +148,9 @@ Legend: ` ` = prompt/menu, `+` = user input, `-` = tool output
|
|
|
134
148
|
3. Randomize rows
|
|
135
149
|
4. Dedupe using another CSV
|
|
136
150
|
5. Validate parity
|
|
137
|
-
|
|
138
|
-
7.
|
|
151
|
+
6. Split CSV into chunks
|
|
152
|
+
7. CSV stats summary
|
|
153
|
+
8. Exit
|
|
139
154
|
+> 4
|
|
140
155
|
CSV file path: /tmp/source.csv
|
|
141
156
|
Source CSV separator:
|
|
@@ -184,7 +199,8 @@ Legend: ` ` = prompt/menu, `+` = user input, `-` = tool output
|
|
|
184
199
|
4. Dedupe using another CSV
|
|
185
200
|
5. Validate parity
|
|
186
201
|
6. Split CSV into chunks
|
|
187
|
-
7.
|
|
202
|
+
7. CSV stats summary
|
|
203
|
+
8. Exit
|
|
188
204
|
+> 5
|
|
189
205
|
Left CSV file path: /tmp/left.csv
|
|
190
206
|
Right CSV file path: /tmp/right.csv
|
|
@@ -224,7 +240,8 @@ Legend: ` ` = prompt/menu, `+` = user input, `-` = tool output
|
|
|
224
240
|
4. Dedupe using another CSV
|
|
225
241
|
5. Validate parity
|
|
226
242
|
6. Split CSV into chunks
|
|
227
|
-
7.
|
|
243
|
+
7. CSV stats summary
|
|
244
|
+
8. Exit
|
|
228
245
|
+> 6
|
|
229
246
|
Source CSV file path: /tmp/people.csv
|
|
230
247
|
Choose separator:
|
|
@@ -247,6 +264,48 @@ Legend: ` ` = prompt/menu, `+` = user input, `-` = tool output
|
|
|
247
264
|
-/tmp/people_part_001.csv
|
|
248
265
|
```
|
|
249
266
|
|
|
267
|
+
### 11. CSV stats interaction example
|
|
268
|
+
|
|
269
|
+
Legend: ` ` = prompt/menu, `+` = user input, `-` = tool output
|
|
270
|
+
|
|
271
|
+
```diff
|
|
272
|
+
CSV Tool Menu
|
|
273
|
+
1. Extract column
|
|
274
|
+
2. Extract rows (range)
|
|
275
|
+
3. Randomize rows
|
|
276
|
+
4. Dedupe using another CSV
|
|
277
|
+
5. Validate parity
|
|
278
|
+
6. Split CSV into chunks
|
|
279
|
+
7. CSV stats summary
|
|
280
|
+
8. Exit
|
|
281
|
+
+> 7
|
|
282
|
+
CSV file path: /tmp/people.csv
|
|
283
|
+
Choose separator:
|
|
284
|
+
1. comma (,)
|
|
285
|
+
2. tab (\t)
|
|
286
|
+
3. semicolon (;)
|
|
287
|
+
4. pipe (|)
|
|
288
|
+
5. custom
|
|
289
|
+
+Separator choice [1]: 1
|
|
290
|
+
Headers present? [Y/n]:
|
|
291
|
+
Output destination:
|
|
292
|
+
1. console
|
|
293
|
+
2. file
|
|
294
|
+
+Output destination [1]: 1
|
|
295
|
+
-CSV Stats Summary
|
|
296
|
+
-Rows: 3
|
|
297
|
+
-Columns: 2
|
|
298
|
+
-Headers: name, city
|
|
299
|
+
-Column completeness:
|
|
300
|
+
- name: non_blank=3 blank=0
|
|
301
|
+
- city: non_blank=3 blank=0
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
### 12. CSV stats large-file behavior
|
|
305
|
+
|
|
306
|
+
- Stats scanning is streaming (`CSV.foreach`), processed in one pass.
|
|
307
|
+
- Memory grows with per-column aggregates (`column_stats`), not with total row count.
|
|
308
|
+
|
|
250
309
|
## Testing
|
|
251
310
|
|
|
252
311
|
Run tests:
|
|
@@ -263,7 +322,7 @@ bundle exec rake test
|
|
|
263
322
|
|
|
264
323
|
## Alpha release
|
|
265
324
|
|
|
266
|
-
Current prerelease version: `0.
|
|
325
|
+
Current prerelease version: `0.9.0.alpha`
|
|
267
326
|
|
|
268
327
|
Install prerelease from RubyGems:
|
|
269
328
|
|
|
@@ -273,7 +332,7 @@ gem install csvops --pre
|
|
|
273
332
|
|
|
274
333
|
Release runbook:
|
|
275
334
|
|
|
276
|
-
- `docs/release-v0.
|
|
335
|
+
- `docs/release-v0.9.0-alpha.md`
|
|
277
336
|
|
|
278
337
|
|
|
279
338
|
## Architecture
|
|
@@ -281,3 +340,4 @@ Release runbook:
|
|
|
281
340
|
Full architecture and domain documentation lives in:
|
|
282
341
|
|
|
283
342
|
- [`docs/architecture.md`](docs/architecture.md)
|
|
343
|
+
- [`docs/cli-output-conventions.md`](docs/cli-output-conventions.md)
|
data/docs/architecture.md
CHANGED
|
@@ -2,15 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
The codebase follows a DDD-lite layered structure:
|
|
4
4
|
|
|
5
|
-
- `domain/`: core domain models and invariants (`ColumnSession`, `RowSession`, `RandomizationSession`, `CrossCsvDedupeSession`, and `
|
|
6
|
-
- `application/`: use-case orchestration (`RunExtraction`, `RunRowExtraction`, `RunRowRandomization`, `RunCrossCsvDedupe`, `RunCsvParity`, `RunCsvSplit`).
|
|
5
|
+
- `domain/`: core domain models and invariants (`ColumnSession`, `RowSession`, `RandomizationSession`, `CrossCsvDedupeSession`, `ParitySession`, `SplitSession`, and `CsvStatsSession` aggregates + supporting entities/value objects).
|
|
6
|
+
- `application/`: use-case orchestration (`RunExtraction`, `RunRowExtraction`, `RunRowRandomization`, `RunCrossCsvDedupe`, `RunCsvParity`, `RunCsvSplit`, `RunCsvStats`).
|
|
7
7
|
- `infrastructure/`: CSV reading/streaming/comparison and output adapters (console/file).
|
|
8
8
|
- `interface/cli/`: menu, prompts, workflows, and user-facing error presentation.
|
|
9
9
|
- `Csvtool::CLI`: entrypoint wiring from command args to interface/application flow.
|
|
10
10
|
|
|
11
11
|
## Workflow boundary (standardized)
|
|
12
12
|
|
|
13
|
-
For all interactive domains (`Column Extraction`, `Row Extraction`, `Row Randomization`, `Cross-CSV Dedupe`, `CSV Parity`, `CSV Split`), the boundary is:
|
|
13
|
+
For all interactive domains (`Column Extraction`, `Row Extraction`, `Row Randomization`, `Cross-CSV Dedupe`, `CSV Parity`, `CSV Split`, `CSV Stats`), the boundary is:
|
|
14
14
|
|
|
15
15
|
- `interface/cli/workflows/*`: owns prompts, stdout rendering, and user-facing error presentation.
|
|
16
16
|
- `interface/cli/workflows/builders/*`: builds domain sessions/aggregates from prompt results.
|
|
@@ -21,6 +21,9 @@ For all interactive domains (`Column Extraction`, `Row Extraction`, `Row Randomi
|
|
|
21
21
|
- `domain/*`: invariants and domain policies.
|
|
22
22
|
- `infrastructure/*`: CSV mechanics and output adapters.
|
|
23
23
|
|
|
24
|
+
Output UI rules:
|
|
25
|
+
- See [`docs/cli-output-conventions.md`](cli-output-conventions.md) for stream, format, color, and table rendering contracts used across workflows.
|
|
26
|
+
|
|
24
27
|
Write-boundary rule:
|
|
25
28
|
- Use cases coordinate write paths but do not perform direct file writes.
|
|
26
29
|
- Direct write APIs (`CSV.open`, writable `File.open`, `File.write`, `IO.write`) are infrastructure-only.
|
|
@@ -34,6 +37,7 @@ Current usage:
|
|
|
34
37
|
- `RunCrossCsvDedupeWorkflow` uses `WorkflowStepPipeline` + `Steps::CrossCsvDedupe::*`.
|
|
35
38
|
- `RunCsvParityWorkflow` uses `WorkflowStepPipeline` + `Steps::Parity::*`.
|
|
36
39
|
- `RunCsvSplitWorkflow` uses `WorkflowStepPipeline` + `Steps::CsvSplit::*`.
|
|
40
|
+
- `RunCsvStatsWorkflow` uses `WorkflowStepPipeline` + `Steps::CsvStats::*`.
|
|
37
41
|
|
|
38
42
|
## Adding New Concepts
|
|
39
43
|
|
|
@@ -109,7 +113,7 @@ For a new function type, prefer one of these patterns:
|
|
|
109
113
|
|
|
110
114
|
## Domain model
|
|
111
115
|
|
|
112
|
-
Bounded contexts: `Column Extraction`, `Row Extraction`, `Row Randomization`, `Cross-CSV Dedupe`, `CSV Parity`, and `CSV
|
|
116
|
+
Bounded contexts: `Column Extraction`, `Row Extraction`, `Row Randomization`, `Cross-CSV Dedupe`, `CSV Parity`, `CSV Split`, and `CSV Stats`.
|
|
113
117
|
|
|
114
118
|
### Cross-CSV Dedupe (Large-file behavior)
|
|
115
119
|
|
|
@@ -476,6 +480,63 @@ classDiagram
|
|
|
476
480
|
RunCsvSplit --> CsvSplitManifestWriter
|
|
477
481
|
```
|
|
478
482
|
|
|
483
|
+
### CSV Stats
|
|
484
|
+
|
|
485
|
+
Core DDD structure:
|
|
486
|
+
|
|
487
|
+
- Aggregate root: `StatsSession`
|
|
488
|
+
- Captures one stats summary request.
|
|
489
|
+
- Holds source profile and output destination.
|
|
490
|
+
- Entity:
|
|
491
|
+
- `StatsSource` (path + separator + header mode)
|
|
492
|
+
- Value objects:
|
|
493
|
+
- `StatsOptions` (currently lightweight; keeps option growth explicit)
|
|
494
|
+
- Shared `OutputDestination` (`console` or `file(path)`)
|
|
495
|
+
- Application service:
|
|
496
|
+
- `Application::UseCases::RunCsvStats` orchestrates stats scanning and output routing.
|
|
497
|
+
- Infrastructure adapters:
|
|
498
|
+
- `Infrastructure::CSV::CsvStatsScanner` (streaming one-pass row aggregation)
|
|
499
|
+
- `Infrastructure::Output::CsvStatsFileWriter` (metric/value artifact writer)
|
|
500
|
+
- Interface adapters:
|
|
501
|
+
- `Interface::CLI::MenuLoop`
|
|
502
|
+
- `Interface::CLI::Workflows::RunCsvStatsWorkflow`
|
|
503
|
+
- `Interface::CLI::Workflows::Builders::CsvStatsSessionBuilder`
|
|
504
|
+
- `Interface::CLI::Workflows::Steps::WorkflowStepPipeline`
|
|
505
|
+
- `Interface::CLI::Workflows::Steps::CsvStats::*`
|
|
506
|
+
- `Interface::CLI::Workflows::Presenters::CsvStatsPresenter`
|
|
507
|
+
- `Interface::CLI::Workflows::Support::{OutputDestinationMapper,ResultErrorHandler}`
|
|
508
|
+
- `Interface::CLI::Prompts::*`
|
|
509
|
+
- `Interface::CLI::Errors::Presenter`
|
|
510
|
+
|
|
511
|
+
```mermaid
|
|
512
|
+
classDiagram
|
|
513
|
+
direction LR
|
|
514
|
+
class MenuLoop
|
|
515
|
+
class RunCsvStatsWorkflow
|
|
516
|
+
class Prompts
|
|
517
|
+
class Errors
|
|
518
|
+
class RunCsvStats
|
|
519
|
+
class StatsSession
|
|
520
|
+
class StatsSource
|
|
521
|
+
class StatsOptions
|
|
522
|
+
class OutputDestination
|
|
523
|
+
class CsvStatsScanner
|
|
524
|
+
class CsvStatsFileWriter
|
|
525
|
+
class CsvStatsPresenter
|
|
526
|
+
|
|
527
|
+
MenuLoop --> RunCsvStatsWorkflow : invokes
|
|
528
|
+
RunCsvStatsWorkflow --> Prompts : uses
|
|
529
|
+
RunCsvStatsWorkflow --> Errors : reports failures
|
|
530
|
+
RunCsvStatsWorkflow --> CsvStatsPresenter : renders
|
|
531
|
+
RunCsvStatsWorkflow --> RunCsvStats : calls
|
|
532
|
+
RunCsvStats --> StatsSession : orchestrates
|
|
533
|
+
StatsSession o-- StatsSource
|
|
534
|
+
StatsSession o-- StatsOptions
|
|
535
|
+
StatsSession o-- OutputDestination
|
|
536
|
+
RunCsvStats --> CsvStatsScanner
|
|
537
|
+
RunCsvStats --> CsvStatsFileWriter
|
|
538
|
+
```
|
|
539
|
+
|
|
479
540
|
## Project layout
|
|
480
541
|
|
|
481
542
|
```text
|
|
@@ -487,6 +548,7 @@ lib/csvtool/domain/row_randomization_session/*
|
|
|
487
548
|
lib/csvtool/domain/cross_csv_dedupe_session/*
|
|
488
549
|
lib/csvtool/domain/csv_parity_session/*
|
|
489
550
|
lib/csvtool/domain/csv_split_session/*
|
|
551
|
+
lib/csvtool/domain/csv_stats_session/*
|
|
490
552
|
lib/csvtool/domain/shared/output_destination.rb
|
|
491
553
|
lib/csvtool/application/use_cases/run_extraction.rb
|
|
492
554
|
lib/csvtool/application/use_cases/run_row_extraction.rb
|
|
@@ -494,6 +556,7 @@ lib/csvtool/application/use_cases/run_row_randomization.rb
|
|
|
494
556
|
lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb
|
|
495
557
|
lib/csvtool/application/use_cases/run_csv_parity.rb
|
|
496
558
|
lib/csvtool/application/use_cases/run_csv_split.rb
|
|
559
|
+
lib/csvtool/application/use_cases/run_csv_stats.rb
|
|
497
560
|
lib/csvtool/infrastructure/csv/*
|
|
498
561
|
lib/csvtool/infrastructure/output/*
|
|
499
562
|
lib/csvtool/interface/cli/menu_loop.rb
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# CLI Output Conventions
|
|
2
|
+
|
|
3
|
+
This document defines the output contract for all CLI workflows.
|
|
4
|
+
|
|
5
|
+
## 1. Stream contract
|
|
6
|
+
|
|
7
|
+
- `stdout` is for data output only.
|
|
8
|
+
- `stderr` is for prompts, menu UI, status, and errors.
|
|
9
|
+
- Commands should be pipe-safe: redirecting `stdout` must not capture prompts/errors.
|
|
10
|
+
|
|
11
|
+
## 2. Format contract
|
|
12
|
+
|
|
13
|
+
- Supported formats: `text`, `json`, `csv`.
|
|
14
|
+
- `text` is human-readable and may include tables/colors.
|
|
15
|
+
- `json` and `csv` are machine-readable and should remain stable over time.
|
|
16
|
+
- Structured formats must avoid decorative output.
|
|
17
|
+
|
|
18
|
+
## 3. Color policy
|
|
19
|
+
|
|
20
|
+
- Supported modes: `auto`, `always`, `never`.
|
|
21
|
+
- `auto` colors only when output target is a TTY.
|
|
22
|
+
- `NO_COLOR` disables color in `auto` mode.
|
|
23
|
+
- `always` overrides `NO_COLOR`.
|
|
24
|
+
- Structured formats (`json`, `csv`) are not colorized.
|
|
25
|
+
|
|
26
|
+
## 4. Table rendering rules
|
|
27
|
+
|
|
28
|
+
- Use shared table renderer for summary-style text output.
|
|
29
|
+
- Render within terminal width constraints.
|
|
30
|
+
- Truncate long cells with ellipsis when necessary.
|
|
31
|
+
- Avoid broken/overlapping columns in narrow terminals.
|
|
32
|
+
|
|
33
|
+
## 5. Shared services usage
|
|
34
|
+
|
|
35
|
+
All workflows should use shared output services under:
|
|
36
|
+
|
|
37
|
+
- `lib/csvtool/interface/cli/output/streams.rb`
|
|
38
|
+
- `lib/csvtool/interface/cli/output/formatters/*`
|
|
39
|
+
- `lib/csvtool/interface/cli/output/color_policy.rb`
|
|
40
|
+
- `lib/csvtool/interface/cli/output/colorizer.rb`
|
|
41
|
+
- `lib/csvtool/interface/cli/output/table_renderer.rb`
|
|
42
|
+
|
|
43
|
+
Prefer these services over ad-hoc formatting in presenters/workflows.
|
|
44
|
+
|
|
45
|
+
## 6. Testing expectations
|
|
46
|
+
|
|
47
|
+
- Add focused unit tests for each output service.
|
|
48
|
+
- Add workflow/CLI tests for stream separation and representative formatting behavior.
|
|
49
|
+
- Keep acceptance assertions centered on contract semantics rather than fragile spacing where possible.
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# Release Checklist: v0.8.0-alpha
|
|
2
|
+
|
|
3
|
+
## 1. Verify environment
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
ruby -v
|
|
7
|
+
bundle -v
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
Expected:
|
|
11
|
+
- Ruby `3.3.x`
|
|
12
|
+
|
|
13
|
+
## 2. Install dependencies
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
bundle install
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## 3. Run quality checks
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
bundle exec rake test
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## 4. Smoke test CLI commands
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
bundle exec csvtool menu
|
|
29
|
+
bundle exec csvtool column test/fixtures/sample_people.csv name
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## 5. Smoke test workflows
|
|
33
|
+
|
|
34
|
+
### CSV stats workflow (new in this release)
|
|
35
|
+
|
|
36
|
+
Use menu option `7` (`CSV stats summary`) and verify:
|
|
37
|
+
- happy path summary prints rows/columns/headers
|
|
38
|
+
- separator and header mode options work (CSV/TSV/headerless/custom)
|
|
39
|
+
- column completeness output is correct for blanks
|
|
40
|
+
- output destination supports console and file
|
|
41
|
+
- invalid output path returns friendly error
|
|
42
|
+
|
|
43
|
+
### Existing workflows regression pass
|
|
44
|
+
|
|
45
|
+
Use menu options `1-6` and verify:
|
|
46
|
+
- column extraction still works
|
|
47
|
+
- row-range extraction still works
|
|
48
|
+
- row randomization still works
|
|
49
|
+
- cross-CSV dedupe still works
|
|
50
|
+
- parity validation still works
|
|
51
|
+
- CSV split still works
|
|
52
|
+
|
|
53
|
+
## 6. Build and validate gem package
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
gem build csvops.gemspec
|
|
57
|
+
gem install ./csvops-0.8.0.alpha.gem
|
|
58
|
+
csvtool menu
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## 7. Commit release prep
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
git add -A
|
|
65
|
+
git commit -m "chore(release): prepare v0.8.0-alpha"
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## 8. Tag release
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
git tag -a v0.8.0-alpha -m "v0.8.0-alpha"
|
|
72
|
+
git push origin main --tags
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## 9. Publish gem
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
gem push csvops-0.8.0.alpha.gem
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## 10. Create GitHub release
|
|
82
|
+
|
|
83
|
+
Create release `v0.8.0-alpha` with:
|
|
84
|
+
- New `CSV stats summary` workflow
|
|
85
|
+
- Stats-domain architecture (workflow steps, builder, presenter, use case, infrastructure adapters)
|
|
86
|
+
- Console/file output destination support for stats summary artifacts
|
|
87
|
+
- Streaming stats scanner coverage for large files
|
|
88
|
+
- Stats documentation updates in README + architecture guide
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# Release Checklist: v0.9.0-alpha
|
|
2
|
+
|
|
3
|
+
## 1. Verify environment
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
ruby -v
|
|
7
|
+
bundle -v
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
Expected:
|
|
11
|
+
- Ruby `3.3.x`
|
|
12
|
+
|
|
13
|
+
## 2. Install dependencies
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
bundle install
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## 3. Run quality checks
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
bundle exec rake test
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## 4. Smoke test CLI commands
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
bundle exec csvtool menu
|
|
29
|
+
bundle exec csvtool stats test/fixtures/sample_people.csv --format text
|
|
30
|
+
bundle exec csvtool stats test/fixtures/sample_people.csv --format json
|
|
31
|
+
bundle exec csvtool stats test/fixtures/sample_people.csv --format csv
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## 5. Smoke test output conventions across workflows
|
|
35
|
+
|
|
36
|
+
Verify in menu-driven workflows:
|
|
37
|
+
- prompts/menu/errors are on `stderr`
|
|
38
|
+
- data output is on `stdout`
|
|
39
|
+
|
|
40
|
+
Verify shared output behavior:
|
|
41
|
+
- formatter consistency (`text|json|csv`)
|
|
42
|
+
- color policy (`auto|always|never`, `NO_COLOR`)
|
|
43
|
+
- width-aware summary tables in stats/parity/split/dedupe
|
|
44
|
+
|
|
45
|
+
## 6. Build and validate gem package
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
gem build csvops.gemspec
|
|
49
|
+
gem install ./csvops-0.9.0.alpha.gem
|
|
50
|
+
csvtool menu
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## 7. Commit release prep
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
git add -A
|
|
57
|
+
git commit -m "chore(release): prepare v0.9.0-alpha"
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## 8. Tag release
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
git tag -a v0.9.0-alpha -m "v0.9.0-alpha"
|
|
64
|
+
git push origin main --tags
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## 9. Publish gem
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
gem push csvops-0.9.0.alpha.gem
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## 10. Create GitHub release
|
|
74
|
+
|
|
75
|
+
Create release `v0.9.0-alpha` with:
|
|
76
|
+
- Shared output stream services across workflows (`stdout` data, `stderr` UI/errors)
|
|
77
|
+
- Shared formatter services and migrated presenters
|
|
78
|
+
- Shared color policy + colorizer across workflows
|
|
79
|
+
- Shared width-aware table rendering across summary presenters
|
|
80
|
+
- New output conventions documentation (`docs/cli-output-conventions.md`)
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
require "csvtool/infrastructure/csv/csv_stats_scanner"
|
|
5
|
+
require "csvtool/infrastructure/output/csv_stats_file_writer"
|
|
6
|
+
|
|
7
|
+
module Csvtool
  module Application
    module UseCases
      # Use case that produces a stats summary for one CSV source and,
      # when the session's output destination is a file, hands the summary
      # to the file writer.
      #
      # The use case itself only checks that the source path is a regular
      # file; scanning and writing are delegated to the injected
      # collaborators.
      class RunCsvStats
        # Outcome of a run.
        #
        # - +ok+    true on success, false on failure.
        # - +error+ nil on success, otherwise a Symbol error code.
        # - +data+  on success, the scanner's stats (plus +:output_path+ when
        #           written to a file); on failure, a Hash of error details.
        Result = Struct.new(:ok, :error, :data, keyword_init: true) do
          def ok?
            ok
          end
        end

        # @param scanner [#call] called with +file_path:+, +col_sep:+ and
        #   +headers_present:+; its return value is used as the stats data.
        # @param csv_stats_file_writer [#call] called with +path:+ and +data:+
        #   when the output destination is a file.
        def initialize(
          scanner: Infrastructure::CSV::CsvStatsScanner.new,
          csv_stats_file_writer: Infrastructure::Output::CsvStatsFileWriter.new
        )
          @scanner = scanner
          @csv_stats_file_writer = csv_stats_file_writer
        end

        # Runs the stats scan for +session+ and routes the output.
        #
        # Error codes returned in the Result:
        # - +:file_not_found+           source path is not a regular file.
        # - +:could_not_parse_csv+      CSV::MalformedCSVError was raised.
        # - +:cannot_read_file+         EACCES/ENOENT raised before the write
        #                               step started.
        # - +:cannot_write_output_file+ EACCES/ENOENT raised during the write
        #                               step.
        #
        # @param session [#source, #output_destination] stats session; the
        #   source must respond to +path+, +separator+ and +headers_present+.
        # @return [Result]
        def call(session:)
          source = session.source
          path = source.path
          return failure(:file_not_found, path: path) unless File.file?(path)

          # Tracks the current phase so the rescue clauses below can tell a
          # read failure from a write failure. Deciding by destination type
          # alone would blame the output file for errors raised while
          # scanning the source.
          writing = false
          stats = @scanner.call(
            file_path: path,
            col_sep: source.separator,
            headers_present: source.headers_present
          )

          destination = session.output_destination
          return success(stats) unless destination&.file?

          writing = true
          @csv_stats_file_writer.call(path: destination.path, data: stats)
          success(stats.merge(output_path: destination.path))
        rescue CSV::MalformedCSVError
          failure(:could_not_parse_csv)
        rescue Errno::EACCES, Errno::ENOENT => e
          if writing
            failure(:cannot_write_output_file, path: destination.path, error_class: e.class)
          else
            failure(:cannot_read_file, path: path)
          end
        end

        private

        # Builds a successful Result carrying +data+.
        def success(data)
          Result.new(ok: true, error: nil, data: data)
        end

        # Builds a failed Result with error +code+ and optional details.
        def failure(code, data = {})
          Result.new(ok: false, error: code, data: data)
        end
      end
    end
  end
end
|