recursive-cleaner 0.7.1__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/CLAUDE.md +10 -2
  2. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/PKG-INFO +21 -2
  3. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/README.md +18 -1
  4. recursive_cleaner-0.8.0/demo_tui.py +54 -0
  5. recursive_cleaner-0.8.0/docs/contracts/v080-api-contract.md +62 -0
  6. recursive_cleaner-0.8.0/docs/contracts/v080-data-schema.md +90 -0
  7. recursive_cleaner-0.8.0/docs/contracts/v080-success-criteria.md +70 -0
  8. recursive_cleaner-0.8.0/docs/implementation-plan-v080.md +182 -0
  9. recursive_cleaner-0.8.0/docs/research/rich-tui-patterns.md +110 -0
  10. recursive_cleaner-0.8.0/docs/workflow-state.md +24 -0
  11. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/pyproject.toml +4 -1
  12. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/recursive_cleaner/__init__.py +3 -0
  13. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/recursive_cleaner/cleaner.py +117 -26
  14. recursive_cleaner-0.8.0/recursive_cleaner/tui.py +595 -0
  15. recursive_cleaner-0.8.0/tests/test_tui.py +758 -0
  16. recursive_cleaner-0.7.1/docs/workflow-state.md +0 -26
  17. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/.gitignore +0 -0
  18. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/LICENSE +0 -0
  19. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/TODO.md +0 -0
  20. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/backends/__init__.py +0 -0
  21. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/backends/mlx_backend.py +0 -0
  22. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/contracts/api-contract.md +0 -0
  23. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/contracts/data-schema.md +0 -0
  24. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/contracts/success-criteria.md +0 -0
  25. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/contracts/text-mode-contract.md +0 -0
  26. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/contracts/tier2-contract.md +0 -0
  27. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/contracts/tier4-contract.md +0 -0
  28. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/contracts/tier4-success-criteria.md +0 -0
  29. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/contracts/two-pass-contract.md +0 -0
  30. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/contracts/v070-success-criteria.md +0 -0
  31. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/handoffs/tier4-handoff.md +0 -0
  32. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/implementation-plan-tier4.md +0 -0
  33. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/implementation-plan-v03.md +0 -0
  34. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/implementation-plan-v04.md +0 -0
  35. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/implementation-plan-v05.md +0 -0
  36. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/implementation-plan.md +0 -0
  37. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/langchain-analysis.md +0 -0
  38. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/langgraph-analysis.md +0 -0
  39. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/mlx-lm-guide.md +0 -0
  40. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/other-frameworks-analysis.md +0 -0
  41. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/refactor-assessment/data/dependency.json +0 -0
  42. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/refactor-assessment/data/stats.json +0 -0
  43. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/refactor-assessment/plan.md +0 -0
  44. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/refactor-assessment/report.md +0 -0
  45. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/research/chonkie-extraction.md +0 -0
  46. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/research/chonkie.md +0 -0
  47. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/research/markitdown.md +0 -0
  48. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/docs/smolagents-analysis.md +0 -0
  49. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/recursive_cleaner/context.py +0 -0
  50. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/recursive_cleaner/dependencies.py +0 -0
  51. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/recursive_cleaner/errors.py +0 -0
  52. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/recursive_cleaner/metrics.py +0 -0
  53. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/recursive_cleaner/optimizer.py +0 -0
  54. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/recursive_cleaner/output.py +0 -0
  55. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/recursive_cleaner/parser_generator.py +0 -0
  56. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/recursive_cleaner/parsers.py +0 -0
  57. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/recursive_cleaner/prompt.py +0 -0
  58. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/recursive_cleaner/report.py +0 -0
  59. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/recursive_cleaner/response.py +0 -0
  60. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/recursive_cleaner/schema.py +0 -0
  61. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/recursive_cleaner/types.py +0 -0
  62. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/recursive_cleaner/validation.py +0 -0
  63. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/recursive_cleaner/vendor/__init__.py +0 -0
  64. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/recursive_cleaner/vendor/chunker.py +0 -0
  65. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/test_cases/ecommerce_instructions.txt +0 -0
  66. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/test_cases/ecommerce_products.jsonl +0 -0
  67. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/test_cases/financial_instructions.txt +0 -0
  68. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/test_cases/financial_transactions.jsonl +0 -0
  69. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/test_cases/healthcare_instructions.txt +0 -0
  70. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/test_cases/healthcare_patients.jsonl +0 -0
  71. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/test_cases/run_ecommerce_test.py +0 -0
  72. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/test_cases/run_financial_test.py +0 -0
  73. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/test_cases/run_healthcare_test.py +0 -0
  74. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/__init__.py +0 -0
  75. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_callbacks.py +0 -0
  76. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_cleaner.py +0 -0
  77. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_context.py +0 -0
  78. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_dependencies.py +0 -0
  79. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_dry_run.py +0 -0
  80. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_holdout.py +0 -0
  81. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_incremental.py +0 -0
  82. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_integration.py +0 -0
  83. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_latency.py +0 -0
  84. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_metrics.py +0 -0
  85. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_optimizer.py +0 -0
  86. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_output.py +0 -0
  87. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_parser_generator.py +0 -0
  88. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_parsers.py +0 -0
  89. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_report.py +0 -0
  90. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_sampling.py +0 -0
  91. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_schema.py +0 -0
  92. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_text_mode.py +0 -0
  93. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_validation.py +0 -0
  94. {recursive_cleaner-0.7.1 → recursive_cleaner-0.8.0}/tests/test_vendor_chunker.py +0 -0
@@ -4,7 +4,9 @@
4
4
 
5
5
  | Version | Status | Date |
6
6
  |---------|--------|------|
7
- | v0.6.0 | **Implemented** | 2025-01-15 |
7
+ | v0.8.0 | **Implemented** | 2025-01-19 |
8
+ | v0.7.0 | Implemented | 2025-01-17 |
9
+ | v0.6.0 | Implemented | 2025-01-15 |
8
10
  | v0.5.1 | Implemented | 2025-01-15 |
9
11
  | v0.5.0 | Implemented | 2025-01-15 |
10
12
  | v0.4.0 | Implemented | 2025-01-15 |
@@ -12,9 +14,11 @@
12
14
  | v0.2.0 | Implemented | 2025-01-14 |
13
15
  | v0.1.0 | Implemented | 2025-01-14 |
14
16
 
15
- **Current State**: v0.6.0 complete. 392 tests passing, 2,967 lines total.
17
+ **Current State**: v0.8.0 complete. 465 tests passing.
16
18
 
17
19
  ### Version History
20
+ - **v0.8.0**: Terminal UI with Rich dashboard, mission control aesthetic, transmission log
21
+ - **v0.7.0**: Markitdown integration (20+ formats), Parquet support, LLM-generated parsers
18
22
  - **v0.6.0**: Latency metrics, import consolidation, cleaning report, dry-run mode
19
23
  - **v0.5.1**: Dangerous code detection (AST-based security)
20
24
  - **v0.5.0**: Two-pass optimization with LLM agency (consolidation, early termination)
@@ -69,6 +73,8 @@ cleaner = DataCleaner(
69
73
  # Observability (v0.6.0)
70
74
  report_path="cleaning_report.md", # Generate markdown report (None to disable)
71
75
  dry_run=False, # Set True to analyze without generating functions
76
+ # Terminal UI (v0.8.0)
77
+ tui=True, # Enable Rich dashboard (requires pip install recursive-cleaner[tui])
72
78
  )
73
79
 
74
80
  cleaner.run() # Outputs: cleaning_functions.py, cleaning_report.md
@@ -159,6 +165,7 @@ recursive_cleaner/
159
165
  report.py # Markdown report generation (~120 lines) [v0.6.0]
160
166
  response.py # XML/markdown parsing + agency dataclasses (~292 lines)
161
167
  schema.py # Schema inference (~117 lines) [v0.2.0]
168
+ tui.py # Rich terminal dashboard (~595 lines) [v0.8.0]
162
169
  types.py # LLMBackend protocol (~11 lines)
163
170
  validation.py # Runtime validation + safety checks (~200 lines)
164
171
  vendor/
@@ -187,6 +194,7 @@ tests/ # 392 tests
187
194
  test_sampling.py # Sampling strategy tests [v0.4.0]
188
195
  test_schema.py # Schema inference tests
189
196
  test_text_mode.py # Text mode tests [v0.3.0]
197
+ test_tui.py # Terminal UI tests [v0.8.0]
190
198
  test_validation.py # Runtime validation + safety tests
191
199
  test_vendor_chunker.py # Vendored chunker tests [v0.3.0]
192
200
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: recursive-cleaner
3
- Version: 0.7.1
3
+ Version: 0.8.0
4
4
  Summary: LLM-powered incremental data cleaning pipeline that processes massive datasets in chunks and generates Python cleaning functions
5
5
  Project-URL: Homepage, https://github.com/gaztrabisme/recursive-data-cleaner
6
6
  Project-URL: Repository, https://github.com/gaztrabisme/recursive-data-cleaner
@@ -32,6 +32,8 @@ Provides-Extra: mlx
32
32
  Requires-Dist: mlx-lm>=0.10.0; extra == 'mlx'
33
33
  Provides-Extra: parquet
34
34
  Requires-Dist: pyarrow>=14.0.0; extra == 'parquet'
35
+ Provides-Extra: tui
36
+ Requires-Dist: rich>=13.0; extra == 'tui'
35
37
  Description-Content-Type: text/markdown
36
38
 
37
39
  # Recursive Data Cleaner
@@ -69,6 +71,11 @@ For Parquet files:
69
71
  pip install -e ".[parquet]"
70
72
  ```
71
73
 
74
+ For Terminal UI (Rich dashboard):
75
+ ```bash
76
+ pip install -e ".[tui]"
77
+ ```
78
+
72
79
  ## Quick Start
73
80
 
74
81
  ```python
@@ -126,6 +133,13 @@ cleaner.run() # Generates cleaning_functions.py
126
133
  - **Parquet Support**: Load parquet files as structured data via pyarrow
127
134
  - **LLM-Generated Parsers**: Auto-generate parsers for XML and unknown formats (`auto_parse=True`)
128
135
 
136
+ ### Terminal UI (v0.8.0)
137
+ - **Mission Control Dashboard**: Rich-based live terminal UI with retro aesthetic
138
+ - **Real-time Progress**: Animated progress bars, chunk/iteration counters
139
+ - **Transmission Log**: Parsed LLM responses showing issues detected and functions being generated
140
+ - **Token Estimation**: Track estimated input/output tokens across the run
141
+ - **Graceful Fallback**: Works without Rich installed (falls back to callbacks)
142
+
129
143
  ## Configuration
130
144
 
131
145
  ```python
@@ -160,6 +174,9 @@ cleaner = DataCleaner(
160
174
  # Format Expansion
161
175
  auto_parse=False, # LLM generates parser for unknown formats
162
176
 
177
+ # Terminal UI
178
+ tui=True, # Enable Rich dashboard (requires [tui] extra)
179
+
163
180
  # Progress & State
164
181
  on_progress=callback, # Progress event callback
165
182
  state_file="state.json", # Enable resume on interrupt
@@ -265,6 +282,7 @@ recursive_cleaner/
265
282
  ├── report.py # Markdown report generation
266
283
  ├── response.py # XML/markdown parsing + agency dataclasses
267
284
  ├── schema.py # Schema inference
285
+ ├── tui.py # Rich terminal dashboard
268
286
  ├── validation.py # Runtime validation + holdout
269
287
  └── vendor/
270
288
  └── chunker.py # Vendored sentence-aware chunker
@@ -276,7 +294,7 @@ recursive_cleaner/
276
294
  pytest tests/ -v
277
295
  ```
278
296
 
279
- 432 tests covering all features. Test datasets in `test_cases/`:
297
+ 465 tests covering all features. Test datasets in `test_cases/`:
280
298
  - E-commerce product catalogs
281
299
  - Healthcare patient records
282
300
  - Financial transaction data
@@ -292,6 +310,7 @@ pytest tests/ -v
292
310
 
293
311
  | Version | Features |
294
312
  |---------|----------|
313
+ | v0.8.0 | Terminal UI with Rich dashboard, mission control aesthetic, transmission log |
295
314
  | v0.7.0 | Markitdown (20+ formats), Parquet support, LLM-generated parsers |
296
315
  | v0.6.0 | Latency metrics, import consolidation, cleaning report, dry-run mode |
297
316
  | v0.5.1 | Dangerous code detection (AST-based security) |
@@ -33,6 +33,11 @@ For Parquet files:
33
33
  pip install -e ".[parquet]"
34
34
  ```
35
35
 
36
+ For Terminal UI (Rich dashboard):
37
+ ```bash
38
+ pip install -e ".[tui]"
39
+ ```
40
+
36
41
  ## Quick Start
37
42
 
38
43
  ```python
@@ -90,6 +95,13 @@ cleaner.run() # Generates cleaning_functions.py
90
95
  - **Parquet Support**: Load parquet files as structured data via pyarrow
91
96
  - **LLM-Generated Parsers**: Auto-generate parsers for XML and unknown formats (`auto_parse=True`)
92
97
 
98
+ ### Terminal UI (v0.8.0)
99
+ - **Mission Control Dashboard**: Rich-based live terminal UI with retro aesthetic
100
+ - **Real-time Progress**: Animated progress bars, chunk/iteration counters
101
+ - **Transmission Log**: Parsed LLM responses showing issues detected and functions being generated
102
+ - **Token Estimation**: Track estimated input/output tokens across the run
103
+ - **Graceful Fallback**: Works without Rich installed (falls back to callbacks)
104
+
93
105
  ## Configuration
94
106
 
95
107
  ```python
@@ -124,6 +136,9 @@ cleaner = DataCleaner(
124
136
  # Format Expansion
125
137
  auto_parse=False, # LLM generates parser for unknown formats
126
138
 
139
+ # Terminal UI
140
+ tui=True, # Enable Rich dashboard (requires [tui] extra)
141
+
127
142
  # Progress & State
128
143
  on_progress=callback, # Progress event callback
129
144
  state_file="state.json", # Enable resume on interrupt
@@ -229,6 +244,7 @@ recursive_cleaner/
229
244
  ├── report.py # Markdown report generation
230
245
  ├── response.py # XML/markdown parsing + agency dataclasses
231
246
  ├── schema.py # Schema inference
247
+ ├── tui.py # Rich terminal dashboard
232
248
  ├── validation.py # Runtime validation + holdout
233
249
  └── vendor/
234
250
  └── chunker.py # Vendored sentence-aware chunker
@@ -240,7 +256,7 @@ recursive_cleaner/
240
256
  pytest tests/ -v
241
257
  ```
242
258
 
243
- 432 tests covering all features. Test datasets in `test_cases/`:
259
+ 465 tests covering all features. Test datasets in `test_cases/`:
244
260
  - E-commerce product catalogs
245
261
  - Healthcare patient records
246
262
  - Financial transaction data
@@ -256,6 +272,7 @@ pytest tests/ -v
256
272
 
257
273
  | Version | Features |
258
274
  |---------|----------|
275
+ | v0.8.0 | Terminal UI with Rich dashboard, mission control aesthetic, transmission log |
259
276
  | v0.7.0 | Markitdown (20+ formats), Parquet support, LLM-generated parsers |
260
277
  | v0.6.0 | Latency metrics, import consolidation, cleaning report, dry-run mode |
261
278
  | v0.5.1 | Dangerous code detection (AST-based security) |
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Demo script to showcase the Rich TUI with real MLX backend.
4
+
5
+ Run with:
6
+ python demo_tui.py
7
+
8
+ Requirements:
9
+ pip install "recursive-cleaner[mlx,tui]"
10
+ """
11
+
12
+ from backends import MLXBackend
13
+ from recursive_cleaner import DataCleaner
14
+
15
+ # Model used for the demo. This is a large 80B checkpoint; swap in a smaller/faster model if preferred.
16
+ MODEL = "lmstudio-community/Qwen3-Next-80B-A3B-Instruct-MLX-4bit"
17
+
18
+ print("=" * 60)
19
+ print(" RECURSIVE DATA CLEANER - TUI DEMO")
20
+ print("=" * 60)
21
+ print(f"\nLoading model: {MODEL}")
22
+ print("This may take a moment on first run...\n")
23
+
24
+ llm = MLXBackend(
25
+ model_path=MODEL,
26
+ max_tokens=2048,
27
+ temperature=0.3, # Lower for more consistent output
28
+ verbose=False, # Disable token streaming to avoid interfering with TUI
29
+ )
30
+
31
+ cleaner = DataCleaner(
32
+ llm_backend=llm,
33
+ file_path="test_cases/ecommerce_products.jsonl",
34
+ chunk_size=5, # Small chunks for demo
35
+ max_iterations=3, # Limit iterations per chunk
36
+ instructions="""
37
+ E-commerce product data cleaning:
38
+ - Normalize prices to float (remove $ symbols)
39
+ - Fix category typos and normalize to Title Case
40
+ - Convert weights to kg as float
41
+ - Ensure stock_quantity is non-negative integer
42
+ """,
43
+ tui=True, # Enable the Rich dashboard!
44
+ track_metrics=True,
45
+ )
46
+
47
+ print("\nStarting cleaner with TUI enabled...")
48
+ print("Watch the dashboard below!\n")
49
+
50
+ cleaner.run()
51
+
52
+ print("\n" + "=" * 60)
53
+ print("Demo complete! Check cleaning_functions.py for output.")
54
+ print("=" * 60)
@@ -0,0 +1,62 @@
1
+ # API Contract: Rich TUI (v0.8.0)
2
+
3
+ ## New Parameter
4
+
5
+ ```python
6
+ DataCleaner(
7
+ ...,
8
+ tui: bool = False, # Enable Rich terminal dashboard
9
+ )
10
+ ```
11
+
12
+ ## Behavior Matrix
13
+
14
+ | `tui` | Rich installed | Behavior |
15
+ |-------|----------------|----------|
16
+ | `False` | Any | Existing callback-based output (no change) |
17
+ | `True` | Yes | Live dashboard replaces console output; callbacks still fire |
18
+ | `True` | No | Warning logged, falls back to callbacks |
19
+
20
+ ## New Optional Dependency
21
+
22
+ ```toml
23
+ [project.optional-dependencies]
24
+ tui = ["rich>=13.0"]
25
+ ```
26
+
27
+ ```bash
28
+ pip install "recursive-cleaner[tui]"
29
+ ```
30
+
31
+ ## TUI Module API
32
+
33
+ ### `recursive_cleaner/tui.py`
34
+
35
+ ```python
36
+ # Check availability
37
+ HAS_RICH: bool
38
+
39
+ # Main renderer class
40
+ class TUIRenderer:
41
+ def __init__(self, file_path: str, total_chunks: int, total_records: int)
42
+ def start(self) -> None
43
+ def stop(self) -> None
44
+ def update_chunk(self, chunk_index: int, iteration: int, max_iterations: int) -> None
45
+ def update_llm_status(self, status: str) -> None # "calling" | "idle"
46
+ def add_function(self, name: str, docstring: str) -> None
47
+ def update_metrics(self, quality_delta: float, latency_last: float, latency_avg: float, latency_total: float, llm_calls: int) -> None
48
+ def show_complete(self, summary: dict) -> None
49
+ ```
50
+
51
+ ## Integration with DataCleaner
52
+
53
+ When `tui=True` and Rich available:
54
+ 1. `on_progress` callback still fires (for logging, state tracking)
55
+ 2. TUI replaces console output, not callbacks
56
+ 3. TUI auto-stops on completion or error
57
+
58
+ ## No Breaking Changes
59
+
60
+ - All existing parameters unchanged
61
+ - All existing callbacks unchanged
62
+ - `tui=False` (default) = identical to v0.7.0 behavior
@@ -0,0 +1,90 @@
1
+ # Data Schema: TUI Display State (v0.8.0)
2
+
3
+ ## Dashboard State
4
+
5
+ ```python
6
+ @dataclass
7
+ class TUIState:
8
+ # Header
9
+ file_path: str
10
+ total_records: int
11
+ version: str = "0.8.0"
12
+
13
+ # Progress
14
+ current_chunk: int = 0
15
+ total_chunks: int = 0
16
+ current_iteration: int = 0
17
+ max_iterations: int = 5
18
+
19
+ # LLM Status
20
+ llm_status: Literal["idle", "calling"] = "idle"
21
+
22
+ # Functions
23
+ functions: list[FunctionInfo] = field(default_factory=list)
24
+
25
+ # Metrics
26
+ quality_delta: float = 0.0 # Percentage improvement
27
+ latency_last_ms: float = 0.0
28
+ latency_avg_ms: float = 0.0
29
+ latency_total_ms: float = 0.0
30
+ llm_call_count: int = 0
31
+
32
+ @dataclass
33
+ class FunctionInfo:
34
+ name: str
35
+ docstring: str # First 50 chars displayed
36
+ ```
37
+
38
+ ## Dashboard Layout Schema
39
+
40
+ ```
41
+ ┌─────────────────────────────────────────────────────────┐
42
+ │ {file_path} v{version} │ <- HEADER (size=3)
43
+ ├────────────────────┬────────────────────────────────────┤
44
+ │ PROGRESS │ FUNCTIONS ({len(functions)}) │ <- BODY
45
+ │ [████░░░░░░] {%} │ ├─ {functions[0].name} │
46
+ │ Chunk {cur}/{tot} │ ├─ {functions[1].name} │
47
+ │ Iter {i}/{max} │ └─ {functions[2].name} │
48
+ │ │ (+{n} more) │
49
+ │ {spinner} {status}│ QUALITY: +{quality_delta}% │
50
+ ├────────────────────┴────────────────────────────────────┤
51
+ │ ⏱️ {latency_last}ms │ avg {latency_avg}ms │ {llm_calls} │ <- FOOTER (size=3)
52
+ └─────────────────────────────────────────────────────────┘
53
+ ```
54
+
55
+ ## Color Scheme
56
+
57
+ | Element | Color | Condition |
58
+ |---------|-------|-----------|
59
+ | Header title | cyan | Always |
60
+ | Progress bar | yellow | In progress |
61
+ | Progress bar | green | Chunk complete |
62
+ | Spinner | yellow | LLM calling |
63
+ | Function names | green | Always |
64
+ | Quality delta | green | Positive |
65
+ | Quality delta | red | Negative |
66
+ | Latency | dim white | Always |
67
+
68
+ ## Spinner States
69
+
70
+ | `llm_status` | Display |
71
+ |--------------|---------|
72
+ | `"calling"` | Animated spinner + "Calling LLM..." |
73
+ | `"idle"` | Static checkmark or empty |
74
+
75
+ ## Completion Summary
76
+
77
+ On `show_complete()`:
78
+
79
+ ```
80
+ ┌─────────────────────────────────────────────────────────┐
81
+ │ ✓ COMPLETE │
82
+ ├─────────────────────────────────────────────────────────┤
83
+ │ Functions generated: {n} │
84
+ │ Chunks processed: {total_chunks} │
85
+ │ Quality improvement: +{quality_delta}% │
86
+ │ Total time: {latency_total}ms ({llm_calls} LLM calls) │
87
+ │ │
88
+ │ Output: cleaning_functions.py │
89
+ └─────────────────────────────────────────────────────────┘
90
+ ```
@@ -0,0 +1,70 @@
1
+ # Success Criteria: Rich TUI (v0.8.0)
2
+
3
+ ## Project-Level Success
4
+
5
+ - [ ] `pip install recursive-cleaner[tui]` installs rich>=13.0
6
+ - [ ] `DataCleaner(..., tui=True)` shows live dashboard
7
+ - [ ] Dashboard displays all state from data schema contract
8
+ - [ ] Falls back gracefully when Rich not installed
9
+ - [ ] All 432 existing tests pass
10
+ - [ ] Zero breaking changes to existing API
11
+
12
+ ## Phase 1: Core TUI Module
13
+
14
+ **Deliverables:**
15
+ - [ ] `recursive_cleaner/tui.py` with `TUIRenderer` class
16
+ - [ ] `HAS_RICH` check with graceful import
17
+ - [ ] Basic `start()` / `stop()` lifecycle
18
+ - [ ] Static layout matching schema (header, body split, footer)
19
+
20
+ **Success Criteria:**
21
+ - [ ] `from recursive_cleaner.tui import TUIRenderer, HAS_RICH` works
22
+ - [ ] `TUIRenderer` can be instantiated without Rich (no crash)
23
+ - [ ] With Rich: `start()` shows layout, `stop()` exits cleanly
24
+ - [ ] Layout has correct sections per data schema
25
+
26
+ **Tests:**
27
+ - [ ] test_tui_import_without_rich
28
+ - [ ] test_tui_renderer_lifecycle
29
+ - [ ] test_tui_layout_structure
30
+
31
+ ## Phase 2: Dynamic Updates
32
+
33
+ **Deliverables:**
34
+ - [ ] `update_chunk()` updates progress bar and counters
35
+ - [ ] `update_llm_status()` shows/hides spinner
36
+ - [ ] `add_function()` appends to function list
37
+ - [ ] `update_metrics()` updates footer stats
38
+
39
+ **Success Criteria:**
40
+ - [ ] Progress bar fills based on chunk_index/total_chunks
41
+ - [ ] Spinner animates when status="calling", stops when "idle"
42
+ - [ ] Functions list grows, shows "+N more" when >5 functions
43
+ - [ ] Metrics panel shows formatted latency and counts
44
+
45
+ **Tests:**
46
+ - [ ] test_progress_updates
47
+ - [ ] test_spinner_states
48
+ - [ ] test_function_list_display
49
+ - [ ] test_metrics_display
50
+
51
+ ## Phase 3: Integration & Polish
52
+
53
+ **Deliverables:**
54
+ - [ ] `tui=True` parameter on DataCleaner
55
+ - [ ] Integration: TUI updates from cleaner loop
56
+ - [ ] `show_complete()` with summary panel
57
+ - [ ] Fallback warning when Rich not installed
58
+ - [ ] Color transitions (yellow→green on chunk complete)
59
+
60
+ **Success Criteria:**
61
+ - [ ] Full cleaner run with `tui=True` shows live dashboard
62
+ - [ ] Completion shows summary with all stats
63
+ - [ ] `tui=True` without Rich logs warning, uses callbacks
64
+ - [ ] Chunk completion triggers green color flash
65
+
66
+ **Tests:**
67
+ - [ ] test_datacleaner_tui_integration
68
+ - [ ] test_tui_fallback_warning
69
+ - [ ] test_completion_summary
70
+ - [ ] test_color_transitions
@@ -0,0 +1,182 @@
1
+ # Implementation Plan: Rich TUI (v0.8.0)
2
+
3
+ ## Overview
4
+
5
+ Add optional Rich-based terminal dashboard for visual progress tracking during data cleaning runs.
6
+
7
+ ## Technology Stack
8
+
9
+ | Layer | Choice | Rationale |
10
+ |-------|--------|-----------|
11
+ | TUI Library | Rich >=13.0 | Simple API, same author as Textual, 50KB |
12
+ | Pattern | Live + Layout | Mission control style, update sections independently |
13
+ | Fallback | Plain callbacks | Zero-dep baseline preserved |
14
+
15
+ ## Phase Breakdown
16
+
17
+ ### Phase 1: Core TUI Module
18
+
19
+ **Objective:** Create standalone TUI renderer with basic layout.
20
+
21
+ **Deliverables:**
22
+ - [ ] `recursive_cleaner/tui.py` (~150 lines)
23
+ - [ ] `tests/test_tui.py` (basic tests)
24
+ - [ ] `pyproject.toml` update for `[tui]` extra
25
+
26
+ **Implementation:**
27
+ ```python
28
+ # tui.py structure
29
+ try:
30
+ from rich.live import Live
31
+ from rich.layout import Layout
32
+ from rich.panel import Panel
33
+ HAS_RICH = True
34
+ except ImportError:
35
+ HAS_RICH = False
36
+
37
+ class TUIRenderer:
38
+ def __init__(self, file_path, total_chunks, total_records):
39
+ self._state = TUIState(...)
40
+ self._layout = self._make_layout() if HAS_RICH else None
41
+ self._live = None
42
+
43
+ def _make_layout(self):
44
+ layout = Layout()
45
+ layout.split_column(
46
+ Layout(name="header", size=3),
47
+ Layout(name="body"),
48
+ Layout(name="footer", size=3)
49
+ )
50
+ layout["body"].split_row(
51
+ Layout(name="progress"),
52
+ Layout(name="functions")
53
+ )
54
+ return layout
55
+
56
+ def start(self):
57
+ if not HAS_RICH:
58
+ return
59
+ self._live = Live(self._layout, refresh_per_second=2)
60
+ self._live.start()
61
+
62
+ def stop(self):
63
+ if self._live:
64
+ self._live.stop()
65
+ ```
66
+
67
+ **Success Criteria:**
68
+ - Import works with/without Rich
69
+ - Layout renders with correct sections
70
+ - Start/stop lifecycle works
71
+
72
+ ---
73
+
74
+ ### Phase 2: Dynamic Updates
75
+
76
+ **Objective:** Wire up all state updates to visual components.
77
+
78
+ **Deliverables:**
79
+ - [ ] `update_chunk()` - progress bar + counters
80
+ - [ ] `update_llm_status()` - spinner control
81
+ - [ ] `add_function()` - function list panel
82
+ - [ ] `update_metrics()` - footer stats
83
+ - [ ] Additional tests for each update method
84
+
85
+ **Implementation:**
86
+ ```python
87
+ def update_chunk(self, chunk_index, iteration, max_iterations):
88
+ self._state.current_chunk = chunk_index
89
+ self._state.current_iteration = iteration
90
+ self._refresh_progress_panel()
91
+
92
+ def _refresh_progress_panel(self):
93
+ progress = Progress(BarColumn(), TextColumn("{task.percentage:.0f}%"))
94
+ task = progress.add_task("", total=self._state.total_chunks)
95
+ progress.update(task, completed=self._state.current_chunk)
96
+
97
+ content = Group(
98
+ progress,
99
+ Text(f"Chunk {self._state.current_chunk}/{self._state.total_chunks}"),
100
+ Text(f"Iteration {self._state.current_iteration}/{self._state.max_iterations}"),
101
+ self._make_spinner()
102
+ )
103
+ self._layout["progress"].update(Panel(content, title="Progress"))
104
+ ```
105
+
106
+ **Success Criteria:**
107
+ - Progress bar animates smoothly
108
+ - Spinner shows during LLM calls
109
+ - Function list grows dynamically
110
+ - Metrics update in real-time
111
+
112
+ ---
113
+
114
+ ### Phase 3: Integration & Polish
115
+
116
+ **Objective:** Connect TUI to DataCleaner and add finishing touches.
117
+
118
+ **Deliverables:**
119
+ - [ ] `tui=True` parameter on DataCleaner.__init__
120
+ - [ ] TUI updates from main processing loop
121
+ - [ ] `show_complete()` summary panel
122
+ - [ ] Fallback warning via logging
123
+ - [ ] Color transitions on chunk completion
124
+ - [ ] Integration tests
125
+
126
+ **Implementation in cleaner.py:**
127
+ ```python
128
+ def __init__(self, ..., tui: bool = False):
129
+ self.tui = tui
130
+ self._tui_renderer = None
131
+
132
+ def run(self):
133
+ if self.tui:
134
+ from recursive_cleaner.tui import TUIRenderer, HAS_RICH
135
+ if HAS_RICH:
136
+ self._tui_renderer = TUIRenderer(...)
137
+ self._tui_renderer.start()
138
+ else:
139
+ import logging
140
+ logging.warning("tui=True but Rich not installed. pip install recursive-cleaner[tui]")
141
+
142
+ # ... existing loop with TUI updates injected ...
143
+
144
+ if self._tui_renderer:
145
+ self._tui_renderer.show_complete(summary)
146
+ self._tui_renderer.stop()
147
+ ```
148
+
149
+ **Success Criteria:**
150
+ - Full run with tui=True shows dashboard
151
+ - Fallback logs warning, uses callbacks
152
+ - Completion summary displays all stats
153
+ - Green flash on chunk completion
154
+
155
+ ---
156
+
157
+ ## Risk Register
158
+
159
+ | Risk | Likelihood | Impact | Mitigation |
160
+ |------|------------|--------|------------|
161
+ | Terminal size too small | Low | Medium | Use `vertical_overflow="crop"` |
162
+ | Rich version incompatibility | Low | Medium | Pin `>=13.0` (stable API) |
163
+ | Performance overhead | Low | Low | refresh_per_second=2 is fine |
164
+
165
+ ## Out of Scope
166
+
167
+ - Keyboard interactivity (pause/resume)
168
+ - Mouse support
169
+ - Scrollable function list
170
+ - Custom themes
171
+ - Textual upgrade path
172
+
173
+ ## File Changes Summary
174
+
175
+ | File | Change |
176
+ |------|--------|
177
+ | `recursive_cleaner/tui.py` | NEW (~200 lines) |
178
+ | `recursive_cleaner/cleaner.py` | Add `tui` param, TUI integration |
179
+ | `recursive_cleaner/__init__.py` | Export TUIRenderer, HAS_RICH |
180
+ | `pyproject.toml` | Add `[tui]` optional dependency |
181
+ | `tests/test_tui.py` | NEW (~15 tests) |
182
+ | `README.md` | Document TUI feature |