recursive-cleaner 0.8.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
backends/__init__.py CHANGED
@@ -1,5 +1,6 @@
  """Backend implementations for Recursive Data Cleaner."""

  from .mlx_backend import MLXBackend
+ from .openai_backend import OpenAIBackend

- __all__ = ["MLXBackend"]
+ __all__ = ["MLXBackend", "OpenAIBackend"]
@@ -0,0 +1,71 @@
+ """OpenAI-compatible backend for Recursive Data Cleaner."""
+
+ import os
+
+
+ class OpenAIBackend:
+     """
+     OpenAI-compatible backend implementation.
+
+     Works with OpenAI API, LM Studio, Ollama, and other OpenAI-compatible servers.
+     Conforms to the LLMBackend protocol.
+     """
+
+     def __init__(
+         self,
+         model: str,
+         api_key: str | None = None,
+         base_url: str | None = None,
+         max_tokens: int = 4096,
+         temperature: float = 0.7,
+     ):
+         """
+         Initialize the OpenAI backend.
+
+         Args:
+             model: Model name (e.g., "gpt-4o", "gpt-3.5-turbo")
+             api_key: API key (defaults to OPENAI_API_KEY env var, or "not-needed" for local)
+             base_url: API base URL (defaults to OpenAI's API)
+             max_tokens: Maximum tokens to generate
+             temperature: Sampling temperature
+         """
+         try:
+             import openai
+         except ImportError:
+             raise ImportError(
+                 "OpenAI SDK not installed. Install with: pip install openai"
+             )
+
+         self.model = model
+         self.max_tokens = max_tokens
+         self.temperature = temperature
+
+         # Resolve API key: explicit > env var > "not-needed" for local servers
+         if api_key is not None:
+             resolved_key = api_key
+         else:
+             resolved_key = os.environ.get("OPENAI_API_KEY", "not-needed")
+
+         # Create client
+         self._client = openai.OpenAI(
+             api_key=resolved_key,
+             base_url=base_url,
+         )
+
+     def generate(self, prompt: str) -> str:
+         """
+         Generate a response from the LLM.
+
+         Args:
+             prompt: The input prompt
+
+         Returns:
+             The generated text response
+         """
+         response = self._client.chat.completions.create(
+             model=self.model,
+             messages=[{"role": "user", "content": prompt}],
+             max_tokens=self.max_tokens,
+             temperature=self.temperature,
+         )
+         return response.choices[0].message.content or ""
@@ -1,5 +1,6 @@
  """Recursive Data Cleaner - LLM-powered incremental data cleaning pipeline."""

+ from recursive_cleaner.apply import apply_cleaning
  from recursive_cleaner.cleaner import DataCleaner
  from recursive_cleaner.context import build_context
  from recursive_cleaner.dependencies import resolve_dependencies
@@ -21,9 +22,10 @@ from recursive_cleaner.prompt import build_prompt
  from recursive_cleaner.response import extract_python_block, parse_response
  from recursive_cleaner.parser_generator import check_parser_safety, generate_parser
  from recursive_cleaner.tui import HAS_RICH, TUIRenderer
- from recursive_cleaner.validation import check_code_safety, extract_sample_data, validate_function
+ from recursive_cleaner.validation import check_code_safety, extract_modified_fields, extract_sample_data, validate_function

  __all__ = [
+ "apply_cleaning",
  "CleanerError",
  "ParseError",
  "MaxIterationsError",
@@ -41,6 +43,7 @@ __all__ = [
  "validate_function",
  "extract_sample_data",
  "check_code_safety",
+ "extract_modified_fields",
  "resolve_dependencies",
  "QualityMetrics",
  "measure_quality",
@@ -0,0 +1,8 @@
+ """Entry point for python -m recursive_cleaner."""
+
+ import sys
+
+ from recursive_cleaner.cli import main
+
+ if __name__ == "__main__":
+     sys.exit(main())
@@ -0,0 +1,483 @@
+ """Apply cleaning functions to data files."""
+
+ import csv
+ import importlib.util
+ import json
+ from pathlib import Path
+ from typing import Callable
+
+ from .parsers import MARKITDOWN_EXTENSIONS
+
+ # Text formats that should be converted to markdown (excludes spreadsheets)
+ TEXT_MARKITDOWN_EXTENSIONS = MARKITDOWN_EXTENSIONS - {".xlsx", ".xls", ".ods"}
+
+
+ def load_cleaning_module(functions_path: str):
+     """
+     Dynamically import a cleaning_functions.py file.
+
+     Args:
+         functions_path: Path to the cleaning functions file
+
+     Returns:
+         The imported module
+
+     Raises:
+         FileNotFoundError: If the functions file doesn't exist
+         ImportError: If the module cannot be imported
+     """
+     path = Path(functions_path)
+     if not path.exists():
+         raise FileNotFoundError(f"Functions file not found: {functions_path}")
+
+     spec = importlib.util.spec_from_file_location("cleaning_module", path)
+     if spec is None or spec.loader is None:
+         raise ImportError(f"Cannot load module from: {functions_path}")
+
+     module = importlib.util.module_from_spec(spec)
+     spec.loader.exec_module(module)
+     return module
+
+
+ def get_default_output_path(input_path: str, force_ext: str | None = None) -> str:
+     """
+     Generate default output path: input.cleaned.ext
+
+     Args:
+         input_path: Path to the input file
+         force_ext: Override the output extension (e.g., ".xlsx" for .xls files)
+
+     Returns:
+         Path string for the output file
+     """
+     path = Path(input_path)
+     suffix = path.suffix.lower()
+
+     if force_ext:
+         ext = force_ext
+     elif suffix == ".xls":
+         # .xls files are written as .xlsx
+         ext = ".xlsx"
+     elif suffix == ".txt" or suffix in TEXT_MARKITDOWN_EXTENSIONS:
+         # Text formats output as markdown
+         ext = ".md"
+     else:
+         ext = path.suffix
+
+     return str(path.with_suffix(f".cleaned{ext}"))
+
+
+ def apply_to_jsonl(
+     input_path: str,
+     output_path: str,
+     clean_fn: Callable,
+     on_progress: Callable[[dict], None] | None = None,
+ ) -> int:
+     """
+     Stream JSONL: read line, clean, write line.
+
+     Args:
+         input_path: Path to input JSONL file
+         output_path: Path for output JSONL file
+         clean_fn: Cleaning function to apply to each record
+         on_progress: Optional progress callback
+
+     Returns:
+         Number of records processed
+     """
+     records_processed = 0
+
+     with open(input_path, "r", encoding="utf-8") as infile, \
+             open(output_path, "w", encoding="utf-8") as outfile:
+         for line in infile:
+             line = line.strip()
+             if not line:
+                 continue
+
+             record = json.loads(line)
+             cleaned = clean_fn(record)
+             outfile.write(json.dumps(cleaned) + "\n")
+
+             records_processed += 1
+             if on_progress:
+                 on_progress({"type": "apply_progress", "records_processed": records_processed})
+
+     return records_processed
+
+
+ def apply_to_csv(
+     input_path: str,
+     output_path: str,
+     clean_fn: Callable,
+     on_progress: Callable[[dict], None] | None = None,
+ ) -> int:
+     """
+     Stream CSV: DictReader to clean each row, DictWriter to output.
+
+     Args:
+         input_path: Path to input CSV file
+         output_path: Path for output CSV file
+         clean_fn: Cleaning function to apply to each record
+         on_progress: Optional progress callback
+
+     Returns:
+         Number of records processed
+     """
+     records_processed = 0
+
+     with open(input_path, "r", encoding="utf-8", newline="") as infile:
+         reader = csv.DictReader(infile)
+         fieldnames = reader.fieldnames
+
+         if not fieldnames:
+             return 0
+
+         with open(output_path, "w", encoding="utf-8", newline="") as outfile:
+             writer = csv.DictWriter(outfile, fieldnames=fieldnames)
+             writer.writeheader()
+
+             for row in reader:
+                 cleaned = clean_fn(row)
+                 writer.writerow(cleaned)
+
+                 records_processed += 1
+                 if on_progress:
+                     on_progress({"type": "apply_progress", "records_processed": records_processed})
+
+     return records_processed
+
+
+ def apply_to_json(
+     input_path: str,
+     output_path: str,
+     clean_fn: Callable,
+     on_progress: Callable[[dict], None] | None = None,
+ ) -> int:
+     """
+     Batch JSON array: load all, clean each, write array.
+
+     Args:
+         input_path: Path to input JSON file
+         output_path: Path for output JSON file
+         clean_fn: Cleaning function to apply to each record
+         on_progress: Optional progress callback
+
+     Returns:
+         Number of records processed
+     """
+     with open(input_path, "r", encoding="utf-8") as f:
+         data = json.load(f)
+
+     if not isinstance(data, list):
+         # Single object - wrap, clean, unwrap
+         cleaned = clean_fn(data)
+         with open(output_path, "w", encoding="utf-8") as f:
+             json.dump(cleaned, f, indent=2)
+         if on_progress:
+             on_progress({"type": "apply_progress", "records_processed": 1})
+         return 1
+
+     cleaned_data = []
+     for i, record in enumerate(data):
+         cleaned = clean_fn(record)
+         cleaned_data.append(cleaned)
+
+         if on_progress:
+             on_progress({"type": "apply_progress", "records_processed": i + 1})
+
+     with open(output_path, "w", encoding="utf-8") as f:
+         json.dump(cleaned_data, f, indent=2)
+
+     return len(cleaned_data)
+
+
+ def apply_to_parquet(
+     input_path: str,
+     output_path: str,
+     clean_fn: Callable,
+     on_progress: Callable[[dict], None] | None = None,
+ ) -> int:
+     """
+     Batch Parquet: load as list of dicts, clean each, write back.
+
+     Args:
+         input_path: Path to input Parquet file
+         output_path: Path for output Parquet file
+         clean_fn: Cleaning function to apply to each record
+         on_progress: Optional progress callback
+
+     Returns:
+         Number of records processed
+
+     Raises:
+         ImportError: If pyarrow is not installed
+     """
+     try:
+         import pyarrow as pa
+         import pyarrow.parquet as pq
+     except ImportError:
+         raise ImportError(
+             "pyarrow is required for parquet files. "
+             "Install with: pip install recursive-cleaner[parquet]"
+         )
+
+     table = pq.read_table(input_path)
+     records = table.to_pylist()
+
+     cleaned_data = []
+     for i, record in enumerate(records):
+         cleaned = clean_fn(record)
+         cleaned_data.append(cleaned)
+
+         if on_progress:
+             on_progress({"type": "apply_progress", "records_processed": i + 1})
+
+     # Write back as parquet
+     cleaned_table = pa.Table.from_pylist(cleaned_data)
+     pq.write_table(cleaned_table, output_path)
+
+     return len(cleaned_data)
+
+
+ def apply_to_excel(
+     input_path: str,
+     output_path: str,
+     clean_fn: Callable,
+     on_progress: Callable[[dict], None] | None = None,
+ ) -> int:
+     """
+     Batch Excel: load as list of dicts, clean each, write back.
+
+     Args:
+         input_path: Path to input Excel file (.xlsx or .xls)
+         output_path: Path for output Excel file (.xlsx)
+         clean_fn: Cleaning function to apply to each record
+         on_progress: Optional progress callback
+
+     Returns:
+         Number of records processed
+
+     Raises:
+         ImportError: If openpyxl (or xlrd for .xls) is not installed
+     """
+     suffix = Path(input_path).suffix.lower()
+
+     if suffix == ".xls":
+         # Use xlrd for .xls files
+         try:
+             import xlrd
+         except ImportError:
+             raise ImportError(
+                 "xlrd is required for .xls files. "
+                 "Install with: pip install recursive-cleaner[excel]"
+             )
+
+         workbook = xlrd.open_workbook(input_path)
+         sheet = workbook.sheet_by_index(0)
+
+         if sheet.nrows < 1:
+             return 0
+
+         # First row is headers
+         headers = [str(sheet.cell_value(0, col)) for col in range(sheet.ncols)]
+         records = []
+         for row_idx in range(1, sheet.nrows):
+             row_data = {}
+             for col_idx, header in enumerate(headers):
+                 row_data[header] = sheet.cell_value(row_idx, col_idx)
+             records.append(row_data)
+     else:
+         # Use openpyxl for .xlsx files
+         try:
+             from openpyxl import load_workbook
+         except ImportError:
+             raise ImportError(
+                 "openpyxl is required for .xlsx files. "
+                 "Install with: pip install recursive-cleaner[excel]"
+             )
+
+         workbook = load_workbook(input_path, read_only=True)
+         sheet = workbook.active
+
+         rows = list(sheet.iter_rows(values_only=True))
+         if not rows:
+             return 0
+
+         # First row is headers
+         headers = [str(h) if h is not None else "" for h in rows[0]]
+         records = []
+         for row in rows[1:]:
+             row_data = {}
+             for col_idx, header in enumerate(headers):
+                 value = row[col_idx] if col_idx < len(row) else None
+                 row_data[header] = value
+             records.append(row_data)
+
+         workbook.close()
+
+     # Clean records
+     cleaned_data = []
+     for i, record in enumerate(records):
+         cleaned = clean_fn(record)
+         cleaned_data.append(cleaned)
+
+         if on_progress:
+             on_progress({"type": "apply_progress", "records_processed": i + 1})
+
+     # Write back as xlsx using openpyxl
+     try:
+         from openpyxl import Workbook
+     except ImportError:
+         raise ImportError(
+             "openpyxl is required for writing Excel files. "
+             "Install with: pip install recursive-cleaner[excel]"
+         )
+
+     wb = Workbook()
+     ws = wb.active
+
+     if cleaned_data:
+         # Write headers
+         fieldnames = list(cleaned_data[0].keys())
+         ws.append(fieldnames)
+
+         # Write data rows
+         for record in cleaned_data:
+             ws.append([record.get(k) for k in fieldnames])
+
+     wb.save(output_path)
+
+     return len(cleaned_data)
+
+
+ def apply_to_text(
+     input_path: str,
+     output_path: str,
+     clean_fn: Callable,
+     on_progress: Callable[[dict], None] | None = None,
+ ) -> int:
+     """
+     Process text/document files: extract text, clean, write as markdown.
+
+     Args:
+         input_path: Path to input file (.txt or markitdown format)
+         output_path: Path for output markdown file
+         clean_fn: Cleaning function to apply to the text
+         on_progress: Optional progress callback
+
+     Returns:
+         Number of records processed (always 1 for text)
+
+     Raises:
+         ImportError: If markitdown is not installed (for non-.txt files)
+     """
+     suffix = Path(input_path).suffix.lower()
+
+     if suffix == ".txt":
+         # Plain text - read directly
+         with open(input_path, "r", encoding="utf-8") as f:
+             content = f.read()
+     else:
+         # Use markitdown for other formats
+         try:
+             from markitdown import MarkItDown
+         except ImportError:
+             raise ImportError(
+                 "markitdown is required for this file type. "
+                 "Install with: pip install recursive-cleaner[markitdown]"
+             )
+
+         md = MarkItDown()
+         result = md.convert(input_path)
+         content = result.text_content
+
+     # Clean the text content
+     cleaned = clean_fn(content)
+
+     # Write as markdown
+     with open(output_path, "w", encoding="utf-8") as f:
+         f.write(cleaned)
+
+     if on_progress:
+         on_progress({"type": "apply_progress", "records_processed": 1})
+
+     return 1
+
+
+ def apply_cleaning(
+     input_path: str,
+     functions_path: str,
+     output_path: str | None = None,
+     on_progress: Callable[[dict], None] | None = None,
+ ) -> str:
+     """
+     Apply cleaning functions to a data file.
+
+     Args:
+         input_path: Path to input data file
+         functions_path: Path to cleaning_functions.py
+         output_path: Path for output file (default: input.cleaned.ext)
+         on_progress: Optional progress callback
+
+     Returns:
+         Path to output file
+
+     Raises:
+         FileNotFoundError: If input or functions file not found
+         ImportError: If functions file cannot be imported
+         ValueError: If input format is unsupported
+     """
+     # Validate input file exists
+     if not Path(input_path).exists():
+         raise FileNotFoundError(f"Input file not found: {input_path}")
+
+     # Load cleaning module
+     module = load_cleaning_module(functions_path)
+
+     # Get the clean_data function
+     if not hasattr(module, "clean_data"):
+         raise ImportError(f"Functions file missing clean_data() function: {functions_path}")
+
+     clean_fn = module.clean_data
+
+     # Determine output path
+     suffix = Path(input_path).suffix.lower()
+     if output_path is None:
+         output_path = get_default_output_path(input_path)
+
+     # Route by format
+     format_handlers = {
+         ".jsonl": apply_to_jsonl,
+         ".csv": apply_to_csv,
+         ".json": apply_to_json,
+         ".parquet": apply_to_parquet,
+         ".xlsx": apply_to_excel,
+         ".xls": apply_to_excel,
+     }
+
+     handler = format_handlers.get(suffix)
+
+     # Check for text formats (.txt and markitdown extensions, excluding spreadsheets)
+     if handler is None:
+         if suffix == ".txt" or suffix in TEXT_MARKITDOWN_EXTENSIONS:
+             handler = apply_to_text
+
+     if handler is None:
+         raise ValueError(f"Unsupported format: {suffix}")
+
+     # Emit start event (total_records unknown for streaming formats)
+     if on_progress:
+         on_progress({"type": "apply_start", "total_records": None})
+
+     # Apply cleaning
+     total_records = handler(input_path, output_path, clean_fn, on_progress)
+
+     # Emit complete event
+     if on_progress:
+         on_progress({
+             "type": "apply_complete",
+             "total_records": total_records,
+             "output_path": output_path,
+         })
+
+     return output_path
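A minimal sketch of driving the new apply step from Python, assuming the public import added to the package __init__ above; file names are placeholders:

from recursive_cleaner import apply_cleaning

def show_progress(event: dict) -> None:
    # Event types emitted by apply.py: apply_start, apply_progress, apply_complete
    if event["type"] == "apply_progress":
        print(f"cleaned {event['records_processed']} records")

# output_path defaults to <input>.cleaned.<ext>, e.g. orders.cleaned.jsonl here
out_path = apply_cleaning("orders.jsonl", "cleaning_functions.py", on_progress=show_progress)
print(out_path)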
@@ -17,7 +17,7 @@ from .prompt import build_prompt
  from .response import parse_response
  from .schema import format_schema_for_prompt, infer_schema
  from .types import LLMBackend
- from .validation import check_code_safety, extract_sample_data, split_holdout, validate_function
+ from .validation import check_code_safety, extract_modified_fields, extract_sample_data, split_holdout, validate_function

  STATE_VERSION = "0.5.0"

@@ -63,6 +63,7 @@ class DataCleaner:
  dry_run: bool = False,
  auto_parse: bool = False,
  tui: bool = False,
+ output_path: str = "cleaning_functions.py",
  ):
  self.backend = llm_backend
  self.file_path = file_path
@@ -88,6 +89,7 @@ class DataCleaner:
  self.dry_run = dry_run
  self.auto_parse = auto_parse
  self.tui = tui
+ self.output_path = output_path
  self.functions: list[dict] = []  # List of {name, docstring, code}
  self._tui_renderer = None  # TUIRenderer instance when tui=True
  self._generated_parser: callable | None = None  # LLM-generated parser for unknown formats
@@ -108,6 +110,8 @@ class DataCleaner:
  "min_ms": float("inf"),
  "max_ms": 0.0,
  }
+ # Track fields already covered by generated functions (per chunk)
+ self._fields_covered: set[str] = set()

  def _emit(self, event_type: str, chunk_index: int = 0, **kwargs) -> None:
  """Emit a progress event to the callback, if set."""
@@ -520,7 +524,7 @@ class DataCleaner:
  "quality_delta": 0.0,  # Could be calculated from metrics
  "latency_total_ms": latency_summary.get("total_ms", 0.0),
  "llm_calls": latency_summary.get("call_count", 0),
- "output_file": "cleaning_functions.py",
+ "output_file": self.output_path,
  })
  self._tui_renderer.stop()

@@ -531,6 +535,8 @@ class DataCleaner:
  """Process a single chunk, iterating until clean or max iterations."""
  self._emit("chunk_start", chunk_index=chunk_idx)
  error_feedback = ""
+ # Reset fields covered for new chunk
+ self._fields_covered = set()

  # Dry run mode: just detect issues, don't generate functions
  if self.dry_run:
@@ -592,6 +598,20 @@ class DataCleaner:
  print(f" Safety check failed: {safety_error}")
  continue

+ # Check for duplicate field coverage
+ new_fields = extract_modified_fields(result["code"])
+ overlap = new_fields & self._fields_covered
+ if overlap:
+ field_list = ", ".join(sorted(overlap))
+ error_feedback = f"You already generated a function for field(s): {field_list}. This issue is solved. Move on to the next unsolved issue."
+ self._emit(
+ "duplicate_field",
+ chunk_index=chunk_idx,
+ function_name=result["name"],
+ fields=list(overlap),
+ )
+ continue
+
  # Runtime validation if enabled
  if self.validate_runtime:
  # Use holdout data if available, else sample from generation chunk
@@ -626,6 +646,8 @@ class DataCleaner:
  "docstring": result["docstring"],
  "code": result["code"],
  })
+ # Track fields covered by this function
+ self._fields_covered.update(new_fields)
  # Track for saturation check
  self._recent_new_function_count += 1

@@ -687,11 +709,11 @@ class DataCleaner:
  self._emit("chunk_done", chunk_index=chunk_idx)

  def _write_output(self) -> None:
- """Write generated functions to cleaning_functions.py."""
+ """Write generated functions to output file."""
  from .output import write_cleaning_file

  try:
- write_cleaning_file(self.functions)
+ write_cleaning_file(self.functions, self.output_path)
  except OutputValidationError as e:
  if not self.tui:
  print(f" Error: {e}")
@@ -707,7 +729,7 @@ class DataCleaner:
  if not self.tui:
  print(f" Skipping invalid function: {f['name']}")
  if valid_functions:
- write_cleaning_file(valid_functions, self.output_path)
+ write_cleaning_file(valid_functions, self.output_path)
  elif not self.tui:
  print(" No valid functions to write.")
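The other user-facing change above is the new DataCleaner output_path parameter, which replaces the hard-coded cleaning_functions.py destination. A hedged sketch of wiring it up with the new backend; only the argument names visible in this diff are used, and the file names are placeholders:

from backends import OpenAIBackend  # import path assumed, as in the earlier sketch
from recursive_cleaner import DataCleaner

backend = OpenAIBackend(model="gpt-4o")  # or a local model via base_url
cleaner = DataCleaner(
    llm_backend=backend,
    file_path="orders.jsonl",
    output_path="orders_cleaning_functions.py",  # new in 1.0.x; was always cleaning_functions.py before
)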