recursive_cleaner-0.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
backends/__init__.py ADDED
@@ -0,0 +1,5 @@
+ """Backend implementations for Recursive Data Cleaner."""
+
+ from .mlx_backend import MLXBackend
+
+ __all__ = ["MLXBackend"]
backends/mlx_backend.py ADDED
@@ -0,0 +1,95 @@
+ """MLX-LM backend for Recursive Data Cleaner."""
+
+ from mlx_lm import load, generate
+ from mlx_lm.sample_utils import make_sampler, make_logits_processors
+
+
+ class MLXBackend:
+     """
+     MLX-LM backend implementation for Apple Silicon.
+
+     Conforms to the LLMBackend protocol.
+     """
+
+     def __init__(
+         self,
+         model_path: str = "lmstudio-community/Qwen3-Next-80B-A3B-Instruct-MLX-4bit",
+         max_tokens: int = 4096,
+         temperature: float = 0.7,
+         top_p: float = 0.9,
+         repetition_penalty: float = 1.1,
+         verbose: bool = False,
+     ):
+         """
+         Initialize the MLX backend.
+
+         Args:
+             model_path: HuggingFace model path or local path
+             max_tokens: Maximum tokens to generate
+             temperature: Sampling temperature (0 = deterministic)
+             top_p: Nucleus sampling threshold
+             repetition_penalty: Penalty for repeated tokens
+             verbose: Whether to print loading info
+         """
+         self.model_path = model_path
+         self.max_tokens = max_tokens
+         self.temperature = temperature
+         self.top_p = top_p
+         self.repetition_penalty = repetition_penalty
+         self.verbose = verbose
+
+         self._model = None
+         self._tokenizer = None
+         self._sampler = None
+         self._logits_processors = None
+
+     def _ensure_loaded(self):
+         """Lazy load the model on first use."""
+         if self._model is None:
+             if self.verbose:
+                 print(f"Loading model: {self.model_path}")
+             self._model, self._tokenizer = load(self.model_path)
+
+             # Create sampler and processors
+             self._sampler = make_sampler(temp=self.temperature, top_p=self.top_p)
+             self._logits_processors = make_logits_processors(
+                 repetition_penalty=self.repetition_penalty
+             )
+
+             if self.verbose:
+                 print("Model loaded successfully")
+
+     def generate(self, prompt: str) -> str:
+         """
+         Generate a response from the LLM.
+
+         Args:
+             prompt: The input prompt
+
+         Returns:
+             The generated text response
+         """
+         self._ensure_loaded()
+
+         # Apply chat template if available (for instruction-tuned models)
+         if hasattr(self._tokenizer, 'apply_chat_template'):
+             messages = [{"role": "user", "content": prompt}]
+             formatted_prompt = self._tokenizer.apply_chat_template(
+                 messages,
+                 tokenize=False,
+                 add_generation_prompt=True
+             )
+         else:
+             formatted_prompt = prompt
+
+         response = generate(
+             self._model,
+             self._tokenizer,
+             prompt=formatted_prompt,
+             max_tokens=self.max_tokens,
+             sampler=self._sampler,
+             logits_processors=self._logits_processors,
+             verbose=self.verbose,
+         )
+
+         return response
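
Note: the LLMBackend protocol referenced in the class docstring above is not part of this diff. A minimal sketch of what it presumably requires, inferred only from the surface MLXBackend exposes (the protocol's real name, location, and any additional members are assumptions):

# Hedged sketch only -- inferred from MLXBackend's public surface; the real
# LLMBackend protocol is not included in this diff and may differ.
from typing import Protocol, runtime_checkable


@runtime_checkable
class LLMBackend(Protocol):
    def generate(self, prompt: str) -> str:
        """Return the model's text completion for a single prompt string."""
        ...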
recursive_cleaner/__init__.py ADDED
@@ -0,0 +1,46 @@
+ """Recursive Data Cleaner - LLM-powered incremental data cleaning pipeline."""
+
+ from recursive_cleaner.cleaner import DataCleaner
+ from recursive_cleaner.context import build_context
+ from recursive_cleaner.dependencies import resolve_dependencies
+ from recursive_cleaner.errors import (
+     CleanerError,
+     MaxIterationsError,
+     OutputValidationError,
+     ParseError,
+ )
+ from recursive_cleaner.metrics import QualityMetrics, compare_quality, measure_quality
+ from recursive_cleaner.optimizer import (
+     consolidate_with_agency,
+     extract_tags,
+     group_by_salience,
+ )
+ from recursive_cleaner.output import write_cleaning_file
+ from recursive_cleaner.parsers import chunk_file
+ from recursive_cleaner.prompt import build_prompt
+ from recursive_cleaner.response import extract_python_block, parse_response
+ from recursive_cleaner.validation import check_code_safety, extract_sample_data, validate_function
+
+ __all__ = [
+     "CleanerError",
+     "ParseError",
+     "MaxIterationsError",
+     "OutputValidationError",
+     "chunk_file",
+     "parse_response",
+     "extract_python_block",
+     "build_context",
+     "build_prompt",
+     "write_cleaning_file",
+     "DataCleaner",
+     "validate_function",
+     "extract_sample_data",
+     "check_code_safety",
+     "resolve_dependencies",
+     "QualityMetrics",
+     "measure_quality",
+     "compare_quality",
+     "extract_tags",
+     "group_by_salience",
+     "consolidate_with_agency",
+ ]
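
For orientation, a hedged usage sketch of the new backend follows. The import path recursive_cleaner.backends is an assumption based on the wheel layout shown above, and how a backend is handed to DataCleaner is not visible in this diff, so only the backend's own call surface from mlx_backend.py is exercised:

# Hedged usage sketch. The import path is assumed from the wheel layout and is
# not confirmed by this diff; MLXBackend's parameters come from mlx_backend.py.
from recursive_cleaner.backends import MLXBackend

backend = MLXBackend(
    model_path="lmstudio-community/Qwen3-Next-80B-A3B-Instruct-MLX-4bit",
    temperature=0.7,
    top_p=0.9,
    verbose=True,
)

# The model loads lazily on the first call; later calls reuse the loaded weights.
reply = backend.generate("Write a Python function that strips whitespace from every field.")
print(reply)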