duckguard 3.1.0__py3-none-any.whl → 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
duckguard/__init__.py CHANGED
@@ -85,7 +85,7 @@ from duckguard.semantic import (
85
85
  detect_types_for_dataset,
86
86
  )
87
87
 
88
- __version__ = "3.1.0"
88
+ __version__ = "3.2.0"
89
89
 
90
90
  __all__ = [
91
91
  # Core classes
@@ -0,0 +1,33 @@
1
+ """AI-powered data quality features for DuckGuard.
2
+
3
+ This module provides LLM-powered data quality capabilities:
4
+ - explain: Natural language data quality summaries
5
+ - suggest: AI-generated validation rules
6
+ - fix: AI-suggested data cleaning steps
7
+ - natural_rules: Plain English validation rules
8
+
9
+ Requires: pip install duckguard[llm]
10
+
11
+ Example:
12
+ from duckguard import connect
13
+ from duckguard.ai import explain, suggest_rules
14
+
15
+ orders = connect("orders.csv")
16
+ print(explain(orders))
17
+ rules = suggest_rules(orders)
18
+ """
19
+
20
+ from duckguard.ai.config import configure, get_config
21
+ from duckguard.ai.explainer import explain
22
+ from duckguard.ai.fixer import suggest_fixes
23
+ from duckguard.ai.natural_language import natural_rules
24
+ from duckguard.ai.rules_generator import suggest_rules
25
+
26
+ __all__ = [
27
+ "configure",
28
+ "get_config",
29
+ "explain",
30
+ "suggest_rules",
31
+ "suggest_fixes",
32
+ "natural_rules",
33
+ ]
duckguard/ai/config.py ADDED
@@ -0,0 +1,201 @@
1
+ """AI configuration for DuckGuard."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from dataclasses import dataclass, field
7
+ from typing import Any
8
+
9
+
10
@dataclass
class AIConfig:
    """Settings describing which LLM backend DuckGuard should call and how.

    The plain fields hold whatever the caller supplied; the ``effective_*``
    properties resolve a usable value when ``model`` or ``api_key`` is unset.
    """

    # Backend identifier, e.g. "openai", "anthropic" or "ollama".
    provider: str = "openai"
    # Explicit model name; None means "use the provider default".
    model: str | None = None
    # Explicit API key; None means "look in the environment".
    api_key: str | None = None
    # Optional endpoint override.
    base_url: str | None = None
    # Sampling temperature.
    temperature: float = 0.3
    # Token limit for a single response.
    max_tokens: int = 2000
    # Free-form additional options (a fresh dict per instance).
    extra: dict[str, Any] = field(default_factory=dict)

    @property
    def effective_model(self) -> str:
        """Return ``model`` if set, otherwise the default model for ``provider``.

        Providers without a known default fall back to ``"gpt-4o-mini"``.
        """
        provider_defaults = {
            "openai": "gpt-4o-mini",
            "anthropic": "claude-3-5-haiku-20241022",
            "ollama": "llama3",
        }
        return self.model or provider_defaults.get(self.provider, "gpt-4o-mini")

    @property
    def effective_api_key(self) -> str | None:
        """Return ``api_key`` if set, otherwise the provider's environment variable.

        Only "openai" (``OPENAI_API_KEY``) and "anthropic"
        (``ANTHROPIC_API_KEY``) have an environment fallback; every other
        provider yields ``None``.
        """
        if self.api_key:
            return self.api_key
        variable_name = {
            "openai": "OPENAI_API_KEY",
            "anthropic": "ANTHROPIC_API_KEY",
        }.get(self.provider)
        return os.environ.get(variable_name) if variable_name else None
47
+
48
+
49
+ # Global config singleton
50
+ _config: AIConfig | None = None
51
+
52
+
53
def configure(
    provider: str = "openai",
    model: str | None = None,
    api_key: str | None = None,
    base_url: str | None = None,
    temperature: float = 0.3,
    *,
    max_tokens: int = 2000,
    **kwargs: Any,
) -> AIConfig:
    """
    Configure the AI backend for DuckGuard.

    Replaces the module-wide configuration with a new ``AIConfig`` built from
    the arguments and returns it.

    Args:
        provider: LLM provider ("openai", "anthropic", "ollama")
        model: Model name (defaults based on provider)
        api_key: API key (or set via environment variable)
        base_url: Custom base URL (for Ollama or proxies)
        temperature: Sampling temperature (default: 0.3 for consistency)
        max_tokens: Maximum tokens per response (keyword-only, default: 2000)
        **kwargs: Additional provider-specific options, stored in ``extra``

    Returns:
        AIConfig instance

    Example:
        from duckguard.ai import configure

        # OpenAI
        configure(provider="openai", api_key="sk-...")

        # Anthropic
        configure(provider="anthropic")  # uses ANTHROPIC_API_KEY env

        # Local Ollama (OpenAI-compatible endpoint lives under /v1)
        configure(provider="ollama", model="llama3",
                  base_url="http://localhost:11434/v1")
    """
    global _config
    _config = AIConfig(
        provider=provider,
        model=model,
        api_key=api_key,
        base_url=base_url,
        temperature=temperature,
        # Previously max_tokens could only end up in ``extra``, which left the
        # AIConfig.max_tokens field stuck at its default.
        max_tokens=max_tokens,
        extra=kwargs,
    )
    return _config
98
+
99
+
100
def get_config() -> AIConfig:
    """Return the module-wide AI configuration, creating a default one on first use."""
    global _config
    if _config is not None:
        return _config
    # Nothing configured yet: fall back to an all-defaults AIConfig and keep it.
    _config = AIConfig()
    return _config
106
+
107
+
108
def _normalize_ollama_base_url(base_url: str | None) -> str:
    """Return the base URL to use for Ollama's OpenAI-compatible endpoint.

    ``None``/empty selects the local default. A bare ``scheme://host[:port]``
    URL gets ``/v1`` appended, because the OpenAI client posts to
    ``<base_url>/chat/completions``. URLs that already carry a path are
    returned untouched.
    """
    from urllib.parse import urlsplit

    if not base_url:
        return "http://localhost:11434/v1"
    if not isinstance(base_url, str):
        return base_url
    parts = urlsplit(base_url)
    if parts.path in ("", "/") and not parts.query and not parts.fragment:
        return base_url.rstrip("/") + "/v1"
    return base_url


def _chat_messages(prompt: str, system: str) -> list[dict[str, str]]:
    """Build an OpenAI-style message list; the system message is omitted when empty."""
    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})
    return messages


def _get_client(config: AIConfig | None = None):
    """
    Get an LLM client based on configuration.

    Args:
        config: Configuration to use; when omitted (or falsy) the result of
            ``get_config()`` is used.

    Returns:
        A callable ``(prompt: str, system: str = "") -> str`` that sends the
        prompt to the configured provider and returns the response text.

    Raises:
        ImportError: If the provider's client package is not installed.
        ValueError: If ``provider`` is not "openai", "anthropic" or "ollama".
    """
    cfg = config or get_config()

    if cfg.provider == "openai":
        try:
            from openai import OpenAI
        except ImportError as exc:
            raise ImportError(
                "OpenAI support requires the openai package. "
                "Install with: pip install duckguard[llm]"
            ) from exc

        client = OpenAI(
            api_key=cfg.effective_api_key,
            base_url=cfg.base_url,
        )

        def call_openai(prompt: str, system: str = "") -> str:
            response = client.chat.completions.create(
                model=cfg.effective_model,
                messages=_chat_messages(prompt, system),
                temperature=cfg.temperature,
                max_tokens=cfg.max_tokens,
            )
            return response.choices[0].message.content or ""

        return call_openai

    elif cfg.provider == "anthropic":
        try:
            from anthropic import Anthropic
        except ImportError as exc:
            raise ImportError(
                "Anthropic support requires the anthropic package. "
                "Install with: pip install duckguard[llm]"
            ) from exc

        # base_url was previously ignored for this provider, so a configured
        # proxy endpoint had no effect.
        client = Anthropic(
            api_key=cfg.effective_api_key,
            base_url=cfg.base_url,
        )

        def call_anthropic(prompt: str, system: str = "") -> str:
            response = client.messages.create(
                model=cfg.effective_model,
                max_tokens=cfg.max_tokens,
                # Forward the configured temperature like the other providers do.
                temperature=cfg.temperature,
                system=system if system else "You are a data quality expert.",
                messages=[{"role": "user", "content": prompt}],
            )
            # Concatenate every text block instead of indexing content[0],
            # which fails on an empty reply or a non-text first block.
            return "".join(
                block.text
                for block in response.content
                if getattr(block, "text", None)
            )

        return call_anthropic

    elif cfg.provider == "ollama":
        try:
            from openai import OpenAI
        except ImportError as exc:
            raise ImportError(
                "Ollama support uses the openai package. "
                "Install with: pip install openai"
            ) from exc

        client = OpenAI(
            # The OpenAI client requires some key; this placeholder is what
            # the original code passed.
            api_key="ollama",
            base_url=_normalize_ollama_base_url(cfg.base_url),
        )

        def call_ollama(prompt: str, system: str = "") -> str:
            # NOTE(review): cfg.max_tokens is not forwarded for Ollama; left
            # as in the original — confirm whether that is intentional.
            response = client.chat.completions.create(
                model=cfg.effective_model,
                messages=_chat_messages(prompt, system),
                temperature=cfg.temperature,
            )
            return response.choices[0].message.content or ""

        return call_ollama

    else:
        raise ValueError(
            f"Unsupported AI provider: {cfg.provider}. "
            "Supported: openai, anthropic, ollama"
        )
@@ -0,0 +1,109 @@
1
+ """AI-powered data quality explanation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from duckguard.ai.config import _get_client
8
+
9
+ if TYPE_CHECKING:
10
+ from duckguard.core.dataset import Dataset
11
+
12
+ SYSTEM_PROMPT = """You are a data quality expert. You analyze dataset profiles and explain
13
+ data quality issues in clear, actionable language. Be specific about which columns and
14
+ values are problematic. Suggest concrete validation rules using DuckGuard's API.
15
+
16
+ DuckGuard API methods:
17
+ - column.is_not_null() — check for nulls
18
+ - column.is_unique() — check uniqueness
19
+ - column.between(min, max) — range check
20
+ - column.isin(values) — enum check
21
+ - column.matches(pattern) — regex pattern
22
+ - column.not_null_when(condition) — conditional not-null
23
+ - column.between_when(min, max, condition) — conditional range
24
+ - dataset.score() — quality score (A-F)
25
+ - detect_anomalies(dataset) — anomaly detection
26
+
27
+ Keep explanations concise and actionable. Use emoji for visual clarity."""
28
+
29
+
30
def explain(
    dataset: Dataset,
    focus: str | None = None,
    detail: str = "medium",
) -> str:
    """
    Produce a natural language summary of a dataset's data quality.

    The dataset is profiled with ``AutoProfiler(deep=True)``; the profile is
    rendered as plain text and sent to the configured LLM together with
    ``SYSTEM_PROMPT``.

    Args:
        dataset: Dataset to analyze
        focus: Optional column or aspect the explanation should concentrate on
        detail: Level of detail ("brief", "medium", "detailed"); any other
            value uses a generic "comprehensive but concise" instruction

    Returns:
        The LLM's explanation text

    Example:
        from duckguard import connect
        from duckguard.ai import explain

        orders = connect("orders.csv")
        print(explain(orders))
    """
    from duckguard.profiler import AutoProfiler

    profile = AutoProfiler(deep=True).profile(dataset)

    # Dataset-level header of the context shown to the model.
    lines = [
        f"Dataset: {dataset.name}",
        f"Rows: {profile.row_count}, Columns: {profile.column_count}",
        f"Overall Quality: {profile.overall_quality_grade} ({profile.overall_quality_score:.1f}/100)",
        "",
        "Column Profiles:",
    ]

    # One line per column: fixed stats first, then whichever extras are present.
    for column in profile.columns:
        summary = (
            f" {column.name} ({column.dtype}): "
            f"nulls={column.null_percent:.1f}%, "
            f"unique={column.unique_percent:.1f}%, "
            f"grade={column.quality_grade}"
        )
        extras = []
        if column.min_value is not None:
            extras.append(f"range=[{column.min_value}, {column.max_value}]")
        if column.detected_patterns:
            extras.append(f"patterns={column.detected_patterns}")
        if column.distribution_type:
            extras.append(f"dist={column.distribution_type}")
        if column.outlier_count and column.outlier_count > 0:
            extras.append(f"outliers={column.outlier_count}")
        lines.append(", ".join([summary, *extras]))

    if profile.suggested_rules:
        lines.append("")
        lines.append(f"Auto-suggested rules ({len(profile.suggested_rules)}):")
        # Only the first ten rules are listed.
        lines.extend(f" - {rule}" for rule in profile.suggested_rules[:10])

    context = "\n".join(lines)

    instructions_by_detail = {
        "brief": "Give a 3-5 sentence summary.",
        "medium": "Give a comprehensive but concise analysis (10-15 lines).",
        "detailed": "Give a thorough analysis with specific recommendations.",
    }
    detail_instruction = instructions_by_detail.get(
        detail, "Give a comprehensive but concise analysis."
    )

    if focus:
        focus_instruction = f"\nFocus specifically on: {focus}"
    else:
        focus_instruction = ""

    prompt = (
        "Analyze this dataset profile and explain the data quality status.\n"
        f"{detail_instruction}{focus_instruction}\n"
        "\n"
        f"{context}"
    )

    ask_llm = _get_client()
    return ask_llm(prompt, system=SYSTEM_PROMPT)
duckguard/ai/fixer.py ADDED
@@ -0,0 +1,105 @@
1
+ """AI-powered data fix suggestions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from duckguard.ai.config import _get_client
8
+
9
+ if TYPE_CHECKING:
10
+ from duckguard.core.dataset import Dataset
11
+
12
+ SYSTEM_PROMPT = """You are a data quality expert. Given a dataset profile with quality issues,
13
+ suggest specific fixes. For each issue:
14
+
15
+ 1. Describe the problem clearly
16
+ 2. Assess severity (critical / warning / info)
17
+ 3. Suggest a concrete fix (SQL, Python code, or process change)
18
+ 4. Note if no action is needed (e.g., nulls are expected for pending orders)
19
+
20
+ Be practical. Not every null is a bug. Use context to determine what's actually wrong.
21
+ Format with emoji and clear sections."""
22
+
23
+
24
def _column_issues(col) -> list[str]:
    """Describe the quality problems (nulls, low grade, outliers) of one profiled column."""
    issues = []
    if col.null_percent > 0:
        issues.append(f"nulls: {col.null_percent:.1f}% ({col.null_count} rows)")
    if col.quality_grade in ("D", "F"):
        issues.append(f"low quality grade: {col.quality_grade}")
    if col.outlier_count and col.outlier_count > 0:
        issues.append(f"outliers: {col.outlier_count} ({col.outlier_percentage:.1f}%)")
    return issues


def _failed_check_messages(rules_result) -> list[str]:
    """Return the messages of all failed entries in ``rules_result.results``."""
    if rules_result is None:
        return []
    checks = getattr(rules_result, "results", None) or []
    return [check.message for check in checks if not check.passed]


def suggest_fixes(
    dataset: Dataset,
    rules_result=None,
) -> str:
    """
    Get AI-suggested fixes for data quality issues.

    The dataset is profiled with ``AutoProfiler(deep=True)``. Columns with
    nulls, a quality grade of D/F, or outliers are listed, followed by any
    failed checks from ``rules_result``. If neither source reports a problem,
    a fixed "no issues" message is returned without calling the LLM.

    Args:
        dataset: Dataset to analyze
        rules_result: Optional RuleExecutionResult from a previous validation
            run; only its ``results`` entries with ``passed`` false are used

    Returns:
        Human-readable fix suggestions

    Example:
        from duckguard import connect
        from duckguard.ai import suggest_fixes

        orders = connect("orders.csv")
        print(suggest_fixes(orders))
    """
    from duckguard.profiler import AutoProfiler

    # Profile the dataset
    profiler = AutoProfiler(deep=True)
    profile = profiler.profile(dataset)

    # Column-level findings from the profile
    column_lines = []
    for col in profile.columns:
        issues = _column_issues(col)
        if not issues:
            continue
        column_lines.append(f" {col.name} ({col.dtype}): {'; '.join(issues)}")
        # Add the value range for context
        if col.min_value is not None:
            column_lines.append(f" range: [{col.min_value}, {col.max_value}]")

    # Failed validations must be collected BEFORE deciding the data is clean:
    # the profile can look fine while explicit rules still failed.
    failed_checks = _failed_check_messages(rules_result)

    if not column_lines and not failed_checks:
        return "✅ No data quality issues detected. Your data looks clean!"

    context_parts = [
        f"Dataset: {dataset.name} ({profile.row_count} rows, {profile.column_count} columns)",
        f"Quality: {profile.overall_quality_grade} ({profile.overall_quality_score:.1f}/100)",
        "",
        "Issues detected:",
    ]
    context_parts.extend(column_lines)

    # Only emit the section when something actually failed.
    if failed_checks:
        context_parts.append("")
        context_parts.append("Failed validation checks:")
        context_parts.extend(f" ✗ {message}" for message in failed_checks)

    context = "\n".join(context_parts)

    prompt = f"""Analyze these data quality issues and suggest specific fixes.

{context}

For each issue, provide:
1. What's wrong (brief)
2. Severity (🔴 critical / 🟡 warning / 🔵 info)
3. Suggested fix (code or process)
4. Whether it actually needs fixing (sometimes nulls are expected)"""

    client = _get_client()
    return client(prompt, system=SYSTEM_PROMPT)
@@ -0,0 +1,119 @@
1
+ """Natural language validation rules."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from duckguard.ai.config import _get_client
8
+ from duckguard.core.result import ValidationResult
9
+
10
+ if TYPE_CHECKING:
11
+ from duckguard.core.dataset import Dataset
12
+
13
+ SYSTEM_PROMPT = """You are a data quality expert. Convert natural language rules into
14
+ DuckGuard Python code.
15
+
16
+ Available DuckGuard methods:
17
+ - dataset.column_name.is_not_null()
18
+ - dataset.column_name.is_unique()
19
+ - dataset.column_name.between(min, max)
20
+ - dataset.column_name.greater_than(value)
21
+ - dataset.column_name.less_than(value)
22
+ - dataset.column_name.isin(values_list)
23
+ - dataset.column_name.matches(regex_pattern)
24
+ - dataset.column_name.value_lengths_between(min, max)
25
+ - dataset.column_name.not_null_when(sql_condition)
26
+ - dataset.column_name.between_when(min, max, sql_condition)
27
+ - dataset.column_name.exists_in(other_dataset.column)
28
+ - dataset.expect_columns_unique(column_list)
29
+
30
+ Dataset columns: {columns}
31
+
32
+ For each natural language rule, output ONLY a Python expression that calls the appropriate
33
+ DuckGuard method. One expression per line. No explanations, no imports.
34
+
35
+ Example input: "order IDs should never be null"
36
+ Example output: dataset.order_id.is_not_null()
37
+
38
+ Example input: "quantities between 1 and 1000"
39
+ Example output: dataset.quantity.between(1, 1000)"""
40
+
41
+
42
def _split_rule_line(line: str) -> tuple[int | None, str]:
    """Split one LLM response line into ``(rule number or None, candidate expression)``.

    Leading numbering such as ``1.``, ``2)``, ``- `` or ``Rule 3:`` and
    Markdown backticks/asterisks are removed; the first integer found in that
    prefix is returned as the rule number.
    """
    import re

    text = line.strip().strip("`").strip()
    # Every part of the pattern is optional, so match() always succeeds.
    prefix = re.match(
        r"(?:rule(?![a-z])\s*)?[\d\s.:)\-—*`]*", text, re.IGNORECASE
    ).group(0)
    number = re.search(r"\d+", prefix)
    expression = text[len(prefix):].rstrip("` ")
    return (int(number.group(0)) if number else None), expression


def _rejection_reason(expr: str) -> str | None:
    """Return why *expr* must not be evaluated, or ``None`` if it is acceptable.

    Accepted expressions consist solely of public attribute access, calls and
    subscripts rooted at the name ``dataset``, with literal arguments.
    """
    import ast

    allowed_nodes = (
        ast.Expression, ast.Call, ast.Attribute, ast.Subscript, ast.Name,
        ast.Load, ast.Constant, ast.List, ast.Tuple, ast.Set, ast.Dict,
        ast.keyword, ast.UnaryOp, ast.USub, ast.UAdd,
    )
    try:
        tree = ast.parse(expr, mode="eval")
    except (SyntaxError, ValueError) as err:
        return f"invalid syntax ({err})"

    for node in ast.walk(tree):
        if not isinstance(node, allowed_nodes):
            return f"unsupported construct {type(node).__name__}"
        if isinstance(node, ast.Name) and node.id != "dataset":
            return f"unknown name '{node.id}'"
        if isinstance(node, ast.Attribute) and node.attr.startswith("_"):
            return f"access to private attribute '{node.attr}'"
    return None


def natural_rules(
    dataset: Dataset,
    rules: list[str],
) -> list[ValidationResult]:
    """
    Validate data using natural language rules.

    Asks the configured LLM to translate each rule into a DuckGuard
    expression on ``dataset``, evaluates the returned expressions, and
    collects the outcomes. Expressions that are rejected, raise, or do not
    produce a validation outcome are reported as failed results rather than
    dropped.

    Args:
        dataset: Dataset to validate
        rules: List of natural language rule descriptions

    Returns:
        List of ValidationResult objects (empty when ``rules`` is empty; the
        LLM is not called in that case)

    Example:
        from duckguard import connect
        from duckguard.ai import natural_rules

        orders = connect("orders.csv")
        results = natural_rules(orders, [
            "order IDs should never be null or duplicated",
            "quantities should be positive integers under 1000",
            "status must be pending, shipped, or delivered",
        ])

        for r in results:
            print(f"{'✓' if r.passed else '✗'} {r.message}")
    """
    if not rules:
        return []

    columns = dataset.columns

    # Build prompt with all rules
    rules_text = "\n".join(f"Rule {i}: {rule}" for i, rule in enumerate(rules, 1))

    system = SYSTEM_PROMPT.format(columns=columns)
    prompt = f"""Convert these natural language rules to DuckGuard expressions:

{rules_text}

Output one DuckGuard expression per rule, numbered to match. Use 'dataset' as the variable name."""

    client = _get_client()
    response = client(prompt, system=system)

    def failure(rule_desc: str, detail: str) -> ValidationResult:
        return ValidationResult(
            passed=False,
            actual_value=detail,
            expected_value="valid expression",
            message=f"Failed to evaluate rule '{rule_desc}': {detail}",
        )

    results = []
    accepted = 0  # expressions seen so far; fallback index when lines are unnumbered

    for line in response.splitlines():
        number, expr = _split_rule_line(line)
        if not expr.startswith("dataset."):
            # Fences, blank lines, commentary.
            continue

        # Prefer the model's own numbering; counting raw lines goes wrong as
        # soon as one line is skipped or a rule yields two expressions.
        if number is not None and 1 <= number <= len(rules):
            rule_desc = rules[number - 1]
        elif accepted < len(rules):
            rule_desc = rules[accepted]
        else:
            rule_desc = expr
        accepted += 1

        reason = _rejection_reason(expr)
        if reason is not None:
            results.append(failure(rule_desc, reason))
            continue

        try:
            # SECURITY: this evaluates LLM-generated text. Emptying
            # __builtins__ alone is not a sandbox; the allow-list check above
            # blocks dunder access and foreign names, but any public method
            # reachable from ``dataset`` can still be invoked. Treat rule
            # text and model output as a trust boundary.
            outcome = eval(expr, {"dataset": dataset, "__builtins__": {}})  # noqa: S307
        except Exception as e:  # any error in generated code is reported, not raised
            results.append(failure(rule_desc, str(e)))
            continue

        if isinstance(outcome, ValidationResult):
            results.append(outcome)
        elif isinstance(outcome, bool):
            results.append(ValidationResult(
                passed=outcome,
                actual_value=outcome,
                expected_value=True,
                message=f"Natural rule: {rule_desc}",
            ))
        else:
            # Previously such expressions vanished without a trace.
            results.append(failure(
                rule_desc,
                f"expression returned {type(outcome).__name__}, not a validation result",
            ))

    return results
@@ -0,0 +1,121 @@
1
+ """AI-powered validation rule generation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from duckguard.ai.config import _get_client
8
+
9
+ if TYPE_CHECKING:
10
+ from duckguard.core.dataset import Dataset
11
+
12
+ SYSTEM_PROMPT = """You are a data quality expert. Generate DuckGuard YAML validation rules
13
+ based on dataset profiles. Rules should be practical, not overly strict.
14
+
15
+ Output format — valid YAML only, no markdown fences:
16
+
17
+ name: <dataset>_validation
18
+ description: Auto-generated quality checks
19
+
20
+ checks:
21
+ column_name:
22
+ - not_null
23
+ - unique
24
+ - between: [min, max]
25
+ - allowed_values: [val1, val2]
26
+ - pattern: "regex"
27
+
28
+ Rules to consider:
29
+ - not_null for columns with 0% nulls (they're probably required)
30
+ - unique for ID-like columns (>99% unique)
31
+ - between for numeric columns (use actual range with small buffer)
32
+ - allowed_values for low-cardinality columns (<20 distinct values)
33
+ - pattern for columns matching known patterns (email, phone, etc.)
34
+
35
+ Be conservative — generate rules that reflect the actual data, not hypothetical constraints.
36
+ Only output the YAML. No explanations."""
37
+
38
+
39
def _strip_code_fences(text: str) -> str:
    """Remove a Markdown code fence wrapped around an LLM reply, if there is one.

    Text that does not start with a fence is returned unchanged.
    """
    stripped = text.strip()
    if not stripped.startswith("```"):
        return text
    lines = stripped.splitlines()[1:]  # drop the opening fence and its language tag
    if lines and lines[-1].strip().startswith("```"):
        lines = lines[:-1]
    return "\n".join(lines)


def suggest_rules(
    dataset: Dataset,
    strict: bool = False,
    include_comments: bool = True,
) -> str:
    """
    Generate validation rules using AI analysis.

    Profiles the dataset with ``AutoProfiler()``, renders per-column
    statistics plus the profiler's own suggested rules as text, and asks the
    configured LLM for YAML rules. A Markdown code fence around the reply is
    removed so the result can be written to a YAML file directly.

    Args:
        dataset: Dataset to analyze
        strict: If True, generate stricter rules
        include_comments: If True, add explanatory comments

    Returns:
        YAML string with validation rules

    Example:
        from duckguard import connect
        from duckguard.ai import suggest_rules

        orders = connect("orders.csv")
        yaml_rules = suggest_rules(orders)
        print(yaml_rules)

        # Save to file
        with open("duckguard.yaml", "w") as f:
            f.write(yaml_rules)
    """
    from duckguard.profiler import AutoProfiler

    # Profile the dataset
    profiler = AutoProfiler()
    profile = profiler.profile(dataset)

    # Build context
    context_parts = [
        f"Dataset: {dataset.name}",
        f"Rows: {profile.row_count}",
        "",
        "Columns:",
    ]

    for col in profile.columns:
        col_info = (
            f" {col.name}:"
            f" type={col.dtype},"
            f" nulls={col.null_percent:.1f}%,"
            f" unique={col.unique_percent:.1f}%,"
            f" distinct={col.unique_count}"
        )

        if col.min_value is not None:
            col_info += f", min={col.min_value}, max={col.max_value}"
        if col.detected_patterns:
            col_info += f", patterns={col.detected_patterns}"
        # sample_values is treated as optional on the column profile.
        if hasattr(col, "sample_values") and col.sample_values:
            col_info += f", samples={col.sample_values[:5]}"

        context_parts.append(col_info)

    # Include auto-detected rules as baseline
    if profile.suggested_rules:
        context_parts.append("")
        context_parts.append("Auto-detected rules (baseline):")
        context_parts.extend(f" - {rule}" for rule in profile.suggested_rules)

    context = "\n".join(context_parts)

    strictness = (
        "Generate strict rules — flag anything suspicious."
        if strict
        else "Generate practical rules — match actual data patterns, allow reasonable variation."
    )

    comment_instruction = (
        "Add YAML comments explaining each rule."
        if include_comments
        else "No comments, just rules."
    )

    prompt = f"""{strictness}
{comment_instruction}

{context}"""

    client = _get_client()
    # Models often wrap output in ```yaml fences despite the system prompt;
    # returning them verbatim would make the documented "save to file" flow
    # write invalid YAML.
    return _strip_code_fences(client(prompt, system=SYSTEM_PROMPT))