ai-pipeline-core 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +70 -144
- ai_pipeline_core/deployment/__init__.py +6 -18
- ai_pipeline_core/deployment/base.py +392 -212
- ai_pipeline_core/deployment/contract.py +6 -10
- ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
- ai_pipeline_core/deployment/helpers.py +16 -17
- ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
- ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +37 -82
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +32 -85
- ai_pipeline_core/images/_processing.py +5 -11
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +106 -81
- ai_pipeline_core/llm/client.py +267 -158
- ai_pipeline_core/llm/model_options.py +12 -84
- ai_pipeline_core/llm/model_response.py +53 -99
- ai_pipeline_core/llm/model_types.py +8 -23
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
- ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +134 -75
- ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
- ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
- ai_pipeline_core/debug/__init__.py +0 -26
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -494
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/prompt_builder/__init__.py +0 -5
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
- ai_pipeline_core/prompt_builder/global_cache.py +0 -78
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
- ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core-0.3.3.dist-info/METADATA +0 -569
- ai_pipeline_core-0.3.3.dist-info/RECORD +0 -57
- {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,644 @@
|
|
|
1
|
+
"""Per-module guide assembly with test discovery, relevance scoring, and rendering.
|
|
2
|
+
|
|
3
|
+
Collects public symbols, resolves dependencies, flattens inheritance,
|
|
4
|
+
discovers and scores test examples, extracts rules, and renders guides.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import ast
|
|
8
|
+
import re
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from ai_pipeline_core.docs_generator.extractor import (
|
|
13
|
+
EXTERNAL_STUBS,
|
|
14
|
+
ClassInfo,
|
|
15
|
+
FunctionInfo,
|
|
16
|
+
MethodInfo,
|
|
17
|
+
SymbolTable,
|
|
18
|
+
is_public_name,
|
|
19
|
+
resolve_dependencies,
|
|
20
|
+
)
|
|
21
|
+
from ai_pipeline_core.logging import get_pipeline_logger
|
|
22
|
+
|
|
23
|
+
logger = get_pipeline_logger(__name__)
|
|
24
|
+
|
|
25
|
+
MAX_EXAMPLES = 8
|
|
26
|
+
|
|
27
|
+
_RULE_PREFIXES = ("cannot", "must", "never", "always", "critical")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
|
|
31
|
+
class TestExample:
|
|
32
|
+
"""Scored test function extracted for guide examples."""
|
|
33
|
+
|
|
34
|
+
name: str
|
|
35
|
+
source_file: str
|
|
36
|
+
line_number: int
|
|
37
|
+
code: str
|
|
38
|
+
score: int
|
|
39
|
+
is_error_example: bool
|
|
40
|
+
is_marked: bool = False
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class GuideData:
    """Intermediate representation of a guide, used by trimmer before rendering."""

    # Name of the module the guide describes; rendered in the "# MODULE:" header.
    module_name: str
    # Classes rendered in the PUBLIC API section.
    classes: list[ClassInfo]
    # Functions rendered in the FUNCTIONS section.
    functions: list[FunctionInfo]
    # Rule sentences rendered as a numbered RULES list.
    rules: list[str]
    # Names of external base classes; rendered as stub classes in DEPENDENCIES.
    external_bases: set[str]
    # Test examples rendered in the EXAMPLES section.
    normal_examples: list[TestExample]
    # Test examples rendered in the ERROR EXAMPLES section.
    error_examples: list[TestExample]
    # Classes rendered in the INTERNAL TYPES section; empty by default.
    internal_types: list[ClassInfo] = field(default_factory=list)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
# Test discovery
|
|
59
|
+
# ---------------------------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def discover_tests(
    module_name: str,
    tests_dir: Path,
    test_dir_overrides: dict[str, str] | None = None,
    repo_root: Path | None = None,
) -> list[TestExample]:
    """Collect the test functions that belong to a module.

    Two locations are searched, in this order:

    1. ``tests_dir/<subdir>/test_*.py`` where ``<subdir>`` is the entry for
       ``module_name`` in ``test_dir_overrides`` if present, otherwise
       ``module_name`` itself. Skipped when the directory does not exist.
    2. ``tests_dir/test_<module_name>*.py`` -- always keyed on the original
       module name, never on the override.

    Files are processed in sorted order within each location and a file is
    never processed twice. ``repo_root`` is forwarded to the extractor.
    """
    overrides = test_dir_overrides if test_dir_overrides else {}
    module_test_dir = tests_dir / overrides.get(module_name, module_name)

    candidates: list[Path] = []
    if module_test_dir.is_dir():
        candidates = sorted(module_test_dir.glob("test_*.py"))

    # Root-level matches are appended after the subdirectory files, skipping duplicates.
    root_level = sorted(tests_dir.glob(f"test_{module_name}*.py"))
    candidates += [found for found in root_level if found not in candidates]

    collected: list[TestExample] = []
    for candidate in candidates:
        collected += _extract_test_functions(candidate, repo_root)
    return collected
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# ---------------------------------------------------------------------------
|
|
94
|
+
# Scoring and selection
|
|
95
|
+
# ---------------------------------------------------------------------------
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def score_test(test: TestExample, symbol_names: list[str]) -> int:
    """Score a test's relevance to a set of module symbols.

    The per-symbol score is the name-match points plus up to two points for
    occurrences of the symbol in the test body; the best per-symbol score is
    kept. Test-wide bonuses and penalties are then applied once:

    * +2 when the test is an error example
    * +2 for tests shorter than 10 lines, +1 for tests shorter than 20
    * +1 when the name contains "creation", "basic" or "simple"
    * -1 for one or two mock usages, -3 for three or more

    All name comparisons are case-insensitive; body occurrences are counted
    case-sensitively. The result is never negative.
    """
    name_lower = test.name.lower()
    # The subject does not depend on the symbol, so compute it once.
    subject = name_lower.removeprefix("test_")

    best_score = 0
    for symbol in symbol_names:
        symbol_lower = symbol.lower()

        if subject == symbol_lower or subject.startswith(symbol_lower + "_"):
            name_points = 5  # exact subject match
        elif symbol_lower in name_lower:
            name_points = 3  # symbol appears somewhere in the name
        else:
            name_points = 0

        body_points = min(test.code.count(symbol), 2)
        best_score = max(best_score, name_points + body_points)

    if test.is_error_example:
        best_score += 2

    # Shorter tests make better examples.
    line_count = len(test.code.splitlines())
    if line_count < 10:
        best_score += 2
    elif line_count < 20:
        best_score += 1

    # Match against the lowercased name so this bonus is case-insensitive
    # like the symbol matching above.
    if any(pattern in name_lower for pattern in ("creation", "basic", "simple")):
        best_score += 1

    # Mock penalty -- prefer real usage examples over heavily mocked tests.
    # Note that "MagicMock(" also contains "Mock(", so it is counted by both patterns.
    mock_patterns = ("Mock(", "MagicMock(", "patch(", "monkeypatch.")
    mock_count = sum(test.code.count(pattern) for pattern in mock_patterns)
    if mock_count >= 3:
        best_score -= 3
    elif mock_count >= 1:
        best_score -= 1

    return max(best_score, 0)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def select_examples(
    tests: list[TestExample],
    symbol_names: list[str],
    max_total: int = MAX_EXAMPLES,
) -> tuple[list[TestExample], list[TestExample]]:
    """Pick the examples to show in a guide, split into normal and error lists.

    Marked tests are always included, even when there are more of them than
    ``max_total``. Whatever budget is left after the marked tests is filled
    with unmarked tests, best score first (ties broken by name):

    * at most half of the remaining slots (rounded down) go to error examples
    * every slot not taken by an error example goes to a normal example

    Returns ``(normal_examples, error_examples)`` with marked tests listed
    before the automatically selected ones in each list.
    """
    marked_normal: list[TestExample] = []
    marked_errors: list[TestExample] = []
    unmarked: list[TestExample] = []
    for candidate in tests:
        if not candidate.is_marked:
            unmarked.append(candidate)
        elif candidate.is_error_example:
            marked_errors.append(candidate)
        else:
            marked_normal.append(candidate)

    ranked_pairs = sorted(
        ((score_test(candidate, symbol_names), candidate) for candidate in unmarked),
        key=lambda pair: (-pair[0], pair[1].name),
    )
    ranked = [candidate for _, candidate in ranked_pairs]

    budget = max(max_total - (len(marked_normal) + len(marked_errors)), 0)
    if budget <= 0:
        return marked_normal, marked_errors

    error_pool = [candidate for candidate in ranked if candidate.is_error_example]
    normal_pool = [candidate for candidate in ranked if not candidate.is_error_example]

    picked_errors = error_pool[: budget // 2]
    picked_normal = normal_pool[: budget - len(picked_errors)]

    return marked_normal + picked_normal, marked_errors + picked_errors
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
# ---------------------------------------------------------------------------
|
|
192
|
+
# Inheritance flattening
|
|
193
|
+
# ---------------------------------------------------------------------------
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def flatten_methods(cls: ClassInfo, table: SymbolTable) -> tuple[MethodInfo, ...]:
    """Merge a class's own methods with those of its known ancestors.

    The hierarchy is walked depth-first: the class itself, then each base in
    declaration order, descending into a base's own ancestors before moving
    to the next base. The first definition found for a method name wins, so
    this only approximates Python's MRO. Bases that are missing from
    ``table.classes`` or listed in ``EXTERNAL_STUBS`` are not followed, and
    subscripted bases (``Base[T]``) are looked up by their bare name.

    Methods taken from an ancestor are returned as copies flagged with
    ``is_inherited=True`` and ``inherited_from`` set to the defining class.
    The result is ordered by ``_sort_methods``.
    """
    first_definition: dict[str, tuple[MethodInfo, str]] = {}
    seen_classes: set[str] = set()

    # Explicit stack instead of recursion; bases are pushed in reverse so the
    # left-most base is popped (and therefore visited) first.
    pending: list[ClassInfo] = [cls]
    while pending:
        current = pending.pop()
        if current.name in seen_classes:
            continue
        seen_classes.add(current.name)

        for candidate in current.methods:
            first_definition.setdefault(candidate.name, (candidate, current.name))

        followable = []
        for base_name in current.bases:
            bare_name = base_name.split("[")[0]
            if bare_name in table.classes and bare_name not in EXTERNAL_STUBS:
                followable.append(table.classes[bare_name])
        pending.extend(reversed(followable))

    flattened: list[MethodInfo] = []
    for original, owner in first_definition.values():
        if owner == cls.name:
            flattened.append(original)
            continue
        if owner in EXTERNAL_STUBS:
            continue
        flattened.append(
            MethodInfo(
                name=original.name,
                signature=original.signature,
                docstring=original.docstring,
                source=original.source,
                is_property=original.is_property,
                is_classmethod=original.is_classmethod,
                is_abstract=original.is_abstract,
                line_count=original.line_count,
                is_inherited=True,
                inherited_from=owner,
            )
        )

    return tuple(_sort_methods(flattened))
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
# ---------------------------------------------------------------------------
|
|
242
|
+
# Rule extraction
|
|
243
|
+
# ---------------------------------------------------------------------------
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def extract_rules(classes: list[ClassInfo]) -> list[str]:
    """Collect rule sentences from class docstrings.

    A docstring line is a rule when, after stripping surrounding whitespace,
    it starts (case-insensitively) with one of ``_RULE_PREFIXES``. Rules are
    returned in first-seen order with exact duplicates removed. Classes
    without a docstring contribute nothing.
    """
    # A dict keeps insertion order and gives de-duplication for free.
    unique_rules: dict[str, None] = {}
    for cls in classes:
        for raw_line in (cls.docstring or "").splitlines():
            candidate = raw_line.strip()
            if candidate.lower().startswith(_RULE_PREFIXES):
                unique_rules.setdefault(candidate, None)
    return list(unique_rules)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
# ---------------------------------------------------------------------------
|
|
263
|
+
# Guide rendering
|
|
264
|
+
# ---------------------------------------------------------------------------
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def render_guide(data: GuideData) -> str:  # noqa: C901, PLR0912, PLR0915
    """Render GuideData to the final guide text.

    Sections are emitted in a fixed order: header lines (MODULE, and CLASSES /
    DEPENDS when non-empty), RULES, DEPENDENCIES stubs, INTERNAL TYPES,
    PUBLIC API, FUNCTIONS, then EXAMPLES / ERROR EXAMPLES (or a placeholder
    when there are no examples at all). A ``# SIZE:`` line, computed from the
    rendered text before the line itself is added, is inserted directly after
    the last header line.
    """
    out: list[str] = [f"# MODULE: {data.module_name}"]

    joined_classes = ", ".join(c.name for c in data.classes)
    if joined_classes:
        out.append(f"# CLASSES: {joined_classes}")
    joined_external = ", ".join(sorted(data.external_bases))
    if joined_external:
        out.append(f"# DEPENDS: {joined_external}")

    if data.rules:
        out += ["", "# === RULES (MUST FOLLOW) ==="]
        out += [f"# {number}. {rule}" for number, rule in enumerate(data.rules, 1)]

    # External bases are shown as minimal stub classes.
    if data.external_bases:
        out += ["", "# === DEPENDENCIES (Resolved) ===", ""]
        for base in sorted(data.external_bases):
            stub_name = base.split("[")[0]
            description = EXTERNAL_STUBS.get(stub_name, "External base class (not fully documented).")
            out += [f"class {stub_name}:", f' """{description}"""', " ...", ""]

    # Private classes that the public API refers to.
    if data.internal_types:
        out += ["", "# === INTERNAL TYPES (referenced by public API) ===", ""]
        for internal in data.internal_types:
            out += _render_class(internal)

    out += ["# === PUBLIC API ===", ""]
    for public_class in data.classes:
        out += _render_class(public_class)

    if data.functions:
        out += ["# === FUNCTIONS ===", ""]
        for public_function in data.functions:
            out += _render_function(public_function)

    if not (data.normal_examples or data.error_examples):
        out += ["# === EXAMPLES ===", "# No test examples available.", ""]
    else:
        example_sections = (
            ("# === EXAMPLES (from tests/) ===", "Example", data.normal_examples),
            ("# === ERROR EXAMPLES (What NOT to Do) ===", "Error", data.error_examples),
        )
        for heading, label, examples in example_sections:
            if not examples:
                continue
            out += [heading, ""]
            for ex in examples:
                out += [
                    f"# {label}: {_example_title(ex)}",
                    f"# Source: {ex.source_file}:{ex.line_number}",
                    ex.code,
                    "",
                ]

    body = "\n".join(out)

    # The size is measured on the text without the SIZE line itself.
    size_header = f"# SIZE: ~{len(body.encode('utf-8')) // 1024}KB"
    rendered = body.splitlines()

    # Find the slot right after the last header line; stop at the first section marker.
    position = 0
    for index, text in enumerate(rendered):
        if text.startswith("# ==="):
            break
        if text.startswith(("# MODULE:", "# CLASSES:", "# DEPENDS:")):
            position = index + 1
    rendered.insert(position, size_header)
    return "\n".join(rendered)
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
# ---------------------------------------------------------------------------
|
|
363
|
+
# Main entry point
|
|
364
|
+
# ---------------------------------------------------------------------------
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def build_guide(  # noqa: PLR0917
    module_name: str,
    source_dir: Path,
    tests_dir: Path,
    table: SymbolTable,
    test_dir_overrides: dict[str, str] | None = None,
    repo_root: Path | None = None,
) -> GuideData:
    """Assemble the GuideData for one module.

    Steps: select the module's public classes and functions from ``table``,
    resolve the external bases of those classes, flatten each class's
    inheritance, find private types referenced by the public API, discover
    the module's tests, and select the examples to show.

    ``repo_root`` defaults to the parent of ``source_dir``. A warning is
    logged when no tests are found, and for every marked test that does not
    mention any of the module's public symbols.
    """
    effective_root = source_dir.parent if repo_root is None else repo_root

    module_classes = [
        candidate
        for candidate in table.classes.values()
        if candidate.is_public and table.class_to_module.get(candidate.name) == module_name
    ]
    module_functions = [
        candidate
        for candidate in table.functions.values()
        if candidate.is_public and table.function_to_module.get(candidate.name) == module_name
    ]

    class_names = [candidate.name for candidate in module_classes]
    _resolved, external_bases = resolve_dependencies(class_names, table)

    # Rebuild each class with its inherited methods merged in.
    flattened = [
        ClassInfo(
            name=original.name,
            bases=original.bases,
            docstring=original.docstring,
            is_public=original.is_public,
            class_vars=original.class_vars,
            methods=flatten_methods(original, table),
            validators=original.validators,
            module_path=original.module_path,
            decorators=original.decorators,
        )
        for original in module_classes
    ]

    internal_types = _collect_internal_types(module_functions, flattened, table, module_name)

    discovered = discover_tests(module_name, tests_dir, test_dir_overrides, effective_root)
    if not discovered:
        logger.warning("No tests found for %s", module_name)

    symbol_names = class_names + [function.name for function in module_functions]

    # A marked test is always included, so flag the ones that look unrelated.
    for example in discovered:
        if not example.is_marked:
            continue
        if _has_symbol_overlap(example, symbol_names):
            continue
        logger.warning(
            "Marked test %s in %s has no symbol overlap with module %s",
            example.name,
            example.source_file,
            module_name,
        )

    normal_examples, error_examples = select_examples(discovered, symbol_names)

    return GuideData(
        module_name=module_name,
        classes=flattened,
        functions=module_functions,
        rules=extract_rules(flattened),
        external_bases=external_bases,
        normal_examples=normal_examples,
        error_examples=error_examples,
        internal_types=internal_types,
    )
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
# ---------------------------------------------------------------------------
|
|
444
|
+
# Private helpers
|
|
445
|
+
# ---------------------------------------------------------------------------
|
|
446
|
+
|
|
447
|
+
_PRIVATE_TYPE_RE = re.compile(r"\b_[A-Z]\w*")
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
def _collect_internal_types(
|
|
451
|
+
public_functions: list[FunctionInfo],
|
|
452
|
+
public_classes: list[ClassInfo],
|
|
453
|
+
table: SymbolTable,
|
|
454
|
+
module_name: str,
|
|
455
|
+
) -> list[ClassInfo]:
|
|
456
|
+
"""Find private classes from the same module that are referenced in public signatures."""
|
|
457
|
+
# Build text blob from all public signatures and sources
|
|
458
|
+
parts: list[str] = [f.source for f in public_functions]
|
|
459
|
+
for c in public_classes:
|
|
460
|
+
parts.extend(m.signature for m in c.methods if is_public_name(m.name))
|
|
461
|
+
blob = " ".join(parts)
|
|
462
|
+
|
|
463
|
+
referenced = set(_PRIVATE_TYPE_RE.findall(blob))
|
|
464
|
+
if not referenced:
|
|
465
|
+
return []
|
|
466
|
+
|
|
467
|
+
return sorted(
|
|
468
|
+
(table.classes[name] for name in referenced if name in table.classes and table.class_to_module.get(name) == module_name),
|
|
469
|
+
key=lambda c: c.name,
|
|
470
|
+
)
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
def _get_source(source_lines: list[str], node: ast.AST) -> str:
|
|
474
|
+
decoratable = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
|
|
475
|
+
if isinstance(node, decoratable) and node.decorator_list:
|
|
476
|
+
start = node.decorator_list[0].lineno - 1
|
|
477
|
+
else:
|
|
478
|
+
start: int = node.lineno - 1 # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType, reportUnknownVariableType]
|
|
479
|
+
end: int = node.end_lineno or node.lineno # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType, reportUnknownVariableType]
|
|
480
|
+
return "\n".join(source_lines[start:end])
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def _extract_test_functions(path: Path, repo_root: Path | None = None) -> list[TestExample]:
    """Parse a test file and return one TestExample per ``test_*`` function.

    Every sync or async function whose name starts with ``test_`` is
    collected, wherever it appears in the file. The stored code includes
    decorators, has lines mentioning ``mark.ai_docs`` removed, and is
    dedented. ``source_file`` is the path relative to ``repo_root`` when that
    is given and the file lies beneath it, otherwise the path as passed in.

    Raises whatever ``Path.read_text`` or ``ast.parse`` raise for an
    unreadable or syntactically invalid file.
    """
    source = path.read_text(encoding="utf-8")
    # Split on "\n" only. str.splitlines() also breaks on form feeds and
    # other separators that the parser does not count as line breaks, which
    # would shift every later line relative to the AST line numbers.
    # read_text() has already normalised line endings to "\n".
    source_lines = source.split("\n")
    tree = ast.parse(source)

    source_file = str(path)
    if repo_root is not None:
        try:
            source_file = str(path.relative_to(repo_root))
        except ValueError:
            # Not located under repo_root: keep the path as given.
            pass

    results: list[TestExample] = []
    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        if not node.name.startswith("test_"):
            continue

        code = _get_source(source_lines, node)
        # Drop the marker line(s). "mark.ai_docs" also covers the
        # "pytest.mark.ai_docs" spelling.
        code = "\n".join(line for line in code.split("\n") if "mark.ai_docs" not in line)
        code = "\n".join(_dedented_source(code))
        results.append(
            TestExample(
                name=node.name,
                source_file=source_file,
                line_number=node.lineno,
                code=code,
                score=0,
                is_error_example=_uses_pytest_raises(node),
                is_marked=_has_ai_docs_marker(node),
            )
        )
    return results
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
def _uses_pytest_raises(node: ast.FunctionDef | ast.AsyncFunctionDef) -> bool:
|
|
521
|
+
for child in ast.walk(node):
|
|
522
|
+
if isinstance(child, ast.Attribute) and child.attr == "raises" and isinstance(child.value, ast.Name) and child.value.id == "pytest":
|
|
523
|
+
return True
|
|
524
|
+
return False
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
def _has_ai_docs_marker(node: ast.FunctionDef | ast.AsyncFunctionDef) -> bool:
|
|
528
|
+
"""Check if a test function has @pytest.mark.ai_docs decorator."""
|
|
529
|
+
for decorator in node.decorator_list:
|
|
530
|
+
target = decorator.func if isinstance(decorator, ast.Call) else decorator
|
|
531
|
+
if not isinstance(target, ast.Attribute) or target.attr != "ai_docs":
|
|
532
|
+
continue
|
|
533
|
+
# pytest.mark.ai_docs
|
|
534
|
+
if (
|
|
535
|
+
isinstance(target.value, ast.Attribute)
|
|
536
|
+
and target.value.attr == "mark"
|
|
537
|
+
and isinstance(target.value.value, ast.Name)
|
|
538
|
+
and target.value.value.id == "pytest"
|
|
539
|
+
):
|
|
540
|
+
return True
|
|
541
|
+
# mark.ai_docs (from pytest import mark)
|
|
542
|
+
if isinstance(target.value, ast.Name) and target.value.id == "mark":
|
|
543
|
+
return True
|
|
544
|
+
return False
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
def _has_symbol_overlap(test: TestExample, symbol_names: list[str]) -> bool:
|
|
548
|
+
"""Check if a test references any of the given symbol names."""
|
|
549
|
+
return any(symbol.lower() in test.name.lower() or symbol in test.code for symbol in symbol_names)
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
def _sort_methods(methods: list[MethodInfo]) -> list[MethodInfo]:
|
|
553
|
+
"""Sort: __init__ first, then properties, classmethods, regular methods."""
|
|
554
|
+
|
|
555
|
+
def key(m: MethodInfo) -> tuple[int, str]:
|
|
556
|
+
if m.name == "__init__":
|
|
557
|
+
return (0, m.name)
|
|
558
|
+
if m.is_property:
|
|
559
|
+
return (1, m.name)
|
|
560
|
+
if m.is_classmethod:
|
|
561
|
+
return (2, m.name)
|
|
562
|
+
return (3, m.name)
|
|
563
|
+
|
|
564
|
+
return sorted(methods, key=key)
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
def _render_class(cls: ClassInfo) -> list[str]:
    """Render a class as a list of guide lines.

    Output order: decorators, the ``class`` header with bases, the docstring,
    class variables, a compact summary of inherited public methods grouped by
    the class they come from, then the full source of the class's own public
    methods. A trailing empty line is always appended.
    """
    # One "@decorator" line per decorator, ahead of the class header.
    lines: list[str] = [f"@{dec}" for dec in cls.decorators]
    bases_str = f"({', '.join(cls.bases)})" if cls.bases else ""
    lines.append(f"class {cls.name}{bases_str}:")

    if cls.docstring:
        lines.append(f' """{cls.docstring.strip()}"""')

    # Each class variable is a (name, annotation, default) triple; falsy
    # annotation or default parts are simply left out of the rendered line.
    for var_name, type_ann, default in cls.class_vars:
        if type_ann:
            if default:
                lines.append(f" {var_name}: {type_ann} = {default}")
            else:
                lines.append(f" {var_name}: {type_ann}")
        elif default:
            lines.append(f" {var_name} = {default}")
        else:
            lines.append(f" {var_name}")

    # Blank separator after the variable block, only when there was one.
    if cls.class_vars:
        lines.append("")

    # Separate own methods from inherited
    own_methods = [m for m in cls.methods if not m.is_inherited and is_public_name(m.name)]
    inherited_methods = [m for m in cls.methods if m.is_inherited and is_public_name(m.name)]

    # Render inherited methods as compact grouped references
    if inherited_methods:
        # Parent class name -> names of the methods inherited from it,
        # in first-seen order of the parents.
        groups: dict[str, list[str]] = {}
        for m in inherited_methods:
            parent = m.inherited_from or "unknown"
            groups.setdefault(parent, []).append(m.name)
        for parent, names in groups.items():
            lines.append(f" # [Inherited from {parent}]")
            lines.append(f" # {', '.join(sorted(names))}")
        # NOTE(review): the nesting of this blank-line append (once after all
        # groups vs. once per group) could not be confirmed -- verify.
        lines.append("")

    # Render own methods with full source
    for method in own_methods:
        lines.extend(_render_method(method))

    lines.append("")
    return lines
|
|
610
|
+
|
|
611
|
+
|
|
612
|
+
def _render_method(method: MethodInfo) -> list[str]:
    """Render a method's dedented source, indented for a class body, plus a blank line."""
    return [*(f" {text}" for text in _dedented_source(method.source)), ""]
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
def _render_function(func: FunctionInfo) -> list[str]:
    """Render a function's dedented source lines followed by a blank line."""
    return [*_dedented_source(func.source), ""]
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
def _dedented_source(source: str) -> list[str]:
|
|
625
|
+
"""Dedent source based on first line's indentation.
|
|
626
|
+
|
|
627
|
+
Uses the first non-empty line's indentation as the base level,
|
|
628
|
+
which correctly handles multi-line strings with content at column 0.
|
|
629
|
+
"""
|
|
630
|
+
raw_lines = source.splitlines()
|
|
631
|
+
if not raw_lines:
|
|
632
|
+
return []
|
|
633
|
+
first_non_empty = next((line for line in raw_lines if line.strip()), None)
|
|
634
|
+
if first_non_empty is None:
|
|
635
|
+
return raw_lines
|
|
636
|
+
indent = len(first_non_empty) - len(first_non_empty.lstrip())
|
|
637
|
+
if indent == 0:
|
|
638
|
+
return raw_lines
|
|
639
|
+
return [line[indent:] if line[:indent].isspace() else line for line in raw_lines]
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
def _example_title(ex: TestExample) -> str:
|
|
643
|
+
"""Convert test function name to readable title."""
|
|
644
|
+
return ex.name.removeprefix("test_").replace("_", " ").capitalize()
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Size management for generated guides.
|
|
2
|
+
|
|
3
|
+
Measures rendered guide size and warns when exceeding 50KB limit.
|
|
4
|
+
No hard failure -- oversized guides are still written.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from ai_pipeline_core.docs_generator.guide_builder import GuideData
|
|
8
|
+
from ai_pipeline_core.logging import get_pipeline_logger
|
|
9
|
+
|
|
10
|
+
logger = get_pipeline_logger(__name__)
|
|
11
|
+
|
|
12
|
+
MAX_GUIDE_SIZE = 51_200 # 50KB in bytes
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def manage_guide_size(
    data: GuideData,
    rendered_content: str,
    max_size: int = MAX_GUIDE_SIZE,
) -> str:
    """Log a warning when a rendered guide is larger than ``max_size`` bytes.

    The content is never modified: the same string is returned whether or
    not the limit is exceeded. ``data`` is only used for the module name in
    the warning message.
    """
    size = _measure(rendered_content)
    if size > max_size:
        logger.warning(
            "%s guide is %s bytes (%dKB). Consider: move private helpers to _ prefixed functions, split large classes into separate modules",
            data.module_name,
            f"{size:,}",
            size // 1024,
        )
    return rendered_content
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _measure(content: str) -> int:
|
|
34
|
+
"""Measure guide size in UTF-8 bytes."""
|
|
35
|
+
return len(content.encode("utf-8"))
|