ai-pipeline-core 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. ai_pipeline_core/__init__.py +70 -144
  2. ai_pipeline_core/deployment/__init__.py +6 -18
  3. ai_pipeline_core/deployment/base.py +392 -212
  4. ai_pipeline_core/deployment/contract.py +6 -10
  5. ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
  6. ai_pipeline_core/deployment/helpers.py +16 -17
  7. ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
  8. ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +12 -14
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +318 -1434
  30. ai_pipeline_core/documents/mime_type.py +37 -82
  31. ai_pipeline_core/documents/utils.py +4 -12
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +32 -85
  34. ai_pipeline_core/images/_processing.py +5 -11
  35. ai_pipeline_core/llm/__init__.py +6 -4
  36. ai_pipeline_core/llm/ai_messages.py +106 -81
  37. ai_pipeline_core/llm/client.py +267 -158
  38. ai_pipeline_core/llm/model_options.py +12 -84
  39. ai_pipeline_core/llm/model_response.py +53 -99
  40. ai_pipeline_core/llm/model_types.py +8 -23
  41. ai_pipeline_core/logging/__init__.py +2 -7
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -37
  44. ai_pipeline_core/logging/logging_mixin.py +15 -41
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
  49. ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +134 -75
  50. ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
  51. ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +16 -102
  70. ai_pipeline_core/settings.py +26 -31
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
  74. ai_pipeline_core/debug/__init__.py +0 -26
  75. ai_pipeline_core/documents/document_list.py +0 -420
  76. ai_pipeline_core/documents/flow_document.py +0 -112
  77. ai_pipeline_core/documents/task_document.py +0 -117
  78. ai_pipeline_core/documents/temporary_document.py +0 -74
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -494
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -718
  83. ai_pipeline_core/prefect.py +0 -63
  84. ai_pipeline_core/prompt_builder/__init__.py +0 -5
  85. ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
  86. ai_pipeline_core/prompt_builder/global_cache.py +0 -78
  87. ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
  88. ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
  89. ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
  90. ai_pipeline_core/storage/__init__.py +0 -8
  91. ai_pipeline_core/storage/storage.py +0 -628
  92. ai_pipeline_core/utils/__init__.py +0 -8
  93. ai_pipeline_core-0.3.3.dist-info/METADATA +0 -569
  94. ai_pipeline_core-0.3.3.dist-info/RECORD +0 -57
  95. {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
  96. {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,644 @@
1
+ """Per-module guide assembly with test discovery, relevance scoring, and rendering.
2
+
3
+ Collects public symbols, resolves dependencies, flattens inheritance,
4
+ discovers and scores test examples, extracts rules, and renders guides.
5
+ """
6
+
7
+ import ast
8
+ import re
9
+ from dataclasses import dataclass, field
10
+ from pathlib import Path
11
+
12
+ from ai_pipeline_core.docs_generator.extractor import (
13
+ EXTERNAL_STUBS,
14
+ ClassInfo,
15
+ FunctionInfo,
16
+ MethodInfo,
17
+ SymbolTable,
18
+ is_public_name,
19
+ resolve_dependencies,
20
+ )
21
+ from ai_pipeline_core.logging import get_pipeline_logger
22
+
23
# Module-level logger obtained from the project's logging helper.
logger = get_pipeline_logger(__name__)

# Default total number of examples select_examples() picks for one guide.
MAX_EXAMPLES = 8

# Lower-case prefixes; extract_rules() treats a docstring line that starts
# with one of these (compared case-insensitively) as a rule.
_RULE_PREFIXES = ("cannot", "must", "never", "always", "critical")
28
+
29
+
30
+ @dataclass(frozen=True)
31
+ class TestExample:
32
+ """Scored test function extracted for guide examples."""
33
+
34
+ name: str
35
+ source_file: str
36
+ line_number: int
37
+ code: str
38
+ score: int
39
+ is_error_example: bool
40
+ is_marked: bool = False
41
+
42
+
43
@dataclass
class GuideData:
    """Intermediate representation of a guide, used by trimmer before rendering.

    build_guide() produces an instance and render_guide() turns it into text.
    """

    module_name: str  # name of the module the guide describes
    classes: list[ClassInfo]  # public classes rendered under PUBLIC API
    functions: list[FunctionInfo]  # public functions rendered under FUNCTIONS
    rules: list[str]  # rule lines rendered under RULES
    external_bases: set[str]  # base-class names rendered under DEPENDS / DEPENDENCIES
    normal_examples: list[TestExample]  # examples rendered under EXAMPLES
    error_examples: list[TestExample]  # examples rendered under ERROR EXAMPLES
    # Private classes referenced by the public API; defaults to a fresh empty list.
    internal_types: list[ClassInfo] = field(default_factory=list)
55
+
56
+
57
+ # ---------------------------------------------------------------------------
58
+ # Test discovery
59
+ # ---------------------------------------------------------------------------
60
+
61
+
62
def discover_tests(
    module_name: str,
    tests_dir: Path,
    test_dir_overrides: dict[str, str] | None = None,
    repo_root: Path | None = None,
) -> list[TestExample]:
    """Collect the test functions that belong to one module.

    Two locations are searched, in this order:

    1. ``tests_dir/<subdir>/test_*.py`` where ``<subdir>`` is the entry for
       ``module_name`` in ``test_dir_overrides`` if present, otherwise
       ``module_name`` itself.
    2. ``tests_dir/test_<module_name>*.py`` -- always keyed on the original
       module name, never on the override.

    Args:
        module_name: Module whose tests are wanted.
        tests_dir: Root directory of the test suite.
        test_dir_overrides: Optional module-name -> subdirectory remapping.
        repo_root: Forwarded to the extractor for building display paths.

    Returns:
        Every extracted test function, in file discovery order.
    """
    overrides = test_dir_overrides or {}
    module_subdir = tests_dir / overrides.get(module_name, module_name)

    candidates: list[Path] = []
    if module_subdir.is_dir():
        candidates += sorted(module_subdir.glob("test_*.py"))

    # Root-level files are appended after the subdirectory ones; a file
    # already picked up above is not listed twice.
    for root_file in sorted(tests_dir.glob(f"test_{module_name}*.py")):
        if root_file in candidates:
            continue
        candidates.append(root_file)

    return [
        example
        for test_file in candidates
        for example in _extract_test_functions(test_file, repo_root)
    ]
91
+
92
+
93
+ # ---------------------------------------------------------------------------
94
+ # Scoring and selection
95
+ # ---------------------------------------------------------------------------
96
+
97
+
98
+ def score_test(test: TestExample, symbol_names: list[str]) -> int:
99
+ """Score a test's relevance to a set of module symbols.
100
+
101
+ Returns the best score across all symbols.
102
+ """
103
+ best_score = 0
104
+ test_name_lower = test.name.lower()
105
+
106
+ for symbol in symbol_names:
107
+ score = 0
108
+ symbol_lower = symbol.lower()
109
+
110
+ # Exact subject match: +5
111
+ subject = test_name_lower.removeprefix("test_")
112
+ if symbol_lower == subject or subject.startswith(symbol_lower + "_"):
113
+ score += 5
114
+ # Partial match: +3
115
+ elif symbol_lower in test_name_lower:
116
+ score += 3
117
+
118
+ # Body occurrences: +min(count, 2)
119
+ count = test.code.count(symbol)
120
+ score += min(count, 2)
121
+
122
+ best_score = max(best_score, score)
123
+
124
+ # Error example bonus: +2
125
+ if test.is_error_example:
126
+ best_score += 2
127
+
128
+ # Simplicity bonus
129
+ line_count = len(test.code.splitlines())
130
+ if line_count < 10:
131
+ best_score += 2
132
+ elif line_count < 20:
133
+ best_score += 1
134
+
135
+ # Pattern bonus: +1
136
+ if any(p in test.name for p in ("creation", "basic", "simple")):
137
+ best_score += 1
138
+
139
+ # Mock penalty — prefer real usage examples over heavily mocked tests
140
+ mock_patterns = ("Mock(", "MagicMock(", "patch(", "monkeypatch.")
141
+ mock_count = sum(test.code.count(p) for p in mock_patterns)
142
+ if mock_count >= 3:
143
+ best_score -= 3
144
+ elif mock_count >= 1:
145
+ best_score -= 1
146
+
147
+ return max(best_score, 0)
148
+
149
+
150
def select_examples(
    tests: list[TestExample],
    symbol_names: list[str],
    max_total: int = MAX_EXAMPLES,
) -> tuple[list[TestExample], list[TestExample]]:
    """Pick the examples that go into a guide.

    Marked tests are always kept, even when there are more of them than
    ``max_total``. Whatever budget is left after the marked tests is filled
    with unmarked tests ranked by ``score_test`` (highest first, ties broken
    by name). At most half of that remaining budget (integer division) goes to
    unmarked error examples; the rest goes to unmarked normal examples.

    Args:
        tests: All discovered tests for the module.
        symbol_names: Symbol names used for relevance scoring.
        max_total: Overall example budget.

    Returns:
        ``(normal_examples, error_examples)``; in each list the marked tests
        come first, followed by the selected unmarked ones.
    """
    pinned = [t for t in tests if t.is_marked]
    unpinned = [t for t in tests if not t.is_marked]

    ranked_pairs = sorted(
        ((score_test(t, symbol_names), t) for t in unpinned),
        key=lambda pair: (-pair[0], pair[1].name),
    )
    ranked = [t for _, t in ranked_pairs]

    pinned_normal = [t for t in pinned if not t.is_error_example]
    pinned_errors = [t for t in pinned if t.is_error_example]

    # With no budget left both slices below are empty, so no branch is needed.
    budget = max(max_total - len(pinned), 0)
    error_picks = [t for t in ranked if t.is_error_example][: budget // 2]
    normal_picks = [t for t in ranked if not t.is_error_example][: budget - len(error_picks)]

    return pinned_normal + normal_picks, pinned_errors + error_picks
189
+
190
+
191
+ # ---------------------------------------------------------------------------
192
+ # Inheritance flattening
193
+ # ---------------------------------------------------------------------------
194
+
195
+
196
def flatten_methods(cls: ClassInfo, table: SymbolTable) -> tuple[MethodInfo, ...]:
    """Return the methods of ``cls`` together with those it inherits.

    The class hierarchy is walked depth-first starting at ``cls`` and following
    bases in declaration order. The first class that defines a method name wins,
    which approximates Python's MRO (child > left base > right base >
    grandparent). Bases that are not in ``table.classes`` or that are listed in
    ``EXTERNAL_STUBS`` are not followed. A generic base such as ``Base[T]`` is
    looked up as ``Base``.

    Args:
        cls: Class whose methods are flattened.
        table: Symbol table used to resolve base-class names.

    Returns:
        The methods in ``_sort_methods`` order. Methods taken from a base are
        copies with ``is_inherited=True`` and ``inherited_from`` set to the
        defining class; the class's own methods are returned as-is.
    """
    winners: dict[str, tuple[MethodInfo, str]] = {}
    seen_classes: set[str] = set()

    def walk(current: ClassInfo) -> None:
        if current.name in seen_classes:
            return
        seen_classes.add(current.name)
        for candidate in current.methods:
            # Keep the first definition encountered for each method name.
            winners.setdefault(candidate.name, (candidate, current.name))
        for raw_base in current.bases:
            base = raw_base.split("[")[0]
            if base in EXTERNAL_STUBS or base not in table.classes:
                continue
            walk(table.classes[base])

    walk(cls)

    flattened: list[MethodInfo] = []
    for method, owner in winners.values():
        if owner == cls.name:
            flattened.append(method)
            continue
        # Defensive guard: walk() never descends into EXTERNAL_STUBS bases,
        # so this is not expected to trigger.
        if owner in EXTERNAL_STUBS:
            continue
        flattened.append(
            MethodInfo(
                name=method.name,
                signature=method.signature,
                docstring=method.docstring,
                source=method.source,
                is_property=method.is_property,
                is_classmethod=method.is_classmethod,
                is_abstract=method.is_abstract,
                line_count=method.line_count,
                is_inherited=True,
                inherited_from=owner,
            )
        )

    return tuple(_sort_methods(flattened))
239
+
240
+
241
+ # ---------------------------------------------------------------------------
242
+ # Rule extraction
243
+ # ---------------------------------------------------------------------------
244
+
245
+
246
def extract_rules(classes: list[ClassInfo]) -> list[str]:
    """Collect rule lines from class docstrings.

    A docstring line counts as a rule when, after stripping surrounding
    whitespace, it starts (case-insensitively) with one of ``_RULE_PREFIXES``.

    Args:
        classes: Classes whose docstrings are scanned; classes without a
            docstring are skipped.

    Returns:
        The stripped rule lines in first-seen order, without duplicates.
    """
    # A dict is used as an insertion-ordered set.
    ordered: dict[str, None] = {}
    for cls in classes:
        for raw_line in (cls.docstring or "").splitlines():
            text = raw_line.strip()
            if text.lower().startswith(_RULE_PREFIXES):
                ordered.setdefault(text, None)
    return list(ordered)
260
+
261
+
262
+ # ---------------------------------------------------------------------------
263
+ # Guide rendering
264
+ # ---------------------------------------------------------------------------
265
+
266
+
267
def render_guide(data: GuideData) -> str:  # noqa: C901, PLR0912, PLR0915
    """Render GuideData to final markdown string.

    Sections are emitted in a fixed order: header comment lines (MODULE, then
    CLASSES and DEPENDS when non-empty), RULES, DEPENDENCIES, INTERNAL TYPES,
    PUBLIC API, FUNCTIONS and finally the examples. A ``# SIZE:`` line is
    computed from the rendered text and inserted after the header lines.

    Args:
        data: Guide contents to render.

    Returns:
        The guide as a single newline-joined string.
    """
    # NOTE(review): leading whitespace inside the string literals below is
    # reproduced as it appears in this listing, which may have collapsed runs
    # of spaces -- confirm against the original file.
    parts: list[str] = []

    # Header
    class_names = ", ".join(c.name for c in data.classes)
    external = ", ".join(sorted(data.external_bases))
    parts.append(f"# MODULE: {data.module_name}")
    if class_names:
        parts.append(f"# CLASSES: {class_names}")
    if external:
        parts.append(f"# DEPENDS: {external}")

    # Rules (numbered from 1)
    if data.rules:
        parts.append("")
        parts.append("# === RULES (MUST FOLLOW) ===")
        for i, rule in enumerate(data.rules, 1):
            parts.append(f"# {i}. {rule}")

    # Dependencies (external stubs): one placeholder class per external base
    if data.external_bases:
        parts.append("")
        parts.append("# === DEPENDENCIES (Resolved) ===")
        parts.append("")
        for base_name in sorted(data.external_bases):
            # Drop any generic parameters, e.g. "Base[T]" -> "Base".
            clean = base_name.split("[")[0]
            desc = EXTERNAL_STUBS.get(clean, "External base class (not fully documented).")
            parts.append(f"class {clean}:")
            parts.append(f' """{desc}"""')
            parts.append(" ...")
            parts.append("")

    # Internal types (private classes referenced by public API)
    if data.internal_types:
        parts.append("")
        parts.append("# === INTERNAL TYPES (referenced by public API) ===")
        parts.append("")
        for cls in data.internal_types:
            parts.extend(_render_class(cls))

    # Public API -- classes (this section header is always emitted)
    parts.append("# === PUBLIC API ===")
    parts.append("")
    for cls in data.classes:
        parts.extend(_render_class(cls))

    # Public API -- functions
    if data.functions:
        parts.append("# === FUNCTIONS ===")
        parts.append("")
        for func in data.functions:
            parts.extend(_render_function(func))

    # Examples: normal ones first, then error examples; a placeholder section
    # is written when there are none of either kind.
    if data.normal_examples or data.error_examples:
        if data.normal_examples:
            parts.append("# === EXAMPLES (from tests/) ===")
            parts.append("")
            for ex in data.normal_examples:
                parts.append(f"# Example: {_example_title(ex)}")
                parts.append(f"# Source: {ex.source_file}:{ex.line_number}")
                parts.append(ex.code)
                parts.append("")

        if data.error_examples:
            parts.append("# === ERROR EXAMPLES (What NOT to Do) ===")
            parts.append("")
            for ex in data.error_examples:
                parts.append(f"# Error: {_example_title(ex)}")
                parts.append(f"# Source: {ex.source_file}:{ex.line_number}")
                parts.append(ex.code)
                parts.append("")
    else:
        parts.append("# === EXAMPLES ===")
        parts.append("# No test examples available.")
        parts.append("")

    content = "\n".join(parts)

    # Insert SIZE header line (computed after rendering content, so the SIZE
    # line itself is not counted; the value is whole kilobytes, rounded down)
    size_kb = len(content.encode("utf-8")) // 1024
    size_line = f"# SIZE: ~{size_kb}KB"
    result_lines = content.splitlines()
    # Insert after last header line (MODULE/CLASSES/DEPENDS); scanning stops at
    # the first section marker so later text cannot move the insertion point
    insert_idx = 0
    for i, line in enumerate(result_lines):
        if line.startswith(("# MODULE:", "# CLASSES:", "# DEPENDS:")):
            insert_idx = i + 1
        elif line.startswith("# ==="):
            break
    result_lines.insert(insert_idx, size_line)
    return "\n".join(result_lines)
360
+
361
+
362
+ # ---------------------------------------------------------------------------
363
+ # Main entry point
364
+ # ---------------------------------------------------------------------------
365
+
366
+
367
def build_guide(  # noqa: PLR0917
    module_name: str,
    source_dir: Path,
    tests_dir: Path,
    table: SymbolTable,
    test_dir_overrides: dict[str, str] | None = None,
    repo_root: Path | None = None,
) -> GuideData:
    """Assemble the guide data for one module.

    Steps: pick the module's public classes and functions from ``table``,
    resolve the classes' dependencies, flatten inherited methods into each
    class, find private types referenced by the public API, discover the
    module's tests, select examples from them and extract rules.

    Warnings are logged when no tests are found and for every marked test that
    references none of the module's symbols.

    Args:
        module_name: Module to document.
        source_dir: Source directory; its parent is used as ``repo_root`` when
            that argument is omitted.
        tests_dir: Root directory of the test suite.
        table: Symbol table holding classes, functions and their modules.
        test_dir_overrides: Optional module-name -> test subdirectory remapping.
        repo_root: Root used for display paths of test files.

    Returns:
        The populated ``GuideData``.
    """
    effective_root = source_dir.parent if repo_root is None else repo_root

    classes_here = [
        info
        for info in table.classes.values()
        if info.is_public and table.class_to_module.get(info.name) == module_name
    ]
    functions_here = [
        fn
        for fn in table.functions.values()
        if fn.is_public and table.function_to_module.get(fn.name) == module_name
    ]

    class_names = [info.name for info in classes_here]
    _, external_bases = resolve_dependencies(class_names, table)

    # Rebuild each public class with its inherited methods shown inline.
    flattened = [
        ClassInfo(
            name=info.name,
            bases=info.bases,
            docstring=info.docstring,
            is_public=info.is_public,
            class_vars=info.class_vars,
            methods=flatten_methods(info, table),
            validators=info.validators,
            module_path=info.module_path,
            decorators=info.decorators,
        )
        for info in classes_here
    ]

    internal_types = _collect_internal_types(functions_here, flattened, table, module_name)

    tests = discover_tests(module_name, tests_dir, test_dir_overrides, effective_root)
    if not tests:
        logger.warning("No tests found for %s", module_name)

    symbol_names = [*class_names, *(fn.name for fn in functions_here)]

    for candidate in tests:
        if not candidate.is_marked or _has_symbol_overlap(candidate, symbol_names):
            continue
        logger.warning(
            "Marked test %s in %s has no symbol overlap with module %s",
            candidate.name,
            candidate.source_file,
            module_name,
        )

    normal_examples, error_examples = select_examples(tests, symbol_names)

    return GuideData(
        module_name=module_name,
        classes=flattened,
        functions=functions_here,
        rules=extract_rules(flattened),
        external_bases=external_bases,
        normal_examples=normal_examples,
        error_examples=error_examples,
        internal_types=internal_types,
    )
441
+
442
+
443
+ # ---------------------------------------------------------------------------
444
+ # Private helpers
445
+ # ---------------------------------------------------------------------------
446
+
447
# Matches identifiers that start with an underscore followed by an upper-case
# letter, e.g. ``_Hidden``.
_PRIVATE_TYPE_RE = re.compile(r"\b_[A-Z]\w*")


def _collect_internal_types(
    public_functions: list[FunctionInfo],
    public_classes: list[ClassInfo],
    table: SymbolTable,
    module_name: str,
) -> list[ClassInfo]:
    """Find private classes of this module that the public API mentions.

    The text searched is the full source of every public function plus the
    signature of every public method of the given classes. Names matching
    ``_PRIVATE_TYPE_RE`` are kept when they are classes in ``table`` that
    belong to ``module_name``.

    Args:
        public_functions: Public functions of the module.
        public_classes: Public classes of the module.
        table: Symbol table used to resolve the matched names.
        module_name: Module the private classes must belong to.

    Returns:
        The matching classes sorted by name; empty when nothing matches.
    """
    fragments: list[str] = [fn.source for fn in public_functions]
    for klass in public_classes:
        fragments += [m.signature for m in klass.methods if is_public_name(m.name)]

    mentioned = set(_PRIVATE_TYPE_RE.findall(" ".join(fragments)))

    matches = [
        table.classes[name]
        for name in mentioned
        if name in table.classes and table.class_to_module.get(name) == module_name
    ]
    matches.sort(key=lambda info: info.name)
    return matches
471
+
472
+
473
def _get_source(source_lines: list[str], node: ast.AST) -> str:
    """Return the source text of ``node`` cut out of ``source_lines``.

    For functions and classes that have decorators, the text starts at the
    first decorator rather than at the ``def``/``class`` line. When the node
    has no end line, only its first line is returned.

    Args:
        source_lines: The file's lines, without line terminators.
        node: AST node carrying line-number information.

    Returns:
        The node's lines joined with newlines.
    """
    first_line: int = node.lineno  # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType, reportUnknownVariableType]
    if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)) and node.decorator_list:
        first_line = node.decorator_list[0].lineno
    last_line: int = node.end_lineno or node.lineno  # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType, reportUnknownVariableType]
    # AST line numbers are 1-based and the end line is inclusive.
    return "\n".join(source_lines[first_line - 1 : last_line])
481
+
482
+
483
def _extract_test_functions(path: Path, repo_root: Path | None = None) -> list[TestExample]:
    """Parse a test file and return one ``TestExample`` per ``test_*`` function.

    Both plain and ``async`` functions are considered, at any nesting level.
    Lines containing ``mark.ai_docs`` are removed from the extracted code,
    which is then dedented.

    Args:
        path: Test file to read (UTF-8).
        repo_root: When given, ``source_file`` is the path relative to it;
            if the file is not under ``repo_root`` the path is used as-is.

    Returns:
        The extracted examples, each with ``score`` set to 0.
    """
    text = path.read_text(encoding="utf-8")
    file_lines = text.splitlines()
    module = ast.parse(text)

    try:
        display_path = str(path if repo_root is None else path.relative_to(repo_root))
    except ValueError:
        display_path = str(path)

    found: list[TestExample] = []
    for node in ast.walk(module):
        is_function = isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
        if not (is_function and node.name.startswith("test_")):
            continue

        # "mark.ai_docs" also covers the longer "pytest.mark.ai_docs" spelling.
        kept = [ln for ln in _get_source(file_lines, node).splitlines() if "mark.ai_docs" not in ln]
        snippet = "\n".join(_dedented_source("\n".join(kept)))
        found.append(
            TestExample(
                name=node.name,
                source_file=display_path,
                line_number=node.lineno,
                code=snippet,
                score=0,
                is_error_example=_uses_pytest_raises(node),
                is_marked=_has_ai_docs_marker(node),
            )
        )
    return found
518
+
519
+
520
+ def _uses_pytest_raises(node: ast.FunctionDef | ast.AsyncFunctionDef) -> bool:
521
+ for child in ast.walk(node):
522
+ if isinstance(child, ast.Attribute) and child.attr == "raises" and isinstance(child.value, ast.Name) and child.value.id == "pytest":
523
+ return True
524
+ return False
525
+
526
+
527
+ def _has_ai_docs_marker(node: ast.FunctionDef | ast.AsyncFunctionDef) -> bool:
528
+ """Check if a test function has @pytest.mark.ai_docs decorator."""
529
+ for decorator in node.decorator_list:
530
+ target = decorator.func if isinstance(decorator, ast.Call) else decorator
531
+ if not isinstance(target, ast.Attribute) or target.attr != "ai_docs":
532
+ continue
533
+ # pytest.mark.ai_docs
534
+ if (
535
+ isinstance(target.value, ast.Attribute)
536
+ and target.value.attr == "mark"
537
+ and isinstance(target.value.value, ast.Name)
538
+ and target.value.value.id == "pytest"
539
+ ):
540
+ return True
541
+ # mark.ai_docs (from pytest import mark)
542
+ if isinstance(target.value, ast.Name) and target.value.id == "mark":
543
+ return True
544
+ return False
545
+
546
+
547
def _has_symbol_overlap(test: TestExample, symbol_names: list[str]) -> bool:
    """Tell whether a test mentions any of the given symbols.

    A symbol counts as mentioned when it occurs in the test code
    (case-sensitive) or in the test name (case-insensitive).
    """
    lowered_name = test.name.lower()
    for symbol in symbol_names:
        if symbol in test.code or symbol.lower() in lowered_name:
            return True
    return False
550
+
551
+
552
def _sort_methods(methods: list[MethodInfo]) -> list[MethodInfo]:
    """Return the methods in display order.

    Order: ``__init__``, then properties, then classmethods, then everything
    else; within each group methods are ordered by name. The input list is not
    modified.
    """

    def rank(m: MethodInfo) -> tuple[int, str]:
        if m.name == "__init__":
            group = 0
        elif m.is_property:
            group = 1
        elif m.is_classmethod:
            group = 2
        else:
            group = 3
        return (group, m.name)

    ordered = list(methods)
    ordered.sort(key=rank)
    return ordered
565
+
566
+
567
def _render_class(cls: ClassInfo) -> list[str]:
    """Render one class as a list of output lines.

    The output consists of the decorators, the ``class`` line, the docstring
    (if any), the class variables, a compact listing of inherited public
    methods grouped by the class they come from, the full source of the
    class's own public methods, and a trailing blank line.

    Args:
        cls: Class to render.

    Returns:
        The rendered lines, without line terminators.
    """
    # NOTE(review): leading whitespace inside the string literals below is
    # reproduced as it appears in this listing, which may have collapsed runs
    # of spaces -- confirm against the original file.
    lines: list[str] = [f"@{dec}" for dec in cls.decorators]
    bases_str = f"({', '.join(cls.bases)})" if cls.bases else ""
    lines.append(f"class {cls.name}{bases_str}:")

    if cls.docstring:
        lines.append(f' """{cls.docstring.strip()}"""')

    # Each class variable is a (name, type annotation, default) triple; the
    # annotation and the default are each printed only when truthy.
    for var_name, type_ann, default in cls.class_vars:
        if type_ann:
            if default:
                lines.append(f" {var_name}: {type_ann} = {default}")
            else:
                lines.append(f" {var_name}: {type_ann}")
        elif default:
            lines.append(f" {var_name} = {default}")
        else:
            lines.append(f" {var_name}")

    if cls.class_vars:
        lines.append("")

    # Separate own methods from inherited; non-public names are dropped from both
    own_methods = [m for m in cls.methods if not m.is_inherited and is_public_name(m.name)]
    inherited_methods = [m for m in cls.methods if m.is_inherited and is_public_name(m.name)]

    # Render inherited methods as compact grouped references (names only,
    # sorted within each parent; parents keep first-seen order)
    if inherited_methods:
        groups: dict[str, list[str]] = {}
        for m in inherited_methods:
            parent = m.inherited_from or "unknown"
            groups.setdefault(parent, []).append(m.name)
        for parent, names in groups.items():
            lines.append(f" # [Inherited from {parent}]")
            lines.append(f" # {', '.join(sorted(names))}")
            lines.append("")

    # Render own methods with full source
    for method in own_methods:
        lines.extend(_render_method(method))

    lines.append("")
    return lines
610
+
611
+
612
def _render_method(method: MethodInfo) -> list[str]:
    """Return a method's dedented source, re-indented for a class body, plus a blank line."""
    # NOTE(review): the indentation prefix in the f-string below is reproduced
    # as it appears in this listing, which may have collapsed runs of spaces --
    # confirm against the original file.
    lines: list[str] = [f" {source_line}" for source_line in _dedented_source(method.source)]
    lines.append("")
    return lines
616
+
617
+
618
def _render_function(func: FunctionInfo) -> list[str]:
    """Return a function's dedented source lines followed by one blank line."""
    return [*_dedented_source(func.source), ""]
622
+
623
+
624
def _dedented_source(source: str) -> list[str]:
    """Split ``source`` into lines and remove the leading indentation.

    The indentation width is taken from the first non-blank line. That many
    characters are cut from every line whose first ``width`` characters are
    all whitespace; other lines (for example multi-line string content at
    column 0, or empty lines) are left untouched.

    Args:
        source: Source text, possibly indented.

    Returns:
        The lines of ``source`` without line terminators.
    """
    lines = source.splitlines()
    for candidate in lines:
        if candidate.strip():
            width = len(candidate) - len(candidate.lstrip())
            break
    else:
        # No lines at all, or only blank ones: nothing to measure.
        return lines

    if width == 0:
        return lines
    return [ln[width:] if ln[:width].isspace() else ln for ln in lines]
640
+
641
+
642
def _example_title(ex: TestExample) -> str:
    """Turn a test function name into a readable title.

    The ``test_`` prefix is dropped, underscores become spaces, and the result
    is capitalised (first character upper-case, the rest lower-case).
    """
    words = ex.name.removeprefix("test_").split("_")
    return " ".join(words).capitalize()
@@ -0,0 +1,35 @@
1
+ """Size management for generated guides.
2
+
3
+ Measures rendered guide size and warns when exceeding 50KB limit.
4
+ No hard failure -- oversized guides are still written.
5
+ """
6
+
7
+ from ai_pipeline_core.docs_generator.guide_builder import GuideData
8
+ from ai_pipeline_core.logging import get_pipeline_logger
9
+
10
# Module-level logger obtained from the project's logging helper.
logger = get_pipeline_logger(__name__)

# Default size threshold for manage_guide_size(), in UTF-8 bytes.
MAX_GUIDE_SIZE = 51_200  # 50KB in bytes
13
+
14
+
15
def manage_guide_size(
    data: GuideData,
    rendered_content: str,
    max_size: int = MAX_GUIDE_SIZE,
) -> str:
    """Log a warning when a rendered guide is larger than ``max_size``.

    The content is never modified or truncated.

    Args:
        data: Guide data; only ``module_name`` is used, for the log message.
        rendered_content: The rendered guide text.
        max_size: Size limit in UTF-8 bytes.

    Returns:
        ``rendered_content`` unchanged.
    """
    byte_count = _measure(rendered_content)
    if byte_count > max_size:
        logger.warning(
            "%s guide is %s bytes (%dKB). Consider: move private helpers to _ prefixed functions, split large classes into separate modules",
            data.module_name,
            f"{byte_count:,}",
            byte_count // 1024,
        )
    return rendered_content
31
+
32
+
33
def _measure(content: str) -> int:
    """Return the number of bytes ``content`` occupies when encoded as UTF-8."""
    encoded = content.encode("utf-8")
    return len(encoded)