ai-pipeline-core 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +64 -158
- ai_pipeline_core/deployment/__init__.py +6 -18
- ai_pipeline_core/deployment/base.py +392 -212
- ai_pipeline_core/deployment/contract.py +6 -10
- ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
- ai_pipeline_core/deployment/helpers.py +16 -17
- ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
- ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +11 -84
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +32 -85
- ai_pipeline_core/images/_processing.py +5 -11
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +102 -90
- ai_pipeline_core/llm/client.py +229 -183
- ai_pipeline_core/llm/model_options.py +12 -84
- ai_pipeline_core/llm/model_response.py +53 -99
- ai_pipeline_core/llm/model_types.py +8 -23
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
- ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +133 -75
- ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
- ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
- ai_pipeline_core/debug/__init__.py +0 -26
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -494
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/prompt_builder/__init__.py +0 -5
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
- ai_pipeline_core/prompt_builder/global_cache.py +0 -78
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
- ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core-0.3.4.dist-info/METADATA +0 -569
- ai_pipeline_core-0.3.4.dist-info/RECORD +0 -57
- {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""AI-focused documentation generator.
|
|
2
|
+
|
|
3
|
+
Generates dense, self-contained guides from source code and test suite
|
|
4
|
+
for AI coding agents. Uses AST parsing, dependency resolution, and
|
|
5
|
+
size management for guides with a 50KB warning threshold.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from ai_pipeline_core.docs_generator.extractor import (
|
|
9
|
+
ClassInfo,
|
|
10
|
+
FunctionInfo,
|
|
11
|
+
MethodInfo,
|
|
12
|
+
ModuleInfo,
|
|
13
|
+
SymbolTable,
|
|
14
|
+
is_public_name,
|
|
15
|
+
parse_module,
|
|
16
|
+
)
|
|
17
|
+
from ai_pipeline_core.docs_generator.guide_builder import (
|
|
18
|
+
GuideData,
|
|
19
|
+
TestExample,
|
|
20
|
+
build_guide,
|
|
21
|
+
discover_tests,
|
|
22
|
+
select_examples,
|
|
23
|
+
)
|
|
24
|
+
from ai_pipeline_core.docs_generator.trimmer import manage_guide_size
|
|
25
|
+
from ai_pipeline_core.docs_generator.validator import (
|
|
26
|
+
ValidationResult,
|
|
27
|
+
compute_source_hash,
|
|
28
|
+
validate_all,
|
|
29
|
+
validate_completeness,
|
|
30
|
+
validate_freshness,
|
|
31
|
+
validate_size,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
__all__ = [
|
|
35
|
+
"ClassInfo",
|
|
36
|
+
"FunctionInfo",
|
|
37
|
+
"GuideData",
|
|
38
|
+
"MethodInfo",
|
|
39
|
+
"ModuleInfo",
|
|
40
|
+
"SymbolTable",
|
|
41
|
+
"TestExample",
|
|
42
|
+
"ValidationResult",
|
|
43
|
+
"build_guide",
|
|
44
|
+
"compute_source_hash",
|
|
45
|
+
"discover_tests",
|
|
46
|
+
"is_public_name",
|
|
47
|
+
"manage_guide_size",
|
|
48
|
+
"parse_module",
|
|
49
|
+
"select_examples",
|
|
50
|
+
"validate_all",
|
|
51
|
+
"validate_completeness",
|
|
52
|
+
"validate_freshness",
|
|
53
|
+
"validate_size",
|
|
54
|
+
]
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""CLI for AI documentation generation and validation."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from ai_pipeline_core.docs_generator.extractor import build_symbol_table
|
|
8
|
+
from ai_pipeline_core.docs_generator.guide_builder import build_guide, render_guide
|
|
9
|
+
from ai_pipeline_core.docs_generator.trimmer import manage_guide_size
|
|
10
|
+
from ai_pipeline_core.docs_generator.validator import (
|
|
11
|
+
HASH_FILE,
|
|
12
|
+
compute_source_hash,
|
|
13
|
+
validate_all,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
EXCLUDED_MODULES: frozenset[str] = frozenset({"docs_generator"})
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _normalize_whitespace(content: str) -> str:
|
|
20
|
+
"""Strip trailing whitespace from each line and ensure final newline."""
|
|
21
|
+
lines = [line.rstrip() for line in content.splitlines()]
|
|
22
|
+
return "\n".join(lines) + "\n"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
TEST_DIR_OVERRIDES: dict[str, str] = {} # nosemgrep: no-mutable-module-globals
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _discover_modules(source_dir: Path) -> list[str]:
    """Discover public top-level module groupings under *source_dir*.

    A file inside a subpackage contributes the subpackage name; a
    top-level file contributes its stem. Private modules (leading
    underscore, except ``__init__.py``) and EXCLUDED_MODULES are skipped.
    """
    found: set[str] = set()
    for candidate in sorted(source_dir.rglob("*.py")):
        is_private = candidate.name.startswith("_") and candidate.name != "__init__.py"
        if is_private:
            continue
        rel = candidate.relative_to(source_dir)
        found.add(rel.parts[0] if len(rel.parts) > 1 else rel.stem)
    return sorted(found - EXCLUDED_MODULES)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def main(argv: list[str] | None = None) -> int:
    """Entry point for AI docs CLI with generate/check subcommands.

    Args:
        argv: Argument list to parse; defaults to sys.argv[1:] when None
            (argparse's default behavior).

    Returns:
        Process exit code: 0 on success, 1 on failure or missing subcommand.
    """
    parser = argparse.ArgumentParser(description="AI documentation generator")
    parser.add_argument("--source-dir", type=Path, help="Source package directory")
    parser.add_argument("--tests-dir", type=Path, help="Tests directory")
    parser.add_argument("--output-dir", type=Path, help="Output .ai-docs directory")
    subparsers = parser.add_subparsers(dest="command")
    subparsers.add_parser("generate", help="Generate .ai-docs/ documentation")
    subparsers.add_parser("check", help="Validate .ai-docs/ is up-to-date")

    args = parser.parse_args(argv)
    if not args.command:
        # No subcommand given: show usage and signal failure.
        parser.print_help()
        return 1

    source_dir, tests_dir, output_dir, repo_root = _resolve_paths(args)

    if args.command == "generate":
        return _run_generate(source_dir, tests_dir, output_dir, repo_root)
    # The only other registered subcommand is "check".
    return _run_check(source_dir, tests_dir, output_dir)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _resolve_paths(args: argparse.Namespace) -> tuple[Path, Path, Path, Path]:
|
|
65
|
+
"""Resolve source, tests, output directories and repo root from args or auto-detect."""
|
|
66
|
+
cli_file = Path(__file__).resolve()
|
|
67
|
+
repo_root = cli_file.parent.parent.parent
|
|
68
|
+
source_dir = args.source_dir or (repo_root / "ai_pipeline_core")
|
|
69
|
+
tests_dir = args.tests_dir or (repo_root / "tests")
|
|
70
|
+
output_dir = args.output_dir or (repo_root / ".ai-docs")
|
|
71
|
+
return source_dir, tests_dir, output_dir, repo_root
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _run_generate(source_dir: Path, tests_dir: Path, output_dir: Path, repo_root: Path) -> int:
    """Generate all module guides, INDEX.md, and .hash file.

    Args:
        source_dir: Package directory to document.
        tests_dir: Test suite used for usage examples.
        output_dir: Destination .ai-docs directory (created if absent).
        repo_root: Repository root, forwarded to build_guide.

    Returns:
        0 always; generation failures surface as exceptions.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # Clean stale files so removed modules don't leave orphaned guides.
    for existing in output_dir.glob("*.md"):
        existing.unlink()
    hash_file = output_dir / HASH_FILE
    if hash_file.exists():
        hash_file.unlink()

    table = build_symbol_table(source_dir)
    generated: list[tuple[str, int]] = []

    for module_name in _discover_modules(source_dir):
        data = build_guide(module_name, source_dir, tests_dir, table, TEST_DIR_OVERRIDES, repo_root)
        if not data.classes and not data.functions:
            print(f" skip {module_name} (no public symbols)")
            continue

        content = render_guide(data)
        content = manage_guide_size(data, content)
        content = _normalize_whitespace(content)

        guide_path = output_dir / f"{module_name}.md"
        # Explicit UTF-8 so output is identical across platform default encodings.
        guide_path.write_text(content, encoding="utf-8")
        size = len(content.encode("utf-8"))
        generated.append((module_name, size))
        print(f" wrote {module_name}.md ({size:,} bytes)")

    # INDEX.md
    index_content = _normalize_whitespace(_render_index(generated))
    (output_dir / "INDEX.md").write_text(index_content, encoding="utf-8")
    # Report byte size, consistent with the per-module sizes above
    # (previously this printed the character count).
    index_size = len(index_content.encode("utf-8"))
    print(f" wrote INDEX.md ({index_size:,} bytes)")

    # .hash
    source_hash = compute_source_hash(source_dir, tests_dir)
    (output_dir / HASH_FILE).write_text(source_hash + "\n", encoding="utf-8")
    print(f" wrote {HASH_FILE}")

    total = sum(size for _, size in generated)
    print(f"\nGenerated {len(generated)} guides ({total:,} bytes total)")
    return 0
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _run_check(source_dir: Path, tests_dir: Path, output_dir: Path) -> int:
    """Check that .ai-docs/ exists and matches the current sources.

    Prints FAIL/WARNING diagnostics; returns 0 when valid, 1 otherwise.
    """
    if not output_dir.is_dir():
        print("FAIL: .ai-docs/ directory does not exist. Run 'generate' first.", file=sys.stderr)
        return 1

    report = validate_all(output_dir, source_dir, tests_dir, excluded_modules=EXCLUDED_MODULES)

    if not report.is_fresh:
        print("FAIL: .ai-docs/ is stale (source hash mismatch)")
    if report.missing_symbols:
        print(f"FAIL: {len(report.missing_symbols)} public symbols missing from guides:")
        for sym in report.missing_symbols:
            print(f" - {sym}")
    if report.size_violations:
        print(f"WARNING: {len(report.size_violations)} guides exceed size limit:")
        for guide_name, guide_size in report.size_violations:
            print(f" - {guide_name}: {guide_size:,} bytes")

    if not report.is_valid:
        return 1
    print("OK: .ai-docs/ is up-to-date")
    return 0
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _render_index(generated: list[tuple[str, int]]) -> str:
|
|
145
|
+
"""Render INDEX.md with reading order, task lookup, imports, and size table."""
|
|
146
|
+
lines: list[str] = [
|
|
147
|
+
"# AI Documentation Index",
|
|
148
|
+
"",
|
|
149
|
+
"Auto-generated guide index. Do not edit manually.",
|
|
150
|
+
"",
|
|
151
|
+
"## Reading Order",
|
|
152
|
+
"",
|
|
153
|
+
]
|
|
154
|
+
for i, (name, _) in enumerate(generated, 1):
|
|
155
|
+
lines.append(f"{i}. [{name}]({name}.md)")
|
|
156
|
+
|
|
157
|
+
lines.extend([
|
|
158
|
+
"",
|
|
159
|
+
"## Task-Based Lookup",
|
|
160
|
+
"",
|
|
161
|
+
"| Task | Guide |",
|
|
162
|
+
"| ---- | ----- |",
|
|
163
|
+
])
|
|
164
|
+
task_map = {
|
|
165
|
+
"Create/read documents": "documents",
|
|
166
|
+
"Store/retrieve documents": "document_store",
|
|
167
|
+
"Call LLMs": "llm",
|
|
168
|
+
"Deploy pipelines": "deployment",
|
|
169
|
+
"Load templates": "prompt_manager",
|
|
170
|
+
"Process images": "images",
|
|
171
|
+
"Define flows/tasks": "pipeline",
|
|
172
|
+
"Configure settings": "settings",
|
|
173
|
+
"Handle errors": "exceptions",
|
|
174
|
+
"Log messages": "logging",
|
|
175
|
+
"Debug & observe traces": "observability",
|
|
176
|
+
"Test pipelines": "testing",
|
|
177
|
+
}
|
|
178
|
+
guide_set = {name for name, _ in generated}
|
|
179
|
+
for task, guide in task_map.items():
|
|
180
|
+
if guide in guide_set:
|
|
181
|
+
lines.append(f"| {task} | [{guide}]({guide}.md) |")
|
|
182
|
+
|
|
183
|
+
lines.extend([
|
|
184
|
+
"",
|
|
185
|
+
"## Module Sizes",
|
|
186
|
+
"",
|
|
187
|
+
"| Module | Size |",
|
|
188
|
+
"| ------ | ---- |",
|
|
189
|
+
])
|
|
190
|
+
for name, size in generated:
|
|
191
|
+
lines.append(f"| {name} | {size:,} bytes |")
|
|
192
|
+
total = sum(size for _, size in generated)
|
|
193
|
+
lines.append(f"| **Total** | **{total:,} bytes** |")
|
|
194
|
+
lines.append("")
|
|
195
|
+
|
|
196
|
+
return "\n".join(lines)
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
"""AST-based symbol extraction from Python source files.
|
|
2
|
+
|
|
3
|
+
Extracts class/function signatures, inheritance chains,
|
|
4
|
+
and builds a symbol table for dependency resolution.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import ast
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(frozen=True)
class MethodInfo:
    """Extracted method/property metadata from a class body."""

    name: str          # Method name as written in source.
    signature: str     # "(args) -> ret" rendered from the AST.
    docstring: str     # Docstring text, or "" when absent.
    source: str        # Full source text, including decorators.
    is_property: bool     # True when decorated with @property.
    is_classmethod: bool  # True when decorated with @classmethod.
    is_abstract: bool     # True when decorated with @abstractmethod.
    line_count: int    # Source lines spanned by the body, excluding the docstring.
    # Inheritance markers: not set by the extractor in this file —
    # presumably filled in when a method is copied from a base class (verify in guide_builder).
    is_inherited: bool = False
    inherited_from: str | None = None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(frozen=True)
class ClassInfo:
    """Extracted class metadata including methods, validators, and class variables."""

    name: str
    bases: tuple[str, ...]  # Base-class expressions as unparsed source, e.g. "Generic[T]".
    docstring: str          # Docstring text, or "" when absent.
    is_public: bool         # Per is_public_name naming convention.
    class_vars: tuple[tuple[str, str, str], ...]  # (name, type_annotation, default_value)
    methods: tuple[MethodInfo, ...]     # All methods, including validators.
    validators: tuple[MethodInfo, ...]  # Subset decorated with field_validator/model_validator.
    module_path: str        # Dotted module path, e.g. "ai_pipeline_core.documents.document".
    decorators: tuple[str, ...] = ()    # Class decorator expressions as source text.
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass(frozen=True)
class FunctionInfo:
    """Extracted module-level function metadata."""

    name: str        # Function name as written in source.
    signature: str   # "(args) -> ret" rendered from the AST.
    docstring: str   # Docstring text, or "" when absent.
    source: str      # Full source text, including decorators.
    is_public: bool  # Per is_public_name naming convention.
    is_async: bool   # True for "async def".
    line_count: int  # Source lines spanned by the body, excluding the docstring.
    module_path: str  # Dotted module path, e.g. "ai_pipeline_core.llm.client".
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass(frozen=True)
class ModuleInfo:
    """Parsed module containing its classes and functions."""

    name: str        # File stem, e.g. "document" for document.py.
    path: Path       # Filesystem path the module was parsed from.
    docstring: str   # Module docstring, or "" when absent.
    is_public: bool  # True when any top-level class or function is public.
    classes: tuple[ClassInfo, ...]       # Top-level classes only (nested are skipped).
    functions: tuple[FunctionInfo, ...]  # Top-level (async) functions only.
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dataclass
class SymbolTable:
    """Mutable during construction, used read-only after building.

    Maps class and function names to their ClassInfo/FunctionInfo objects,
    and provides class_to_module/function_to_module lookups for dependency resolution.

    Note: keys are bare symbol names, so two modules defining the same
    name collide — build_symbol_table keeps the later (sorted-order) one.
    """

    classes: dict[str, ClassInfo] = field(default_factory=dict)
    functions: dict[str, FunctionInfo] = field(default_factory=dict)
    # Symbol name -> top-level package/module grouping it belongs to.
    class_to_module: dict[str, str] = field(default_factory=dict)
    function_to_module: dict[str, str] = field(default_factory=dict)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# Known external base classes that get stub representations
|
|
84
|
+
EXTERNAL_STUBS: dict[str, str] = {
|
|
85
|
+
"BaseModel": "Pydantic base model. Fields are typed class attributes.",
|
|
86
|
+
"BaseSettings": "Pydantic settings model. Loads values from environment variables.",
|
|
87
|
+
"ABC": "Python abstract base class marker.",
|
|
88
|
+
"Generic": "Python generic base class for parameterized types.",
|
|
89
|
+
"list": "Python built-in list.",
|
|
90
|
+
"dict": "Python built-in dictionary.",
|
|
91
|
+
"StrEnum": "String enumeration base class.",
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def is_public_name(name: str) -> bool:
    """Return True for public names.

    Dunder names (``__init__``) count as public; any other
    underscore-prefixed name is private.
    """
    is_dunder = name.startswith("__") and name.endswith("__")
    return is_dunder or not name.startswith("_")
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def parse_module(path: Path) -> ModuleInfo:
    """Parse a single .py file and return all extracted symbols.

    Only top-level classes and (async) functions are extracted; nested
    definitions are ignored. The module counts as public when it defines
    at least one public class or function.

    Raises:
        SyntaxError: If the file is not valid Python.
        OSError: If the file cannot be read.
    """
    source = path.read_text(encoding="utf-8")
    source_lines = source.splitlines()
    tree = ast.parse(source)

    module_doc = ast.get_docstring(tree) or ""
    module_path = _module_path(path)

    classes: list[ClassInfo] = []
    functions: list[FunctionInfo] = []

    # Walk only tree.body so nested defs/classes are skipped.
    for node in tree.body:
        if isinstance(node, ast.ClassDef):
            classes.append(_extract_class(node, source_lines, module_path))
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            functions.append(_extract_function(node, source_lines, module_path))

    module_public = any(c.is_public for c in classes) or any(f.is_public for f in functions)

    return ModuleInfo(
        name=path.stem,
        path=path,
        docstring=module_doc,
        is_public=module_public,
        classes=tuple(classes),
        functions=tuple(functions),
    )
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def build_symbol_table(source_dir: Path) -> SymbolTable:
    """Parse all .py files under source_dir and build a unified symbol table.

    Private modules (leading underscore, except __init__.py) are skipped.
    Symbols are keyed by bare name, so a name defined in two modules keeps
    only the later (sorted rglob order) definition.
    """
    table = SymbolTable()

    for py_file in sorted(source_dir.rglob("*.py")):
        if py_file.name.startswith("_") and py_file.name != "__init__.py":
            continue
        module = parse_module(py_file)

        # Grouping key: subpackage name for nested files, file stem at top level
        # (mirrors _discover_modules in cli.py).
        relative = py_file.relative_to(source_dir)
        if len(relative.parts) > 1:
            package_name = relative.parts[0]
        else:
            package_name = relative.stem

        for cls in module.classes:
            table.classes[cls.name] = cls
            table.class_to_module[cls.name] = package_name
        for func in module.functions:
            table.functions[func.name] = func
            table.function_to_module[func.name] = package_name

    return table
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def resolve_dependencies(
    root_classes: list[str],
    table: SymbolTable,
) -> tuple[list[ClassInfo], set[str]]:
    """Resolve transitive base-class dependencies for *root_classes*.

    Returns (ClassInfo list with bases preceding subclasses, external base names).
    """
    ordered: list[ClassInfo] = []
    externals: set[str] = set()
    seen: set[str] = set()

    def walk(class_name: str) -> None:
        # Post-order DFS: a class is appended only after all its bases.
        if class_name in seen:
            return
        seen.add(class_name)

        # Known stubs and unknown names are recorded, not expanded.
        if class_name in EXTERNAL_STUBS or class_name not in table.classes:
            externals.add(class_name)
            return

        info = table.classes[class_name]
        for base_expr in info.bases:
            # Strip generic parameters: "Generic[T]" -> "Generic".
            walk(base_expr.split("[")[0])

        ordered.append(info)

    for root_name in root_classes:
        walk(root_name)

    return ordered, externals
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
# ---------------------------------------------------------------------------
|
|
191
|
+
# Private helpers
|
|
192
|
+
# ---------------------------------------------------------------------------
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _module_path(path: Path) -> str:
|
|
196
|
+
"""Convert filesystem path to dotted module path.
|
|
197
|
+
|
|
198
|
+
e.g. ai_pipeline_core/documents/document.py -> ai_pipeline_core.documents.document
|
|
199
|
+
"""
|
|
200
|
+
parts = list(path.with_suffix("").parts)
|
|
201
|
+
# Find the package root (ai_pipeline_core)
|
|
202
|
+
for i, part in enumerate(parts):
|
|
203
|
+
if part == "ai_pipeline_core":
|
|
204
|
+
return ".".join(parts[i:])
|
|
205
|
+
return ".".join(parts)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _decorator_name(decorator: ast.expr) -> str:
|
|
209
|
+
if isinstance(decorator, ast.Call):
|
|
210
|
+
return _decorator_name(decorator.func)
|
|
211
|
+
if isinstance(decorator, ast.Attribute):
|
|
212
|
+
return decorator.attr
|
|
213
|
+
if isinstance(decorator, ast.Name):
|
|
214
|
+
return decorator.id
|
|
215
|
+
return ""
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _body_line_count(node: ast.FunctionDef | ast.AsyncFunctionDef) -> int:
|
|
219
|
+
if not node.body:
|
|
220
|
+
return 0
|
|
221
|
+
first = node.body[0]
|
|
222
|
+
body_nodes = node.body
|
|
223
|
+
is_docstring = isinstance(first, ast.Expr) and isinstance(first.value, ast.Constant) and isinstance(first.value.value, str)
|
|
224
|
+
if is_docstring:
|
|
225
|
+
body_nodes = node.body[1:]
|
|
226
|
+
if not body_nodes:
|
|
227
|
+
return 0
|
|
228
|
+
start = body_nodes[0].lineno
|
|
229
|
+
end = body_nodes[-1].end_lineno or body_nodes[-1].lineno
|
|
230
|
+
return end - start + 1
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _extract_signature(node: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
|
|
234
|
+
args_str = ast.unparse(node.args)
|
|
235
|
+
ret = f" -> {ast.unparse(node.returns)}" if node.returns else ""
|
|
236
|
+
return f"({args_str}){ret}"
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _get_source(source_lines: list[str], node: ast.AST) -> str:
|
|
240
|
+
decoratable = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
|
|
241
|
+
if isinstance(node, decoratable) and node.decorator_list:
|
|
242
|
+
start = node.decorator_list[0].lineno - 1
|
|
243
|
+
else:
|
|
244
|
+
start: int = node.lineno - 1 # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType, reportUnknownVariableType]
|
|
245
|
+
end: int = node.end_lineno or node.lineno # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType, reportUnknownVariableType]
|
|
246
|
+
return "\n".join(source_lines[start:end])
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _is_validator(node: ast.FunctionDef | ast.AsyncFunctionDef) -> bool:
    """True when decorated with pydantic's field_validator or model_validator."""
    for deco in node.decorator_list:
        if _decorator_name(deco) in ("field_validator", "model_validator"):
            return True
    return False
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def _extract_method(
    node: ast.FunctionDef | ast.AsyncFunctionDef,
    source_lines: list[str],
) -> MethodInfo:
    """Build MethodInfo for one method, flagging property/classmethod/abstract decorators."""
    # Bare decorator names; "@a.b(...)" resolves to "b" via _decorator_name.
    decorator_names = {_decorator_name(d) for d in node.decorator_list}
    return MethodInfo(
        name=node.name,
        signature=_extract_signature(node),
        docstring=ast.get_docstring(node) or "",
        source=_get_source(source_lines, node),
        is_property="property" in decorator_names,
        is_classmethod="classmethod" in decorator_names,
        is_abstract="abstractmethod" in decorator_names,
        line_count=_body_line_count(node),
    )
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _extract_class(node: ast.ClassDef, source_lines: list[str], module_path: str) -> ClassInfo:
    """Build ClassInfo for a class: bases, methods, validators, and public class vars."""
    docstring = ast.get_docstring(node) or ""
    bases = [ast.unparse(base) for base in node.bases]

    methods: list[MethodInfo] = []
    validators: list[MethodInfo] = []
    class_vars: list[tuple[str, str, str]] = []

    for item in node.body:
        if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
            method = _extract_method(item, source_lines)
            methods.append(method)
            # Validators also remain in the general methods list.
            if _is_validator(item):
                validators.append(method)
        elif isinstance(item, ast.AnnAssign) and item.target and isinstance(item.target, ast.Name):
            # Annotated class attribute: "name: type" or "name: type = default".
            name = item.target.id
            if is_public_name(name):
                type_ann = ast.unparse(item.annotation) if item.annotation else ""
                default = ast.unparse(item.value) if item.value else ""
                class_vars.append((name, type_ann, default))
        elif isinstance(item, ast.Assign) and len(item.targets) == 1 and isinstance(item.targets[0], ast.Name):
            # Bare assignment "name = default" — single-target only;
            # tuple/chained assignments are skipped.
            name = item.targets[0].id
            if is_public_name(name):
                default = ast.unparse(item.value)
                class_vars.append((name, "", default))

    return ClassInfo(
        name=node.name,
        bases=tuple(bases),
        docstring=docstring,
        is_public=is_public_name(node.name),
        class_vars=tuple(class_vars),
        methods=tuple(methods),
        validators=tuple(validators),
        module_path=module_path,
        decorators=tuple(ast.unparse(d) for d in node.decorator_list),
    )
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def _extract_function(
    node: ast.FunctionDef | ast.AsyncFunctionDef,
    source_lines: list[str],
    module_path: str,
) -> FunctionInfo:
    """Build FunctionInfo for a top-level (async) function."""
    return FunctionInfo(
        name=node.name,
        signature=_extract_signature(node),
        docstring=ast.get_docstring(node) or "",
        source=_get_source(source_lines, node),
        is_public=is_public_name(node.name),
        is_async=isinstance(node, ast.AsyncFunctionDef),
        line_count=_body_line_count(node),
        module_path=module_path,
    )