ai-pipeline-core 0.2.6__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. ai_pipeline_core/__init__.py +78 -125
  2. ai_pipeline_core/deployment/__init__.py +34 -0
  3. ai_pipeline_core/deployment/base.py +861 -0
  4. ai_pipeline_core/deployment/contract.py +80 -0
  5. ai_pipeline_core/deployment/deploy.py +561 -0
  6. ai_pipeline_core/deployment/helpers.py +97 -0
  7. ai_pipeline_core/deployment/progress.py +126 -0
  8. ai_pipeline_core/deployment/remote.py +116 -0
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +12 -14
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +318 -1434
  30. ai_pipeline_core/documents/mime_type.py +37 -82
  31. ai_pipeline_core/documents/utils.py +4 -12
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +309 -0
  34. ai_pipeline_core/images/_processing.py +151 -0
  35. ai_pipeline_core/llm/__init__.py +6 -4
  36. ai_pipeline_core/llm/ai_messages.py +130 -81
  37. ai_pipeline_core/llm/client.py +327 -193
  38. ai_pipeline_core/llm/model_options.py +14 -86
  39. ai_pipeline_core/llm/model_response.py +60 -103
  40. ai_pipeline_core/llm/model_types.py +16 -34
  41. ai_pipeline_core/logging/__init__.py +2 -7
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -37
  44. ai_pipeline_core/logging/logging_mixin.py +15 -41
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/observability/_debug/_config.py +95 -0
  49. ai_pipeline_core/observability/_debug/_content.py +764 -0
  50. ai_pipeline_core/observability/_debug/_processor.py +98 -0
  51. ai_pipeline_core/observability/_debug/_summary.py +312 -0
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/observability/_debug/_writer.py +843 -0
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -283
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +16 -102
  70. ai_pipeline_core/settings.py +26 -31
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
  74. {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
  75. ai_pipeline_core/documents/document_list.py +0 -420
  76. ai_pipeline_core/documents/flow_document.py +0 -112
  77. ai_pipeline_core/documents/task_document.py +0 -117
  78. ai_pipeline_core/documents/temporary_document.py +0 -74
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -483
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -718
  83. ai_pipeline_core/prefect.py +0 -63
  84. ai_pipeline_core/simple_runner/__init__.py +0 -14
  85. ai_pipeline_core/simple_runner/cli.py +0 -254
  86. ai_pipeline_core/simple_runner/simple_runner.py +0 -247
  87. ai_pipeline_core/storage/__init__.py +0 -8
  88. ai_pipeline_core/storage/storage.py +0 -628
  89. ai_pipeline_core/utils/__init__.py +0 -8
  90. ai_pipeline_core/utils/deploy.py +0 -373
  91. ai_pipeline_core/utils/remote_deployment.py +0 -269
  92. ai_pipeline_core-0.2.6.dist-info/METADATA +0 -500
  93. ai_pipeline_core-0.2.6.dist-info/RECORD +0 -41
  94. {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,324 @@
1
+ """AST-based symbol extraction from Python source files.
2
+
3
+ Extracts class/function signatures, inheritance chains,
4
+ and builds a symbol table for dependency resolution.
5
+ """
6
+
7
+ import ast
8
+ from dataclasses import dataclass, field
9
+ from pathlib import Path
10
+
11
+
12
@dataclass(frozen=True)
class MethodInfo:
    """Extracted method/property metadata from a class body.

    Immutable record; within this module it is produced by ``_extract_method``
    from a sync or async function definition found in a class body.
    """

    # Method name exactly as written after ``def``.
    name: str
    # "(<parameters>)" followed by " -> <return annotation>" when one exists.
    signature: str
    # Docstring text, or "" when the method has none.
    docstring: str
    # Source text of the method, starting at its first decorator when decorated.
    source: str
    # True when a decorator named "property" is applied.
    is_property: bool
    # True when a decorator named "classmethod" is applied.
    is_classmethod: bool
    # True when a decorator named "abstractmethod" is applied.
    is_abstract: bool
    # Lines spanned by the body statements, not counting a leading docstring.
    line_count: int
    # Never set by the extraction code in this module; presumably filled in by
    # a consumer that merges base-class methods - TODO confirm.
    is_inherited: bool = False
    # Presumably the name of the class the method was inherited from - TODO confirm.
    inherited_from: str | None = None
26
+
27
+
28
@dataclass(frozen=True)
class ClassInfo:
    """Extracted class metadata including methods, validators, and class variables.

    Immutable record; within this module it is produced by ``_extract_class``
    from a top-level class definition.
    """

    # Class name exactly as written after ``class``.
    name: str
    # Base-class expressions rendered back to source text (may contain
    # subscripts such as "Generic[T]").
    bases: tuple[str, ...]
    # Docstring text, or "" when the class has none.
    docstring: str
    # Result of is_public_name() for the class name.
    is_public: bool
    # Public class-level assignments with a single plain-name target, as
    # (name, type_annotation, default_value); a missing annotation or default
    # is stored as "".
    class_vars: tuple[tuple[str, str, str], ...]  # (name, type_annotation, default_value)
    # Every sync/async function defined directly in the class body.
    methods: tuple[MethodInfo, ...]
    # Subset of ``methods`` decorated with field_validator or model_validator.
    validators: tuple[MethodInfo, ...]
    # Dotted module path of the file that defines the class.
    module_path: str
    # Class decorator expressions rendered back to source text.
    decorators: tuple[str, ...] = ()
41
+
42
+
43
@dataclass(frozen=True)
class FunctionInfo:
    """Extracted module-level function metadata.

    Immutable record; within this module it is produced by
    ``_extract_function`` from a top-level sync or async function definition.
    """

    # Function name exactly as written after ``def``.
    name: str
    # "(<parameters>)" followed by " -> <return annotation>" when one exists.
    signature: str
    # Docstring text, or "" when the function has none.
    docstring: str
    # Source text of the function, starting at its first decorator when decorated.
    source: str
    # Result of is_public_name() for the function name.
    is_public: bool
    # True when the function is declared with ``async def``.
    is_async: bool
    # Lines spanned by the body statements, not counting a leading docstring.
    line_count: int
    # Dotted module path of the file that defines the function.
    module_path: str
55
+
56
+
57
@dataclass(frozen=True)
class ModuleInfo:
    """Parsed module containing its classes and functions.

    Immutable record returned by ``parse_module``. Only definitions found at
    module top level are included.
    """

    # File stem of the parsed file (e.g. "document" for document.py).
    name: str
    # Path that was passed to parse_module().
    path: Path
    # Module docstring, or "" when the file has none.
    docstring: str
    # True when at least one contained class or function is public.
    is_public: bool
    # Top-level classes in source order.
    classes: tuple[ClassInfo, ...]
    # Top-level sync/async functions in source order.
    functions: tuple[FunctionInfo, ...]
67
+
68
+
69
@dataclass
class SymbolTable:
    """Mutable during construction, used read-only after building.

    Maps class and function names to their ClassInfo/FunctionInfo objects,
    and provides class_to_module/function_to_module lookups for dependency resolution.

    All four dictionaries are keyed by the bare symbol name, so when
    ``build_symbol_table`` meets the same name in more than one file the entry
    written last replaces the earlier one.
    """

    # Class name -> extracted class metadata.
    classes: dict[str, ClassInfo] = field(default_factory=dict)
    # Function name -> extracted function metadata.
    functions: dict[str, FunctionInfo] = field(default_factory=dict)
    # Class name -> owning package name as assigned by build_symbol_table()
    # (first path component below the source dir, or the file stem).
    class_to_module: dict[str, str] = field(default_factory=dict)
    # Function name -> owning package name, same convention as class_to_module.
    function_to_module: dict[str, str] = field(default_factory=dict)
81
+
82
+
83
# Known external base classes that get stub representations.
# Keys are bare base-class names; values are one-line descriptions.
# resolve_dependencies() reports any name listed here as an external base
# instead of looking it up in the symbol table.
EXTERNAL_STUBS: dict[str, str] = {
    "BaseModel": "Pydantic base model. Fields are typed class attributes.",
    "BaseSettings": "Pydantic settings model. Loads values from environment variables.",
    "ABC": "Python abstract base class marker.",
    "Generic": "Python generic base class for parameterized types.",
    "list": "Python built-in list.",
    "dict": "Python built-in dictionary.",
    "StrEnum": "String enumeration base class.",
}
93
+
94
+
95
def is_public_name(name: str) -> bool:
    """Return True when *name* is public under Python naming conventions.

    A name without a leading underscore is public. Among underscore-prefixed
    names only dunders (``__x__``) are treated as public.
    """
    if not name.startswith("_"):
        return True
    # Underscore-prefixed: public only when it is a dunder name.
    return name.startswith("__") and name.endswith("__")
100
+
101
+
102
def parse_module(path: Path) -> ModuleInfo:
    """Parse a single .py file and return all extracted symbols.

    Only definitions at module top level are collected: each class goes
    through ``_extract_class`` and each sync/async function through
    ``_extract_function``. The module counts as public when at least one
    collected class or function has a public name.

    Args:
        path: Python source file to read; decoded as UTF-8.

    Returns:
        ModuleInfo named after the file stem.

    Raises:
        SyntaxError: If the file is not valid Python. The exception's
            ``filename`` is set to ``path`` so the failing file can be
            identified when many files are scanned.
    """
    source = path.read_text(encoding="utf-8")
    source_lines = source.splitlines()
    # Pass the filename so parse errors name the offending file instead of
    # the default "<unknown>".
    tree = ast.parse(source, filename=str(path))

    module_doc = ast.get_docstring(tree) or ""
    module_path = _module_path(path)

    classes: list[ClassInfo] = []
    functions: list[FunctionInfo] = []

    # Nested definitions are intentionally not visited; only tree.body is scanned.
    for node in tree.body:
        if isinstance(node, ast.ClassDef):
            classes.append(_extract_class(node, source_lines, module_path))
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            functions.append(_extract_function(node, source_lines, module_path))

    module_public = any(c.is_public for c in classes) or any(f.is_public for f in functions)

    return ModuleInfo(
        name=path.stem,
        path=path,
        docstring=module_doc,
        is_public=module_public,
        classes=tuple(classes),
        functions=tuple(functions),
    )
130
+
131
+
132
def build_symbol_table(source_dir: Path) -> SymbolTable:
    """Parse all .py files under source_dir and build a unified symbol table.

    Files are visited in sorted path order. A file whose name starts with an
    underscore is skipped unless it is ``__init__.py``. Symbols are stored under
    their bare name, so a later file replaces an earlier entry of the same name.
    Each symbol is attributed to the first path component below ``source_dir``,
    or to the file stem for files located directly in ``source_dir``.
    """
    symbols = SymbolTable()

    for file_path in sorted(source_dir.rglob("*.py")):
        file_name = file_path.name
        if file_name != "__init__.py" and file_name.startswith("_"):
            continue

        parsed = parse_module(file_path)

        rel_parts = file_path.relative_to(source_dir).parts
        owner = rel_parts[0] if len(rel_parts) > 1 else file_path.stem

        symbols.classes.update({cls.name: cls for cls in parsed.classes})
        symbols.class_to_module.update({cls.name: owner for cls in parsed.classes})
        symbols.functions.update({func.name: func for func in parsed.functions})
        symbols.function_to_module.update({func.name: owner for func in parsed.functions})

    return symbols
155
+
156
+
157
def resolve_dependencies(
    root_classes: list[str],
    table: SymbolTable,
) -> tuple[list[ClassInfo], set[str]]:
    """Resolve transitive dependencies for a set of root classes.

    Walks the base-class chain of every root depth-first. Bases are appended
    before the classes deriving from them, and each name is processed once.
    A name listed in EXTERNAL_STUBS, or absent from ``table.classes``, is
    reported as external and not followed further.

    Returns (resolved ClassInfo list in topological order, external base names).
    """
    ordered: list[ClassInfo] = []
    externals: set[str] = set()
    seen: set[str] = set()

    def walk(class_name: str) -> None:
        if class_name in seen:
            return
        seen.add(class_name)

        is_internal = class_name not in EXTERNAL_STUBS and class_name in table.classes
        if not is_internal:
            externals.add(class_name)
            return

        info = table.classes[class_name]
        for base_expr in info.bases:
            # Drop any subscript so "Generic[T]" is looked up as "Generic".
            walk(base_expr.partition("[")[0])

        ordered.append(info)

    for root_name in root_classes:
        walk(root_name)

    return ordered, externals
188
+
189
+
190
+ # ---------------------------------------------------------------------------
191
+ # Private helpers
192
+ # ---------------------------------------------------------------------------
193
+
194
+
195
def _module_path(path: Path) -> str:
    """Convert filesystem path to dotted module path.

    e.g. ai_pipeline_core/documents/document.py -> ai_pipeline_core.documents.document

    Everything before the first ``ai_pipeline_core`` component is dropped; when
    that component is absent, all path components are joined.
    """
    segments = path.with_suffix("").parts
    try:
        # Package root is the first component named ai_pipeline_core.
        root_index = segments.index("ai_pipeline_core")
    except ValueError:
        root_index = 0
    return ".".join(segments[root_index:])
206
+
207
+
208
def _decorator_name(decorator: ast.expr) -> str:
    """Return the trailing identifier of a decorator expression.

    Call wrappers are peeled off first (``@deco(...)`` -> ``deco``). A plain
    name yields its identifier, a dotted access yields the last attribute, and
    any other expression yields "".
    """
    target = decorator
    while isinstance(target, ast.Call):
        target = target.func
    if isinstance(target, ast.Name):
        return target.id
    if isinstance(target, ast.Attribute):
        return target.attr
    return ""
216
+
217
+
218
+ def _body_line_count(node: ast.FunctionDef | ast.AsyncFunctionDef) -> int:
219
+ if not node.body:
220
+ return 0
221
+ first = node.body[0]
222
+ body_nodes = node.body
223
+ is_docstring = isinstance(first, ast.Expr) and isinstance(first.value, ast.Constant) and isinstance(first.value.value, str)
224
+ if is_docstring:
225
+ body_nodes = node.body[1:]
226
+ if not body_nodes:
227
+ return 0
228
+ start = body_nodes[0].lineno
229
+ end = body_nodes[-1].end_lineno or body_nodes[-1].lineno
230
+ return end - start + 1
231
+
232
+
233
+ def _extract_signature(node: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
234
+ args_str = ast.unparse(node.args)
235
+ ret = f" -> {ast.unparse(node.returns)}" if node.returns else ""
236
+ return f"({args_str}){ret}"
237
+
238
+
239
def _get_source(source_lines: list[str], node: ast.AST) -> str:
    """Return the source text covered by *node*, joined with newlines.

    For a decorated function or class the text starts at the first decorator
    line; otherwise it starts at the node's own first line. It ends at the
    node's last line (or its first line when no end line is recorded).
    """
    if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)) and node.decorator_list:
        first_index = node.decorator_list[0].lineno - 1
    else:
        first_index: int = node.lineno - 1  # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType, reportUnknownVariableType]
    # AST line numbers are 1-based and inclusive, so the end line doubles as
    # the exclusive slice bound.
    stop_index: int = node.end_lineno or node.lineno  # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType, reportUnknownVariableType]
    selected = source_lines[first_index:stop_index]
    return "\n".join(selected)
247
+
248
+
249
+ def _is_validator(node: ast.FunctionDef | ast.AsyncFunctionDef) -> bool:
250
+ validator_names = ("field_validator", "model_validator")
251
+ return any(_decorator_name(d) in validator_names for d in node.decorator_list)
252
+
253
+
254
def _extract_method(
    node: ast.FunctionDef | ast.AsyncFunctionDef,
    source_lines: list[str],
) -> MethodInfo:
    """Build a MethodInfo from a function definition found in a class body.

    The property/classmethod/abstract flags are derived from the trailing
    identifiers of the applied decorators.
    """
    applied = frozenset(_decorator_name(dec) for dec in node.decorator_list)
    doc = ast.get_docstring(node)
    return MethodInfo(
        name=node.name,
        signature=_extract_signature(node),
        docstring=doc if doc else "",
        source=_get_source(source_lines, node),
        is_property="property" in applied,
        is_classmethod="classmethod" in applied,
        is_abstract="abstractmethod" in applied,
        line_count=_body_line_count(node),
    )
269
+
270
+
271
def _extract_class(node: ast.ClassDef, source_lines: list[str], module_path: str) -> ClassInfo:
    """Build a ClassInfo from a class definition.

    Collects every sync/async function defined directly in the class body as a
    method (validators are additionally listed separately), plus public
    class-level assignments that target a single plain name.
    """
    methods: list[MethodInfo] = []
    validators: list[MethodInfo] = []
    class_vars: list[tuple[str, str, str]] = []

    for member in node.body:
        if isinstance(member, (ast.FunctionDef, ast.AsyncFunctionDef)):
            info = _extract_method(member, source_lines)
            methods.append(info)
            if _is_validator(member):
                validators.append(info)
            continue

        if isinstance(member, ast.AnnAssign):
            # Annotated assignment: keep annotation and default when present.
            target = member.target
            if isinstance(target, ast.Name) and is_public_name(target.id):
                annotation = ast.unparse(member.annotation) if member.annotation else ""
                default = ast.unparse(member.value) if member.value else ""
                class_vars.append((target.id, annotation, default))
            continue

        if isinstance(member, ast.Assign) and len(member.targets) == 1:
            # Plain assignment: there is no annotation to record.
            target = member.targets[0]
            if isinstance(target, ast.Name) and is_public_name(target.id):
                class_vars.append((target.id, "", ast.unparse(member.value)))

    return ClassInfo(
        name=node.name,
        bases=tuple(ast.unparse(base) for base in node.bases),
        docstring=ast.get_docstring(node) or "",
        is_public=is_public_name(node.name),
        class_vars=tuple(class_vars),
        methods=tuple(methods),
        validators=tuple(validators),
        module_path=module_path,
        decorators=tuple(ast.unparse(dec) for dec in node.decorator_list),
    )
308
+
309
+
310
def _extract_function(
    node: ast.FunctionDef | ast.AsyncFunctionDef,
    source_lines: list[str],
    module_path: str,
) -> FunctionInfo:
    """Build a FunctionInfo from a module-level sync or async function definition."""
    function_name = node.name
    doc = ast.get_docstring(node)
    return FunctionInfo(
        name=function_name,
        signature=_extract_signature(node),
        docstring=doc if doc else "",
        source=_get_source(source_lines, node),
        is_public=is_public_name(function_name),
        is_async=isinstance(node, ast.AsyncFunctionDef),
        line_count=_body_line_count(node),
        module_path=module_path,
    )