ai-pipeline-core 0.2.6__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +78 -125
- ai_pipeline_core/deployment/__init__.py +34 -0
- ai_pipeline_core/deployment/base.py +861 -0
- ai_pipeline_core/deployment/contract.py +80 -0
- ai_pipeline_core/deployment/deploy.py +561 -0
- ai_pipeline_core/deployment/helpers.py +97 -0
- ai_pipeline_core/deployment/progress.py +126 -0
- ai_pipeline_core/deployment/remote.py +116 -0
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +37 -82
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +309 -0
- ai_pipeline_core/images/_processing.py +151 -0
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +130 -81
- ai_pipeline_core/llm/client.py +327 -193
- ai_pipeline_core/llm/model_options.py +14 -86
- ai_pipeline_core/llm/model_response.py +60 -103
- ai_pipeline_core/llm/model_types.py +16 -34
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/observability/_debug/_config.py +95 -0
- ai_pipeline_core/observability/_debug/_content.py +764 -0
- ai_pipeline_core/observability/_debug/_processor.py +98 -0
- ai_pipeline_core/observability/_debug/_summary.py +312 -0
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/observability/_debug/_writer.py +843 -0
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -283
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
- {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -483
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/simple_runner/__init__.py +0 -14
- ai_pipeline_core/simple_runner/cli.py +0 -254
- ai_pipeline_core/simple_runner/simple_runner.py +0 -247
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core/utils/deploy.py +0 -373
- ai_pipeline_core/utils/remote_deployment.py +0 -269
- ai_pipeline_core-0.2.6.dist-info/METADATA +0 -500
- ai_pipeline_core-0.2.6.dist-info/RECORD +0 -41
- {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/deployment/helpers.py
@@ -0,0 +1,97 @@
+"""Helper functions for pipeline deployments."""
+
+import asyncio
+import re
+from typing import Any, Literal, TypedDict
+
+import httpx
+
+from ai_pipeline_core.deployment.contract import CompletedRun, FailedRun, ProgressRun
+from ai_pipeline_core.documents import Document
+from ai_pipeline_core.logging import get_pipeline_logger
+
+logger = get_pipeline_logger(__name__)
+
+
+class DownloadedDocument(Document):
+    """Concrete document for downloaded content."""
+
+
+class StatusPayload(TypedDict):
+    """Webhook payload for Prefect state transitions (sub-flow level)."""
+
+    type: Literal["status"]
+    flow_run_id: str
+    project_name: str
+    step: int
+    total_steps: int
+    flow_name: str
+    state: str
+    state_name: str
+    timestamp: str
+
+
+def class_name_to_deployment_name(class_name: str) -> str:
+    """Convert PascalCase to kebab-case: ResearchPipeline -> research-pipeline."""
+    name = re.sub(r"(?<!^)(?=[A-Z])", "-", class_name)
+    return name.lower()
+
+
+def extract_generic_params(cls: type, base_class: type) -> tuple[type | None, type | None]:
+    """Extract TOptions and TResult from a generic base class's args."""
+    for base in getattr(cls, "__orig_bases__", []):
+        origin = getattr(base, "__origin__", None)
+        if origin is base_class:
+            args = getattr(base, "__args__", ())
+            if len(args) == 2:
+                return args[0], args[1]
+
+    return None, None
+
+
+async def download_documents(urls: list[str]) -> list[Document]:
+    """Download documents from URLs."""
+    documents: list[Document] = []
+    async with httpx.AsyncClient(timeout=60, follow_redirects=True) as client:
+        for url in urls:
+            response = await client.get(url)
+            response.raise_for_status()
+            filename = url.split("/")[-1].split("?")[0] or "document"
+            documents.append(DownloadedDocument(name=filename, content=response.content))
+    return documents
+
+
+async def upload_documents(documents: list[Document], url_mapping: dict[str, str]) -> None:
+    """Upload documents to their mapped URLs."""
+    async with httpx.AsyncClient(timeout=60, follow_redirects=True) as client:
+        for doc in documents:
+            if doc.name in url_mapping:
+                response = await client.put(
+                    url_mapping[doc.name],
+                    content=doc.content,
+                    headers={"Content-Type": doc.mime_type},
+                )
+                response.raise_for_status()
+
+
+async def send_webhook(
+    url: str,
+    payload: ProgressRun | CompletedRun | FailedRun,
+    max_retries: int = 3,
+    retry_delay: float = 10.0,
+) -> None:
+    """Send webhook with retries."""
+    data: dict[str, Any] = payload.model_dump(mode="json")
+    for attempt in range(max_retries):
+        try:
+            async with httpx.AsyncClient(timeout=30) as client:
+                response = await client.post(url, json=data, follow_redirects=True)
+                response.raise_for_status()
+                return
+        except Exception as e:
+            if attempt < max_retries - 1:
+                logger.warning(f"Webhook retry {attempt + 1}/{max_retries}: {e}")
+                await asyncio.sleep(retry_delay)
+            else:
+                logger.exception(f"Webhook failed after {max_retries} attempts")
+                raise
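Taken together, these helpers cover the I/O side of a deployment run: deployment naming, document transfer, and webhook delivery. A minimal usage sketch, assuming the module path from the file listing above and hypothetical URLs (only the helper signatures come from the diff):

```python
import asyncio

from ai_pipeline_core.deployment.helpers import (
    class_name_to_deployment_name,
    download_documents,
    upload_documents,
)


async def transfer() -> None:
    # PascalCase class name -> kebab-case deployment name
    assert class_name_to_deployment_name("ResearchPipeline") == "research-pipeline"

    # Each URL becomes a DownloadedDocument named after the last path segment
    docs = await download_documents(["https://example.com/inputs/report.md"])

    # Upload results to pre-signed URLs keyed by document name
    await upload_documents(docs, {"report.md": "https://example.com/uploads/report.md"})


asyncio.run(transfer())
```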
ai_pipeline_core/deployment/progress.py
@@ -0,0 +1,126 @@
+"""Intra-flow progress tracking with order-preserving webhook delivery."""
+
+import asyncio
+import contextlib
+from collections.abc import Generator
+from contextlib import contextmanager
+from contextvars import ContextVar
+from dataclasses import dataclass
+from datetime import UTC, datetime
+from uuid import UUID
+
+from ai_pipeline_core.logging import get_pipeline_logger
+
+from .contract import ProgressRun
+from .helpers import send_webhook
+
+logger = get_pipeline_logger(__name__)
+
+
+@dataclass(frozen=True, slots=True)
+class ProgressContext:
+    """Internal context holding state for progress calculation and webhook delivery."""
+
+    webhook_url: str
+    project_name: str
+    run_id: str
+    flow_run_id: str
+    flow_name: str
+    step: int
+    total_steps: int
+    total_minutes: float
+    completed_minutes: float
+    current_flow_minutes: float
+    queue: asyncio.Queue[ProgressRun | None]
+
+
+_context: ContextVar[ProgressContext | None] = ContextVar("progress_context", default=None)
+
+
+async def update(fraction: float, message: str = "") -> None:
+    """Report intra-flow progress (0.0-1.0). No-op without context."""
+    ctx = _context.get()
+    if ctx is None or not ctx.webhook_url:
+        return
+
+    fraction = max(0.0, min(1.0, fraction))
+
+    if ctx.total_minutes > 0:
+        overall = (ctx.completed_minutes + ctx.current_flow_minutes * fraction) / ctx.total_minutes
+    else:
+        overall = fraction
+    overall = round(max(0.0, min(1.0, overall)), 4)
+
+    payload = ProgressRun(
+        flow_run_id=UUID(ctx.flow_run_id) if ctx.flow_run_id else UUID(int=0),
+        project_name=ctx.project_name,
+        state="RUNNING",
+        timestamp=datetime.now(UTC),
+        step=ctx.step,
+        total_steps=ctx.total_steps,
+        flow_name=ctx.flow_name,
+        status="progress",
+        progress=overall,
+        step_progress=round(fraction, 4),
+        message=message,
+    )
+
+    ctx.queue.put_nowait(payload)
+
+
+async def webhook_worker(
+    queue: asyncio.Queue[ProgressRun | None],
+    webhook_url: str,
+    max_retries: int = 3,
+    retry_delay: float = 10.0,
+) -> None:
+    """Process webhooks sequentially with retries, preserving order."""
+    while True:
+        payload = await queue.get()
+        if payload is None:
+            queue.task_done()
+            break
+
+        with contextlib.suppress(Exception):
+            await send_webhook(webhook_url, payload, max_retries, retry_delay)
+
+        queue.task_done()
+
+
+@contextmanager
+def flow_context(  # noqa: PLR0917
+    webhook_url: str,
+    project_name: str,
+    run_id: str,
+    flow_run_id: str,
+    flow_name: str,
+    step: int,
+    total_steps: int,
+    flow_minutes: tuple[float, ...],
+    completed_minutes: float,
+    queue: asyncio.Queue[ProgressRun | None],
+) -> Generator[None, None, None]:
+    """Set up progress context for a flow. Framework internal use."""
+    current_flow_minutes = flow_minutes[step - 1] if step <= len(flow_minutes) else 1.0
+    total_minutes = sum(flow_minutes) if flow_minutes else current_flow_minutes
+    ctx = ProgressContext(
+        webhook_url=webhook_url,
+        project_name=project_name,
+        run_id=run_id,
+        flow_run_id=flow_run_id,
+        flow_name=flow_name,
+        step=step,
+        total_steps=total_steps,
+        total_minutes=total_minutes,
+        completed_minutes=completed_minutes,
+        current_flow_minutes=current_flow_minutes,
+        queue=queue,
+    )
+    token = _context.set(ctx)
+    try:
+        yield
+    finally:
+        _context.reset(token)
+
+
+__all__ = ["ProgressContext", "flow_context", "update", "webhook_worker"]
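flow_context and webhook_worker are framework internals, but the intended wiring is visible from their signatures: a background task drains the queue in order while flow code inside the context reports per-step fractions through update. A rough sketch under that assumption (the webhook URL, step weights, and identifiers below are invented):

```python
import asyncio

from ai_pipeline_core.deployment import progress


async def demo() -> None:
    queue: asyncio.Queue = asyncio.Queue()
    worker = asyncio.create_task(progress.webhook_worker(queue, "https://example.com/hook"))

    with progress.flow_context(
        webhook_url="https://example.com/hook",
        project_name="demo",
        run_id="run-1",
        flow_run_id="",             # empty -> UUID(int=0) placeholder in the payload
        flow_name="first-step",
        step=1,
        total_steps=2,
        flow_minutes=(10.0, 20.0),  # per-step weights; overall progress uses completed/total minutes
        completed_minutes=0.0,
        queue=queue,
    ):
        # 50% of a 10-minute step out of 30 total minutes -> overall progress ~0.1667
        await progress.update(0.5, "halfway through the first step")
        await progress.update(1.0, "first step done")

    queue.put_nowait(None)  # sentinel: worker exits after delivering queued payloads
    await worker


asyncio.run(demo())
```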
ai_pipeline_core/deployment/remote.py
@@ -0,0 +1,116 @@
+"""Remote deployment utilities for calling PipelineDeployment flows via Prefect."""
+
+import inspect
+from collections.abc import Callable
+from functools import wraps
+from typing import Any, ParamSpec, TypeVar, cast
+
+from prefect import get_client
+from prefect.client.orchestration import PrefectClient
+from prefect.client.schemas import FlowRun
+from prefect.context import AsyncClientContext
+from prefect.deployments.flow_runs import run_deployment
+from prefect.exceptions import ObjectNotFound
+
+from ai_pipeline_core.deployment import DeploymentContext, DeploymentResult, PipelineDeployment
+from ai_pipeline_core.observability.tracing import TraceLevel, set_trace_cost, trace
+from ai_pipeline_core.pipeline.options import FlowOptions
+from ai_pipeline_core.settings import settings
+
+P = ParamSpec("P")
+TOptions = TypeVar("TOptions", bound=FlowOptions)
+TResult = TypeVar("TResult", bound=DeploymentResult)
+
+
+def _is_already_traced(func: Callable[..., Any]) -> bool:
+    """Check if function or its __wrapped__ has __is_traced__ attribute."""
+    if getattr(func, "__is_traced__", False):
+        return True
+    wrapped = getattr(func, "__wrapped__", None)
+    return getattr(wrapped, "__is_traced__", False) if wrapped else False
+
+
+async def run_remote_deployment(deployment_name: str, parameters: dict[str, Any]) -> Any:
+    """Run a remote Prefect deployment, trying local client first then remote."""
+
+    async def _run(client: PrefectClient, as_subflow: bool) -> Any:
+        fr: FlowRun = await run_deployment(client=client, name=deployment_name, parameters=parameters, as_subflow=as_subflow)  # type: ignore
+        return await fr.state.result()  # type: ignore
+
+    async with get_client() as client:
+        try:
+            await client.read_deployment_by_name(name=deployment_name)
+            return await _run(client, True)  # noqa: FBT003
+        except ObjectNotFound:
+            pass
+
+    if not settings.prefect_api_url:
+        raise ValueError(f"{deployment_name} not found, PREFECT_API_URL not set")
+
+    async with PrefectClient(
+        api=settings.prefect_api_url,
+        api_key=settings.prefect_api_key,
+        auth_string=settings.prefect_api_auth_string,
+    ) as client:
+        try:
+            await client.read_deployment_by_name(name=deployment_name)
+            ctx = AsyncClientContext.model_construct(client=client, _httpx_settings=None, _context_stack=0)
+            with ctx:
+                return await _run(client, False)  # noqa: FBT003
+        except ObjectNotFound:
+            pass
+
+    raise ValueError(f"{deployment_name} deployment not found")
+
+
+def remote_deployment(
+    deployment_class: type[PipelineDeployment[TOptions, TResult]],
+    *,
+    deployment_name: str | None = None,
+    name: str | None = None,
+    trace_level: TraceLevel = "always",
+    trace_cost: float | None = None,
+) -> Callable[[Callable[P, TResult]], Callable[P, TResult]]:
+    """Decorator to call PipelineDeployment flows remotely with automatic serialization."""
+
+    def decorator(func: Callable[P, TResult]) -> Callable[P, TResult]:
+        fname = getattr(func, "__name__", deployment_class.name)
+
+        if _is_already_traced(func):
+            raise TypeError(f"@remote_deployment target '{fname}' already has @trace")
+
+        @wraps(func)
+        async def _wrapper(*args: P.args, **kwargs: P.kwargs) -> TResult:
+            sig = inspect.signature(func)
+            bound = sig.bind(*args, **kwargs)
+            bound.apply_defaults()
+
+            # Pass parameters with proper types - Prefect handles Pydantic serialization
+            parameters: dict[str, Any] = {}
+            for pname, value in bound.arguments.items():
+                if value is None and pname == "context":
+                    parameters[pname] = DeploymentContext()
+                else:
+                    parameters[pname] = value
+
+            full_name = f"{deployment_class.name}/{deployment_name or deployment_class.name}"
+
+            result = await run_remote_deployment(full_name, parameters)
+
+            if trace_cost is not None and trace_cost > 0:
+                set_trace_cost(trace_cost)
+
+            if isinstance(result, DeploymentResult):
+                return cast(TResult, result)
+            if isinstance(result, dict):
+                return cast(TResult, deployment_class.result_type(**cast(dict[str, Any], result)))
+            raise TypeError(f"Expected DeploymentResult, got {type(result).__name__}")
+
+        traced_wrapper = trace(
+            level=trace_level,
+            name=name or deployment_class.name,
+        )(_wrapper)
+
+        return traced_wrapper  # type: ignore[return-value]
+
+    return decorator
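The decorator's contract follows from the wrapper above: you decorate a stub whose parameters mirror the deployment's entry point, the body never executes, and the call is routed to the named Prefect deployment with the result coerced back into the deployment's result type. A hypothetical sketch; ResearchPipeline, ResearchOptions, ResearchResult, and the stub's parameters are invented for illustration and are not part of this package:

```python
from ai_pipeline_core.deployment import DeploymentContext
from ai_pipeline_core.deployment.remote import remote_deployment

# Assumed to be a PipelineDeployment[ResearchOptions, ResearchResult] defined in the
# consuming project; only remote_deployment itself comes from the diff above.
from my_project.pipelines import ResearchOptions, ResearchPipeline, ResearchResult


@remote_deployment(ResearchPipeline, trace_cost=0.25)
async def run_research(
    project_name: str,
    options: ResearchOptions,
    context: DeploymentContext | None = None,  # a None "context" argument is replaced with DeploymentContext()
) -> ResearchResult:
    ...  # never runs: the wrapper forwards the bound arguments to the remote deployment


# result = await run_research("demo", ResearchOptions())
```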
ai_pipeline_core/docs_generator/__init__.py
@@ -0,0 +1,54 @@
+"""AI-focused documentation generator.
+
+Generates dense, self-contained guides from source code and test suite
+for AI coding agents. Uses AST parsing, dependency resolution, and
+size management for guides with a 50KB warning threshold.
+"""
+
+from ai_pipeline_core.docs_generator.extractor import (
+    ClassInfo,
+    FunctionInfo,
+    MethodInfo,
+    ModuleInfo,
+    SymbolTable,
+    is_public_name,
+    parse_module,
+)
+from ai_pipeline_core.docs_generator.guide_builder import (
+    GuideData,
+    TestExample,
+    build_guide,
+    discover_tests,
+    select_examples,
+)
+from ai_pipeline_core.docs_generator.trimmer import manage_guide_size
+from ai_pipeline_core.docs_generator.validator import (
+    ValidationResult,
+    compute_source_hash,
+    validate_all,
+    validate_completeness,
+    validate_freshness,
+    validate_size,
+)
+
+__all__ = [
+    "ClassInfo",
+    "FunctionInfo",
+    "GuideData",
+    "MethodInfo",
+    "ModuleInfo",
+    "SymbolTable",
+    "TestExample",
+    "ValidationResult",
+    "build_guide",
+    "compute_source_hash",
+    "discover_tests",
+    "is_public_name",
+    "manage_guide_size",
+    "parse_module",
+    "select_examples",
+    "validate_all",
+    "validate_completeness",
+    "validate_freshness",
+    "validate_size",
+]
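The package re-exports the extractor, guide-builder, trimmer, and validator entry points so the generator can be driven programmatically as well as through the CLI below. A small sketch mirroring how cli.py calls the validator (the paths are placeholders):

```python
from pathlib import Path

from ai_pipeline_core.docs_generator import validate_all

# Same call shape as _run_check in cli.py below
result = validate_all(
    Path(".ai-docs"),          # generated guides
    Path("ai_pipeline_core"),  # source package
    Path("tests"),             # test suite
    excluded_modules=frozenset({"docs_generator"}),
)
if not result.is_valid:
    raise SystemExit("AI docs are stale or incomplete")
```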
ai_pipeline_core/docs_generator/cli.py
@@ -0,0 +1,196 @@
+"""CLI for AI documentation generation and validation."""
+
+import argparse
+import sys
+from pathlib import Path
+
+from ai_pipeline_core.docs_generator.extractor import build_symbol_table
+from ai_pipeline_core.docs_generator.guide_builder import build_guide, render_guide
+from ai_pipeline_core.docs_generator.trimmer import manage_guide_size
+from ai_pipeline_core.docs_generator.validator import (
+    HASH_FILE,
+    compute_source_hash,
+    validate_all,
+)
+
+EXCLUDED_MODULES: frozenset[str] = frozenset({"docs_generator"})
+
+
+def _normalize_whitespace(content: str) -> str:
+    """Strip trailing whitespace from each line and ensure final newline."""
+    lines = [line.rstrip() for line in content.splitlines()]
+    return "\n".join(lines) + "\n"
+
+
+TEST_DIR_OVERRIDES: dict[str, str] = {}  # nosemgrep: no-mutable-module-globals
+
+
+def _discover_modules(source_dir: Path) -> list[str]:
+    """Discover all public module groupings from package structure."""
+    modules: set[str] = set()
+    for py_file in sorted(source_dir.rglob("*.py")):
+        if py_file.name.startswith("_") and py_file.name != "__init__.py":
+            continue
+        relative = py_file.relative_to(source_dir)
+        if len(relative.parts) > 1:
+            modules.add(relative.parts[0])
+        else:
+            modules.add(relative.stem)
+    return sorted(modules - EXCLUDED_MODULES)
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Entry point for AI docs CLI with generate/check subcommands."""
+    parser = argparse.ArgumentParser(description="AI documentation generator")
+    parser.add_argument("--source-dir", type=Path, help="Source package directory")
+    parser.add_argument("--tests-dir", type=Path, help="Tests directory")
+    parser.add_argument("--output-dir", type=Path, help="Output .ai-docs directory")
+    subparsers = parser.add_subparsers(dest="command")
+    subparsers.add_parser("generate", help="Generate .ai-docs/ documentation")
+    subparsers.add_parser("check", help="Validate .ai-docs/ is up-to-date")
+
+    args = parser.parse_args(argv)
+    if not args.command:
+        parser.print_help()
+        return 1
+
+    source_dir, tests_dir, output_dir, repo_root = _resolve_paths(args)
+
+    if args.command == "generate":
+        return _run_generate(source_dir, tests_dir, output_dir, repo_root)
+    return _run_check(source_dir, tests_dir, output_dir)
+
+
+def _resolve_paths(args: argparse.Namespace) -> tuple[Path, Path, Path, Path]:
+    """Resolve source, tests, output directories and repo root from args or auto-detect."""
+    cli_file = Path(__file__).resolve()
+    repo_root = cli_file.parent.parent.parent
+    source_dir = args.source_dir or (repo_root / "ai_pipeline_core")
+    tests_dir = args.tests_dir or (repo_root / "tests")
+    output_dir = args.output_dir or (repo_root / ".ai-docs")
+    return source_dir, tests_dir, output_dir, repo_root
+
+
+def _run_generate(source_dir: Path, tests_dir: Path, output_dir: Path, repo_root: Path) -> int:
+    """Generate all module guides, INDEX.md, and .hash file."""
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Clean stale files
+    for existing in output_dir.glob("*.md"):
+        existing.unlink()
+    hash_file = output_dir / HASH_FILE
+    if hash_file.exists():
+        hash_file.unlink()
+
+    table = build_symbol_table(source_dir)
+    generated: list[tuple[str, int]] = []
+
+    for module_name in _discover_modules(source_dir):
+        data = build_guide(module_name, source_dir, tests_dir, table, TEST_DIR_OVERRIDES, repo_root)
+        if not data.classes and not data.functions:
+            print(f" skip {module_name} (no public symbols)")
+            continue
+
+        content = render_guide(data)
+        content = manage_guide_size(data, content)
+        content = _normalize_whitespace(content)
+
+        guide_path = output_dir / f"{module_name}.md"
+        guide_path.write_text(content)
+        size = len(content.encode("utf-8"))
+        generated.append((module_name, size))
+        print(f" wrote {module_name}.md ({size:,} bytes)")
+
+    # INDEX.md
+    index_content = _normalize_whitespace(_render_index(generated))
+    (output_dir / "INDEX.md").write_text(index_content)
+    print(f" wrote INDEX.md ({len(index_content):,} bytes)")
+
+    # .hash
+    source_hash = compute_source_hash(source_dir, tests_dir)
+    (output_dir / HASH_FILE).write_text(source_hash + "\n")
+    print(f" wrote {HASH_FILE}")
+
+    total = sum(size for _, size in generated)
+    print(f"\nGenerated {len(generated)} guides ({total:,} bytes total)")
+    return 0
+
+
+def _run_check(source_dir: Path, tests_dir: Path, output_dir: Path) -> int:
+    """Validate .ai-docs/ freshness, completeness, and size."""
+    if not output_dir.is_dir():
+        print("FAIL: .ai-docs/ directory does not exist. Run 'generate' first.", file=sys.stderr)
+        return 1
+
+    result = validate_all(output_dir, source_dir, tests_dir, excluded_modules=EXCLUDED_MODULES)
+
+    if not result.is_fresh:
+        print("FAIL: .ai-docs/ is stale (source hash mismatch)")
+    if result.missing_symbols:
+        print(f"FAIL: {len(result.missing_symbols)} public symbols missing from guides:")
+        for sym in result.missing_symbols:
+            print(f" - {sym}")
+    if result.size_violations:
+        print(f"WARNING: {len(result.size_violations)} guides exceed size limit:")
+        for name, size in result.size_violations:
+            print(f" - {name}: {size:,} bytes")
+
+    if result.is_valid:
+        print("OK: .ai-docs/ is up-to-date")
+        return 0
+    return 1
+
+
+def _render_index(generated: list[tuple[str, int]]) -> str:
+    """Render INDEX.md with reading order, task lookup, imports, and size table."""
+    lines: list[str] = [
+        "# AI Documentation Index",
+        "",
+        "Auto-generated guide index. Do not edit manually.",
+        "",
+        "## Reading Order",
+        "",
+    ]
+    for i, (name, _) in enumerate(generated, 1):
+        lines.append(f"{i}. [{name}]({name}.md)")
+
+    lines.extend([
+        "",
+        "## Task-Based Lookup",
+        "",
+        "| Task | Guide |",
+        "| ---- | ----- |",
+    ])
+    task_map = {
+        "Create/read documents": "documents",
+        "Store/retrieve documents": "document_store",
+        "Call LLMs": "llm",
+        "Deploy pipelines": "deployment",
+        "Load templates": "prompt_manager",
+        "Process images": "images",
+        "Define flows/tasks": "pipeline",
+        "Configure settings": "settings",
+        "Handle errors": "exceptions",
+        "Log messages": "logging",
+        "Debug & observe traces": "observability",
+        "Test pipelines": "testing",
+    }
+    guide_set = {name for name, _ in generated}
+    for task, guide in task_map.items():
+        if guide in guide_set:
+            lines.append(f"| {task} | [{guide}]({guide}.md) |")
+
+    lines.extend([
+        "",
+        "## Module Sizes",
+        "",
+        "| Module | Size |",
+        "| ------ | ---- |",
+    ])
+    for name, size in generated:
+        lines.append(f"| {name} | {size:,} bytes |")
+    total = sum(size for _, size in generated)
+    lines.append(f"| **Total** | **{total:,} bytes** |")
+    lines.append("")
+
+    return "\n".join(lines)
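Since main() accepts an argv list, the CLI can be exercised directly without a subprocess; the package also ships a docs_generator/__main__.py, so an equivalent invocation is presumably `python -m ai_pipeline_core.docs_generator generate`. A sketch with explicit directories (the paths are placeholders):

```python
from ai_pipeline_core.docs_generator.cli import main

# Global options must precede the subcommand, matching the argparse layout above
exit_code = main([
    "--source-dir", "ai_pipeline_core",
    "--tests-dir", "tests",
    "--output-dir", ".ai-docs",
    "generate",
])
raise SystemExit(exit_code)
```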