ai-pipeline-core 0.2.6__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. ai_pipeline_core/__init__.py +78 -125
  2. ai_pipeline_core/deployment/__init__.py +34 -0
  3. ai_pipeline_core/deployment/base.py +861 -0
  4. ai_pipeline_core/deployment/contract.py +80 -0
  5. ai_pipeline_core/deployment/deploy.py +561 -0
  6. ai_pipeline_core/deployment/helpers.py +97 -0
  7. ai_pipeline_core/deployment/progress.py +126 -0
  8. ai_pipeline_core/deployment/remote.py +116 -0
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +12 -14
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +318 -1434
  30. ai_pipeline_core/documents/mime_type.py +37 -82
  31. ai_pipeline_core/documents/utils.py +4 -12
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +309 -0
  34. ai_pipeline_core/images/_processing.py +151 -0
  35. ai_pipeline_core/llm/__init__.py +6 -4
  36. ai_pipeline_core/llm/ai_messages.py +130 -81
  37. ai_pipeline_core/llm/client.py +327 -193
  38. ai_pipeline_core/llm/model_options.py +14 -86
  39. ai_pipeline_core/llm/model_response.py +60 -103
  40. ai_pipeline_core/llm/model_types.py +16 -34
  41. ai_pipeline_core/logging/__init__.py +2 -7
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -37
  44. ai_pipeline_core/logging/logging_mixin.py +15 -41
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/observability/_debug/_config.py +95 -0
  49. ai_pipeline_core/observability/_debug/_content.py +764 -0
  50. ai_pipeline_core/observability/_debug/_processor.py +98 -0
  51. ai_pipeline_core/observability/_debug/_summary.py +312 -0
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/observability/_debug/_writer.py +843 -0
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -283
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +16 -102
  70. ai_pipeline_core/settings.py +26 -31
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
  74. {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
  75. ai_pipeline_core/documents/document_list.py +0 -420
  76. ai_pipeline_core/documents/flow_document.py +0 -112
  77. ai_pipeline_core/documents/task_document.py +0 -117
  78. ai_pipeline_core/documents/temporary_document.py +0 -74
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -483
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -718
  83. ai_pipeline_core/prefect.py +0 -63
  84. ai_pipeline_core/simple_runner/__init__.py +0 -14
  85. ai_pipeline_core/simple_runner/cli.py +0 -254
  86. ai_pipeline_core/simple_runner/simple_runner.py +0 -247
  87. ai_pipeline_core/storage/__init__.py +0 -8
  88. ai_pipeline_core/storage/storage.py +0 -628
  89. ai_pipeline_core/utils/__init__.py +0 -8
  90. ai_pipeline_core/utils/deploy.py +0 -373
  91. ai_pipeline_core/utils/remote_deployment.py +0 -269
  92. ai_pipeline_core-0.2.6.dist-info/METADATA +0 -500
  93. ai_pipeline_core-0.2.6.dist-info/RECORD +0 -41
  94. {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/deployment/helpers.py
@@ -0,0 +1,97 @@
+ """Helper functions for pipeline deployments."""
+
+ import asyncio
+ import re
+ from typing import Any, Literal, TypedDict
+
+ import httpx
+
+ from ai_pipeline_core.deployment.contract import CompletedRun, FailedRun, ProgressRun
+ from ai_pipeline_core.documents import Document
+ from ai_pipeline_core.logging import get_pipeline_logger
+
+ logger = get_pipeline_logger(__name__)
+
+
+ class DownloadedDocument(Document):
+     """Concrete document for downloaded content."""
+
+
+ class StatusPayload(TypedDict):
+     """Webhook payload for Prefect state transitions (sub-flow level)."""
+
+     type: Literal["status"]
+     flow_run_id: str
+     project_name: str
+     step: int
+     total_steps: int
+     flow_name: str
+     state: str
+     state_name: str
+     timestamp: str
+
+
+ def class_name_to_deployment_name(class_name: str) -> str:
+     """Convert PascalCase to kebab-case: ResearchPipeline -> research-pipeline."""
+     name = re.sub(r"(?<!^)(?=[A-Z])", "-", class_name)
+     return name.lower()
+
+
+ def extract_generic_params(cls: type, base_class: type) -> tuple[type | None, type | None]:
+     """Extract TOptions and TResult from a generic base class's args."""
+     for base in getattr(cls, "__orig_bases__", []):
+         origin = getattr(base, "__origin__", None)
+         if origin is base_class:
+             args = getattr(base, "__args__", ())
+             if len(args) == 2:
+                 return args[0], args[1]
+
+     return None, None
+
+
+ async def download_documents(urls: list[str]) -> list[Document]:
+     """Download documents from URLs."""
+     documents: list[Document] = []
+     async with httpx.AsyncClient(timeout=60, follow_redirects=True) as client:
+         for url in urls:
+             response = await client.get(url)
+             response.raise_for_status()
+             filename = url.split("/")[-1].split("?")[0] or "document"
+             documents.append(DownloadedDocument(name=filename, content=response.content))
+     return documents
+
+
+ async def upload_documents(documents: list[Document], url_mapping: dict[str, str]) -> None:
+     """Upload documents to their mapped URLs."""
+     async with httpx.AsyncClient(timeout=60, follow_redirects=True) as client:
+         for doc in documents:
+             if doc.name in url_mapping:
+                 response = await client.put(
+                     url_mapping[doc.name],
+                     content=doc.content,
+                     headers={"Content-Type": doc.mime_type},
+                 )
+                 response.raise_for_status()
+
+
+ async def send_webhook(
+     url: str,
+     payload: ProgressRun | CompletedRun | FailedRun,
+     max_retries: int = 3,
+     retry_delay: float = 10.0,
+ ) -> None:
+     """Send webhook with retries."""
+     data: dict[str, Any] = payload.model_dump(mode="json")
+     for attempt in range(max_retries):
+         try:
+             async with httpx.AsyncClient(timeout=30) as client:
+                 response = await client.post(url, json=data, follow_redirects=True)
+                 response.raise_for_status()
+             return
+         except Exception as e:
+             if attempt < max_retries - 1:
+                 logger.warning(f"Webhook retry {attempt + 1}/{max_retries}: {e}")
+                 await asyncio.sleep(retry_delay)
+             else:
+                 logger.exception(f"Webhook failed after {max_retries} attempts")
+                 raise
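
For orientation (an annotation, not part of the diff): a minimal sketch of driving the new download/upload helpers from an async entry point. The URLs, filenames, and the pre-signed upload mapping are illustrative assumptions; only the function signatures come from the code above.

    import asyncio

    from ai_pipeline_core.deployment.helpers import download_documents, upload_documents

    async def sync_documents() -> None:
        # Fetch remote files into DownloadedDocument instances (names derived from the URL path).
        docs = await download_documents(["https://example.com/reports/q1.pdf"])
        # Push each document to a pre-signed URL keyed by document name; unmapped names are skipped.
        await upload_documents(docs, {"q1.pdf": "https://example.com/upload/q1.pdf"})

    asyncio.run(sync_documents())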
ai_pipeline_core/deployment/progress.py
@@ -0,0 +1,126 @@
+ """Intra-flow progress tracking with order-preserving webhook delivery."""
+
+ import asyncio
+ import contextlib
+ from collections.abc import Generator
+ from contextlib import contextmanager
+ from contextvars import ContextVar
+ from dataclasses import dataclass
+ from datetime import UTC, datetime
+ from uuid import UUID
+
+ from ai_pipeline_core.logging import get_pipeline_logger
+
+ from .contract import ProgressRun
+ from .helpers import send_webhook
+
+ logger = get_pipeline_logger(__name__)
+
+
+ @dataclass(frozen=True, slots=True)
+ class ProgressContext:
+     """Internal context holding state for progress calculation and webhook delivery."""
+
+     webhook_url: str
+     project_name: str
+     run_id: str
+     flow_run_id: str
+     flow_name: str
+     step: int
+     total_steps: int
+     total_minutes: float
+     completed_minutes: float
+     current_flow_minutes: float
+     queue: asyncio.Queue[ProgressRun | None]
+
+
+ _context: ContextVar[ProgressContext | None] = ContextVar("progress_context", default=None)
+
+
+ async def update(fraction: float, message: str = "") -> None:
+     """Report intra-flow progress (0.0-1.0). No-op without context."""
+     ctx = _context.get()
+     if ctx is None or not ctx.webhook_url:
+         return
+
+     fraction = max(0.0, min(1.0, fraction))
+
+     if ctx.total_minutes > 0:
+         overall = (ctx.completed_minutes + ctx.current_flow_minutes * fraction) / ctx.total_minutes
+     else:
+         overall = fraction
+     overall = round(max(0.0, min(1.0, overall)), 4)
+
+     payload = ProgressRun(
+         flow_run_id=UUID(ctx.flow_run_id) if ctx.flow_run_id else UUID(int=0),
+         project_name=ctx.project_name,
+         state="RUNNING",
+         timestamp=datetime.now(UTC),
+         step=ctx.step,
+         total_steps=ctx.total_steps,
+         flow_name=ctx.flow_name,
+         status="progress",
+         progress=overall,
+         step_progress=round(fraction, 4),
+         message=message,
+     )
+
+     ctx.queue.put_nowait(payload)
+
+
+ async def webhook_worker(
+     queue: asyncio.Queue[ProgressRun | None],
+     webhook_url: str,
+     max_retries: int = 3,
+     retry_delay: float = 10.0,
+ ) -> None:
+     """Process webhooks sequentially with retries, preserving order."""
+     while True:
+         payload = await queue.get()
+         if payload is None:
+             queue.task_done()
+             break
+
+         with contextlib.suppress(Exception):
+             await send_webhook(webhook_url, payload, max_retries, retry_delay)
+
+         queue.task_done()
+
+
+ @contextmanager
+ def flow_context(  # noqa: PLR0917
+     webhook_url: str,
+     project_name: str,
+     run_id: str,
+     flow_run_id: str,
+     flow_name: str,
+     step: int,
+     total_steps: int,
+     flow_minutes: tuple[float, ...],
+     completed_minutes: float,
+     queue: asyncio.Queue[ProgressRun | None],
+ ) -> Generator[None, None, None]:
+     """Set up progress context for a flow. Framework internal use."""
+     current_flow_minutes = flow_minutes[step - 1] if step <= len(flow_minutes) else 1.0
+     total_minutes = sum(flow_minutes) if flow_minutes else current_flow_minutes
+     ctx = ProgressContext(
+         webhook_url=webhook_url,
+         project_name=project_name,
+         run_id=run_id,
+         flow_run_id=flow_run_id,
+         flow_name=flow_name,
+         step=step,
+         total_steps=total_steps,
+         total_minutes=total_minutes,
+         completed_minutes=completed_minutes,
+         current_flow_minutes=current_flow_minutes,
+         queue=queue,
+     )
+     token = _context.set(ctx)
+     try:
+         yield
+     finally:
+         _context.reset(token)
+
+
+ __all__ = ["ProgressContext", "flow_context", "update", "webhook_worker"]
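
Reading the three pieces together (again an annotation, not diff content): flow_context installs a ProgressContext for the current step, update translates a per-step fraction into an overall progress payload weighted by the flow_minutes estimates, and webhook_worker drains the queue in order. A minimal sketch, assuming the module is importable as ai_pipeline_core.deployment.progress and that ProgressRun comes from the contract module added in this release:

    import asyncio

    from ai_pipeline_core.deployment import progress
    from ai_pipeline_core.deployment.contract import ProgressRun

    async def run_step_one() -> None:
        queue: asyncio.Queue[ProgressRun | None] = asyncio.Queue()
        worker = asyncio.create_task(progress.webhook_worker(queue, "https://example.com/hook"))
        with progress.flow_context(
            webhook_url="https://example.com/hook",
            project_name="demo",
            run_id="run-001",
            flow_run_id="",            # falsy -> payload falls back to UUID(int=0)
            flow_name="ingest",
            step=1,
            total_steps=2,
            flow_minutes=(5.0, 10.0),  # per-flow time estimates used for overall weighting
            completed_minutes=0.0,
            queue=queue,
        ):
            await progress.update(0.5, "halfway through ingest")
        queue.put_nowait(None)         # sentinel: worker exits once the queue is drained
        await worker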
ai_pipeline_core/deployment/remote.py
@@ -0,0 +1,116 @@
+ """Remote deployment utilities for calling PipelineDeployment flows via Prefect."""
+
+ import inspect
+ from collections.abc import Callable
+ from functools import wraps
+ from typing import Any, ParamSpec, TypeVar, cast
+
+ from prefect import get_client
+ from prefect.client.orchestration import PrefectClient
+ from prefect.client.schemas import FlowRun
+ from prefect.context import AsyncClientContext
+ from prefect.deployments.flow_runs import run_deployment
+ from prefect.exceptions import ObjectNotFound
+
+ from ai_pipeline_core.deployment import DeploymentContext, DeploymentResult, PipelineDeployment
+ from ai_pipeline_core.observability.tracing import TraceLevel, set_trace_cost, trace
+ from ai_pipeline_core.pipeline.options import FlowOptions
+ from ai_pipeline_core.settings import settings
+
+ P = ParamSpec("P")
+ TOptions = TypeVar("TOptions", bound=FlowOptions)
+ TResult = TypeVar("TResult", bound=DeploymentResult)
+
+
+ def _is_already_traced(func: Callable[..., Any]) -> bool:
+     """Check if function or its __wrapped__ has __is_traced__ attribute."""
+     if getattr(func, "__is_traced__", False):
+         return True
+     wrapped = getattr(func, "__wrapped__", None)
+     return getattr(wrapped, "__is_traced__", False) if wrapped else False
+
+
+ async def run_remote_deployment(deployment_name: str, parameters: dict[str, Any]) -> Any:
+     """Run a remote Prefect deployment, trying local client first then remote."""
+
+     async def _run(client: PrefectClient, as_subflow: bool) -> Any:
+         fr: FlowRun = await run_deployment(client=client, name=deployment_name, parameters=parameters, as_subflow=as_subflow)  # type: ignore
+         return await fr.state.result()  # type: ignore
+
+     async with get_client() as client:
+         try:
+             await client.read_deployment_by_name(name=deployment_name)
+             return await _run(client, True)  # noqa: FBT003
+         except ObjectNotFound:
+             pass
+
+     if not settings.prefect_api_url:
+         raise ValueError(f"{deployment_name} not found, PREFECT_API_URL not set")
+
+     async with PrefectClient(
+         api=settings.prefect_api_url,
+         api_key=settings.prefect_api_key,
+         auth_string=settings.prefect_api_auth_string,
+     ) as client:
+         try:
+             await client.read_deployment_by_name(name=deployment_name)
+             ctx = AsyncClientContext.model_construct(client=client, _httpx_settings=None, _context_stack=0)
+             with ctx:
+                 return await _run(client, False)  # noqa: FBT003
+         except ObjectNotFound:
+             pass
+
+     raise ValueError(f"{deployment_name} deployment not found")
+
+
+ def remote_deployment(
+     deployment_class: type[PipelineDeployment[TOptions, TResult]],
+     *,
+     deployment_name: str | None = None,
+     name: str | None = None,
+     trace_level: TraceLevel = "always",
+     trace_cost: float | None = None,
+ ) -> Callable[[Callable[P, TResult]], Callable[P, TResult]]:
+     """Decorator to call PipelineDeployment flows remotely with automatic serialization."""
+
+     def decorator(func: Callable[P, TResult]) -> Callable[P, TResult]:
+         fname = getattr(func, "__name__", deployment_class.name)
+
+         if _is_already_traced(func):
+             raise TypeError(f"@remote_deployment target '{fname}' already has @trace")
+
+         @wraps(func)
+         async def _wrapper(*args: P.args, **kwargs: P.kwargs) -> TResult:
+             sig = inspect.signature(func)
+             bound = sig.bind(*args, **kwargs)
+             bound.apply_defaults()
+
+             # Pass parameters with proper types - Prefect handles Pydantic serialization
+             parameters: dict[str, Any] = {}
+             for pname, value in bound.arguments.items():
+                 if value is None and pname == "context":
+                     parameters[pname] = DeploymentContext()
+                 else:
+                     parameters[pname] = value
+
+             full_name = f"{deployment_class.name}/{deployment_name or deployment_class.name}"
+
+             result = await run_remote_deployment(full_name, parameters)
+
+             if trace_cost is not None and trace_cost > 0:
+                 set_trace_cost(trace_cost)
+
+             if isinstance(result, DeploymentResult):
+                 return cast(TResult, result)
+             if isinstance(result, dict):
+                 return cast(TResult, deployment_class.result_type(**cast(dict[str, Any], result)))
+             raise TypeError(f"Expected DeploymentResult, got {type(result).__name__}")
+
+         traced_wrapper = trace(
+             level=trace_level,
+             name=name or deployment_class.name,
+         )(_wrapper)
+
+         return traced_wrapper  # type: ignore[return-value]
+
+     return decorator
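
As an annotation on the decorator above: the wrapped function body is never executed; its signature is only used to bind arguments, which are then forwarded to the Prefect deployment named "<deployment_class.name>/<deployment_name>". A hypothetical caller might look like this, where ResearchPipeline and ResearchResult are assumed application classes (a PipelineDeployment subclass and its DeploymentResult type):

    from ai_pipeline_core.deployment import DeploymentContext
    from ai_pipeline_core.deployment.remote import remote_deployment

    from my_project.pipelines import ResearchPipeline, ResearchResult  # hypothetical

    @remote_deployment(ResearchPipeline, trace_level="always")
    async def run_research(topic: str, context: DeploymentContext | None = None) -> ResearchResult:
        ...  # never called: arguments are bound and forwarded to the remote deployment

    async def example() -> None:
        # A None "context" argument is replaced with a default DeploymentContext() before dispatch.
        result = await run_research("graph databases", context=None)
        print(result)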
ai_pipeline_core/docs_generator/__init__.py
@@ -0,0 +1,54 @@
+ """AI-focused documentation generator.
+
+ Generates dense, self-contained guides from source code and test suite
+ for AI coding agents. Uses AST parsing, dependency resolution, and
+ size management for guides with a 50KB warning threshold.
+ """
+
+ from ai_pipeline_core.docs_generator.extractor import (
+     ClassInfo,
+     FunctionInfo,
+     MethodInfo,
+     ModuleInfo,
+     SymbolTable,
+     is_public_name,
+     parse_module,
+ )
+ from ai_pipeline_core.docs_generator.guide_builder import (
+     GuideData,
+     TestExample,
+     build_guide,
+     discover_tests,
+     select_examples,
+ )
+ from ai_pipeline_core.docs_generator.trimmer import manage_guide_size
+ from ai_pipeline_core.docs_generator.validator import (
+     ValidationResult,
+     compute_source_hash,
+     validate_all,
+     validate_completeness,
+     validate_freshness,
+     validate_size,
+ )
+
+ __all__ = [
+     "ClassInfo",
+     "FunctionInfo",
+     "GuideData",
+     "MethodInfo",
+     "ModuleInfo",
+     "SymbolTable",
+     "TestExample",
+     "ValidationResult",
+     "build_guide",
+     "compute_source_hash",
+     "discover_tests",
+     "is_public_name",
+     "manage_guide_size",
+     "parse_module",
+     "select_examples",
+     "validate_all",
+     "validate_completeness",
+     "validate_freshness",
+     "validate_size",
+ ]
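
A short sketch (not diff content) of the validation entry point re-exported here, with the call shape and result attributes taken from cli.py further down; the directory paths are assumptions matching the defaults used there:

    from pathlib import Path

    from ai_pipeline_core.docs_generator import validate_all

    # Check generated guides against the current source and test trees.
    result = validate_all(
        Path(".ai-docs"),
        Path("ai_pipeline_core"),
        Path("tests"),
        excluded_modules=frozenset({"docs_generator"}),
    )
    if not result.is_valid:
        print("guides need regenerating:", result.missing_symbols or "stale hash")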
ai_pipeline_core/docs_generator/__main__.py
@@ -0,0 +1,5 @@
+ """Entry point for python -m ai_pipeline_core.docs_generator."""
+
+ from ai_pipeline_core.docs_generator.cli import main
+
+ raise SystemExit(main())
ai_pipeline_core/docs_generator/cli.py
@@ -0,0 +1,196 @@
+ """CLI for AI documentation generation and validation."""
+
+ import argparse
+ import sys
+ from pathlib import Path
+
+ from ai_pipeline_core.docs_generator.extractor import build_symbol_table
+ from ai_pipeline_core.docs_generator.guide_builder import build_guide, render_guide
+ from ai_pipeline_core.docs_generator.trimmer import manage_guide_size
+ from ai_pipeline_core.docs_generator.validator import (
+     HASH_FILE,
+     compute_source_hash,
+     validate_all,
+ )
+
+ EXCLUDED_MODULES: frozenset[str] = frozenset({"docs_generator"})
+
+
+ def _normalize_whitespace(content: str) -> str:
+     """Strip trailing whitespace from each line and ensure final newline."""
+     lines = [line.rstrip() for line in content.splitlines()]
+     return "\n".join(lines) + "\n"
+
+
+ TEST_DIR_OVERRIDES: dict[str, str] = {}  # nosemgrep: no-mutable-module-globals
+
+
+ def _discover_modules(source_dir: Path) -> list[str]:
+     """Discover all public module groupings from package structure."""
+     modules: set[str] = set()
+     for py_file in sorted(source_dir.rglob("*.py")):
+         if py_file.name.startswith("_") and py_file.name != "__init__.py":
+             continue
+         relative = py_file.relative_to(source_dir)
+         if len(relative.parts) > 1:
+             modules.add(relative.parts[0])
+         else:
+             modules.add(relative.stem)
+     return sorted(modules - EXCLUDED_MODULES)
+
+
+ def main(argv: list[str] | None = None) -> int:
+     """Entry point for AI docs CLI with generate/check subcommands."""
+     parser = argparse.ArgumentParser(description="AI documentation generator")
+     parser.add_argument("--source-dir", type=Path, help="Source package directory")
+     parser.add_argument("--tests-dir", type=Path, help="Tests directory")
+     parser.add_argument("--output-dir", type=Path, help="Output .ai-docs directory")
+     subparsers = parser.add_subparsers(dest="command")
+     subparsers.add_parser("generate", help="Generate .ai-docs/ documentation")
+     subparsers.add_parser("check", help="Validate .ai-docs/ is up-to-date")
+
+     args = parser.parse_args(argv)
+     if not args.command:
+         parser.print_help()
+         return 1
+
+     source_dir, tests_dir, output_dir, repo_root = _resolve_paths(args)
+
+     if args.command == "generate":
+         return _run_generate(source_dir, tests_dir, output_dir, repo_root)
+     return _run_check(source_dir, tests_dir, output_dir)
+
+
+ def _resolve_paths(args: argparse.Namespace) -> tuple[Path, Path, Path, Path]:
+     """Resolve source, tests, output directories and repo root from args or auto-detect."""
+     cli_file = Path(__file__).resolve()
+     repo_root = cli_file.parent.parent.parent
+     source_dir = args.source_dir or (repo_root / "ai_pipeline_core")
+     tests_dir = args.tests_dir or (repo_root / "tests")
+     output_dir = args.output_dir or (repo_root / ".ai-docs")
+     return source_dir, tests_dir, output_dir, repo_root
+
+
+ def _run_generate(source_dir: Path, tests_dir: Path, output_dir: Path, repo_root: Path) -> int:
+     """Generate all module guides, INDEX.md, and .hash file."""
+     output_dir.mkdir(parents=True, exist_ok=True)
+
+     # Clean stale files
+     for existing in output_dir.glob("*.md"):
+         existing.unlink()
+     hash_file = output_dir / HASH_FILE
+     if hash_file.exists():
+         hash_file.unlink()
+
+     table = build_symbol_table(source_dir)
+     generated: list[tuple[str, int]] = []
+
+     for module_name in _discover_modules(source_dir):
+         data = build_guide(module_name, source_dir, tests_dir, table, TEST_DIR_OVERRIDES, repo_root)
+         if not data.classes and not data.functions:
+             print(f" skip {module_name} (no public symbols)")
+             continue
+
+         content = render_guide(data)
+         content = manage_guide_size(data, content)
+         content = _normalize_whitespace(content)
+
+         guide_path = output_dir / f"{module_name}.md"
+         guide_path.write_text(content)
+         size = len(content.encode("utf-8"))
+         generated.append((module_name, size))
+         print(f" wrote {module_name}.md ({size:,} bytes)")
+
+     # INDEX.md
+     index_content = _normalize_whitespace(_render_index(generated))
+     (output_dir / "INDEX.md").write_text(index_content)
+     print(f" wrote INDEX.md ({len(index_content):,} bytes)")
+
+     # .hash
+     source_hash = compute_source_hash(source_dir, tests_dir)
+     (output_dir / HASH_FILE).write_text(source_hash + "\n")
+     print(f" wrote {HASH_FILE}")
+
+     total = sum(size for _, size in generated)
+     print(f"\nGenerated {len(generated)} guides ({total:,} bytes total)")
+     return 0
+
+
+ def _run_check(source_dir: Path, tests_dir: Path, output_dir: Path) -> int:
+     """Validate .ai-docs/ freshness, completeness, and size."""
+     if not output_dir.is_dir():
+         print("FAIL: .ai-docs/ directory does not exist. Run 'generate' first.", file=sys.stderr)
+         return 1
+
+     result = validate_all(output_dir, source_dir, tests_dir, excluded_modules=EXCLUDED_MODULES)
+
+     if not result.is_fresh:
+         print("FAIL: .ai-docs/ is stale (source hash mismatch)")
+     if result.missing_symbols:
+         print(f"FAIL: {len(result.missing_symbols)} public symbols missing from guides:")
+         for sym in result.missing_symbols:
+             print(f" - {sym}")
+     if result.size_violations:
+         print(f"WARNING: {len(result.size_violations)} guides exceed size limit:")
+         for name, size in result.size_violations:
+             print(f" - {name}: {size:,} bytes")
+
+     if result.is_valid:
+         print("OK: .ai-docs/ is up-to-date")
+         return 0
+     return 1
+
+
+ def _render_index(generated: list[tuple[str, int]]) -> str:
+     """Render INDEX.md with reading order, task lookup, imports, and size table."""
+     lines: list[str] = [
+         "# AI Documentation Index",
+         "",
+         "Auto-generated guide index. Do not edit manually.",
+         "",
+         "## Reading Order",
+         "",
+     ]
+     for i, (name, _) in enumerate(generated, 1):
+         lines.append(f"{i}. [{name}]({name}.md)")
+
+     lines.extend([
+         "",
+         "## Task-Based Lookup",
+         "",
+         "| Task | Guide |",
+         "| ---- | ----- |",
+     ])
+     task_map = {
+         "Create/read documents": "documents",
+         "Store/retrieve documents": "document_store",
+         "Call LLMs": "llm",
+         "Deploy pipelines": "deployment",
+         "Load templates": "prompt_manager",
+         "Process images": "images",
+         "Define flows/tasks": "pipeline",
+         "Configure settings": "settings",
+         "Handle errors": "exceptions",
+         "Log messages": "logging",
+         "Debug & observe traces": "observability",
+         "Test pipelines": "testing",
+     }
+     guide_set = {name for name, _ in generated}
+     for task, guide in task_map.items():
+         if guide in guide_set:
+             lines.append(f"| {task} | [{guide}]({guide}.md) |")
+
+     lines.extend([
+         "",
+         "## Module Sizes",
+         "",
+         "| Module | Size |",
+         "| ------ | ---- |",
+     ])
+     for name, size in generated:
+         lines.append(f"| {name} | {size:,} bytes |")
+     total = sum(size for _, size in generated)
+     lines.append(f"| **Total** | **{total:,} bytes** |")
+     lines.append("")
+
+     return "\n".join(lines)
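
The CLI can also be driven programmatically (equivalent to running python -m ai_pipeline_core.docs_generator with the same arguments), which is convenient in CI. A sketch based on the argument parser above; the explicit paths are assumptions matching the auto-detected defaults:

    from ai_pipeline_core.docs_generator.cli import main

    # Regenerate .ai-docs/ for a chosen source tree, then verify it the way a CI check would.
    generate_rc = main(["--source-dir", "ai_pipeline_core", "--tests-dir", "tests", "generate"])
    check_rc = main(["check"])  # 0 if guides are fresh, complete, and within size limits
    print(generate_rc, check_rc)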