python-infrakit-dev 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. infrakit/__init__.py +0 -0
  2. infrakit/cli/__init__.py +1 -0
  3. infrakit/cli/commands/__init__.py +1 -0
  4. infrakit/cli/commands/deps.py +530 -0
  5. infrakit/cli/commands/init.py +129 -0
  6. infrakit/cli/commands/llm.py +295 -0
  7. infrakit/cli/commands/logger.py +160 -0
  8. infrakit/cli/commands/module.py +342 -0
  9. infrakit/cli/commands/time.py +81 -0
  10. infrakit/cli/main.py +65 -0
  11. infrakit/core/__init__.py +0 -0
  12. infrakit/core/config/__init__.py +0 -0
  13. infrakit/core/config/converter.py +480 -0
  14. infrakit/core/config/exporter.py +304 -0
  15. infrakit/core/config/loader.py +713 -0
  16. infrakit/core/config/validator.py +389 -0
  17. infrakit/core/logger/__init__.py +21 -0
  18. infrakit/core/logger/formatters.py +143 -0
  19. infrakit/core/logger/handlers.py +322 -0
  20. infrakit/core/logger/retention.py +176 -0
  21. infrakit/core/logger/setup.py +314 -0
  22. infrakit/deps/__init__.py +239 -0
  23. infrakit/deps/clean.py +141 -0
  24. infrakit/deps/depfile.py +405 -0
  25. infrakit/deps/health.py +357 -0
  26. infrakit/deps/optimizer.py +642 -0
  27. infrakit/deps/scanner.py +550 -0
  28. infrakit/llm/__init__.py +35 -0
  29. infrakit/llm/batch.py +165 -0
  30. infrakit/llm/client.py +575 -0
  31. infrakit/llm/key_manager.py +728 -0
  32. infrakit/llm/llm_readme.md +306 -0
  33. infrakit/llm/models.py +148 -0
  34. infrakit/llm/providers/__init__.py +5 -0
  35. infrakit/llm/providers/base.py +112 -0
  36. infrakit/llm/providers/gemini.py +164 -0
  37. infrakit/llm/providers/openai.py +168 -0
  38. infrakit/llm/rate_limiter.py +54 -0
  39. infrakit/scaffolder/__init__.py +31 -0
  40. infrakit/scaffolder/ai.py +508 -0
  41. infrakit/scaffolder/backend.py +555 -0
  42. infrakit/scaffolder/cli_tool.py +386 -0
  43. infrakit/scaffolder/generator.py +338 -0
  44. infrakit/scaffolder/pipeline.py +562 -0
  45. infrakit/scaffolder/registry.py +121 -0
  46. infrakit/time/__init__.py +60 -0
  47. infrakit/time/profiler.py +511 -0
  48. python_infrakit_dev-0.1.0.dist-info/METADATA +124 -0
  49. python_infrakit_dev-0.1.0.dist-info/RECORD +51 -0
  50. python_infrakit_dev-0.1.0.dist-info/WHEEL +4 -0
  51. python_infrakit_dev-0.1.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,562 @@
1
+ """
2
+ infrakit.scaffolder.templates.pipeline
3
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
+ Scaffold a data pipeline / ETL project.
5
+
6
+ Designed for batch jobs that extract, transform, and load data —
7
+ with or without an LLM enrichment step.
8
+
9
+ Layout
10
+ ------
11
+ <project>/
12
+ ├── src/
13
+ │ └── __init__.py
14
+ ├── pipeline/
15
+ │ ├── __init__.py
16
+ │ ├── extract.py # pull from source(s)
17
+ │ ├── transform.py # clean / reshape
18
+ │ ├── enrich.py # optional LLM enrichment step
19
+ │ ├── load.py # write to destination
20
+ │ └── runner.py # orchestrator — runs the full DAG
21
+ ├── schemas/
22
+ │ ├── __init__.py
23
+ │ └── records.py # Pydantic models for input/output records
24
+ ├── data/
25
+ │ ├── input/ # raw source files (not committed)
26
+ │ ├── staging/ # intermediate work (not committed)
27
+ │ └── output/ # final output (not committed)
28
+ ├── utils/
29
+ │ ├── __init__.py
30
+ │ ├── logger.py
31
+ │ └── llm.py # optional
32
+ ├── tests/
33
+ │ ├── __init__.py
34
+ │ └── test_pipeline.py
35
+ ├── logs/
36
+ ├── pyproject.toml / requirements.txt
37
+ ├── config.{env|yaml|json}
38
+ ├── README.md
39
+ └── .gitignore
40
+ """
41
+
42
+ from __future__ import annotations
43
+
44
+ from pathlib import Path
45
+
46
+ from infrakit.scaffolder.generator import (
47
+ ScaffoldResult,
48
+ _mkdir,
49
+ _write,
50
+ _config_content,
51
+ _gitignore,
52
+ _logger_util,
53
+ _src_init,
54
+ _tests_init,
55
+ _pyproject_toml,
56
+ _requirements_txt,
57
+ )
58
+ from infrakit.scaffolder.ai import _llm_util
59
+
60
+
61
+ # ── template content ──────────────────────────────────────────────────────────
62
+
63
+
64
def _pipeline_pkg_init() -> str:
    # Template for the generated project's pipeline/__init__.py.
    # The docstring it emits documents the stage contract that every
    # generated stage module (extract/transform/enrich/load) follows.
    return '''\
"""
pipeline
~~~~~~~~
Stages are independent modules; runner.py wires them in order.

Stage contract
--------------
Each stage exposes a ``run(**kwargs)`` function that:
- Reads from a well-known location (data/input, data/staging, etc.)
- Writes its output to the next location
- Returns a summary dict {records_in, records_out, errors}
- Is idempotent where possible
"""
'''
80
+
81
+
82
def _extract() -> str:
    # Template for pipeline/extract.py — first stage of the generated
    # pipeline; pulls raw records into data/input/.
    return '''\
"""
pipeline.extract
~~~~~~~~~~~~~~~~
Pull records from source(s) into data/input/.
"""

from pathlib import Path

from utils.logger import get_logger

log = get_logger(__name__)
IN_DIR = Path("data/input")


def run(source: str = "") -> dict:
    IN_DIR.mkdir(parents=True, exist_ok=True)
    log.info("extract: starting (source=%r)", source)

    records = []
    # TODO: fetch from API / database / files and append to `records`

    log.info("extract: %d records fetched", len(records))
    return {"records_in": 0, "records_out": len(records), "errors": 0}
'''
108
+
109
+
110
def _transform() -> str:
    # Template for pipeline/transform.py — second stage; cleans and
    # reshapes records from data/input/ into data/staging/.
    return '''\
"""
pipeline.transform
~~~~~~~~~~~~~~~~~~
Clean and reshape records from data/input/ → data/staging/.
"""

from pathlib import Path

from utils.logger import get_logger

log = get_logger(__name__)
IN_DIR = Path("data/input")
STAGING_DIR = Path("data/staging")


def run() -> dict:
    STAGING_DIR.mkdir(parents=True, exist_ok=True)
    log.info("transform: starting")

    errors = 0
    records_out = 0

    # TODO: read files from IN_DIR, clean/reshape, write to STAGING_DIR

    log.info("transform: %d records, %d errors", records_out, errors)
    return {"records_in": 0, "records_out": records_out, "errors": errors}
'''
139
+
140
+
141
def _enrich(include_llm: bool) -> str:
    # Template for pipeline/enrich.py. Two variants: an LLM-backed stage
    # (imports utils.llm and batches prompts through it) or a plain
    # placeholder stage with no external dependencies.
    if include_llm:
        return '''\
"""
pipeline.enrich
~~~~~~~~~~~~~~~
Optional LLM enrichment step — adds AI-generated fields to records.
Reads from data/staging/, writes enriched records back to data/staging/.
"""

from pathlib import Path

from utils.llm import llm, Prompt
from utils.logger import get_logger

log = get_logger(__name__)
STAGING_DIR = Path("data/staging")


def run(provider: str = "openai", batch_size: int = 50) -> dict:
    log.info("enrich: starting (provider=%s)", provider)

    # TODO: load records from STAGING_DIR
    raw_texts: list[str] = []

    if not raw_texts:
        log.info("enrich: nothing to enrich")
        return {"records_in": 0, "records_out": 0, "errors": 0}

    prompts = [Prompt(user=text) for text in raw_texts]
    batch = llm.batch_generate(prompts, provider=provider)

    errors = batch.failure_count
    log.info(
        "enrich: %d ok, %d errors, %d tokens",
        batch.success_count, errors, batch.total_tokens,
    )

    # TODO: merge batch.results back into records and write to STAGING_DIR

    return {
        "records_in": len(raw_texts),
        "records_out": batch.success_count,
        "errors": errors,
    }


if __name__ == "__main__":
    run()
'''
    else:
        return '''\
"""
pipeline.enrich
~~~~~~~~~~~~~~~
Placeholder enrichment step — add derived / computed fields to records.
Reads from data/staging/, writes back to data/staging/.
"""

from pathlib import Path

from utils.logger import get_logger

log = get_logger(__name__)
STAGING_DIR = Path("data/staging")


def run() -> dict:
    log.info("enrich: starting")
    # TODO: load records, compute derived fields, write back
    return {"records_in": 0, "records_out": 0, "errors": 0}
'''
213
+
214
+
215
def _load() -> str:
    # Template for pipeline/load.py — final stage; writes staged records
    # to data/output/ for destination="file", otherwise leaves a stub for
    # DB / API writes.
    return '''\
"""
pipeline.load
~~~~~~~~~~~~~
Write staged records to the final destination (data/output/, DB, API …).
"""

from pathlib import Path

from utils.logger import get_logger

log = get_logger(__name__)
STAGE_DIR = Path("data/staging")
OUTPUT_DIR = Path("data/output")


def run(destination: str = "file") -> dict:
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    log.info("load: starting (destination=%r)", destination)

    records_written = 0
    errors = 0

    if destination == "file":
        # TODO: read from STAGE_DIR, write to OUTPUT_DIR
        pass
    else:
        # TODO: write to database / external API
        pass

    log.info("load: %d written, %d errors", records_written, errors)
    return {"records_in": 0, "records_out": records_written, "errors": errors}
'''
249
+
250
+
251
+ def _runner(include_llm: bool) -> str:
252
+ enrich_import = "from pipeline import enrich\n" if True else ""
253
+ enrich_call = (
254
+ " summary['enrich'] = enrich.run(provider=provider)\n"
255
+ if include_llm else
256
+ " summary['enrich'] = enrich.run()\n"
257
+ )
258
+ provider_param = (
259
+ " provider: str = \"openai\","
260
+ if include_llm else ""
261
+ )
262
+ return f'''\
263
+ """
264
+ pipeline.runner
265
+ ~~~~~~~~~~~~~~~
266
+ Orchestrates the full extract → transform → enrich → load sequence.
267
+
268
+ Run the full pipeline:
269
+ python -m pipeline.runner
270
+
271
+ Run individual stages:
272
+ python -m pipeline.extract
273
+ python -m pipeline.transform
274
+ python -m pipeline.enrich
275
+ python -m pipeline.load
276
+ """
277
+
278
+ from pipeline import extract, transform, enrich, load
279
+ from utils.logger import get_logger
280
+
281
+ log = get_logger(__name__)
282
+
283
+
284
+ def run(
285
+ source: str = "",
286
+ destination: str = "file",
287
+ {(" provider: str = 'openai'," if include_llm else "")}
288
+ ) -> dict:
289
+ log.info("pipeline: start")
290
+ summary = {{}}
291
+
292
+ summary["extract"] = extract.run(source=source)
293
+ summary["transform"] = transform.run()
294
+ {enrich_call} summary["load"] = load.run(destination=destination)
295
+
296
+ total_errors = sum(s.get("errors", 0) for s in summary.values())
297
+ log.info("pipeline: done — %d total errors", total_errors)
298
+ return summary
299
+
300
+
301
+ if __name__ == "__main__":
302
+ import json, sys
303
+ result = run()
304
+ print(json.dumps(result, indent=2))
305
+ sys.exit(0 if all(s.get("errors", 0) == 0 for s in result.values()) else 1)
306
+ '''
307
+
308
+
309
def _schemas_records() -> str:
    # Template for schemas/records.py — Pydantic data contracts shared by
    # all generated pipeline stages.
    return '''\
"""
schemas.records
~~~~~~~~~~~~~~~
Pydantic models for input and output records.

Define your data contracts here so every pipeline stage can import and
validate against them.
"""

from typing import Optional
from pydantic import BaseModel


class InputRecord(BaseModel):
    """Raw record as received from the source."""
    id: str
    raw_text: str


class OutputRecord(BaseModel):
    """Enriched / transformed record written to the destination."""
    id: str
    processed_text: str
    enriched_field: Optional[str] = None
'''
336
+
337
+
338
def _test_pipeline() -> str:
    # Template for tests/test_pipeline.py — offline smoke tests for the
    # generated project: one per stage plus one for the runner.
    return '''\
"""tests.test_pipeline — smoke tests for each stage."""

import pytest
from unittest.mock import patch


def test_extract_returns_summary():
    from pipeline import extract
    # patch out any I/O so the test stays offline
    result = extract.run(source="")
    assert "records_out" in result
    assert "errors" in result


def test_transform_returns_summary():
    from pipeline import transform
    result = transform.run()
    assert "records_out" in result


def test_load_returns_summary(tmp_path, monkeypatch):
    import pipeline.load as load_mod
    monkeypatch.setattr(load_mod, "OUTPUT_DIR", tmp_path / "output")
    result = load_mod.run(destination="file")
    assert "records_out" in result


def test_runner_returns_all_stages():
    from pipeline.runner import run
    result = run()
    assert set(result.keys()) >= {"extract", "transform", "enrich", "load"}
'''
372
+
373
+
374
+ def _pipeline_pyproject(
375
+ project_name: str, version: str, description: str, author: str, include_llm: bool
376
+ ) -> str:
377
+ author_line = f' "{author}",' if author else ' # "Your Name <you@example.com>",'
378
+ llm_deps = """\
379
+ "openai",
380
+ "google-generativeai",
381
+ "tqdm",
382
+ """ if include_llm else ""
383
+ return f"""\
384
+ [project]
385
+ name = "{project_name}"
386
+ version = "{version}"
387
+ description = "{description}"
388
+ readme = "README.md"
389
+ requires-python = ">=3.10"
390
+ authors = [
391
+ {author_line}
392
+ ]
393
+
394
+ dependencies = [
395
+ "infrakit",
396
+ "pydantic>=2.0",
397
+ {llm_deps}]
398
+
399
+ [project.optional-dependencies]
400
+ dev = [
401
+ "pytest",
402
+ "pytest-cov",
403
+ ]
404
+ """
405
+
406
+
407
+ def _pipeline_readme(project_name: str, description: str, include_llm: bool) -> str:
408
+ title = project_name.replace("-", " ").replace("_", " ").title()
409
+ desc_line = f"\n{description}\n" if description else ""
410
+ llm_note = (
411
+ "\nIncludes an LLM enrichment step via `infrakit.llm`. "
412
+ "Set `OPENAI_API_KEY` or `GEMINI_API_KEY` to enable it.\n"
413
+ ) if include_llm else ""
414
+ return f"""\
415
+ # {title}
416
+ {desc_line}{llm_note}
417
+ ## Setup
418
+
419
+ ```bash
420
+ pip install -e .
421
+ ```
422
+
423
+ ## Run
424
+
425
+ ```bash
426
+ # full pipeline
427
+ python -m pipeline.runner
428
+
429
+ # individual stages
430
+ python -m pipeline.extract
431
+ python -m pipeline.transform
432
+ python -m pipeline.enrich
433
+ python -m pipeline.load
434
+ ```
435
+
436
+ ## Structure
437
+
438
+ | Path | Purpose |
439
+ |---|---|
440
+ | `pipeline/extract.py` | Pull records from source |
441
+ | `pipeline/transform.py` | Clean and reshape |
442
+ | `pipeline/enrich.py` | {"LLM enrichment" if include_llm else "Computed / derived fields"} |
443
+ | `pipeline/load.py` | Write to destination |
444
+ | `pipeline/runner.py` | Orchestrate all stages |
445
+ | `schemas/records.py` | Pydantic data contracts |
446
+ | `data/input/` | Raw source data (not committed) |
447
+ | `data/staging/` | Intermediate (not committed) |
448
+ | `data/output/` | Final output (not committed) |
449
+
450
+ ## Development
451
+
452
+ ```bash
453
+ pip install -e ".[dev]"
454
+ pytest
455
+ ```
456
+ """
457
+
458
+
459
def _pipeline_gitignore() -> str:
    # Extends the shared base .gitignore (from generator._gitignore) with
    # pipeline-specific entries: data directories and secret files must
    # never be committed.
    return _gitignore() + """\
# Pipeline data (never commit raw / staging / output data)
data/input/
data/staging/
data/output/

# Keys
.env
keys.json
"""
470
+
471
+
472
+ # ── public API ────────────────────────────────────────────────────────────────
473
+
474
+
475
def scaffold_pipeline(
    project_dir: Path,
    *,
    version: str = "0.1.0",
    description: str = "",
    author: str = "",
    config_fmt: str = "env",
    deps: str = "toml",
    include_llm: bool = False,
) -> ScaffoldResult:
    """
    Scaffold a data pipeline / ETL project under ``project_dir``.

    Parameters
    ----------
    project_dir:
        Root directory for the project.
    version:
        Starting version string.
    description:
        Short project description.
    author:
        Author string.
    config_fmt:
        Config file format — ``"env"``, ``"yaml"``, or ``"json"``.
    deps:
        ``"toml"`` or ``"requirements"``.
    include_llm:
        Whether to wire up an LLM enrichment step in the pipeline.

    Returns
    -------
    ScaffoldResult
        Record of every directory and file created, in creation order.
    """
    result = ScaffoldResult(project_dir=project_dir)
    # The directory name doubles as the project/package name.
    project_name = project_dir.name

    # ── directories ───────────────────────────────────────────────────────────
    _mkdir(result, project_dir)
    _mkdir(result, project_dir / "src")
    _mkdir(result, project_dir / "pipeline")
    _mkdir(result, project_dir / "schemas")
    _mkdir(result, project_dir / "data" / "input")
    _mkdir(result, project_dir / "data" / "staging")
    _mkdir(result, project_dir / "data" / "output")
    _mkdir(result, project_dir / "utils")
    _mkdir(result, project_dir / "tests")
    _mkdir(result, project_dir / "logs")

    # ── src ───────────────────────────────────────────────────────────────────
    _write(result, project_dir / "src" / "__init__.py", _src_init(version))

    # ── pipeline stages ───────────────────────────────────────────────────────
    _write(result, project_dir / "pipeline" / "__init__.py", _pipeline_pkg_init())
    _write(result, project_dir / "pipeline" / "extract.py", _extract())
    _write(result, project_dir / "pipeline" / "transform.py", _transform())
    _write(result, project_dir / "pipeline" / "enrich.py", _enrich(include_llm))
    _write(result, project_dir / "pipeline" / "load.py", _load())
    _write(result, project_dir / "pipeline" / "runner.py", _runner(include_llm))

    # ── schemas ───────────────────────────────────────────────────────────────
    _write(result, project_dir / "schemas" / "__init__.py", "")
    _write(result, project_dir / "schemas" / "records.py", _schemas_records())

    # ── utils ─────────────────────────────────────────────────────────────────
    _write(result, project_dir / "utils" / "__init__.py", '"""Shared utilities."""\n')
    _write(result, project_dir / "utils" / "logger.py", _logger_util())
    # The LLM helper is only written when the enrichment step is enabled.
    if include_llm:
        _write(result, project_dir / "utils" / "llm.py", _llm_util(project_name))

    # ── tests ─────────────────────────────────────────────────────────────────
    _write(result, project_dir / "tests" / "__init__.py", _tests_init())
    _write(result, project_dir / "tests" / "test_pipeline.py", _test_pipeline())

    # ── config ────────────────────────────────────────────────────────────────
    # _config_content picks the filename (e.g. ".env" / "config.yaml")
    # to match the requested format.
    cfg_name, cfg_content = _config_content(config_fmt)
    _write(result, project_dir / cfg_name, cfg_content)

    # ── dependency file ───────────────────────────────────────────────────────
    # Either a flat requirements.txt or a full pyproject.toml, never both.
    if deps == "requirements":
        _write(result, project_dir / "requirements.txt",
               _requirements_txt(project_name))
    else:
        _write(result, project_dir / "pyproject.toml",
               _pipeline_pyproject(project_name, version, description, author, include_llm))

    # ── repo files ────────────────────────────────────────────────────────────
    _write(result, project_dir / "README.md",
           _pipeline_readme(project_name, description, include_llm))
    _write(result, project_dir / ".gitignore", _pipeline_gitignore())

    return result
@@ -0,0 +1,121 @@
1
+ """
2
+ infrakit.scaffolder.templates.registry
3
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
+ Central registry mapping template names to their scaffold functions.
5
+
6
+ Usage (programmatic)
7
+ ---------------------
8
+ from infrakit.scaffolder.templates.registry import get_template, list_templates
9
+
10
+ fn = get_template("ai")
11
+ fn(Path("my_project"), version="0.2.0", include_notebooks=True)
12
+
13
+ Usage (CLI)
14
+ -----------
15
+ ik init my_project --template backend
16
+ ik init my_project --template pipeline --include-llm
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from dataclasses import dataclass
22
+ from pathlib import Path
23
+ from typing import Callable
24
+
25
+
26
+ @dataclass(frozen=True)
27
+ class TemplateInfo:
28
+ name: str
29
+ description: str
30
+ fn: Callable
31
+ extra_flags: list[str] # optional flags supported by this template
32
+
33
+
34
+ def _load_registry() -> dict[str, TemplateInfo]:
35
+ # Imports are deferred so that only the SDK(s) required by the chosen
36
+ # template need to be installed (e.g. openai is not needed for cli_tool).
37
+ from infrakit.scaffolder.generator import scaffold_basic
38
+ from infrakit.scaffolder.templates.ai import scaffold_ai
39
+ from infrakit.scaffolder.templates.backend import scaffold_backend
40
+ from infrakit.scaffolder.templates.cli_tool import scaffold_cli_tool
41
+ from infrakit.scaffolder.templates.pipeline import scaffold_pipeline
42
+
43
+ entries = [
44
+ TemplateInfo(
45
+ name="basic",
46
+ description="Minimal project — src/, utils/, tests/, logger.",
47
+ fn=scaffold_basic,
48
+ extra_flags=[],
49
+ ),
50
+ TemplateInfo(
51
+ name="ai",
52
+ description=(
53
+ "AI / ML project — pipelines, data dirs, notebooks, "
54
+ "utils/llm.py, utils/logger.py, prompts/."
55
+ ),
56
+ fn=scaffold_ai,
57
+ extra_flags=["--include-notebooks"],
58
+ ),
59
+ TemplateInfo(
60
+ name="backend",
61
+ description=(
62
+ "FastAPI service — app/, routes/, services/, middleware/, "
63
+ "utils/llm.py, Dockerfile, docker-compose."
64
+ ),
65
+ fn=scaffold_backend,
66
+ extra_flags=["--include-llm / --no-include-llm"],
67
+ ),
68
+ TemplateInfo(
69
+ name="cli-tool",
70
+ description=(
71
+ "Distributable Typer CLI — src/<pkg>/cli/, commands/, "
72
+ "entry point wired via pyproject.toml."
73
+ ),
74
+ fn=scaffold_cli_tool,
75
+ extra_flags=["--include-llm / --no-include-llm"],
76
+ ),
77
+ TemplateInfo(
78
+ name="pipeline",
79
+ description=(
80
+ "Data pipeline / ETL — extract, transform, enrich, load stages, "
81
+ "schemas/, data dirs."
82
+ ),
83
+ fn=scaffold_pipeline,
84
+ extra_flags=["--include-llm / --no-include-llm"],
85
+ ),
86
+ ]
87
+ return {e.name: e for e in entries}
88
+
89
+
90
+ # module-level singleton
91
+ _REGISTRY: dict[str, TemplateInfo] | None = None
92
+
93
+
94
+ def _registry() -> dict[str, TemplateInfo]:
95
+ global _REGISTRY
96
+ if _REGISTRY is None:
97
+ _REGISTRY = _load_registry()
98
+ return _REGISTRY
99
+
100
+
101
+ def list_templates() -> list[TemplateInfo]:
102
+ """Return all registered templates in definition order."""
103
+ return list(_registry().values())
104
+
105
+
106
+ def get_template(name: str) -> Callable:
107
+ """
108
+ Return the scaffold function for *name*.
109
+
110
+ Raises
111
+ ------
112
+ ValueError
113
+ If *name* is not a known template.
114
+ """
115
+ reg = _registry()
116
+ if name not in reg:
117
+ available = ", ".join(f"'{k}'" for k in reg)
118
+ raise ValueError(
119
+ f"Unknown template '{name}'. Available: {available}"
120
+ )
121
+ return reg[name].fn