python-infrakit-dev 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. infrakit/__init__.py +0 -0
  2. infrakit/cli/__init__.py +1 -0
  3. infrakit/cli/commands/__init__.py +1 -0
  4. infrakit/cli/commands/deps.py +530 -0
  5. infrakit/cli/commands/init.py +129 -0
  6. infrakit/cli/commands/llm.py +295 -0
  7. infrakit/cli/commands/logger.py +160 -0
  8. infrakit/cli/commands/module.py +342 -0
  9. infrakit/cli/commands/time.py +81 -0
  10. infrakit/cli/main.py +65 -0
  11. infrakit/core/__init__.py +0 -0
  12. infrakit/core/config/__init__.py +0 -0
  13. infrakit/core/config/converter.py +480 -0
  14. infrakit/core/config/exporter.py +304 -0
  15. infrakit/core/config/loader.py +713 -0
  16. infrakit/core/config/validator.py +389 -0
  17. infrakit/core/logger/__init__.py +21 -0
  18. infrakit/core/logger/formatters.py +143 -0
  19. infrakit/core/logger/handlers.py +322 -0
  20. infrakit/core/logger/retention.py +176 -0
  21. infrakit/core/logger/setup.py +314 -0
  22. infrakit/deps/__init__.py +239 -0
  23. infrakit/deps/clean.py +141 -0
  24. infrakit/deps/depfile.py +405 -0
  25. infrakit/deps/health.py +357 -0
  26. infrakit/deps/optimizer.py +642 -0
  27. infrakit/deps/scanner.py +550 -0
  28. infrakit/llm/__init__.py +35 -0
  29. infrakit/llm/batch.py +165 -0
  30. infrakit/llm/client.py +575 -0
  31. infrakit/llm/key_manager.py +728 -0
  32. infrakit/llm/llm_readme.md +306 -0
  33. infrakit/llm/models.py +148 -0
  34. infrakit/llm/providers/__init__.py +5 -0
  35. infrakit/llm/providers/base.py +112 -0
  36. infrakit/llm/providers/gemini.py +164 -0
  37. infrakit/llm/providers/openai.py +168 -0
  38. infrakit/llm/rate_limiter.py +54 -0
  39. infrakit/scaffolder/__init__.py +31 -0
  40. infrakit/scaffolder/ai.py +508 -0
  41. infrakit/scaffolder/backend.py +555 -0
  42. infrakit/scaffolder/cli_tool.py +386 -0
  43. infrakit/scaffolder/generator.py +338 -0
  44. infrakit/scaffolder/pipeline.py +562 -0
  45. infrakit/scaffolder/registry.py +121 -0
  46. infrakit/time/__init__.py +60 -0
  47. infrakit/time/profiler.py +511 -0
  48. python_infrakit_dev-0.1.0.dist-info/METADATA +124 -0
  49. python_infrakit_dev-0.1.0.dist-info/RECORD +51 -0
  50. python_infrakit_dev-0.1.0.dist-info/WHEEL +4 -0
  51. python_infrakit_dev-0.1.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,508 @@
1
+ """
2
+ infrakit.scaffolder.templates.ai
3
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
+ Scaffold an AI / ML project.
5
+
6
+ Layout
7
+ ------
8
+ <project>/
9
+ ├── src/
10
+ │ └── __init__.py
11
+ ├── pipelines/ # data → feature → train → eval stages
12
+ │ └── __init__.py
13
+ ├── data/
14
+ │ ├── raw/ # original, immutable data
15
+ │ ├── processed/ # cleaned / feature-engineered
16
+ │ └── outputs/ # model artefacts, predictions
17
+ ├── notebooks/ # exploratory Jupyter notebooks
18
+ ├── utils/
19
+ │ ├── __init__.py
20
+ │ ├── logger.py # infrakit.logger boot (same pattern as basic)
21
+ │ └── llm.py # infrakit.llm boot — ready-to-import LLMClient
22
+ ├── prompts/ # .txt prompt templates kept out of code
23
+ │ └── default.txt
24
+ ├── tests/
25
+ │ └── __init__.py
26
+ ├── logs/
27
+ ├── pyproject.toml / requirements.txt
28
+ ├── config.{env|yaml|json}
29
+ ├── README.md
30
+ └── .gitignore
31
+ """
32
+
33
+ from __future__ import annotations
34
+
35
+ from pathlib import Path
36
+
37
+ from infrakit.scaffolder.generator import (
38
+ ScaffoldResult,
39
+ _mkdir,
40
+ _write,
41
+ _config_content,
42
+ _gitignore,
43
+ _logger_util,
44
+ _src_init,
45
+ _tests_init,
46
+ _pyproject_toml,
47
+ _requirements_txt,
48
+ )
49
+
50
+
51
+ # ── template content ──────────────────────────────────────────────────────────
52
+
53
+
54
def _llm_util(project_name: str) -> str:
    """Return the source text for the scaffolded project's ``utils/llm.py``.

    The generated module boots a single ``LLMClient`` instance and re-exports
    it (plus ``Prompt``) for the rest of the project to import.

    NOTE(review): ``project_name`` is accepted but does not appear anywhere in
    the template below — confirm whether it was meant to be interpolated.
    """
    # f-string template: literal braces in the generated dict are escaped {{ }}.
    return f'''\
"""
utils.llm
~~~~~~~~~
Thin wrapper that boots the infrakit LLM client once and exports it.

The client reads key state from ``~/.infrakit/llm/`` by default, and
loads quota limits from ``~/.infrakit/llm/quotas.json`` if that file
exists. Both paths can be overridden with environment variables.

Usage
-----
    from utils.llm import llm, Prompt

    response = llm.generate(Prompt(user="Summarise this text: ..."), provider="openai")
    print(response.content)

    # structured output
    from pydantic import BaseModel

    class Summary(BaseModel):
        title: str
        bullets: list[str]

    response = llm.generate(
        Prompt(system="Return only JSON.", user="Summarise: ..."),
        provider="openai",
        response_model=Summary,
    )
    if response.schema_matched:
        print(response.parsed.bullets)

    # async batch (inside an async function)
    batch = await llm.async_batch_generate(prompts, provider="gemini")
"""

import json
import os
from pathlib import Path

from infrakit.llm import LLMClient, Prompt  # re-export Prompt for convenience

# ── key loading ───────────────────────────────────────────────────────────────
# Keys are read from the environment or from a local keys.json file.
# Never commit real API keys — use .env or your secret manager.

def _load_keys() -> dict:
    keys_file = Path(os.getenv("LLM_KEYS_FILE", "keys.json"))
    if keys_file.exists():
        with open(keys_file) as f:
            return json.load(f)

    # fall back to individual env vars
    openai_key = os.getenv("OPENAI_API_KEY", "")
    gemini_key = os.getenv("GEMINI_API_KEY", "")
    return {{
        "openai_keys": [openai_key] if openai_key else [],
        "gemini_keys": [gemini_key] if gemini_key else [],
    }}


# ── client singleton ──────────────────────────────────────────────────────────

llm: LLMClient = LLMClient(
    keys=_load_keys(),
    # storage_dir and quota_file default to ~/.infrakit/llm/
    # override with env vars if needed:
    storage_dir=os.getenv("LLM_STATE_DIR") or None,
    quota_file=os.getenv("LLM_QUOTA_FILE") or None,
    mode=os.getenv("LLM_MODE", "async"),  # "async" | "threaded"
    max_concurrent=int(os.getenv("LLM_CONCURRENCY", "3")),
    openai_model=os.getenv("OPENAI_MODEL") or None,
    gemini_model=os.getenv("GEMINI_MODEL") or None,
)

__all__ = ["llm", "Prompt"]
'''
132
+
133
+
134
def _pipeline_init() -> str:
    """Return the source for ``pipelines/__init__.py`` (package docstring only)."""
    return '''\
"""
pipelines
~~~~~~~~~
Each module in this package is a self-contained stage.

Typical order:
    ingest -> preprocess -> featurise -> train -> evaluate -> predict
"""
'''
145
+
146
+
147
def _pipeline_ingest() -> str:
    """Return the source for ``pipelines/ingest.py`` (raw-data loading stage)."""
    return '''\
"""
pipelines.ingest
~~~~~~~~~~~~~~~~
Load raw data from source(s) into data/raw/.
"""

from pathlib import Path

from utils.logger import get_logger

log = get_logger(__name__)
RAW_DIR = Path("data/raw")


def run() -> None:
    RAW_DIR.mkdir(parents=True, exist_ok=True)
    log.info("ingest: starting")
    # TODO: load your raw data here
    log.info("ingest: done")


if __name__ == "__main__":
    run()
'''
173
+
174
+
175
def _pipeline_preprocess() -> str:
    """Return the source for ``pipelines/preprocess.py`` (cleaning stage)."""
    return '''\
"""
pipelines.preprocess
~~~~~~~~~~~~~~~~~~~~
Clean and normalise raw data; write to data/processed/.
"""

from pathlib import Path

from utils.logger import get_logger

log = get_logger(__name__)
RAW_DIR = Path("data/raw")
PROCESSED_DIR = Path("data/processed")


def run() -> None:
    PROCESSED_DIR.mkdir(parents=True, exist_ok=True)
    log.info("preprocess: starting")
    # TODO: read from RAW_DIR, clean, write to PROCESSED_DIR
    log.info("preprocess: done")


if __name__ == "__main__":
    run()
'''
202
+
203
+
204
def _pipeline_predict() -> str:
    """Return the source for ``pipelines/predict.py`` (LLM batch-inference stage)."""
    # The generated stage fans prompts through llm.batch_generate and maps
    # failed items to empty strings so output order matches input order.
    return '''\
"""
pipelines.predict
~~~~~~~~~~~~~~~~~
Run inference and write outputs to data/outputs/.
"""

from pathlib import Path

from utils.llm import llm, Prompt
from utils.logger import get_logger

log = get_logger(__name__)
OUTPUT_DIR = Path("data/outputs")


def run(inputs: list[str], provider: str = "openai") -> list[str]:
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    log.info("predict: %d inputs", len(inputs))

    prompts = [Prompt(user=text) for text in inputs]
    batch = llm.batch_generate(prompts, provider=provider)

    results = []
    for i, r in enumerate(batch.results):
        if r.error:
            log.warning("predict: item %d failed — %s", i, r.error)
            results.append("")
        else:
            results.append(r.content)

    log.info(
        "predict: done — %d ok, %d failed, %d tokens",
        batch.success_count,
        batch.failure_count,
        batch.total_tokens,
    )
    return results


if __name__ == "__main__":
    sample = ["Summarise the history of Python in one sentence."]
    outputs = run(sample)
    for o in outputs:
        print(o)
'''
251
+
252
+
253
def _default_prompt() -> str:
    """Return the ``prompts/default.txt`` template.

    ``{project}`` is a ``str.format``-style placeholder for the project name.
    """
    return """\
You are a helpful AI assistant working on the {project} project.
Answer concisely and accurately.
If you are unsure, say so rather than guessing.
"""
259
+
260
+
261
def _notebook_explore() -> str:
    """Return a minimal valid Jupyter notebook (nbformat 4 JSON) as a string.

    One markdown cell plus one code cell that wires up sys.path, the project
    logger, and the LLM client for interactive exploration.
    """
    # Minimal valid Jupyter notebook (JSON format)
    return '''{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": ["# Exploration\\n", "Initial data exploration notebook."]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "import sys\\n",
        "sys.path.insert(0, \'..\')\\n",
        "\\n",
        "from utils.logger import get_logger\\n",
        "from utils.llm import llm, Prompt\\n",
        "\\n",
        "log = get_logger(__name__)\\n",
        "log.info(\'notebook ready\')"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
    "language_info": {"name": "python", "version": "3.10.0"}
  },
  "nbformat": 4,
  "nbformat_minor": 5
}
'''
295
+
296
+
297
def _keys_json_template() -> str:
    """Return placeholder ``keys.json`` content (empty key lists, no secrets)."""
    # The generated file is listed in .gitignore by _ai_gitignore().
    return """\
{
  "_comment": "Fill in your API keys. Never commit this file — it is in .gitignore.",
  "openai_keys": [],
  "gemini_keys": []
}
"""
305
+
306
+
307
def _ai_gitignore() -> str:
    """Return the project ``.gitignore``: shared base plus AI/ML entries.

    Extends ``_gitignore()`` (from the generator module) with data-directory,
    model-artefact, notebook-checkpoint, and secret-file patterns.
    """
    return _gitignore() + """\
# Data — keep raw data out of git
data/raw/
data/processed/
data/outputs/

# Model artefacts
*.pt
*.pth
*.ckpt
*.safetensors
*.onnx
*.pkl
*.joblib

# Notebooks checkpoints
.ipynb_checkpoints/

# Keys (never commit)
keys.json
.env
"""
330
+
331
+
332
+ def _ai_readme(project_name: str, description: str) -> str:
333
+ title = project_name.replace("-", " ").replace("_", " ").title()
334
+ desc_line = f"\n{description}\n" if description else ""
335
+ return f"""\
336
+ # {title}
337
+ {desc_line}
338
+ ## Setup
339
+
340
+ ```bash
341
+ pip install -e .
342
+ ```
343
+
344
+ Copy and fill in your API keys:
345
+
346
+ ```bash
347
+ cp keys.json.template keys.json
348
+ # edit keys.json
349
+ ```
350
+
351
+ Optionally create `~/.infrakit/llm/quotas.json` to set per-model rate limits
352
+ (see `infrakit.llm` docs).
353
+
354
+ ## Structure
355
+
356
+ | Path | Purpose |
357
+ |---|---|
358
+ | `src/` | Core library code |
359
+ | `pipelines/` | Data → feature → train → eval → predict stages |
360
+ | `data/raw/` | Original immutable data (not committed) |
361
+ | `data/processed/` | Cleaned data (not committed) |
362
+ | `data/outputs/` | Model outputs / predictions (not committed) |
363
+ | `notebooks/` | Exploratory Jupyter notebooks |
364
+ | `utils/llm.py` | LLM client singleton — import and use directly |
365
+ | `utils/logger.py` | Logger singleton |
366
+ | `prompts/` | Prompt templates (plain text, version-controlled) |
367
+
368
+ ## Running a pipeline stage
369
+
370
+ ```bash
371
+ python -m pipelines.ingest
372
+ python -m pipelines.preprocess
373
+ python -m pipelines.predict
374
+ ```
375
+
376
+ ## Development
377
+
378
+ ```bash
379
+ pip install -e ".[dev]"
380
+ pytest
381
+ ```
382
+ """
383
+
384
+
385
+ def _ai_pyproject(project_name: str, version: str, description: str, author: str) -> str:
386
+ author_line = f' "{author}",' if author else ' # "Your Name <you@example.com>",'
387
+ return f"""\
388
+ [project]
389
+ name = "{project_name}"
390
+ version = "{version}"
391
+ description = "{description}"
392
+ readme = "README.md"
393
+ requires-python = ">=3.10"
394
+ authors = [
395
+ {author_line}
396
+ ]
397
+
398
+ dependencies = [
399
+ "infrakit",
400
+ "openai",
401
+ "google-genai",
402
+ "pydantic>=2.0",
403
+ "tqdm",
404
+ ]
405
+
406
+ [project.optional-dependencies]
407
+ dev = [
408
+ "pytest",
409
+ "pytest-cov",
410
+ "jupyter",
411
+ "ipykernel",
412
+ ]
413
+ """
414
+
415
+
416
+ # ── public API ────────────────────────────────────────────────────────────────
417
+
418
+
419
def scaffold_ai(
    project_dir: Path,
    *,
    version: str = "0.1.0",
    description: str = "",
    author: str = "",
    config_fmt: str = "env",
    deps: str = "toml",
    include_notebooks: bool = True,
) -> ScaffoldResult:
    """
    Scaffold an AI / ML project layout under ``project_dir``.

    Parameters
    ----------
    project_dir:
        Root directory for the project (created if absent).
    version:
        Starting version string.
    description:
        Short project description.
    author:
        Author string.
    config_fmt:
        Config file format — ``"env"``, ``"yaml"``, or ``"json"``.
    deps:
        Dependency file style — ``"toml"`` or ``"requirements"``.
    include_notebooks:
        Whether to create the ``notebooks/`` directory with a starter notebook.
    """
    result = ScaffoldResult(project_dir=project_dir)
    project_name = project_dir.name

    # ── directories ───────────────────────────────────────────────────────────
    _mkdir(result, project_dir)
    _mkdir(result, project_dir / "src")
    _mkdir(result, project_dir / "pipelines")
    _mkdir(result, project_dir / "data" / "raw")
    _mkdir(result, project_dir / "data" / "processed")
    _mkdir(result, project_dir / "data" / "outputs")
    _mkdir(result, project_dir / "utils")
    _mkdir(result, project_dir / "prompts")
    _mkdir(result, project_dir / "tests")
    _mkdir(result, project_dir / "logs")

    if include_notebooks:
        _mkdir(result, project_dir / "notebooks")

    # ── src ───────────────────────────────────────────────────────────────────
    _write(result, project_dir / "src" / "__init__.py", _src_init(version))

    # ── pipelines ─────────────────────────────────────────────────────────────
    _write(result, project_dir / "pipelines" / "__init__.py", _pipeline_init())
    _write(result, project_dir / "pipelines" / "ingest.py", _pipeline_ingest())
    _write(result, project_dir / "pipelines" / "preprocess.py", _pipeline_preprocess())
    _write(result, project_dir / "pipelines" / "predict.py", _pipeline_predict())

    # ── utils ─────────────────────────────────────────────────────────────────
    _write(result, project_dir / "utils" / "__init__.py", '"""Shared utilities."""\n')
    _write(result, project_dir / "utils" / "logger.py", _logger_util())
    _write(result, project_dir / "utils" / "llm.py", _llm_util(project_name))

    # ── prompts ───────────────────────────────────────────────────────────────
    # BUGFIX: the documented layout promises prompts/default.txt, but the
    # directory was previously created empty and _default_prompt() was never
    # used. Fill its {project} placeholder with the project name at scaffold
    # time so the generated prompt reads naturally.
    _write(result, project_dir / "prompts" / "default.txt",
           _default_prompt().format(project=project_name))

    # ── notebooks ─────────────────────────────────────────────────────────────
    if include_notebooks:
        _write(result, project_dir / "notebooks" / "01_explore.ipynb",
               _notebook_explore())

    # ── tests ─────────────────────────────────────────────────────────────────
    _write(result, project_dir / "tests" / "__init__.py", _tests_init())

    # ── config ────────────────────────────────────────────────────────────────
    cfg_name, cfg_content = _config_content(config_fmt)
    _write(result, project_dir / cfg_name, cfg_content)

    # ── keys template (safe placeholder — never contains real keys) ───────────
    # BUGFIX: the generated README instructs `cp keys.json.template keys.json`,
    # so emit the .template file as well. keys.json itself is still written
    # for backward compatibility (it is covered by .gitignore).
    _write(result, project_dir / "keys.json.template", _keys_json_template())
    _write(result, project_dir / "keys.json", _keys_json_template())

    # ── dependency file ───────────────────────────────────────────────────────
    if deps == "requirements":
        _write(result, project_dir / "requirements.txt",
               _requirements_txt(project_name))
    else:
        _write(result, project_dir / "pyproject.toml",
               _ai_pyproject(project_name, version, description, author))

    # ── repo files ────────────────────────────────────────────────────────────
    _write(result, project_dir / "README.md", _ai_readme(project_name, description))
    _write(result, project_dir / ".gitignore", _ai_gitignore())

    return result