mikon 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. mikon/__init__.py +8 -0
  2. mikon/_runner.py +146 -0
  3. mikon/cli.py +220 -0
  4. mikon/sdk/__init__.py +40 -0
  5. mikon/sdk/config.py +7 -0
  6. mikon/sdk/context.py +163 -0
  7. mikon/sdk/datasets.py +177 -0
  8. mikon/sdk/job.py +94 -0
  9. mikon/sdk/module.py +325 -0
  10. mikon/server/__init__.py +1 -0
  11. mikon/server/api.py +450 -0
  12. mikon/server/app.py +131 -0
  13. mikon/server/discovery.py +412 -0
  14. mikon/server/docs.py +536 -0
  15. mikon/server/models.py +361 -0
  16. mikon/server/problems.py +32 -0
  17. mikon/server/registry.py +96 -0
  18. mikon/server/resources.py +709 -0
  19. mikon/server/runner.py +284 -0
  20. mikon/server/schema.py +51 -0
  21. mikon/server/settings.py +43 -0
  22. mikon/server/store.py +1050 -0
  23. mikon/templates/docs/USAGE-ja.md +924 -0
  24. mikon/templates/docs/USAGE.md +924 -0
  25. mikon/web/assets/KaTeX_AMS-Regular-BQhdFMY1.woff2 +0 -0
  26. mikon/web/assets/KaTeX_AMS-Regular-DMm9YOAa.woff +0 -0
  27. mikon/web/assets/KaTeX_AMS-Regular-DRggAlZN.ttf +0 -0
  28. mikon/web/assets/KaTeX_Caligraphic-Bold-ATXxdsX0.ttf +0 -0
  29. mikon/web/assets/KaTeX_Caligraphic-Bold-BEiXGLvX.woff +0 -0
  30. mikon/web/assets/KaTeX_Caligraphic-Bold-Dq_IR9rO.woff2 +0 -0
  31. mikon/web/assets/KaTeX_Caligraphic-Regular-CTRA-rTL.woff +0 -0
  32. mikon/web/assets/KaTeX_Caligraphic-Regular-Di6jR-x-.woff2 +0 -0
  33. mikon/web/assets/KaTeX_Caligraphic-Regular-wX97UBjC.ttf +0 -0
  34. mikon/web/assets/KaTeX_Fraktur-Bold-BdnERNNW.ttf +0 -0
  35. mikon/web/assets/KaTeX_Fraktur-Bold-BsDP51OF.woff +0 -0
  36. mikon/web/assets/KaTeX_Fraktur-Bold-CL6g_b3V.woff2 +0 -0
  37. mikon/web/assets/KaTeX_Fraktur-Regular-CB_wures.ttf +0 -0
  38. mikon/web/assets/KaTeX_Fraktur-Regular-CTYiF6lA.woff2 +0 -0
  39. mikon/web/assets/KaTeX_Fraktur-Regular-Dxdc4cR9.woff +0 -0
  40. mikon/web/assets/KaTeX_Main-Bold-Cx986IdX.woff2 +0 -0
  41. mikon/web/assets/KaTeX_Main-Bold-Jm3AIy58.woff +0 -0
  42. mikon/web/assets/KaTeX_Main-Bold-waoOVXN0.ttf +0 -0
  43. mikon/web/assets/KaTeX_Main-BoldItalic-DxDJ3AOS.woff2 +0 -0
  44. mikon/web/assets/KaTeX_Main-BoldItalic-DzxPMmG6.ttf +0 -0
  45. mikon/web/assets/KaTeX_Main-BoldItalic-SpSLRI95.woff +0 -0
  46. mikon/web/assets/KaTeX_Main-Italic-3WenGoN9.ttf +0 -0
  47. mikon/web/assets/KaTeX_Main-Italic-BMLOBm91.woff +0 -0
  48. mikon/web/assets/KaTeX_Main-Italic-NWA7e6Wa.woff2 +0 -0
  49. mikon/web/assets/KaTeX_Main-Regular-B22Nviop.woff2 +0 -0
  50. mikon/web/assets/KaTeX_Main-Regular-Dr94JaBh.woff +0 -0
  51. mikon/web/assets/KaTeX_Main-Regular-ypZvNtVU.ttf +0 -0
  52. mikon/web/assets/KaTeX_Math-BoldItalic-B3XSjfu4.ttf +0 -0
  53. mikon/web/assets/KaTeX_Math-BoldItalic-CZnvNsCZ.woff2 +0 -0
  54. mikon/web/assets/KaTeX_Math-BoldItalic-iY-2wyZ7.woff +0 -0
  55. mikon/web/assets/KaTeX_Math-Italic-DA0__PXp.woff +0 -0
  56. mikon/web/assets/KaTeX_Math-Italic-flOr_0UB.ttf +0 -0
  57. mikon/web/assets/KaTeX_Math-Italic-t53AETM-.woff2 +0 -0
  58. mikon/web/assets/KaTeX_SansSerif-Bold-CFMepnvq.ttf +0 -0
  59. mikon/web/assets/KaTeX_SansSerif-Bold-D1sUS0GD.woff2 +0 -0
  60. mikon/web/assets/KaTeX_SansSerif-Bold-DbIhKOiC.woff +0 -0
  61. mikon/web/assets/KaTeX_SansSerif-Italic-C3H0VqGB.woff2 +0 -0
  62. mikon/web/assets/KaTeX_SansSerif-Italic-DN2j7dab.woff +0 -0
  63. mikon/web/assets/KaTeX_SansSerif-Italic-YYjJ1zSn.ttf +0 -0
  64. mikon/web/assets/KaTeX_SansSerif-Regular-BNo7hRIc.ttf +0 -0
  65. mikon/web/assets/KaTeX_SansSerif-Regular-CS6fqUqJ.woff +0 -0
  66. mikon/web/assets/KaTeX_SansSerif-Regular-DDBCnlJ7.woff2 +0 -0
  67. mikon/web/assets/KaTeX_Script-Regular-C5JkGWo-.ttf +0 -0
  68. mikon/web/assets/KaTeX_Script-Regular-D3wIWfF6.woff2 +0 -0
  69. mikon/web/assets/KaTeX_Script-Regular-D5yQViql.woff +0 -0
  70. mikon/web/assets/KaTeX_Size1-Regular-C195tn64.woff +0 -0
  71. mikon/web/assets/KaTeX_Size1-Regular-Dbsnue_I.ttf +0 -0
  72. mikon/web/assets/KaTeX_Size1-Regular-mCD8mA8B.woff2 +0 -0
  73. mikon/web/assets/KaTeX_Size2-Regular-B7gKUWhC.ttf +0 -0
  74. mikon/web/assets/KaTeX_Size2-Regular-Dy4dx90m.woff2 +0 -0
  75. mikon/web/assets/KaTeX_Size2-Regular-oD1tc_U0.woff +0 -0
  76. mikon/web/assets/KaTeX_Size3-Regular-CTq5MqoE.woff +0 -0
  77. mikon/web/assets/KaTeX_Size3-Regular-DgpXs0kz.ttf +0 -0
  78. mikon/web/assets/KaTeX_Size4-Regular-BF-4gkZK.woff +0 -0
  79. mikon/web/assets/KaTeX_Size4-Regular-DWFBv043.ttf +0 -0
  80. mikon/web/assets/KaTeX_Size4-Regular-Dl5lxZxV.woff2 +0 -0
  81. mikon/web/assets/KaTeX_Typewriter-Regular-C0xS9mPB.woff +0 -0
  82. mikon/web/assets/KaTeX_Typewriter-Regular-CO6r4hn1.woff2 +0 -0
  83. mikon/web/assets/KaTeX_Typewriter-Regular-D3Ib7_Hf.ttf +0 -0
  84. mikon/web/assets/index-DVrEHM_2.css +2 -0
  85. mikon/web/assets/index-WIx9l7H5.js +372 -0
  86. mikon/web/index.html +13 -0
  87. mikon-0.0.1.dist-info/METADATA +138 -0
  88. mikon-0.0.1.dist-info/RECORD +91 -0
  89. mikon-0.0.1.dist-info/WHEEL +4 -0
  90. mikon-0.0.1.dist-info/entry_points.txt +2 -0
  91. mikon-0.0.1.dist-info/licenses/LICENSE +21 -0
mikon/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ from mikon.sdk.config import Config
2
+ from mikon.sdk.context import RunContext
3
+ from mikon.sdk import datasets
4
+ from mikon.sdk.datasets import DatasetContext, dataset
5
+ from mikon.sdk.job import job
6
+ from mikon.sdk.module import ModuleFactory, ModuleRef, module
7
+
8
+ __all__ = ["Config", "DatasetContext", "ModuleFactory", "ModuleRef", "RunContext", "dataset", "datasets", "job", "module"]
mikon/_runner.py ADDED
@@ -0,0 +1,146 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import signal
6
+ import sys
7
+ import threading
8
+ import time
9
+ import traceback
10
+ from pathlib import Path
11
+ from typing import TextIO
12
+
13
+ from mikon.sdk.datasets import DatasetContext, get_dataset_registry
14
+ from mikon.sdk.context import RunContext
15
+ from mikon.sdk.job import get_registry
16
+ from mikon.sdk.module import instantiate_config_modules, validate_module_nest_depth
17
+ from mikon.server.discovery import import_project
18
+ from mikon.server.models import RunStatus
19
+ from mikon.server.settings import load_settings
20
+ from mikon.server.store import Store
21
+
22
+
23
def main(argv: list[str] | None = None) -> int:
    """Execute a single mikon run from its run directory and record the outcome.

    Reads ``meta.json`` and ``config.json`` from ``--run-dir``, imports the
    project, validates the configuration, and calls the registered job or
    dataset function. While the function runs, a daemon thread keeps
    rewriting the heartbeat file and stdout/stderr are mirrored line by line
    into ``logs/events.jsonl``.

    Args:
        argv: Command-line arguments; ``None`` lets argparse use ``sys.argv``.

    Returns:
        The exit code that is also passed to ``store.write_status``: 0 on
        success, 143 when a SIGTERM/SIGINT interrupted the run, 1 on any
        other failure.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--run-dir", required=True)
    args = parser.parse_args(argv)
    run_dir = Path(args.run_dir).resolve()
    # The store is rooted two directory levels above the run directory.
    store = Store(run_dir.parents[1])
    meta = store.read_json(run_dir / "meta.json")
    run_id = meta["run_id"]

    stop_event = threading.Event()
    heartbeat = threading.Thread(
        target=_heartbeat_loop, args=(run_dir, stop_event), name="mikon-heartbeat", daemon=True
    )
    heartbeat.start()

    def _handle_signal(signum: int, frame: object) -> None:
        # Turn termination signals into KeyboardInterrupt so the handler
        # below records the run as stopped instead of failed.
        raise KeyboardInterrupt(f"received signal {signum}")

    signal.signal(signal.SIGTERM, _handle_signal)
    signal.signal(signal.SIGINT, _handle_signal)

    status = RunStatus.completed
    exit_code = 0
    error: str | None = None
    event_logger = _LogEventWriter(run_dir / "logs" / "events.jsonl")
    original_stdout = sys.stdout
    original_stderr = sys.stderr
    # Keep references to the tee writers so they can be flushed on exit.
    tee_stdout = _TeeLineWriter(original_stdout, event_logger, "stdout")
    tee_stderr = _TeeLineWriter(original_stderr, event_logger, "stderr")
    sys.stdout = tee_stdout
    sys.stderr = tee_stderr
    try:
        project_root = Path(meta["project_root"]).resolve()
        watch_paths = [Path(item).resolve() for item in meta["watch"]]
        import_project(project_root, watch_paths)
        kind = meta.get("kind", "job")
        if kind == "dataset":
            definition = get_dataset_registry().get(meta["job"])
        else:
            definition = get_registry().get(meta["job"])
        if definition is None:
            raise RuntimeError(f"{kind} not found during run: {meta['job']}")
        config_data = store.read_json(run_dir / "config.json")
        settings = load_settings(project_root)
        # Depth is checked on the raw data and again on the validated model dump.
        validate_module_nest_depth(config_data, settings.max_module_nest_depth)
        config = definition.config_type.model_validate(config_data)
        validate_module_nest_depth(config.model_dump(mode="json"), settings.max_module_nest_depth)
        instantiate_config_modules(config, max_depth=settings.max_module_nest_depth)
        ctx = DatasetContext(run_dir, meta["job"]) if kind == "dataset" else RunContext(run_dir)
        definition.func(config, ctx)
    except KeyboardInterrupt as exc:
        status = RunStatus.stopped
        exit_code = 143
        error = str(exc)
    except BaseException:
        # Deliberately broad: any other exit from the job counts as a failure.
        status = RunStatus.failed
        exit_code = 1
        error = traceback.format_exc()
    finally:
        # Flush the tee writers before restoring the streams. A final line
        # printed without a trailing newline is still buffered in the tee
        # and would otherwise never be written to events.jsonl.
        for writer in (tee_stdout, tee_stderr):
            try:
                writer.flush()
            except (OSError, ValueError):
                # Best effort: a closed or broken target stream must not
                # prevent the run status from being recorded below.
                pass
        sys.stdout = original_stdout
        sys.stderr = original_stderr
        event_logger.flush()
        stop_event.set()
        heartbeat.join(timeout=1)
        store.write_status(run_id, status, exit_code, error)
    return exit_code
87
+
88
+
89
def _heartbeat_loop(run_dir: Path, stop_event: threading.Event) -> None:
    """Rewrite ``run_dir/heartbeat`` with the current time until stopped.

    The file receives ``str(time.time())``. After every write the loop waits
    on ``stop_event`` for up to two seconds, so setting the event ends the
    loop without waiting out the full interval.
    """
    beat_file = run_dir.joinpath("heartbeat")
    while True:
        if stop_event.is_set():
            return
        beat_file.write_text(str(time.time()), encoding="utf-8")
        stop_event.wait(2)
94
+
95
+
96
class _LogEventWriter:
    """Append captured output lines to a JSON-lines event file.

    Every record holds a per-writer sequence number (``seq``), a timestamp
    (``t``), the stream label and the line text. The file is reopened in
    append mode for each record and a lock serializes writers.
    """

    def __init__(self, path: Path) -> None:
        """Remember ``path`` and create its parent directory if missing."""
        self.path = path
        path.parent.mkdir(parents=True, exist_ok=True)
        self._seq = 0
        self._lock = threading.Lock()

    def write_line(self, stream: str, line: str) -> None:
        """Append one record for ``line`` as emitted on ``stream``."""
        with self._lock:
            entry = {
                "seq": self._seq,
                "t": time.time(),
                "stream": stream,
                "line": line,
            }
            self._seq = self._seq + 1
            with self.path.open("a", encoding="utf-8") as sink:
                encoded = json.dumps(entry, separators=(",", ":"), allow_nan=False)
                sink.write(encoded + "\n")
                sink.flush()

    def flush(self) -> None:
        """Do nothing; each record is flushed when it is written."""
        return None
113
+
114
+
115
class _TeeLineWriter:
    """Text-stream stand-in that forwards writes and logs complete lines.

    Everything written is passed to ``target`` immediately. The text is also
    buffered until a newline arrives, at which point each complete line is
    handed to ``event_logger.write_line`` under the given ``stream`` label.

    ``writelines``, ``fileno`` and ``writable`` are provided so that code
    treating ``sys.stdout``/``sys.stderr`` as an ordinary text stream does
    not fail with ``AttributeError`` while the tee is installed.
    """

    def __init__(self, target: TextIO, event_logger: _LogEventWriter, stream: str) -> None:
        self.target = target
        self.event_logger = event_logger
        self.stream = stream
        # Text received since the last newline (never contains "\n").
        self._buffer = ""

    def write(self, text: str) -> int:
        """Forward ``text`` to the target and log every line it completes.

        Returns:
            Whatever the target's ``write`` returned.
        """
        written = self.target.write(text)
        self.target.flush()
        if "\n" in text:
            # Split once instead of rescanning the buffer for every newline;
            # the final piece is the new incomplete remainder.
            *complete, self._buffer = (self._buffer + text).split("\n")
            for line in complete:
                self.event_logger.write_line(self.stream, line)
        else:
            self._buffer += text
        return written

    def writelines(self, lines) -> None:
        """Write each item of ``lines`` through :meth:`write`."""
        for chunk in lines:
            self.write(chunk)

    def flush(self) -> None:
        """Flush the target and log any buffered partial line."""
        self.target.flush()
        if self._buffer:
            self.event_logger.write_line(self.stream, self._buffer)
            self._buffer = ""

    def isatty(self) -> bool:
        return self.target.isatty()

    def fileno(self) -> int:
        """Return the target's file descriptor (raises if it has none)."""
        return self.target.fileno()

    def writable(self) -> bool:
        return True

    @property
    def encoding(self) -> str | None:
        return self.target.encoding
143
+
144
+
145
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())
mikon/cli.py ADDED
@@ -0,0 +1,220 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Annotated, Any
6
+
7
+ import httpx
8
+ import typer
9
+ import uvicorn
10
+
11
+ from mikon.server.app import create_app
12
+ from mikon.server.resources import ResourceMonitor
13
+ from mikon.server.settings import load_settings
14
+
15
+
16
# Root command group and the nested "dataset" sub-command group.
app = typer.Typer(no_args_is_help=True)
dataset_app = typer.Typer(no_args_is_help=True)
app.add_typer(dataset_app, name="dataset")

# Directory named "templates" located next to this module.
_TEMPLATES_DIR = Path(__file__).parent / "templates"
21
+
22
+
23
@app.command()
def init(
    force: Annotated[bool, typer.Option("--force", help="Overwrite existing files.")] = False,
) -> None:
    # Scaffold a project in the current directory: mikon.toml, an example job
    # and the bundled usage guides. Existing files are kept unless --force.
    project_dir = Path.cwd()
    _write_template(project_dir / "mikon.toml", MIKON_TOML, force)
    _write_template(project_dir / "src" / "example.py", EXAMPLE_JOB, force)
    for guide_name in ("USAGE.md", "USAGE-ja.md"):
        guide = _TEMPLATES_DIR / "docs" / guide_name
        if not guide.exists():
            # A guide missing from the package is skipped silently.
            continue
        guide_text = guide.read_text(encoding="utf-8")
        _write_template(project_dir / "docs" / guide_name, guide_text, force)
    typer.echo("Initialized mikon project.")
35
+
36
+
37
@app.command()
def serve(
    host: Annotated[str, typer.Option()] = "127.0.0.1",
    port: Annotated[int, typer.Option()] = 8000,
    token: Annotated[str | None, typer.Option()] = None,
) -> None:
    # Any host other than 127.0.0.1/localhost is only allowed with a token.
    bound_locally = host in {"127.0.0.1", "localhost"}
    if not bound_locally and not token:
        raise typer.BadParameter("--token is required when binding outside localhost")
    application = create_app(token=token)
    uvicorn.run(application, host=host, port=port)
46
+
47
+
48
@app.command("run")
def run_job(
    job: str,
    gpu: Annotated[str, typer.Option("--gpu", help="Comma-separated unified GPU ids.")],
    config: Annotated[Path | None, typer.Option("--config", exists=True, dir_okay=False)] = None,
    set_values: Annotated[list[str] | None, typer.Option("--set", help="Override config value as key=value. Dotted keys are supported.")] = None,
    force: Annotated[bool, typer.Option("--force")] = False,
    server: Annotated[str, typer.Option("--server")] = "http://127.0.0.1:8000",
) -> None:
    # POST a run request to the server and print the raw response body.
    endpoint = server.rstrip("/") + "/api/runs"
    payload = {
        "job": job,
        "config": _load_config(config, set_values),
        "gpus": _split_csv(gpu),
        "force": force,
    }
    reply = httpx.post(endpoint, json=payload, timeout=30)
    _raise_for_problem(reply)
    typer.echo(reply.text)
65
+
66
+
67
@app.command()
def stop(
    run_id: str,
    server: Annotated[str, typer.Option("--server")] = "http://127.0.0.1:8000",
) -> None:
    # Ask the server to stop a run and print the raw response body.
    from urllib.parse import quote

    # Percent-encode the id so characters such as "/", "?" or "#" cannot
    # change which endpoint is addressed; ids made of letters, digits,
    # "_", "." and "-" are sent unchanged.
    encoded_run_id = quote(run_id, safe="")
    response = httpx.post(f"{server.rstrip('/')}/api/runs/{encoded_run_id}/stop", timeout=30)
    _raise_for_problem(response)
    typer.echo(response.text)
75
+
76
+
77
@app.command()
def doctor() -> None:
    # Print the resource monitor's diagnostics as indented JSON.
    monitor = ResourceMonitor(load_settings())
    report = monitor.diagnostics()
    typer.echo(report.model_dump_json(indent=2))
82
+
83
+
84
@dataset_app.command("register")
def dataset_register(
    name: str,
    path: Path,
    description: Annotated[str | None, typer.Option("--description")] = None,
    server: Annotated[str, typer.Option("--server")] = "http://127.0.0.1:8000",
) -> None:
    # POST the dataset name, path and description, then print the response body.
    endpoint = server.rstrip("/") + "/api/datasets"
    body = {"name": name, "path": str(path), "description": description}
    reply = httpx.post(endpoint, json=body, timeout=30)
    _raise_for_problem(reply)
    typer.echo(reply.text)
98
+
99
+
100
@dataset_app.command("build")
def dataset_build(
    name: str,
    config: Annotated[Path | None, typer.Option("--config", exists=True, dir_okay=False)] = None,
    set_values: Annotated[list[str] | None, typer.Option("--set", help="Override config value as key=value. Dotted keys are supported.")] = None,
    gpu: Annotated[str | None, typer.Option("--gpu", help="Comma-separated unified GPU ids.")] = None,
    force: Annotated[bool, typer.Option("--force")] = False,
    server: Annotated[str, typer.Option("--server")] = "http://127.0.0.1:8000",
) -> None:
    # POST a build request for the named dataset and print the response body.
    from urllib.parse import quote

    # Percent-encode the name so characters such as "/", "?" or "#" cannot
    # change which endpoint is addressed; names made of letters, digits,
    # "_", "." and "-" are sent unchanged.
    encoded_name = quote(name, safe="")
    response = httpx.post(
        f"{server.rstrip('/')}/api/datasets/{encoded_name}/build",
        # A missing --gpu is sent as an empty GPU list.
        json={"config": _load_config(config, set_values), "gpus": _split_csv(gpu or ""), "force": force},
        timeout=30,
    )
    _raise_for_problem(response)
    typer.echo(response.text)
116
+
117
+
118
def _write_template(path: Path, content: str, force: bool) -> None:
    """Write ``content`` to ``path`` as UTF-8, creating parent directories.

    An existing file is left alone unless ``force`` is true. Either outcome
    is reported through ``typer.echo``.
    """
    if force or not path.exists():
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(content, encoding="utf-8")
        typer.echo(f"Wrote {path}")
    else:
        typer.echo(f"Skipped existing {path}")
125
+
126
+
127
def _split_csv(value: str) -> list[str]:
    """Split a comma-separated string into stripped, non-empty items."""
    trimmed = (piece.strip() for piece in value.split(","))
    return [piece for piece in trimmed if piece]
129
+
130
+
131
def _load_config(config: Path | None, set_values: list[str] | None) -> dict[str, Any]:
    """Build the config payload from an optional JSON file plus overrides.

    Args:
        config: Path to a JSON file holding an object, or ``None`` to start
            from an empty config.
        set_values: ``key=value`` overrides applied in order on top of it.

    Returns:
        The resulting config dictionary.

    Raises:
        typer.BadParameter: If the file is not valid UTF-8 JSON, does not
            contain a JSON object, or an override is malformed.
    """
    config_data: Any = {}
    if config is not None:
        try:
            config_data = json.loads(config.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError) as exc:
            # Report unreadable JSON as a usage error, like the other
            # validation failures, rather than as a raw traceback.
            raise typer.BadParameter(f"--config is not valid JSON: {exc}") from exc
    if not isinstance(config_data, dict):
        raise typer.BadParameter("--config must contain a JSON object")
    for item in set_values or []:
        _apply_override(config_data, item)
    return config_data
138
+
139
+
140
def _apply_override(config: dict[str, Any], item: str) -> None:
    """Apply one ``key=value`` override to ``config`` in place.

    The value is parsed as JSON and kept as the raw string when it is not
    valid JSON. Dots in the key descend into nested objects; intermediate
    objects that are missing (or ``None``) are created.

    Raises:
        typer.BadParameter: If ``item`` has no ``=``, the key is empty or has
            an empty segment, or an intermediate value is not an object.
    """
    key, separator, raw_value = item.partition("=")
    if not separator:
        raise typer.BadParameter("--set values must use key=value")
    if not key:
        raise typer.BadParameter("--set key must not be empty")

    try:
        value: Any = json.loads(raw_value)
    except json.JSONDecodeError:
        value = raw_value

    *parents, leaf = key.split(".")
    node = config
    for segment in parents:
        if not segment:
            raise typer.BadParameter("--set dotted keys must not contain empty segments")
        child = node.get(segment)
        if child is None:
            child = node[segment] = {}
        if not isinstance(child, dict):
            raise typer.BadParameter(f"--set cannot assign nested key under non-object: {segment}")
        node = child
    if not leaf:
        raise typer.BadParameter("--set key must not end with dot")
    node[leaf] = value
165
+
166
+
167
def _raise_for_problem(response: httpx.Response) -> None:
    """Exit with status 1 when ``response`` has a status code of 400 or above.

    The message printed to stderr uses the JSON body's ``detail`` or
    ``title`` when available and falls back to the raw response text.
    """
    if response.status_code >= 400:
        try:
            body = response.json()
            detail = body.get("detail") or body.get("title") or response.text
        except Exception:
            # Body is not JSON or not an object: show it verbatim.
            detail = response.text
        typer.echo(f"Request failed ({response.status_code}): {detail}", err=True)
        raise typer.Exit(1)
177
+
178
+
179
# Contents written to ``mikon.toml`` by the ``init`` command.
MIKON_TOML = """[mikon]
watch = ["src"]
store = ".mikon"

[gpu]
occupancy_mem_mb = 500
occupancy_util = 5

[modules]
max_nest_depth = 8

[docs]
root = "docs"
"""
193
+
194
+
195
+ EXAMPLE_JOB = '''import time
196
+ from typing import Literal
197
+
198
+ import mikon
199
+ from mikon import Config, RunContext
200
+ from pydantic import Field
201
+
202
+
203
+ class ExampleConfig(Config):
204
+ lr: float = Field(1e-3, gt=0, le=1)
205
+ epochs: int = Field(5, ge=1, le=100)
206
+ optimizer: Literal["adam", "sgd"] = "adam"
207
+
208
+
209
+ @mikon.job
210
+ def example(config: ExampleConfig, ctx: RunContext) -> None:
211
+ for epoch in range(config.epochs):
212
+ loss = 1.0 / (epoch + 1)
213
+ print(f"epoch={epoch} loss={loss}", flush=True)
214
+ ctx.log_metric("loss", loss, step=epoch)
215
+ time.sleep(0.5)
216
+
217
+ artifact = ctx.artifacts_dir / "result.txt"
218
+ artifact.write_text(f"optimizer={config.optimizer}\\n", encoding="utf-8")
219
+ ctx.log_artifact("result.txt", artifact)
220
+ '''
mikon/sdk/__init__.py ADDED
@@ -0,0 +1,40 @@
1
+ from mikon.sdk.config import Config
2
+ from mikon.sdk.context import RunContext
3
+ from mikon.sdk import datasets
4
+ from mikon.sdk.datasets import (
5
+ DatasetContext,
6
+ DatasetDefinition,
7
+ clear_dataset_registry,
8
+ dataset,
9
+ get_dataset_registry,
10
+ )
11
+ from mikon.sdk.job import JobDefinition, clear_registry, get_registry, job
12
+ from mikon.sdk.module import (
13
+ ModuleDefinition,
14
+ ModuleFactory,
15
+ ModuleRef,
16
+ clear_module_registry,
17
+ get_module_registry,
18
+ module,
19
+ )
20
+
21
+ __all__ = [
22
+ "Config",
23
+ "DatasetContext",
24
+ "DatasetDefinition",
25
+ "RunContext",
26
+ "JobDefinition",
27
+ "ModuleDefinition",
28
+ "ModuleFactory",
29
+ "ModuleRef",
30
+ "clear_dataset_registry",
31
+ "clear_module_registry",
32
+ "clear_registry",
33
+ "dataset",
34
+ "datasets",
35
+ "get_dataset_registry",
36
+ "get_module_registry",
37
+ "get_registry",
38
+ "job",
39
+ "module",
40
+ ]
mikon/sdk/config.py ADDED
@@ -0,0 +1,7 @@
1
+ from pydantic import BaseModel, ConfigDict
2
+
3
+
4
class Config(BaseModel):
    """Base class for job configuration schemas."""

    # extra="forbid": input keys that the schema does not declare are
    # rejected during validation instead of being ignored.
    model_config = ConfigDict(extra="forbid")
mikon/sdk/context.py ADDED
@@ -0,0 +1,163 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import math
5
+ import os
6
+ import re
7
+ import shutil
8
+ import threading
9
+ import time
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+
14
class RunContext:
    """Runtime handle injected into mikon jobs.

    Gives a job access to its run directory: an ``artifacts`` directory plus
    append-only JSON-lines logs for metrics (``metrics.jsonl``), artifacts
    (``artifacts.jsonl``) and consumed inputs (``inputs.jsonl``).
    """

    def __init__(self, run_dir: str | os.PathLike[str] | None = None) -> None:
        """Create the run and artifact directories if they do not exist.

        Args:
            run_dir: Run directory; when omitted, the ``MIKON_RUN_DIR``
                environment variable is used.

        Raises:
            RuntimeError: If neither ``run_dir`` nor ``MIKON_RUN_DIR`` is set.
        """
        raw_run_dir = run_dir or os.environ.get("MIKON_RUN_DIR")
        if not raw_run_dir:
            raise RuntimeError("RunContext requires run_dir or MIKON_RUN_DIR")
        self.run_dir = Path(raw_run_dir)
        self.run_dir.mkdir(parents=True, exist_ok=True)
        self.artifacts_dir = self.run_dir / "artifacts"
        self.artifacts_dir.mkdir(parents=True, exist_ok=True)
        # One lock per log file so concurrent appends do not interleave.
        self._metric_lock = threading.Lock()
        self._artifact_lock = threading.Lock()
        self._input_lock = threading.Lock()

    @staticmethod
    def _append_jsonl(path: Path, lock: threading.Lock, record: dict[str, Any]) -> None:
        """Append ``record`` as one compact JSON line to ``path`` under ``lock``."""
        line = json.dumps(record, separators=(",", ":"), allow_nan=False) + "\n"
        with lock:
            with path.open("a", encoding="utf-8") as fp:
                fp.write(line)
                fp.flush()

    def log_metric(self, name: str, value: int | float, step: int | None = None) -> None:
        """Append one metric sample to ``metrics.jsonl``.

        Args:
            name: Non-empty metric name.
            value: Finite int or float (bools are rejected); stored as float.
            step: Optional step index recorded with the sample.

        Raises:
            ValueError: If ``name`` is empty or ``value`` is not finite,
                including ints too large to represent as a float.
            TypeError: If ``value`` is a bool or not a number.
        """
        if not isinstance(name, str) or not name.strip():
            raise ValueError("metric name must be a non-empty string")
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            raise TypeError("metric value must be int or float, but not bool")
        try:
            numeric = float(value)
        except OverflowError as exc:
            # An int beyond float range cannot be stored; report it the same
            # way as any other non-finite value.
            raise ValueError("metric value must be finite") from exc
        if not math.isfinite(numeric):
            raise ValueError("metric value must be finite")
        record: dict[str, Any] = {
            "t": time.time(),
            "step": step,
            "name": name,
            "value": numeric,
        }
        self._append_jsonl(self.run_dir / "metrics.jsonl", self._metric_lock, record)

    def log_artifact(self, name: str, path: str | os.PathLike[str]) -> Path:
        """Copy a file or directory into the artifacts directory and log it.

        Args:
            name: Relative path of the artifact inside ``artifacts_dir``.
            path: Existing file or directory to copy. Nothing is copied when
                it already is the destination.

        Returns:
            The resolved destination path.

        Raises:
            FileNotFoundError: If ``path`` does not exist.
            ValueError: If ``name`` is unsafe or escapes ``artifacts_dir``.
        """
        source = Path(path)
        if not source.exists():
            raise FileNotFoundError(source)

        artifact_path = _validate_artifact_name(name)
        destination = (self.artifacts_dir / artifact_path).resolve()
        artifacts_root = self.artifacts_dir.resolve()
        if not _is_relative_to(destination, artifacts_root):
            raise ValueError("artifact name must stay within artifacts_dir")
        destination.parent.mkdir(parents=True, exist_ok=True)
        if source.resolve() != destination.resolve():
            if source.is_dir():
                # Replace any previous copy of the directory wholesale.
                if destination.exists():
                    shutil.rmtree(destination)
                shutil.copytree(source, destination)
            else:
                shutil.copy2(source, destination)

        record = {
            "t": time.time(),
            "name": name,
            # Relative to the *resolved* root: ``destination`` is resolved, so
            # comparing against an unresolved (relative or symlinked)
            # ``artifacts_dir`` would raise ValueError.
            "path": str(destination.relative_to(artifacts_root)),
            "size": _path_size(destination),
        }
        self._append_jsonl(self.run_dir / "artifacts.jsonl", self._artifact_lock, record)
        return destination

    def use_dataset(self, name: str) -> Path:
        """Resolve a registered dataset's path and record it as an input.

        Raises:
            ValueError: If ``name`` is not a valid dataset name.
            FileNotFoundError: If the dataset has no ``meta.json`` in the
                store or its recorded path does not exist.
        """
        dataset_name = _validate_name(name, "dataset")
        meta_path = _store_root(self.run_dir) / "datasets" / dataset_name / "meta.json"
        if not meta_path.exists():
            raise FileNotFoundError(f"dataset not registered: {dataset_name}")
        meta = json.loads(meta_path.read_text(encoding="utf-8"))
        path = Path(meta["path"]).resolve()
        if not path.exists():
            raise FileNotFoundError(path)
        self._log_input(
            {
                "type": "uses-dataset",
                "dataset": dataset_name,
                "path": str(path),
            }
        )
        return path

    def use_artifact(self, run_id: str, name: str) -> Path:
        """Resolve an artifact produced by another run and record it as an input.

        Raises:
            ValueError: If ``run_id`` or ``name`` is not a safe path.
            FileNotFoundError: If the artifact is missing or resolves outside
                the source run's artifacts directory.
        """
        source_run_id = _validate_run_id(run_id)
        artifact_path = _validate_artifact_name(name)
        store_root = _store_root(self.run_dir)
        artifacts_root = (store_root / "runs" / source_run_id / "artifacts").resolve()
        path = (artifacts_root / artifact_path).resolve()
        if not _is_relative_to(path, artifacts_root) or not path.exists():
            raise FileNotFoundError(path)
        self._log_input(
            {
                "type": "consumes-artifact",
                "run_id": source_run_id,
                "artifact": str(artifact_path),
                "path": str(path),
            }
        )
        return path

    def _log_input(self, record: dict[str, Any]) -> None:
        """Append ``record`` with a timestamp to ``inputs.jsonl``."""
        payload = {"t": time.time(), **record}
        self._append_jsonl(self.run_dir / "inputs.jsonl", self._input_lock, payload)
121
+
122
+
123
def _path_size(path: Path) -> int:
    """Return the size in bytes of a file, or of all files below a directory."""
    if not path.is_file():
        total = 0
        for entry in path.rglob("*"):
            if entry.is_file():
                total += entry.stat().st_size
        return total
    return path.stat().st_size
127
+
128
+
129
def _validate_artifact_name(name: str) -> Path:
    """Return ``name`` as a ``Path`` after checking it is a safe relative path.

    Raises:
        ValueError: If ``name`` is not a non-blank string, is absolute, or
            has an empty, ``.`` or ``..`` component.
    """
    if not (isinstance(name, str) and name.strip()):
        raise ValueError("artifact name must be a non-empty relative path")
    candidate = Path(name)
    forbidden_parts = {"", ".", ".."}
    if candidate.is_absolute() or not forbidden_parts.isdisjoint(candidate.parts):
        raise ValueError("artifact name must be a safe relative path")
    return candidate
136
+
137
+
138
def _validate_name(name: str, label: str) -> str:
    """Return ``name`` if it is usable as a single, safe path segment.

    Args:
        name: Candidate name.
        label: Kind of thing being named; used only in error messages.

    Raises:
        ValueError: If ``name`` is not a string, contains characters other
            than letters, digits, ``_``, ``.`` and ``-``, or is ``.``/``..``.
    """
    if not isinstance(name, str) or not re.fullmatch(r"[A-Za-z0-9_.-]+", name):
        raise ValueError(f"{label} name must contain only letters, digits, underscore, dot, and hyphen")
    # "." and ".." pass the character check but would point at the parent
    # directories once joined into a path; reject them like _validate_run_id.
    if name in {".", ".."}:
        raise ValueError(f"{label} name must not be '.' or '..'")
    return name
142
+
143
+
144
def _validate_run_id(run_id: str) -> str:
    """Return ``run_id`` if it can be used as a single path segment.

    Raises:
        ValueError: If it is not a string, is ``.`` or ``..``, or contains
            characters other than letters, digits, ``_``, ``.`` and ``-``.
    """
    is_safe = (
        isinstance(run_id, str)
        and run_id not in {".", ".."}
        and re.fullmatch(r"[A-Za-z0-9_.-]+", run_id) is not None
    )
    if is_safe:
        return run_id
    raise ValueError("run_id must be a safe path segment")
148
+
149
+
150
def _store_root(run_dir: Path) -> Path:
    """Return the store root that ``run_dir`` belongs to.

    A non-empty ``MIKON_STORE`` environment variable wins (resolved to an
    absolute path). Otherwise a run directory located inside a ``runs``
    directory maps to that directory's parent, and any other run directory
    is its own root.
    """
    override = os.environ.get("MIKON_STORE")
    if override:
        return Path(override).resolve()
    container = run_dir.parent
    return container.parent if container.name == "runs" else run_dir
156
+
157
+
158
def _is_relative_to(path: Path, root: Path) -> bool:
    """Return whether ``path`` is ``root`` itself or located beneath it."""
    try:
        path.relative_to(root)
    except ValueError:
        return False
    else:
        return True