multimodal-mllog 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.4
2
+ Name: multimodal-mllog
3
+ Version: 0.1.0
4
+ Summary: Experiment logbook pipeline: MLflow runs + git context -> structured logbook entries
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: mlflow
7
+ Requires-Dist: pydantic>=2
8
+ Requires-Dist: click
9
+ Provides-Extra: synthesis
10
+ Requires-Dist: anthropic; extra == "synthesis"
@@ -0,0 +1,96 @@
1
+ # multimodal_mllogger
2
+
3
+ Experiment Logbook Pipeline for a researcher finetuning an ML model using coding agents.
4
+
5
+ A researcher finetunes a model inside Claude Code; the training script logs to MLflow. At
6
+ session end, `/mllog` generates a structured, validated **logbook entry** — what was tried,
7
+ what changed in the code, what the metrics were, what likely happened, next steps — and
8
+ stores it back into MLflow as artifacts and tags.
9
+
10
+ ```
11
+ researcher → Claude Code → MLflow → logbook entry (triggered by /mllog at session end)
12
+ ```
13
+
14
+ Full design spec: [BUILD.md](BUILD.md).
15
+
16
+ ## Quick start
17
+
18
+ ```bash
19
+ python -m venv .venv
20
+ .venv/Scripts/activate # Windows; on Unix: source .venv/bin/activate
21
+ pip install -e .
22
+ pip install pytest # for the test suite
23
+
24
+ python examples/finetune_demo.py # demo run: 4 params, 2 metrics
25
+ mlflow ui --backend-store-uri sqlite:///mlflow.db # optional: inspect runs
26
+ ```
27
+
28
+ Then, in a Claude Code session opened in this repo, run **`/mllog`**. It collects ground
29
+ truth via the CLI, fills in the logbook from the live session, and ingests it.
30
+
31
+ ## Storage (v0.1)
32
+
33
+ - Tracking backend: **local SQLite**, default `MLFLOW_TRACKING_URI=sqlite:///mlflow.db`
34
+ (serverless; override via the env var). Run artifacts live under `./mlruns/`.
35
+ - `logbook.json` is canonical; `logbook.md` is a deterministic rendering of it — never
36
+ edited or parsed.
37
+
38
+ ## CLI
39
+
40
+ ```bash
41
+ mllog get-mlflow-info [--run-id ID | --latest] [--experiment NAME] --json
42
+ # params, metrics, tags, artifacts, UTC start/end of one MLflow run; exits 1 if no run
43
+
44
+ mllog get-git-info [--base REF] --json
45
+ # working-tree diff vs REF (default: HEAD), recent commits (UTC ISO 8601), HEAD sha
46
+
47
+ mllog create-logbook <logbook.json> [--run-id ID | --latest] [--experiment NAME]
48
+ # validate against the schema, render logbook.md, attach both as artifacts,
49
+ # set logbook.* tags; exits 1 on validation failure or missing run
50
+ ```
51
+
52
+ ## Schema and guardrails
53
+
54
+ `mllog/schema.py` defines the versioned `LogbookEntry` (pydantic v2). Validators enforce:
55
+
56
+ 1. **Grounding** — `hypothesis`, `likely_cause`, and `next_experiment` must each have a key
57
+ in `evidence`.
58
+ 2. **Metric references resolve** — every `metric:<name>` evidence value must name a metric
59
+ present in `metrics`.
60
+ 3. **Timestamps** — `date` must be a full ISO 8601 UTC timestamp.
61
+
62
+ Evidence values are typed references: `metric:val_acc`, `diff:planner.py@@-120,6`,
63
+ `session:user asked to lower lr` (`video:` / `telemetry:` reserved for later milestones).
64
+ The pipeline never fabricates data: no MLflow run or no git repo means a loud non-zero exit.
65
+
66
+ ## Layout
67
+
68
+ ```
69
+ mllog/
70
+ ├── cli.py # `mllog` entrypoint
71
+ ├── schema.py # versioned LogbookEntry + Metric (the contract)
72
+ ├── mlflow_io.py # collector: read run; writer: artifacts + tags
73
+ ├── git_context.py # collector: diff + recent commits
74
+ ├── render.py # LogbookEntry -> markdown (deterministic projection)
75
+ └── synthesis/ # empty in v0; M2 LLM layer (pip install "multimodal-mllog[synthesis]")
76
+ .claude/commands/mllog.md # the /mllog command (thin agent wrapper)
77
+ examples/finetune_demo.py # tiny demo finetune that logs to MLflow
78
+ tests/ # schema, render (golden file), mlflow_io
79
+ ```
80
+
81
+ ## Tests
82
+
83
+ ```bash
84
+ python -m pytest -q
85
+ ```
86
+
87
+ The deterministic core is tested without any LLM. `tests/test_render.py` includes a
88
+ golden-file test (`tests/golden/logbook.md`) locking the markdown output; if a render
89
+ change is intentional, regenerate the golden file from the `valid_entry` fixture.
90
+
91
+ ## Roadmap
92
+
93
+ - **M1 (done):** `get-mlflow-info`, `get-git-info`, `create-logbook`, `/mllog` — full loop on the demo.
94
+ - **M2:** `mllog.synthesis` (Anthropic API, optional extra), `mllog logbook`,
95
+ session-id run selection, multi-run sessions (`related_run_ids`).
96
+ - **M3:** video/telemetry collectors, time alignment, object-storage artifacts.
@@ -0,0 +1,6 @@
1
+ """mllog: deterministic experiment-logbook core (MLflow + git -> logbook)."""
2
+
3
+ from mllog.schema import LogbookEntry, Metric
4
+
5
+ __all__ = ["LogbookEntry", "Metric"]
6
+ __version__ = "0.1.0"
@@ -0,0 +1,97 @@
1
+ """mllog CLI. Fails loudly: non-zero exit when a source is absent or invalid."""
2
+
3
+ import json
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ import click
8
+ from pydantic import ValidationError
9
+
10
+
11
+
12
@click.group()
def main() -> None:
    """Experiment logbook tools: MLflow + git -> validated logbook entries."""
    # Root command group: it does no work itself; the subcommands defined in
    # this module attach to it via ``@main.command(...)``.
15
+
16
+
17
@main.command("get-mlflow-info")
@click.option("--run-id", "run_id", default=None, help="Specific MLflow run id.")
@click.option("--latest", is_flag=True, help="Most recently started run in the experiment.")
@click.option("--experiment", "experiment_name", default=None, help="Experiment name (default: Default).")
@click.option("--json", "as_json", is_flag=True, help="Print JSON payload.")
def get_mlflow_info(run_id: str | None, latest: bool, experiment_name: str | None, as_json: bool) -> None:
    """Print params, metrics, tags, artifacts and UTC times for one MLflow run."""
    # Deferred import: the collector module (and MLflow with it) is only
    # loaded when this command actually runs. Going through the module object
    # also avoids rebinding this command's own name locally.
    from mllog import mlflow_io

    # One of the two selectors is mandatory; click turns this into a usage error.
    if not (run_id or latest):
        raise click.UsageError("missing parameters: provide --run-id or --latest")

    try:
        payload = mlflow_io.get_mlflow_info(
            run_id=run_id,
            latest=latest,
            experiment_name=experiment_name,
        )
    except mlflow_io.RunNotFoundError as failure:
        click.echo(f"error: {failure}", err=True)
        sys.exit(1)

    # The human-readable confirmation goes to stderr so stdout stays pure JSON.
    banner = f"selected run: {payload['run_id']} (experiment: {payload['experiment_name']})"
    click.echo(banner, err=True)

    # NOTE: `as_json` is accepted but never consulted; output is always JSON.
    click.echo(json.dumps(payload, indent=2))
39
+
40
+
41
@main.command("get-git-info")
@click.option("--base", default="HEAD", help="Diff base ref (default: HEAD).")
@click.option("--json", "as_json", is_flag=True, help="Print JSON payload.")
def get_git_info(base: str, as_json: bool) -> None:
    """Print working-tree diff, recent commits (UTC timestamps), and HEAD sha."""
    # Deferred import: the git collector is only loaded when this command runs.
    from mllog import git_context

    try:
        payload = git_context.get_git_context(base=base)
    except git_context.GitContextError as failure:
        # Fail loudly: report on stderr and exit non-zero.
        click.echo(f"error: {failure}", err=True)
        sys.exit(1)

    # NOTE: `as_json` is accepted but never consulted; output is always JSON.
    click.echo(json.dumps(payload, indent=2))
54
+
55
+
56
@main.command("create-logbook")
@click.argument("logbook_path", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--run-id", "run_id", default=None, help="Specific MLflow run id.")
@click.option("--latest", is_flag=True, help="Most recently started run in the experiment.")
@click.option("--experiment", "experiment_name", default=None, help="Experiment name (default: Default).")
def verify_log(logbook_path: Path, run_id: str | None, latest: bool, experiment_name: str | None) -> None:
    """Validate logbook JSON, render markdown, attach both to the MLflow run."""
    # Deferred imports: MLflow and the schema are only loaded when this
    # command actually runs.
    from mllog.mlflow_io import RunNotFoundError, attach_to_mlflow
    from mllog.schema import LogbookEntry

    if not run_id and not latest:
        raise click.UsageError("specify --run-id ID or --latest")

    # Step 1: load the logbook JSON. A file that is not valid UTF-8 is
    # reported the same way as malformed JSON instead of escaping as a raw
    # UnicodeDecodeError traceback.
    try:
        data = json.loads(logbook_path.read_text(encoding="utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError) as exc:
        click.echo(f"error: {logbook_path} is not valid JSON: {exc}", err=True)
        sys.exit(1)

    # Step 2: validate against the schema (grounding, metric references, date).
    try:
        entry = LogbookEntry.model_validate(data)
    except ValidationError as exc:
        click.echo(f"error: logbook failed schema validation:\n{exc}", err=True)
        sys.exit(1)

    # Step 3: render markdown and attach both documents to the selected run.
    try:
        result = attach_to_mlflow(
            entry, run_id=run_id, latest=latest, experiment_name=experiment_name
        )
    except RunNotFoundError as exc:
        click.echo(f"error: {exc}", err=True)
        sys.exit(1)

    click.echo(f"logbook ingested into run: {result['run_id']}")
    click.echo(f"artifacts: {', '.join(result['artifacts'])}")
94
+
95
+
96
# Run the CLI group when this module is executed as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,70 @@
1
+ """Git collector: diff + recent commits as a timestamped, typed payload.
2
+
3
+ Collector symmetry with mlflow_io: explicit inputs, typed payload with UTC
4
+ ISO 8601 timestamps, fails loudly (raises GitContextError) when git/repo is absent.
5
+ """
6
+
7
+ import subprocess
8
+ from datetime import datetime, timezone
9
+
10
+
11
class GitContextError(Exception):
    """Signals that the git context (HEAD, diff, or log) could not be collected."""
13
+
14
+
15
def _git(*args: str, cwd: str | None = None) -> str:
    """Run ``git <args>`` and return its standard output as text.

    Output is decoded as UTF-8 with undecodable bytes replaced.

    Raises:
        GitContextError: if the process cannot be started (FileNotFoundError)
            or git exits with a non-zero status; in the latter case the
            message carries git's stderr, falling back to stdout.
    """
    command = ["git", *args]
    try:
        completed = subprocess.run(
            command,
            cwd=cwd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
        )
    except FileNotFoundError as exc:
        # NOTE(review): a nonexistent `cwd` may surface as FileNotFoundError
        # too and would then be reported with this same message - confirm.
        raise GitContextError("git executable not found") from exc

    if completed.returncode == 0:
        return completed.stdout

    detail = completed.stderr.strip() or completed.stdout.strip()
    raise GitContextError(f"git {' '.join(args)} failed: {detail}")
32
+
33
+
34
def get_git_context(base: str = "HEAD", cwd: str | None = None, max_commits: int = 10) -> dict:
    """Return {"collected_at", "diff", "recent_commits", "head"}.

    Args:
        base: Ref that the working tree is diffed against.
        cwd: Directory in which git is run (None: the current directory).
        max_commits: Maximum number of recent commits to include.

    Returns:
        A dict with the collection time (UTC, ``Z`` suffix), the raw diff
        text, a list of ``{"sha", "timestamp", "subject"}`` dicts with UTC
        timestamps, and the HEAD sha.

    Raises:
        GitContextError: if `base` looks like a command-line option, if any
            git invocation fails, or if a log record cannot be parsed.
    """
    # A value starting with "-" would be parsed by git as an option rather
    # than a revision; refuse it instead of letting it alter the command.
    if base.startswith("-"):
        raise GitContextError(f"invalid diff base ref: {base!r}")

    # Current HEAD commit.
    head = _git("rev-parse", "HEAD", cwd=cwd).strip()

    # Working-tree diff against the requested base.
    diff = _git("diff", base, cwd=cwd)

    # Recent commits, one record per line, fields separated by \x1f.
    log_raw = _git(
        "log",
        f"-{max_commits}",
        "--pretty=format:%H%x1f%cI%x1f%s",  # %cI = committer date, strict ISO 8601
        cwd=cwd,
    )

    commits = []
    # Split on "\n" only: str.splitlines() would also break records on
    # characters such as \x0c, \x1c-\x1e or U+2028 inside a commit subject.
    for line in log_raw.split("\n"):
        if not line.strip():
            continue
        fields = line.split("\x1f", 2)
        if len(fields) != 3:
            raise GitContextError(f"unexpected git log record: {line!r}")
        sha, raw_ts, subject = fields
        # Normalise the committer date (any offset) to UTC with a Z suffix.
        utc_ts = (
            datetime.fromisoformat(raw_ts)
            .astimezone(timezone.utc)
            .isoformat(timespec="seconds")
            .replace("+00:00", "Z")
        )
        commits.append({"sha": sha, "timestamp": utc_ts, "subject": subject})

    collected_at = (
        datetime.now(timezone.utc).isoformat(timespec="seconds").replace("+00:00", "Z")
    )
    return {
        "collected_at": collected_at,
        "diff": diff,
        "recent_commits": commits,
        "head": head,
    }
@@ -0,0 +1,126 @@
1
+ """MLflow collector (read run -> typed payload) and writer (artifacts + tags).
2
+
3
+ Collector symmetry: explicit inputs, timestamped typed payload, fails loudly
4
+ (raises RunNotFoundError) when the source is absent.
5
+ """
6
+
7
+ import json
8
+ import os
9
+ import tempfile
10
+ from datetime import datetime, timezone
11
+ from pathlib import Path
12
+
13
# v0.1 default: local serverless SQLite backend (MLflow's recommended local store).
# Applied before `mlflow` is imported below; an existing value is left untouched.
if "MLFLOW_TRACKING_URI" not in os.environ:
    os.environ["MLFLOW_TRACKING_URI"] = "sqlite:///mlflow.db"
15
+
16
+ import mlflow
17
+ from mlflow.tracking import MlflowClient
18
+
19
+ from mllog.render import render_markdown
20
+ from mllog.schema import LogbookEntry
21
+
22
+
23
class RunNotFoundError(Exception):
    """Signals that the requested MLflow run (or its experiment) does not exist."""
25
+
26
+
27
+ def _ms_to_utc_iso(ms: int | None) -> str | None:
28
+ if ms is None:
29
+ return None
30
+ return (
31
+ datetime.fromtimestamp(ms / 1000, tz=timezone.utc)
32
+ .isoformat(timespec="seconds")
33
+ .replace("+00:00", "Z")
34
+ )
35
+
36
+
37
+ def _resolve_experiment(client: MlflowClient, experiment_name: str | None):
38
+ if experiment_name is None:
39
+ experiment_name = "Default"
40
+ experiment = client.get_experiment_by_name(experiment_name)
41
+ if experiment is None:
42
+ raise RunNotFoundError(f"MLflow experiment not found: {experiment_name!r}")
43
+ return experiment
44
+
45
+
46
def get_mlflow_info(
    run_id: str | None = None,
    latest: bool = False,
    experiment_name: str | None = None,
) -> dict:
    """Return a typed, timestamped payload describing one MLflow run.

    The run is selected by `run_id` when given; otherwise, with `latest`,
    the most recently started run of the experiment is used.

    Raises:
        RunNotFoundError: if the run or experiment cannot be found, or if
            neither selector is supplied.
    """
    client = MlflowClient()

    # --- select the run -------------------------------------------------
    if run_id is None and not latest:
        raise RunNotFoundError("specify --run-id or --latest")

    if run_id is not None:
        try:
            run = client.get_run(run_id)
        except Exception as exc:
            # Any lookup failure is reported as "run not found".
            raise RunNotFoundError(f"MLflow run not found: {run_id!r} ({exc})") from exc
    else:
        parent = _resolve_experiment(client, experiment_name)
        newest_first = client.search_runs(
            [parent.experiment_id],
            order_by=["attributes.start_time DESC"],
            max_results=1,
        )
        if not newest_first:
            raise RunNotFoundError(f"no runs found in experiment {parent.name!r}")
        run = newest_first[0]

    # --- describe the run -----------------------------------------------
    run_info = run.info
    owning_experiment = client.get_experiment(run_info.experiment_id)
    artifact_paths = [item.path for item in client.list_artifacts(run_info.run_id)]
    collected_at = (
        datetime.now(timezone.utc).isoformat(timespec="seconds").replace("+00:00", "Z")
    )

    # Key order is part of the output: it is what `json.dumps` emits.
    return {
        "collected_at": collected_at,
        "run_id": run_info.run_id,
        "experiment_id": run_info.experiment_id,
        "experiment_name": owning_experiment.name,
        "status": run_info.status,
        "start_time": _ms_to_utc_iso(run_info.start_time),
        "end_time": _ms_to_utc_iso(run_info.end_time),
        "params": dict(run.data.params),
        "metrics": dict(run.data.metrics),
        "tags": dict(run.data.tags),
        "artifacts": artifact_paths,
    }
91
+
92
+
93
def attach_to_mlflow(
    entry: LogbookEntry,
    run_id: str | None = None,
    latest: bool = False,
    experiment_name: str | None = None,
) -> dict:
    """Render markdown, log logbook.json + logbook.md as artifacts, set tags.

    The target run is resolved with `get_mlflow_info`, so a missing run
    raises RunNotFoundError before anything is written.

    Returns {"run_id": ..., "artifacts": [...]}.
    """
    resolved = get_mlflow_info(run_id=run_id, latest=latest, experiment_name=experiment_name)
    target_run_id = resolved["run_id"]
    client = MlflowClient()

    # Both documents are written to a throwaway directory and uploaded from
    # there; the artifact names are the file names.
    with tempfile.TemporaryDirectory() as workdir:
        staging = Path(workdir)
        json_file = staging / "logbook.json"
        md_file = staging / "logbook.md"

        canonical = json.dumps(entry.model_dump(), indent=2, ensure_ascii=False)
        json_file.write_text(canonical, encoding="utf-8")
        md_file.write_text(render_markdown(entry), encoding="utf-8")

        for staged in (json_file, md_file):
            client.log_artifact(target_run_id, str(staged))

    # Summary tags on the run itself.
    tags = {
        "logbook.generated": "true",
        "logbook.task": entry.task,
        "logbook.result": entry.result,
        "logbook.schema_version": entry.schema_version,
    }
    for key, value in tags.items():
        client.set_tag(target_run_id, key, value)

    return {"run_id": target_run_id, "artifacts": ["logbook.json", "logbook.md"]}
@@ -0,0 +1,78 @@
1
+ """Deterministic LogbookEntry -> markdown projection.
2
+
3
+ logbook.md is never edited or parsed; to change it, amend the JSON and re-render.
4
+ """
5
+
6
+ from mllog.schema import LogbookEntry
7
+
8
+
9
def render_markdown(entry: LogbookEntry) -> str:
    """Project a logbook entry onto its markdown document.

    Every section is emitted as: heading, blank line, body lines, blank
    line. Evidence claims are listed in sorted key order. The result ends
    with a single trailing newline.
    """
    # Header bullets under the title; related runs and git refs are optional.
    summary = [
        f"- **Date:** {entry.date}",
        f"- **Experiment:** {entry.experiment_id}",
        f"- **MLflow run:** `{entry.mlflow_run_id}`",
    ]
    if entry.related_run_ids:
        related = ", ".join(f"`{r}`" for r in entry.related_run_ids)
        summary.append("- **Related runs:** " + related)
    if entry.git_before or entry.git_after:
        summary.append(
            f"- **Git:** `{entry.git_before or '?'}` -> `{entry.git_after or '?'}`"
        )
    summary.append(f"- **Schema version:** {entry.schema_version}")

    metric_table = ["| Name | Value | Unit |", "|------|-------|------|"]
    metric_table.extend(
        f"| {m.name} | {m.value} | {m.unit or ''} |" for m in entry.metrics
    )

    evidence_items = [
        f"- **{claim}** -> `{entry.evidence[claim]}`" for claim in sorted(entry.evidence)
    ]

    # (heading, body lines) in document order.
    sections = [
        (f"# Logbook: {entry.task}", summary),
        ("## Prompt", [entry.prompt]),
        ("## Hypothesis", [entry.hypothesis]),
        ("## Code changes", [f"- {change}" for change in entry.code_changes]),
        ("## Metrics", metric_table),
        ("## Result", [entry.result]),
        ("## Observations", [f"- {obs}" for obs in entry.observations]),
        ("## Likely cause", [entry.likely_cause]),
        ("## Next experiment", [entry.next_experiment]),
        ("## Evidence", evidence_items),
    ]

    out: list[str] = []
    for heading, body in sections:
        out.append(heading)
        out.append("")
        out.extend(body)
        out.append("")
    return "\n".join(out)
@@ -0,0 +1,69 @@
1
+ """Versioned logbook schema. This is the contract; validators enforce grounding."""
2
+
3
+ from datetime import datetime
4
+
5
+ from pydantic import BaseModel, Field, model_validator
6
+
7
# Claim fields that must each have a matching key in `LogbookEntry.evidence`.
GROUNDED_FIELDS = (
    "hypothesis",
    "likely_cause",
    "next_experiment",
)
8
+
9
+
10
class Metric(BaseModel):
    """A single named numeric measurement recorded in a logbook entry."""

    # Name that `metric:<name>` evidence references are resolved against.
    name: str
    value: float
    # Optional unit label; None when no unit is given.
    unit: str | None = None
14
+
15
+
16
class LogbookEntry(BaseModel):
    """Versioned logbook entry: the contract between synthesis and storage.

    Three model-level validators run after field validation:

    * `date` must parse as an ISO 8601 timestamp that carries a time zone
      with a zero UTC offset (``Z`` or ``+00:00``);
    * `evidence` must contain a key for every field in ``GROUNDED_FIELDS``;
    * every ``metric:<name>`` evidence value must name an entry in `metrics`.
    """

    schema_version: str = "1.0"
    experiment_id: str
    date: str  # full ISO 8601 UTC timestamp, e.g. "2026-06-09T18:45:01Z"
    task: str
    prompt: str
    hypothesis: str
    code_changes: list[str]
    metrics: list[Metric]
    result: str
    observations: list[str]
    likely_cause: str
    next_experiment: str
    evidence: dict[str, str]  # claim -> typed reference (metric:, diff:, session:, ...)
    mlflow_run_id: str
    related_run_ids: list[str] = Field(default_factory=list)  # RESERVED: multi-run sessions (M2+)
    git_before: str | None = None
    git_after: str | None = None
    extra_inputs: dict = Field(default_factory=dict)  # RESERVED for video/telemetry; {} in v0

    @model_validator(mode="after")
    def _validate_date_iso8601(self) -> "LogbookEntry":
        """Reject dates that are not full, timezone-aware UTC timestamps."""
        try:
            parsed = datetime.fromisoformat(self.date.replace("Z", "+00:00"))
        except ValueError as exc:
            raise ValueError(
                f"date must be a full ISO 8601 timestamp, got {self.date!r}: {exc}"
            ) from exc

        # Parsing alone also accepts date-only and naive values (no offset)
        # as well as non-UTC offsets; the contract requires explicit UTC.
        offset = parsed.utcoffset()
        if offset is None or offset.total_seconds() != 0:
            raise ValueError(
                "date must be a full ISO 8601 UTC timestamp "
                f"(time plus 'Z' or '+00:00'), got {self.date!r}"
            )
        return self

    @model_validator(mode="after")
    def _validate_grounding(self) -> "LogbookEntry":
        """Require an evidence key for each grounded claim field."""
        missing = [f for f in GROUNDED_FIELDS if f not in self.evidence]
        if missing:
            raise ValueError(
                "evidence must contain a key for each grounded claim; missing: "
                + ", ".join(missing)
            )
        return self

    @model_validator(mode="after")
    def _validate_metric_references(self) -> "LogbookEntry":
        """Require every ``metric:<name>`` reference to resolve in `metrics`."""
        metric_names = {m.name for m in self.metrics}
        # Only `metric:` references are resolved here; other reference kinds
        # (diff:, session:, ...) are accepted as-is.
        bad = [
            ref
            for ref in self.evidence.values()
            if ref.startswith("metric:") and ref.removeprefix("metric:") not in metric_names
        ]
        if bad:
            raise ValueError(
                "evidence metric references must name a metric present in `metrics`; "
                "unresolved: " + ", ".join(bad)
            )
        return self
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.4
2
+ Name: multimodal-mllog
3
+ Version: 0.1.0
4
+ Summary: Experiment logbook pipeline: MLflow runs + git context -> structured logbook entries
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: mlflow
7
+ Requires-Dist: pydantic>=2
8
+ Requires-Dist: click
9
+ Provides-Extra: synthesis
10
+ Requires-Dist: anthropic; extra == "synthesis"
@@ -0,0 +1,17 @@
1
+ README.md
2
+ pyproject.toml
3
+ mllog/__init__.py
4
+ mllog/cli.py
5
+ mllog/git_context.py
6
+ mllog/mlflow_io.py
7
+ mllog/render.py
8
+ mllog/schema.py
9
+ multimodal_mllog.egg-info/PKG-INFO
10
+ multimodal_mllog.egg-info/SOURCES.txt
11
+ multimodal_mllog.egg-info/dependency_links.txt
12
+ multimodal_mllog.egg-info/entry_points.txt
13
+ multimodal_mllog.egg-info/requires.txt
14
+ multimodal_mllog.egg-info/top_level.txt
15
+ tests/test_mlflow_io.py
16
+ tests/test_render.py
17
+ tests/test_schema.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ mllog = mllog.cli:main
@@ -0,0 +1,6 @@
1
+ mlflow
2
+ pydantic>=2
3
+ click
4
+
5
+ [synthesis]
6
+ anthropic
@@ -0,0 +1,23 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "multimodal-mllog"
7
+ version = "0.1.0"
8
+ description = "Experiment logbook pipeline: MLflow runs + git context -> structured logbook entries"
9
+ requires-python = ">=3.11"
10
+ dependencies = [
11
+ "mlflow",
12
+ "pydantic>=2",
13
+ "click",
14
+ ]
15
+
16
+ [project.optional-dependencies]
17
+ synthesis = ["anthropic"]
18
+
19
+ [project.scripts]
20
+ mllog = "mllog.cli:main"
21
+
22
+ [tool.setuptools.packages.find]
23
+ include = ["mllog*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,77 @@
1
+ import json
2
+ import re
3
+
4
+ import mlflow
5
+ import pytest
6
+
7
+ from mllog.mlflow_io import RunNotFoundError, attach_to_mlflow, get_mlflow_info
8
+
9
# Shape of the timestamps the collector is expected to emit:
# second resolution, literal "Z" suffix.
ISO_UTC = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$")
10
+
11
+
12
@pytest.fixture
def tracking(tmp_path, monkeypatch):
    """Point MLflow at a fresh SQLite store inside the test's tmp directory."""
    monkeypatch.chdir(tmp_path)  # keep ./mlruns artifact root inside tmp
    db_file = tmp_path / "mlflow.db"
    store_uri = f"sqlite:///{db_file.as_posix()}"
    # Set both the environment variable and MLflow's in-process setting.
    monkeypatch.setenv("MLFLOW_TRACKING_URI", store_uri)
    mlflow.set_tracking_uri(store_uri)
    yield store_uri
19
+
20
+
21
@pytest.fixture
def demo_run(tracking):
    """Create one finished run with two params and two metrics; return its id."""
    with mlflow.start_run(run_name="test-run") as active:
        mlflow.log_params({"learning_rate": 3e-4, "epochs": 5})
        for metric_name, metric_value in (("train_loss", 0.271), ("val_acc", 0.86)):
            mlflow.log_metric(metric_name, metric_value)
    return active.info.run_id
28
+
29
+
30
def test_run_info_latest(demo_run):
    """`latest=True` selects the demo run and returns its data with UTC stamps."""
    payload = get_mlflow_info(latest=True)

    assert payload["run_id"] == demo_run
    # MLflow stores params as strings, metrics as floats.
    assert payload["params"] == {"learning_rate": "0.0003", "epochs": "5"}
    assert payload["metrics"] == {"train_loss": 0.271, "val_acc": 0.86}
    for stamp_key in ("start_time", "collected_at"):
        assert ISO_UTC.match(payload[stamp_key])
37
+
38
+
39
def test_run_info_by_id(demo_run):
    """An explicit run id resolves to that run in the Default experiment."""
    payload = get_mlflow_info(run_id=demo_run)

    assert payload["run_id"] == demo_run
    assert payload["experiment_name"] == "Default"
43
+
44
+
45
def test_run_info_no_runs_fails_loudly(tracking):
    """With an empty store, asking for the latest run raises RunNotFoundError."""
    with pytest.raises(RunNotFoundError):
        get_mlflow_info(latest=True)
48
+
49
+
50
def test_run_info_bad_run_id_fails_loudly(tracking):
    """An unknown run id raises RunNotFoundError."""
    with pytest.raises(RunNotFoundError):
        get_mlflow_info(run_id="nonexistent")
53
+
54
+
55
def test_ingest_attaches_artifacts_and_tags(demo_run, valid_entry):
    """Attaching a logbook stores both artifacts and sets the logbook.* tags."""
    entry = valid_entry.model_copy(update={"mlflow_run_id": demo_run})
    result = attach_to_mlflow(entry, run_id=demo_run)
    assert result["run_id"] == demo_run

    client = mlflow.tracking.MlflowClient()
    artifacts = {f.path for f in client.list_artifacts(demo_run)}
    assert {"logbook.json", "logbook.md"} <= artifacts

    run = client.get_run(demo_run)
    assert run.data.tags["logbook.generated"] == "true"
    assert run.data.tags["logbook.task"] == entry.task
    assert run.data.tags["logbook.result"] == entry.result
    assert run.data.tags["logbook.schema_version"] == "1.0"

    # Round-trip the stored JSON; the context manager closes the file handle
    # instead of leaving it to the garbage collector.
    json_path = client.download_artifacts(demo_run, "logbook.json")
    with open(json_path, encoding="utf-8") as fh:
        stored = json.load(fh)
    assert stored["mlflow_run_id"] == demo_run
73
+
74
+
75
def test_ingest_missing_run_fails_loudly(tracking, valid_entry):
    """Attaching to an unknown run id raises RunNotFoundError."""
    with pytest.raises(RunNotFoundError):
        attach_to_mlflow(valid_entry, run_id="nonexistent")
@@ -0,0 +1,26 @@
1
+ from pathlib import Path
2
+
3
+ from mllog.render import render_markdown
4
+
5
# Expected markdown rendering of the `valid_entry` fixture, stored next to the tests.
GOLDEN = Path(__file__).parent.joinpath("golden", "logbook.md")
6
+
7
+
8
def test_render_is_deterministic(valid_entry):
    """Rendering the same entry twice yields identical markdown."""
    first = render_markdown(valid_entry)
    second = render_markdown(valid_entry)
    assert first == second
10
+
11
+
12
def test_render_contains_metrics_and_evidence(valid_entry):
    """The rendering includes the metric rows, an evidence reference, and `abc123`."""
    rendered = render_markdown(valid_entry)
    expected_fragments = (
        "| train_loss | 0.271 |",
        "| val_acc | 0.86 |",
        "`metric:train_loss`",
        "abc123",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
18
+
19
+
20
def test_golden_file(valid_entry):
    """Pin the markdown projection of the fixed `valid_entry` fixture.

    When a rendering change is intentional, regenerate the golden file from
    the fixture (see README) or update the file by hand.
    """
    expected = GOLDEN.read_text(encoding="utf-8")
    assert render_markdown(valid_entry) == expected
@@ -0,0 +1,47 @@
1
+ import pytest
2
+ from pydantic import ValidationError
3
+
4
+ from mllog.schema import LogbookEntry
5
+
6
+
7
def test_valid_entry_passes(valid_entry):
    """The fixture validates and carries the expected defaults."""
    assert valid_entry.schema_version == "1.0"
    # Reserved fields default to empty containers.
    assert valid_entry.related_run_ids == []
    assert valid_entry.extra_inputs == {}
11
+
12
+
13
@pytest.mark.parametrize("missing", ["hypothesis", "likely_cause", "next_experiment"])
def test_grounding_required_for_each_claim(valid_entry, missing):
    """Dropping the evidence key of any grounded claim fails validation."""
    payload = valid_entry.model_dump()
    payload["evidence"].pop(missing)
    # The error message must name the claim that lost its evidence.
    with pytest.raises(ValidationError, match=missing):
        LogbookEntry.model_validate(payload)
19
+
20
+
21
def test_metric_reference_must_resolve(valid_entry):
    """A `metric:` reference to an unknown metric fails validation."""
    payload = valid_entry.model_dump()
    dangling = "metric:does_not_exist"
    payload["evidence"]["likely_cause"] = dangling
    with pytest.raises(ValidationError, match=dangling):
        LogbookEntry.model_validate(payload)
26
+
27
+
28
def test_non_metric_references_not_resolved_at_schema_level(valid_entry):
    """`diff:` and `session:` references are accepted without resolution."""
    payload = valid_entry.model_dump()
    payload["evidence"].update(
        {
            "likely_cause": "diff:anything.py@@-1,1",
            "next_experiment": "session:we discussed it",
        }
    )
    LogbookEntry.model_validate(payload)  # should not raise
33
+
34
+
35
@pytest.mark.parametrize("bad_date", ["2026-13-40", "yesterday", "", "18:45"])
def test_date_must_be_iso8601(valid_entry, bad_date):
    """Strings that are not ISO 8601 timestamps are rejected."""
    payload = {**valid_entry.model_dump(), "date": bad_date}
    with pytest.raises(ValidationError):
        LogbookEntry.model_validate(payload)
41
+
42
+
43
def test_date_accepts_z_suffix_and_offset(valid_entry):
    """Both the `Z` suffix and an explicit `+00:00` offset are accepted."""
    payload = valid_entry.model_dump()
    accepted = ("2026-06-09T18:45:01Z", "2026-06-09T18:45:01+00:00")
    for stamp in accepted:
        payload["date"] = stamp
        LogbookEntry.model_validate(payload)  # should not raise
+ LogbookEntry.model_validate(data)