koobi 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- koobi/__init__.py +35 -0
- koobi/app.py +214 -0
- koobi/cards.py +106 -0
- koobi/charts.py +30 -0
- koobi/cli.py +174 -0
- koobi/config.py +86 -0
- koobi/index.py +135 -0
- koobi/schema.py +249 -0
- koobi-0.0.1.dist-info/METADATA +235 -0
- koobi-0.0.1.dist-info/RECORD +14 -0
- koobi-0.0.1.dist-info/WHEEL +5 -0
- koobi-0.0.1.dist-info/entry_points.txt +2 -0
- koobi-0.0.1.dist-info/licenses/LICENSE +21 -0
- koobi-0.0.1.dist-info/top_level.txt +1 -0
koobi/__init__.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Koobi: file-native model and experiment registry.
|
|
2
|
+
|
|
3
|
+
This module exposes the stable public API. Import the high-level helpers
|
|
4
|
+
directly from ``koobi`` rather than reaching into submodules::
|
|
5
|
+
|
|
6
|
+
from koobi import load_config, build, connect, query_cards
|
|
7
|
+
from koobi import Card, load_card, save_card
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from .cards import iter_cards, load_card, save_card
|
|
13
|
+
from .charts import measure_bar
|
|
14
|
+
from .config import Config, load_config
|
|
15
|
+
from .index import build, connect, query_cards
|
|
16
|
+
from .schema import Card, SeriesRef, dump_card, parse_card
|
|
17
|
+
|
|
18
|
+
__version__ = "0.0.1"
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"__version__",
|
|
22
|
+
"Card",
|
|
23
|
+
"Config",
|
|
24
|
+
"SeriesRef",
|
|
25
|
+
"build",
|
|
26
|
+
"connect",
|
|
27
|
+
"dump_card",
|
|
28
|
+
"iter_cards",
|
|
29
|
+
"load_card",
|
|
30
|
+
"load_config",
|
|
31
|
+
"measure_bar",
|
|
32
|
+
"parse_card",
|
|
33
|
+
"query_cards",
|
|
34
|
+
"save_card",
|
|
35
|
+
]
|
koobi/app.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""Marimo app entrypoint for Koobi."""
|
|
2
|
+
|
|
3
|
+
import marimo
|
|
4
|
+
|
|
5
|
+
__generated_with = "0.23.10"
|
|
6
|
+
app = marimo.App(width="full")
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@app.cell
def _():
    # Import cell: brings in the standard-library, third-party and Koobi names
    # that the cells below accept as parameters, and returns them.
    import os
    from pathlib import Path

    import marimo as mo
    import pandas as pd

    from koobi.cards import load_card, save_card
    from koobi.charts import measure_bar
    from koobi.config import load_config
    from koobi.index import build, connect, query_cards

    return (
        Path,
        build,
        connect,
        load_card,
        load_config,
        measure_bar,
        mo,
        os,
        pd,
        query_cards,
        save_card,
    )
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@app.cell
def _(Path, load_config, os):
    # Config cell: start config discovery at the KOOBI_CONFIG environment
    # variable when it is set and non-empty, otherwise at the current working
    # directory, and expose the loaded configuration as ``cfg``.
    config_hint = os.environ.get("KOOBI_CONFIG")
    start_path = Path(config_hint).resolve() if config_hint else Path.cwd()
    cfg = load_config(start_path)
    return (cfg,)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@app.cell
def _(mo):
    # Reindex button cell: creates the "Reindex" run button, shows it as the
    # cell output, and returns it so another cell can read its value.
    reindex_button = mo.ui.run_button(label="Reindex")
    reindex_button
    return (reindex_button,)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@app.cell
def _(build, cfg, connect, reindex_button):
    # Index cell: rebuilds the index for ``cfg`` and opens a connection to it.
    # Referencing reindex_button.value makes this cell re-run when it is clicked.
    # The index is rebuilt here (and on first load) so it never goes stale.
    _reindex_tick = reindex_button.value
    build(cfg)
    con = connect(cfg)
    return (con,)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@app.cell
def _(con):
    # Facet cell: reads the sorted distinct projects, statuses and measure
    # keys from the index; each list holds the first column of every row.
    projects = [
        row[0]
        for row in con.execute(
            "SELECT DISTINCT project FROM cards ORDER BY project"
        ).fetchall()
    ]
    statuses = [
        row[0]
        for row in con.execute(
            "SELECT DISTINCT status FROM cards ORDER BY status"
        ).fetchall()
    ]
    measures = [
        row[0]
        for row in con.execute(
            "SELECT DISTINCT key FROM measures ORDER BY key"
        ).fetchall()
    ]
    return measures, projects, statuses
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@app.cell
def _(measures, mo, projects, statuses):
    # Filter cell: builds the project/status dropdowns, the free-text search
    # box and (only when at least one measure exists) the measure dropdown.
    # The "All ..." entries map to None, i.e. no filter on that column.
    project_options = {"All projects": None}
    project_options.update({name: name for name in projects})
    status_options = {"All statuses": None}
    status_options.update({name: name for name in statuses})

    project_select = mo.ui.dropdown(
        project_options, value="All projects", label="Project"
    )
    status_select = mo.ui.dropdown(
        status_options, value="All statuses", label="Status"
    )
    search_text = mo.ui.text(value="", placeholder="search name or tags", label="Search")
    # ``measure_select`` is None when there are no measures; the chart cell
    # checks for that before using it.
    measure_select = (
        mo.ui.dropdown(measures, value=measures[0], label="Measure chart")
        if measures
        else None
    )

    _filters = [project_select, status_select, search_text]
    if measure_select is not None:
        _filters.append(measure_select)
    # Lay the active widgets out in one left-aligned row as the cell output.
    mo.hstack(_filters, justify="start")
    return measure_select, project_select, search_text, status_select
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@app.cell
def _(con, project_select, query_cards, search_text, status_select):
    # Query cell: fetches the cards matching the current filter widgets.
    # An empty search box is passed as None so no search clause is applied.
    cards_df = query_cards(
        con,
        project=project_select.value,
        status=status_select.value,
        search=search_text.value or None,
    )
    return (cards_df,)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@app.cell
def _(cards_df, measure_bar, measure_select, mo):
    # Chart cell: shows a bar chart of the selected measure for the filtered
    # cards, or a placeholder message when no measure can be selected.
    if measure_select is None or measure_select.value is None:
        chart_view = mo.md("_No measures available to chart._")
    else:
        chart_view = mo.ui.altair_chart(measure_bar(cards_df, measure_select.value))
    chart_view
    return
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@app.cell
def _(cards_df, mo):
    # Table cell: shows the filtered cards in a multi-select table and returns
    # the widget so the detail cell can read the current selection.
    table = mo.ui.table(cards_df, selection="multi", label="Models")
    table
    return (table,)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
@app.cell
def _(Path, cards_df, cfg, load_card, mo, pd, table):
    # Detail cell: loads the selected card from disk, shows its metadata and
    # description, and builds the edit form consumed by the save cell.
    mo.stop(cards_df.empty, mo.md("No cards match the current filters."))

    # Fall back to the first filtered card when nothing is selected (or when
    # the table value is not a DataFrame).
    _selection = table.value if isinstance(table.value, pd.DataFrame) else pd.DataFrame()
    if _selection.empty:
        _selection = cards_df.head(1)

    # Only the first selected row drives the detail view.
    selected_id = str(_selection.iloc[0]["id"])
    selected_row = cards_df[cards_df["id"] == selected_id].iloc[0]
    card = load_card(Path(selected_row["path"]), root=cfg.root)

    # One row per metric and per param, plus a single row with the joined tags.
    metadata_rows = (
        [{"section": "metric", "key": k, "value": v} for k, v in card.metrics.items()]
        + [{"section": "param", "key": k, "value": v} for k, v in card.params.items()]
        + [{"section": "meta", "key": "tags", "value": ", ".join(card.tags)}]
    )
    metadata_df = pd.DataFrame(metadata_rows, columns=["section", "key", "value"])

    # A status outside this list is shown as "candidate" in the dropdown.
    _allowed_status = ["idea", "running", "candidate", "promoted", "archived"]
    edit_form = mo.ui.form(
        mo.ui.dictionary(
            {
                "name": mo.ui.text(value=card.name, label="Name"),
                "status": mo.ui.dropdown(
                    _allowed_status,
                    value=card.status if card.status in _allowed_status else "candidate",
                    label="Status",
                ),
                "tags": mo.ui.text(
                    value=", ".join(card.tags), label="Tags (comma separated)"
                ),
                "body": mo.ui.text_area(value=card.body, label="Description", rows=10),
            }
        ),
        submit_button_label="Save card",
    )

    mo.vstack(
        [
            mo.md(f"## {card.name}"),
            mo.md(
                f"**ID:** `{card.id}` \n"
                f"**Type:** `{card.type}` \n"
                f"**Project:** `{card.project}` \n"
                f"**Status:** `{card.status}`"
            ),
            mo.ui.table(metadata_df, selection=None, label="Measures / params"),
            mo.md("### Description"),
            mo.md(card.body or "_No description._"),
            mo.md("### Edit card"),
            edit_form,
        ]
    )
    return card, edit_form
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@app.cell
def _(build, card, cfg, edit_form, mo, save_card):
    # Save cell: when the edit form has a submitted value, copies the edited
    # fields onto ``card``, writes only those fields back to the card file and
    # rebuilds the index. Shows a confirmation message (empty otherwise).
    save_message = ""
    if edit_form.value:
        submitted = edit_form.value
        card.name = str(submitted["name"])
        card.status = str(submitted["status"])
        # Split the comma-separated tag string, dropping blank entries.
        card.tags = [tag.strip() for tag in str(submitted["tags"]).split(",") if tag.strip()]
        card.body = str(submitted["body"])
        save_card(card, fields=["name", "status", "tags", "body"])
        build(cfg)
        save_message = f"Saved `{card.id}`. Click **Reindex** to refresh the table."
    mo.md(save_message)
    return
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
if __name__ == "__main__":
|
|
214
|
+
app.run()
|
koobi/cards.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""Card filesystem operations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from copy import deepcopy
|
|
6
|
+
from datetime import date
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Iterator
|
|
9
|
+
|
|
10
|
+
from .schema import Card, SeriesRef, dump_card, parse_card
|
|
11
|
+
|
|
12
|
+
HEADER_FIELDS = {
|
|
13
|
+
"id",
|
|
14
|
+
"name",
|
|
15
|
+
"type",
|
|
16
|
+
"project",
|
|
17
|
+
"status",
|
|
18
|
+
"created",
|
|
19
|
+
"updated",
|
|
20
|
+
"tags",
|
|
21
|
+
"metrics",
|
|
22
|
+
"params",
|
|
23
|
+
"artifacts",
|
|
24
|
+
"series",
|
|
25
|
+
"links",
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def load_card(path: Path | str, *, root: Path | str | None = None) -> Card:
    """Read and parse the card file at *path* and resolve its data paths.

    Artifact and series paths are resolved against *root* when it is given,
    otherwise against the directory containing the card file. The returned
    card has ``path`` set to the resolved location of the file.
    """
    source = Path(path).resolve()
    loaded = parse_card(source.read_text(encoding="utf-8"))
    loaded.path = source

    if root is None:
        base_dir = source.parent
    else:
        base_dir = Path(root).resolve()
    _resolve_paths(loaded, base_dir)
    return loaded
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def save_card(card: Card, *, fields: list[str] | None = None) -> None:
    """Write *card* to ``card.path``, optionally updating only some fields.

    Args:
        card: The card to persist. ``card.path`` must be set.
        fields: When ``None``, the whole card is written as-is and the target
            file does not need to exist yet. Otherwise the file on disk is
            re-read and only the named fields (``"body"`` or any card
            attribute) are copied from *card* onto it, so fields changed on
            disk but not listed here are preserved.

    Raises:
        ValueError: If ``card.path`` is ``None`` or a name in *fields* is not
            a card attribute.
        FileNotFoundError: If *fields* is given and the card file is missing.

    Side effects:
        ``updated`` is set to today's date both in the written file and on
        *card*; ``"updated"`` is appended to the written key order if absent.
    """
    if card.path is None:
        raise ValueError("Card.path is required to save a card.")

    target_path = card.path

    if fields is None:
        # Full save: nothing from disk is needed, so do not require the file
        # to exist (this is what allows saving a brand-new card).
        to_write = deepcopy(card)
    else:
        # Partial save: start from the current on-disk state and overlay only
        # the requested fields.
        to_write = parse_card(target_path.read_text(encoding="utf-8"))
        to_write.path = target_path
        for field_name in fields:
            _copy_field(to_write, card, field_name)

    to_write.updated = date.today()
    if "updated" not in to_write.key_order:
        to_write.key_order.append("updated")

    rendered = dump_card(to_write)
    target_path.parent.mkdir(parents=True, exist_ok=True)
    target_path.write_text(rendered, encoding="utf-8")
    # Keep the caller's in-memory card in sync with what was written.
    card.updated = to_write.updated
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def iter_cards(root: Path | str, *, loader_root: Path | str | None = None) -> Iterator[Card]:
    """Yield every card found in ``*.md`` files below *root*, in sorted path order.

    Files whose name starts with ``_`` and cards that parse to an empty ``id``
    are skipped. Data paths inside each card are resolved against
    *loader_root* when given, otherwise against *root*. A missing *root*
    yields nothing.
    """
    cards_root = Path(root).resolve()
    if loader_root is None:
        effective_root = cards_root
    else:
        effective_root = Path(loader_root).resolve()

    if not cards_root.exists():
        return

    markdown_files = sorted(cards_root.rglob("*.md"))
    for markdown_file in markdown_files:
        if markdown_file.name.startswith("_"):
            continue
        loaded = load_card(markdown_file, root=effective_root)
        if loaded.id:
            yield loaded
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _resolve_paths(card: Card, root: Path) -> None:
    """Fill in the resolved artifact paths and series ``abs_path`` values of *card*.

    ``card.resolved_artifacts`` is replaced by a fresh mapping of artifact name
    to resolved path; every ``SeriesRef`` in ``card.series`` gets ``abs_path``.
    """
    card.resolved_artifacts = {
        name: _resolve_path(root, raw) for name, raw in card.artifacts.items()
    }

    series_refs = (ref for ref in card.series.values() if isinstance(ref, SeriesRef))
    for series_ref in series_refs:
        series_ref.abs_path = _resolve_path(root, series_ref.path)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _resolve_path(root: Path, raw_path: str) -> Path:
|
|
92
|
+
path = Path(raw_path)
|
|
93
|
+
if path.is_absolute():
|
|
94
|
+
return path
|
|
95
|
+
return (root / path).resolve()
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _copy_field(target: Card, source: Card, field_name: str) -> None:
|
|
99
|
+
if field_name == "body":
|
|
100
|
+
target.body = source.body
|
|
101
|
+
return
|
|
102
|
+
if not hasattr(target, field_name):
|
|
103
|
+
raise ValueError(f"Unknown card field '{field_name}'.")
|
|
104
|
+
setattr(target, field_name, deepcopy(getattr(source, field_name)))
|
|
105
|
+
if field_name in HEADER_FIELDS and field_name not in target.key_order:
|
|
106
|
+
target.key_order.append(field_name)
|
koobi/charts.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Chart helpers for Koobi."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import altair as alt
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def measure_bar(cards_df: pd.DataFrame, measure: str) -> alt.Chart:
    """Build a bar chart of *measure* per card id.

    When *cards_df* is empty or has no *measure* column, an empty chart with
    the same axes and title is returned instead (without sorting or tooltips).
    """
    chart_title = f"{measure} by card"
    has_values = not cards_df.empty and measure in cards_df.columns

    if not has_values:
        placeholder = pd.DataFrame({"id": [], "value": []})
        bars = alt.Chart(placeholder).mark_bar()
        bars = bars.encode(x=alt.X("id:N", title="Card"), y=alt.Y("value:Q", title=measure))
        return bars.properties(title=chart_title)

    values = cards_df[["id", measure]].copy().rename(columns={measure: "value"})
    bars = alt.Chart(values).mark_bar()
    bars = bars.encode(
        # Cards are ordered by descending measure value.
        x=alt.X("id:N", sort="-y", title="Card"),
        y=alt.Y("value:Q", title=measure),
        tooltip=["id:N", "value:Q"],
    )
    return bars.properties(title=chart_title)
|
koobi/cli.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"""CLI entrypoint for Koobi."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
from datetime import date
|
|
7
|
+
import os
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
import subprocess
|
|
10
|
+
import sys
|
|
11
|
+
|
|
12
|
+
from .config import load_config
|
|
13
|
+
from .index import build
|
|
14
|
+
from .schema import Card, dump_card
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def main() -> int:
    """Parse the command line and run the selected Koobi sub-command.

    Returns the sub-command's exit code, or 1 after printing the help text
    when no known sub-command was given.
    """
    parser = _build_parser()
    args = parser.parse_args()

    handlers = {
        "init": _cmd_init,
        "new": _cmd_new,
        "index": _cmd_index,
        "ui": _cmd_ui,
    }
    handler = handlers.get(getattr(args, "command", None))
    if handler is None:
        parser.print_help()
        return 1
    return handler(args)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _resolve_config_start(args: argparse.Namespace) -> Path:
|
|
37
|
+
"""Resolve where to start config discovery.
|
|
38
|
+
|
|
39
|
+
Precedence: explicit ``--config`` > ``KOOBI_CONFIG`` env var > current
|
|
40
|
+
working directory. The returned path is handed to ``load_config``, which
|
|
41
|
+
accepts either a ``koobi.toml`` file or a directory to search upward from.
|
|
42
|
+
"""
|
|
43
|
+
config_arg = getattr(args, "config", None)
|
|
44
|
+
if config_arg is not None:
|
|
45
|
+
return config_arg.resolve()
|
|
46
|
+
env_config = os.environ.get("KOOBI_CONFIG")
|
|
47
|
+
if env_config:
|
|
48
|
+
return Path(env_config).resolve()
|
|
49
|
+
return Path.cwd()
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _build_parser() -> argparse.ArgumentParser:
|
|
53
|
+
parser = argparse.ArgumentParser(prog="koobi", description="Koobi Phase 0 CLI")
|
|
54
|
+
subparsers = parser.add_subparsers(dest="command")
|
|
55
|
+
|
|
56
|
+
init_parser = subparsers.add_parser("init", help="Create koobi.toml and cards directory")
|
|
57
|
+
init_parser.add_argument("--path", type=Path, default=Path.cwd(), help="Project root path.")
|
|
58
|
+
|
|
59
|
+
new_parser = subparsers.add_parser("new", help="Create a new card file")
|
|
60
|
+
new_parser.add_argument("card_id", help="Card id, for example demo_project.my_model")
|
|
61
|
+
new_parser.add_argument("--name", default=None, help="Card display name")
|
|
62
|
+
new_parser.add_argument("--type", default="model", help="Card type")
|
|
63
|
+
new_parser.add_argument("--project", default="default", help="Card project")
|
|
64
|
+
new_parser.add_argument("--status", default="idea", help="Card status")
|
|
65
|
+
new_parser.add_argument("--config", type=Path, default=None, help="Path to koobi.toml or project directory")
|
|
66
|
+
|
|
67
|
+
index_parser = subparsers.add_parser("index", help="Build the DuckDB index")
|
|
68
|
+
index_parser.add_argument("--config", type=Path, default=None, help="Path to koobi.toml or project directory")
|
|
69
|
+
|
|
70
|
+
ui_parser = subparsers.add_parser("ui", help="Launch the Marimo app")
|
|
71
|
+
ui_parser.add_argument("--config", type=Path, default=None, help="Path to koobi.toml or project directory")
|
|
72
|
+
|
|
73
|
+
return parser
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _cmd_init(args: argparse.Namespace) -> int:
|
|
77
|
+
root = args.path.resolve()
|
|
78
|
+
cards_dir = root / "docs" / "cards"
|
|
79
|
+
index_path = ".koobi/index.duckdb"
|
|
80
|
+
root.mkdir(parents=True, exist_ok=True)
|
|
81
|
+
cards_dir.mkdir(parents=True, exist_ok=True)
|
|
82
|
+
|
|
83
|
+
config_path = root / "koobi.toml"
|
|
84
|
+
if not config_path.exists():
|
|
85
|
+
config_path.write_text(
|
|
86
|
+
"\n".join(
|
|
87
|
+
[
|
|
88
|
+
"[koobi]",
|
|
89
|
+
'cards = "docs/cards"',
|
|
90
|
+
'root = "."',
|
|
91
|
+
f'index = "{index_path}"',
|
|
92
|
+
"",
|
|
93
|
+
"[display]",
|
|
94
|
+
'primary_measures = ["sharpe", "calmar", "max_drawdown"]',
|
|
95
|
+
'default_sort = "sharpe"',
|
|
96
|
+
"packs = []",
|
|
97
|
+
"",
|
|
98
|
+
]
|
|
99
|
+
),
|
|
100
|
+
encoding="utf-8",
|
|
101
|
+
)
|
|
102
|
+
print(f"Created {config_path}")
|
|
103
|
+
else:
|
|
104
|
+
print(f"Using existing {config_path}")
|
|
105
|
+
|
|
106
|
+
print(f"Cards directory: {cards_dir}")
|
|
107
|
+
return 0
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _cmd_new(args: argparse.Namespace) -> int:
    """Create a new, empty card file named ``<card_id>.md`` in the cards directory.

    The card is populated from the command-line options (name defaults to the
    card id) with today's date as both ``created`` and ``updated``.

    Returns:
        0 on success; 1 if a file for this card id already exists. An existing
        card is never overwritten, because that would silently discard its
        metrics and description.
    """
    cfg = load_config(_resolve_config_start(args))
    # Read the clock once so created/updated cannot straddle midnight.
    today = date.today()
    card = Card(
        id=args.card_id,
        name=args.name or args.card_id,
        type=args.type,
        project=args.project,
        status=args.status,
        created=today,
        updated=today,
        tags=[],
        metrics={},
        params={},
        artifacts={},
        series={},
        links=[],
        body="",
        key_order=[
            "id",
            "name",
            "type",
            "project",
            "status",
            "created",
            "updated",
            "tags",
            "metrics",
            "params",
            "artifacts",
            "series",
            "links",
        ],
    )
    output_path = cfg.cards_root / f"{card.id}.md"
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Render before opening so a rendering error cannot leave an empty file.
    rendered = dump_card(card)
    try:
        # Mode "x" fails if the file exists, which makes the no-overwrite
        # check and the creation a single atomic step.
        with output_path.open("x", encoding="utf-8") as handle:
            handle.write(rendered)
    except FileExistsError:
        print(f"Card already exists, not overwriting: {output_path}", file=sys.stderr)
        return 1
    print(f"Created {output_path}")
    return 0
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _cmd_index(args: argparse.Namespace) -> int:
    """Rebuild the index for the resolved config and print its location.

    Always returns 0.
    """
    config_start = _resolve_config_start(args)
    index_file = build(load_config(config_start))
    print(f"Built index at {index_file}")
    return 0
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _cmd_ui(args: argparse.Namespace) -> int:
    """Rebuild the index, then run ``app.py`` with ``python -m marimo run``.

    The child process receives ``KOOBI_CONFIG`` set to the discovered config
    file, or to the discovery start path when no config file was found.
    Returns the child's exit code.
    """
    config_target = _resolve_config_start(args)
    cfg = load_config(config_target)
    build(cfg)

    config_for_app = config_target if cfg.config_path is None else cfg.config_path
    child_env = {**os.environ, "KOOBI_CONFIG": str(config_for_app)}

    app_file = Path(__file__).with_name("app.py")
    launch = [sys.executable, "-m", "marimo", "run", str(app_file)]
    completed = subprocess.run(launch, env=child_env, check=False)
    return completed.returncode
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
if __name__ == "__main__":
|
|
174
|
+
raise SystemExit(main())
|
koobi/config.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Configuration loading for Koobi."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
try: # Python 3.11+
|
|
9
|
+
import tomllib
|
|
10
|
+
except ModuleNotFoundError: # Python 3.10 fallback
|
|
11
|
+
import tomli as tomllib
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class Config:
    """Runtime configuration resolved to absolute paths."""

    # Base directory against which card data paths are resolved
    # (``[koobi].root`` in koobi.toml, default ".").
    root: Path
    # Directory that is scanned for card files (``[koobi].cards``).
    cards_root: Path
    # Location of the DuckDB index file (``[koobi].index``).
    index: Path
    # ``[display].primary_measures`` as strings; empty when not configured.
    primary_measures: list[str] = field(default_factory=list)
    # ``[display].default_sort`` as a string, or None when not configured.
    default_sort: str | None = None
    # ``[display].packs`` as strings; empty when not configured.
    packs: list[str] = field(default_factory=list)
    # The koobi.toml these values came from; None when defaults were used.
    config_path: Path | None = None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def load_config(start: Path | str | None = None) -> Config:
    """Locate and load ``koobi.toml``, falling back to defaults when absent.

    *start* may be a ``koobi.toml`` file or a directory to search upward from;
    it defaults to the current working directory. Relative paths in the file
    are resolved against the directory containing it. Without a config file
    the defaults are ``cards`` and ``.koobi/index.duckdb`` below the start
    directory, and ``config_path`` is ``None``.
    """
    origin = Path.cwd().resolve() if start is None else Path(start).resolve()
    found = _find_config_path(origin)

    if found is None:
        base = origin if origin.is_dir() else origin.parent
        return Config(
            root=base,
            cards_root=(base / "cards").resolve(),
            index=(base / ".koobi" / "index.duckdb").resolve(),
            primary_measures=[],
            default_sort=None,
            packs=[],
            config_path=None,
        )

    parsed = tomllib.loads(found.read_text(encoding="utf-8"))
    koobi_section = parsed.get("koobi", {})
    display_section = parsed.get("display", {})
    config_dir = found.parent

    def _path_setting(key: str, fallback: str) -> Path:
        # Path options are interpreted relative to the config file's directory.
        return _resolve_config_path(config_dir, str(koobi_section.get(key, fallback)))

    sort_key = display_section.get("default_sort")
    return Config(
        root=_path_setting("root", "."),
        cards_root=_path_setting("cards", "cards"),
        index=_path_setting("index", ".koobi/index.duckdb"),
        primary_measures=[str(item) for item in display_section.get("primary_measures", [])],
        default_sort=None if sort_key is None else str(sort_key),
        packs=[str(item) for item in display_section.get("packs", [])],
        config_path=found,
    )
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _find_config_path(start: Path) -> Path | None:
|
|
68
|
+
if start.is_file():
|
|
69
|
+
if start.name == "koobi.toml":
|
|
70
|
+
return start
|
|
71
|
+
current = start.parent
|
|
72
|
+
else:
|
|
73
|
+
current = start
|
|
74
|
+
|
|
75
|
+
for candidate_dir in [current, *current.parents]:
|
|
76
|
+
candidate = candidate_dir / "koobi.toml"
|
|
77
|
+
if candidate.exists():
|
|
78
|
+
return candidate.resolve()
|
|
79
|
+
return None
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _resolve_config_path(base_dir: Path, raw_path: str) -> Path:
|
|
83
|
+
candidate = Path(raw_path)
|
|
84
|
+
if candidate.is_absolute():
|
|
85
|
+
return candidate.resolve()
|
|
86
|
+
return (base_dir / candidate).resolve()
|
koobi/index.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""DuckDB indexing and query helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import duckdb
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from .cards import iter_cards
|
|
13
|
+
from .config import Config
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def build(cfg: Config) -> Path:
    """Rebuild the DuckDB index from card files and return the index path.

    Uses ``CREATE OR REPLACE TABLE`` rather than deleting the database file so a
    rebuild is safe even when another connection to the same path is still open
    in-process (DuckDB caches one database instance per path). This is what makes
    the app's "Reindex" action reliable.

    All card files are read before the database is touched, and the table
    replacement plus inserts run in a single transaction. A malformed card or
    a duplicate card id therefore raises without destroying the previous
    index or leaving a half-populated one behind.

    Raises:
        Whatever card loading or DuckDB raises; the transaction is rolled back
        first when the failure happens during the database update.
    """
    cfg.index.parent.mkdir(parents=True, exist_ok=True)

    # Load everything up front: parsing errors must surface before any table
    # is replaced.
    cards = list(iter_cards(cfg.cards_root, loader_root=cfg.root))

    con = duckdb.connect(str(cfg.index))
    try:
        con.begin()
        try:
            con.execute(
                """
                CREATE OR REPLACE TABLE cards(
                    id TEXT PRIMARY KEY,
                    name TEXT,
                    type TEXT,
                    project TEXT,
                    status TEXT,
                    created DATE,
                    updated DATE,
                    body TEXT,
                    path TEXT,
                    tags JSON
                )
                """
            )
            con.execute(
                """
                CREATE OR REPLACE TABLE measures(
                    card_id TEXT,
                    key TEXT,
                    value DOUBLE
                )
                """
            )
            con.execute(
                """
                CREATE OR REPLACE TABLE series_registry(
                    card_id TEXT,
                    name TEXT,
                    path TEXT,
                    x TEXT,
                    y JSON
                )
                """
            )

            for card in cards:
                con.execute("INSERT INTO cards VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", card.row())
                for key, value in card.metrics.items():
                    con.execute("INSERT INTO measures VALUES (?, ?, ?)", [card.id, key, float(value)])
                for series_name, ref in card.series.items():
                    # Prefer the resolved absolute path; fall back to the raw one.
                    series_path = str(ref.abs_path) if ref.abs_path is not None else ref.path
                    con.execute(
                        "INSERT INTO series_registry VALUES (?, ?, ?, ?, ?)",
                        [card.id, series_name, series_path, ref.x, json.dumps(ref.y)],
                    )
            con.commit()
        except Exception:
            # Restore the previous index contents before propagating.
            con.rollback()
            raise
    finally:
        con.close()
    return cfg.index
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def connect(cfg: Config) -> duckdb.DuckDBPyConnection:
    """Open a connection to the index file, building the index if it is missing."""
    index_missing = not cfg.index.exists()
    if index_missing:
        build(cfg)
    database = str(cfg.index)
    return duckdb.connect(database)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def query_cards(
    con: duckdb.DuckDBPyConnection,
    *,
    project: str | None = None,
    status: str | None = None,
    search: str | None = None,
    columns: list[str] | None = None,
) -> pd.DataFrame:
    """Return the matching cards with one extra column per measure.

    Args:
        con: Connection to an index containing ``cards`` and ``measures``.
        project: Keep only cards with this project (ignored when falsy).
        status: Keep only cards with this status (ignored when falsy).
        search: Case-insensitive substring matched against the name or the
            tags text (ignored when falsy).
        columns: When given and non-empty, restrict the result to these
            columns, in this order; unknown names are dropped.

    Returns:
        A DataFrame of card rows left-joined with their measures pivoted to
        wide format. An empty card result is returned as-is.
    """
    query_parts = [
        "SELECT id, name, type, project, status, created, updated, body, path, tags "
        "FROM cards WHERE 1=1"
    ]
    bindings: list[Any] = []

    for column_name, wanted in (("project", project), ("status", status)):
        if wanted:
            query_parts.append(f" AND {column_name} = ?")
            bindings.append(wanted)
    if search:
        query_parts.append(" AND (name ILIKE ? OR CAST(tags AS TEXT) ILIKE ?)")
        pattern = f"%{search}%"
        bindings += [pattern, pattern]

    cards_df = con.execute("".join(query_parts), bindings).fetchdf()
    if cards_df.empty:
        return cards_df

    card_ids = cards_df["id"].tolist()
    markers = ", ".join(["?"] * len(card_ids))
    measures_df = con.execute(
        f"SELECT card_id, key, value FROM measures WHERE card_id IN ({markers})", card_ids
    ).fetchdf()

    result = cards_df
    if not measures_df.empty:
        # Long (card_id, key, value) rows become one column per measure key.
        pivoted = measures_df.pivot_table(
            index="card_id", columns="key", values="value", aggfunc="first"
        )
        wide = pivoted.reset_index().rename(columns={"card_id": "id"})
        result = cards_df.merge(wide, on="id", how="left")

    if not columns:
        return result
    return result[[name for name in columns if name in result.columns]]
|
koobi/schema.py
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
"""Card schema and Markdown frontmatter parsing."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from datetime import date
|
|
7
|
+
import json
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
import yaml
|
|
12
|
+
|
|
13
|
+
RESERVED_KEYS = {
|
|
14
|
+
"id",
|
|
15
|
+
"name",
|
|
16
|
+
"type",
|
|
17
|
+
"project",
|
|
18
|
+
"status",
|
|
19
|
+
"created",
|
|
20
|
+
"updated",
|
|
21
|
+
"tags",
|
|
22
|
+
"metrics",
|
|
23
|
+
"params",
|
|
24
|
+
"artifacts",
|
|
25
|
+
"series",
|
|
26
|
+
"links",
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class SeriesRef:
    """Series metadata stored in a card frontmatter."""

    # Location of the series data as written in the card (may be relative).
    path: str
    # Optional ``x`` entry from the frontmatter; presumably a column name.
    x: str | None = None
    # ``y`` entries from the frontmatter as strings; presumably column names.
    y: list[str] = field(default_factory=list)
    # Resolved location of ``path``; None until paths have been resolved.
    abs_path: Path | None = None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class Card:
    """In-memory form of one card: header fields, body text and bookkeeping."""

    # Header fields.
    id: str
    name: str
    type: str = "model"
    project: str = "default"
    status: str = "idea"
    created: date | None = None
    updated: date | None = None
    tags: list[str] = field(default_factory=list)
    metrics: dict[str, float] = field(default_factory=dict)
    params: dict[str, Any] = field(default_factory=dict)
    artifacts: dict[str, str] = field(default_factory=dict)
    series: dict[str, SeriesRef] = field(default_factory=dict)
    # Frontmatter entries whose key is not one of the reserved header keys.
    sections: dict[str, Any] = field(default_factory=dict)
    links: list[str] = field(default_factory=list)
    # Markdown text following the frontmatter.
    body: str = ""
    # File the card was loaded from or will be saved to.
    path: Path | None = None
    # Order of the frontmatter keys.
    key_order: list[str] = field(default_factory=list)
    # Artifact name -> resolved filesystem path.
    resolved_artifacts: dict[str, Path] = field(default_factory=dict)

    def row(self) -> tuple[str, str, str, str, str, str | None, str | None, str, str, str]:
        """Return this card as the 10-value tuple inserted into the ``cards`` table.

        Order: id, name, type, project, status, created, updated, body, path,
        tags (JSON-encoded).

        Raises:
            ValueError: If ``path`` is not set.
        """
        if self.path is None:
            raise ValueError("Card path is required to emit a cards row.")

        created_text = _iso(self.created)
        updated_text = _iso(self.updated)
        tags_json = json.dumps(self.tags)
        return (
            self.id,
            self.name,
            self.type,
            self.project,
            self.status,
            created_text,
            updated_text,
            self.body,
            str(self.path),
            tags_json,
        )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def parse_card(text: str) -> Card:
    """Parse Markdown card text into a Card dataclass.

    The text is split into YAML frontmatter and body by
    ``_split_frontmatter``. Known frontmatter keys populate the matching
    ``Card`` fields, every key not listed in ``RESERVED_KEYS`` is kept as-is
    in ``Card.sections``, and the order of the frontmatter keys is recorded in
    ``Card.key_order``. ``Card.path`` is not set here.

    Args:
        text: Full contents of a card file.

    Returns:
        The populated ``Card``.

    Raises:
        ValueError: Propagated from ``_split_frontmatter`` for malformed
            frontmatter, and from ``float()`` / ``_parse_date`` when a metric
            or date value cannot be converted.
        TypeError: From ``float()`` when a metric value is neither a string
            nor a number (for example an empty ``metric:`` entry).
    """
    metadata, body = _split_frontmatter(text)
    key_order = list(metadata.keys())

    def _text(key: str, default: str) -> str:
        # A key written with no value (``status:``) loads as None. Treat it
        # like a missing key instead of storing the literal string "None".
        value = metadata.get(key)
        return default if value is None else str(value)

    series_map: dict[str, SeriesRef] = {}
    raw_series = metadata.get("series") or {}
    if isinstance(raw_series, dict):
        for name, payload in raw_series.items():
            # Entries that are not mappings cannot describe a series; skip them.
            if not isinstance(payload, dict):
                continue
            raw_y = payload.get("y") or []
            # ``y`` may be written as a single scalar or as a list.
            y_values = [str(item) for item in raw_y] if isinstance(raw_y, list) else [str(raw_y)]
            series_map[str(name)] = SeriesRef(
                path=str(payload.get("path", "")),
                x=str(payload["x"]) if payload.get("x") is not None else None,
                y=y_values,
            )

    raw_metrics = metadata.get("metrics") or {}
    metrics: dict[str, float] = {}
    if isinstance(raw_metrics, dict):
        for metric_name, metric_value in raw_metrics.items():
            metrics[str(metric_name)] = float(metric_value)

    params = metadata.get("params") or {}
    artifacts = metadata.get("artifacts") or {}
    sections = {k: v for k, v in metadata.items() if k not in RESERVED_KEYS}

    return Card(
        id=_text("id", ""),
        name=_text("name", ""),
        type=_text("type", "model"),
        project=_text("project", "default"),
        status=_text("status", "idea"),
        created=_parse_date(metadata.get("created")),
        updated=_parse_date(metadata.get("updated")),
        tags=_coerce_str_list(metadata.get("tags", [])),
        metrics=metrics,
        params=params if isinstance(params, dict) else {},
        artifacts={str(k): str(v) for k, v in artifacts.items()} if isinstance(artifacts, dict) else {},
        series=series_map,
        sections=sections,
        links=_coerce_str_list(metadata.get("links", [])),
        body=body,
        key_order=key_order,
    )
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def dump_card(card: Card) -> str:
    """Render *card* as Markdown text: a YAML frontmatter block, then the body.

    The frontmatter entries come from ``_serialize_card`` and are emitted in
    the order chosen by ``_metadata_order``.
    """
    fields = _serialize_card(card)
    frontmatter = {}
    for key in _metadata_order(card, fields):
        if key in fields:
            frontmatter[key] = fields[key]
    # sort_keys=False keeps the order built above instead of sorting keys.
    rendered = yaml.safe_dump(frontmatter, sort_keys=False, default_flow_style=False)
    header = rendered.rstrip()
    return f"---\n{header}\n---\n{card.body}"
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _split_frontmatter(text: str) -> tuple[dict[str, Any], str]:
|
|
140
|
+
lines = text.splitlines(keepends=True)
|
|
141
|
+
if not lines or lines[0].strip() != "---":
|
|
142
|
+
return {}, text
|
|
143
|
+
|
|
144
|
+
end_idx = None
|
|
145
|
+
for idx, line in enumerate(lines[1:], start=1):
|
|
146
|
+
if line.strip() == "---":
|
|
147
|
+
end_idx = idx
|
|
148
|
+
break
|
|
149
|
+
if end_idx is None:
|
|
150
|
+
raise ValueError("Malformed frontmatter: missing closing '---'.")
|
|
151
|
+
|
|
152
|
+
header = "".join(lines[1:end_idx])
|
|
153
|
+
body = "".join(lines[end_idx + 1 :])
|
|
154
|
+
loaded = yaml.safe_load(header) or {}
|
|
155
|
+
if not isinstance(loaded, dict):
|
|
156
|
+
raise ValueError("Card frontmatter must be a mapping.")
|
|
157
|
+
return loaded, body
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _serialize_card(card: Card) -> dict[str, Any]:
|
|
161
|
+
metadata: dict[str, Any] = {}
|
|
162
|
+
metadata["id"] = card.id
|
|
163
|
+
metadata["name"] = card.name
|
|
164
|
+
if "type" in card.key_order or card.type != "model":
|
|
165
|
+
metadata["type"] = card.type
|
|
166
|
+
if "project" in card.key_order or card.project != "default":
|
|
167
|
+
metadata["project"] = card.project
|
|
168
|
+
if "status" in card.key_order or card.status != "idea":
|
|
169
|
+
metadata["status"] = card.status
|
|
170
|
+
if "created" in card.key_order or card.created is not None:
|
|
171
|
+
metadata["created"] = _iso(card.created)
|
|
172
|
+
if "updated" in card.key_order or card.updated is not None:
|
|
173
|
+
metadata["updated"] = _iso(card.updated)
|
|
174
|
+
if "tags" in card.key_order or card.tags:
|
|
175
|
+
metadata["tags"] = list(card.tags)
|
|
176
|
+
if "metrics" in card.key_order or card.metrics:
|
|
177
|
+
metadata["metrics"] = {k: float(v) for k, v in card.metrics.items()}
|
|
178
|
+
if "params" in card.key_order or card.params:
|
|
179
|
+
metadata["params"] = card.params
|
|
180
|
+
if "artifacts" in card.key_order or card.artifacts:
|
|
181
|
+
metadata["artifacts"] = {k: str(v) for k, v in card.artifacts.items()}
|
|
182
|
+
if "series" in card.key_order or card.series:
|
|
183
|
+
metadata["series"] = {
|
|
184
|
+
name: {
|
|
185
|
+
"path": ref.path,
|
|
186
|
+
"x": ref.x,
|
|
187
|
+
"y": list(ref.y),
|
|
188
|
+
}
|
|
189
|
+
for name, ref in card.series.items()
|
|
190
|
+
}
|
|
191
|
+
if "links" in card.key_order or card.links:
|
|
192
|
+
metadata["links"] = list(card.links)
|
|
193
|
+
for section_key, section_value in card.sections.items():
|
|
194
|
+
metadata[section_key] = section_value
|
|
195
|
+
return metadata
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _metadata_order(card: Card, metadata: dict[str, Any]) -> list[str]:
|
|
199
|
+
default_order = [
|
|
200
|
+
"id",
|
|
201
|
+
"name",
|
|
202
|
+
"type",
|
|
203
|
+
"project",
|
|
204
|
+
"status",
|
|
205
|
+
"created",
|
|
206
|
+
"updated",
|
|
207
|
+
"tags",
|
|
208
|
+
"metrics",
|
|
209
|
+
"params",
|
|
210
|
+
"artifacts",
|
|
211
|
+
"series",
|
|
212
|
+
"links",
|
|
213
|
+
]
|
|
214
|
+
seen: set[str] = set()
|
|
215
|
+
order: list[str] = []
|
|
216
|
+
|
|
217
|
+
for key in card.key_order or default_order:
|
|
218
|
+
if key in metadata and key not in seen:
|
|
219
|
+
seen.add(key)
|
|
220
|
+
order.append(key)
|
|
221
|
+
|
|
222
|
+
for key in metadata.keys():
|
|
223
|
+
if key not in seen:
|
|
224
|
+
seen.add(key)
|
|
225
|
+
order.append(key)
|
|
226
|
+
|
|
227
|
+
return order
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _parse_date(value: Any) -> date | None:
|
|
231
|
+
if value is None:
|
|
232
|
+
return None
|
|
233
|
+
if isinstance(value, date):
|
|
234
|
+
return value
|
|
235
|
+
return date.fromisoformat(str(value))
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _iso(value: date | None) -> str | None:
|
|
239
|
+
if value is None:
|
|
240
|
+
return None
|
|
241
|
+
return value.isoformat()
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _coerce_str_list(value: Any) -> list[str]:
|
|
245
|
+
if value is None:
|
|
246
|
+
return []
|
|
247
|
+
if isinstance(value, list):
|
|
248
|
+
return [str(item) for item in value]
|
|
249
|
+
return [str(value)]
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: koobi
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: File-native model and experiment registry
|
|
5
|
+
Author-email: blu3c0ral <blu3c0ral@protonmail.ch>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/blu3c0ral/koobi
|
|
8
|
+
Project-URL: Repository, https://github.com/blu3c0ral/koobi
|
|
9
|
+
Project-URL: Issues, https://github.com/blu3c0ral/koobi/issues
|
|
10
|
+
Keywords: experiment-tracking,model-registry,marimo,duckdb,markdown
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: altair
|
|
24
|
+
Requires-Dist: duckdb
|
|
25
|
+
Requires-Dist: marimo
|
|
26
|
+
Requires-Dist: pandas
|
|
27
|
+
Requires-Dist: pyarrow
|
|
28
|
+
Requires-Dist: python-frontmatter
|
|
29
|
+
Requires-Dist: tomli; python_version < "3.11"
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: pytest; extra == "dev"
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
# Koobi
|
|
35
|
+
|
|
36
|
+
<p align="center">
|
|
37
|
+
<img src=".github/assets/koobi_icon.png" alt="Koobi project icon" width="180" />
|
|
38
|
+
</p>
|
|
39
|
+
|
|
40
|
+
File-native model and experiment registry for local workflows.
|
|
41
|
+
|
|
42
|
+
Koobi stores experiment metadata as plain Markdown cards, builds a disposable DuckDB index, and serves an interactive Marimo UI for browsing, filtering, visualizing, and editing cards. The source of truth is always your files.
|
|
43
|
+
|
|
44
|
+
## Why Koobi
|
|
45
|
+
|
|
46
|
+
Most experiment trackers assume a hosted service and database-backed runs. Koobi is designed for local-first users who want:
|
|
47
|
+
|
|
48
|
+
- readable, versionable records in Git
|
|
49
|
+
- zero-server operation
|
|
50
|
+
- domain-agnostic metrics and parameters
|
|
51
|
+
- workflows friendly to both humans and coding agents
|
|
52
|
+
|
|
53
|
+
## Current MVP Capabilities
|
|
54
|
+
|
|
55
|
+
- Markdown card parsing and serialization with preserved body/key ordering
|
|
56
|
+
- additive card updates (only targeted fields are rewritten)
|
|
57
|
+
- DuckDB index build with:
|
|
58
|
+
- `cards` table (metadata + body)
|
|
59
|
+
- `measures` table (long-format metrics)
|
|
60
|
+
- `series_registry` table (series pointers)
|
|
61
|
+
- Marimo app with:
|
|
62
|
+
- project/status/search filters
|
|
63
|
+
- model table view
|
|
64
|
+
- card detail view (metrics/params/tags/body)
|
|
65
|
+
- edit form (name/status/tags/body) with write-back
|
|
66
|
+
- one measure bar chart
|
|
67
|
+
- CLI commands:
|
|
68
|
+
- `koobi init`
|
|
69
|
+
- `koobi new`
|
|
70
|
+
- `koobi index`
|
|
71
|
+
- `koobi ui`
|
|
72
|
+
|
|
73
|
+
## Architecture Overview
|
|
74
|
+
|
|
75
|
+
```text
|
|
76
|
+
cards/*.md --> index.duckdb --> Marimo app
|
|
77
|
+
^ |
|
|
78
|
+
|---------- edit/write-back -------|
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
- Cards are authoritative.
|
|
82
|
+
- Index is derived and disposable.
|
|
83
|
+
- UI reads from index and writes edits back to cards.
|
|
84
|
+
|
|
85
|
+
## Installation
|
|
86
|
+
|
|
87
|
+
Install into any project or environment directly from Git:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
pip install git+https://github.com/blu3c0ral/koobi.git
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
This installs the `koobi` command and the importable `koobi` package.
|
|
94
|
+
|
|
95
|
+
For local development on Koobi itself, install editable with dev extras:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
python3 -m venv .venv
|
|
99
|
+
source .venv/bin/activate
|
|
100
|
+
python -m pip install -e ".[dev]"
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Quickstart
|
|
104
|
+
|
|
105
|
+
Koobi discovers its configuration automatically: every command searches the
|
|
106
|
+
current directory and its parents for a `koobi.toml`. Work from inside your
|
|
107
|
+
workspace and no `--config` flag is needed.
|
|
108
|
+
|
|
109
|
+
### 1) Create (or enter) a workspace
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
koobi init --path my-registry
|
|
113
|
+
cd my-registry
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Or use the bundled demo workspace:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
python examples/local_example_to_cards.py --overwrite
|
|
120
|
+
cd examples/local
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### 2) Build the index
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
koobi index
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### 3) Launch the UI
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
koobi ui
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
The UI opens at http://localhost:2718.
|
|
136
|
+
|
|
137
|
+
### Running from outside the workspace
|
|
138
|
+
|
|
139
|
+
If you prefer not to `cd`, point Koobi at the config explicitly or via an
|
|
140
|
+
environment variable (precedence: `--config` > `KOOBI_CONFIG` > upward search from the current directory):
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
koobi ui --config examples/local/koobi.toml
|
|
144
|
+
# or
|
|
145
|
+
export KOOBI_CONFIG=examples/local/koobi.toml
|
|
146
|
+
koobi ui
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
## CLI Reference
|
|
150
|
+
|
|
151
|
+
Initialize a new workspace:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
koobi init --path /path/to/workspace
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
Create a new card (run from inside the workspace):
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
koobi new demo_project.my_model
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
Rebuild index:
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
koobi index
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Run UI:
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
koobi ui
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## Use as a Library
|
|
176
|
+
|
|
177
|
+
Koobi's core is importable, so other projects and coding agents can read,
|
|
178
|
+
query, and write cards programmatically:
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
from koobi import load_config, build, connect, query_cards
|
|
182
|
+
from koobi import Card, load_card, save_card
|
|
183
|
+
|
|
184
|
+
# Resolve config by searching upward from a path (file or directory)
|
|
185
|
+
cfg = load_config("path/to/workspace")
|
|
186
|
+
|
|
187
|
+
# Build the disposable DuckDB index from the Markdown cards
|
|
188
|
+
build(cfg)
|
|
189
|
+
|
|
190
|
+
# Query cards joined with wide-format measures
|
|
191
|
+
con = connect(cfg)
|
|
192
|
+
df = query_cards(con, project="demo_project", status="active")
|
|
193
|
+
|
|
194
|
+
# Read, mutate, and write back a single card (only targeted fields change)
|
|
195
|
+
card = load_card("path/to/workspace/cards/demo_project.my_model.md")
|
|
196
|
+
card.status = "shipped"
|
|
197
|
+
save_card(card, fields=["status"])
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
The full public API is exported from the top-level `koobi` package; see
|
|
201
|
+
`koobi.__all__`.
|
|
202
|
+
|
|
203
|
+
## Repository Layout
|
|
204
|
+
|
|
205
|
+
```text
|
|
206
|
+
src/koobi/ # core package (schema, cards, config, index, charts, app, cli)
|
|
207
|
+
tests/ # unit tests
|
|
208
|
+
examples/local/ # self-contained demo workspace
|
|
209
|
+
examples/local_example_to_cards.py
|
|
210
|
+
docs/ # product, technical, and future feature docs
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
## Development
|
|
214
|
+
|
|
215
|
+
Run tests:
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
pytest
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
Recommended pre-PR checklist:
|
|
222
|
+
|
|
223
|
+
- run `pytest`
|
|
224
|
+
- regenerate local example cards if example inputs changed
|
|
225
|
+
- verify `koobi index` and `koobi ui` against `examples/local/koobi.toml`
|
|
226
|
+
|
|
227
|
+
## Documentation
|
|
228
|
+
|
|
229
|
+
- [`docs/product_design.md`](docs/product_design.md) - product framing and roadmap
|
|
230
|
+
- [`docs/technical_design.md`](docs/technical_design.md) - architecture and data contracts
|
|
231
|
+
- [`docs/future_features.md`](docs/future_features.md) - planned automation and extensions
|
|
232
|
+
|
|
233
|
+
## Status
|
|
234
|
+
|
|
235
|
+
Active MVP implementation with local example workflow included. Phase 0 focus is stability of file contracts, indexing, and interactive editing.
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
koobi/__init__.py,sha256=nD7-cGqDqO7hpOHzARpsYwbtZp3IAoCyAiOpYcEKkzo,846
|
|
2
|
+
koobi/app.py,sha256=bM_AspqkZ5u02at_5VvX5wGKsrTt11MWT74c7NVyxM0,6220
|
|
3
|
+
koobi/cards.py,sha256=wqLLxqwFvozULj0hdYVQ-vUvM5AyvkelASNjh2Cyg9Y,3219
|
|
4
|
+
koobi/charts.py,sha256=llbl2Oc_qheQeMSqXHl-ds2tEZksHesWmnRXHZGA7rE,927
|
|
5
|
+
koobi/cli.py,sha256=2v8Y7oLIoLWxwHVqISZEvaYUEsviuGl6aGpYPEJ81HU,5425
|
|
6
|
+
koobi/config.py,sha256=K9tV2rfdrt1r6uQqOqblwemWp-fFzOpDRumrzyN1ysU,2770
|
|
7
|
+
koobi/index.py,sha256=JOr_y712Xa3Ap0uEXLLo6G_AmfSWpzCJ7PFJkvILWKY,4038
|
|
8
|
+
koobi/schema.py,sha256=AYnqwPpbXO77oL64XEsURZ-ZtbvV2e8Em8cSBX9_3p4,7844
|
|
9
|
+
koobi-0.0.1.dist-info/licenses/LICENSE,sha256=DFNTqoZ1aQLmIomIs7YhzhiZvMrGKkh-Ri7Mq08hx00,1066
|
|
10
|
+
koobi-0.0.1.dist-info/METADATA,sha256=n9oNgUD-xWPzWO9QbW3lR8-Nxuj_ZO8wmP3h-3aYOZw,6193
|
|
11
|
+
koobi-0.0.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
12
|
+
koobi-0.0.1.dist-info/entry_points.txt,sha256=12qqWPg-YGv98DslZJXliiRnmWG-Qk_q5KWzcdZ39hQ,41
|
|
13
|
+
koobi-0.0.1.dist-info/top_level.txt,sha256=id_nUg0qDMmJBPQtLsyswEsJ-EGh5VllDg1xq2fSJ4k,6
|
|
14
|
+
koobi-0.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 blu3c0ral
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
koobi
|