minima-cli 0.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minima/__init__.py +5 -0
- minima/api/__init__.py +1 -0
- minima/api/auth.py +39 -0
- minima/api/errors.py +40 -0
- minima/api/routers/__init__.py +1 -0
- minima/api/routers/calibration.py +50 -0
- minima/api/routers/feedback.py +279 -0
- minima/api/routers/health.py +50 -0
- minima/api/routers/models.py +42 -0
- minima/api/routers/recommend.py +66 -0
- minima/api/routers/savings.py +55 -0
- minima/api/routers/strategies.py +33 -0
- minima/catalog/__init__.py +1 -0
- minima/catalog/data/capability_priors.json +210 -0
- minima/catalog/data/model_aliases.json +12 -0
- minima/catalog/merge.py +69 -0
- minima/catalog/refresh.py +54 -0
- minima/catalog/sources/__init__.py +1 -0
- minima/catalog/sources/litellm.py +19 -0
- minima/catalog/sources/openrouter.py +25 -0
- minima/catalog/store.py +86 -0
- minima/config.py +288 -0
- minima/deps.py +35 -0
- minima/llm/__init__.py +1 -0
- minima/llm/anthropic.py +106 -0
- minima/llm/base.py +196 -0
- minima/llm/gemini.py +124 -0
- minima/llm/registry.py +54 -0
- minima/logging.py +28 -0
- minima/main.py +109 -0
- minima/memory/__init__.py +1 -0
- minima/memory/adapter.py +572 -0
- minima/memory/keys.py +83 -0
- minima/memory/records.py +190 -0
- minima/memory/threadpool.py +41 -0
- minima/metrics/__init__.py +1 -0
- minima/metrics/calibration.py +415 -0
- minima/metrics/report.py +116 -0
- minima/metrics/savings.py +98 -0
- minima/recommender/__init__.py +1 -0
- minima/recommender/_pg_pool.py +38 -0
- minima/recommender/_redis_client.py +32 -0
- minima/recommender/aggregate.py +157 -0
- minima/recommender/classify.py +165 -0
- minima/recommender/decisionlog.py +505 -0
- minima/recommender/durablerefs.py +312 -0
- minima/recommender/engine.py +997 -0
- minima/recommender/escalation.py +83 -0
- minima/recommender/propensity.py +189 -0
- minima/recommender/recstore.py +368 -0
- minima/recommender/score.py +318 -0
- minima/recommender/types.py +166 -0
- minima/schemas/__init__.py +1 -0
- minima/schemas/common.py +73 -0
- minima/schemas/feedback.py +34 -0
- minima/schemas/models_catalog.py +36 -0
- minima/schemas/recommend.py +104 -0
- minima/schemas/savings.py +39 -0
- minima/schemas/strategies.py +57 -0
- minima/schemas/workflow.py +43 -0
- minima/seeding/__init__.py +1 -0
- minima/seeding/items.py +42 -0
- minima/seeding/llmrouterbench.py +232 -0
- minima/seeding/routerbench.py +141 -0
- minima/seeding/run_seed.py +56 -0
- minima/seeding/synthetic.py +70 -0
- minima/tenancy/__init__.py +8 -0
- minima/tenancy/context.py +37 -0
- minima/tenancy/passthrough.py +110 -0
- minima/version.py +3 -0
- minima_cli-0.4.9.dist-info/METADATA +275 -0
- minima_cli-0.4.9.dist-info/RECORD +161 -0
- minima_cli-0.4.9.dist-info/WHEEL +4 -0
- minima_cli-0.4.9.dist-info/entry_points.txt +5 -0
- minima_cli-0.4.9.dist-info/licenses/LICENSE +295 -0
- minima_client/__init__.py +19 -0
- minima_client/autocapture.py +101 -0
- minima_client/client.py +301 -0
- minima_client/errors.py +23 -0
- minima_harness/LICENSE_PI +32 -0
- minima_harness/__init__.py +16 -0
- minima_harness/agent/__init__.py +72 -0
- minima_harness/agent/agent.py +276 -0
- minima_harness/agent/events.py +124 -0
- minima_harness/agent/loop.py +311 -0
- minima_harness/agent/state.py +79 -0
- minima_harness/agent/tools.py +97 -0
- minima_harness/ai/__init__.py +66 -0
- minima_harness/ai/compat.py +71 -0
- minima_harness/ai/errors.py +96 -0
- minima_harness/ai/events.py +117 -0
- minima_harness/ai/openrouter_catalog.py +153 -0
- minima_harness/ai/provider_catalog.py +299 -0
- minima_harness/ai/provider_quirks.py +37 -0
- minima_harness/ai/providers/__init__.py +75 -0
- minima_harness/ai/providers/_common.py +48 -0
- minima_harness/ai/providers/anthropic.py +290 -0
- minima_harness/ai/providers/base.py +65 -0
- minima_harness/ai/providers/faux.py +173 -0
- minima_harness/ai/providers/google.py +221 -0
- minima_harness/ai/providers/openai_compat.py +278 -0
- minima_harness/ai/registry.py +184 -0
- minima_harness/ai/stream.py +82 -0
- minima_harness/ai/tools.py +51 -0
- minima_harness/ai/types.py +204 -0
- minima_harness/ai/usage.py +41 -0
- minima_harness/minima/__init__.py +40 -0
- minima_harness/minima/cache.py +102 -0
- minima_harness/minima/config.py +85 -0
- minima_harness/minima/goals.py +226 -0
- minima_harness/minima/judge.py +144 -0
- minima_harness/minima/mapping.py +147 -0
- minima_harness/minima/meter.py +143 -0
- minima_harness/minima/router.py +220 -0
- minima_harness/minima/runtime.py +544 -0
- minima_harness/minima/signals.py +195 -0
- minima_harness/session/__init__.py +14 -0
- minima_harness/session/format.py +35 -0
- minima_harness/session/store.py +236 -0
- minima_harness/tasks/__init__.py +17 -0
- minima_harness/tasks/task_set.py +78 -0
- minima_harness/tools/__init__.py +7 -0
- minima_harness/tools/_io.py +34 -0
- minima_harness/tools/bash.py +70 -0
- minima_harness/tools/builtin.py +23 -0
- minima_harness/tools/edit.py +50 -0
- minima_harness/tools/find.py +38 -0
- minima_harness/tools/grep.py +73 -0
- minima_harness/tools/ls.py +35 -0
- minima_harness/tools/read.py +38 -0
- minima_harness/tools/tasks.py +75 -0
- minima_harness/tools/write.py +36 -0
- minima_harness/tui/__init__.py +3 -0
- minima_harness/tui/analytics.py +111 -0
- minima_harness/tui/app.py +1927 -0
- minima_harness/tui/bridge.py +103 -0
- minima_harness/tui/cli.py +227 -0
- minima_harness/tui/clipboard.py +60 -0
- minima_harness/tui/commands.py +49 -0
- minima_harness/tui/compaction.py +17 -0
- minima_harness/tui/config_cli.py +141 -0
- minima_harness/tui/config_store.py +237 -0
- minima_harness/tui/context.py +93 -0
- minima_harness/tui/customize.py +95 -0
- minima_harness/tui/diff.py +53 -0
- minima_harness/tui/editor.py +43 -0
- minima_harness/tui/extensions.py +84 -0
- minima_harness/tui/extra_models.py +52 -0
- minima_harness/tui/history.py +71 -0
- minima_harness/tui/mubit.py +295 -0
- minima_harness/tui/overlays.py +593 -0
- minima_harness/tui/packages.py +59 -0
- minima_harness/tui/run_modes.py +66 -0
- minima_harness/tui/theme.py +77 -0
- minima_harness/tui/welcome.py +83 -0
- minima_harness/tui/widgets/__init__.py +3 -0
- minima_harness/tui/widgets/banner.py +38 -0
- minima_harness/tui/widgets/editor.py +83 -0
- minima_harness/tui/widgets/footer.py +73 -0
- minima_harness/tui/widgets/messages.py +151 -0
- minima_harness/tui/widgets/status.py +57 -0
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
"""Code-quality signal extraction for code-aware routing (Phase B, the wedge).
|
|
2
|
+
|
|
3
|
+
Minima's recall is otherwise text-similarity-based. Extracting lightweight, code-quality
|
|
4
|
+
signals from the files a task touches and feeding them as ``tags`` / ``difficulty`` /
|
|
5
|
+
``expected_input_tokens`` into ``recommend`` makes routing *code-aware* — the Triage-style
|
|
6
|
+
wedge (route by CodeHealth + file metadata, not just prompt text).
|
|
7
|
+
|
|
8
|
+
The default :class:`CodeHealthExtractor` is language-agnostic and dependency-free: a proxy
|
|
9
|
+
McCabe (decision-keyword count), non-blank LOC, and sibling-test-file detection. It's a
|
|
10
|
+
deliberately rough signal — precise, per-language complexity (radon/tree-sitter) can plug
|
|
11
|
+
into the same :class:`ContextExtractor` protocol later. The discrimination gate
|
|
12
|
+
(``tests/harness/test_signals.py``) is the falsifiable check that this signal separates
|
|
13
|
+
task tiers at all; if it can't, code-aware routing isn't ready and we pivot to memory/cost.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import logging
|
|
19
|
+
import re
|
|
20
|
+
from collections.abc import Awaitable, Callable
|
|
21
|
+
from dataclasses import dataclass, field
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Protocol, runtime_checkable
|
|
24
|
+
|
|
25
|
+
_log = logging.getLogger("minima_harness.signals")
|
|
26
|
+
|
|
27
|
+
# Decision keywords across common languages (each ~ one McCabe branch). `and`/`or` cover
|
|
28
|
+
# Python/Ruby boolean ops (`not` is counted too); `&&`/`||` cover C-family and JS. Word-boundary matches keep false
|
|
29
|
+
# hits low (an identifier named "format" won't match "for").
|
|
30
|
+
_DECISION_RE = re.compile(r"\b(if|elif|for|while|case|catch|except|switch|and|or|not)\b|&&|\|\|")
|
|
31
|
+
|
|
32
|
+
# Files that look like tests (name conventions across languages).
|
|
33
|
+
_TEST_NAME_RE = re.compile(
|
|
34
|
+
r"(^|[/_])(test_|_test\.|[a-z0-9_]+_test\.)|(tests?/)|(_spec\.|spec/)", re.IGNORECASE
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# Approx tokens per non-blank source line (code is denser than prose).
|
|
38
|
+
_TOKENS_PER_LOC = 10
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(slots=True)
class FileHealth:
    """Raw per-file measurements collected by :class:`CodeHealthExtractor`."""

    path: str  # the path that was read, as a string
    loc: int  # non-blank line count (see ``_loc``)
    complexity: int  # decision-keyword match count (see ``_decisions``)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(slots=True)
class SignalBundle:
    """The enrichment handed to ``router.recommend``.

    The first three fields are the routing inputs; the remaining fields are the raw
    measurements they are derived from.
    """

    tags: list[str] = field(default_factory=list)  # banded labels built by ``_tags``
    difficulty: str | None = None  # tier label chosen by ``_difficulty``
    expected_input_tokens: int | None = None  # ``total_loc`` times the tokens-per-line factor
    # Raw signals (inspection / logging / the discrimination gate):
    files: int = 0  # number of files that could actually be read
    total_loc: int = 0  # summed non-blank lines of the files being measured
    max_file_loc: int = 0  # non-blank lines of the largest measured file
    total_complexity: int = 0  # summed decision count of the measured files
    max_complexity: int = 0  # decision count of the most complex measured file
    avg_complexity: float = 0.0  # ``total_complexity`` divided by the measured file count
    has_tests: bool = False  # result of ``_has_tests``
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@runtime_checkable
class ContextExtractor(Protocol):
    """Structural interface for objects that turn a task's files into a :class:`SignalBundle`."""

    async def extract(self, task: str, files: list[Path]) -> SignalBundle:
        """Compute code-quality signals for ``files`` to enrich a recommendation."""
        ...
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# extract() may be sync or async; normalize so callers can ``await`` either.
|
|
73
|
+
ExtractFn = Callable[[str, list[Path]], Awaitable[SignalBundle] | SignalBundle]
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _band(value: float, lo: float, hi: float) -> str:
|
|
77
|
+
if value <= lo:
|
|
78
|
+
return "low"
|
|
79
|
+
if value <= hi:
|
|
80
|
+
return "med"
|
|
81
|
+
return "high"
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class CodeHealthExtractor:
    """Language-agnostic heuristic extractor (proxy McCabe + LOC + sibling tests).

    Each readable file is measured with ``_loc`` and ``_decisions``. Files whose name
    looks like a test are excluded from the aggregates unless every readable file is a
    test file.
    """

    def __init__(self, *, tokens_per_loc: int = _TOKENS_PER_LOC) -> None:
        """Store the multiplier used to turn non-blank lines into an input-token estimate."""
        self._tokens_per_loc = tokens_per_loc

    async def extract(self, task: str, files: list[Path]) -> SignalBundle:
        """Measure ``files`` and return the derived :class:`SignalBundle`.

        Args:
            task: The task text. Not used by this extractor.
            files: Paths the task touches. Unreadable files are skipped with a debug log.

        Returns:
            A bundle with raw aggregates plus ``difficulty``, ``expected_input_tokens``
            and ``tags`` filled in.
        """
        # Normalise once so every later step sees a Path; previously only the first
        # ``resolve()`` call skipped the ``Path(...)`` wrapper and crashed on str entries.
        paths = [Path(f) for f in files]
        all_paths = {p.resolve() for p in paths}

        measured: list[tuple[FileHealth, Path]] = []
        test_files: set[Path] = set()
        for path in paths:
            try:
                text = path.read_text(encoding="utf-8", errors="replace")
            except OSError as exc:
                _log.debug("signal_skip_unreadable path=%s err=%s", path, exc)
                continue
            resolved = path.resolve()
            name = path.name
            if bool(_TEST_NAME_RE.search(name)) or name.startswith("test"):
                test_files.add(resolved)
            health = FileHealth(path=str(path), loc=_loc(text), complexity=_decisions(text))
            measured.append((health, resolved))

        per_file = [health for health, _ in measured]
        source = [health for health, resolved in measured if resolved not in test_files]
        if not source:
            source = per_file  # a pure test-edit task still gets signals

        bundle = SignalBundle(
            files=len(per_file),
            total_loc=sum(h.loc for h in source),
            max_file_loc=max((h.loc for h in source), default=0),
            total_complexity=sum(h.complexity for h in source),
            max_complexity=max((h.complexity for h in source), default=0),
            has_tests=_has_tests(source, all_paths),
        )
        n = len(source) or 1  # avoid dividing by zero when nothing was readable
        bundle.avg_complexity = bundle.total_complexity / n
        bundle.difficulty = _difficulty(bundle)
        bundle.expected_input_tokens = bundle.total_loc * self._tokens_per_loc
        bundle.tags = _tags(bundle)
        return bundle
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _loc(text: str) -> int:
|
|
128
|
+
return sum(1 for line in text.splitlines() if line.strip())
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _decisions(text: str) -> int:
    """Count the matches of ``_DECISION_RE`` in ``text`` (the proxy complexity score)."""
    return sum(1 for _ in _DECISION_RE.finditer(text))
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _has_tests(source: list[FileHealth], all_paths: set[Path]) -> bool:
    """Report whether the task's files appear to be covered by tests.

    True when any provided path has a test-like name, or when a measured file has a
    sibling ``test_<stem>`` / ``<stem>_test`` file (``.py`` or ``.go``) that is either in
    ``all_paths`` or present on disk.
    """
    for provided in all_paths:
        if _TEST_NAME_RE.search(provided.name) or provided.name.startswith("test"):
            return True

    for health in source:
        origin = Path(health.path)
        stem = origin.stem
        sibling_names = (
            f"test_{stem}.py",
            f"{stem}_test.py",
            f"{stem}_test.go",
            f"test_{stem}.go",
        )
        for sibling_name in sibling_names:
            sibling = origin.parent / sibling_name
            if sibling.resolve() in all_paths or sibling.exists():
                return True
    return False
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _difficulty(b: SignalBundle) -> str:
|
|
155
|
+
if b.max_file_loc > 800 or b.avg_complexity > 30:
|
|
156
|
+
return "expert"
|
|
157
|
+
if (
|
|
158
|
+
b.max_complexity >= 15
|
|
159
|
+
or b.max_file_loc > 400
|
|
160
|
+
or (not b.has_tests and b.avg_complexity > 10)
|
|
161
|
+
):
|
|
162
|
+
return "hard"
|
|
163
|
+
if b.avg_complexity >= 5 or b.max_file_loc > 150:
|
|
164
|
+
return "medium"
|
|
165
|
+
if b.total_loc > 20:
|
|
166
|
+
return "easy"
|
|
167
|
+
return "trivial"
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _tags(b: SignalBundle) -> list[str]:
    """Render the bundle's raw signals as the four routing tags.

    Order is fixed: complexity band, size band, test presence, file count.
    """
    complexity_band = _band(b.avg_complexity, 5, 15)
    size_band = _band(float(b.max_file_loc), 150, 400)
    coverage = "has_tests" if b.has_tests else "no_tests"
    return [
        f"complexity:{complexity_band}",
        f"loc:{size_band}",
        coverage,
        f"files:{b.files}",
    ]
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
async def extract_or_none(
    extractor: ContextExtractor | None, task: str, files: list[Path] | None
) -> SignalBundle | None:
    """Run ``extractor`` when one is configured and ``files`` is non-empty.

    The extractor's ``extract`` may be sync or async; an awaitable result is awaited.
    Any ``Exception`` is logged and turned into ``None`` so that a broken extractor
    never blocks routing (the caller falls back to text-only).
    """
    if extractor is None or not files:
        return None
    try:
        outcome = extractor.extract(task, list(files))
        if not hasattr(outcome, "__await__"):
            return outcome  # type: ignore[return-value]
        return await outcome  # type: ignore[misc]
    except Exception:  # noqa: BLE001
        _log.warning("signal_extraction_failed", exc_info=True)
        return None
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Tree-structured JSONL session store (a port of PI's session model)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from minima_harness.session.format import EntryType, SessionEntry
|
|
6
|
+
from minima_harness.session.store import SessionManager, SessionStore, SessionSummary
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"EntryType",
|
|
10
|
+
"SessionEntry",
|
|
11
|
+
"SessionManager",
|
|
12
|
+
"SessionStore",
|
|
13
|
+
"SessionSummary",
|
|
14
|
+
]
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
import uuid
|
|
5
|
+
from enum import StrEnum
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Closed set of entry kinds; used as the ``type`` field of ``SessionEntry``.
class EntryType(StrEnum):
    USER = "user"
    ASSISTANT = "assistant"
    TOOL = "tool"
    SYSTEM = "system"
    GOAL = "goal"  # serialized Goal snapshot (latest wins); powers /goals across resume
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def new_id() -> str:
    """Return a fresh 12-character lowercase hex id, taken from the start of a uuid4."""
    full_hex = uuid.uuid4().hex
    return full_hex[:12]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def now_ts() -> float:
    """Return the current wall-clock time as seconds since the epoch."""
    stamp = time.time()
    return stamp
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class SessionEntry(BaseModel):
    """One node in the session tree. Append-only; never mutated once written."""

    id: str  # entry id; the store generates these with ``new_id()``
    parent_id: str | None = None  # id of the entry this one continues from; None for a root
    type: EntryType
    ts: float = Field(default_factory=now_ts)  # creation time, seconds since the epoch
    payload: dict  # entry content; its shape is not constrained here
    label: str | None = None  # optional bookmark label (for /tree)
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import time
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from minima_harness.session.format import EntryType, SessionEntry, new_id
|
|
11
|
+
|
|
12
|
+
_log = logging.getLogger("minima_harness.session")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def format_age(ts: float, now: float | None = None) -> str:
    """Render how long ago ``ts`` was, in the largest fitting unit.

    Produces ``just now`` (under a minute), then ``<n>m ago``, ``<n>h ago``,
    ``<n>d ago`` and finally ``<n>w ago``. A missing or non-positive ``ts`` yields
    ``?``. Timestamps in the future are treated as ``just now``. ``now`` defaults to
    the current time.
    """
    if not ts or ts <= 0:
        return "?"

    reference = time.time() if now is None else now
    elapsed = max(0.0, reference - ts)
    if elapsed < 60:
        return "just now"

    week = 86400 * 7
    # (exclusive upper bound in seconds, seconds per unit, suffix)
    for ceiling, unit, suffix in ((3600, 60, "m"), (86400, 3600, "h"), (week, 86400, "d")):
        if elapsed < ceiling:
            return f"{int(elapsed // unit)}{suffix} ago"
    return f"{int(elapsed // week)}w ago"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class SessionStore:
    """Append-only JSONL session tree. File-backed or in-memory (``--no-session``).

    Entries are kept in insertion order in memory. Each new entry is parented to the
    current *tip*; moving the tip with :meth:`set_tip` starts a new branch. When a path
    is given, every appended entry is also written as one JSON line to that file.
    """

    def __init__(self, path: Path | None, *, display_name: str | None = None) -> None:
        """Create a store; an existing file at ``path`` is loaded immediately.

        Args:
            path: Backing JSONL file, or None for a purely in-memory store.
            display_name: Optional human-readable name kept on the instance.
        """
        self._path = path
        self._mem: list[SessionEntry] = []
        self._tip: str | None = None
        self.display_name = display_name
        if path is not None and path.exists():
            self._reload()

    @classmethod
    def file_backed(cls, path: Path, *, display_name: str | None = None) -> SessionStore:
        """Alternate constructor for a store persisted at ``path``."""
        return cls(path, display_name=display_name)

    @classmethod
    def in_memory(cls) -> SessionStore:
        """Alternate constructor for a store that never touches disk."""
        return cls(None)

    @property
    def path(self) -> Path | None:
        """Backing file, or None for an in-memory store."""
        return self._path

    @property
    def persistent(self) -> bool:
        """True when the store has a backing file."""
        return self._path is not None

    @property
    def entries(self) -> list[SessionEntry]:
        """A copy of all entries in insertion order."""
        return list(self._mem)

    @property
    def tip(self) -> str | None:
        """Id of the entry the next append will be parented to (None when empty)."""
        return self._tip

    def append(
        self, entry_type: EntryType, payload: dict, *, label: str | None = None
    ) -> SessionEntry:
        """Add an entry under the current tip, make it the new tip, and persist it.

        A failed disk write is logged and otherwise ignored; the entry stays in memory.
        """
        entry = SessionEntry(
            id=new_id(),
            parent_id=self._tip,
            type=entry_type,
            payload=payload,
            label=label,
        )
        self._mem.append(entry)
        self._tip = entry.id
        if self._path is not None:
            try:
                with self._path.open("a", encoding="utf-8") as fh:
                    fh.write(entry.model_dump_json() + "\n")
            except OSError:  # disk failure must not kill the turn
                _log.warning("session_append_failed", exc_info=True)
        return entry

    def set_tip(self, entry_id: str) -> None:
        """Branch: continue the next append from ``entry_id`` (must already exist).

        Raises:
            KeyError: If no entry has that id.
        """
        if not any(e.id == entry_id for e in self._mem):
            raise KeyError(f"unknown entry id: {entry_id}")
        self._tip = entry_id

    def path_to(self, entry_id: str) -> list[SessionEntry]:
        """Root → entry_id path (inclusive). Raises KeyError if unknown.

        The walk stops at an entry with no parent, at a parent id that is not in the
        store, or when it would revisit an entry. The last case only happens with a
        corrupt file whose ``parent_id`` links form a cycle; without the guard the
        walk would never terminate.
        """
        by_id = {e.id: e for e in self._mem}
        if entry_id not in by_id:
            raise KeyError(f"unknown entry id: {entry_id}")
        out: list[SessionEntry] = []
        seen: set[str] = set()
        cur: str | None = entry_id
        while cur is not None and cur in by_id and cur not in seen:
            seen.add(cur)
            out.append(by_id[cur])
            cur = by_id[cur].parent_id
        out.reverse()
        return out

    def children_map(self) -> dict[str | None, list[str]]:
        """parentId → child ids in insertion order (root key is None)."""
        cm: dict[str | None, list[str]] = {}
        for e in self._mem:
            cm.setdefault(e.parent_id, []).append(e.id)
        return cm

    def _write_path(self, dest: Path, entries: list[SessionEntry]) -> None:
        """Write ``entries`` to ``dest`` as JSONL, creating parent directories and overwriting."""
        dest.parent.mkdir(parents=True, exist_ok=True)
        with dest.open("w", encoding="utf-8") as fh:
            fh.writelines(e.model_dump_json() + "\n" for e in entries)

    def fork_to(self, dest: Path, *, from_entry_id: str) -> SessionStore:
        """Copy the root→from_entry_id path into a new session file.

        Raises:
            KeyError: If ``from_entry_id`` is unknown.
        """
        path = self.path_to(from_entry_id)
        self._write_path(dest, path)
        return SessionStore.file_backed(dest)

    def clone_to(self, dest: Path) -> SessionStore:
        """Copy the current branch (root→tip) into a new session file."""
        if self._tip is None:
            self._write_path(dest, [])
            return SessionStore.file_backed(dest)
        return self.fork_to(dest, from_entry_id=self._tip)

    def _reload(self) -> None:
        """Replace the in-memory entries with the file's contents; the tip becomes the last entry.

        Blank lines are ignored. Lines that fail validation are skipped with a warning
        that carries the line number and the cause.
        """
        assert self._path is not None
        self._mem = []
        lines = self._path.read_text(encoding="utf-8").splitlines()
        for lineno, line in enumerate(lines, start=1):
            if not line.strip():
                continue
            try:
                self._mem.append(SessionEntry.model_validate_json(line))
            except Exception:  # noqa: BLE001 - one bad line must not lose the session
                _log.warning(
                    "session_skipped_malformed_line lineno=%d", lineno, exc_info=True
                )
        self._tip = self._mem[-1].id if self._mem else None
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
@dataclass(slots=True)
class SessionSummary:
    """Listing metadata for one session file, as produced by ``SessionManager.list_sessions``."""

    session_id: str  # the file's stem
    path: Path  # the ``.jsonl`` file itself
    display_name: str | None  # ``list_sessions`` always sets this to None
    mtime: float  # file mtime ≈ last activity ("recently used")
    n_entries: int  # number of non-blank lines in the file
    created: float = 0.0  # ts of the first entry ("created"); falls back to mtime
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class SessionManager:
    """Discovers/creates session files under ``<sessions_dir>/<cwd-slug>/<uuid>.jsonl``."""

    def __init__(self, sessions_dir: Path | None = None) -> None:
        """Use ``sessions_dir`` as the root, defaulting to ``~/.minima-harness/sessions``."""
        base = sessions_dir or Path.home() / ".minima-harness" / "sessions"
        self._base = Path(base)

    def slug_for(self, directory: Path) -> str:
        """Flatten the resolved ``directory`` into one path component.

        Path separators become ``-`` and leading dashes are stripped; a path that
        reduces to nothing (the filesystem root) becomes ``root``.
        """
        directory = Path(directory).resolve()
        slug = str(directory).replace(os.sep, "-").replace("/", "-")
        return slug.lstrip("-") or "root"

    def _dir_for(self, directory: Path) -> Path:
        """Return the session folder for ``directory``, creating it if needed."""
        d = self._base / self.slug_for(directory)
        d.mkdir(parents=True, exist_ok=True)
        return d

    def new(self, directory: Path, *, name: str | None = None) -> SessionStore:
        """Create a store at a fresh ``<id>.jsonl`` path; the file appears on first append."""
        sid = new_id()
        path = self._dir_for(directory) / f"{sid}.jsonl"
        return SessionStore.file_backed(path, display_name=name)

    def open(
        self,
        directory: Path,
        *,
        session_id: str | None = None,
        no_session: bool = False,
    ) -> SessionStore:
        """Open the requested, most recent, or a brand-new session for ``directory``.

        Args:
            directory: Working directory whose sessions are considered.
            session_id: Full id or prefix to resume. A stored id that is a prefix of
                ``session_id`` also matches. The most recently used match wins.
            no_session: Return an in-memory store and ignore everything else.

        Raises:
            FileNotFoundError: If ``session_id`` is given and nothing matches.
        """
        if no_session:
            return SessionStore.in_memory()
        if session_id:
            for s in self.list_sessions(directory):
                if s.session_id.startswith(session_id) or session_id.startswith(s.session_id):
                    return SessionStore.file_backed(s.path, display_name=s.display_name)
            raise FileNotFoundError(f"no session matching id: {session_id}")
        recent = self.most_recent(directory)
        if recent is not None:
            return SessionStore.file_backed(recent.path, display_name=recent.display_name)
        return self.new(directory)

    def most_recent(self, directory: Path) -> SessionSummary | None:
        """Return the session with the newest mtime, or None when there are none."""
        sessions = self.list_sessions(directory)
        return max(sessions, key=lambda s: s.mtime) if sessions else None

    def list_sessions(self, directory: Path) -> list[SessionSummary]:
        """Summarise every ``*.jsonl`` file for ``directory``, most recently used first.

        A file that cannot be read or stat'ed (for example, deleted while the scan is
        running) is skipped rather than aborting the whole listing. Undecodable bytes
        are replaced, because only the line count and the first entry's ``ts`` are needed.
        """
        d = self._base / self.slug_for(directory)
        if not d.exists():
            return []
        out: list[SessionSummary] = []
        for p in sorted(d.glob("*.jsonl")):
            try:
                text = p.read_text(encoding="utf-8", errors="replace")
                mtime = p.stat().st_mtime
            except OSError:
                continue
            nonempty = [ln for ln in text.splitlines() if ln.strip()]
            out.append(
                SessionSummary(
                    session_id=p.stem,
                    path=p,
                    display_name=None,
                    mtime=mtime,
                    n_entries=len(nonempty),
                    created=_first_entry_ts(nonempty) or mtime,
                )
            )
        # Most-recently-used first — the natural order for a resume picker.
        out.sort(key=lambda s: s.mtime, reverse=True)
        return out
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _first_entry_ts(nonempty_lines: list[str]) -> float | None:
|
|
228
|
+
"""Parse the ``ts`` of a session's first entry (its logical creation time)."""
|
|
229
|
+
if not nonempty_lines:
|
|
230
|
+
return None
|
|
231
|
+
try:
|
|
232
|
+
obj = json.loads(nonempty_lines[0])
|
|
233
|
+
ts = obj.get("ts") if isinstance(obj, dict) else None # non-object first line → no ts
|
|
234
|
+
return float(ts) if ts is not None else None
|
|
235
|
+
except (ValueError, TypeError):
|
|
236
|
+
return None
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Task corpus + grading helpers for harness runs."""
|
|
2
|
+
|
|
3
|
+
from minima_harness.tasks.task_set import (
|
|
4
|
+
PARTIAL_THRESHOLD,
|
|
5
|
+
SUCCESS_THRESHOLD,
|
|
6
|
+
TASKS,
|
|
7
|
+
Task,
|
|
8
|
+
grade_outcome,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"PARTIAL_THRESHOLD",
|
|
13
|
+
"SUCCESS_THRESHOLD",
|
|
14
|
+
"TASKS",
|
|
15
|
+
"Task",
|
|
16
|
+
"grade_outcome",
|
|
17
|
+
]
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Task corpus for harness runs.
|
|
2
|
+
|
|
3
|
+
A :class:`Task` carries both the deterministic ``quality_fn`` (cheap, offline grading)
|
|
4
|
+
and the richer ``rubric``/``expected`` fields the LLM judge consumes (Phase 3). Either
|
|
5
|
+
grading path is optional: a task with ``quality_fn=None`` and an empty ``rubric`` just
|
|
6
|
+
records tokens/cost with a neutral outcome.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from collections.abc import Callable
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
|
|
14
|
+
QualityFn = Callable[[str], float]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass(slots=True)
|
|
18
|
+
class Task:
|
|
19
|
+
"""A single graded task."""
|
|
20
|
+
|
|
21
|
+
label: str
|
|
22
|
+
prompt: str
|
|
23
|
+
task_type: str # code | qa | reasoning | extraction | creative | ...
|
|
24
|
+
quality_fn: QualityFn | None = None # (model_output) -> float in [0, 1]
|
|
25
|
+
slider: float = 5.0 # cost/quality tradeoff: 1.0=cheapest, 10.0=best quality
|
|
26
|
+
rubric: str = "" # consumed by the LLM judge (Phase 3)
|
|
27
|
+
expected: str = "" # reference answer for the judge / deterministic checks
|
|
28
|
+
tags: list[str] = field(default_factory=list)
|
|
29
|
+
|
|
30
|
+
def __post_init__(self) -> None:
|
|
31
|
+
assert 0.0 < self.slider <= 10.0, f"slider must be in (0, 10], got {self.slider}"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Outcome thresholds mirror examples/agent_warmup.py so feedback labels are consistent.
|
|
35
|
+
SUCCESS_THRESHOLD = 0.8
|
|
36
|
+
PARTIAL_THRESHOLD = 0.4
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def grade_outcome(quality: float) -> str:
    """Translate a quality score into ``"success"``, ``"partial"`` or ``"failure"``.

    A score at or above ``SUCCESS_THRESHOLD`` is a success, one at or above
    ``PARTIAL_THRESHOLD`` is partial, and everything else is a failure.
    """
    tiers = ((SUCCESS_THRESHOLD, "success"), (PARTIAL_THRESHOLD, "partial"))
    for floor, outcome in tiers:
        if quality >= floor:
            return outcome
    return "failure"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
# Seed corpus (3 tasks ported from the old agent/task_set.py, enriched)
|
|
50
|
+
# ---------------------------------------------------------------------------
|
|
51
|
+
|
|
52
|
+
# Built-in tasks. Each ``quality_fn`` is a cheap string check on the model output.
TASKS: list[Task] = [
    # Extraction: full marks only when both the order id and the total appear.
    Task(
        label="order-extract",
        prompt="Extract order id and total from: 'Order #A-9931 totalling $48.20 shipped.'",
        task_type="extraction",
        quality_fn=lambda t: 1.0 if "A-9931" in t and "48.20" in t else 0.0,
        slider=3.0,  # cheap is fine for extraction
        rubric="Output must contain order id 'A-9931' and total '$48.20'.",
        expected="A-9931, $48.20",
    ),
    # Reasoning: the offline check is length-only (over 200 characters scores 0.9).
    Task(
        label="retry-policy",
        prompt=("Write a retry policy with jitter for a flaky payment webhook. Justify the math."),
        task_type="reasoning",
        quality_fn=lambda t: 0.9 if len(t) > 200 else 0.4,
        slider=7.0,  # harder task, want quality
        rubric="Must describe exponential backoff with jitter and justify the math.",
    ),
    # Code: full marks need both the function definition and an assert; otherwise 0.5.
    Task(
        label="binary-search",
        prompt="Implement binary search in Python with a test. Make it idiomatic.",
        task_type="code",
        quality_fn=lambda t: 1.0 if "def binary_search" in t and "assert" in t else 0.5,
        slider=5.0,
        rubric="Must define `def binary_search(...)` and include an `assert`-based test.",
    ),
]
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
MAX_LINE = 2000
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def truncate_line(line: str) -> str:
    """Cap ``line`` at ``MAX_LINE`` characters, appending a truncation marker when it was cut."""
    if len(line) > MAX_LINE:
        kept = line[:MAX_LINE]
        return kept + " …(truncated)"
    return line
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def read_lines(path: Path, *, offset: int, limit: int) -> tuple[str, int]:
    """Return (numbered_body, n_selected) for lines [offset, offset+limit).

    Args:
        path: File to read; decoded as UTF-8 with undecodable bytes replaced.
        offset: 1-based number of the first line wanted; values below 1 mean line 1.
        limit: Maximum number of lines to return; zero or negative selects nothing.

    Returns:
        The selected lines, each prefixed with its right-aligned line number and passed
        through ``truncate_line``, followed by a continuation hint when lines remain
        after the window; and the number of lines selected.
    """
    text = path.read_text(encoding="utf-8", errors="replace")
    lines = text.splitlines()
    start = max(0, offset - 1)
    # Clamp the limit: a negative value would make ``end`` negative, and the slice below
    # would then count from the end of the file instead of selecting nothing.
    end = min(len(lines), start + max(0, limit))
    selected = lines[start:end]
    width = len(str(end if end else 1))
    body = "\n".join(
        f"{str(i).rjust(width)}: {truncate_line(line)}"
        for i, line in enumerate(selected, start=start + 1)
    )
    if end < len(lines):
        body += f"\n…({len(lines) - end} more lines; use a larger offset to continue)"
    return body, len(selected)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def write_text(path: Path, content: str) -> int:
    """Write ``content`` to ``path`` as UTF-8, creating parent directories first.

    Returns:
        The number of lines in ``content``.
    """
    folder = path.parent
    folder.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding="utf-8")
    line_count = len(content.splitlines())
    return line_count
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
|
|
8
|
+
from minima_harness.agent.tools import AgentTool, ToolResult, ToolUpdate, error_result
|
|
9
|
+
from minima_harness.ai.types import TextContent
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Arguments accepted by the ``bash`` tool.
class BashParams(BaseModel):
    command: str  # command line handed to ``asyncio.create_subprocess_shell`` as-is
    timeout: int = Field(default=120_000, ge=1)  # milliseconds
    workdir: str | None = None  # working directory (``~`` is expanded); None passes no cwd
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _kill_process_group(proc: asyncio.subprocess.Process) -> None:
    """Best-effort SIGKILL of ``proc`` and every process in its group."""
    # Imported locally: ``_execute`` has a parameter named ``signal``, so the module is
    # deliberately kept out of that function's namespace.
    import os
    import signal as _signal

    if proc.returncode is not None:
        return  # already reaped
    try:
        if hasattr(os, "killpg"):
            # The shell was started with ``start_new_session=True``, so it leads its own
            # process group and its pid doubles as the group id.
            os.killpg(proc.pid, _signal.SIGKILL)
            return
    except ProcessLookupError:
        return  # the whole group is already gone
    except OSError:
        pass  # could not signal the group; fall back to killing just the shell
    try:
        proc.kill()
    except ProcessLookupError:
        pass  # exited between the check above and the kill


async def _execute(
    tool_call_id: str,
    params,
    signal,
    on_update: ToolUpdate | None,  # noqa: ANN001
) -> ToolResult:
    """Run ``params.command`` in a shell and return its combined output and exit code.

    Args:
        tool_call_id: Not used by this tool.
        params: A :class:`BashParams` instance.
        signal: Not used by this tool.
        on_update: Optional callback invoked with each decoded output line as it
            arrives; exceptions it raises are ignored.

    Returns:
        A result whose text is the output followed by ``[exit <code>]``, with the exit
        code also in ``details``. An error result is returned when the shell cannot be
        started or the timeout expires.

    On timeout, cancellation or an unexpected error the command's whole process group
    is killed, so processes spawned by the shell do not outlive the tool call.
    """
    assert isinstance(params, BashParams)
    wd = str(Path(params.workdir).expanduser()) if params.workdir else None
    try:
        proc = await asyncio.create_subprocess_shell(
            params.command,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
            cwd=wd,
            start_new_session=True,
        )
    except OSError as exc:
        return error_result(f"bash: failed to start: {exc}")

    chunks: list[str] = []
    assert proc.stdout is not None
    try:
        async with asyncio.timeout(params.timeout / 1000.0):
            async for raw in proc.stdout:
                chunk = raw.decode("utf-8", errors="replace")
                chunks.append(chunk)
                if on_update is not None:
                    try:
                        on_update(chunk)
                    except Exception:  # noqa: BLE001 - progress must never break the run
                        pass
            await proc.wait()
    except TimeoutError:
        _kill_process_group(proc)
        await proc.wait()
        return error_result(f"bash: timed out after {params.timeout} ms")
    except BaseException:
        # Cancellation or an unexpected failure: do not leave the command running.
        _kill_process_group(proc)
        raise

    output = "".join(chunks)
    code = proc.returncode if proc.returncode is not None else -1
    body = f"{output}\n[exit {code}]" if output else f"[exit {code}]"
    return ToolResult(content=[TextContent(text=body)], details={"exit_code": code})
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def bash_tool() -> AgentTool:
    """Build the ``bash`` tool: ``BashParams`` as its schema and ``_execute`` as its handler."""
    summary = (
        "Run a shell command and return its combined stdout/stderr and exit code. "
        "Output streams live. Runs with the user's full permissions — no confirmation."
    )
    return AgentTool(
        name="bash",
        description=summary,
        parameters=BashParams,
        execute=_execute,
    )
|