minima-cli 0.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. minima/__init__.py +5 -0
  2. minima/api/__init__.py +1 -0
  3. minima/api/auth.py +39 -0
  4. minima/api/errors.py +40 -0
  5. minima/api/routers/__init__.py +1 -0
  6. minima/api/routers/calibration.py +50 -0
  7. minima/api/routers/feedback.py +279 -0
  8. minima/api/routers/health.py +50 -0
  9. minima/api/routers/models.py +42 -0
  10. minima/api/routers/recommend.py +66 -0
  11. minima/api/routers/savings.py +55 -0
  12. minima/api/routers/strategies.py +33 -0
  13. minima/catalog/__init__.py +1 -0
  14. minima/catalog/data/capability_priors.json +210 -0
  15. minima/catalog/data/model_aliases.json +12 -0
  16. minima/catalog/merge.py +69 -0
  17. minima/catalog/refresh.py +54 -0
  18. minima/catalog/sources/__init__.py +1 -0
  19. minima/catalog/sources/litellm.py +19 -0
  20. minima/catalog/sources/openrouter.py +25 -0
  21. minima/catalog/store.py +86 -0
  22. minima/config.py +288 -0
  23. minima/deps.py +35 -0
  24. minima/llm/__init__.py +1 -0
  25. minima/llm/anthropic.py +106 -0
  26. minima/llm/base.py +196 -0
  27. minima/llm/gemini.py +124 -0
  28. minima/llm/registry.py +54 -0
  29. minima/logging.py +28 -0
  30. minima/main.py +109 -0
  31. minima/memory/__init__.py +1 -0
  32. minima/memory/adapter.py +572 -0
  33. minima/memory/keys.py +83 -0
  34. minima/memory/records.py +190 -0
  35. minima/memory/threadpool.py +41 -0
  36. minima/metrics/__init__.py +1 -0
  37. minima/metrics/calibration.py +415 -0
  38. minima/metrics/report.py +116 -0
  39. minima/metrics/savings.py +98 -0
  40. minima/recommender/__init__.py +1 -0
  41. minima/recommender/_pg_pool.py +38 -0
  42. minima/recommender/_redis_client.py +32 -0
  43. minima/recommender/aggregate.py +157 -0
  44. minima/recommender/classify.py +165 -0
  45. minima/recommender/decisionlog.py +505 -0
  46. minima/recommender/durablerefs.py +312 -0
  47. minima/recommender/engine.py +997 -0
  48. minima/recommender/escalation.py +83 -0
  49. minima/recommender/propensity.py +189 -0
  50. minima/recommender/recstore.py +368 -0
  51. minima/recommender/score.py +318 -0
  52. minima/recommender/types.py +166 -0
  53. minima/schemas/__init__.py +1 -0
  54. minima/schemas/common.py +73 -0
  55. minima/schemas/feedback.py +34 -0
  56. minima/schemas/models_catalog.py +36 -0
  57. minima/schemas/recommend.py +104 -0
  58. minima/schemas/savings.py +39 -0
  59. minima/schemas/strategies.py +57 -0
  60. minima/schemas/workflow.py +43 -0
  61. minima/seeding/__init__.py +1 -0
  62. minima/seeding/items.py +42 -0
  63. minima/seeding/llmrouterbench.py +232 -0
  64. minima/seeding/routerbench.py +141 -0
  65. minima/seeding/run_seed.py +56 -0
  66. minima/seeding/synthetic.py +70 -0
  67. minima/tenancy/__init__.py +8 -0
  68. minima/tenancy/context.py +37 -0
  69. minima/tenancy/passthrough.py +110 -0
  70. minima/version.py +3 -0
  71. minima_cli-0.4.9.dist-info/METADATA +275 -0
  72. minima_cli-0.4.9.dist-info/RECORD +161 -0
  73. minima_cli-0.4.9.dist-info/WHEEL +4 -0
  74. minima_cli-0.4.9.dist-info/entry_points.txt +5 -0
  75. minima_cli-0.4.9.dist-info/licenses/LICENSE +295 -0
  76. minima_client/__init__.py +19 -0
  77. minima_client/autocapture.py +101 -0
  78. minima_client/client.py +301 -0
  79. minima_client/errors.py +23 -0
  80. minima_harness/LICENSE_PI +32 -0
  81. minima_harness/__init__.py +16 -0
  82. minima_harness/agent/__init__.py +72 -0
  83. minima_harness/agent/agent.py +276 -0
  84. minima_harness/agent/events.py +124 -0
  85. minima_harness/agent/loop.py +311 -0
  86. minima_harness/agent/state.py +79 -0
  87. minima_harness/agent/tools.py +97 -0
  88. minima_harness/ai/__init__.py +66 -0
  89. minima_harness/ai/compat.py +71 -0
  90. minima_harness/ai/errors.py +96 -0
  91. minima_harness/ai/events.py +117 -0
  92. minima_harness/ai/openrouter_catalog.py +153 -0
  93. minima_harness/ai/provider_catalog.py +299 -0
  94. minima_harness/ai/provider_quirks.py +37 -0
  95. minima_harness/ai/providers/__init__.py +75 -0
  96. minima_harness/ai/providers/_common.py +48 -0
  97. minima_harness/ai/providers/anthropic.py +290 -0
  98. minima_harness/ai/providers/base.py +65 -0
  99. minima_harness/ai/providers/faux.py +173 -0
  100. minima_harness/ai/providers/google.py +221 -0
  101. minima_harness/ai/providers/openai_compat.py +278 -0
  102. minima_harness/ai/registry.py +184 -0
  103. minima_harness/ai/stream.py +82 -0
  104. minima_harness/ai/tools.py +51 -0
  105. minima_harness/ai/types.py +204 -0
  106. minima_harness/ai/usage.py +41 -0
  107. minima_harness/minima/__init__.py +40 -0
  108. minima_harness/minima/cache.py +102 -0
  109. minima_harness/minima/config.py +85 -0
  110. minima_harness/minima/goals.py +226 -0
  111. minima_harness/minima/judge.py +144 -0
  112. minima_harness/minima/mapping.py +147 -0
  113. minima_harness/minima/meter.py +143 -0
  114. minima_harness/minima/router.py +220 -0
  115. minima_harness/minima/runtime.py +544 -0
  116. minima_harness/minima/signals.py +195 -0
  117. minima_harness/session/__init__.py +14 -0
  118. minima_harness/session/format.py +35 -0
  119. minima_harness/session/store.py +236 -0
  120. minima_harness/tasks/__init__.py +17 -0
  121. minima_harness/tasks/task_set.py +78 -0
  122. minima_harness/tools/__init__.py +7 -0
  123. minima_harness/tools/_io.py +34 -0
  124. minima_harness/tools/bash.py +70 -0
  125. minima_harness/tools/builtin.py +23 -0
  126. minima_harness/tools/edit.py +50 -0
  127. minima_harness/tools/find.py +38 -0
  128. minima_harness/tools/grep.py +73 -0
  129. minima_harness/tools/ls.py +35 -0
  130. minima_harness/tools/read.py +38 -0
  131. minima_harness/tools/tasks.py +75 -0
  132. minima_harness/tools/write.py +36 -0
  133. minima_harness/tui/__init__.py +3 -0
  134. minima_harness/tui/analytics.py +111 -0
  135. minima_harness/tui/app.py +1927 -0
  136. minima_harness/tui/bridge.py +103 -0
  137. minima_harness/tui/cli.py +227 -0
  138. minima_harness/tui/clipboard.py +60 -0
  139. minima_harness/tui/commands.py +49 -0
  140. minima_harness/tui/compaction.py +17 -0
  141. minima_harness/tui/config_cli.py +141 -0
  142. minima_harness/tui/config_store.py +237 -0
  143. minima_harness/tui/context.py +93 -0
  144. minima_harness/tui/customize.py +95 -0
  145. minima_harness/tui/diff.py +53 -0
  146. minima_harness/tui/editor.py +43 -0
  147. minima_harness/tui/extensions.py +84 -0
  148. minima_harness/tui/extra_models.py +52 -0
  149. minima_harness/tui/history.py +71 -0
  150. minima_harness/tui/mubit.py +295 -0
  151. minima_harness/tui/overlays.py +593 -0
  152. minima_harness/tui/packages.py +59 -0
  153. minima_harness/tui/run_modes.py +66 -0
  154. minima_harness/tui/theme.py +77 -0
  155. minima_harness/tui/welcome.py +83 -0
  156. minima_harness/tui/widgets/__init__.py +3 -0
  157. minima_harness/tui/widgets/banner.py +38 -0
  158. minima_harness/tui/widgets/editor.py +83 -0
  159. minima_harness/tui/widgets/footer.py +73 -0
  160. minima_harness/tui/widgets/messages.py +151 -0
  161. minima_harness/tui/widgets/status.py +57 -0
@@ -0,0 +1,195 @@
1
+ """Code-quality signal extraction for code-aware routing (Phase B, the wedge).
2
+
3
+ Minima's recall is otherwise text-similarity-based. Extracting lightweight, code-quality
4
+ signals from the files a task touches and feeding them as ``tags`` / ``difficulty`` /
5
+ ``expected_input_tokens`` into ``recommend`` makes routing *code-aware* — the Triage-style
6
+ wedge (route by CodeHealth + file metadata, not just prompt text).
7
+
8
+ The default :class:`CodeHealthExtractor` is language-agnostic and dependency-free: a proxy
9
+ McCabe (decision-keyword count), non-blank LOC, and sibling-test-file detection. It's a
10
+ deliberately rough signal — precise, per-language complexity (radon/tree-sitter) can plug
11
+ into the same :class:`ContextExtractor` protocol later. The discrimination gate
12
+ (``tests/harness/test_signals.py``) is the falsifiable check that this signal separates
13
+ task tiers at all; if it can't, code-aware routing isn't ready and we pivot to memory/cost.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import logging
19
+ import re
20
+ from collections.abc import Awaitable, Callable
21
+ from dataclasses import dataclass, field
22
+ from pathlib import Path
23
+ from typing import Protocol, runtime_checkable
24
+
25
+ _log = logging.getLogger("minima_harness.signals")
26
+
27
+ # Decision keywords across common languages (each ~ one McCabe branch). `and`/`or`/`not`
28
+ # cover Python/Ruby boolean ops; `&&`/`||` cover C-family and JavaScript. Word-boundary
29
+ # matches keep false hits low (an identifier named "format" won't match "for").
30
+ _DECISION_RE = re.compile(r"\b(if|elif|for|while|case|catch|except|switch|and|or|not)\b|&&|\|\|")
31
+
32
+ # Files that look like tests (name conventions across languages).
33
+ _TEST_NAME_RE = re.compile(
34
+ r"(^|[/_])(test_|_test\.|[a-z0-9_]+_test\.)|(tests?/)|(_spec\.|spec/)", re.IGNORECASE
35
+ )
36
+
37
+ # Approx tokens per non-blank source line (code is denser than prose).
38
+ _TOKENS_PER_LOC = 10
39
+
40
+
41
+ @dataclass(slots=True)
42
+ class FileHealth:
43
+ path: str
44
+ loc: int
45
+ complexity: int
46
+
47
+
48
+ @dataclass(slots=True)
49
+ class SignalBundle:
50
+ """The enrichment handed to ``router.recommend``."""
51
+
52
+ tags: list[str] = field(default_factory=list)
53
+ difficulty: str | None = None
54
+ expected_input_tokens: int | None = None
55
+ # Raw signals (inspection / logging / the discrimination gate):
56
+ files: int = 0
57
+ total_loc: int = 0
58
+ max_file_loc: int = 0
59
+ total_complexity: int = 0
60
+ max_complexity: int = 0
61
+ avg_complexity: float = 0.0
62
+ has_tests: bool = False
63
+
64
+
65
@runtime_checkable
class ContextExtractor(Protocol):
    """Structural interface for anything that can derive signals from files."""

    async def extract(self, task: str, files: list[Path]) -> SignalBundle:
        """Compute code-quality signals for ``files`` to enrich a recommendation."""
        ...
70
+
71
+
72
+ # extract() may be sync or async; normalize so callers can ``await`` either.
73
+ ExtractFn = Callable[[str, list[Path]], Awaitable[SignalBundle] | SignalBundle]
74
+
75
+
76
def _band(value: float, lo: float, hi: float) -> str:
    """Bucket ``value`` into ``"low"`` / ``"med"`` / ``"high"``.

    Both thresholds are inclusive upper bounds: ``value <= lo`` is low,
    otherwise ``value <= hi`` is med, anything else is high.
    """
    return "low" if value <= lo else ("med" if value <= hi else "high")
82
+
83
+
84
class CodeHealthExtractor:
    """Language-agnostic heuristic extractor (proxy McCabe + LOC + sibling tests)."""

    def __init__(self, *, tokens_per_loc: int = _TOKENS_PER_LOC) -> None:
        """Store the tokens-per-line factor used for the input-token estimate."""
        self._tokens_per_loc = tokens_per_loc

    async def extract(self, task: str, files: list[Path]) -> SignalBundle:
        """Scan ``files`` and return the derived :class:`SignalBundle`.

        Unreadable files are skipped (logged at debug level). Files whose name
        looks like a test are counted in ``files`` but excluded from the
        LOC/complexity aggregates, unless every readable file is a test, in
        which case all of them are aggregated. ``task`` is not used by this
        heuristic extractor.
        """
        # Coerce once up front. Every later use already went through Path(...),
        # but the resolved-path set did not, so a plain-string entry used to
        # raise AttributeError and abort the whole extraction.
        paths = [Path(f) for f in files]
        all_paths = {p.resolve() for p in paths}

        per_file: list[FileHealth] = []
        test_files: set[Path] = set()
        for p in paths:
            try:
                text = p.read_text(encoding="utf-8", errors="replace")
            except OSError as exc:
                _log.debug("signal_skip_unreadable path=%s err=%s", p, exc)
                continue
            name = p.name
            is_test = bool(_TEST_NAME_RE.search(name)) or name.startswith("test")
            if is_test:
                test_files.add(p.resolve())
            per_file.append(
                FileHealth(path=str(p), loc=_loc(text), complexity=_decisions(text))
            )

        source = [h for h in per_file if Path(h.path).resolve() not in test_files]
        if not source:
            source = per_file  # a pure test-edit task still gets signals

        bundle = SignalBundle(
            files=len(per_file),
            total_loc=sum(h.loc for h in source),
            max_file_loc=max((h.loc for h in source), default=0),
            total_complexity=sum(h.complexity for h in source),
            max_complexity=max((h.complexity for h in source), default=0),
            has_tests=_has_tests(source, all_paths),
        )
        # Guard the division: with no readable files the average stays 0.0.
        n = len(source) or 1
        bundle.avg_complexity = bundle.total_complexity / n
        bundle.difficulty = _difficulty(bundle)
        bundle.expected_input_tokens = bundle.total_loc * self._tokens_per_loc
        bundle.tags = _tags(bundle)
        return bundle
125
+
126
+
127
def _loc(text: str) -> int:
    """Count the lines of ``text`` that contain something other than whitespace."""
    non_blank = [line for line in text.splitlines() if line.strip()]
    return len(non_blank)
129
+
130
+
131
def _decisions(text: str) -> int:
    """Count the non-overlapping ``_DECISION_RE`` matches in ``text``."""
    return sum(1 for _ in _DECISION_RE.finditer(text))
133
+
134
+
135
def _has_tests(source: list[FileHealth], all_paths: set[Path]) -> bool:
    """Report whether the task's files appear to be covered by tests.

    True when any provided path has a test-like file name, or when a sibling
    test file (``test_<stem>.py`` / ``<stem>_test.py`` and the ``.go``
    equivalents) is either in the provided set or present on disk.
    """
    for provided in all_paths:
        if _TEST_NAME_RE.search(provided.name) or provided.name.startswith("test"):
            return True

    sibling_templates = ("test_{}.py", "{}_test.py", "{}_test.go", "test_{}.go")
    for health in source:
        src = Path(health.path)
        folder, stem = src.parent, src.stem
        for template in sibling_templates:
            candidate = folder / template.format(stem)
            if candidate.resolve() in all_paths or candidate.exists():
                return True
    return False
152
+
153
+
154
def _difficulty(b: SignalBundle) -> str:
    """Map the raw signals in ``b`` to a difficulty tier.

    Tiers are checked from hardest to easiest, so the first matching rule
    wins: ``expert`` > ``hard`` > ``medium`` > ``easy`` > ``trivial``.
    """
    biggest_file = b.max_file_loc
    avg = b.avg_complexity

    if biggest_file > 800 or avg > 30:
        return "expert"

    # Untested code is treated as harder at a lower complexity threshold.
    untested_and_branchy = (not b.has_tests) and avg > 10
    if b.max_complexity >= 15 or biggest_file > 400 or untested_and_branchy:
        return "hard"

    if avg >= 5 or biggest_file > 150:
        return "medium"

    return "easy" if b.total_loc > 20 else "trivial"
168
+
169
+
170
def _tags(b: SignalBundle) -> list[str]:
    """Render the bundle's raw signals as the tag strings sent to the router."""
    complexity_band = _band(b.avg_complexity, 5, 15)
    loc_band = _band(float(b.max_file_loc), 150, 400)
    tests_tag = "has_tests" if b.has_tests else "no_tests"
    return [
        f"complexity:{complexity_band}",
        f"loc:{loc_band}",
        tests_tag,
        f"files:{b.files}",
    ]
177
+
178
+
179
async def extract_or_none(
    extractor: ContextExtractor | None, task: str, files: list[Path] | None
) -> SignalBundle | None:
    """Run ``extractor`` if configured and files were provided; None otherwise.

    Accepts extractors whose ``extract`` is either a plain function or a
    coroutine function. Never raises — a broken extractor must not block
    routing (the caller falls back to text-only); failures are logged.
    """
    if not files or extractor is None:
        return None
    try:
        outcome = extractor.extract(task, list(files))
        # Only await when extract() actually handed back an awaitable.
        needs_await = hasattr(outcome, "__await__")
        if needs_await:
            outcome = await outcome  # type: ignore[assignment]
    except Exception:  # noqa: BLE001
        _log.warning("signal_extraction_failed", exc_info=True)
        return None
    return outcome  # type: ignore[return-value]
@@ -0,0 +1,14 @@
1
+ """Tree-structured JSONL session store (a port of PI's session model)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from minima_harness.session.format import EntryType, SessionEntry
6
+ from minima_harness.session.store import SessionManager, SessionStore, SessionSummary
7
+
8
+ __all__ = [
9
+ "EntryType",
10
+ "SessionEntry",
11
+ "SessionManager",
12
+ "SessionStore",
13
+ "SessionSummary",
14
+ ]
@@ -0,0 +1,35 @@
1
+ from __future__ import annotations
2
+
3
+ import time
4
+ import uuid
5
+ from enum import StrEnum
6
+
7
+ from pydantic import BaseModel, Field
8
+
9
+
10
+ class EntryType(StrEnum):
11
+ USER = "user"
12
+ ASSISTANT = "assistant"
13
+ TOOL = "tool"
14
+ SYSTEM = "system"
15
+ GOAL = "goal" # serialized Goal snapshot (latest wins); powers /goals across resume
16
+
17
+
18
def new_id() -> str:
    """Return a fresh 12-character id: the leading hex digits of a random UUID4."""
    full_hex = uuid.uuid4().hex
    return full_hex[:12]
21
+
22
+
23
def now_ts() -> float:
    """Return the current wall-clock time as seconds since the epoch."""
    current = time.time()
    return current
25
+
26
+
27
class SessionEntry(BaseModel):
    """One node in the session tree. Append-only; never mutated once written."""

    # Unique id of this entry.
    id: str
    # Id of the entry this one continues from; None for a root entry.
    parent_id: str | None = None
    type: EntryType
    # Creation timestamp; defaults to the current time when not supplied.
    ts: float = Field(default_factory=now_ts)
    payload: dict
    # Optional bookmark label (for /tree).
    label: str | None = None
@@ -0,0 +1,236 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ import time
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+
10
+ from minima_harness.session.format import EntryType, SessionEntry, new_id
11
+
12
+ _log = logging.getLogger("minima_harness.session")
13
+
14
+
15
def format_age(ts: float, now: float | None = None) -> str:
    """Render how long ago ``ts`` was as a compact relative age.

    Produces ``just now`` (under a minute), then ``<n>m ago``, ``<n>h ago``,
    ``<n>d ago`` and finally ``<n>w ago``. A missing or non-positive ``ts``
    yields ``?``. ``now`` defaults to the current time; timestamps in the
    future are clamped to ``just now``.
    """
    if not ts or ts <= 0:
        return "?"

    reference = time.time() if now is None else now
    elapsed = max(0.0, reference - ts)
    if elapsed < 60:
        return "just now"

    minute, hour, day = 60, 3600, 86400
    week = day * 7
    # (exclusive upper bound, seconds per unit, suffix), smallest unit first.
    for ceiling, unit, suffix in ((hour, minute, "m"), (day, hour, "h"), (week, day, "d")):
        if elapsed < ceiling:
            return f"{int(elapsed // unit)}{suffix} ago"
    return f"{int(elapsed // week)}w ago"
31
+
32
+
33
class SessionStore:
    """Append-only JSONL session tree. File-backed or in-memory (``--no-session``).

    Entries are kept in memory in insertion order; when a path is configured
    each appended entry is also written as one JSON line. ``tip`` is the id of
    the entry the next append will be parented to.
    """

    def __init__(self, path: Path | None, *, display_name: str | None = None) -> None:
        """Create a store; an existing file at ``path`` is loaded immediately."""
        self._path = path
        self._mem: list[SessionEntry] = []
        self._tip: str | None = None
        self.display_name = display_name
        if path is not None and path.exists():
            self._reload()

    @classmethod
    def file_backed(cls, path: Path, *, display_name: str | None = None) -> SessionStore:
        """Alternate constructor for a store persisted at ``path``."""
        return cls(path, display_name=display_name)

    @classmethod
    def in_memory(cls) -> SessionStore:
        """Alternate constructor for a store that never touches disk."""
        return cls(None)

    @property
    def path(self) -> Path | None:
        """Backing file, or None for an in-memory store."""
        return self._path

    @property
    def persistent(self) -> bool:
        """True when the store has a backing file."""
        return self._path is not None

    @property
    def entries(self) -> list[SessionEntry]:
        """A copy of all entries in insertion order."""
        return list(self._mem)

    @property
    def tip(self) -> str | None:
        """Id of the entry the next append continues from (None when empty)."""
        return self._tip

    def append(
        self, entry_type: EntryType, payload: dict, *, label: str | None = None
    ) -> SessionEntry:
        """Add a new entry under the current tip and make it the new tip.

        The entry is always recorded in memory. Writing it to the backing file
        is best-effort: an ``OSError`` is logged and swallowed so a disk
        failure cannot abort the caller's turn.
        """
        entry = SessionEntry(
            id=new_id(),
            parent_id=self._tip,
            type=entry_type,
            payload=payload,
            label=label,
        )
        self._mem.append(entry)
        self._tip = entry.id
        if self._path is not None:
            try:
                # Create the parent directory on demand; otherwise a store
                # pointed at a not-yet-existing directory would fail every
                # single append and never persist anything.
                self._path.parent.mkdir(parents=True, exist_ok=True)
                with self._path.open("a", encoding="utf-8") as fh:
                    fh.write(entry.model_dump_json() + "\n")
            except OSError:  # noqa: BLE001 - disk failure must not kill the turn
                _log.warning("session_append_failed", exc_info=True)
        return entry

    def set_tip(self, entry_id: str) -> None:
        """Branch: continue the next append from ``entry_id`` (must already exist).

        Raises:
            KeyError: if no entry with that id is loaded.
        """
        if not any(e.id == entry_id for e in self._mem):
            raise KeyError(f"unknown entry id: {entry_id}")
        self._tip = entry_id

    def path_to(self, entry_id: str) -> list[SessionEntry]:
        """Root → entry_id path (inclusive). Raises KeyError if unknown.

        The walk follows ``parent_id`` links and stops at a root, at a parent
        that is not loaded, or when a link leads back to an entry already on
        the path.
        """
        by_id = {e.id: e for e in self._mem}
        if entry_id not in by_id:
            raise KeyError(f"unknown entry id: {entry_id}")
        out: list[SessionEntry] = []
        # Entries come from a file that may be corrupt or hand-edited; without
        # this guard a cyclic parent chain would loop (and grow `out`) forever.
        visited: set[str] = set()
        cur: str | None = entry_id
        while cur is not None and cur in by_id and cur not in visited:
            visited.add(cur)
            out.append(by_id[cur])
            cur = by_id[cur].parent_id
        out.reverse()
        return out

    def children_map(self) -> dict[str | None, list[str]]:
        """parentId → child ids in insertion order (root key is None)."""
        cm: dict[str | None, list[str]] = {}
        for e in self._mem:
            cm.setdefault(e.parent_id, []).append(e.id)
        return cm

    def _write_path(self, dest: Path, entries: list[SessionEntry]) -> None:
        """Write ``entries`` to ``dest`` as JSONL, replacing any existing file."""
        dest.parent.mkdir(parents=True, exist_ok=True)
        with dest.open("w", encoding="utf-8") as fh:
            fh.writelines(e.model_dump_json() + "\n" for e in entries)

    def fork_to(self, dest: Path, *, from_entry_id: str) -> SessionStore:
        """Copy the root→from_entry_id path into a new session file.

        Raises:
            KeyError: if ``from_entry_id`` is unknown.
        """
        path = self.path_to(from_entry_id)
        self._write_path(dest, path)
        return SessionStore.file_backed(dest)

    def clone_to(self, dest: Path) -> SessionStore:
        """Copy the current branch (root→tip) into a new session file."""
        if self._tip is None:
            self._write_path(dest, [])
            return SessionStore.file_backed(dest)
        return self.fork_to(dest, from_entry_id=self._tip)

    def _reload(self) -> None:
        """Replace the in-memory entries with the contents of the backing file.

        Malformed lines are skipped with a warning; the tip becomes the last
        successfully parsed entry.
        """
        assert self._path is not None
        self._mem = []
        # errors="replace": an undecodable byte must only spoil its own line
        # (which then fails validation and is skipped), not the whole load.
        raw = self._path.read_text(encoding="utf-8", errors="replace")
        for line in raw.splitlines():
            if not line.strip():
                continue
            try:
                self._mem.append(SessionEntry.model_validate_json(line))
            except Exception:  # noqa: BLE001 - one bad line must not lose the session
                _log.warning("session_skipped_malformed_line")
        self._tip = self._mem[-1].id if self._mem else None
144
+
145
+
146
+ @dataclass(slots=True)
147
+ class SessionSummary:
148
+ session_id: str
149
+ path: Path
150
+ display_name: str | None
151
+ mtime: float # file mtime ≈ last activity ("recently used")
152
+ n_entries: int
153
+ created: float = 0.0 # ts of the first entry ("created"); falls back to mtime
154
+
155
+
156
class SessionManager:
    """Discovers/creates session files under ``<sessions_dir>/<cwd-slug>/<uuid>.jsonl``."""

    def __init__(self, sessions_dir: Path | None = None) -> None:
        """Use ``sessions_dir`` as the root, or ``~/.minima-harness/sessions``."""
        base = sessions_dir or Path.home() / ".minima-harness" / "sessions"
        self._base = Path(base)

    def slug_for(self, directory: Path) -> str:
        """Flatten the resolved ``directory`` path into a single folder name.

        Path separators become ``-`` and leading dashes are dropped; an empty
        result (the filesystem root) is named ``root``.
        """
        directory = Path(directory).resolve()
        slug = str(directory).replace(os.sep, "-").replace("/", "-")
        return slug.lstrip("-") or "root"

    def _dir_for(self, directory: Path) -> Path:
        """Return (creating it if needed) the session folder for ``directory``."""
        d = self._base / self.slug_for(directory)
        d.mkdir(parents=True, exist_ok=True)
        return d

    def new(self, directory: Path, *, name: str | None = None) -> SessionStore:
        """Create a store backed by a fresh, uniquely named file for ``directory``."""
        sid = new_id()
        path = self._dir_for(directory) / f"{sid}.jsonl"
        return SessionStore.file_backed(path, display_name=name)

    def open(
        self,
        directory: Path,
        *,
        session_id: str | None = None,
        no_session: bool = False,
    ) -> SessionStore:
        """Open a session for ``directory``.

        ``no_session`` yields an in-memory store. With ``session_id`` the first
        listed session whose id is a prefix of it (or vice versa) is opened.
        Otherwise the most recently modified session is resumed, or a new one
        is created when none exists.

        Raises:
            FileNotFoundError: if ``session_id`` is given and nothing matches.
        """
        if no_session:
            return SessionStore.in_memory()
        if session_id:
            for s in self.list_sessions(directory):
                if s.session_id.startswith(session_id) or session_id.startswith(s.session_id):
                    return SessionStore.file_backed(s.path, display_name=s.display_name)
            raise FileNotFoundError(f"no session matching id: {session_id}")
        recent = self.most_recent(directory)
        if recent is not None:
            return SessionStore.file_backed(recent.path, display_name=recent.display_name)
        return self.new(directory)

    def most_recent(self, directory: Path) -> SessionSummary | None:
        """Return the session with the newest mtime, or None when there is none."""
        sessions = self.list_sessions(directory)
        return max(sessions, key=lambda s: s.mtime) if sessions else None

    def list_sessions(self, directory: Path) -> list[SessionSummary]:
        """Summarize every ``*.jsonl`` session of ``directory``, newest first.

        Files that cannot be read, stat'ed or decoded are skipped rather than
        failing the whole listing.
        """
        d = self._base / self.slug_for(directory)
        if not d.exists():
            return []
        out: list[SessionSummary] = []
        for p in sorted(d.glob("*.jsonl")):
            try:
                nonempty = [ln for ln in p.read_text(encoding="utf-8").splitlines() if ln.strip()]
                # stat() belongs inside the guard too: the file can vanish
                # between the read and the stat.
                mtime = p.stat().st_mtime
            except (OSError, UnicodeError):  # noqa: BLE001
                # UnicodeError: one undecodable file must not break listing,
                # which would in turn break resuming any session.
                continue
            out.append(
                SessionSummary(
                    session_id=p.stem,
                    path=p,
                    display_name=None,
                    mtime=mtime,
                    n_entries=len(nonempty),
                    created=_first_entry_ts(nonempty) or mtime,
                )
            )
        # Most-recently-used first — the natural order for a resume picker.
        out.sort(key=lambda s: s.mtime, reverse=True)
        return out
225
+
226
+
227
def _first_entry_ts(nonempty_lines: list[str]) -> float | None:
    """Parse the ``ts`` of a session's first entry (its logical creation time).

    Returns None when there are no lines, the first line is not valid JSON,
    it is not a JSON object, it has no ``ts``, or ``ts`` is not convertible
    to a float.
    """
    if not nonempty_lines:
        return None
    try:
        first = json.loads(nonempty_lines[0])
        if not isinstance(first, dict):
            return None  # non-object first line → no ts
        raw_ts = first.get("ts")
        return None if raw_ts is None else float(raw_ts)
    except (ValueError, TypeError):
        return None
@@ -0,0 +1,17 @@
1
+ """Task corpus + grading helpers for harness runs."""
2
+
3
+ from minima_harness.tasks.task_set import (
4
+ PARTIAL_THRESHOLD,
5
+ SUCCESS_THRESHOLD,
6
+ TASKS,
7
+ Task,
8
+ grade_outcome,
9
+ )
10
+
11
+ __all__ = [
12
+ "PARTIAL_THRESHOLD",
13
+ "SUCCESS_THRESHOLD",
14
+ "TASKS",
15
+ "Task",
16
+ "grade_outcome",
17
+ ]
@@ -0,0 +1,78 @@
1
+ """Task corpus for harness runs.
2
+
3
+ A :class:`Task` carries both the deterministic ``quality_fn`` (cheap, offline grading)
4
+ and the richer ``rubric``/``expected`` fields the LLM judge consumes (Phase 3). Either
5
+ grading path is optional: a task with ``quality_fn=None`` and an empty ``rubric`` just
6
+ records tokens/cost with a neutral outcome.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from collections.abc import Callable
12
+ from dataclasses import dataclass, field
13
+
14
+ QualityFn = Callable[[str], float]
15
+
16
+
17
+ @dataclass(slots=True)
18
+ class Task:
19
+ """A single graded task."""
20
+
21
+ label: str
22
+ prompt: str
23
+ task_type: str # code | qa | reasoning | extraction | creative | ...
24
+ quality_fn: QualityFn | None = None # (model_output) -> float in [0, 1]
25
+ slider: float = 5.0 # cost/quality tradeoff: 1.0=cheapest, 10.0=best quality
26
+ rubric: str = "" # consumed by the LLM judge (Phase 3)
27
+ expected: str = "" # reference answer for the judge / deterministic checks
28
+ tags: list[str] = field(default_factory=list)
29
+
30
+ def __post_init__(self) -> None:
31
+ assert 0.0 < self.slider <= 10.0, f"slider must be in (0, 10], got {self.slider}"
32
+
33
+
34
+ # Outcome thresholds mirror examples/agent_warmup.py so feedback labels are consistent.
35
SUCCESS_THRESHOLD = 0.8
PARTIAL_THRESHOLD = 0.4


def grade_outcome(quality: float) -> str:
    """Map a [0, 1] quality score to a Minima outcome label.

    Scores at or above ``SUCCESS_THRESHOLD`` are ``"success"``, at or above
    ``PARTIAL_THRESHOLD`` are ``"partial"``, and anything lower is
    ``"failure"``.
    """
    # Highest floor first so the best label that applies wins.
    for floor, label in ((SUCCESS_THRESHOLD, "success"), (PARTIAL_THRESHOLD, "partial")):
        if quality >= floor:
            return label
    return "failure"
46
+
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Seed corpus (3 tasks ported from the old agent/task_set.py, enriched)
50
+ # ---------------------------------------------------------------------------
51
+
52
def _grade_order_extract(output: str) -> float:
    """Full marks only when both the order id and the total appear."""
    return 1.0 if "A-9931" in output and "48.20" in output else 0.0


def _grade_retry_policy(output: str) -> float:
    """Length-based proxy: answers longer than 200 characters score higher."""
    return 0.9 if len(output) > 200 else 0.4


def _grade_binary_search(output: str) -> float:
    """Full marks when the function definition and an assert are both present."""
    return 1.0 if "def binary_search" in output and "assert" in output else 0.5


_ORDER_EXTRACT = Task(
    label="order-extract",
    prompt="Extract order id and total from: 'Order #A-9931 totalling $48.20 shipped.'",
    task_type="extraction",
    quality_fn=_grade_order_extract,
    slider=3.0,  # cheap is fine for extraction
    rubric="Output must contain order id 'A-9931' and total '$48.20'.",
    expected="A-9931, $48.20",
)

_RETRY_POLICY = Task(
    label="retry-policy",
    prompt="Write a retry policy with jitter for a flaky payment webhook. Justify the math.",
    task_type="reasoning",
    quality_fn=_grade_retry_policy,
    slider=7.0,  # harder task, want quality
    rubric="Must describe exponential backoff with jitter and justify the math.",
)

_BINARY_SEARCH = Task(
    label="binary-search",
    prompt="Implement binary search in Python with a test. Make it idiomatic.",
    task_type="code",
    quality_fn=_grade_binary_search,
    slider=5.0,
    rubric="Must define `def binary_search(...)` and include an `assert`-based test.",
)

# Seed corpus, in the order the tasks are run.
TASKS: list[Task] = [_ORDER_EXTRACT, _RETRY_POLICY, _BINARY_SEARCH]
@@ -0,0 +1,7 @@
1
+ """Built-in coding tools (read/write/edit/bash/grep/find/ls) — a port of PI's defaults."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from minima_harness.tools.builtin import default_toolset
6
+
7
+ __all__ = ["default_toolset"]
@@ -0,0 +1,34 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
# Longest line (in characters) shown verbatim; longer lines are cut.
MAX_LINE = 2000


def truncate_line(line: str) -> str:
    """Return ``line`` unchanged, or cut to ``MAX_LINE`` chars with a marker."""
    if len(line) <= MAX_LINE:
        return line
    return line[:MAX_LINE] + " …(truncated)"


def read_lines(path: Path, *, offset: int, limit: int) -> tuple[str, int]:
    """Return (numbered_body, n_selected) for lines [offset, offset+limit).

    ``offset`` is 1-based; values below 1 start at the first line. Each
    selected line is prefixed with its right-aligned line number and passed
    through :func:`truncate_line`. When lines remain after the selection a
    trailer stating how many is appended. Undecodable bytes are replaced
    rather than raising.

    Raises:
        OSError: if ``path`` cannot be read.
    """
    text = path.read_text(encoding="utf-8", errors="replace")
    lines = text.splitlines()
    start = max(0, offset - 1)
    # Clamp limit at 0: a negative limit would make `end` negative, and the
    # slice below would then count from the end of the file and return
    # almost everything instead of nothing.
    end = min(len(lines), start + max(0, limit))
    selected = lines[start:end]
    # Pad numbers to the width of the largest line number shown.
    width = len(str(end if end else 1))
    body = "\n".join(
        f"{str(i).rjust(width)}: {truncate_line(line)}"
        for i, line in enumerate(selected, start=start + 1)
    )
    if end < len(lines):
        body += f"\n…({len(lines) - end} more lines; use a larger offset to continue)"
    return body, len(selected)
29
+
30
+
31
def write_text(path: Path, content: str) -> int:
    """Write ``content`` to ``path`` as UTF-8 and return its line count.

    Missing parent directories are created first; an existing file is
    overwritten.
    """
    parent = path.parent
    parent.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding="utf-8")
    n_lines = len(content.splitlines())
    return n_lines
@@ -0,0 +1,70 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ from pathlib import Path
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+ from minima_harness.agent.tools import AgentTool, ToolResult, ToolUpdate, error_result
9
+ from minima_harness.ai.types import TextContent
10
+
11
+
12
class BashParams(BaseModel):
    """Arguments accepted by the ``bash`` tool."""

    # Shell command line to run.
    command: str
    # Time budget in milliseconds; must be at least 1.
    timeout: int = Field(default=120_000, ge=1)
    # Working directory for the command; None leaves it unset.
    workdir: str | None = None
16
+
17
+
18
async def _execute(
    tool_call_id: str,
    params,
    signal,
    on_update: ToolUpdate | None,  # noqa: ANN001
) -> ToolResult:
    """Run ``params.command`` in a shell and return its combined output.

    stderr is merged into stdout. Each decoded chunk is forwarded to
    ``on_update`` (if given) as it arrives and also collected for the final
    result, which ends with an ``[exit <code>]`` line and carries the exit
    code in ``details``.

    Failure to start the process, or exceeding ``params.timeout``
    (milliseconds), yields an error result instead of raising. On timeout the
    whole process group is killed, not just the shell.

    ``tool_call_id`` and ``signal`` are accepted for the tool interface but
    are not consulted here.
    """
    import os
    import signal as _signal  # aliased: the ``signal`` parameter shadows the module

    assert isinstance(params, BashParams)
    wd = str(Path(params.workdir).expanduser()) if params.workdir else None
    try:
        proc = await asyncio.create_subprocess_shell(
            params.command,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
            cwd=wd,
            start_new_session=True,
        )
    except OSError as exc:
        return error_result(f"bash: failed to start: {exc}")

    chunks: list[str] = []
    assert proc.stdout is not None
    try:
        async with asyncio.timeout(params.timeout / 1000.0):
            async for raw in proc.stdout:
                chunk = raw.decode("utf-8", errors="replace")
                chunks.append(chunk)
                if on_update is not None:
                    try:
                        on_update(chunk)
                    except Exception:  # noqa: BLE001 - progress must never break the run
                        pass
            await proc.wait()
    except TimeoutError:
        # start_new_session=True made the shell the leader of its own process
        # group, so its pid doubles as the group id. Killing only the shell
        # would leave anything it spawned running after the timeout.
        group_killed = False
        killpg = getattr(os, "killpg", None)  # not available on every platform
        if killpg is not None:
            try:
                killpg(proc.pid, _signal.SIGKILL)
                group_killed = True
            except (ProcessLookupError, PermissionError):
                pass  # group already gone or not ours; fall back below
        if not group_killed:
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # the shell exited on its own in the meantime
        await proc.wait()
        return error_result(f"bash: timed out after {params.timeout} ms")

    output = "".join(chunks)
    code = proc.returncode if proc.returncode is not None else -1
    body = f"{output}\n[exit {code}]" if output else f"[exit {code}]"
    return ToolResult(content=[TextContent(text=body)], details={"exit_code": code})
59
+
60
+
61
def bash_tool() -> AgentTool:
    """Build the ``bash`` tool definition, wired to ``BashParams`` and ``_execute``."""
    description = (
        "Run a shell command and return its combined stdout/stderr and exit code. "
        "Output streams live. Runs with the user's full permissions — no confirmation."
    )
    tool = AgentTool(
        name="bash",
        description=description,
        parameters=BashParams,
        execute=_execute,
    )
    return tool
+ )