@event4u/agent-config 3.1.1 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/analytics/prune.md +78 -0
- package/.agent-src/commands/analytics/show.md +107 -0
- package/.agent-src/commands/analytics.md +64 -0
- package/.agent-src/commands/knowledge/forget.md +104 -0
- package/.agent-src/commands/knowledge/ingest.md +122 -0
- package/.agent-src/commands/knowledge/list.md +102 -0
- package/.agent-src/commands/knowledge.md +75 -0
- package/.agent-src/scripts/update_roadmap_progress.py +1 -1
- package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
- package/.claude-plugin/marketplace.json +8 -1
- package/CHANGELOG.md +38 -230
- package/README.md +12 -2
- package/dist/discovery/deprecation-report.md +1 -1
- package/dist/discovery/discovery-manifest.json +162 -8
- package/dist/discovery/discovery-manifest.json.sha256 +1 -1
- package/dist/discovery/discovery-manifest.summary.md +3 -3
- package/dist/discovery/orphan-report.md +1 -1
- package/dist/discovery/packs.json +12 -5
- package/dist/discovery/trust-report.md +2 -2
- package/dist/discovery/workspaces.json +11 -4
- package/dist/mcp/mcp-cloudflare-catalogue.json +2 -0
- package/dist/mcp/registry-manifest.json +5 -3
- package/docs/architecture.md +1 -1
- package/docs/archive/CHANGELOG-pre-3.2.0.md +268 -0
- package/docs/catalog.md +9 -2
- package/docs/contracts/CHANGELOG-conventions.md +19 -0
- package/docs/contracts/at-rest-encryption.md +146 -0
- package/docs/contracts/daily-workspace.md +137 -0
- package/docs/contracts/explain-modes.md +146 -0
- package/docs/contracts/host-agent-protocol.md +88 -0
- package/docs/contracts/local-analytics.md +148 -0
- package/docs/contracts/local-knowledge-ingestion.md +96 -0
- package/docs/contracts/role-experience.md +121 -0
- package/docs/contracts/workspace-documents.md +140 -0
- package/docs/decisions/ADR-022-daily-workspace-decomposition.md +140 -0
- package/docs/decisions/ADR-023-host-agent-protocol.md +129 -0
- package/docs/decisions/ADR-024-workspace-v0-feature-floor.md +126 -0
- package/docs/decisions/ADR-025-workspace-chrome.md +119 -0
- package/docs/decisions/ADR-026-explain-mode-translation.md +117 -0
- package/docs/deploy/small-team-recipe.md +148 -0
- package/docs/deploy/team-deployment-posture.md +91 -0
- package/docs/getting-started-by-role.md +27 -0
- package/docs/getting-started.md +1 -1
- package/docs/guides/local-analytics.md +125 -0
- package/docs/guides/local-knowledge.md +127 -0
- package/package.json +4 -2
- package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
- package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
- package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
- package/scripts/_lib/changelog_eras.py +330 -0
- package/scripts/memory_lookup.py +78 -1
- package/scripts/release.py +93 -3
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
---
|
|
2
|
+
stability: beta
|
|
3
|
+
keep-beta-until: 2026-08-24
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Local knowledge — 5-minute walkthrough
|
|
7
|
+
|
|
8
|
+
Point the agent at a folder of local files (PDFs, Markdown, Word docs, spreadsheets). It chunks, redacts PII + secrets, and persists into the agent memory namespace — local-only, single-user, no OAuth, no remote fetch.
|
|
9
|
+
|
|
10
|
+
Contract: [`local-knowledge-ingestion`](../contracts/local-knowledge-ingestion.md).
|
|
11
|
+
Roadmap home: `agents/roadmaps/road-to-employee-product-and-external-proof.md` Phase 2.
|
|
12
|
+
|
|
13
|
+
## Prerequisites
|
|
14
|
+
|
|
15
|
+
- Python 3.10+ on the host.
|
|
16
|
+
- `markitdown` on `PATH` if the corpus contains PDF / DOCX / XLSX / EPUB / images. Pure markdown / text corpora work without it.
|
|
17
|
+
- An `agents/` directory in the project (created by the installer). The `agents/memory/knowledge/` subdirectory is created lazily on first ingest.
|
|
18
|
+
|
|
19
|
+
## Step 1 — Pick a folder
|
|
20
|
+
|
|
21
|
+
Anything local works: a customer folder, a project drop, a `.zip` archive, a single PDF. The walk skips hidden dirs (`.git`, `.venv`, `node_modules`) and does not follow symlinks.
|
|
22
|
+
|
|
23
|
+
For this walkthrough we use a folder with one PDF and three markdown notes:
|
|
24
|
+
|
|
25
|
+
```text
|
|
26
|
+
/Users/maintainer/clients/acme/
|
|
27
|
+
├── brief.pdf
|
|
28
|
+
├── kickoff-notes.md
|
|
29
|
+
├── meeting-2026-05-12.md
|
|
30
|
+
└── pricing-v3.md
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Step 2 — Ingest
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
/knowledge ingest /Users/maintainer/clients/acme/
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Realistic output (your ingest-id will differ — uuid7s are time-ordered):
|
|
40
|
+
|
|
41
|
+
```text
|
|
42
|
+
✅ ingested 01927f4a-2b1c from /Users/maintainer/clients/acme/
|
|
43
|
+
documents: 4, chunks: 18, bytes_stored: 47312
|
|
44
|
+
PII redacted: EMAIL=3, PHONE=1, IBAN=0, CC=0, SSN=0
|
|
45
|
+
secrets redacted: 0
|
|
46
|
+
skipped: 0 unsupported MIME
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
What just happened:
|
|
50
|
+
|
|
51
|
+
- Each file routed through `markitdown` (PDF) or passthrough (Markdown).
|
|
52
|
+
- Chunks split at ~2 KB boundaries, written to `agents/memory/knowledge/<ingest-id>/chunks/<n>.md`.
|
|
53
|
+
- A `manifest.json` recorded the source path, doc count, redaction counters, and `created_at`.
|
|
54
|
+
- PII regex pass replaced 3 emails + 1 phone with `[EMAIL]` / `[PHONE]` placeholders **before** the chunk hit disk.
|
|
55
|
+
|
|
56
|
+
> Want the raw text stored without redaction? Pass `--no-redact`. The manifest records the flag, so the audit row names every bypass. Redaction is always on by default.
|
|
57
|
+
|
|
58
|
+
## Step 3 — Ask the agent
|
|
59
|
+
|
|
60
|
+
Use the host model normally. The MCP tool `memory_retrieve` now returns knowledge chunks alongside curated and intake entries — same envelope, with an additional `body.source_kind: knowledge` tag so the model knows the source is user-supplied, not maintainer-curated.
|
|
61
|
+
|
|
62
|
+
Example prompt:
|
|
63
|
+
|
|
64
|
+
> *"What does the acme pricing-v3 note say about volume discounts?"*
|
|
65
|
+
|
|
66
|
+
The agent retrieves the matching chunks (pinned chunks rank slightly higher than unpinned; knowledge entries are discounted ~15 % vs curated so hand-reviewed content still wins on equal relevance) and answers with a citation back to the source path stored in the manifest.
|
|
67
|
+
|
|
68
|
+
If nothing matches, the model says so. The retrieval surface does not invent a citation.
|
|
69
|
+
|
|
70
|
+
## Step 4 — List + pin
|
|
71
|
+
|
|
72
|
+
See what's been ingested:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
/knowledge list
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
```text
|
|
79
|
+
ID DOCS CHUNKS BYTES PINNED REDACTED CREATED SOURCE
|
|
80
|
+
01927f4a 4 18 47312 no yes 2026-05-25T08:14:02 /Users/maintainer/clients/acme
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Pin so it survives LRU eviction when the 500 MB namespace cap is crossed:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
/knowledge list --pin 01927f4a
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
```text
|
|
90
|
+
✅ pinned 01927f4a
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
The prefix must be unambiguous — if it matches more than one ingest, the command rejects it with a structured error and asks for a longer prefix.
|
|
94
|
+
|
|
95
|
+
## Step 5 — Forget
|
|
96
|
+
|
|
97
|
+
When the work is done, drop the ingest atomically:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
/knowledge forget 01927f4a
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
```text
|
|
104
|
+
✅ forgot 01927f4a — removed 18 chunks, 47312 bytes
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Forget is atomic — no partial state. Pinned ingests are dropped the same as unpinned; pinning protects from LRU, not from explicit forget.
|
|
108
|
+
|
|
109
|
+
## What the guide does **not** cover
|
|
110
|
+
|
|
111
|
+
- Multi-user share — single-user by design. Multi-user lives behind ADR-024 workspace work and Phase 4 of the parent roadmap.
|
|
112
|
+
- Remote sources — every input must resolve to a local path. `http://`, `https://`, `s3://`, `gs://`, `azure://` are rejected at the input validator.
|
|
113
|
+
- Connector contracts (GitHub / Jira / Confluence) — those sit behind Hard-Floor OAuth and stay cancelled in `road-to-internal-ai-os-deployment.md` Phase 5.
|
|
114
|
+
|
|
115
|
+
## Troubleshooting
|
|
116
|
+
|
|
117
|
+
- **"Bound exceeded: total_ingest_size"** — the corpus is > 100 MB. Split it, or ingest a sub-folder.
|
|
118
|
+
- **"Bound exceeded: document_count"** — > 1000 files. Same fix.
|
|
119
|
+
- **"unsupported MIME"** — file skipped, counted in the summary, no chunk written. Add the file as `.md` if you need it indexed.
|
|
120
|
+
- **OCR confidence < 0.7** — the chunk is tagged `low_confidence`. The model still receives it but the citation surface flags the lower confidence.
|
|
121
|
+
- **markitdown not on PATH** — install it (`pip install 'markitdown[all]'`) or pass `--markitdown=<bin>`. Markdown-only corpora work without it.
|
|
122
|
+
|
|
123
|
+
## See also
|
|
124
|
+
|
|
125
|
+
- [`local-knowledge-ingestion`](../contracts/local-knowledge-ingestion.md) — contract (input shapes, bounds, storage, redaction).
|
|
126
|
+
- [`/knowledge ingest`](../../.agent-src/commands/knowledge/ingest.md) · [`/knowledge list`](../../.agent-src/commands/knowledge/list.md) · [`/knowledge forget`](../../.agent-src/commands/knowledge/forget.md)
|
|
127
|
+
- [`markitdown` skill](../../.agent-src/skills/markitdown/SKILL.md) — peer-side adapter for binary formats.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@event4u/agent-config",
|
|
3
|
-
"version": "3.1.1",
|
|
3
|
+
"version": "3.2.0",
|
|
4
4
|
"description": "Universal AI Agent OS \u2014 audited skills, governance rules, commands, and templates for AI coding tools (Claude Code, Cursor, Windsurf, Copilot).",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"private": false,
|
|
@@ -26,7 +26,9 @@
|
|
|
26
26
|
"skills",
|
|
27
27
|
"prompt-engineering",
|
|
28
28
|
"typescript",
|
|
29
|
-
"python"
|
|
29
|
+
"python",
|
|
30
|
+
"agent-skills",
|
|
31
|
+
"cinematic-ai-video"
|
|
30
32
|
],
|
|
31
33
|
"files": [
|
|
32
34
|
".agent-src/",
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
"""Shared constants + helpers for CHANGELOG.md era discipline.
|
|
2
|
+
|
|
3
|
+
The drift gate (``tests/test_changelog_eras.py``) and the release
|
|
4
|
+
automation (``scripts/release.py``) both reason about the same era
|
|
5
|
+
shape: a single ``# Era: X.Y.x — current`` header followed by inline
|
|
6
|
+
entries, then ``# Era: pre-X.Y.0 — archived`` pointers to files under
|
|
7
|
+
``docs/archive/``. Keeping the regex / cap / path constants in one
|
|
8
|
+
place prevents drift between the gate and the auto-split logic.
|
|
9
|
+
|
|
10
|
+
Normative source: ``docs/contracts/CHANGELOG-conventions.md § Era splits``.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
# Three directory levels above this file.
REPO_ROOT = Path(__file__).resolve().parent.parent.parent
# Files and directories the era tooling reads and writes, all anchored
# at REPO_ROOT.
CHANGELOG = REPO_ROOT.joinpath("CHANGELOG.md")
CONVENTIONS = REPO_ROOT.joinpath("docs", "contracts", "CHANGELOG-conventions.md")
ARCHIVE_DIR = REPO_ROOT.joinpath("docs", "archive")

# Maximum number of lines allowed between the current era header and the
# next era header. Changing this value is a contract change — see
# CHANGELOG-conventions.md § Era splits.
CURRENT_ERA_BODY_CAP = 250

# ``# Era: <label>`` header line with an optional `` — current`` or
# `` — archived`` suffix; trailing whitespace is tolerated.
ERA_HEADER_RE = re.compile(r"^# Era: (?P<label>[^\n]+?)(?: — (?P<state>current|archived))?\s*$")
# Markdown link target pointing at an archive file under docs/archive/;
# group 1 is the archive file name.
ARCHIVE_LINK_RE = re.compile(r"\(docs/archive/(CHANGELOG-pre-[^)\s]+\.md)\)")
# Level-two heading that starts with an X.Y.Z version, optionally
# wrapped in an opening bracket.
VERSION_HEADING_RE = re.compile(r"^## \[?(?P<version>\d+\.\d+\.\d+)")
# Era label of the exact form ``M.N.x``.
ERA_LABEL_RE = re.compile(r"^(?P<major>\d+)\.(?P<minor>\d+)\.x$")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass(frozen=True)
class EraSpan:
    """Position and metadata of a single ``# Era:`` header line.

    Instances are immutable (frozen dataclass).
    """

    # Index of the header within the list of changelog lines.
    line_index: int
    # Era label text taken from the header.
    label: str
    # One of "current", "archived", or "" when the header has no state.
    state: str
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def read_changelog_lines() -> list[str]:
    """Load ``CHANGELOG`` as UTF-8 text and return it as a list of lines.

    Line terminators are dropped by ``str.splitlines``.
    """
    text = CHANGELOG.read_text(encoding="utf-8")
    return text.splitlines()
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def era_spans(lines: list[str]) -> list[EraSpan]:
    """Collect an ``EraSpan`` for each line that matches ``ERA_HEADER_RE``.

    Spans come back in the same order as ``lines``. A header without a
    ``— current`` / ``— archived`` suffix gets ``state == ""``.
    """
    candidates = (
        (idx, ERA_HEADER_RE.match(text)) for idx, text in enumerate(lines)
    )
    return [
        EraSpan(
            line_index=idx,
            label=hit.group("label"),
            state=hit.group("state") or "",
        )
        for idx, hit in candidates
        if hit
    ]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def current_era_index(spans: list[EraSpan]) -> int | None:
    """Find the first span whose state is ``"current"``.

    Returns that span's ``line_index``, or ``None`` when no span qualifies.
    """
    return next(
        (candidate.line_index for candidate in spans if candidate.state == "current"),
        None,
    )
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def current_era_body_size(lines: list[str] | None = None) -> int:
    """Count the lines strictly between the current era header and the
    era header that follows it.

    ``lines`` defaults to the result of ``read_changelog_lines()``.
    Returns 0 when there is no ``— current`` header. When no later era
    header exists, the count runs to the end of ``lines``.
    """
    source = read_changelog_lines() if lines is None else lines
    headers = era_spans(source)
    start = current_era_index(headers)
    if start is None:
        return 0
    # Headers are in line order, so the first one past ``start`` is the
    # nearest following era; otherwise fall back to end-of-input.
    stop = next(
        (header.line_index for header in headers if header.line_index > start),
        len(source),
    )
    return stop - start - 1
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def parse_era_label(label: str) -> tuple[int, int] | None:
    """Convert an ``M.N.x`` era label into the integer pair ``(M, N)``.

    Surrounding whitespace is ignored. Any label that does not match
    ``ERA_LABEL_RE`` (for example ``pre-3.2.0``) yields ``None``.
    """
    hit = ERA_LABEL_RE.match(label.strip())
    return (int(hit.group("major")), int(hit.group("minor"))) if hit else None
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def archive_path_for_boundary(boundary: str) -> Path:
    """Build the archive file path for ``boundary`` inside ``ARCHIVE_DIR``.

    The file name is ``CHANGELOG-pre-<boundary>.md``.
    """
    filename = f"CHANGELOG-pre-{boundary}.md"
    return ARCHIVE_DIR / filename
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def collapsed_era_block(boundary: str) -> str:
    """Render the standard ``# Era: pre-<boundary> — archived`` pointer
    block that replaces archived entries in CHANGELOG.md.

    Mirrors the wording the manual splits already used (verified against
    every existing collapsed era as of 3.2.x).

    Args:
        boundary: Version at which the archived range ends, e.g.
            ``"3.3.0"``. It appears in the header, in the prose, and in
            the archive file link.

    Returns:
        The header line, a blank line, and a blockquote that links to
        ``docs/archive/CHANGELOG-pre-<boundary>.md``; every line ends
        with a newline.
    """
    archive_rel = f"docs/archive/CHANGELOG-pre-{boundary}.md"
    return (
        f"# Era: pre-{boundary} — archived\n"
        "\n"
        f"> All entries before `{boundary}` live in\n"
        f"> [`{archive_rel}`]({archive_rel}).\n"
        "> The archive is read-only; git tags remain the canonical\n"
        "> source for what shipped. Splitting them out of the main file\n"
        # Interpolate the shared constant instead of a literal so the
        # rendered text cannot drift from the cap the gate enforces.
        f"> keeps the active era under the {CURRENT_ERA_BODY_CAP}-line drift cap enforced by\n"
        "> `tests/test_changelog_eras.py`.\n"
    )
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def archive_file_header(boundary: str) -> str:
    """Build the fixed prologue placed at the top of
    ``docs/archive/CHANGELOG-pre-<boundary>.md``.

    The result is a level-one heading naming ``boundary``, a blank line,
    a blockquote describing the file as a read-only snapshot, and a
    trailing blank line.
    """
    title = f"# Changelog Archive — pre-{boundary}\n"
    blockquote = (
        "> Frozen snapshot of `event4u/agent-config` changelog entries\n",
        f"> released before `{boundary}`, split out of the main\n",
        "> [`CHANGELOG.md`](../../CHANGELOG.md) by `scripts/release.py`\n",
        "> once the active era's body crossed the drift cap enforced by\n",
        "> `tests/test_changelog_eras.py`.\n",
        ">\n",
        "> **Read-only.** New entries land in `CHANGELOG.md`. Entries\n",
        "> here are not amended — git tags remain the canonical source\n",
        "> for what shipped.\n",
        ">\n",
        "> Entry shape follows\n",
        "> [`../contracts/CHANGELOG-conventions.md`](../contracts/CHANGELOG-conventions.md).\n",
    )
    return title + "\n" + "".join(blockquote) + "\n"
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# ─── split planning + execution ────────────────────────────────────────────────
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# Bare ``X.Y.Z`` release version: three dot-separated runs of digits,
# anchored at both ends (no ``v`` prefix, no pre-release or build suffix).
# Groups 1-3 capture major, minor, and patch.
_RELEASE_VERSION_RE = re.compile(r"^(\d+)\.(\d+)\.(\d+)$")
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
@dataclass(frozen=True)
class SplitPlan:
    """Immutable description of an era split tied to one release."""

    # Version being released, e.g. "3.3.0".
    release_version: str
    # Boundary version, e.g. "3.3.0"; used in the archive file name and
    # in the archived-era pointer.
    boundary: str
    # Label of the era being opened, e.g. "3.3.x".
    new_era_label: str
    # Label of the era being archived, e.g. "3.2.x".
    old_era_label: str
    # Archive file associated with this split.
    archive_path: Path

    @property
    def commit_subject(self) -> str:
        """Commit subject line naming the old era and the boundary."""
        return f"chore(changelog): split era {self.old_era_label} → pre-{self.boundary}"
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def plan_split(release_version: str) -> SplitPlan | None:
    """Plan an era split when releasing ``release_version``.

    Returns None when no split is needed (release is a patch within the
    current era, no current era header exists, or the current era label
    is not of the ``M.N.x`` form). Returns a SplitPlan when the release
    crosses a minor or major boundary; the caller decides whether to
    invoke ``perform_split`` based on era body size.

    Args:
        release_version: Bare ``X.Y.Z`` version. Surrounding whitespace
            is ignored, and the stripped form is what the returned plan
            carries.

    Raises:
        ValueError: ``release_version`` is not bare semver, or it would
            move backward relative to the current era label.
    """
    # Normalise once so the value that was validated is also the value
    # stored in the plan (a trailing newline must not leak through).
    version = release_version.strip()
    m = _RELEASE_VERSION_RE.match(version)
    if not m:
        raise ValueError(f"not a bare semver (X.Y.Z): {release_version!r}")
    # The patch component is validated by the regex but plays no part
    # in era planning.
    rel_major, rel_minor = int(m.group(1)), int(m.group(2))

    lines = read_changelog_lines()
    spans = era_spans(lines)
    current = next((s for s in spans if s.state == "current"), None)
    if current is None:
        return None

    parsed = parse_era_label(current.label)
    if parsed is None:
        return None
    era_major, era_minor = parsed

    if (rel_major, rel_minor) < (era_major, era_minor):
        raise ValueError(
            f"release {release_version!r} is older than current era "
            f"{current.label!r}; refusing to plan a backwards split"
        )
    if (rel_major, rel_minor) == (era_major, era_minor):
        # Patch release within the current era — no era boundary crossed,
        # so an auto-split would create a nonsensical archive name. The
        # caller is expected to die() with the manual-intervention message.
        return None

    boundary = f"{rel_major}.{rel_minor}.0"
    return SplitPlan(
        release_version=version,
        boundary=boundary,
        new_era_label=f"{rel_major}.{rel_minor}.x",
        old_era_label=current.label,
        archive_path=archive_path_for_boundary(boundary),
    )
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def new_era_intro_block(new_era_label: str, boundary: str) -> str:
    """Render the header + blockquote intro for a freshly-split current era.

    Args:
        new_era_label: Label for the new era, e.g. ``"3.3.x"``.
        boundary: Version the era starts at, e.g. ``"3.3.0"``.

    Returns:
        The ``# Era: <label> — current`` header, a blank line, and the
        intro blockquote; every line ends with a newline. The blockquote
        names the next minor era as an example when ``new_era_label``
        parses as ``M.N.x``, and a generic placeholder otherwise.
    """
    parsed = parse_era_label(new_era_label)
    if parsed is None:
        next_example = "# Era: <next>.x"
    else:
        m, n = parsed
        next_example = f"# Era: {m}.{n + 1}.x"
    return (
        f"# Era: {new_era_label} — current\n"
        "\n"
        f"> Started at `{boundary}`. Full entries live inline below.\n"
        # Interpolate the shared constant instead of a literal so the
        # rendered text cannot drift from the cap the gate enforces.
        f"> The drift test caps this era at {CURRENT_ERA_BODY_CAP} lines of entry body; growth past\n"
        f"> that forces a new era split (`{next_example}`, etc.) — see\n"
        "> [`docs/contracts/CHANGELOG-conventions.md § Era splits`](docs/contracts/CHANGELOG-conventions.md).\n"
    )
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _era_body_bounds(
    lines: list[str], current_idx: int
) -> tuple[int, int, int]:
    """Locate the entry body of the era whose header is at ``current_idx``.

    Returns ``(body_start, body_end, next_era_line)``:

    * ``body_start`` — index of the first line after the header, any
      blank lines, the run of ``>``-prefixed intro lines, and the blank
      lines following that run.
    * ``body_end`` — exclusive end of the body; the same value as
      ``next_era_line``, so trailing blank lines are not trimmed.
    * ``next_era_line`` — index of the next line matching
      ``ERA_HEADER_RE``, or ``len(lines)`` when none follows.
    """
    total = len(lines)
    next_era_line = next(
        (j for j in range(current_idx + 1, total) if ERA_HEADER_RE.match(lines[j])),
        total,
    )

    def is_blank(text: str) -> bool:
        return text.strip() == ""

    def is_quote(text: str) -> bool:
        return text.startswith(">")

    # Three consecutive skips, never crossing into the next era:
    # blank lines, then the blockquote intro, then blank lines again.
    pos = current_idx + 1
    for should_skip in (is_blank, is_quote, is_blank):
        while pos < next_era_line and should_skip(lines[pos]):
            pos += 1

    return pos, next_era_line, next_era_line
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def current_era_insertion_point(lines: list[str]) -> int | None:
    """Pick the line index where a new release entry goes inside the
    current era.

    * With at least one line matching ``VERSION_HEADING_RE`` in the era
      body, the index of the first such line is returned, so the new
      entry lands above it.
    * With none, the start of the body (as computed by
      ``_era_body_bounds``) is returned.

    Returns None when ``lines`` has no ``— current`` era header.
    """
    header_idx = current_era_index(era_spans(lines))
    if header_idx is None:
        return None
    first_body_line, body_stop, _next_era = _era_body_bounds(lines, header_idx)
    return next(
        (
            k
            for k in range(first_body_line, body_stop)
            if VERSION_HEADING_RE.match(lines[k])
        ),
        first_body_line,
    )
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def perform_split(plan: SplitPlan) -> None:
    """Execute ``plan`` against the on-disk CHANGELOG.md.

    * Refuses to overwrite an existing archive file.
    * Moves every entry in the current era body into the new archive.
    * Replaces the old current-era block with the freshly-labelled new
      current era header (empty body) followed by the collapsed
      ``# Era: pre-<boundary> — archived`` pointer. The current era
      therefore stays ahead of every archived pointer, matching the
      shape described in this module's docstring.

    Args:
        plan: Split recipe, normally produced by ``plan_split``.

    Raises:
        FileExistsError: ``plan.archive_path`` already exists.
        RuntimeError: CHANGELOG.md has no ``— current`` era header.
    """
    if plan.archive_path.exists():
        raise FileExistsError(
            f"archive already exists at {plan.archive_path} — "
            "likely a previous --resume run; inspect manually"
        )

    lines = read_changelog_lines()
    spans = era_spans(lines)
    current_idx = current_era_index(spans)
    if current_idx is None:
        raise RuntimeError("no current era header found in CHANGELOG.md")

    body_start, _, next_era_line = _era_body_bounds(lines, current_idx)
    entries = lines[body_start:next_era_line]
    # Trim trailing blank lines so the archive doesn't accumulate them.
    while entries and entries[-1].strip() == "":
        entries.pop()

    collapsed = collapsed_era_block(plan.boundary).rstrip("\n").splitlines()
    new_era = new_era_intro_block(plan.new_era_label, plan.boundary).rstrip("\n").splitlines()

    head = lines[:current_idx]
    tail = lines[next_era_line:]
    # New current era first, then the pointer to what was just archived,
    # then whatever older archived pointers were already in the file.
    new_lines = head + new_era + [""] + collapsed + [""] + tail
    new_text = "\n".join(new_lines).rstrip() + "\n"

    archive_body = "\n".join(entries).rstrip() + "\n" if entries else ""
    archive_text = archive_file_header(plan.boundary) + archive_body

    # The archive is written before CHANGELOG.md is rewritten, so a
    # failure in between leaves the entries present in both files
    # rather than in neither.
    plan.archive_path.parent.mkdir(parents=True, exist_ok=True)
    plan.archive_path.write_text(archive_text, encoding="utf-8")
    CHANGELOG.write_text(new_text, encoding="utf-8")
|
package/scripts/memory_lookup.py
CHANGED
|
@@ -39,6 +39,7 @@ from typing import Any, Callable, Iterable, Optional, Union
|
|
|
39
39
|
|
|
40
40
|
MEMORY_ROOT = Path("agents/memory")
|
|
41
41
|
INTAKE_ROOT = MEMORY_ROOT / "intake"
|
|
42
|
+
KNOWLEDGE_ROOT = MEMORY_ROOT / "knowledge"
|
|
42
43
|
|
|
43
44
|
CURATED_TYPES = {
|
|
44
45
|
"ownership",
|
|
@@ -49,6 +50,12 @@ CURATED_TYPES = {
|
|
|
49
50
|
"product-rules",
|
|
50
51
|
}
|
|
51
52
|
|
|
53
|
+
# `knowledge` is its own type: user-ingested local documents that live
|
|
54
|
+
# under `agents/memory/knowledge/<ingest-id>/chunks/*.md`. They are
|
|
55
|
+
# repo-side (file-backed) but not "curated" and not intake — the
|
|
56
|
+
# conflict rule still treats them as repo entries against operational.
|
|
57
|
+
KNOWLEDGE_TYPE = "knowledge"
|
|
58
|
+
|
|
52
59
|
|
|
53
60
|
@dataclass
|
|
54
61
|
class Hit:
|
|
@@ -167,6 +174,58 @@ def _iter_intake_entries(mtype: str) -> Iterable[tuple[Path, dict]]:
|
|
|
167
174
|
yield jsonl, obj
|
|
168
175
|
|
|
169
176
|
|
|
177
|
+
def _iter_knowledge_entries() -> Iterable[tuple[Path, dict]]:
    """Yield (chunk-file, entry) pairs from `agents/memory/knowledge/`.

    Layout (frozen in `docs/contracts/local-knowledge-ingestion.md`):

        agents/memory/knowledge/<ingest-id>/
            manifest.json
            chunks/<n>.md

    Each chunk becomes one retrieval entry. The chunk body, the
    manifest source path, and pinned flag are surfaced into the entry
    so `_score()` can match on either the source path or the chunk
    text. The entry id is ``<ingest-id>:<chunk-stem>`` so callers can
    locate the exact file on disk.

    Best-effort reads: a manifest that is missing, unreadable, not
    valid JSON, or not a JSON object is treated as empty (the ingest id
    then falls back to the directory name). A chunk that cannot be read
    or decoded as UTF-8 is skipped.
    """
    if not KNOWLEDGE_ROOT.is_dir():
        return
    for ingest_dir in sorted(KNOWLEDGE_ROOT.iterdir()):
        if not ingest_dir.is_dir():
            continue
        manifest_path = ingest_dir / "manifest.json"
        manifest: dict = {}
        if manifest_path.is_file():
            try:
                loaded = json.loads(
                    manifest_path.read_text(encoding="utf-8")
                )
            except (ValueError, OSError):
                loaded = None
            # Valid JSON is not necessarily an object; a list or scalar
            # would make the ``.get`` calls below raise.
            if isinstance(loaded, dict):
                manifest = loaded
        ingest_id = str(manifest.get("ingest_id") or ingest_dir.name)
        source = str(manifest.get("source") or "")
        pinned = bool(manifest.get("pinned", False))
        chunks_dir = ingest_dir / "chunks"
        if not chunks_dir.is_dir():
            continue
        for chunk in sorted(chunks_dir.glob("*.md")):
            try:
                body = chunk.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError):
                # UnicodeDecodeError is a ValueError, not an OSError, so
                # it has to be named for a non-UTF-8 chunk to be skipped.
                continue
            entry = {
                "id": f"{ingest_id}:{chunk.stem}",
                "ingest_id": ingest_id,
                "source": source,
                "path": source,
                "body": body,
                "pinned": pinned,
                "source_kind": "knowledge",
            }
            yield chunk, entry
|
|
227
|
+
|
|
228
|
+
|
|
170
229
|
def _score(entry: dict, keys: list[str]) -> float:
|
|
171
230
|
"""Naive relevance score: max over keys of (glob-match | substring).
|
|
172
231
|
|
|
@@ -378,6 +437,24 @@ def retrieve(
|
|
|
378
437
|
"""
|
|
379
438
|
repo_hits: list[Hit] = []
|
|
380
439
|
for mtype in types:
|
|
440
|
+
if mtype == KNOWLEDGE_TYPE:
|
|
441
|
+
for path, entry in _iter_knowledge_entries():
|
|
442
|
+
base = _score(entry, keys)
|
|
443
|
+
# Pinned entries get a slight ranking boost so the
|
|
444
|
+
# `/knowledge:list --pin` flag has retrieval effect.
|
|
445
|
+
if entry.get("pinned"):
|
|
446
|
+
base = min(1.0, base + 0.05)
|
|
447
|
+
repo_hits.append(Hit(
|
|
448
|
+
id=str(entry.get("id", "")),
|
|
449
|
+
type=KNOWLEDGE_TYPE,
|
|
450
|
+
source="knowledge",
|
|
451
|
+
path=str(path),
|
|
452
|
+
# Discount vs curated/intake so hand-reviewed repo
|
|
453
|
+
# entries still win on equal relevance.
|
|
454
|
+
score=base * 0.85,
|
|
455
|
+
entry=entry,
|
|
456
|
+
))
|
|
457
|
+
continue
|
|
381
458
|
if mtype not in CURATED_TYPES:
|
|
382
459
|
continue
|
|
383
460
|
for path, entry in _iter_curated_entries(mtype):
|
|
@@ -426,7 +503,7 @@ CONTRACT_VERSION = 1
|
|
|
426
503
|
|
|
427
504
|
# Memory types this file-backed backend can answer. Types outside this
|
|
428
505
|
# set map to `unknown_type` per the retrieval contract.
|
|
429
|
-
_KNOWN_TYPES = CURATED_TYPES
|
|
506
|
+
_KNOWN_TYPES = CURATED_TYPES | {KNOWLEDGE_TYPE}
|
|
430
507
|
|
|
431
508
|
|
|
432
509
|
def retrieve_v1(
|