memarium 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +146 -0
  3. package/assets/scripts/merge-books.mjs +921 -0
  4. package/assets/workflows/memarium-aggregate.yml +66 -0
  5. package/dist/bin/memarium.js +6 -0
  6. package/dist/src/aggregated-store.js +95 -0
  7. package/dist/src/cli.js +175 -0
  8. package/dist/src/commands/cat.js +20 -0
  9. package/dist/src/commands/doctor.js +383 -0
  10. package/dist/src/commands/init-wizard.js +201 -0
  11. package/dist/src/commands/init.js +45 -0
  12. package/dist/src/commands/list.js +19 -0
  13. package/dist/src/commands/prune.js +108 -0
  14. package/dist/src/commands/resume/config-pathmap.js +38 -0
  15. package/dist/src/commands/resume/fuzzy-match.js +13 -0
  16. package/dist/src/commands/resume/list-sessions.js +54 -0
  17. package/dist/src/commands/resume/render-prompt.js +121 -0
  18. package/dist/src/commands/resume/resume.js +121 -0
  19. package/dist/src/commands/show.js +21 -0
  20. package/dist/src/commands/sync.js +279 -0
  21. package/dist/src/commands/upgrade.js +47 -0
  22. package/dist/src/commands/workflow.js +126 -0
  23. package/dist/src/config.js +98 -0
  24. package/dist/src/content-project-inference.js +185 -0
  25. package/dist/src/device.js +47 -0
  26. package/dist/src/digest/manifest.js +121 -0
  27. package/dist/src/digest/project-filter.js +32 -0
  28. package/dist/src/digest/session-signal.js +106 -0
  29. package/dist/src/digest/toc.js +127 -0
  30. package/dist/src/git-ops.js +359 -0
  31. package/dist/src/index-store.js +35 -0
  32. package/dist/src/migrate.js +72 -0
  33. package/dist/src/project-identity.js +139 -0
  34. package/dist/src/project-resolve.js +42 -0
  35. package/dist/src/prompts.js +87 -0
  36. package/dist/src/repo-data-dir.js +25 -0
  37. package/dist/src/slug.js +28 -0
  38. package/dist/src/sources/base.js +1 -0
  39. package/dist/src/sources/claude-code.js +294 -0
  40. package/dist/src/sources/vscode-copilot.js +400 -0
  41. package/dist/src/types.js +1 -0
  42. package/dist/src/writer.js +240 -0
  43. package/package.json +60 -0
@@ -0,0 +1,185 @@
1
+ import { readdirSync } from "node:fs";
2
+ import { homedir } from "node:os";
3
+ import { join } from "node:path";
4
+ import { cachedProjectSlug } from "./project-identity.js";
5
+ /**
6
+ * Content-based project inference.
7
+ *
8
+ * Claude Code groups jsonl files by cwd at session-start. When the user
9
+ * `cd`s into a different project mid-session, or runs `claude` in the wrong
10
+ * directory by accident (e.g. opens it in `chromium-src` but spends the
11
+ * whole session editing files in `edge-memarium`), the session is filed
12
+ * under the wrong project. The user's intent — "this conversation is about
13
+ * memarium" — disagrees with the cwd label.
14
+ *
15
+ * We recover intent by scanning the session's tool-use blocks for absolute
16
+ * file paths the assistant actually touched (Read / Write / Edit / Bash),
17
+ * mapping each path to a project root, and picking the dominant one. If
18
+ * one project owns ≥ MIN_CONFIDENCE of all path mentions, we override the
19
+ * cwd-derived project with the inferred one.
20
+ *
21
+ * We DO NOT scan message text for paths — that produces too many false
22
+ * positives (e.g. "the chromium codebase has a similar pattern in
23
+ * /chromium/src/foo.cc" mentioned as reference, not as work). Tool-use
24
+ * inputs reflect actual edits/reads, which is the signal we want.
25
+ */
26
+ export const MIN_CONFIDENCE = 0.7; // minimum share of resolved path hits the top project must own before inferProjectFromContent reports it
27
+ export const MIN_PATH_HITS = 5; // inferProjectFromContent reports no project when fewer resolved path hits than this were collected
28
/**
 * Turn a Claude project-dir name back into the filesystem path prefix it
 * was derived from.
 *
 *   "-Users-me-edge-memvc"  →  "/Users/me/edge/memvc"
 *
 * The decoding is lossy: every hyphen after the leading one becomes `/`, so
 * a hyphen that was part of a real path component cannot be told apart from
 * a separator. Names that do not start with `-` are returned untouched.
 *
 * @param {string} name  Directory entry name.
 * @returns {string} The decoded path prefix, or `name` itself when it is not
 *   in the leading-hyphen encoding.
 */
function decodeProjectDirName(name) {
    const encodedAbsolute = name.startsWith("-");
    if (encodedAbsolute) {
        const withoutLeadingDash = name.slice(1);
        return "/" + withoutLeadingDash.replace(/-/g, "/");
    }
    return name;
}
43
/**
 * List the "known project roots" by reading the entries of the Claude
 * projects directory and decoding each entry name into a path prefix.
 *
 * Only `{ path }` objects are produced here; no slug is resolved while
 * listing. The result is ordered longest path first so that a caller doing
 * prefix matching hits the most specific root (e.g. a worktree sub-directory)
 * before its parent project.
 *
 * @param {string} [projectsDir]  Directory to list; defaults to
 *   `~/.claude/projects`.
 * @returns {{ path: string }[]} Decoded roots, longest first. Empty when the
 *   directory cannot be read (best-effort: the read error is not surfaced).
 */
export function listKnownProjectRoots(projectsDir = join(homedir(), ".claude", "projects")) {
    let names;
    try {
        names = readdirSync(projectsDir);
    }
    catch {
        // A missing or unreadable directory simply means "no known roots".
        return [];
    }
    const roots = [];
    for (const name of names) {
        roots.push({ path: decodeProjectDirName(name) });
    }
    return roots.sort((left, right) => right.path.length - left.path.length);
}
63
/**
 * Top-level directories whose contents are never treated as project work.
 * Matched on whole path components: the directory itself or anything below it.
 */
const NON_PROJECT_DIR_ROOTS = ["/tmp", "/private/tmp", "/etc", "/usr", "/var", "/System", "/opt"];
/**
 * Map an absolute path to a project slug.
 *
 * Resolution order:
 *   1. The first entry of `roots` that equals `absPath` or is a directory
 *      prefix of it wins; its slug comes from `cachedProjectSlug(root.path)`.
 *      Callers are expected to pass roots longest-first so the most specific
 *      root is hit first.
 *   2. Otherwise the directory containing `absPath` is resolved through the
 *      same `cachedProjectSlug`, unless that directory is (or lives under) a
 *      well-known non-project location, or the resulting slug is the generic
 *      "home" / "root".
 *
 *   "/Users/me/edge/memvc/src/foo.ts"  → slug of the matched root
 *   "/Users/me/edge/random/file.ts"    → slug of "/Users/me/edge/random"
 *   "/etc/hosts", "/tmp/scratch.txt"   → null
 *
 * @param {string} absPath  Absolute path touched by the session.
 * @param {{ path: string }[]} roots  Known project roots.
 * @returns {string | null} The project slug, or null for empty / relative
 *   paths, files directly under `/`, and non-project locations.
 */
export function pathToProjectSlug(absPath, roots) {
    if (!absPath || !absPath.startsWith("/"))
        return null;
    for (const r of roots) {
        if (absPath === r.path || absPath.startsWith(r.path + "/"))
            return cachedProjectSlug(r.path);
    }
    // Fallback: derive a slug from the directory the touched file lives in.
    const lastSlash = absPath.lastIndexOf("/");
    if (lastSlash <= 0)
        return null;
    const dir = absPath.slice(0, lastSlash);
    // Reject non-project dirs BEFORE resolving identity, so no slug lookup is
    // spent on them. Compare whole components: "/tmp" itself must be rejected
    // (a file directly in /tmp has dir === "/tmp"), while look-alikes such as
    // "/etcetera" or "/various" must not be.
    const isNonProject = NON_PROJECT_DIR_ROOTS.some((root) => dir === root || dir.startsWith(root + "/"));
    if (isNonProject)
        return null;
    const slug = cachedProjectSlug(dir);
    if (slug === "home" || slug === "root")
        return null;
    return slug;
}
97
/**
 * Absolute-path-looking token inside a shell command: a `/` followed by path
 * characters (letters, digits, `.`, `_`, `-`, `/`). The lookbehind refuses a
 * `/` that continues a longer token (`src/foo/bar`, `./dist/x`, `~/notes`,
 * `$HOME/x`, `${DIR}/x`, `$(pwd)/x`, `*​/x`), because the tail of a relative
 * path is not an absolute path.
 */
const BASH_ABS_PATH_RE = /(?<![A-Za-z0-9._\-~})*])\/[A-Za-z0-9._\-/]+/g;
/**
 * Add every absolute-path-looking token of a Bash command to `sink`.
 * Skips very short hits and the `//host/...` part of http(s) URLs.
 */
function collectBashPaths(command, sink) {
    for (const match of command.matchAll(BASH_ABS_PATH_RE)) {
        const candidate = match[0];
        // Very short hits are likely "/" or "/x".
        if (candidate.length < 6)
            continue;
        // For a URL the match begins at the "//" right after the scheme, so the
        // scheme has to be looked for immediately BEFORE the match.
        const start = match.index;
        const precededByScheme = /https?:$/i.test(command.slice(Math.max(0, start - 6), start));
        if (candidate.startsWith("//") && precededByScheme)
            continue;
        sink.add(candidate);
    }
}
/**
 * Pull every plausible absolute path out of the messages' raw tool-use
 * blocks (`message.raw.message.content`).
 *
 * Paths are deduplicated per message (repeated reads of the same file count
 * once per message, not N times) so a single noisy Read loop cannot dominate
 * the tally. The same path in two different messages is reported twice.
 *
 * Sources per tool:
 *   - Read / Write / Edit / NotebookEdit: `input.file_path`, else
 *     `input.notebook_path`, when it is an absolute path string.
 *   - Bash: absolute-path-looking tokens of `input.command`; http(s) URL
 *     paths and tails of relative paths are ignored.
 *   - Glob / Grep: `input.path`, else `input.pattern`, when it starts with `/`.
 *
 * @param {Array<{ raw?: unknown }>} messages  Session messages.
 * @returns {string[]} Paths in encounter order, unique within each message.
 */
export function extractPathsFromMessages(messages) {
    const out = [];
    for (const message of messages) {
        const content = message.raw?.message?.content;
        if (!Array.isArray(content))
            continue;
        const seen = new Set();
        for (const block of content) {
            if (!block || typeof block !== "object" || block.type !== "tool_use")
                continue;
            const input = block.input ?? {};
            const name = block.name ?? "";
            if (name === "Read" || name === "Write" || name === "Edit" || name === "NotebookEdit") {
                const filePath = input.file_path ?? input.notebook_path;
                if (typeof filePath === "string" && filePath.startsWith("/"))
                    seen.add(filePath);
            }
            else if (name === "Bash") {
                if (typeof input.command === "string")
                    collectBashPaths(input.command, seen);
            }
            else if (name === "Glob" || name === "Grep") {
                const target = input.path ?? input.pattern;
                if (typeof target === "string" && target.startsWith("/"))
                    seen.add(target);
            }
        }
        for (const path of seen)
            out.push(path);
    }
    return out;
}
151
/**
 * Infer which project a session was really about from the paths its tool
 * calls touched.
 *
 * Every extracted path that resolves to a slug counts as one hit for that
 * slug. With fewer than MIN_PATH_HITS hits no inference is attempted.
 * Otherwise the slug with the most hits (first one wins on ties) is
 * reported, but only when its share of all hits reaches MIN_CONFIDENCE.
 * Whether to act on the result is left to the caller.
 *
 * @param {unknown[]} messages  Session messages, passed to
 *   extractPathsFromMessages.
 * @param {{ path: string }[]} [roots]  Known project roots; defaults to
 *   listKnownProjectRoots().
 * @returns {{ inferredProject: string | null, confidence: number,
 *   totalHits: number, perProject: Record<string, number> }}
 */
export function inferProjectFromContent(messages, roots = listKnownProjectRoots()) {
    const perProject = {};
    let totalHits = 0;
    extractPathsFromMessages(messages).forEach((touchedPath) => {
        const owner = pathToProjectSlug(touchedPath, roots);
        if (owner) {
            perProject[owner] = (perProject[owner] ?? 0) + 1;
            totalHits += 1;
        }
    });
    if (totalHits < MIN_PATH_HITS) {
        return { inferredProject: null, confidence: 0, totalHits, perProject };
    }
    // Strict ">" keeps the earliest entry when two projects are tied.
    const [topSlug, topCount] = Object.entries(perProject)
        .reduce((best, entry) => (entry[1] > best[1] ? entry : best), ["", 0]);
    const confidence = topCount / totalHits;
    const inferredProject = confidence >= MIN_CONFIDENCE ? topSlug : null;
    return { inferredProject, confidence, totalHits, perProject };
}
@@ -0,0 +1,47 @@
1
+ import { hostname } from "node:os";
2
/**
 * Make `raw` safe for use as a git branch name.
 *
 * Steps, in order:
 *   1. every character outside [A-Za-z0-9._-] becomes '-';
 *   2. runs of '-' collapse to one '-', runs of '.' collapse to one '.'
 *      (git rejects ".." inside a ref name);
 *   3. leading / trailing '-' and '.' are trimmed;
 *   4. the result is capped at 60 characters (re-trimming the tail);
 *   5. every trailing ".lock" is removed, since git rejects ref names that
 *      end in ".lock". This repeats until none is left, so "a.lock.lock"
 *      does not come out as "a.lock".
 *
 * @param {string} raw  Arbitrary label, e.g. a hostname.
 * @returns {string} A non-empty sanitized name; "device" when nothing
 *   usable remains.
 */
export function sanitizeBranchName(raw) {
    let s = raw
        .replace(/[^A-Za-z0-9._-]/g, "-")
        .replace(/-+/g, "-")
        .replace(/\.+/g, ".")
        .replace(/^[-.]+|[-.]+$/g, "");
    if (s.length > 60)
        s = s.slice(0, 60).replace(/[-.]+$/, "");
    // Loop: stripping one ".lock" (plus trailing separators) can expose another.
    while (s.endsWith(".lock"))
        s = s.slice(0, -5).replace(/[-.]+$/, "");
    return s.length === 0 ? "device" : s;
}
23
/**
 * Derive a device branch name from this machine's hostname by passing it
 * through sanitizeBranchName.
 *
 * @returns {string} The sanitized hostname.
 */
export function deviceBranchFromHostname() {
    const machineName = hostname();
    return sanitizeBranchName(machineName);
}
26
/**
 * Heuristic: does `name` look like a stable device label rather than a
 * volatile, network-assigned hostname?
 *
 * Conservative by design: everything is considered stable unless it matches
 * one of the two patterns below.
 *   - ends in `.local` (Bonjour / mDNS style names);
 *   - ends in `.<2 or more letters>`, i.e. looks like a fully-qualified DNS
 *     name such as `host42.corp.example.com`.
 *
 * @param {string} name  Device / branch name to judge.
 * @returns {boolean} false for the volatile-looking patterns, true otherwise.
 */
export function isStableDeviceName(name) {
    const looksLikeMdns = name.endsWith(".local");
    // FQDN-ish: the last dot is followed only by letters (the TLD), at least two.
    const looksLikeFqdn = /\.[A-Za-z]{2,}$/.test(name);
    return !(looksLikeMdns || looksLikeFqdn);
}
@@ -0,0 +1,121 @@
1
+ /** Caps on commits / files_touched / candidate_decisions to bound frontmatter
2
+ * size. files_touched dominates volume — a long debugging session can hit
3
+ * Read on 500+ files; we keep the first-seen 200, which covers the dense
4
+ * early-session exploration without exploding YAML parse cost. */
5
+ const FILES_CAP = 200; // max distinct paths kept in files_touched (first seen wins)
6
+ const COMMITS_CAP = 100; // max entries kept in commits (commits and tags share this list)
7
+ const DECISIONS_CAP = 20; // max entries kept in candidate_decisions
8
+ /** Heuristic regex for "user signaled a decision here." Intentionally narrow
9
+ * — false positives waste the digest skill's attention more than false
10
+ * negatives. The skill is told these are candidates, not facts. Case-insensitive; the Chinese alternatives read roughly "I decided", "we decided", "finally adopted", "ended up using". */
11
+ const DECISION_RE = /(我决定|我们决定|最后采用|最后用|let'?s go with|decided to|going with|ok merged|merged it|ship it as)/i;
12
+ /** Match `git commit … -m "msg"` / `-m 'msg'` / `-m word` / heredoc commits (`-m "$(cat <<'EOF' … EOF)"`). We also
13
+ * catch `git tag -a vX.Y.Z -m "…"` and bare `git tag <ver>` (only tags starting with `v` are recognised) since releases
14
+ * matter for the digest. `git push` is excluded — it's procedural, not a
15
+ * decision point. */
16
+ const GIT_COMMIT_RE = /\bgit\s+commit\b[^\n]*?-m\s+(?:"((?:[^"\\]|\\.)*)"|'((?:[^'\\]|\\.)*)'|(\S+))/;
17
+ const GIT_COMMIT_HEREDOC_RE = /\bgit\s+commit\b[^\n]*?-m\s+"\$\(cat\s+<<\s*'?(\w+)'?[\r\n]+([\s\S]*?)[\r\n]+\1\s*\)"/;
18
+ const GIT_TAG_RE = /\bgit\s+tag\b(?:[^\n]*?-(?:a|s)\s+)?\s*(v[\w.\-+]+)(?:[^\n]*?-m\s+(?:"((?:[^"\\]|\\.)*)"|'((?:[^'\\]|\\.)*)'))?/;
19
+ /** Tools whose input.file_path contributes to files_touched. */
20
+ const FILE_TOOLS = new Set(["Read", "Edit", "Write", "MultiEdit", "NotebookEdit"]);
21
/**
 * Build the mechanical-facts manifest of a session from its already
 * extracted messages. Performs no I/O itself.
 *
 * What is collected:
 *   - user_turns / assistant_turns: message counts by role;
 *   - tools_used: number of tool_use blocks per tool name;
 *   - files_touched: distinct file paths of FILE_TOOLS calls, first seen
 *     first, at most FILES_CAP;
 *   - commits: commit / tag events parsed from Bash commands (a command is
 *     tried as a commit first, then as a tag), at most COMMITS_CAP;
 *   - candidate_decisions: user messages matching DECISION_RE, at most
 *     DECISIONS_CAP, each with a 100-character preview.
 *
 * @param messages SessionMessage[] in chronological order.
 * @param messageLineOffsets parallel array where messageLineOffsets[i] is the
 *   line of the i-th message's heading in the rendered md; a missing entry
 *   is recorded as line 0.
 * @returns {{ user_turns: number, assistant_turns: number,
 *   tools_used: Record<string, number>, commits: object[],
 *   files_touched: string[], candidate_decisions: object[] }}
 */
export function extractManifest(messages, messageLineOffsets) {
    const turns = { user: 0, assistant: 0 };
    const tools_used = {};
    const commits = [];
    const files_touched = [];
    const candidate_decisions = [];
    const knownFiles = new Set();
    for (const [index, message] of messages.entries()) {
        const line = messageLineOffsets[index] ?? 0;
        const fromUser = message.role === "user";
        if (fromUser || message.role === "assistant") {
            turns[message.role] += 1;
        }
        // Decision heuristic looks at user text only.
        if (fromUser && message.text && DECISION_RE.test(message.text) && candidate_decisions.length < DECISIONS_CAP) {
            candidate_decisions.push({ line, preview: previewOf(message.text, 100) });
        }
        const toolUses = (message.contentBlocks ?? []).filter((block) => block.type === "tool_use");
        for (const block of toolUses) {
            const tool = block.name;
            tools_used[tool] = (tools_used[tool] ?? 0) + 1;
            if (FILE_TOOLS.has(tool)) {
                const filePath = readFilePath(block);
                const isNew = Boolean(filePath) && !knownFiles.has(filePath);
                if (isNew && files_touched.length < FILES_CAP) {
                    knownFiles.add(filePath);
                    files_touched.push(filePath);
                }
            }
            // Tool usage above is always counted; only event parsing stops at the cap.
            if (tool !== "Bash" || commits.length >= COMMITS_CAP) {
                continue;
            }
            const command = readBashCommand(block);
            if (!command) {
                continue;
            }
            const event = parseCommit(command) || parseTag(command);
            if (event) {
                commits.push({ ...event, line });
            }
        }
    }
    return {
        user_turns: turns.user,
        assistant_turns: turns.assistant,
        tools_used,
        commits,
        files_touched,
        candidate_decisions,
    };
}
85
/**
 * Read the target file of a file-tool `tool_use` block.
 *
 * `input.file_path` is preferred. `input.notebook_path` is accepted as a
 * fallback so that NotebookEdit calls, which are listed in FILE_TOOLS, are
 * recorded too (the content-based project inference reads the same pair of
 * fields).
 *
 * @param {{ input?: unknown }} b  A tool_use content block.
 * @returns {string | null} The path, or null when the block has no object
 *   input or neither field is a string.
 */
function readFilePath(b) {
    const input = b.input;
    if (!input || typeof input !== "object")
        return null;
    if (typeof input.file_path === "string")
        return input.file_path;
    return typeof input.notebook_path === "string" ? input.notebook_path : null;
}
91
/**
 * Read the shell command of a Bash `tool_use` block.
 *
 * @param {{ input?: unknown }} b  A tool_use content block.
 * @returns {string | null} `input.command` when the input is an object and
 *   the command is a string; null otherwise.
 */
function readBashCommand(b) {
    const { input } = b;
    const hasObjectInput = Boolean(input) && typeof input === "object";
    if (hasObjectInput && typeof input.command === "string") {
        return input.command;
    }
    return null;
}
97
/**
 * Recognise a `git commit … -m …` invocation inside a shell command.
 *
 * The heredoc form is tried first; only the first line of the heredoc body
 * is kept as the message. Otherwise the double-quoted, single-quoted or bare
 * `-m` argument is used as-is (trimmed).
 *
 * @param {string} cmd  Shell command text.
 * @returns {{ sha: string, msg: string } | null} An entry with an empty
 *   `sha`, or null when no commit with a non-empty message is found.
 */
function parseCommit(cmd) {
    const asCommit = (message) => (message ? { sha: "", msg: message } : null);
    const heredoc = cmd.match(GIT_COMMIT_HEREDOC_RE);
    if (heredoc) {
        const body = (heredoc[2] ?? "").trim();
        const lineBreak = body.indexOf("\n");
        const subject = lineBreak === -1 ? body : body.slice(0, lineBreak);
        return asCommit(subject.trim());
    }
    const inline = cmd.match(GIT_COMMIT_RE);
    if (!inline) {
        return null;
    }
    const [, doubleQuoted, singleQuoted, bare] = inline;
    return asCommit((doubleQuoted ?? singleQuoted ?? bare ?? "").trim());
}
110
/**
 * Recognise a `git tag` invocation inside a shell command.
 *
 * The tag name is stored in `sha`. The annotation given with `-m` becomes
 * the message; without one the message is `tag <name>`.
 *
 * @param {string} cmd  Shell command text.
 * @returns {{ sha: string, msg: string } | null} The tag entry, or null when
 *   GIT_TAG_RE does not match.
 */
function parseTag(cmd) {
    const found = cmd.match(GIT_TAG_RE);
    if (found === null) {
        return null;
    }
    const [, tag, doubleQuoted, singleQuoted] = found;
    const annotation = (doubleQuoted ?? singleQuoted ?? "").trim();
    return { sha: tag, msg: annotation === "" ? `tag ${tag}` : annotation };
}
118
/**
 * Produce a one-line preview of `text`: whitespace runs collapse to single
 * spaces, the ends are trimmed, and anything longer than `max` characters is
 * cut to `max - 1` characters plus an ellipsis.
 *
 * @param {string} text  Source text.
 * @param {number} max  Maximum length of the returned preview.
 * @returns {string} The preview.
 */
function previewOf(text, max) {
    const singleLine = text.replace(/\s+/g, " ").trim();
    if (singleLine.length <= max) {
        return singleLine;
    }
    return `${singleLine.slice(0, max - 1)}…`;
}
@@ -0,0 +1,32 @@
1
/**
 * Heuristic: is this slug / path a real developer project rather than a
 * worktree, workspace fragment or hash-named storage directory?
 *
 * Rejected:
 *   - empty values and the generic names "root" / "home";
 *   - anything containing ".worktrees-" (case-insensitive);
 *   - anything ending in ".code-workspace", "-workspaceStorage" or
 *     "-workspace.json" (case-insensitive);
 *   - values starting with a run of 10 or more digits;
 *   - values made up solely of 20 or more lowercase hex characters.
 *
 * Being wrong in edge cases is acceptable; the goal is to keep obviously
 * junk entries out of listings.
 *
 * @param {string} slugOrPath  Project slug or path.
 * @returns {boolean} true when none of the rejection rules applies.
 */
export function isRealProjectPath(slugOrPath) {
    if (!slugOrPath || slugOrPath === "root" || slugOrPath === "home") {
        return false;
    }
    const lower = slugOrPath.toLowerCase();
    const isWorkspaceFragment = lower.includes(".worktrees-")
        || lower.endsWith(".code-workspace")
        || lower.endsWith("-workspacestorage")
        || lower.endsWith("-workspace.json");
    // The pseudo-ID checks run on the original casing, not on `lower`.
    const isPseudoId = /^\d{10,}/.test(slugOrPath) || /^[a-f0-9]{20,}$/.test(slugOrPath);
    return !(isWorkspaceFragment || isPseudoId);
}
@@ -0,0 +1,106 @@
1
+ const SIGNAL_CATEGORIES = { // keyword lists per insight category; scoreText() substring-matches them against lower-cased text and divides the category count by a hard-coded 5, the number of keys here
2
+ debugging: ["bug", "error", "fix", "debug", "root cause", "traceback", "broken", "问题", "修复"], // Chinese: "problem", "fix/repair"
3
+ architecture: ["architecture", "design", "pattern", "trade-off", "decision", "approach", "架构", "设计"], // Chinese: "architecture", "design"
4
+ discovery: ["learned", "discovered", "insight", "gotcha", "trap", "pitfall", "trick", "发现", "陷阱", "关键"], // Chinese: "discover", "trap", "key/crucial"
5
+ reasoning: ["because", "instead of", "rather than", "why", "the reason", "原因", "所以", "因为"], // Chinese: "reason", "therefore", "because"
6
+ evaluation: ["review", "evaluate", "score", "verdict", "assessment", "评估", "审查"], // Chinese: "evaluate", "review"
7
+ };
8
/**
 * Detect a "memarium meta-session": a session that was started by invoking
 * the /memarium skill itself, so its content is the digest pipeline rather
 * than real engineering work.
 *
 * Only the first user message (leading whitespace ignored) is inspected.
 * Any one of these signals is enough:
 *   - it starts with "/memarium";
 *   - it starts with "/loop /memarium" (looped invocation);
 *   - it mentions "skills/memarium/SKILL.md".
 *
 * @param {string} mdBody  Rendered session markdown.
 * @returns {boolean} true when the session looks like a meta-session.
 */
export function isMemariumMetaSession(mdBody) {
    const opening = (extractUserTexts(mdBody)[0] ?? "").trimStart();
    const directInvocation = /^\/memarium(\b|$)/i.test(opening);
    const loopedInvocation = /^\/loop\s+\/memarium(\b|$)/i.test(opening);
    const skillFileReference = opening.includes("skills/memarium/SKILL.md");
    return directInvocation || loopedInvocation || skillFileReference;
}
31
/**
 * Derive per-session signals from a rendered session markdown body.
 *
 * Only the text of "## User" blocks is used (see extractUserTexts):
 *   - title: the first user message on one line, cut at 80 characters
 *     (no ellipsis);
 *   - preview: all user messages joined on one line, cut at 300 characters
 *     plus an ellipsis;
 *   - insightScore: scoreText() of the joined text, given the length of the
 *     un-collapsed joined user text and the length of the whole body.
 *
 * @param {string} mdBody  Rendered session markdown.
 * @returns {{ title: string, preview: string, insightScore: number }}
 */
export function extractSessionSignals(mdBody) {
    const squash = (s) => s.replace(/\s+/g, " ").trim();
    const userTexts = extractUserTexts(mdBody);
    const rawJoined = userTexts.join(" ");
    const joined = squash(rawJoined);
    const title = squash(userTexts[0] ?? "").slice(0, 80);
    let preview = joined;
    if (joined.length > 300) {
        preview = joined.slice(0, 300) + "…";
    }
    const insightScore = scoreText(joined, rawJoined.length, mdBody.length);
    return { title, preview, insightScore };
}
58
/**
 * Collect the text of every "## User" block of a rendered session markdown.
 *
 * A block starts at a line beginning with "## User" (word boundary after
 * "User") and runs until the next line beginning with "## " or the end of
 * the input. Each block is trimmed; blocks that end up empty are dropped.
 *
 * @param {string} md  Rendered session markdown.
 * @returns {string[]} User texts in document order.
 */
function extractUserTexts(md) {
    const blocks = [];
    // Lines of the currently open user block, or null while outside one.
    let open = null;
    const closeBlock = () => {
        if (open !== null && open.length > 0) {
            blocks.push(open.join("\n").trim());
        }
        open = null;
    };
    for (const line of md.split("\n")) {
        if (/^## /.test(line)) {
            // Any level-2 heading ends the current block; a User heading opens a new one.
            closeBlock();
            if (/^## User\b/.test(line)) {
                open = [];
            }
        }
        else if (open !== null) {
            open.push(line);
        }
    }
    closeBlock();
    return blocks.filter((s) => s.length > 0);
}
88
/**
 * Score how insight-bearing a session's user text looks, from 0 to 1.
 *
 * The text is lower-cased and each SIGNAL_CATEGORIES keyword is searched as
 * a substring. Empty text scores 0. Text hitting fewer than two categories
 * scores a flat 0.1. Otherwise the score is a weighted sum, capped at 1.0:
 *   40% categories hit / 5, 30% distinct keywords hit / 15,
 *   30% share of user text in the whole document.
 *
 * @param {string} joinedLower  Joined user text (lower-cased again here).
 * @param {number} userTextLen  Length of the user text.
 * @param {number} totalLen  Length of the whole document; values below 1 are
 *   treated as 1.
 * @returns {number} The score.
 */
function scoreText(joinedLower, userTextLen, totalLen) {
    if (!joinedLower) {
        return 0;
    }
    const haystack = joinedLower.toLowerCase();
    // Number of matching keywords for each category that matched at all.
    const hitsPerCategory = Object.values(SIGNAL_CATEGORIES)
        .map((keywords) => keywords.filter((kw) => haystack.includes(kw)).length)
        .filter((count) => count > 0);
    const categoryHits = hitsPerCategory.length;
    if (categoryHits < 2) {
        return 0.1;
    }
    const totalHits = hitsPerCategory.reduce((sum, count) => sum + count, 0);
    const userRatio = userTextLen / Math.max(totalLen, 1);
    const score = (categoryHits / 5) * 0.4 + (totalHits / 15) * 0.3 + userRatio * 0.3;
    return Math.min(1.0, score);
}
@@ -0,0 +1,127 @@
1
+ /** Min length of sanitized user text to qualify as a "real" prompt (vs. a
2
+ * tool_result wrapper). Said to match the writer's sanitizer gate — NOTE(review): that gate is not visible in this module; confirm. */
3
+ const USER_TEXT_MIN = 50;
4
+ /** Min length of an assistant's plain text reply (no tool_use) to count as
5
+ * a substantive "voice" turn worth listing in the TOC. Below this, the
6
+ * message is usually "OK", "done", or a brief acknowledgement. */
7
+ const ASSISTANT_TEXT_MIN = 200;
8
+ /** Bash sub-commands that signal a noteworthy VCS event (`git commit`, `git tag`). `git push` excluded:
9
+ * it's procedural, and we already capture the underlying commit/tag. */
10
+ const GIT_NOTEWORTHY_RE = /\bgit\s+(commit|tag)\b/;
11
+ /** Tools that materially mutate the repo. */
12
+ const EDIT_TOOLS = new Set(["Edit", "Write", "MultiEdit", "NotebookEdit"]);
13
/**
 * Build an importance-based table of contents for a session.
 *
 * A message gets an entry only when computeMarkers() returns a non-empty
 * marker string for it; everything else is skipped. `turn` is the 1-based
 * position of the message in `messages`, so skipped messages leave gaps.
 *
 * @param messages SessionMessage[] in chronological order.
 * @param messageLineOffsets parallel array: messageLineOffsets[i] is the line
 *   of message i's heading in the rendered md; a missing entry becomes 0.
 * @returns {{ turn: number, timestamp: string, markers: string,
 *   preview: string, line: number }[]} Entries in message order.
 */
export function buildTocEntries(messages, messageLineOffsets) {
    const toc = [];
    for (const [index, message] of messages.entries()) {
        const markers = computeMarkers(message);
        if (markers) {
            toc.push({
                turn: index + 1,
                timestamp: message.timestamp ?? "",
                markers,
                preview: computePreview(message),
                line: messageLineOffsets[index] ?? 0,
            });
        }
    }
    return toc;
}
41
/**
 * Compute the marker string that says why a message is noteworthy.
 *
 *   - user message with at least USER_TEXT_MIN characters of text: "🧑";
 *   - assistant message: "💾" when a Bash tool call matches
 *     GIT_NOTEWORTHY_RE, then "✏️" when any EDIT_TOOLS call is present;
 *   - assistant message without edit or commit but with at least
 *     ASSISTANT_TEXT_MIN characters of text: "🤖". This marker never
 *     combines with the edit / commit markers.
 *
 * @param {object} m  A session message (`role`, `text`, `contentBlocks`).
 * @returns {string} Concatenated markers; "" when the message is not
 *   noteworthy (including any role other than user / assistant).
 */
function computeMarkers(m) {
    if (m.role === "user") {
        return m.text && m.text.length >= USER_TEXT_MIN ? "🧑" : "";
    }
    if (m.role !== "assistant") {
        return "";
    }
    const toolUses = (m.contentBlocks ?? []).filter((b) => b.type === "tool_use");
    const hasEdit = toolUses.some((b) => EDIT_TOOLS.has(b.name));
    const hasCommit = toolUses.some((b) => {
        if (b.name !== "Bash") {
            return false;
        }
        const cmd = readCommand(b);
        return Boolean(cmd) && GIT_NOTEWORTHY_RE.test(cmd);
    });
    let marks = "";
    if (hasCommit) {
        marks += "💾";
    }
    if (hasEdit) {
        marks += "✏️";
    }
    // A plain "voice" turn: substantive text and no edit / commit in the same message.
    if (!hasEdit && !hasCommit && m.text && m.text.length >= ASSISTANT_TEXT_MIN) {
        marks += "🤖";
    }
    return marks;
}
72
/**
 * Build the short preview shown for a TOC entry.
 *
 * Message text is preferred. For tool-only turns the first two actions are
 * summarized instead, joined with " · ":
 *   - EDIT_TOOLS calls become "<Tool> <path>", reading `input.file_path` or,
 *     failing that, `input.notebook_path` (so NotebookEdit shows its
 *     notebook), or just "<Tool>" when neither is a string;
 *   - Bash calls contribute the first line of their command.
 *
 * @param {object} m  A session message (`text`, `contentBlocks`).
 * @returns {string} A preview of at most 100 characters; "" when the message
 *   has neither text nor summarizable tool calls.
 */
function computePreview(m) {
    // Prefer user/assistant text. For tool-only turns, summarize the actions.
    if (m.text)
        return previewOf(m.text, 100);
    const actions = [];
    for (const b of m.contentBlocks ?? []) {
        if (b.type !== "tool_use")
            continue;
        if (EDIT_TOOLS.has(b.name)) {
            const target = b.input?.file_path ?? b.input?.notebook_path;
            actions.push(typeof target === "string" ? `${b.name} ${target}` : b.name);
        }
        else if (b.name === "Bash") {
            const cmd = readCommand(b);
            if (cmd)
                actions.push(cmd.split("\n", 1)[0].trim());
        }
        // Two actions are enough for a preview.
        if (actions.length >= 2)
            break;
    }
    return previewOf(actions.join(" · "), 100);
}
99
/**
 * Read the shell command of a Bash `tool_use` block.
 *
 * @param {{ input?: unknown }} b  A tool_use content block.
 * @returns {string | null} `input.command` when the input is an object
 *   holding a string command; null otherwise.
 */
function readCommand(b) {
    const payload = b.input;
    const isObjectPayload = typeof payload === "object" && payload !== null;
    if (!isObjectPayload) {
        return null;
    }
    const { command } = payload;
    return typeof command === "string" ? command : null;
}
105
/**
 * Collapse `text` to a single trimmed line and shorten it to at most `max`
 * characters, marking a cut with a trailing ellipsis.
 *
 * @param {string} text  Source text.
 * @param {number} max  Maximum length of the result.
 * @returns {string} The shortened single-line text.
 */
function previewOf(text, max) {
    const flattened = text.replace(/\s+/g, " ").trim();
    const fits = flattened.length <= max;
    return fits ? flattened : flattened.slice(0, max - 1).concat("…");
}
109
/**
 * Render TOC entries as a markdown block: a heading, a one-line legend and
 * a table with one row per entry.
 *
 * The time column shows characters 5-15 of the timestamp with "T" replaced
 * by a space, or "—" when the entry has no timestamp. Previews are passed
 * through escapeTableCell.
 *
 * @param {{ turn: number, timestamp: string, markers: string,
 *   preview: string, line: number }[]} entries  TOC entries.
 * @returns {string} The markdown block, or "" when there are no entries.
 */
export function renderTocMarkdown(entries) {
    if (entries.length === 0) {
        return "";
    }
    const lines = [
        "# Table of Contents",
        "",
        `Importance-based — real user turns (≥${USER_TEXT_MIN} chars), file edits, commits, and substantive assistant replies. Tool-result-only turns omitted.`,
        "",
        "| # | Time | Marker | Preview | Line |",
        "|---|------|--------|---------|------|",
    ];
    for (const entry of entries) {
        let time = "—";
        if (entry.timestamp) {
            time = entry.timestamp.slice(5, 16).replace("T", " ");
        }
        const preview = escapeTableCell(entry.preview);
        lines.push(`| ${entry.turn} | ${time} | ${entry.markers} | ${preview} | →L${entry.line} |`);
    }
    return lines.join("\n");
}
125
/**
 * Make a string safe to place inside a markdown table cell: pipes are
 * backslash-escaped and newlines become spaces.
 *
 * @param {string} s  Cell content.
 * @returns {string} The escaped content.
 */
function escapeTableCell(s) {
    const pipesEscaped = s.replace(/\|/g, "\\|");
    const onOneLine = pipesEscaped.replace(/\n/g, " ");
    return onOneLine;
}