pi-vcc 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +120 -0
  3. package/demo.gif +0 -0
  4. package/flow/plans/20260515-1300/plan.md +206 -0
  5. package/index.ts +14 -0
  6. package/package.json +36 -0
  7. package/pi-vcc-config.schema.json +131 -0
  8. package/scripts/audit-sessions.ts +88 -0
  9. package/scripts/benchmark-real-sessions.ts +25 -0
  10. package/scripts/compare-before-after.ts +36 -0
  11. package/scripts/dump-branch-output.ts +20 -0
  12. package/src/commands/pi-vcc.ts +33 -0
  13. package/src/commands/vcc-recall.ts +65 -0
  14. package/src/core/brief.ts +381 -0
  15. package/src/core/build-sections.ts +87 -0
  16. package/src/core/content.ts +60 -0
  17. package/src/core/filter-noise.ts +42 -0
  18. package/src/core/format-recall.ts +27 -0
  19. package/src/core/format.ts +56 -0
  20. package/src/core/lineage.ts +26 -0
  21. package/src/core/load-messages.ts +63 -0
  22. package/src/core/normalize.ts +66 -0
  23. package/src/core/recall-scope.ts +14 -0
  24. package/src/core/render-entries.ts +68 -0
  25. package/src/core/report.ts +237 -0
  26. package/src/core/sanitize.ts +5 -0
  27. package/src/core/search-entries.ts +230 -0
  28. package/src/core/settings.ts +215 -0
  29. package/src/core/skill-collapse.ts +35 -0
  30. package/src/core/summarize.ts +159 -0
  31. package/src/core/tool-args.ts +14 -0
  32. package/src/details.ts +7 -0
  33. package/src/extract/commits.ts +69 -0
  34. package/src/extract/files.ts +80 -0
  35. package/src/extract/goals.ts +79 -0
  36. package/src/extract/preferences.ts +55 -0
  37. package/src/extract/references.ts +214 -0
  38. package/src/extract/signals.ts +145 -0
  39. package/src/hooks/before-compact.ts +405 -0
  40. package/src/sections.ts +14 -0
  41. package/src/tools/recall.ts +109 -0
  42. package/src/types.ts +14 -0
  43. package/tests/before-compact-hook.test.ts +181 -0
  44. package/tests/before-compact.test.ts +140 -0
  45. package/tests/brief.test.ts +206 -0
  46. package/tests/build-sections.test.ts +90 -0
  47. package/tests/compile.test.ts +110 -0
  48. package/tests/config-integration.test.ts +107 -0
  49. package/tests/content.test.ts +31 -0
  50. package/tests/edge-cases.test.ts +368 -0
  51. package/tests/extract-goals.test.ts +86 -0
  52. package/tests/extract-preferences.test.ts +30 -0
  53. package/tests/extract-references.test.ts +475 -0
  54. package/tests/extract-signals.test.ts +561 -0
  55. package/tests/filter-noise.test.ts +61 -0
  56. package/tests/fixtures.ts +61 -0
  57. package/tests/format-recall.test.ts +30 -0
  58. package/tests/format.test.ts +91 -0
  59. package/tests/lineage.test.ts +33 -0
  60. package/tests/load-messages.test.ts +51 -0
  61. package/tests/normalize.test.ts +97 -0
  62. package/tests/real-sessions.test.ts +38 -0
  63. package/tests/recall-expand.test.ts +15 -0
  64. package/tests/recall-scope.test.ts +32 -0
  65. package/tests/recall-tool-scope.test.ts +67 -0
  66. package/tests/render-entries.test.ts +62 -0
  67. package/tests/report.test.ts +44 -0
  68. package/tests/sanitize.test.ts +24 -0
  69. package/tests/search-entries.test.ts +144 -0
  70. package/tests/settings-scaffold.test.ts +120 -0
  71. package/tests/settings.test.ts +32 -0
  72. package/tests/support/load-session.ts +23 -0
  73. package/tests/support/real-sessions.ts +51 -0
  74. package/tsconfig.json +14 -0
  75. package/vitest.config.ts +7 -0
@@ -0,0 +1,69 @@
1
+ import type { NormalizedBlock } from "../types";
2
+
3
+ interface CommitInfo { // One commit recovered from a `git commit -m ...` bash tool call.
4
+ hash?: string; // Abbreviated hash (7-12 hex chars) found in a nearby tool_result; absent when none was found.
5
+ message: string; // First line of the commit message, trimmed, with \" and \' unescaped.
6
+ }
7
+
8
// Captures the message of a `git commit` invocation written on one command line.
// Accepted flag spellings: `-m "x"`, `-m"x"`, combined short flags such as
// `-am "x"`, and `--message "x"` / `--message="x"`.
// Capture groups: 1 = "double-quoted", 2 = 'single-quoted', 3 = $'ANSI-C quoted'.
// Backslash escapes inside the quotes are consumed as pairs, so an escaped
// quote does not terminate the message.
const COMMIT_MSG_RE =
  /git\s+commit\b[^\n]*?\s(?:-[a-zA-Z]*m\s*|--message(?:=|\s+))(?:"((?:[^"\\]|\\.)*)"|'((?:[^'\\]|\\.)*)'|\$'((?:[^'\\]|\\.)*)')/;
// Fallback hash matcher: any standalone run of 7-12 lowercase hex characters.
const HASH_RE = /\b([0-9a-f]{7,12})\b/;
11
+
12
/**
 * Returns the trimmed text that precedes the first line break, where a break
 * is either a real newline or the two-character sequence backslash + "n".
 */
const firstLineOf = (text: string): string => {
  const breakAt = text.search(/\\n|\n/);
  const head = breakAt === -1 ? text : text.slice(0, breakAt);
  return head.trim();
};
16
+
17
/** Turns shell-escaped quotes (\" and \') back into plain quotes and trims the result. */
const cleanMessage = (msg: string): string => {
  const doubleQuotesRestored = msg.replace(/\\"/g, '"');
  const allQuotesRestored = doubleQuotesRestored.replace(/\\'/g, "'");
  return allQuotesRestored.trim();
};
19
+
20
// `git commit` summary line: `[branch <hash>] message`.
const BRACKET_HASH_RE = /\[\S+\s+([0-9a-f]{7,12})\]/;
// Range output: `<old>..<new>`; the second hash is the new tip.
const RANGE_HASH_RE = /\b([0-9a-f]{7,12})\.\.([0-9a-f]{7,12})\b/;

/**
 * Picks a commit hash out of tool output, trying the bracketed summary form
 * first, then an `<old>..<new>` range, then any bare 7-12 character hex token.
 */
const findCommitHash = (text: string): string | undefined =>
  text.match(BRACKET_HASH_RE)?.[1] ??
  text.match(RANGE_HASH_RE)?.[2] ??
  text.match(HASH_RE)?.[1];

/**
 * Extract git commits from bash tool calls (`git commit -m "..."`) and pair
 * each with a hash found in a tool_result among the next two blocks.
 *
 * Only the first line of the message is kept. Commits are returned in the
 * order encountered; repeated (hash, message) pairs are reported once.
 *
 * @param blocks Normalized session blocks in chronological order.
 * @returns One entry per distinct commit; `hash` is undefined when no hash
 *          could be found in the following output.
 */
export const extractCommits = (blocks: NormalizedBlock[]): CommitInfo[] => {
  const commits: CommitInfo[] = [];
  // Keys of commits already emitted, so dedup is a constant-time lookup.
  const seen = new Set<string>();

  for (let i = 0; i < blocks.length; i++) {
    const b = blocks[i];
    if (b.kind !== "tool_call" || b.name !== "bash") continue;
    const cmd = typeof b.args.command === "string" ? b.args.command : "";
    if (!/\bgit\s+commit\b/.test(cmd)) continue;
    const m = cmd.match(COMMIT_MSG_RE);
    if (!m) continue;
    const message = firstLineOf(cleanMessage(m[1] ?? m[2] ?? m[3] ?? ""));
    if (!message) continue;

    // Look for a hash in tool_result blocks within the next two positions.
    let hash: string | undefined;
    const end = Math.min(blocks.length, i + 3);
    for (let j = i + 1; j < end; j++) {
      const r = blocks[j];
      if (r.kind !== "tool_result") continue;
      hash = findCommitHash(r.text);
      if (hash) break;
    }

    // Dedup by hash+message.
    const key = `${hash ?? ""}::${message}`;
    if (seen.has(key)) continue;
    seen.add(key);
    commits.push({ hash, message });
  }

  return commits;
};
60
+
61
/**
 * Render commits as display lines: `<hash>: <message>`, or just the message
 * when no hash was captured. Only the last `limit` entries are kept, i.e. the
 * most recent ones when the input is in chronological order.
 *
 * @param commits Commits to render.
 * @param limit Maximum number of lines (default 8); values below 1 yield no lines.
 * @returns One formatted line per kept commit, in input order.
 */
export const formatCommits = (commits: CommitInfo[], limit = 8): string[] => {
  // `slice(-0)` equals `slice(0)` and would return every commit, and a negative
  // limit would slice from the front, so non-positive limits are handled here.
  if (!(limit >= 1)) return [];
  return commits
    .slice(-limit)
    .map((c) => (c.hash ? `${c.hash}: ${c.message}` : c.message));
};
@@ -0,0 +1,80 @@
1
+ import type { FileOps, NormalizedBlock } from "../types";
2
+ import { extractPath } from "../core/tool-args";
3
+
4
+ interface FileActivity { // Paths touched during a session, grouped by kind of access.
5
+ read: Set<string>; // Paths passed to a tool listed in FILE_READ_TOOLS, plus fileOps.readFiles.
6
+ modified: Set<string>; // Paths passed to a tool listed in FILE_WRITE_TOOLS, plus fileOps.modifiedFiles.
7
+ created: Set<string>; // Paths passed to a tool listed in FILE_CREATE_TOOLS, plus fileOps.createdFiles.
8
+ }
9
+
10
// Tool names whose path argument counts as a file read. Both capitalised and
// lowercase spellings are listed, mirroring the write/create sets below.
const FILE_READ_TOOLS = new Set([
  "Read", "read", "read_file", "View", "view",
]);

// Tool names whose path argument counts as a file modification.
const FILE_WRITE_TOOLS = new Set([
  "Edit", "Write", "edit", "write", "edit_file", "write_file",
  "MultiEdit",
]);

// Subset of the write tools that is additionally recorded as file creation.
const FILE_CREATE_TOOLS = new Set([
  "Write", "write", "write_file",
]);
22
+
23
/**
 * Find the longest common directory prefix among absolute paths.
 *
 * Only paths starting with "/" take part, and duplicates are counted once, so
 * one file that appears several times cannot act as its own "common prefix"
 * (which would strip it down to its basename).
 *
 * @param paths Candidate paths; relative ones are ignored.
 * @returns The shared prefix including a trailing "/", or "" when there are
 *          fewer than two distinct absolute paths or they share no directory.
 */
const longestCommonDirPrefix = (paths: string[]): string => {
  const abs = [...new Set(paths.filter((p) => p.startsWith("/")))];
  if (abs.length < 2) return "";
  const split = abs.map((p) => p.split("/"));
  const min = Math.min(...split.map((s) => s.length));
  // Compare segment by segment, never consuming the last segment of the
  // shortest path (its file name).
  let i = 0;
  while (i < min - 1 && split.every((s) => s[i] === split[0][i])) i++;
  // Segment 0 is the empty string before the leading "/", so fewer than two
  // matching segments means not even one shared directory.
  if (i < 2) return "";
  return split[0].slice(0, i).join("/") + "/";
};
41
+
42
/**
 * Returns a copy of `set` in which every entry that starts with `prefix` has
 * the prefix removed. With an empty prefix the input set itself is returned.
 */
const trimPaths = (set: Set<string>, prefix: string): Set<string> => {
  if (prefix === "") return set;
  return new Set(
    Array.from(set, (entry) =>
      entry.startsWith(prefix) ? entry.slice(prefix.length) : entry,
    ),
  );
};
50
+
51
/**
 * Collects the file paths a session read, modified and created.
 *
 * Each set is seeded from the optional `fileOps` lists and then extended with
 * the path argument of every tool call whose name is in the matching tool set.
 * When the collected absolute paths share a directory prefix, that prefix is
 * stripped from all entries.
 *
 * @param blocks Normalized session blocks.
 * @param fileOps Optional pre-computed read/modified/created lists.
 */
export const extractFiles = (
  blocks: NormalizedBlock[],
  fileOps?: FileOps,
): FileActivity => {
  const read = new Set<string>(fileOps?.readFiles ?? []);
  const modified = new Set<string>(fileOps?.modifiedFiles ?? []);
  const created = new Set<string>(fileOps?.createdFiles ?? []);

  for (const block of blocks) {
    if (block.kind !== "tool_call") continue;
    const path = extractPath(block.args);
    if (!path) continue;

    const tool = block.name;
    if (FILE_READ_TOOLS.has(tool)) read.add(path);
    if (FILE_WRITE_TOOLS.has(tool)) modified.add(path);
    if (FILE_CREATE_TOOLS.has(tool)) created.add(path);
  }

  const prefix = longestCommonDirPrefix([...read, ...modified, ...created]);
  if (!prefix) return { read, modified, created };

  return {
    read: trimPaths(read, prefix),
    modified: trimPaths(modified, prefix),
    created: trimPaths(created, prefix),
  };
};
@@ -0,0 +1,79 @@
1
+ import type { NormalizedBlock } from "../types";
2
+ import { nonEmptyLines, clip } from "../core/content";
3
+ import { collapseSkillLines } from "../core/skill-collapse";
4
+
5
+ const SCOPE_CHANGE_RE = // Phrases that mark a user message as redirecting the work (case-insensitive).
6
+ /\b(instead|actually|change of plan|forget that|new task|switch to|now I want|pivot|let'?s do|stop .* and)\b/i;
7
+
8
+ const TASK_RE = // Imperative task verbs; used to spot a follow-up task in a later user message.
9
+ /\b(fix|implement|add|create|build|refactor|debug|investigate|update|remove|delete|migrate|deploy|test|write|set up)\b/i;
10
+
11
+ const NOISE_SHORT_RE = /^(ok|yes|no|sure|yeah|yep|go|hi|hey|thx|thanks|ok\b.*|y|n|k)\s*[.!?]*$/i; // Acknowledgement-only lines. NOTE(review): the `ok\b.*` alternative makes every line starting with the word "ok" count as noise, whatever follows - confirm that is intended.
12
+
13
+ // Reject lines that are clearly not user goals (pasted output, code, paths, tool dumps)
14
+ // or meta-prompt boilerplate (command templates like `/issues` that start with "For each issue:"
15
+ // followed by numbered "Read the issue in full..." steps).
16
+ const NON_GOAL_RE = // Case-sensitive; also rejects any line containing a literal backslash-n sequence.
17
+ /^\s*[\[│├└─╭╰]|```|^\s*(=[A-Z]+\(|function |const |let |var |import |export |class )|^(https?:|file:|\/[A-Za-z])|\\n|^\s*For each\b|\bin full\b[^\n]*\b(comments|issue|issues|PRs?|linked)\b/;
18
+
19
+ // Signals that the rest of the user message is a command template (e.g. /issues),
20
+ // in which case we should stop collecting goals at the signal line.
21
+ const TEMPLATE_SIGNAL_RE = // Tested per line; the first matching line and everything after it are dropped.
22
+ /^\s*(For each\b|Do NOT implement\b|Analyze and propose\b|If Task\/context\b|Output:\s*$)/i;
23
+
24
/**
 * Cuts the list at the first line that looks like the start of a command
 * template; that line and everything after it are dropped. When no line
 * matches, the input array itself is returned.
 */
const truncateAtTemplate = (lines: string[]): string[] => {
  for (let i = 0; i < lines.length; i++) {
    if (TEMPLATE_SIGNAL_RE.test(lines[i])) return lines.slice(0, i);
  }
  return lines;
};
28
+
29
/** Removes one leading list marker (`-`, `*`, `+` or `N.`) plus surrounding whitespace. */
const stripLeadingBullet = (line: string): string => {
  const withoutMarker = line.replace(/^\s*(?:[-*+]|\d+\.)\s+/, "");
  return withoutMarker.trim();
};
31
+
32
// Longest line (after trimming) still accepted as a goal.
const MAX_GOAL_CHARS = 200;

/**
 * Decides whether a line is worth keeping as a goal: more than 5 and at most
 * MAX_GOAL_CHARS characters after trimming, not a bare acknowledgement, and
 * not something that looks like pasted output, code, a path or template text.
 */
const isSubstantiveGoal = (text: string): boolean => {
  const candidate = text.trim();
  const lengthOk = candidate.length > 5 && candidate.length <= MAX_GOAL_CHARS;
  return lengthOk && !NOISE_SHORT_RE.test(candidate) && !NON_GOAL_RE.test(candidate);
};
42
+
43
// Scope-change / task intent is tested only against the first LEADING_CHARS
// characters of a user block, so pasted output further down cannot trigger it.
const LEADING_CHARS = 200;

/**
 * Derives the session's goals from user messages.
 *
 * The first user block that yields any goal lines contributes up to six of
 * them. Later user blocks only matter when their leading text signals a scope
 * change (up to three lines kept) or names a task and the first line is longer
 * than 15 characters (up to two lines kept); the most recent such block wins
 * and is appended after a "[Scope change]" marker. At most eight entries are
 * returned in total.
 */
export const extractGoals = (blocks: NormalizedBlock[]): string[] => {
  const initialGoals: string[] = [];
  let pendingScopeChange: string[] = [];
  const clipAll = (items: string[]): string[] =>
    items.map((item) => clip(item, MAX_GOAL_CHARS));

  for (const block of blocks) {
    if (block.kind !== "user") continue;

    const kept = truncateAtTemplate(nonEmptyLines(block.text)).filter(isSubstantiveGoal);
    const lines = collapseSkillLines(kept)
      .map(stripLeadingBullet)
      .filter((l) => l.length > 5);
    if (lines.length === 0) continue;

    // The first block with usable lines defines the original goals.
    if (initialGoals.length === 0) {
      initialGoals.push(...lines.slice(0, 6));
      continue;
    }

    const head = block.text.slice(0, LEADING_CHARS);
    if (SCOPE_CHANGE_RE.test(head)) {
      pendingScopeChange = clipAll(lines.slice(0, 3));
    } else if (TASK_RE.test(head) && lines[0].length > 15) {
      pendingScopeChange = clipAll(lines.slice(0, 2));
    }
  }

  // The marker is emitted only together with at least one captured line.
  const combined =
    pendingScopeChange.length > 0
      ? [...initialGoals, "[Scope change]", ...pendingScopeChange]
      : initialGoals;

  return combined.slice(0, 8);
};
@@ -0,0 +1,55 @@
1
+ import type { NormalizedBlock } from "../types";
2
+ import { clip, nonEmptyLines } from "../core/content";
3
+
4
+ // Tightened patterns: require a clear preference construction, not bare keywords.
5
+ const PREF_PATTERNS = [ // A line counts as a preference when any one of these matches (all case-insensitive).
6
+ /\bprefer(?:s|red|ring)?\s+\w/i, // "prefer X", "prefers X", "preferred X", "preferring X"
7
+ /\bdon'?t want\b/i, // "don't want" / "dont want"
8
+ /\balways (?:use|do|run|prefer|keep|make|format|write|add|set|put|prefix|start|include|append)\b/i, // "always <verb>" with a listed verb
9
+ /\bnever (?:use|do|run|push|commit|write|ignore|add|set|put|remove|delete|include|deploy)\b/i, // "never <verb>" with a listed verb
10
+ /\bplease (?:use|avoid|keep|make|don'?t|do not|format|write)\b/i, // polite instruction with a listed verb
11
+ /\b(?:style|format|language|naming)\s*[:=]\s*\S/i, // "key: value" / "key=value" for a listed key
12
+ ];
13
+
14
/**
 * Collects stated user preferences from user messages.
 *
 * A line qualifies when it is 5-200 characters long after trimming, is not a
 * question, and matches one of PREF_PATTERNS. Each user block contributes at
 * most one new preference, duplicates are ignored case-insensitively, and at
 * most ten preferences are returned.
 */
export const extractPreferences = (blocks: NormalizedBlock[]): string[] => {
  const collected: string[] = [];
  const seenKeys = new Set<string>();

  for (const block of blocks) {
    if (block.kind !== "user") continue;

    for (const rawLine of nonEmptyLines(block.text)) {
      const candidate = rawLine.trim();
      const lengthOk = candidate.length >= 5 && candidate.length <= 200;
      if (!lengthOk) continue;

      // Questions are not preferences.
      const isQuestion = candidate.endsWith("?") || candidate.includes("?...");
      if (isQuestion) continue;
      if (!PREF_PATTERNS.some((pattern) => pattern.test(candidate))) continue;

      const clipped = clip(candidate, 200);
      const key = clipped.toLowerCase();
      if (seenKeys.has(key)) continue;
      seenKeys.add(key);
      collected.push(clipped);

      // One preference per user block, so a pasted rule list does not flood the output.
      break;
    }
  }

  return collected.slice(0, 10);
};
43
+
44
/**
 * Drops every preference whose text equals one of the goals once both are
 * trimmed and lower-cased, so the two sections do not repeat each other.
 * The remaining preferences keep their original text and order.
 */
export const dedupPreferencesAgainstGoals = (
  prefs: string[],
  goals: string[],
): string[] => {
  const goalKeys = new Set<string>();
  for (const goal of goals) goalKeys.add(goal.trim().toLowerCase());
  return prefs.filter((pref) => !goalKeys.has(pref.trim().toLowerCase()));
};
@@ -0,0 +1,214 @@
1
+ import type { NormalizedBlock } from "../types";
2
+
3
+ export interface ReferenceExtract { // References found in user and assistant text, one capped list per category.
4
+ urls: string[]; // http(s) URLs with trailing punctuation stripped (at most URL_LIMIT).
5
+ githubRefs: string[]; // "#123", "PR #123", "owner/repo" and "owner/repo#123" style refs (at most GITHUB_REF_LIMIT).
6
+ versions: string[]; // "1.2.3" / "v1.2.3" tokens not judged to be part of an IP address (at most VERSION_LIMIT).
7
+ branches: string[]; // Branch-like names such as "feat/xxx" (at most BRANCH_LIMIT).
8
+ commitRefs: string[]; // 7-12 char hex tokens containing at least one letter a-f (at most COMMIT_REF_LIMIT).
9
+ }
10
+
11
+ export interface ReferencesOptions { // Pattern strings that fail to compile as a RegExp are skipped silently.
12
+ enabled?: boolean; // When exactly false, extraction is skipped and every list comes back empty.
13
+ /** Extra URL regex strings (compiled with the global flag; capture group 1 or the full match is used, then trailing punctuation is stripped). */
14
+ extraUrlPatterns?: string[];
15
+ /** Extra GitHub ref regex strings (capture group 1 or the full match is added to githubRefs). */
16
+ extraGithubRefPatterns?: string[];
17
+ /** Extra version regex strings (capture group 1 or full match). */
18
+ extraVersionPatterns?: string[];
19
+ /** Extra branch regex strings (full match added to branches). */
20
+ extraBranchPatterns?: string[];
21
+ }
22
+
23
+ // ── Regex patterns ──
24
+
25
+ // URLs: http:// or https:// followed by non-whitespace, strip trailing punctuation
26
+ const URL_RE = /https?:\/\/\S+/g; // Greedy up to the next whitespace, so trailing punctuation is captured too.
27
+ const TRAILING_PUNCT_RE = /[.,;:!?)\]}>]+$/; // Run of punctuation / closing brackets at the very end of a match.
28
+
29
+ // GitHub refs
30
+ const BARE_ISSUE_RE = /#(\d+)/g; // Any "#<digits>"; group 1 is the number.
31
+ const PR_REF_RE = /\b(PR|pr)\s*#(\d+)/g; // "PR #12" / "pr#12"; groups: prefix as written, number.
32
+ const OWNER_REPO_RE = /\b([a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)\/([a-zA-Z0-9_.-]+)\b/g; // Any "word/word" pair (groups: owner, repo). NOTE(review): the pattern alone also matches path segments inside URLs and file paths, e.g. "com/owner".
33
// First path segments that indicate a relative file path rather than a GitHub
// owner. Built once at module load instead of on every REPO_FILTER call.
const REPO_FILTER_SKIP_OWNERS: ReadonlySet<string> = new Set([
  "src", "lib", "dist", "build", "test", "tests", "pkg", "cmd", "web", "app",
  "docs", "scripts", "assets", "public", "static", "vendor",
]);

/**
 * Decides whether an `owner/repo` candidate is plausible.
 *
 * Rejects candidates whose owner or repo is shorter than two characters,
 * whose repo starts with a dot, or whose owner (case-insensitively) is a
 * common source-tree directory name.
 */
const REPO_FILTER = (owner: string, repo: string): boolean => {
  if (owner.length < 2 || repo.length < 2) return false;
  if (repo.startsWith(".")) return false;
  return !REPO_FILTER_SKIP_OWNERS.has(owner.toLowerCase());
};
41
+
42
// Versions: v1.2.3 or bare 1.2.3 (but NOT IP octets)
const VERSION_RE = /\b(v?\d+\.\d+\.\d+)\b/g;

/**
 * Reports whether the version-like token that starts at `index` is really a
 * slice of a longer dotted number such as an IPv4 address.
 *
 * Only the characters directly next to the token are inspected: a following
 * ".<digit>" or a preceding "<digit>.". An IP address that merely appears
 * elsewhere on the same line does not disqualify a genuine version.
 *
 * @param text Full text being scanned.
 * @param index Start offset of the VERSION_RE match within `text`.
 */
const isIPContext = (text: string, index: number): boolean => {
  const token = /^v?\d+\.\d+\.\d+/.exec(text.slice(index));
  const end = index + (token ? token[0].length : 0);
  // "10.0.0" directly followed by ".1" is the front of a four-part number.
  if (/^\.\d/.test(text.slice(end, end + 2))) return true;
  // "2.3.4" directly preceded by "1." is the tail of a longer dotted number.
  if (/\d\.$/.test(text.slice(Math.max(0, index - 2), index))) return true;
  return false;
};
51
+
52
// Branch names with a conventional type prefix, e.g. feat/xxx or fix/xxx.
const BRANCH_RE = /\b(?:feat|fix|hotfix|release|chore|refactor|docs|test)\/[\w-]+\b/g;

// Commit refs: standalone tokens of 7-12 lowercase hex characters.
const COMMIT_REF_RE = /\b([0-9a-f]{7,12})\b/g;

/** True when the token contains at least one letter a-f, i.e. it is not a plain decimal number. */
const isLikelyHexHash = (s: string): boolean => {
  return s.search(/[a-f]/i) !== -1;
};
58
+
59
+ // ── Limits ── (maximum number of distinct entries kept per category across all text)
60
+ const URL_LIMIT = 10; // cap for ReferenceExtract.urls
61
+ const GITHUB_REF_LIMIT = 8; // cap for ReferenceExtract.githubRefs
62
+ const VERSION_LIMIT = 5; // cap for ReferenceExtract.versions
63
+ const BRANCH_LIMIT = 5; // cap for ReferenceExtract.branches
64
+ const COMMIT_REF_LIMIT = 5; // cap for ReferenceExtract.commitRefs
65
+
66
+ // ── Helpers ──
67
+
68
/** Cuts off any run of trailing punctuation or closing brackets from a matched URL. */
const cleanUrl = (raw: string): string => {
  const tail = TRAILING_PUNCT_RE.exec(raw);
  return tail ? raw.slice(0, tail.index) : raw;
};
69
+
70
/** Returns the text of every user and assistant block, in order; other block kinds are ignored. */
const collectTextFromBlocks = (blocks: NormalizedBlock[]): string[] =>
  blocks.reduce<string[]>((texts, block) => {
    if (block.kind === "user" || block.kind === "assistant") {
      texts.push(block.text);
    }
    return texts;
  }, []);
79
+
80
/**
 * Compiles regex source strings into global RegExp objects.
 * Strings that are not valid regular expressions are skipped; a missing or
 * empty list gives an empty result.
 */
const compilePatterns = (patterns: string[] | undefined): RegExp[] => {
  const compiled: RegExp[] = [];
  for (const source of patterns ?? []) {
    try {
      compiled.push(new RegExp(source, "g"));
    } catch {
      // Invalid pattern: leave it out.
    }
  }
  return compiled;
};
87
+
88
+ // ── Extraction ──
89
+
90
// Full GitHub issue / pull-request URL; groups: owner, repo, number.
const GITHUB_ISSUE_URL_RE =
  /https?:\/\/github\.com\/([a-zA-Z0-9-]+)\/([a-zA-Z0-9_.-]+)\/(?:issues|pull)\/(\d+)/g;

/** Adds a non-empty `value` to `bucket` unless it already holds `limit` entries. */
const addCapped = (bucket: Set<string>, limit: number, value: string | undefined): void => {
  if (value && bucket.size < limit) bucket.add(value);
};

/**
 * Scans user and assistant text for URLs, GitHub refs, versions, branch names
 * and commit refs.
 *
 * Each category is de-duplicated, keeps first-seen order and is capped by its
 * `*_LIMIT` constant. Built-in patterns run before the matching extra patterns
 * from `options`. `owner/repo` detection runs on the text with URLs blanked
 * out, so URL path segments (for example "com/owner") are not reported as
 * repositories; issue and pull-request URLs on github.com are still reported
 * as `owner/repo#N`.
 *
 * @param blocks Normalized session blocks; only user and assistant text is read.
 * @param options Optional switch and extra pattern strings; `enabled: false`
 *                returns empty lists.
 */
export const extractReferences = (blocks: NormalizedBlock[], options?: ReferencesOptions): ReferenceExtract => {
  if (options?.enabled === false) {
    return { urls: [], githubRefs: [], versions: [], branches: [], commitRefs: [] };
  }

  const urls = new Set<string>();
  const githubRefs = new Set<string>();
  const versions = new Set<string>();
  const branches = new Set<string>();
  const commitRefs = new Set<string>();

  const extraUrlRes = compilePatterns(options?.extraUrlPatterns);
  const extraGhRefRes = compilePatterns(options?.extraGithubRefPatterns);
  const extraVersionRes = compilePatterns(options?.extraVersionPatterns);
  const extraBranchRes = compilePatterns(options?.extraBranchPatterns);

  for (const text of collectTextFromBlocks(blocks)) {
    // ── URLs ──
    if (urls.size < URL_LIMIT) {
      for (const m of text.matchAll(URL_RE)) {
        addCapped(urls, URL_LIMIT, cleanUrl(m[0]));
      }
      for (const re of extraUrlRes) {
        for (const m of text.matchAll(re)) {
          addCapped(urls, URL_LIMIT, cleanUrl(m[1] ?? m[0]));
        }
      }
    }

    // ── GitHub refs ──
    if (githubRefs.size < GITHUB_REF_LIMIT) {
      // Bare issue numbers.
      for (const m of text.matchAll(BARE_ISSUE_RE)) {
        addCapped(githubRefs, GITHUB_REF_LIMIT, `#${m[1]}`);
      }
      // PR references.
      for (const m of text.matchAll(PR_REF_RE)) {
        addCapped(githubRefs, GITHUB_REF_LIMIT, `${m[1]} #${m[2]}`);
      }
      // owner/repo, matched outside URLs only so host and path segments of a
      // link are not mistaken for a repository.
      const withoutUrls = text.replace(URL_RE, " ");
      for (const m of withoutUrls.matchAll(OWNER_REPO_RE)) {
        if (REPO_FILTER(m[1], m[2])) {
          addCapped(githubRefs, GITHUB_REF_LIMIT, `${m[1]}/${m[2]}`);
        }
      }
      // GitHub issue / pull URLs → owner/repo#issue.
      for (const m of text.matchAll(GITHUB_ISSUE_URL_RE)) {
        addCapped(githubRefs, GITHUB_REF_LIMIT, `${m[1]}/${m[2]}#${m[3]}`);
      }
      // Extra GitHub ref patterns (capture group 1 or full match).
      for (const re of extraGhRefRes) {
        for (const m of text.matchAll(re)) {
          addCapped(githubRefs, GITHUB_REF_LIMIT, m[1] ?? m[0]);
        }
      }
    }

    // ── Versions ──
    if (versions.size < VERSION_LIMIT) {
      for (const m of text.matchAll(VERSION_RE)) {
        if (!isIPContext(text, m.index ?? 0)) {
          addCapped(versions, VERSION_LIMIT, m[1]);
        }
      }
      for (const re of extraVersionRes) {
        for (const m of text.matchAll(re)) {
          addCapped(versions, VERSION_LIMIT, m[1] ?? m[0]);
        }
      }
    }

    // ── Branches ──
    if (branches.size < BRANCH_LIMIT) {
      for (const m of text.matchAll(BRANCH_RE)) {
        addCapped(branches, BRANCH_LIMIT, m[0]);
      }
      // Extra branch patterns contribute their full match.
      for (const re of extraBranchRes) {
        for (const m of text.matchAll(re)) {
          addCapped(branches, BRANCH_LIMIT, m[0]);
        }
      }
    }

    // ── Commit refs ──
    if (commitRefs.size < COMMIT_REF_LIMIT) {
      for (const m of text.matchAll(COMMIT_REF_RE)) {
        if (isLikelyHexHash(m[1])) {
          addCapped(commitRefs, COMMIT_REF_LIMIT, m[1]);
        }
      }
    }
  }

  return {
    urls: [...urls].slice(0, URL_LIMIT),
    githubRefs: [...githubRefs].slice(0, GITHUB_REF_LIMIT),
    versions: [...versions].slice(0, VERSION_LIMIT),
    branches: [...branches].slice(0, BRANCH_LIMIT),
    commitRefs: [...commitRefs].slice(0, COMMIT_REF_LIMIT),
  };
};
205
+
206
/**
 * Renders the extracted references as one `Label: a, b, c` line per category
 * that has entries, in the fixed order URL, GitHub, Version, Branch, CommitRef.
 */
export const formatReferences = (refs: ReferenceExtract): string[] => {
  const groups: Array<[string, string[]]> = [
    ["URL", refs.urls],
    ["GitHub", refs.githubRefs],
    ["Version", refs.versions],
    ["Branch", refs.branches],
    ["CommitRef", refs.commitRefs],
  ];
  return groups
    .filter(([, values]) => values.length > 0)
    .map(([label, values]) => `${label}: ${values.join(", ")}`);
};
@@ -0,0 +1,145 @@
1
+ import type { NormalizedBlock } from "../types";
2
+ import { clip, nonEmptyLines } from "../core/content";
3
+
4
+ export interface SignalsOptions { // Pattern strings that fail to compile as a RegExp are skipped silently.
5
+ enabled?: boolean; // When exactly false, extraction is skipped and every list comes back empty.
6
+ /** Extra constraint regex strings (compiled case-insensitively, tried after the built-in pattern). */
7
+ extraConstraintPatterns?: string[];
8
+ /** Extra decision regex strings (compiled case-insensitively, tried after the built-in pattern). */
9
+ extraDecisionPatterns?: string[];
10
+ /** Extra status keywords (added to built-in DONE|TODO|WIP|blocked|resolved). */
11
+ extraStatusKeywords?: string[];
12
+ }
13
+
14
+ // ─── Pattern definitions ────────────────────────────────────────────
15
+
16
+ const CONSTRAINT_RE = // Prohibition / exclusion wording; applied to user lines only (case-insensitive).
17
+ /\b(don'?t|must not|cannot|forbidden|disallowed|off[- ]limits|out of scope|excluded|do not)\b/i;
18
+
19
+ const DECISION_RE = // Wording that announces a choice; applied to user lines only (case-insensitive).
20
+ /\b(decided|let'?s use|going with|chose|we'?ll use)\b/i;
21
+
22
+ const DEFAULT_STATUS_KEYWORDS = ["DONE", "TODO", "WIP", "blocked", "resolved"]; // Built-in status markers; matched case-insensitively.
23
+
24
// Compiled status regexes keyed by keyword, so each keyword is compiled once
// rather than once per scanned line.
const STATUS_KEYWORD_RES = new Map<string, RegExp>();

/**
 * Returns the regex that matches `kw` at the start of a line or right after
 * one of `. ! ? ; : - —` (optional whitespace in between), case-insensitively.
 * The keyword is matched literally: regex metacharacters in it are escaped.
 */
const statusKeywordRe = (kw: string): RegExp => {
  let re = STATUS_KEYWORD_RES.get(kw);
  if (!re) {
    const literal = kw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // A trailing word boundary only makes sense after a word character;
    // a keyword such as "C++" would otherwise never match before a space.
    const tail = /\w$/.test(kw) ? "\\b" : "";
    re = new RegExp(`(?:^|[.!?;:\\-—])\\s*${literal}${tail}`, "i");
    STATUS_KEYWORD_RES.set(kw, re);
  }
  return re;
};

/**
 * Tests whether `line` carries a status keyword at its start or directly
 * after sentence punctuation.
 *
 * @param line Text to test.
 * @param extraKeywords Additional keywords checked after the built-in ones;
 *                      treated as literal text, empty strings are ignored.
 * @returns true when any keyword matches.
 */
const buildStatusPattern = (line: string, extraKeywords?: string[]): boolean => {
  const keywords = extraKeywords && extraKeywords.length > 0
    ? [...DEFAULT_STATUS_KEYWORDS, ...extraKeywords]
    : DEFAULT_STATUS_KEYWORDS;
  // An empty keyword would match nearly every line, so it is skipped.
  return keywords.some((kw) => kw.length > 0 && statusKeywordRe(kw).test(line));
};
35
+
36
/**
 * Compiles regex source strings into case-insensitive RegExp objects.
 * Strings that are not valid regular expressions are skipped; a missing or
 * empty list gives an empty result.
 */
const compilePatterns = (patterns: string[] | undefined): RegExp[] => {
  const compiled: RegExp[] = [];
  for (const source of patterns ?? []) {
    try {
      compiled.push(new RegExp(source, "i"));
    } catch {
      // Invalid pattern: leave it out.
    }
  }
  return compiled;
};
43
+
44
+ // ─── Types ──────────────────────────────────────────────────────────
45
+
46
+ export interface SignalExtract { // Lines picked out of the conversation, clipped and de-duplicated case-insensitively.
47
+ constraints: string[]; // User lines matching a constraint pattern (at most CAP_CONSTRAINTS).
48
+ decisions: string[]; // User lines matching a decision pattern (at most CAP_DECISIONS).
49
+ statuses: string[]; // User or assistant lines carrying a status keyword (at most CAP_STATUSES).
50
+ }
51
+
52
+ // ─── Extractor ──────────────────────────────────────────────────────
53
+
54
+ const MIN_LINE_LENGTH = 15; // Trimmed lines shorter than this are ignored.
55
+ const MAX_LINE_LENGTH = 200; // Kept lines are clipped to this length; extractSignals separately skips lines longer than 500 characters.
56
+ const CAP_CONSTRAINTS = 5; // Maximum number of constraints collected.
57
+ const CAP_DECISIONS = 5; // Maximum number of decisions collected.
58
+ const CAP_STATUSES = 5; // Maximum number of statuses collected.
59
+
60
/**
 * Picks constraint, decision and status lines out of the conversation.
 *
 * Only user and assistant blocks are read. A line is considered when its
 * trimmed length is between MIN_LINE_LENGTH and 500 characters and it does not
 * end with "?". Constraints and decisions come from user lines only; statuses
 * come from both roles. Kept lines are clipped to MAX_LINE_LENGTH,
 * de-duplicated case-insensitively per category, and each category stops
 * growing at its cap.
 *
 * @param blocks Normalized session blocks.
 * @param options Optional switch, extra patterns and extra status keywords;
 *                `enabled: false` returns empty lists.
 */
export const extractSignals = (blocks: NormalizedBlock[], options?: SignalsOptions): SignalExtract => {
  if (options?.enabled === false) {
    return { constraints: [], decisions: [], statuses: [] };
  }

  // Built-in pattern first, then the user-supplied ones, in order.
  const constraintRes = [CONSTRAINT_RE, ...compilePatterns(options?.extraConstraintPatterns)];
  const decisionRes = [DECISION_RE, ...compilePatterns(options?.extraDecisionPatterns)];

  const constraints: string[] = [];
  const decisions: string[] = [];
  const statuses: string[] = [];

  const seenConstraints = new Set<string>();
  const seenDecisions = new Set<string>();
  const seenStatuses = new Set<string>();

  // Clip the line and append it unless an equal (case-insensitive) entry exists.
  const record = (line: string, into: string[], seen: Set<string>): void => {
    const clipped = clip(line, MAX_LINE_LENGTH);
    const key = clipped.toLowerCase();
    if (seen.has(key)) return;
    seen.add(key);
    into.push(clipped);
  };

  for (const block of blocks) {
    if (block.kind !== "user" && block.kind !== "assistant") continue;
    const fromUser = block.kind === "user";

    for (const rawLine of nonEmptyLines(block.text)) {
      const candidate = rawLine.trim();
      if (candidate.length < MIN_LINE_LENGTH) continue;
      if (candidate.length > 500) continue;
      if (candidate.endsWith("?")) continue;

      // Constraints (user only)
      if (
        fromUser &&
        constraints.length < CAP_CONSTRAINTS &&
        constraintRes.some((re) => re.test(candidate))
      ) {
        record(candidate, constraints, seenConstraints);
      }

      // Decisions (user only)
      if (
        fromUser &&
        decisions.length < CAP_DECISIONS &&
        decisionRes.some((re) => re.test(candidate))
      ) {
        record(candidate, decisions, seenDecisions);
      }

      // Statuses (user + assistant)
      if (
        statuses.length < CAP_STATUSES &&
        buildStatusPattern(candidate, options?.extraStatusKeywords)
      ) {
        record(candidate, statuses, seenStatuses);
      }
    }
  }

  return { constraints, decisions, statuses };
};
136
+
137
+ // ─── Formatter ──────────────────────────────────────────────────────
138
+
139
/**
 * Renders signals as labelled lines: all constraints first, then decisions,
 * then statuses, each prefixed with its category name.
 */
export const formatSignals = (signals: SignalExtract): string[] => [
  ...signals.constraints.map((constraint) => `Constraint: ${constraint}`),
  ...signals.decisions.map((decision) => `Decision: ${decision}`),
  ...signals.statuses.map((status) => `Status: ${status}`),
];