gsd-pi 2.38.0-dev.96dc7fb → 2.38.0-dev.98b44dc
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -11
- package/dist/app-paths.js +1 -1
- package/dist/extension-registry.js +2 -2
- package/dist/remote-questions-config.js +2 -2
- package/dist/resource-loader.js +34 -1
- package/dist/resources/extensions/browser-tools/index.js +3 -1
- package/dist/resources/extensions/browser-tools/tools/verify.js +97 -0
- package/dist/resources/extensions/env-utils.js +29 -0
- package/dist/resources/extensions/get-secrets-from-user.js +5 -24
- package/dist/resources/extensions/github-sync/cli.js +284 -0
- package/dist/resources/extensions/github-sync/index.js +73 -0
- package/dist/resources/extensions/github-sync/mapping.js +67 -0
- package/dist/resources/extensions/github-sync/sync.js +424 -0
- package/dist/resources/extensions/github-sync/templates.js +118 -0
- package/dist/resources/extensions/github-sync/types.js +7 -0
- package/dist/resources/extensions/gsd/auto/session.js +6 -23
- package/dist/resources/extensions/gsd/auto-dispatch.js +8 -9
- package/dist/resources/extensions/gsd/auto-loop.js +636 -594
- package/dist/resources/extensions/gsd/auto-post-unit.js +99 -70
- package/dist/resources/extensions/gsd/auto-prompts.js +202 -48
- package/dist/resources/extensions/gsd/auto-start.js +7 -1
- package/dist/resources/extensions/gsd/auto-worktree-sync.js +2 -1
- package/dist/resources/extensions/gsd/auto-worktree.js +3 -3
- package/dist/resources/extensions/gsd/auto.js +143 -96
- package/dist/resources/extensions/gsd/commands-extensions.js +3 -2
- package/dist/resources/extensions/gsd/commands-prefs-wizard.js +1 -1
- package/dist/resources/extensions/gsd/commands.js +4 -2
- package/dist/resources/extensions/gsd/context-budget.js +2 -10
- package/dist/resources/extensions/gsd/detection.js +1 -2
- package/dist/resources/extensions/gsd/docs/preferences-reference.md +0 -2
- package/dist/resources/extensions/gsd/doctor-providers.js +30 -11
- package/dist/resources/extensions/gsd/doctor.js +20 -1
- package/dist/resources/extensions/gsd/exit-command.js +2 -1
- package/dist/resources/extensions/gsd/export.js +1 -1
- package/dist/resources/extensions/gsd/files.js +48 -9
- package/dist/resources/extensions/gsd/forensics.js +1 -1
- package/dist/resources/extensions/gsd/git-service.js +30 -12
- package/dist/resources/extensions/gsd/gitignore.js +16 -3
- package/dist/resources/extensions/gsd/guided-flow.js +149 -38
- package/dist/resources/extensions/gsd/health-widget-core.js +32 -70
- package/dist/resources/extensions/gsd/health-widget.js +3 -86
- package/dist/resources/extensions/gsd/index.js +24 -20
- package/dist/resources/extensions/gsd/migrate/parsers.js +1 -1
- package/dist/resources/extensions/gsd/migrate-external.js +18 -1
- package/dist/resources/extensions/gsd/native-git-bridge.js +37 -0
- package/dist/resources/extensions/gsd/paths.js +3 -0
- package/dist/resources/extensions/gsd/preferences-models.js +0 -12
- package/dist/resources/extensions/gsd/preferences-types.js +1 -1
- package/dist/resources/extensions/gsd/preferences-validation.js +59 -11
- package/dist/resources/extensions/gsd/preferences.js +22 -11
- package/dist/resources/extensions/gsd/prompt-loader.js +6 -2
- package/dist/resources/extensions/gsd/prompts/complete-milestone.md +1 -1
- package/dist/resources/extensions/gsd/prompts/complete-slice.md +1 -1
- package/dist/resources/extensions/gsd/prompts/discuss.md +11 -14
- package/dist/resources/extensions/gsd/prompts/execute-task.md +5 -3
- package/dist/resources/extensions/gsd/prompts/guided-complete-slice.md +1 -1
- package/dist/resources/extensions/gsd/prompts/guided-discuss-milestone.md +11 -12
- package/dist/resources/extensions/gsd/prompts/guided-discuss-slice.md +8 -10
- package/dist/resources/extensions/gsd/prompts/guided-execute-task.md +1 -1
- package/dist/resources/extensions/gsd/prompts/guided-plan-milestone.md +1 -1
- package/dist/resources/extensions/gsd/prompts/guided-plan-slice.md +1 -1
- package/dist/resources/extensions/gsd/prompts/guided-research-slice.md +1 -1
- package/dist/resources/extensions/gsd/prompts/guided-resume-task.md +1 -1
- package/dist/resources/extensions/gsd/prompts/plan-milestone.md +1 -1
- package/dist/resources/extensions/gsd/prompts/plan-slice.md +1 -1
- package/dist/resources/extensions/gsd/prompts/queue.md +4 -8
- package/dist/resources/extensions/gsd/prompts/reactive-execute.md +11 -8
- package/dist/resources/extensions/gsd/prompts/reassess-roadmap.md +1 -1
- package/dist/resources/extensions/gsd/prompts/research-milestone.md +1 -1
- package/dist/resources/extensions/gsd/prompts/research-slice.md +1 -1
- package/dist/resources/extensions/gsd/prompts/run-uat.md +28 -11
- package/dist/resources/extensions/gsd/prompts/workflow-start.md +2 -2
- package/dist/resources/extensions/gsd/repo-identity.js +21 -4
- package/dist/resources/extensions/gsd/resource-version.js +2 -1
- package/dist/resources/extensions/gsd/roadmap-mutations.js +24 -0
- package/dist/resources/extensions/gsd/state.js +42 -23
- package/dist/resources/extensions/gsd/templates/runtime.md +21 -0
- package/dist/resources/extensions/gsd/templates/task-plan.md +3 -0
- package/dist/resources/extensions/gsd/visualizer-data.js +1 -1
- package/dist/resources/extensions/mcp-client/index.js +14 -1
- package/dist/resources/extensions/remote-questions/status.js +4 -1
- package/dist/resources/extensions/remote-questions/store.js +4 -1
- package/dist/resources/extensions/search-the-web/provider.js +2 -1
- package/dist/resources/extensions/shared/frontmatter.js +1 -1
- package/dist/resources/extensions/subagent/isolation.js +2 -1
- package/dist/resources/extensions/ttsr/rule-loader.js +2 -1
- package/package.json +1 -1
- package/packages/pi-ai/dist/utils/oauth/anthropic.js +2 -2
- package/packages/pi-ai/dist/utils/oauth/anthropic.js.map +1 -1
- package/packages/pi-ai/src/utils/oauth/anthropic.ts +2 -2
- package/packages/pi-coding-agent/dist/core/extensions/loader.d.ts.map +1 -1
- package/packages/pi-coding-agent/dist/core/extensions/loader.js +205 -7
- package/packages/pi-coding-agent/dist/core/extensions/loader.js.map +1 -1
- package/packages/pi-coding-agent/dist/core/skills.d.ts +1 -0
- package/packages/pi-coding-agent/dist/core/skills.d.ts.map +1 -1
- package/packages/pi-coding-agent/dist/core/skills.js +6 -1
- package/packages/pi-coding-agent/dist/core/skills.js.map +1 -1
- package/packages/pi-coding-agent/dist/index.d.ts +1 -1
- package/packages/pi-coding-agent/dist/index.d.ts.map +1 -1
- package/packages/pi-coding-agent/dist/index.js +1 -1
- package/packages/pi-coding-agent/dist/index.js.map +1 -1
- package/packages/pi-coding-agent/src/core/extensions/loader.ts +223 -7
- package/packages/pi-coding-agent/src/core/skills.ts +9 -1
- package/packages/pi-coding-agent/src/index.ts +1 -0
- package/src/resources/extensions/browser-tools/index.ts +3 -0
- package/src/resources/extensions/browser-tools/tools/verify.ts +117 -0
- package/src/resources/extensions/env-utils.ts +31 -0
- package/src/resources/extensions/get-secrets-from-user.ts +5 -24
- package/src/resources/extensions/github-sync/cli.ts +364 -0
- package/src/resources/extensions/github-sync/index.ts +93 -0
- package/src/resources/extensions/github-sync/mapping.ts +81 -0
- package/src/resources/extensions/github-sync/sync.ts +556 -0
- package/src/resources/extensions/github-sync/templates.ts +183 -0
- package/src/resources/extensions/github-sync/tests/cli.test.ts +20 -0
- package/src/resources/extensions/github-sync/tests/commit-linking.test.ts +39 -0
- package/src/resources/extensions/github-sync/tests/mapping.test.ts +104 -0
- package/src/resources/extensions/github-sync/tests/templates.test.ts +110 -0
- package/src/resources/extensions/github-sync/types.ts +47 -0
- package/src/resources/extensions/gsd/auto/session.ts +7 -25
- package/src/resources/extensions/gsd/auto-dispatch.ts +7 -9
- package/src/resources/extensions/gsd/auto-loop.ts +526 -545
- package/src/resources/extensions/gsd/auto-post-unit.ts +80 -44
- package/src/resources/extensions/gsd/auto-prompts.ts +247 -50
- package/src/resources/extensions/gsd/auto-start.ts +11 -1
- package/src/resources/extensions/gsd/auto-worktree-sync.ts +3 -1
- package/src/resources/extensions/gsd/auto-worktree.ts +3 -3
- package/src/resources/extensions/gsd/auto.ts +139 -101
- package/src/resources/extensions/gsd/commands-extensions.ts +4 -2
- package/src/resources/extensions/gsd/commands-prefs-wizard.ts +1 -1
- package/src/resources/extensions/gsd/commands.ts +5 -3
- package/src/resources/extensions/gsd/context-budget.ts +2 -12
- package/src/resources/extensions/gsd/detection.ts +2 -2
- package/src/resources/extensions/gsd/docs/preferences-reference.md +0 -2
- package/src/resources/extensions/gsd/doctor-providers.ts +30 -9
- package/src/resources/extensions/gsd/doctor.ts +22 -1
- package/src/resources/extensions/gsd/exit-command.ts +2 -2
- package/src/resources/extensions/gsd/export.ts +1 -1
- package/src/resources/extensions/gsd/files.ts +51 -11
- package/src/resources/extensions/gsd/forensics.ts +1 -1
- package/src/resources/extensions/gsd/git-service.ts +44 -10
- package/src/resources/extensions/gsd/gitignore.ts +17 -3
- package/src/resources/extensions/gsd/guided-flow.ts +177 -44
- package/src/resources/extensions/gsd/health-widget-core.ts +28 -80
- package/src/resources/extensions/gsd/health-widget.ts +3 -89
- package/src/resources/extensions/gsd/index.ts +24 -17
- package/src/resources/extensions/gsd/migrate/parsers.ts +1 -1
- package/src/resources/extensions/gsd/migrate-external.ts +18 -1
- package/src/resources/extensions/gsd/native-git-bridge.ts +37 -0
- package/src/resources/extensions/gsd/paths.ts +4 -0
- package/src/resources/extensions/gsd/preferences-models.ts +0 -12
- package/src/resources/extensions/gsd/preferences-types.ts +4 -4
- package/src/resources/extensions/gsd/preferences-validation.ts +51 -11
- package/src/resources/extensions/gsd/preferences.ts +25 -11
- package/src/resources/extensions/gsd/prompt-loader.ts +7 -2
- package/src/resources/extensions/gsd/prompts/complete-milestone.md +1 -1
- package/src/resources/extensions/gsd/prompts/complete-slice.md +1 -1
- package/src/resources/extensions/gsd/prompts/discuss.md +11 -14
- package/src/resources/extensions/gsd/prompts/execute-task.md +5 -3
- package/src/resources/extensions/gsd/prompts/guided-complete-slice.md +1 -1
- package/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md +11 -12
- package/src/resources/extensions/gsd/prompts/guided-discuss-slice.md +8 -10
- package/src/resources/extensions/gsd/prompts/guided-execute-task.md +1 -1
- package/src/resources/extensions/gsd/prompts/guided-plan-milestone.md +1 -1
- package/src/resources/extensions/gsd/prompts/guided-plan-slice.md +1 -1
- package/src/resources/extensions/gsd/prompts/guided-research-slice.md +1 -1
- package/src/resources/extensions/gsd/prompts/guided-resume-task.md +1 -1
- package/src/resources/extensions/gsd/prompts/plan-milestone.md +1 -1
- package/src/resources/extensions/gsd/prompts/plan-slice.md +1 -1
- package/src/resources/extensions/gsd/prompts/queue.md +4 -8
- package/src/resources/extensions/gsd/prompts/reactive-execute.md +11 -8
- package/src/resources/extensions/gsd/prompts/reassess-roadmap.md +1 -1
- package/src/resources/extensions/gsd/prompts/research-milestone.md +1 -1
- package/src/resources/extensions/gsd/prompts/research-slice.md +1 -1
- package/src/resources/extensions/gsd/prompts/run-uat.md +28 -11
- package/src/resources/extensions/gsd/prompts/workflow-start.md +2 -2
- package/src/resources/extensions/gsd/repo-identity.ts +23 -4
- package/src/resources/extensions/gsd/resource-version.ts +3 -1
- package/src/resources/extensions/gsd/roadmap-mutations.ts +29 -0
- package/src/resources/extensions/gsd/state.ts +39 -21
- package/src/resources/extensions/gsd/templates/runtime.md +21 -0
- package/src/resources/extensions/gsd/templates/task-plan.md +3 -0
- package/src/resources/extensions/gsd/tests/agent-end-retry.test.ts +21 -18
- package/src/resources/extensions/gsd/tests/auto-loop.test.ts +122 -68
- package/src/resources/extensions/gsd/tests/auto-worktree-milestone-merge.test.ts +4 -3
- package/src/resources/extensions/gsd/tests/derive-state.test.ts +43 -0
- package/src/resources/extensions/gsd/tests/doctor-providers.test.ts +86 -3
- package/src/resources/extensions/gsd/tests/gitignore-tracked-gsd.test.ts +50 -0
- package/src/resources/extensions/gsd/tests/health-widget.test.ts +16 -54
- package/src/resources/extensions/gsd/tests/parsers.test.ts +131 -14
- package/src/resources/extensions/gsd/tests/plan-slice-prompt.test.ts +209 -0
- package/src/resources/extensions/gsd/tests/preferences.test.ts +2 -7
- package/src/resources/extensions/gsd/tests/prompt-contracts.test.ts +59 -0
- package/src/resources/extensions/gsd/tests/repo-identity-worktree.test.ts +21 -1
- package/src/resources/extensions/gsd/tests/run-uat.test.ts +16 -4
- package/src/resources/extensions/gsd/tests/skill-activation.test.ts +140 -0
- package/src/resources/extensions/gsd/types.ts +18 -1
- package/src/resources/extensions/gsd/verification-evidence.ts +16 -0
- package/src/resources/extensions/gsd/visualizer-data.ts +1 -1
- package/src/resources/extensions/mcp-client/index.ts +17 -1
- package/src/resources/extensions/remote-questions/status.ts +5 -1
- package/src/resources/extensions/remote-questions/store.ts +5 -1
- package/src/resources/extensions/search-the-web/provider.ts +2 -1
- package/src/resources/extensions/shared/frontmatter.ts +1 -1
- package/src/resources/extensions/subagent/isolation.ts +3 -1
- package/src/resources/extensions/ttsr/rule-loader.ts +3 -1
- package/dist/resources/extensions/gsd/prompt-compressor.js +0 -393
- package/dist/resources/extensions/gsd/semantic-chunker.js +0 -254
- package/dist/resources/extensions/gsd/summary-distiller.js +0 -212
- package/src/resources/extensions/gsd/prompt-compressor.ts +0 -508
- package/src/resources/extensions/gsd/semantic-chunker.ts +0 -336
- package/src/resources/extensions/gsd/summary-distiller.ts +0 -258
- package/src/resources/extensions/gsd/tests/context-compression.test.ts +0 -193
- package/src/resources/extensions/gsd/tests/prompt-compressor.test.ts +0 -529
- package/src/resources/extensions/gsd/tests/semantic-chunker.test.ts +0 -426
- package/src/resources/extensions/gsd/tests/summary-distiller.test.ts +0 -323
- package/src/resources/extensions/gsd/tests/token-optimization-benchmark.test.ts +0 -1272
- package/src/resources/extensions/gsd/tests/token-optimization-prefs.test.ts +0 -164
|
@@ -1,393 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Prompt Compressor — deterministic text compression for context reduction.
|
|
3
|
-
*
|
|
4
|
-
* Applies a series of lossless and near-lossless transformations to reduce
|
|
5
|
-
* token count while preserving semantic meaning. No LLM calls, no external
|
|
6
|
-
* dependencies. Sub-millisecond for typical prompt sizes.
|
|
7
|
-
*
|
|
8
|
-
* Compression techniques (applied in order):
|
|
9
|
-
* 1. Redundant whitespace normalization
|
|
10
|
-
* 2. Markdown formatting reduction (collapse verbose tables, lists)
|
|
11
|
-
* 3. Common phrase abbreviation
|
|
12
|
-
* 4. Repeated pattern deduplication
|
|
13
|
-
* 5. Low-information content removal (empty sections, boilerplate)
|
|
14
|
-
*/
|
|
15
|
-
// ─── Phrase Abbreviation Map ────────────────────────────────────────────────
|
|
16
|
-
/**
 * Build a regex that matches a verbose phrase even when split across lines.
 * Whitespace between words is matched with \s+ to handle line wrapping.
 *
 * Each word is escaped before it is placed in the pattern, so regex
 * metacharacters in the phrase (".", "(", "?", ...) match literally instead
 * of changing what the pattern accepts.
 *
 * @param {string} phrase - Phrase to match; words are separated by whitespace.
 * @returns {RegExp} Global, case-insensitive regex wrapped in \b word boundaries.
 */
function phraseRegex(phrase) {
    const words = phrase
        .trim()
        .split(/\s+/)
        // Escape regex metacharacters so every word is matched verbatim.
        .map((word) => word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
    return new RegExp(`\\b${words.join("\\s+")}\\b`, "gi");
}
|
|
25
|
-
/**
 * Ordered table of [pattern, replacement] pairs. Each verbose phrase is
 * compiled with phraseRegex and paired with its shorter replacement text.
 */
const VERBOSE_PHRASES = [
    ["In order to", "To"],
    ["It is important to note that", "Note:"],
    ["As mentioned previously", "(see above)"],
    ["The following", "These"],
    ["In addition to", "Also,"],
    ["Due to the fact that", "Because"],
    ["At this point in time", "Now"],
    ["For the purpose of", "For"],
    ["In the event that", "If"],
    ["With regard to", "Re:"],
    ["Prior to", "Before"],
    ["Subsequent to", "After"],
    ["In accordance with", "Per"],
    ["A number of", "Several"],
    ["In the case of", "For"],
    ["On the basis of", "Based on"],
].map(([phrase, shortForm]) => [phraseRegex(phrase), shortForm]);
|
|
43
|
-
/**
 * Replace every triple-backtick fenced span with a NUL-delimited placeholder
 * token and remember the original text of each span.
 *
 * @param {string} content - Text that may contain fenced code blocks.
 * @returns {{ text: string, blocks: Map<string, string> }} The text with
 *   placeholders, plus a map from placeholder to the fenced text it replaced.
 */
function extractCodeBlocks(content) {
    const blocks = new Map();
    const stash = (fenced) => {
        // Tokens are numbered in order of appearance; the map size is the next index.
        const token = `\x00CODEBLOCK_${blocks.size}\x00`;
        blocks.set(token, fenced);
        return token;
    };
    return { text: content.replace(/```[\s\S]*?```/g, stash), blocks };
}
|
|
53
|
-
/**
 * Put extracted code blocks back in place of their placeholders.
 *
 * @param {string} text - Text containing placeholder tokens.
 * @param {Iterable<[string, string]>} blocks - Pairs of placeholder and
 *   original code block (for example the Map built by extractCodeBlocks).
 * @returns {string} Text with the first occurrence of each placeholder
 *   replaced by its code block.
 */
function restoreCodeBlocks(text, blocks) {
    let restored = text;
    for (const [placeholder, block] of blocks) {
        // A replacer function inserts the block verbatim. A plain replacement
        // string would expand "$&", "$$", "$'" etc. found inside the code.
        restored = restored.replace(placeholder, () => block);
    }
    return restored;
}
|
|
60
|
-
// ─── Light Transformations ──────────────────────────────────────────────────
|
|
61
|
-
/**
 * Tidy vertical and trailing whitespace.
 *
 * A run of four or more newlines separated only by whitespace is replaced by
 * two newlines, then trailing spaces and tabs are stripped from every line.
 *
 * @param {string} content - Text to normalize.
 * @returns {string} Normalized text.
 */
function normalizeWhitespace(content) {
    return content
        .replace(/(\n\s*){3,}\n/g, "\n\n")
        .replace(/[ \t]+$/gm, "");
}
|
|
68
|
-
/**
 * Strip HTML-style comments (<!-- ... -->), including multi-line ones.
 *
 * @param {string} content - Text to clean.
 * @returns {string} Text with every comment removed.
 */
function removeMarkdownComments(content) {
    const htmlComment = /<!--[\s\S]*?-->/g;
    return content.replace(htmlComment, "");
}
|
|
71
|
-
/**
 * Blank out lines that consist only of three or more "-", "*" or "_"
 * characters (optionally surrounded by whitespace).
 *
 * @param {string} content - Markdown text.
 * @returns {string} Text with the matched rule text replaced by "".
 */
function removeHorizontalRules(content) {
    const ruleLine = /^\s*[-*_]{3,}\s*$/gm;
    return content.replace(ruleLine, "");
}
|
|
75
|
-
/**
 * Collapse a run of two or more consecutive empty list items (a "-", "*" or
 * "+" marker with nothing after it) down to the last item of the run.
 *
 * @param {string} content - Markdown text.
 * @returns {string} Text with repeated empty list items collapsed.
 */
function collapseEmptyListItems(content) {
    const emptyItemRun = /(^[ \t]*[-*+]\s*$\n){2,}/gm;
    // "$1" is the text captured by the final repetition of the group.
    return content.replace(emptyItemRun, "$1");
}
|
|
79
|
-
/**
 * Run the light passes in order: whitespace normalization, comment removal,
 * horizontal-rule removal, and empty-list-item collapsing.
 *
 * @param {string} content - Text to transform.
 * @returns {{ content: string, count: number }} The transformed text and the
 *   number of passes that actually changed it.
 */
function applyLightTransformations(content) {
    const passes = [
        normalizeWhitespace,
        removeMarkdownComments,
        removeHorizontalRules,
        collapseEmptyListItems,
    ];
    let count = 0;
    let current = content;
    for (const pass of passes) {
        const next = pass(current);
        if (next !== current) {
            count += 1;
        }
        current = next;
    }
    return { content: current, count };
}
|
|
100
|
-
// ─── Moderate Transformations ───────────────────────────────────────────────
|
|
101
|
-
/**
 * Apply every VERBOSE_PHRASES entry in table order, replacing each verbose
 * phrase with its shorter form.
 *
 * @param {string} content - Text to abbreviate.
 * @returns {{ content: string, count: number }} The rewritten text and the
 *   number of table entries that changed it.
 */
function abbreviateVerbosePhrases(content) {
    let count = 0;
    const abbreviated = VERBOSE_PHRASES.reduce((text, [pattern, replacement]) => {
        const rewritten = text.replace(pattern, replacement);
        if (rewritten !== text) {
            count += 1;
        }
        return rewritten;
    }, content);
    return { content: abbreviated, count };
}
|
|
112
|
-
/**
 * Drop lines whose trimmed text is exactly one of: N/A, (none), (empty),
 * (not applicable) — compared case-insensitively.
 *
 * @param {string} content - Text to filter.
 * @returns {string} Text without the placeholder-only lines.
 */
function removeBoilerplateLines(content) {
    const placeholderOnly = /^(?:N\/A|\(none\)|\(empty\)|\(not applicable\))$/i;
    return content
        .split("\n")
        .filter((line) => !placeholderOnly.test(line.trim()))
        .join("\n");
}
|
|
124
|
-
/**
 * Remove a line when it is identical to the line immediately before it.
 * Blank (whitespace-only) lines are always kept.
 *
 * @param {string} content - Text to deduplicate.
 * @returns {string} Text without immediately repeated non-blank lines.
 */
function deduplicateConsecutiveLines(content) {
    return content
        .split("\n")
        .filter((line, index, all) => index === 0 || line !== all[index - 1] || line.trim() === "")
        .join("\n");
}
|
|
134
|
-
/**
 * Remove excessive padding in markdown table cells, e.g.
 * "|  a  |  b  |" becomes "| a | b |".
 *
 * A cell is rewritten only when it has at least two spaces/tabs of padding
 * on both sides.
 *
 * @param {string} content - Markdown text.
 * @returns {string} Text with padded table cells collapsed to single spaces.
 */
function collapseTableFormatting(content) {
    // The closing pipe is asserted with a lookahead rather than consumed, so
    // it can also open the next cell. Consuming it would skip every other
    // cell in a row.
    return content.replace(/\|[ \t]{2,}([^|\n]*?)[ \t]{2,}(?=\|)/g, (_, cellContent) => {
        return `| ${cellContent.trim()} `;
    });
}
|
|
141
|
-
/**
 * Run the moderate passes in order: phrase abbreviation, boilerplate-line
 * removal, consecutive-line deduplication, and table-padding collapse.
 *
 * @param {string} content - Text to transform.
 * @returns {{ content: string, count: number }} The transformed text and a
 *   count: the phrase-abbreviation count plus one for each later pass that
 *   changed the text.
 */
function applyModerateTransformations(content) {
    const phrasePass = abbreviateVerbosePhrases(content);
    let count = phrasePass.count;
    let current = phrasePass.content;
    const passes = [
        removeBoilerplateLines,
        deduplicateConsecutiveLines,
        collapseTableFormatting,
    ];
    for (const pass of passes) {
        const next = pass(current);
        if (next !== current) {
            count += 1;
        }
        current = next;
    }
    return { content: current, count };
}
|
|
161
|
-
// ─── Aggressive Transformations ─────────────────────────────────────────────
|
|
162
|
-
/**
 * Strip markdown emphasis markers and keep the emphasized text.
 *
 * Bold (**text**, __text__) is unwrapped first, then italic (*text*, _text_).
 * The italic patterns skip markers that touch a word character on the
 * outside, so snake_case and "2*3*4" are left alone.
 *
 * @param {string} content - Markdown text.
 * @returns {string} Text without emphasis markers.
 */
function removeMarkdownEmphasis(content) {
    return content
        .replace(/\*\*(.+?)\*\*/g, "$1")
        .replace(/__(.+?)__/g, "$1")
        .replace(/(?<!\w)\*([^*\n]+?)\*(?!\w)/g, "$1")
        .replace(/(?<!\w)_([^_\n]+?)_(?!\w)/g, "$1");
}
|
|
171
|
-
/**
 * Replace inline markdown links and images with their visible text:
 * [text](url) → text, ![alt](url) → alt.
 *
 * @param {string} content - Markdown text.
 * @returns {string} Text with link/image syntax reduced to the label.
 */
function removeMarkdownLinks(content) {
    // The optional leading "!" is consumed too, so an image does not leave a
    // stray "!" in front of its alt text.
    return content.replace(/!?\[([^\]]+)\]\([^)]+\)/g, "$1");
}
|
|
175
|
-
/**
 * Shorten every line longer than 300 characters.
 *
 * Within the first 300 characters, the cut point is chosen in this order:
 *   1. just after the last ". ", "! " or "? " if it lies past index 150
 *      (the punctuation is kept);
 *   2. at the last space if it lies past index 150;
 *   3. otherwise a hard cut at 300 characters.
 *
 * @param {string} content - Text to process.
 * @returns {string} Text in which no line exceeds 300 characters.
 */
function truncateLongLines(content) {
    const shorten = (line) => {
        if (line.length <= 300) {
            return line;
        }
        const head = line.slice(0, 300);
        const sentenceEnd = [". ", "! ", "? "].reduce(
            (best, mark) => Math.max(best, head.lastIndexOf(mark)),
            -1,
        );
        if (sentenceEnd > 150) {
            return line.slice(0, sentenceEnd + 1);
        }
        const wordBreak = head.lastIndexOf(" ");
        return wordBreak > 150 ? line.slice(0, wordBreak) : head;
    };
    return content
        .split("\n")
        .map((line) => shorten(line))
        .join("\n");
}
|
|
195
|
-
/**
 * Strip list markers at the start of a line: "-", "*", "+", or a number
 * followed by "." — together with the indentation before the marker and the
 * whitespace after it.
 *
 * @param {string} content - Markdown text.
 * @returns {string} Text without leading list markers.
 */
function removeBulletMarkers(content) {
    const listMarker = /^[ \t]*(?:[-*+]|\d+\.)\s+/gm;
    return content.replace(listMarker, "");
}
|
|
199
|
-
/**
 * Strip blockquote markers: one or more ">" at the start of a line, with any
 * leading indentation and at most one whitespace character after the marker.
 *
 * @param {string} content - Markdown text.
 * @returns {string} Text without leading ">" markers.
 */
function removeBlockquoteMarkers(content) {
    const quoteMarker = /^[ \t]*>+\s?/gm;
    return content.replace(quoteMarker, "");
}
|
|
202
|
-
/**
 * Drop repeated "Key: value" lines.
 *
 * A "Key: value" line whose trimmed text has already been seen is skipped.
 * The seen set is cleared by a blank line, or by a non-"Key: value" line
 * that does not directly follow a "Key: value" line. A single non-blank
 * non-"Key: value" line right after a "Key: value" line does not clear it.
 *
 * @param {string} content - Text to deduplicate.
 * @returns {string} Text without the repeated "Key: value" lines.
 */
function deduplicateStructuralPatterns(content) {
    const keyValueLine = /^(\w[\w\s]*?):\s+(.+)$/;
    const seen = new Set();
    const kept = [];
    let prevStructural = false;
    content.split("\n").forEach((line) => {
        const trimmed = line.trim();
        if (!keyValueLine.test(trimmed)) {
            if (trimmed === "" || !prevStructural) {
                seen.clear();
            }
            prevStructural = false;
            kept.push(line);
            return;
        }
        prevStructural = true;
        if (seen.has(trimmed)) {
            return;
        }
        seen.add(trimmed);
        kept.push(line);
    });
    return kept.join("\n");
}
|
|
231
|
-
/**
 * Run the aggressive passes in order: emphasis removal, link removal,
 * long-line truncation, bullet-marker removal, blockquote-marker removal,
 * and "Key: value" deduplication.
 *
 * @param {string} content - Text to transform.
 * @param {boolean} preserveHeadings - Accepted for the caller's signature;
 *   not read by this function.
 * @returns {{ content: string, count: number }} The transformed text and the
 *   number of passes that actually changed it.
 */
function applyAggressiveTransformations(content, preserveHeadings) {
    const passes = [
        removeMarkdownEmphasis,
        removeMarkdownLinks,
        truncateLongLines,
        removeBulletMarkers,
        removeBlockquoteMarkers,
        deduplicateStructuralPatterns,
    ];
    let count = 0;
    let current = content;
    for (const pass of passes) {
        const next = pass(current);
        if (next !== current) {
            count += 1;
        }
        current = next;
    }
    return { content: current, count };
}
|
|
260
|
-
/**
 * Replace every markdown heading line (1–6 "#" followed by whitespace and
 * text) with a NUL-delimited placeholder token and remember the original
 * line.
 *
 * @param {string} content - Markdown text.
 * @returns {{ text: string, headings: Map<string, string> }} The text with
 *   placeholders, plus a map from placeholder to the heading it replaced.
 */
function extractHeadings(content) {
    const headings = new Map();
    const stash = (headingLine) => {
        // Tokens are numbered in order of appearance; the map size is the next index.
        const token = `\x00HEADING_${headings.size}\x00`;
        headings.set(token, headingLine);
        return token;
    };
    return { text: content.replace(/^(#{1,6}\s.+)$/gm, stash), headings };
}
|
|
270
|
-
/**
 * Put extracted headings back in place of their placeholders.
 *
 * @param {string} text - Text containing placeholder tokens.
 * @param {Iterable<[string, string]>} headings - Pairs of placeholder and
 *   original heading line (for example the Map built by extractHeadings).
 * @returns {string} Text with the first occurrence of each placeholder
 *   replaced by its heading.
 */
function restoreHeadings(text, headings) {
    let restored = text;
    for (const [placeholder, heading] of headings) {
        // A replacer function inserts the heading verbatim. A plain replacement
        // string would expand "$&", "$$", "$'" etc. found inside the heading.
        restored = restored.replace(placeholder, () => heading);
    }
    return restored;
}
|
|
277
|
-
// ─── Public API ─────────────────────────────────────────────────────────────
|
|
278
|
-
/**
 * Compress prompt content using deterministic text transformations.
 *
 * Light passes always run; moderate passes run for levels "moderate" and
 * "aggressive"; aggressive passes run only for "aggressive". When
 * options.targetChars is set, compression stops after the light or moderate
 * tier as soon as the restored length fits the target.
 *
 * @param {string} content - Text to compress.
 * @param {object} [options]
 * @param {string} [options.level="moderate"] - "light", "moderate" or "aggressive".
 * @param {boolean} [options.preserveHeadings=true] - Shield heading lines from the passes.
 * @param {boolean} [options.preserveCodeBlocks=true] - Shield fenced code blocks from the passes.
 * @param {number} [options.targetChars] - Optional size at which to stop early.
 * @returns {{ content: string, originalChars: number, compressedChars: number,
 *   savingsPercent: number, level: string, transformationsApplied: number }}
 */
export function compressPrompt(content, options) {
    const level = options?.level ?? "moderate";
    const keepHeadings = options?.preserveHeadings ?? true;
    const keepCodeBlocks = options?.preserveCodeBlocks ?? true;
    if (content === "") {
        return {
            content: "",
            originalChars: 0,
            compressedChars: 0,
            savingsPercent: 0,
            level,
            transformationsApplied: 0,
        };
    }
    const originalChars = content.length;
    let working = content;
    let applied = 0;
    let codeBlocks = null;
    let headings = null;
    // Code blocks are extracted before headings, matching the restore order
    // used by buildResult/getRestoredLength (headings first, then code).
    if (keepCodeBlocks) {
        ({ text: working, blocks: codeBlocks } = extractCodeBlocks(working));
    }
    if (keepHeadings) {
        ({ text: working, headings } = extractHeadings(working));
    }
    const finish = () => buildResult(working, originalChars, level, applied, codeBlocks, headings);
    const targetMet = () => Boolean(options?.targetChars)
        && getRestoredLength(working, codeBlocks, headings) <= options.targetChars;
    const runTier = (tier, ...extraArgs) => {
        const outcome = tier(working, ...extraArgs);
        working = outcome.content;
        applied += outcome.count;
    };
    runTier(applyLightTransformations);
    if (targetMet()) {
        return finish();
    }
    if (level === "moderate" || level === "aggressive") {
        runTier(applyModerateTransformations);
        if (targetMet()) {
            return finish();
        }
    }
    if (level === "aggressive") {
        runTier(applyAggressiveTransformations, keepHeadings);
    }
    return finish();
}
|
|
337
|
-
/**
 * Compress with a target size — tries "light", then "moderate", then
 * "aggressive", and returns the first result that fits. If even the
 * aggressive result is over target, that result is returned as best effort.
 * Content already within the target is returned untouched.
 *
 * @param {string} content - Text to compress.
 * @param {number} targetChars - Desired maximum length in characters.
 * @returns {{ content: string, originalChars: number, compressedChars: number,
 *   savingsPercent: number, level: string, transformationsApplied: number }}
 */
export function compressToTarget(content, targetChars) {
    if (content.length <= targetChars) {
        const size = content.length;
        return {
            content,
            originalChars: size,
            compressedChars: size,
            savingsPercent: 0,
            level: "light",
            transformationsApplied: 0,
        };
    }
    let attempt;
    for (const level of ["light", "moderate", "aggressive"]) {
        attempt = compressPrompt(content, { level, targetChars });
        if (attempt.compressedChars <= targetChars) {
            break;
        }
    }
    // Either the first level that fit, or the aggressive best effort.
    return attempt;
}
|
|
366
|
-
// ─── Helpers ────────────────────────────────────────────────────────────────
|
|
367
|
-
/**
 * Length the text would have once headings and code blocks are restored.
 *
 * @param {string} text - Working text containing placeholders.
 * @param {Map<string, string>|null} codeBlocks - Extracted code blocks, or null.
 * @param {Map<string, string>|null} headings - Extracted headings, or null.
 * @returns {number} Character count after restoration.
 */
function getRestoredLength(text, codeBlocks, headings) {
    const withHeadings = headings ? restoreHeadings(text, headings) : text;
    const restored = codeBlocks ? restoreCodeBlocks(withHeadings, codeBlocks) : withHeadings;
    return restored.length;
}
|
|
375
|
-
/**
 * Restore headings and code blocks, then assemble the compression result.
 *
 * @param {string} working - Compressed text, possibly containing placeholders.
 * @param {number} originalChars - Length of the text before compression.
 * @param {string} level - Compression level to report.
 * @param {number} transformationsApplied - Number of passes that changed the text.
 * @param {Map<string, string>|null} codeBlocks - Extracted code blocks, or null.
 * @param {Map<string, string>|null} headings - Extracted headings, or null.
 * @returns {{ content: string, originalChars: number, compressedChars: number,
 *   savingsPercent: number, level: string, transformationsApplied: number }}
 *   savingsPercent is rounded to two decimals and is 0 when originalChars is 0.
 */
function buildResult(working, originalChars, level, transformationsApplied, codeBlocks, headings) {
    const withHeadings = headings ? restoreHeadings(working, headings) : working;
    const content = codeBlocks ? restoreCodeBlocks(withHeadings, codeBlocks) : withHeadings;
    const compressedChars = content.length;
    let savingsPercent = 0;
    if (originalChars > 0) {
        savingsPercent = Math.round(((originalChars - compressedChars) / originalChars) * 10000) / 100;
    }
    return {
        content,
        originalChars,
        compressedChars,
        savingsPercent,
        level,
        transformationsApplied,
    };
}
|
|
@@ -1,254 +0,0 @@
|
|
|
1
|
-
// GSD Extension — Semantic Chunker with TF-IDF Relevance Scoring
|
|
2
|
-
// Splits code/text into semantic chunks and selects the most relevant ones for a given task.
|
|
3
|
-
// Pure TypeScript — no external dependencies.
|
|
4
|
-
// ─── Constants ──────────────────────────────────────────────────────────────
|
|
5
|
-
// A line that opens a declaration at column 0, optionally prefixed by
// `export` and/or `async`.
const CODE_BOUNDARY_RE = /^(export\s+)?(async\s+)?(function|class|interface|type|const|enum)\s/;
// A Markdown heading: one to six `#` characters followed by whitespace.
const MARKDOWN_HEADING_RE = /^#{1,6}\s/;
// Common words that carry no relevance signal; tokenize() drops them.
const STOP_WORDS = new Set([
    "the a an is are was were be to of in",
    "for on with at by from this that it as",
    "or and not but if do no so up its has",
    "had get set can may all use new one two",
    "also each than been into most only over such",
    "how some any our his her out did let say she",
].join(" ").split(" "));
// Defaults used when the caller's options leave a value unset.
const DEFAULT_MIN_LINES = 3;
const DEFAULT_MAX_LINES = 80;
const DEFAULT_MAX_CHUNKS = 5;
const DEFAULT_MIN_SCORE = 0.1;
|
|
19
|
-
/**
 * Classify content as "markdown", "code" or "text" from its first 50 lines.
 *
 * A line counts toward code when it matches CODE_BOUNDARY_RE or starts with
 * an `import` statement (leading whitespace allowed), and toward markdown
 * when it matches MARKDOWN_HEADING_RE. Markdown wins only with at least two
 * headings that also outnumber the code lines; otherwise two code lines are
 * enough for "code", and everything else is "text".
 *
 * @param {string[]} lines - Content already split into lines.
 * @returns {"markdown" | "code" | "text"} Detected content type.
 */
function detectContentType(lines) {
    let codeHits = 0;
    let headingHits = 0;
    for (const line of lines.slice(0, 50)) {
        if (CODE_BOUNDARY_RE.test(line) || /^\s*import\s/.test(line)) {
            codeHits += 1;
        }
        if (MARKDOWN_HEADING_RE.test(line)) {
            headingHits += 1;
        }
    }
    const looksLikeMarkdown = headingHits >= 2 && headingHits > codeHits;
    if (looksLikeMarkdown) {
        return "markdown";
    }
    return codeHits >= 2 ? "code" : "text";
}
|
|
38
|
-
// ─── Tokenizer ──────────────────────────────────────────────────────────────
|
|
39
|
-
/**
 * Break text into lowercase terms for relevance scoring.
 *
 * The text is lowercased and split on runs of whitespace / non-word
 * characters; terms shorter than two characters and terms listed in
 * STOP_WORDS are discarded.
 *
 * @param {string} text - Text to tokenize.
 * @returns {string[]} Remaining terms, in order of appearance (duplicates kept).
 */
function tokenize(text) {
    const words = text.toLowerCase().split(/[\s\W]+/);
    const kept = [];
    for (const word of words) {
        if (word.length < 2 || STOP_WORDS.has(word)) {
            continue;
        }
        kept.push(word);
    }
    return kept;
}
|
|
45
|
-
// ─── splitIntoChunks ────────────────────────────────────────────────────────
|
|
46
|
-
/**
 * Split content into contiguous, line-addressed chunks.
 *
 * Boundaries come from the helper matching the detected content type
 * (code, markdown, or plain text). Chunks longer than `maxLines` are cut into
 * `maxLines`-sized pieces, and any chunk shorter than `minLines` is appended
 * to the chunk before it (so a merged chunk may exceed `maxLines`; a short
 * first chunk has no predecessor and is kept as is).
 *
 * @param {string} content - Text to split; empty or whitespace-only yields [].
 * @param {{minLines?: number, maxLines?: number}} [options] - Size limits;
 *   default to DEFAULT_MIN_LINES / DEFAULT_MAX_LINES. A `maxLines` below 1
 *   (or NaN) is treated as 1 and fractional values are floored.
 * @returns {{content: string, startLine: number, endLine: number, score: number}[]}
 *   Chunks in document order with 1-based inclusive line numbers and score 0.
 */
export function splitIntoChunks(content, options) {
    if (!content || content.trim().length === 0)
        return [];
    const minLines = options?.minLines ?? DEFAULT_MIN_LINES;
    const requestedMax = options?.maxLines ?? DEFAULT_MAX_LINES;
    // The splitting loop advances by maxLines, so a step below 1 would never
    // terminate; clamp to the smallest usable chunk size instead.
    const maxLines = requestedMax >= 1 ? Math.floor(requestedMax) : 1;
    const lines = content.split("\n");
    let boundaries;
    switch (detectContentType(lines)) {
        case "code":
            boundaries = findCodeBoundaries(lines);
            break;
        case "markdown":
            boundaries = findMarkdownBoundaries(lines);
            break;
        default:
            boundaries = findTextBoundaries(lines);
            break;
    }
    // The first chunk must start at the top of the document.
    if (boundaries[0] !== 0) {
        boundaries.unshift(0);
    }
    // One raw chunk per boundary, running up to the line before the next one.
    const rawChunks = boundaries.map((start, i) => {
        const end = i + 1 < boundaries.length ? boundaries[i + 1] - 1 : lines.length - 1;
        return {
            content: lines.slice(start, end + 1).join("\n"),
            startLine: start + 1, // 1-based
            endLine: end + 1, // 1-based
            score: 0,
        };
    });
    // Cut oversized chunks into maxLines-sized pieces.
    const sizedChunks = [];
    for (const chunk of rawChunks) {
        if (chunk.endLine - chunk.startLine + 1 <= maxLines) {
            sizedChunks.push(chunk);
            continue;
        }
        const chunkLines = chunk.content.split("\n");
        for (let offset = 0; offset < chunkLines.length; offset += maxLines) {
            const slice = chunkLines.slice(offset, offset + maxLines);
            sizedChunks.push({
                content: slice.join("\n"),
                startLine: chunk.startLine + offset,
                endLine: chunk.startLine + offset + slice.length - 1,
                score: 0,
            });
        }
    }
    // Fold chunks that are too short into their predecessor.
    const merged = [];
    for (const chunk of sizedChunks) {
        const previous = merged[merged.length - 1];
        const lineCount = chunk.endLine - chunk.startLine + 1;
        if (previous !== undefined && lineCount < minLines) {
            previous.content += "\n" + chunk.content;
            previous.endLine = chunk.endLine;
        }
        else {
            merged.push({ ...chunk });
        }
    }
    return merged;
}
|
|
119
|
-
/**
 * Locate the lines that open a top-level declaration.
 *
 * Every line matching CODE_BOUNDARY_RE is a boundary. Indices are visited
 * once in increasing order, so no duplicate check is needed.
 *
 * @param {string[]} lines - Content split into lines.
 * @returns {number[]} Zero-based indices of boundary lines, ascending.
 */
function findCodeBoundaries(lines) {
    const boundaries = [];
    for (let i = 0; i < lines.length; i++) {
        if (CODE_BOUNDARY_RE.test(lines[i])) {
            boundaries.push(i);
        }
    }
    return boundaries;
}
|
|
134
|
-
/**
 * Locate Markdown heading lines.
 *
 * @param {string[]} lines - Content split into lines.
 * @returns {number[]} Zero-based indices of lines matching
 *   MARKDOWN_HEADING_RE, in ascending order.
 */
function findMarkdownBoundaries(lines) {
    const headingIndexes = [];
    for (const [index, line] of lines.entries()) {
        if (MARKDOWN_HEADING_RE.test(line)) {
            headingIndexes.push(index);
        }
    }
    return headingIndexes;
}
|
|
143
|
-
/**
 * Locate paragraph starts in plain text.
 *
 * Index 0 is always a boundary; after that, a boundary is any non-blank line
 * that directly follows a blank (empty or whitespace-only) line.
 *
 * @param {string[]} lines - Content split into lines.
 * @returns {number[]} Zero-based boundary indices, ascending.
 */
function findTextBoundaries(lines) {
    const isBlank = (line) => line.trim() === "";
    const starts = [0];
    let index = 1;
    while (index < lines.length) {
        if (isBlank(lines[index - 1]) && !isBlank(lines[index])) {
            starts.push(index);
        }
        index += 1;
    }
    return starts;
}
|
|
152
|
-
// ─── scoreChunks ────────────────────────────────────────────────────────────
|
|
153
|
-
/**
 * Score chunks against a query with TF-IDF and normalize the result to 0-1.
 *
 * Each chunk is tokenized once. Document frequency counts a chunk at most
 * once per distinct query term, so repeating a word in the query does not
 * distort its IDF; a repeated term still contributes once per occurrence to
 * the summed score. Scores are divided by the highest score when that score
 * is positive, so the best chunk gets 1.
 *
 * @param {{content: string}[]} chunks - Chunks to score; not mutated.
 * @param {string} query - Free-text query.
 * @returns {object[]} Copies of the chunks, in input order, with `score` set.
 *   All scores are 0 when the query yields no terms.
 */
export function scoreChunks(chunks, query) {
    if (chunks.length === 0)
        return [];
    const queryTerms = tokenize(query);
    if (queryTerms.length === 0) {
        return chunks.map((chunk) => ({ ...chunk, score: 0 }));
    }
    const distinctTerms = new Set(queryTerms);
    const totalChunks = chunks.length;
    // Tokenize every chunk once and keep its term counts for both passes.
    const chunkStats = chunks.map((chunk) => {
        const tokens = tokenize(chunk.content);
        const termFreq = new Map();
        for (const token of tokens) {
            termFreq.set(token, (termFreq.get(token) ?? 0) + 1);
        }
        return { totalTerms: tokens.length, termFreq };
    });
    // IDF per distinct query term (smoothed so unseen terms stay finite).
    const idf = new Map();
    for (const term of distinctTerms) {
        let df = 0;
        for (const { termFreq } of chunkStats) {
            if (termFreq.has(term)) {
                df += 1;
            }
        }
        idf.set(term, Math.log(1 + totalChunks / (1 + df)));
    }
    const scored = chunks.map((chunk, i) => {
        const { totalTerms, termFreq } = chunkStats[i];
        if (totalTerms === 0)
            return { ...chunk, score: 0 };
        let score = 0;
        for (const term of queryTerms) {
            const tf = (termFreq.get(term) ?? 0) / totalTerms;
            score += tf * idf.get(term);
        }
        return { ...chunk, score };
    });
    // Normalize to 0-1. A plain loop avoids spreading a huge array into
    // Math.max, which can exceed the engine's argument limit.
    let maxScore = 0;
    for (const { score } of scored) {
        if (score > maxScore) {
            maxScore = score;
        }
    }
    if (maxScore > 0) {
        for (const chunk of scored) {
            chunk.score = chunk.score / maxScore;
        }
    }
    return scored;
}
|
|
206
|
-
// ─── chunkByRelevance ───────────────────────────────────────────────────────
|
|
207
|
-
/**
 * Pick the chunks of `content` that are most relevant to `query`.
 *
 * Content is split with splitIntoChunks and scored with scoreChunks; chunks
 * scoring below `minScore` are dropped, the `maxChunks` highest-scoring ones
 * are kept, and the survivors are returned in document order.
 *
 * @param {string} content - Text to chunk.
 * @param {string} query - Query the chunks are scored against.
 * @param {{maxChunks?: number, minScore?: number, minChunkLines?: number,
 *   maxChunkLines?: number}} [options] - Overrides for the DEFAULT_* values.
 * @returns {{chunks: object[], totalChunks: number, omittedChunks: number,
 *   savingsPercent: number}} `savingsPercent` is the whole-number share of
 *   characters left out, never below 0.
 */
export function chunkByRelevance(content, query, options) {
    const limit = options?.maxChunks ?? DEFAULT_MAX_CHUNKS;
    const scoreFloor = options?.minScore ?? DEFAULT_MIN_SCORE;
    const sizeLimits = {
        minLines: options?.minChunkLines ?? DEFAULT_MIN_LINES,
        maxLines: options?.maxChunkLines ?? DEFAULT_MAX_LINES,
    };
    const allChunks = splitIntoChunks(content, sizeLimits);
    if (allChunks.length === 0) {
        return { chunks: [], totalChunks: 0, omittedChunks: 0, savingsPercent: 0 };
    }
    // Rank the chunks that clear the score floor, best first, and keep the top ones.
    const ranked = scoreChunks(allChunks, query).filter((chunk) => chunk.score >= scoreFloor);
    ranked.sort((a, b) => b.score - a.score);
    const selected = ranked.slice(0, limit);
    // Present the winners in original document order.
    selected.sort((a, b) => a.startLine - b.startLine);
    const totalChars = content.length;
    let selectedChars = 0;
    for (const chunk of selected) {
        selectedChars += chunk.content.length;
    }
    let savingsPercent = 0;
    if (totalChars > 0) {
        savingsPercent = Math.round(((totalChars - selectedChars) / totalChars) * 100);
    }
    return {
        chunks: selected,
        totalChunks: allChunks.length,
        omittedChunks: allChunks.length - selected.length,
        savingsPercent: Math.max(0, savingsPercent),
    };
}
|
|
236
|
-
// ─── formatChunks ───────────────────────────────────────────────────────────
|
|
237
|
-
/**
 * Render selected chunks as text with line-range headers.
 *
 * Each chunk is emitted as a `[Lines start-end]` header followed by its
 * content. When a chunk does not start right after the previous one, an
 * omission marker with the number of skipped lines is inserted between them
 * (never before the first chunk).
 *
 * @param {{chunks: {content: string, startLine: number, endLine: number}[]}} result
 *   Selection result whose chunks are in document order.
 * @param {string} filePath - Only used in the placeholder for an empty result.
 * @returns {string} Newline-joined output, or a one-line placeholder when
 *   there are no chunks.
 */
export function formatChunks(result, filePath) {
    const { chunks } = result;
    if (chunks.length === 0) {
        return `[${filePath}: empty or no relevant chunks]`;
    }
    const sections = [];
    let previousEnd = 0;
    for (const { content, startLine, endLine } of chunks) {
        const skipped = startLine - previousEnd - 1;
        if (previousEnd > 0 && skipped > 0) {
            sections.push(`[...${skipped} lines omitted...]`);
        }
        sections.push(`[Lines ${startLine}-${endLine}]`, content);
        previousEnd = endLine;
    }
    return sections.join("\n");
}
|