k0ntext 3.6.0 → 3.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analyzer/intelligent-analyzer.d.ts +7 -0
- package/dist/analyzer/intelligent-analyzer.d.ts.map +1 -1
- package/dist/analyzer/intelligent-analyzer.js +46 -1
- package/dist/analyzer/intelligent-analyzer.js.map +1 -1
- package/dist/cli/commands/embeddings-refresh.d.ts.map +1 -1
- package/dist/cli/commands/embeddings-refresh.js +4 -1
- package/dist/cli/commands/embeddings-refresh.js.map +1 -1
- package/dist/cli/commands/migrate.d.ts.map +1 -1
- package/dist/cli/commands/migrate.js +8 -0
- package/dist/cli/commands/migrate.js.map +1 -1
- package/dist/cli/repl/init/wizard.d.ts.map +1 -1
- package/dist/cli/repl/init/wizard.js +12 -4
- package/dist/cli/repl/init/wizard.js.map +1 -1
- package/dist/db/schema.d.ts +1 -1
- package/dist/db/schema.js +1 -1
- package/dist/embeddings/openrouter.d.ts.map +1 -1
- package/dist/embeddings/openrouter.js +8 -3
- package/dist/embeddings/openrouter.js.map +1 -1
- package/dist/utils/chunking.d.ts +38 -0
- package/dist/utils/chunking.d.ts.map +1 -0
- package/dist/utils/chunking.js +133 -0
- package/dist/utils/chunking.js.map +1 -0
- package/dist/utils/encoding.d.ts +24 -0
- package/dist/utils/encoding.d.ts.map +1 -0
- package/dist/utils/encoding.js +32 -0
- package/dist/utils/encoding.js.map +1 -0
- package/dist/utils/index.d.ts +8 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +8 -0
- package/dist/utils/index.js.map +1 -0
- package/docs/plans/2026-02-09-v3.7.0-database-fixes-and-improvements.md +900 -0
- package/package.json +1 -1
- package/src/analyzer/intelligent-analyzer.ts +58 -1
- package/src/cli/commands/embeddings-refresh.ts +4 -1
- package/src/cli/commands/migrate.ts +8 -0
- package/src/cli/repl/init/wizard.ts +12 -4
- package/src/db/migrations/files/0015_add_sync_state_version_tracking.sql +18 -0
- package/src/db/schema.ts +1 -1
- package/src/embeddings/openrouter.ts +10 -4
- package/src/utils/chunking.ts +152 -0
- package/src/utils/encoding.ts +33 -0
- package/src/utils/index.ts +8 -0
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text Chunking Utility
|
|
3
|
+
*
|
|
4
|
+
* Splits large texts into chunks suitable for embedding generation.
|
|
5
|
+
* Handles token limits, word boundaries, and overlap for context preservation.
|
|
6
|
+
*/
|
|
7
|
+
/** Approximate number of characters per token for English text. */
const CHARS_PER_TOKEN = 4;

/** How far back from a hard cut to look for a natural break point. */
const BOUNDARY_WINDOW_CHARS = 200;

/**
 * Estimate token count for text.
 *
 * Uses a simple heuristic: ~4 characters per token for English text.
 * Only leading and trailing whitespace is ignored; interior whitespace
 * still counts towards the estimate.
 *
 * @param text - Text to estimate tokens for
 * @returns Estimated token count (0 for empty or whitespace-only text)
 */
export function estimateTokens(text) {
    if (!text)
        return 0;
    const trimmed = text.trim();
    if (trimmed.length === 0)
        return 0;
    return Math.ceil(trimmed.length / CHARS_PER_TOKEN);
}

/**
 * Report whether cutting `text` at `index` would separate a UTF-16
 * high surrogate from the low surrogate that follows it.
 */
function splitsSurrogatePair(text, index) {
    if (index <= 0 || index >= text.length)
        return false;
    const before = text.charCodeAt(index - 1);
    const after = text.charCodeAt(index);
    return before >= 0xD800 && before <= 0xDBFF && after >= 0xDC00 && after <= 0xDFFF;
}

/**
 * Find a natural cut position inside the last BOUNDARY_WINDOW_CHARS
 * characters of text[startIndex, endIndex): just after the last newline,
 * otherwise just after the last space. Returns -1 when neither exists.
 */
function findBreakIndex(text, startIndex, endIndex) {
    const searchStart = Math.max(startIndex, endIndex - BOUNDARY_WINDOW_CHARS);
    const searchWindow = text.slice(searchStart, endIndex);
    const lastNewline = searchWindow.lastIndexOf('\n');
    if (lastNewline !== -1)
        return searchStart + lastNewline + 1;
    const lastSpace = searchWindow.lastIndexOf(' ');
    if (lastSpace !== -1)
        return searchStart + lastSpace + 1;
    // A sentence-punctuation fallback is intentionally absent: once the
    // window holds no space or newline, "punctuation followed by whitespace
    // or end of window" can only match the window's final character, which
    // yields the same cut position as the hard limit.
    return -1;
}

/**
 * Split text into chunks that fit within max tokens.
 *
 * Prefers to cut after a newline, then after a space, within the last
 * 200 characters of each chunk; otherwise cuts at the character limit
 * (never in the middle of a surrogate pair when that can be avoided).
 * Optionally repeats the tail of each chunk at the start of the next one
 * to preserve context. All input is emitted: there is no cap on the
 * number of chunks.
 *
 * @param text - Text to chunk
 * @param maxTokens - Maximum tokens per chunk (default: 8000 for OpenRouter)
 * @param overlapTokens - Number of tokens to overlap between chunks
 *   (default: 0). The overlap is capped at half of a chunk.
 * @returns Array of text chunks; `['']` for empty or whitespace-only input
 * @throws {RangeError} If the text needs splitting and `maxTokens` does not
 *   allow at least one character per chunk (zero, negative, or NaN).
 */
export function chunkText(text, maxTokens = 8000, overlapTokens = 0) {
    // Handle empty or whitespace-only text
    if (!text || text.trim().length === 0) {
        return [''];
    }
    const trimmedText = text.trim();
    // If text is under the limit, return as-is
    if (estimateTokens(trimmedText) <= maxTokens) {
        return [trimmedText];
    }
    // Whole-character budget per chunk. Without at least one character per
    // chunk the loop below could never advance, so reject such limits.
    const maxChars = Math.floor(maxTokens * CHARS_PER_TOKEN);
    if (!(maxChars >= 1)) {
        throw new RangeError(`maxTokens must be a positive number, got ${maxTokens}`);
    }
    // Overlap in whole characters, never more than half a chunk.
    const overlapChars = overlapTokens > 0
        ? Math.min(Math.floor(overlapTokens * CHARS_PER_TOKEN), Math.floor(maxChars / 2))
        : 0;
    const chunks = [];
    let startIndex = 0;
    let previousEndIndex = 0;
    // Terminates without a loop cap: every pass ends with startIndex
    // strictly greater than before (see the progress check below).
    while (startIndex < trimmedText.length) {
        let endIndex = Math.min(startIndex + maxChars, trimmedText.length);
        // If not the last chunk, try to end on a natural boundary
        if (endIndex < trimmedText.length) {
            const breakIndex = findBreakIndex(trimmedText, startIndex, endIndex);
            if (breakIndex > startIndex) {
                endIndex = breakIndex;
            }
            else if (endIndex - 1 > startIndex && splitsSurrogatePair(trimmedText, endIndex)) {
                // Hard cut would leave half of an astral character behind.
                endIndex -= 1;
            }
        }
        chunks.push(trimmedText.slice(startIndex, endIndex));
        // Move to the next chunk, accounting for overlap
        let nextStart = endIndex;
        if (overlapChars > 0 && endIndex < trimmedText.length) {
            nextStart = endIndex - overlapChars;
            if (splitsSurrogatePair(trimmedText, nextStart)) {
                // Start on a full character; the overlap shrinks by one unit.
                nextStart += 1;
            }
            // Ensure progress: the overlap must not reach back into text
            // that the previous chunk already ended on.
            if (nextStart <= previousEndIndex) {
                nextStart = endIndex;
            }
        }
        previousEndIndex = endIndex;
        startIndex = nextStart;
    }
    return chunks;
}
|
|
122
|
+
/**
 * Split text into chunks sized for embedding generation.
 *
 * Delegates to chunkText with a budget of 8000 tokens per chunk (the
 * original note cites this as OpenRouter's limit for
 * text-embedding-3-small) and a 100 token overlap between consecutive
 * chunks so context carries across chunk borders.
 *
 * @param text - Text to chunk
 * @returns Array of text chunks suitable for embeddings
 */
export function chunkForEmbedding(text) {
    const embeddingMaxTokens = 8000;
    const embeddingOverlapTokens = 100;
    return chunkText(text, embeddingMaxTokens, embeddingOverlapTokens);
}
|
|
133
|
+
//# sourceMappingURL=chunking.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunking.js","sourceRoot":"","sources":["../../src/utils/chunking.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;;;;;;;GAQG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI;QAAE,OAAO,CAAC,CAAC;IAEpB,+CAA+C;IAC/C,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEnC,4DAA4D;IAC5D,yDAAyD;IACzD,OAAO,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACvC,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,SAAS,CACvB,IAAY,EACZ,YAAoB,IAAI,EACxB,gBAAwB,CAAC;IAEzB,kCAAkC;IAClC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,CAAC,EAAE,CAAC,CAAC;IACd,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAChC,MAAM,eAAe,GAAG,cAAc,CAAC,WAAW,CAAC,CAAC;IAEpD,2CAA2C;IAC3C,IAAI,eAAe,IAAI,SAAS,EAAE,CAAC;QACjC,OAAO,CAAC,WAAW,CAAC,CAAC;IACvB,CAAC;IAED,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,QAAQ,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,2CAA2C;IAC3E,MAAM,YAAY,GAAG,aAAa,GAAG,CAAC,CAAC;IAEvC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,gBAAgB,GAAG,CAAC,CAAC;IACzB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,MAAM,QAAQ,GAAG,IAAI,CAAC,CAAC,yCAAyC;IAEhE,OAAO,UAAU,GAAG,WAAW,CAAC,MAAM,IAAI,SAAS,GAAG,QAAQ,EAAE,CAAC;QAC/D,SAAS,EAAE,CAAC;QAEZ,qCAAqC;QACrC,IAAI,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,GAAG,QAAQ,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC;QAEnE,yDAAyD;QACzD,IAAI,QAAQ,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC;YAClC,sCAAsC;YACtC,MAAM,aAAa,GAAG,GAAG,CAAC,CAAC,4BAA4B;YACvD,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,QAAQ,GAAG,aAAa,CAAC,CAAC;YACnE,MAAM,SAAS,GAAG,WAAW,CAAC,KAAK,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;YAE3D,6DAA6D;YAC7D,IAAI,UAAU,GAAG,CAAC,CAAC,CAAC;YAEpB,sCAAsC;YACtC,MAAM,WAAW,GAAG,SAAS,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;YAChD,IAAI,WAAW,KAAK,CAAC,CAAC,EAAE,CAAC;gBACvB,UAAU,GAAG,WAAW,GAAG,WAAW,GAAG,CAAC,CAAC;YAC7C,CAAC;iBAAM,CAAC;gBACN,oCAAoC;gBACpC,MAAM,SAAS,GAAG,SAAS,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;gBAC7C,IAAI,SAAS,KAAK,CAAC,CAAC,EAAE,CAAC;oBACrB,UAAU,GAAG,WAAW,GAAG,SAAS,GAAG,CAAC,CAAC;gBAC3C,CAAC;qBAAM,CAAC;oBACN,uCAAuC;oBACvC,KAAK,IAAI,CAAC,GAAG,SAAS,CAAC,MAAM,
GAAG,CAAC,EAAE,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,CAAC,MAAM,GAAG,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;wBACjF,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;wBAC1B,IAAI,IAAI,KAAK,GAAG,IAAI,IAAI,KAAK,GAAG,IAAI,IAAI,KAAK,GAAG,EAAE,CAAC;4BACjD,oEAAoE;4BACpE,MAAM,QAAQ,GAAG,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;4BAClC,IAAI,CAAC,QAAQ,IAAI,QAAQ,KAAK,GAAG,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;gCACvD,UAAU,GAAG,WAAW,GAAG,CAAC,GAAG,CAAC,CAAC;gCACjC,MAAM;4BACR,CAAC;wBACH,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;YAED,iEAAiE;YACjE,IAAI,UAAU,GAAG,UAAU,EAAE,CAAC;gBAC5B,QAAQ,GAAG,UAAU,CAAC;YACxB,CAAC;QACH,CAAC;QAED,oBAAoB;QACpB,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;QACtD,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAEnB,6CAA6C;QAC7C,IAAI,YAAY,GAAG,CAAC,IAAI,QAAQ,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC;YACtD,uCAAuC;YACvC,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,YAAY,EAAE,QAAQ,GAAG,QAAQ,GAAG,CAAC,CAAC,CAAC;YAExE,0BAA0B;YAC1B,IAAI,UAAU,IAAI,gBAAgB,EAAE,CAAC;gBACnC,UAAU,GAAG,QAAQ,CAAC;YACxB,CAAC;YAED,6CAA6C;YAC7C,IAAI,UAAU,IAAI,QAAQ,EAAE,CAAC;gBAC3B,UAAU,GAAG,QAAQ,CAAC;YACxB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,UAAU,GAAG,QAAQ,CAAC;QACxB,CAAC;QAED,gBAAgB,GAAG,QAAQ,CAAC;IAC9B,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,OAAO,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC,gCAAgC;AACrE,CAAC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Encoding Utilities
|
|
3
|
+
*
|
|
4
|
+
* Handles text encoding issues across different platforms.
|
|
5
|
+
*/
|
|
6
|
+
/**
|
|
7
|
+
* Strip UTF-8 BOM (Byte Order Mark) from a string.
|
|
8
|
+
*
|
|
9
|
+
* The UTF-8 BOM is the byte sequence EF BB BF (U+FEFF).
|
|
10
|
+
* Some Windows editors add this to the start of files,
|
|
11
|
+
* which can break environment variable parsing.
|
|
12
|
+
*
|
|
13
|
+
* @param str - String that may contain a BOM
|
|
14
|
+
* @returns String with BOM removed if present
|
|
15
|
+
*/
|
|
16
|
+
export declare function stripBOM(str: string): string; // only a BOM at index 0 is removed; any later U+FEFF is kept
|
|
17
|
+
/**
|
|
18
|
+
* Detect if a string has a UTF-8 BOM.
|
|
19
|
+
*
|
|
20
|
+
* @param str - String to check
|
|
21
|
+
* @returns true if BOM is present
|
|
22
|
+
*/
|
|
23
|
+
export declare function hasBOM(str: string): boolean; // inspects only the first character (index 0) for U+FEFF
|
|
24
|
+
//# sourceMappingURL=encoding.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"encoding.d.ts","sourceRoot":"","sources":["../../src/utils/encoding.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;;;;;;;;GASG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAM5C;AAED;;;;;GAKG;AACH,wBAAgB,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAE3C"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Encoding Utilities
|
|
3
|
+
*
|
|
4
|
+
* Handles text encoding issues across different platforms.
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Remove a leading byte order mark from a string.
 *
 * A UTF-8 BOM (bytes EF BB BF) shows up in a decoded string as the
 * character U+FEFF. Some Windows editors write it at the start of files,
 * which can break environment variable parsing.
 *
 * Only the first character is examined; a U+FEFF anywhere else is kept.
 *
 * @param str - String that may contain a BOM
 * @returns The string without its leading BOM, or the same string if none
 */
export function stripBOM(str) {
    const startsWithBOM = str.charCodeAt(0) === 0xFEFF;
    return startsWithBOM ? str.slice(1) : str;
}
|
|
23
|
+
/**
 * Tell whether a string begins with a byte order mark (U+FEFF).
 *
 * Only the very first character is considered.
 *
 * @param str - String to check
 * @returns true if the string starts with a BOM, false otherwise
 */
export function hasBOM(str) {
    return str.startsWith('\uFEFF');
}
|
|
32
|
+
//# sourceMappingURL=encoding.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"encoding.js","sourceRoot":"","sources":["../../src/utils/encoding.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;;;;;;;;GASG;AACH,MAAM,UAAU,QAAQ,CAAC,GAAW;IAClC,8BAA8B;IAC9B,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,MAAM,EAAE,CAAC;QACjC,OAAO,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,MAAM,CAAC,GAAW;IAChC,OAAO,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC;AACtC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC"}
|