k0ntext 3.6.0 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/dist/analyzer/intelligent-analyzer.d.ts +7 -0
  2. package/dist/analyzer/intelligent-analyzer.d.ts.map +1 -1
  3. package/dist/analyzer/intelligent-analyzer.js +46 -1
  4. package/dist/analyzer/intelligent-analyzer.js.map +1 -1
  5. package/dist/cli/commands/embeddings-refresh.d.ts.map +1 -1
  6. package/dist/cli/commands/embeddings-refresh.js +4 -1
  7. package/dist/cli/commands/embeddings-refresh.js.map +1 -1
  8. package/dist/cli/commands/migrate.d.ts.map +1 -1
  9. package/dist/cli/commands/migrate.js +8 -0
  10. package/dist/cli/commands/migrate.js.map +1 -1
  11. package/dist/cli/repl/init/wizard.d.ts.map +1 -1
  12. package/dist/cli/repl/init/wizard.js +12 -4
  13. package/dist/cli/repl/init/wizard.js.map +1 -1
  14. package/dist/db/schema.d.ts +1 -1
  15. package/dist/db/schema.js +1 -1
  16. package/dist/embeddings/openrouter.d.ts.map +1 -1
  17. package/dist/embeddings/openrouter.js +8 -3
  18. package/dist/embeddings/openrouter.js.map +1 -1
  19. package/dist/utils/chunking.d.ts +38 -0
  20. package/dist/utils/chunking.d.ts.map +1 -0
  21. package/dist/utils/chunking.js +133 -0
  22. package/dist/utils/chunking.js.map +1 -0
  23. package/dist/utils/encoding.d.ts +24 -0
  24. package/dist/utils/encoding.d.ts.map +1 -0
  25. package/dist/utils/encoding.js +32 -0
  26. package/dist/utils/encoding.js.map +1 -0
  27. package/dist/utils/index.d.ts +8 -0
  28. package/dist/utils/index.d.ts.map +1 -0
  29. package/dist/utils/index.js +8 -0
  30. package/dist/utils/index.js.map +1 -0
  31. package/docs/plans/2026-02-09-v3.7.0-database-fixes-and-improvements.md +900 -0
  32. package/package.json +1 -1
  33. package/src/analyzer/intelligent-analyzer.ts +58 -1
  34. package/src/cli/commands/embeddings-refresh.ts +4 -1
  35. package/src/cli/commands/migrate.ts +8 -0
  36. package/src/cli/repl/init/wizard.ts +12 -4
  37. package/src/db/migrations/files/0015_add_sync_state_version_tracking.sql +18 -0
  38. package/src/db/schema.ts +1 -1
  39. package/src/embeddings/openrouter.ts +10 -4
  40. package/src/utils/chunking.ts +152 -0
  41. package/src/utils/encoding.ts +33 -0
  42. package/src/utils/index.ts +8 -0
@@ -0,0 +1,133 @@
1
+ /**
2
+ * Text Chunking Utility
3
+ *
4
+ * Splits large texts into chunks suitable for embedding generation.
5
+ * Handles token limits, word boundaries, and overlap for context preservation.
6
+ */
7
/**
 * Approximate how many tokens a piece of text will consume.
 *
 * Heuristic: about one token per four characters, counted after leading and
 * trailing whitespace is trimmed (interior whitespace still counts). The
 * result is rounded up, so any non-blank text costs at least one token.
 *
 * @param text - Text to measure
 * @returns Estimated token count; 0 for empty, missing or whitespace-only text
 */
export function estimateTokens(text) {
    // Falsy input (empty string, null, undefined) has no tokens at all.
    const charCount = text ? text.trim().length : 0;
    return charCount === 0 ? 0 : Math.ceil(charCount / 4);
}
27
+ /**
28
+ * Split text into chunks that fit within max tokens.
29
+ *
30
+ * Tries to break at word boundaries when possible.
31
+ * Adds overlap between chunks to preserve context.
32
+ *
33
+ * @param text - Text to chunk
34
+ * @param maxTokens - Maximum tokens per chunk (default: 8000 for OpenRouter)
35
+ * @param overlapTokens - Number of tokens to overlap between chunks (default: 0)
36
+ * @returns Array of text chunks
37
+ */
38
+ export function chunkText(text, maxTokens = 8000, overlapTokens = 0) {
39
+ // Handle empty or very short text
40
+ if (!text || text.trim().length === 0) {
41
+ return [''];
42
+ }
43
+ const trimmedText = text.trim();
44
+ const estimatedTokens = estimateTokens(trimmedText);
45
+ // If text is under the limit, return as-is
46
+ if (estimatedTokens <= maxTokens) {
47
+ return [trimmedText];
48
+ }
49
+ const chunks = [];
50
+ const maxChars = maxTokens * 4; // Convert tokens to approximate characters
51
+ const overlapChars = overlapTokens * 4;
52
+ let startIndex = 0;
53
+ let previousEndIndex = 0;
54
+ let loopCount = 0;
55
+ const maxLoops = 1000; // Safety limit to prevent infinite loops
56
+ while (startIndex < trimmedText.length && loopCount < maxLoops) {
57
+ loopCount++;
58
+ // Calculate end index for this chunk
59
+ let endIndex = Math.min(startIndex + maxChars, trimmedText.length);
60
+ // If not the last chunk, try to break at a word boundary
61
+ if (endIndex < trimmedText.length) {
62
+ // Look for word boundary near the end
63
+ const boundaryChars = 200; // Look back up to 200 chars
64
+ const searchStart = Math.max(startIndex, endIndex - boundaryChars);
65
+ const substring = trimmedText.slice(searchStart, endIndex);
66
+ // Try to find line break first, then space, then punctuation
67
+ let breakIndex = -1;
68
+ // Look for last newline in the window
69
+ const lastNewline = substring.lastIndexOf('\n');
70
+ if (lastNewline !== -1) {
71
+ breakIndex = searchStart + lastNewline + 1;
72
+ }
73
+ else {
74
+ // Look for last space in the window
75
+ const lastSpace = substring.lastIndexOf(' ');
76
+ if (lastSpace !== -1) {
77
+ breakIndex = searchStart + lastSpace + 1;
78
+ }
79
+ else {
80
+ // Look for sentence-ending punctuation
81
+ for (let i = substring.length - 1; i >= Math.max(0, substring.length - 100); i--) {
82
+ const char = substring[i];
83
+ if (char === '.' || char === '!' || char === '?') {
84
+ // Make sure it's actually a sentence end (followed by space or end)
85
+ const nextChar = substring[i + 1];
86
+ if (!nextChar || nextChar === ' ' || nextChar === '\n') {
87
+ breakIndex = searchStart + i + 1;
88
+ break;
89
+ }
90
+ }
91
+ }
92
+ }
93
+ }
94
+ // Use the break index if found, otherwise use the calculated end
95
+ if (breakIndex > startIndex) {
96
+ endIndex = breakIndex;
97
+ }
98
+ }
99
+ // Extract the chunk
100
+ const chunk = trimmedText.slice(startIndex, endIndex);
101
+ chunks.push(chunk);
102
+ // Move to next chunk, accounting for overlap
103
+ if (overlapChars > 0 && endIndex < trimmedText.length) {
104
+ // Only apply overlap if not at the end
105
+ startIndex = Math.max(endIndex - overlapChars, endIndex - maxChars / 2);
106
+ // Ensure we make progress
107
+ if (startIndex <= previousEndIndex) {
108
+ startIndex = endIndex;
109
+ }
110
+ // Also ensure we move forward at least a bit
111
+ if (startIndex >= endIndex) {
112
+ startIndex = endIndex;
113
+ }
114
+ }
115
+ else {
116
+ startIndex = endIndex;
117
+ }
118
+ previousEndIndex = endIndex;
119
+ }
120
+ return chunks;
121
+ }
122
/**
 * Split text into chunks sized for embedding generation.
 *
 * Delegates to chunkText with an 8000-token limit per chunk (noted in the
 * original source as OpenRouter's limit for text-embedding-3-small) and a
 * 100-token overlap between chunks to preserve context.
 *
 * @param text - Text to chunk
 * @returns Array of text chunks suitable for embeddings
 */
export function chunkForEmbedding(text) {
    const embeddingMaxTokens = 8000;
    const embeddingOverlapTokens = 100;
    return chunkText(text, embeddingMaxTokens, embeddingOverlapTokens);
}
133
+ //# sourceMappingURL=chunking.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunking.js","sourceRoot":"","sources":["../../src/utils/chunking.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;;;;;;;GAQG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI;QAAE,OAAO,CAAC,CAAC;IAEpB,+CAA+C;IAC/C,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEnC,4DAA4D;IAC5D,yDAAyD;IACzD,OAAO,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACvC,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,SAAS,CACvB,IAAY,EACZ,YAAoB,IAAI,EACxB,gBAAwB,CAAC;IAEzB,kCAAkC;IAClC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,CAAC,EAAE,CAAC,CAAC;IACd,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAChC,MAAM,eAAe,GAAG,cAAc,CAAC,WAAW,CAAC,CAAC;IAEpD,2CAA2C;IAC3C,IAAI,eAAe,IAAI,SAAS,EAAE,CAAC;QACjC,OAAO,CAAC,WAAW,CAAC,CAAC;IACvB,CAAC;IAED,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,QAAQ,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,2CAA2C;IAC3E,MAAM,YAAY,GAAG,aAAa,GAAG,CAAC,CAAC;IAEvC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,gBAAgB,GAAG,CAAC,CAAC;IACzB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,MAAM,QAAQ,GAAG,IAAI,CAAC,CAAC,yCAAyC;IAEhE,OAAO,UAAU,GAAG,WAAW,CAAC,MAAM,IAAI,SAAS,GAAG,QAAQ,EAAE,CAAC;QAC/D,SAAS,EAAE,CAAC;QAEZ,qCAAqC;QACrC,IAAI,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,GAAG,QAAQ,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC;QAEnE,yDAAyD;QACzD,IAAI,QAAQ,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC;YAClC,sCAAsC;YACtC,MAAM,aAAa,GAAG,GAAG,CAAC,CAAC,4BAA4B;YACvD,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,QAAQ,GAAG,aAAa,CAAC,CAAC;YACnE,MAAM,SAAS,GAAG,WAAW,CAAC,KAAK,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;YAE3D,6DAA6D;YAC7D,IAAI,UAAU,GAAG,CAAC,CAAC,CAAC;YAEpB,sCAAsC;YACtC,MAAM,WAAW,GAAG,SAAS,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;YAChD,IAAI,WAAW,KAAK,CAAC,CAAC,EAAE,CAAC;gBACvB,UAAU,GAAG,WAAW,GAAG,WAAW,GAAG,CAAC,CAAC;YAC7C,CAAC;iBAAM,CAAC;gBACN,oCAAoC;gBACpC,MAAM,SAAS,GAAG,SAAS,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;gBAC7C,IAAI,SAAS,KAAK,CAAC,CAAC,EAAE,CAAC;oBACrB,UAAU,GAAG,WAAW,GAAG,SAAS,GAAG,CAAC,CAAC;gBAC3C,CAAC;qBAAM,CAAC;oBACN,uCAAuC;oBACvC,KAAK,IAAI,CAAC,GAAG,SAAS,CAAC,MAA
M,GAAG,CAAC,EAAE,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,CAAC,MAAM,GAAG,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;wBACjF,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;wBAC1B,IAAI,IAAI,KAAK,GAAG,IAAI,IAAI,KAAK,GAAG,IAAI,IAAI,KAAK,GAAG,EAAE,CAAC;4BACjD,oEAAoE;4BACpE,MAAM,QAAQ,GAAG,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;4BAClC,IAAI,CAAC,QAAQ,IAAI,QAAQ,KAAK,GAAG,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;gCACvD,UAAU,GAAG,WAAW,GAAG,CAAC,GAAG,CAAC,CAAC;gCACjC,MAAM;4BACR,CAAC;wBACH,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;YAED,iEAAiE;YACjE,IAAI,UAAU,GAAG,UAAU,EAAE,CAAC;gBAC5B,QAAQ,GAAG,UAAU,CAAC;YACxB,CAAC;QACH,CAAC;QAED,oBAAoB;QACpB,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;QACtD,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAEnB,6CAA6C;QAC7C,IAAI,YAAY,GAAG,CAAC,IAAI,QAAQ,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC;YACtD,uCAAuC;YACvC,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,YAAY,EAAE,QAAQ,GAAG,QAAQ,GAAG,CAAC,CAAC,CAAC;YAExE,0BAA0B;YAC1B,IAAI,UAAU,IAAI,gBAAgB,EAAE,CAAC;gBACnC,UAAU,GAAG,QAAQ,CAAC;YACxB,CAAC;YAED,6CAA6C;YAC7C,IAAI,UAAU,IAAI,QAAQ,EAAE,CAAC;gBAC3B,UAAU,GAAG,QAAQ,CAAC;YACxB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,UAAU,GAAG,QAAQ,CAAC;QACxB,CAAC;QAED,gBAAgB,GAAG,QAAQ,CAAC;IAC9B,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,OAAO,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC,gCAAgC;AACrE,CAAC"}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Encoding Utilities
3
+ *
4
+ * Handles text encoding issues across different platforms.
5
+ */
6
+ /**
7
+ * Strip UTF-8 BOM (Byte Order Mark) from a string.
8
+ *
9
+ * The UTF-8 BOM is the byte sequence EF BB BF, which decodes to the code point U+FEFF.
10
+ * Some Windows editors add this to the start of files,
11
+ * which can break environment variable parsing.
12
+ *
13
+ * @param str - String that may contain a BOM
14
+ * @returns String with BOM removed if present
15
+ */
16
+ export declare function stripBOM(str: string): string;
17
+ /**
18
+ * Detect if a string has a UTF-8 BOM.
19
+ *
20
+ * @param str - String to check
21
+ * @returns true if BOM is present
22
+ */
23
+ export declare function hasBOM(str: string): boolean;
24
+ //# sourceMappingURL=encoding.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"encoding.d.ts","sourceRoot":"","sources":["../../src/utils/encoding.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;;;;;;;;GASG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAM5C;AAED;;;;;GAKG;AACH,wBAAgB,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAE3C"}
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Encoding Utilities
3
+ *
4
+ * Handles text encoding issues across different platforms.
5
+ */
6
/**
 * Remove a leading Byte Order Mark from a string.
 *
 * A UTF-8 BOM (bytes EF BB BF) appears in a decoded string as the single
 * code unit U+FEFF at index 0. Some Windows editors add it to the start of
 * files, which can break environment variable parsing.
 *
 * @param str - String that may start with a BOM
 * @returns The string without its first code unit when that unit is U+FEFF,
 *          otherwise the string unchanged
 */
export function stripBOM(str) {
    const startsWithBom = str.charCodeAt(0) === 0xFEFF;
    // Only the first code unit is ever removed; later U+FEFF characters stay.
    return startsWithBom ? str.slice(1) : str;
}
23
/**
 * Report whether a string starts with a Byte Order Mark.
 *
 * @param str - String to check
 * @returns true when the first code unit is U+FEFF; false otherwise,
 *          including for the empty string
 */
export function hasBOM(str) {
    // charCodeAt(0) is NaN for an empty string, which never equals 0xFEFF.
    const firstCodeUnit = str.charCodeAt(0);
    return firstCodeUnit === 0xFEFF;
}
32
+ //# sourceMappingURL=encoding.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"encoding.js","sourceRoot":"","sources":["../../src/utils/encoding.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;;;;;;;;GASG;AACH,MAAM,UAAU,QAAQ,CAAC,GAAW;IAClC,8BAA8B;IAC9B,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,MAAM,EAAE,CAAC;QACjC,OAAO,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,MAAM,CAAC,GAAW;IAChC,OAAO,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC;AACtC,CAAC"}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Shared Utilities Module
3
+ *
4
+ * Exports all shared utility functions.
5
+ */
6
+ export * from './encoding.js';
7
+ export * from './chunking.js';
8
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC"}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Shared Utilities Module
3
+ *
4
+ * Exports all shared utility functions.
5
+ */
6
+ export * from './encoding.js';
7
+ export * from './chunking.js';
8
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC"}