@milo4jo/contextkit 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/indexer/chunker.d.ts.map +1 -1
- package/dist/indexer/chunker.js +14 -15
- package/dist/indexer/chunker.js.map +1 -1
- package/package.json +3 -3
package/README.md
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../src/indexer/chunker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;
|
|
1
|
+
{"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../src/indexer/chunker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAErD,2CAA2C;AAC3C,MAAM,WAAW,KAAK;IACpB,sBAAsB;IACtB,EAAE,EAAE,MAAM,CAAC;IACX,mCAAmC;IACnC,QAAQ,EAAE,MAAM,CAAC;IACjB,oCAAoC;IACpC,QAAQ,EAAE,MAAM,CAAC;IACjB,oBAAoB;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,6BAA6B;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,sCAAsC;IACtC,OAAO,EAAE,MAAM,CAAC;IAChB,kBAAkB;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,uBAAuB;AACvB,MAAM,WAAW,YAAY;IAC3B,8BAA8B;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,oCAAoC;IACpC,YAAY,EAAE,MAAM,CAAC;CACtB;AAUD;;GAEG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEhD;AAkBD;;GAEG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,cAAc,EAAE,OAAO,GAAE,YAA8B,GAAG,KAAK,EAAE,CAgEhG;AAED;;GAEG;AACH,wBAAgB,UAAU,CACxB,KAAK,EAAE,cAAc,EAAE,EACvB,OAAO,GAAE,YAA8B,GACtC,KAAK,EAAE,CAST"}
|
package/dist/indexer/chunker.js
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
* Splits files into chunks for embedding.
|
|
5
5
|
* Uses line-based chunking with configurable size and overlap.
|
|
6
6
|
*/
|
|
7
|
+
import { createHash } from 'crypto';
|
|
7
8
|
import { encodingForModel } from 'js-tiktoken';
|
|
8
9
|
const DEFAULT_OPTIONS = {
|
|
9
10
|
chunkSize: 500,
|
|
@@ -18,18 +19,14 @@ export function countTokens(text) {
|
|
|
18
19
|
return encoder.encode(text).length;
|
|
19
20
|
}
|
|
20
21
|
/**
|
|
21
|
-
* Generate a unique chunk ID
|
|
22
|
+
* Generate a unique chunk ID using SHA-256 hash
|
|
23
|
+
* Includes both startLine and endLine to handle edge cases where
|
|
24
|
+
* high overlap values could result in same startLine for different chunks.
|
|
22
25
|
*/
|
|
23
|
-
function generateChunkId(sourceId, filePath, startLine) {
|
|
24
|
-
const base = `${sourceId}:${filePath}:${startLine}`;
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
for (let i = 0; i < base.length; i++) {
|
|
28
|
-
const char = base.charCodeAt(i);
|
|
29
|
-
hash = (hash << 5) - hash + char;
|
|
30
|
-
hash = hash & hash; // Convert to 32-bit integer
|
|
31
|
-
}
|
|
32
|
-
return `chunk_${Math.abs(hash).toString(36)}`;
|
|
26
|
+
function generateChunkId(sourceId, filePath, startLine, endLine) {
|
|
27
|
+
const base = `${sourceId}:${filePath}:${startLine}:${endLine}`;
|
|
28
|
+
const hash = createHash('sha256').update(base).digest('hex').slice(0, 16);
|
|
29
|
+
return `chunk_${hash}`;
|
|
33
30
|
}
|
|
34
31
|
/**
|
|
35
32
|
* Chunk a single file into pieces
|
|
@@ -46,13 +43,14 @@ export function chunkFile(file, options = DEFAULT_OPTIONS) {
|
|
|
46
43
|
// If adding this line exceeds chunk size, save current chunk
|
|
47
44
|
if (currentTokens + lineTokens > options.chunkSize && currentLines.length > 0) {
|
|
48
45
|
const content = currentLines.join('\n');
|
|
46
|
+
const endLine = startLine + currentLines.length - 1;
|
|
49
47
|
chunks.push({
|
|
50
|
-
id: generateChunkId(file.sourceId, file.relativePath, startLine),
|
|
48
|
+
id: generateChunkId(file.sourceId, file.relativePath, startLine, endLine),
|
|
51
49
|
sourceId: file.sourceId,
|
|
52
50
|
filePath: file.relativePath,
|
|
53
51
|
content,
|
|
54
52
|
startLine,
|
|
55
|
-
endLine
|
|
53
|
+
endLine,
|
|
56
54
|
tokens: currentTokens,
|
|
57
55
|
});
|
|
58
56
|
// Calculate overlap: keep last N tokens worth of lines
|
|
@@ -76,13 +74,14 @@ export function chunkFile(file, options = DEFAULT_OPTIONS) {
|
|
|
76
74
|
// Don't forget the last chunk
|
|
77
75
|
if (currentLines.length > 0) {
|
|
78
76
|
const content = currentLines.join('\n');
|
|
77
|
+
const endLine = startLine + currentLines.length - 1;
|
|
79
78
|
chunks.push({
|
|
80
|
-
id: generateChunkId(file.sourceId, file.relativePath, startLine),
|
|
79
|
+
id: generateChunkId(file.sourceId, file.relativePath, startLine, endLine),
|
|
81
80
|
sourceId: file.sourceId,
|
|
82
81
|
filePath: file.relativePath,
|
|
83
82
|
content,
|
|
84
83
|
startLine,
|
|
85
|
-
endLine
|
|
84
|
+
endLine,
|
|
86
85
|
tokens: countTokens(content),
|
|
87
86
|
});
|
|
88
87
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/indexer/chunker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AA6B/C,MAAM,eAAe,GAAiB;IACpC,SAAS,EAAE,GAAG;IACd,YAAY,EAAE,EAAE;CACjB,CAAC;AAEF,qDAAqD;AACrD,MAAM,OAAO,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;AAE1C;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,OAAO,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;AACrC,CAAC;AAED
|
|
1
|
+
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/indexer/chunker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AA6B/C,MAAM,eAAe,GAAiB;IACpC,SAAS,EAAE,GAAG;IACd,YAAY,EAAE,EAAE;CACjB,CAAC;AAEF,qDAAqD;AACrD,MAAM,OAAO,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;AAE1C;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,OAAO,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;AACrC,CAAC;AAED;;;;GAIG;AACH,SAAS,eAAe,CACtB,QAAgB,EAChB,QAAgB,EAChB,SAAiB,EACjB,OAAe;IAEf,MAAM,IAAI,GAAG,GAAG,QAAQ,IAAI,QAAQ,IAAI,SAAS,IAAI,OAAO,EAAE,CAAC;IAC/D,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC1E,OAAO,SAAS,IAAI,EAAE,CAAC;AACzB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,IAAoB,EAAE,UAAwB,eAAe;IACrF,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACvC,MAAM,MAAM,GAAY,EAAE,CAAC;IAE3B,IAAI,YAAY,GAAa,EAAE,CAAC;IAChC,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,UAAU,GAAG,WAAW,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;QAE5C,6DAA6D;QAC7D,IAAI,aAAa,GAAG,UAAU,GAAG,OAAO,CAAC,SAAS,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9E,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxC,MAAM,OAAO,GAAG,SAAS,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;YACpD,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,YAAY,EAAE,SAAS,EAAE,OAAO,CAAC;gBACzE,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,QAAQ,EAAE,IAAI,CAAC,YAAY;gBAC3B,OAAO;gBACP,SAAS;gBACT,OAAO;gBACP,MAAM,EAAE,aAAa;aACtB,CAAC,CAAC;YAEH,uDAAuD;YACvD,MAAM,YAAY,GAAa,EAAE,CAAC;YAClC,IAAI,aAAa,GAAG,CAAC,CAAC;YAEtB,KAAK,IAAI,CAAC,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,aAAa,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC1F,MAAM,WAAW,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;gBACpC,MAAM,iBAAiB,GAAG,WAAW,CAAC,WAAW,GAAG,IAAI,CAAC,CAAC;gBAC1D,YAAY,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;gBAClC,aAAa,IAAI,iBAAiB,CAAC;YACrC,CAAC;YAED,gCAAgC;YAChC,YAAY,GAAG,YAAY,CAAC;YAC5B,aAAa,GAAG,aAAa,CAAC;YAC9B,SAAS;gBACP,SAAS,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,OAAO,GAAG,SAAS,GAAG,CAAC,CAAC,GAAG,YAAY,CAAC,MAAM,CAAC;QAC1F,CAAC;QAED,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxB,aAAa,IAAI,UAAU,CAAC;IAC9B,CAAC;IAED,8BAA8B;IAC9B,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,OAAO,GAAG,SAAS,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;QACpD,MAAM,CAAC,IAAI,CAAC;YACV,EAAE,EAAE,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,YAAY,EAAE,SAAS,EAAE,OAAO,CAAC;YACzE,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,QAAQ,EAAE,IAAI,CAAC,YAAY;YAC3B,OAAO;YACP,SAAS;YACT,OAAO;YACP,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC;SAC7B,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CACxB,KAAuB,EACvB,UAAwB,eAAe;IAEvC,MAAM,SAAS,GAAY,EAAE,CAAC;IAE9B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACxC,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;IAC5B,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@milo4jo/contextkit",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"description": "Intelligent context selection for LLM applications",
|
|
5
5
|
"author": "Milo <milo4jo@outlook.de>",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"type": "module",
|
|
8
8
|
"bin": {
|
|
9
|
-
"contextkit": "
|
|
9
|
+
"contextkit": "dist/index.js"
|
|
10
10
|
},
|
|
11
11
|
"main": "./dist/index.js",
|
|
12
12
|
"types": "./dist/index.d.ts",
|
|
@@ -62,6 +62,6 @@
|
|
|
62
62
|
],
|
|
63
63
|
"repository": {
|
|
64
64
|
"type": "git",
|
|
65
|
-
"url": "https://github.com/milo4jo/contextkit.git"
|
|
65
|
+
"url": "git+https://github.com/milo4jo/contextkit.git"
|
|
66
66
|
}
|
|
67
67
|
}
|