@shrkcrft/context 0.1.0-alpha.1 → 0.1.0-alpha.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/context-builder.js +3 -3
- package/dist/index.d.ts +0 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +0 -1
- package/package.json +11 -11
- package/dist/tokenizer.d.ts +0 -29
- package/dist/tokenizer.d.ts.map +0 -1
- package/dist/tokenizer.js +0 -80
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
SharkCraft AI context builder: token-budgeted relevance retrieval for tasks.
|
|
4
4
|
|
|
5
|
-
Part of [SharkCraft](https://github.com/
|
|
5
|
+
Part of [SharkCraft](https://github.com/shrkcrft/sharkcraft) — a deterministic, local-first toolkit that gives AI coding agents durable project context. See the main repo for documentation, examples, and the `shrk` CLI.
|
|
6
6
|
|
|
7
7
|
## Install
|
|
8
8
|
|
package/dist/context-builder.js
CHANGED
|
@@ -2,7 +2,7 @@ import { aggregateActionHints, formatAggregatedHints } from '@shrkcrft/knowledge
|
|
|
2
2
|
import { DEFAULT_CONTEXT_REQUEST } from "./context-request.js";
|
|
3
3
|
import { selectRelevantEntries } from "./relevance-selector.js";
|
|
4
4
|
import { formatEntryForContext, formatSectionBody } from "./ai-context-formatter.js";
|
|
5
|
-
import {
|
|
5
|
+
import { estimateTokens } from "./token-estimator.js";
|
|
6
6
|
export function buildContext(allEntries, request) {
|
|
7
7
|
const r = { ...DEFAULT_CONTEXT_REQUEST, ...request };
|
|
8
8
|
const maxTokens = r.maxTokens ?? DEFAULT_CONTEXT_REQUEST.maxTokens;
|
|
@@ -43,7 +43,7 @@ export function buildContext(allEntries, request) {
|
|
|
43
43
|
const omitted = [];
|
|
44
44
|
let used = 0;
|
|
45
45
|
function tryAddSection(title, body, entryIds) {
|
|
46
|
-
const tokens =
|
|
46
|
+
const tokens = estimateTokens(body);
|
|
47
47
|
if (used + tokens > maxTokens && sections.length > 0) {
|
|
48
48
|
omitted.push(title);
|
|
49
49
|
return;
|
|
@@ -52,7 +52,7 @@ export function buildContext(allEntries, request) {
|
|
|
52
52
|
// Still emit, but mark truncated.
|
|
53
53
|
const ratio = (maxTokens - used) / tokens;
|
|
54
54
|
const truncatedBody = body.slice(0, Math.max(0, Math.floor(body.length * ratio))) + '\n…[truncated]';
|
|
55
|
-
const truncTokens =
|
|
55
|
+
const truncTokens = estimateTokens(truncatedBody);
|
|
56
56
|
sections.push({ title, body: truncatedBody, entryIds, tokens: truncTokens, truncated: true });
|
|
57
57
|
used += truncTokens;
|
|
58
58
|
return;
|
package/dist/index.d.ts
CHANGED
|
@@ -3,7 +3,6 @@ export * from './context-result.js';
|
|
|
3
3
|
export * from './context-section.js';
|
|
4
4
|
export * from './context-builder.js';
|
|
5
5
|
export * from './token-estimator.js';
|
|
6
|
-
export * from './tokenizer.js';
|
|
7
6
|
export * from './relevance-selector.js';
|
|
8
7
|
export * from './ai-context-formatter.js';
|
|
9
8
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,sBAAsB,CAAC;AACrC,cAAc,qBAAqB,CAAC;AACpC,cAAc,sBAAsB,CAAC;AACrC,cAAc,sBAAsB,CAAC;AACrC,cAAc,sBAAsB,CAAC;AACrC,cAAc,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,sBAAsB,CAAC;AACrC,cAAc,qBAAqB,CAAC;AACpC,cAAc,sBAAsB,CAAC;AACrC,cAAc,sBAAsB,CAAC;AACrC,cAAc,sBAAsB,CAAC;AACrC,cAAc,yBAAyB,CAAC;AACxC,cAAc,2BAA2B,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -3,6 +3,5 @@ export * from "./context-result.js";
|
|
|
3
3
|
export * from "./context-section.js";
|
|
4
4
|
export * from "./context-builder.js";
|
|
5
5
|
export * from "./token-estimator.js";
|
|
6
|
-
export * from "./tokenizer.js";
|
|
7
6
|
export * from "./relevance-selector.js";
|
|
8
7
|
export * from "./ai-context-formatter.js";
|
package/package.json
CHANGED
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@shrkcrft/context",
|
|
3
|
-
"version": "0.1.0-alpha.
|
|
3
|
+
"version": "0.1.0-alpha.11",
|
|
4
4
|
"description": "SharkCraft AI context builder: token-budgeted relevance retrieval for tasks.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "SharkCraft contributors",
|
|
7
7
|
"type": "module",
|
|
8
8
|
"main": "./dist/index.js",
|
|
9
|
-
"types": "./dist/index.d.ts",
|
|
9
|
+
"types": "./dist/index.d.ts",
|
|
10
10
|
"exports": {
|
|
11
11
|
".": {
|
|
12
12
|
"types": "./dist/index.d.ts",
|
|
13
|
+
"bun": "./src/index.ts",
|
|
13
14
|
"import": "./dist/index.js",
|
|
14
15
|
"default": "./dist/index.js"
|
|
15
16
|
}
|
|
@@ -21,12 +22,12 @@
|
|
|
21
22
|
],
|
|
22
23
|
"repository": {
|
|
23
24
|
"type": "git",
|
|
24
|
-
"url": "git+https://github.com/
|
|
25
|
+
"url": "git+https://github.com/shrkcrft/sharkcraft.git",
|
|
25
26
|
"directory": "packages/context"
|
|
26
27
|
},
|
|
27
|
-
"homepage": "https://github.com/
|
|
28
|
+
"homepage": "https://github.com/shrkcrft/sharkcraft",
|
|
28
29
|
"bugs": {
|
|
29
|
-
"url": "https://github.com/
|
|
30
|
+
"url": "https://github.com/shrkcrft/sharkcraft/issues"
|
|
30
31
|
},
|
|
31
32
|
"keywords": [
|
|
32
33
|
"sharkcraft",
|
|
@@ -43,12 +44,11 @@
|
|
|
43
44
|
"typecheck": "tsc --noEmit -p tsconfig.json"
|
|
44
45
|
},
|
|
45
46
|
"dependencies": {
|
|
46
|
-
"@shrkcrft/core": "^0.1.0-alpha.
|
|
47
|
-
"@shrkcrft/knowledge": "^0.1.0-alpha.
|
|
48
|
-
"@shrkcrft/
|
|
49
|
-
"@shrkcrft/
|
|
50
|
-
"@shrkcrft/templates": "^0.1.0-alpha.
|
|
51
|
-
"gpt-tokenizer": "^3.4.0"
|
|
47
|
+
"@shrkcrft/core": "^0.1.0-alpha.11",
|
|
48
|
+
"@shrkcrft/knowledge": "^0.1.0-alpha.11",
|
|
49
|
+
"@shrkcrft/rules": "^0.1.0-alpha.11",
|
|
50
|
+
"@shrkcrft/paths": "^0.1.0-alpha.11",
|
|
51
|
+
"@shrkcrft/templates": "^0.1.0-alpha.11"
|
|
52
52
|
},
|
|
53
53
|
"publishConfig": {
|
|
54
54
|
"access": "public"
|
package/dist/tokenizer.d.ts
DELETED
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
export interface ITokenizer {
|
|
2
|
-
/** Human-readable name, surfaced in diagnostics. */
|
|
3
|
-
readonly name: string;
|
|
4
|
-
/** Total token count for `text`. Must be deterministic for a given input. */
|
|
5
|
-
countTokens(text: string): number;
|
|
6
|
-
}
|
|
7
|
-
export declare function getTokenizer(): ITokenizer;
|
|
8
|
-
export declare function setTokenizer(tokenizer: ITokenizer): void;
|
|
9
|
-
export declare function resetTokenizer(): void;
|
|
10
|
-
export declare function countTokens(text: string): number;
|
|
11
|
-
export interface ITokenizerInitOptions {
|
|
12
|
-
/**
|
|
13
|
-
* Which BPE encoding to load. Defaults to `cl100k_base` (GPT-4 / GPT-3.5);
|
|
14
|
-
* pass `o200k_base` for GPT-4o-class models.
|
|
15
|
-
*/
|
|
16
|
-
encoding?: 'cl100k_base' | 'o200k_base';
|
|
17
|
-
/**
|
|
18
|
-
* If true, throw on load failure instead of falling back silently. Useful
|
|
19
|
-
* in tests; not recommended in production paths.
|
|
20
|
-
*/
|
|
21
|
-
strict?: boolean;
|
|
22
|
-
}
|
|
23
|
-
/**
|
|
24
|
-
* Asynchronously upgrade the active tokenizer from the estimator to a real
|
|
25
|
-
* BPE encoder. Resolves to `true` if the swap happened; resolves to `false`
|
|
26
|
-
* (or rejects, when `strict`) if the tokenizer module could not be loaded.
|
|
27
|
-
*/
|
|
28
|
-
export declare function initTokenizer(options?: ITokenizerInitOptions): Promise<boolean>;
|
|
29
|
-
//# sourceMappingURL=tokenizer.d.ts.map
|
package/dist/tokenizer.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../src/tokenizer.ts"],"names":[],"mappings":"AAiBA,MAAM,WAAW,UAAU;IACzB,oDAAoD;IACpD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,6EAA6E;IAC7E,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAAC;CACnC;AAyBD,wBAAgB,YAAY,IAAI,UAAU,CAEzC;AAED,wBAAgB,YAAY,CAAC,SAAS,EAAE,UAAU,GAAG,IAAI,CAExD;AAED,wBAAgB,cAAc,IAAI,IAAI,CAErC;AAED,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEhD;AAED,MAAM,WAAW,qBAAqB;IACpC;;;OAGG;IACH,QAAQ,CAAC,EAAE,aAAa,GAAG,YAAY,CAAC;IACxC;;;OAGG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED;;;;GAIG;AACH,wBAAsB,aAAa,CAAC,OAAO,GAAE,qBAA0B,GAAG,OAAO,CAAC,OAAO,CAAC,CAgBzF"}
|
package/dist/tokenizer.js
DELETED
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
var __rewriteRelativeImportExtension = (this && this.__rewriteRelativeImportExtension) || function (path, preserveJsx) {
|
|
2
|
-
if (typeof path === "string" && /^\.\.?\//.test(path)) {
|
|
3
|
-
return path.replace(/\.(tsx)$|((?:\.d)?)((?:\.[^./]+?)?)\.([cm]?)ts$/i, function (m, tsx, d, ext, cm) {
|
|
4
|
-
return tsx ? preserveJsx ? ".jsx" : ".js" : d && (!ext || !cm) ? m : (d + ext + "." + cm.toLowerCase() + "js");
|
|
5
|
-
});
|
|
6
|
-
}
|
|
7
|
-
return path;
|
|
8
|
-
};
|
|
9
|
-
/**
|
|
10
|
-
* Token counting abstraction.
|
|
11
|
-
*
|
|
12
|
-
* The context builder needs to fit content under a budget — wrong counts mean
|
|
13
|
-
* either truncated context (under-counted) or wasted budget (over-counted).
|
|
14
|
-
* Real BPE tokenizers are accurate; the legacy `estimateTokens` is a 4-char
|
|
15
|
-
* heuristic kept as a fallback so the package still loads in environments
|
|
16
|
-
* where the tokenizer can't be imported.
|
|
17
|
-
*
|
|
18
|
-
* Default flow:
|
|
19
|
-
* 1. On import, the estimator is the active tokenizer (always works).
|
|
20
|
-
* 2. Call `await initTokenizer()` once at startup to swap in the real
|
|
21
|
-
* tokenizer. If that fails for any reason, the estimator stays active.
|
|
22
|
-
* 3. `countTokens(text)` always returns a number, synchronously.
|
|
23
|
-
*/
|
|
24
|
-
import { estimateTokens } from "./token-estimator.js";
|
|
25
|
-
class EstimatorTokenizer {
|
|
26
|
-
name = 'estimator';
|
|
27
|
-
countTokens(text) {
|
|
28
|
-
return estimateTokens(text);
|
|
29
|
-
}
|
|
30
|
-
}
|
|
31
|
-
class GptTokenizer {
|
|
32
|
-
encode;
|
|
33
|
-
name;
|
|
34
|
-
constructor(name, encode) {
|
|
35
|
-
this.encode = encode;
|
|
36
|
-
this.name = name;
|
|
37
|
-
}
|
|
38
|
-
countTokens(text) {
|
|
39
|
-
if (!text)
|
|
40
|
-
return 0;
|
|
41
|
-
return this.encode(text).length;
|
|
42
|
-
}
|
|
43
|
-
}
|
|
44
|
-
let activeTokenizer = new EstimatorTokenizer();
|
|
45
|
-
export function getTokenizer() {
|
|
46
|
-
return activeTokenizer;
|
|
47
|
-
}
|
|
48
|
-
export function setTokenizer(tokenizer) {
|
|
49
|
-
activeTokenizer = tokenizer;
|
|
50
|
-
}
|
|
51
|
-
export function resetTokenizer() {
|
|
52
|
-
activeTokenizer = new EstimatorTokenizer();
|
|
53
|
-
}
|
|
54
|
-
export function countTokens(text) {
|
|
55
|
-
return activeTokenizer.countTokens(text);
|
|
56
|
-
}
|
|
57
|
-
/**
|
|
58
|
-
* Asynchronously upgrade the active tokenizer from the estimator to a real
|
|
59
|
-
* BPE encoder. Resolves to `true` if the swap happened; resolves to `false`
|
|
60
|
-
* (or rejects, when `strict`) if the tokenizer module could not be loaded.
|
|
61
|
-
*/
|
|
62
|
-
export async function initTokenizer(options = {}) {
|
|
63
|
-
const encoding = options.encoding ?? 'cl100k_base';
|
|
64
|
-
try {
|
|
65
|
-
const moduleName = encoding === 'o200k_base' ? 'gpt-tokenizer/encoding/o200k_base' : 'gpt-tokenizer/encoding/cl100k_base';
|
|
66
|
-
const mod = (await import(__rewriteRelativeImportExtension(moduleName)));
|
|
67
|
-
if (typeof mod.encode !== 'function') {
|
|
68
|
-
if (options.strict)
|
|
69
|
-
throw new Error(`gpt-tokenizer module ${moduleName} missing encode()`);
|
|
70
|
-
return false;
|
|
71
|
-
}
|
|
72
|
-
setTokenizer(new GptTokenizer(`gpt-tokenizer:${encoding}`, mod.encode));
|
|
73
|
-
return true;
|
|
74
|
-
}
|
|
75
|
-
catch (err) {
|
|
76
|
-
if (options.strict)
|
|
77
|
-
throw err;
|
|
78
|
-
return false;
|
|
79
|
-
}
|
|
80
|
-
}
|