@shrkcrft/context 0.1.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +15 -0
- package/dist/ai-context-formatter.d.ts +8 -0
- package/dist/ai-context-formatter.d.ts.map +1 -0
- package/dist/ai-context-formatter.js +37 -0
- package/dist/context-builder.d.ts +7 -0
- package/dist/context-builder.d.ts.map +1 -0
- package/dist/context-builder.js +97 -0
- package/dist/context-request.d.ts +20 -0
- package/dist/context-request.d.ts.map +1 -0
- package/dist/context-request.js +11 -0
- package/dist/context-result.d.ts +12 -0
- package/dist/context-result.d.ts.map +1 -0
- package/dist/context-result.js +1 -0
- package/dist/context-section.d.ts +12 -0
- package/dist/context-section.d.ts.map +1 -0
- package/dist/context-section.js +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +8 -0
- package/dist/relevance-selector.d.ts +17 -0
- package/dist/relevance-selector.d.ts.map +1 -0
- package/dist/relevance-selector.js +54 -0
- package/dist/token-estimator.d.ts +7 -0
- package/dist/token-estimator.d.ts.map +1 -0
- package/dist/token-estimator.js +15 -0
- package/dist/tokenizer.d.ts +29 -0
- package/dist/tokenizer.d.ts.map +1 -0
- package/dist/tokenizer.js +80 -0
- package/package.json +56 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 SharkCraft contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# @shrkcrft/context
|
|
2
|
+
|
|
3
|
+
SharkCraft AI context builder: token-budgeted relevance retrieval for tasks.
|
|
4
|
+
|
|
5
|
+
Part of [SharkCraft](https://github.com/sharkcraft/sharkcraft) — a deterministic, local-first toolkit that gives AI coding agents durable project context. See the main repo for documentation, examples, and the `shrk` CLI.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
bun add @shrkcrft/context
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## License
|
|
14
|
+
|
|
15
|
+
MIT — see [LICENSE](./LICENSE).
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { IKnowledgeEntry } from '@shrkcrft/knowledge';
|
|
2
|
+
/** Options controlling how a knowledge entry is rendered into context text. */
export interface FormatEntryOptions {
    /** When true, an entry that has examples gets an "Examples:" list appended (titles plus fenced code). */
    includeExamples?: boolean;
    /** When set to a non-zero number, content longer than this many characters is cut and suffixed with "…". */
    maxContentChars?: number;
}
|
|
6
|
+
export declare function formatEntryForContext(entry: IKnowledgeEntry, options?: FormatEntryOptions): string;
|
|
7
|
+
export declare function formatSectionBody(entries: readonly IKnowledgeEntry[], options?: FormatEntryOptions): string;
|
|
8
|
+
//# sourceMappingURL=ai-context-formatter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-context-formatter.d.ts","sourceRoot":"","sources":["../src/ai-context-formatter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAE3D,MAAM,WAAW,kBAAkB;IACjC,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,eAAe,EACtB,OAAO,GAAE,kBAAuB,GAC/B,MAAM,CAoCR;AAED,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,SAAS,eAAe,EAAE,EAAE,OAAO,GAAE,kBAAuB,GAAG,MAAM,CAE/G"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
 * Render one knowledge entry as a compact markdown fragment.
 *
 * Layout: a `### title (meta)` heading, the trimmed summary (when present),
 * the trimmed content, and optionally an "Examples:" list with fenced code.
 *
 * @param entry   Entry to render; reads `id`, `priority`, `title`, `scope`,
 *                `tags`, `summary`, `content` and `examples`.
 * @param options `maxContentChars` caps the content length (a "…" is appended
 *                when it is cut); `includeExamples` appends the examples.
 * @returns The rendered lines joined with "\n".
 */
export function formatEntryForContext(entry, options = {}) {
    // Empty scope/tags contribute nothing to the heading metadata.
    const meta = [
        `id:${entry.id}`,
        `priority:${entry.priority}`,
        entry.scope.length ? `scope:[${entry.scope.join(',')}]` : '',
        entry.tags.length ? `tags:[${entry.tags.join(',')}]` : '',
    ]
        .filter(Boolean)
        .join(' ');
    const lines = [`### ${entry.title} (${meta})`];
    if (entry.summary) {
        lines.push(entry.summary.trim());
    }
    let content = entry.content.trim();
    const limit = options.maxContentChars;
    if (limit && content.length > limit) {
        let cut = limit;
        // Do not cut between the two halves of a surrogate pair: that would
        // leave a lone high surrogate (an invalid character) before the "…".
        const last = content.charCodeAt(cut - 1);
        const next = content.charCodeAt(cut);
        if (last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff) {
            cut -= 1;
        }
        content = content.slice(0, cut) + '…';
    }
    lines.push(content);
    if (options.includeExamples && entry.examples?.length) {
        lines.push('Examples:');
        for (const ex of entry.examples) {
            if (ex.title) {
                lines.push(`- ${ex.title}`);
            }
            if (ex.code) {
                const code = ex.code.trim();
                // A line inside the code that itself starts with a backtick
                // fence would close a three-backtick fence early, so the
                // wrapper is made one backtick longer than any such line.
                let longestFence = 0;
                for (const found of code.matchAll(/^[ \t]*(`{3,})/gm)) {
                    longestFence = Math.max(longestFence, found[1].length);
                }
                const fence = '`'.repeat(Math.max(3, longestFence + 1));
                lines.push(fence + (ex.language ?? ''), code, fence);
            }
        }
    }
    return lines.join('\n');
}
|
|
35
|
+
/**
 * Render every entry with `formatEntryForContext` and separate the results
 * with a blank line.
 *
 * @param entries Entries that make up one section.
 * @param options Formatting options forwarded unchanged to each entry.
 * @returns The section body; an empty string when there are no entries.
 */
export function formatSectionBody(entries, options = {}) {
    const renderEntry = (entry) => formatEntryForContext(entry, options);
    const rendered = entries.map(renderEntry);
    return rendered.join('\n\n');
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { IKnowledgeEntry } from '@shrkcrft/knowledge';
|
|
2
|
+
import { type IContextRequest } from './context-request.js';
|
|
3
|
+
import type { IContextResult } from './context-result.js';
|
|
4
|
+
import { formatEntryForContext } from './ai-context-formatter.js';
|
|
5
|
+
export declare function buildContext(allEntries: readonly IKnowledgeEntry[], request: IContextRequest): IContextResult;
|
|
6
|
+
export { formatEntryForContext };
|
|
7
|
+
//# sourceMappingURL=context-builder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"context-builder.d.ts","sourceRoot":"","sources":["../src/context-builder.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAE3D,OAAO,EAA2B,KAAK,eAAe,EAAE,MAAM,sBAAsB,CAAC;AACrF,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAG1D,OAAO,EAAE,qBAAqB,EAAqB,MAAM,2BAA2B,CAAC;AASrF,wBAAgB,YAAY,CAC1B,UAAU,EAAE,SAAS,eAAe,EAAE,EACtC,OAAO,EAAE,eAAe,GACvB,cAAc,CAiGhB;AAED,OAAO,EAAE,qBAAqB,EAAE,CAAC"}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import { aggregateActionHints, formatAggregatedHints } from '@shrkcrft/knowledge';
|
|
2
|
+
import { DEFAULT_CONTEXT_REQUEST } from "./context-request.js";
|
|
3
|
+
import { selectRelevantEntries } from "./relevance-selector.js";
|
|
4
|
+
import { formatEntryForContext, formatSectionBody } from "./ai-context-formatter.js";
|
|
5
|
+
import { countTokens } from "./tokenizer.js";
|
|
6
|
+
/**
 * Assemble a token-budgeted context document for a task.
 *
 * Relevant entries are picked with `selectRelevantEntries`, grouped into
 * titled sections, and emitted in descending priority. A section that would
 * push the running total over the budget is recorded in `omittedSections`;
 * the only exception is the very first section, which is truncated instead so
 * the result is never empty. Action hints of the emitted entries are appended
 * as a final "Agent Actions" section when any exist.
 *
 * @param allEntries Every knowledge entry available for selection.
 * @param request    Task text plus optional filters, include flags and budget.
 * @returns The effective request, the emitted sections, token totals, the
 *          titles of omitted sections and the combined markdown body.
 */
export function buildContext(allEntries, request) {
    // Drop explicitly-undefined fields before merging: a plain spread would
    // copy e.g. `includeRules: undefined` over the default `true`.
    const provided = Object.fromEntries(Object.entries(request ?? {}).filter(([, value]) => value !== undefined));
    const r = { ...DEFAULT_CONTEXT_REQUEST, ...provided };
    const maxTokens = r.maxTokens ?? DEFAULT_CONTEXT_REQUEST.maxTokens;
    const buckets = selectRelevantEntries(allEntries, r);
    const sectionPlans = [];
    if (r.includeOverview && r.projectOverview) {
        // The overview has no entries; its text comes from the request.
        sectionPlans.push({ title: 'Project Overview', priority: 100, entries: [] });
    }
    if (r.includeWarnings && buckets.warnings.length) {
        sectionPlans.push({ title: 'Important Warnings', priority: 95, entries: buckets.warnings });
    }
    if (r.includeRules) {
        sectionPlans.push({ title: 'Relevant Rules', priority: 90, entries: buckets.rules });
    }
    sectionPlans.push({ title: 'Architecture Constraints', priority: 80, entries: buckets.architecture });
    if (r.includePaths) {
        sectionPlans.push({ title: 'Relevant Path Conventions', priority: 70, entries: buckets.paths });
    }
    if (r.includeTemplates) {
        sectionPlans.push({ title: 'Relevant Templates', priority: 65, entries: buckets.templates });
    }
    sectionPlans.push({ title: 'Technical Stack', priority: 50, entries: buckets.technical });
    sectionPlans.push({ title: 'Testing Guidelines', priority: 45, entries: buckets.testing });
    sectionPlans.push({ title: 'Security Guidelines', priority: 44, entries: buckets.security });
    if (r.includeCommands) {
        sectionPlans.push({ title: 'Commands', priority: 40, entries: buckets.commands });
    }
    sectionPlans.push({ title: 'Current Tasks', priority: 30, entries: buckets.tasks });
    if (r.includeDocs) {
        sectionPlans.push({ title: 'Reference Docs', priority: 10, entries: buckets.docs });
    }
    sectionPlans.sort((a, b) => b.priority - a.priority);
    const sections = [];
    const omitted = [];
    const includedEntries = [];
    let used = 0;
    // Adds a section if the budget allows; returns whether it was emitted.
    function tryAddSection(title, body, entryIds) {
        const tokens = countTokens(body);
        const overBudget = used + tokens > maxTokens;
        if (overBudget && sections.length > 0) {
            omitted.push(title);
            return false;
        }
        if (overBudget) {
            // First section and already too large: still emit, but cut the
            // body proportionally to the remaining budget and mark it.
            const ratio = (maxTokens - used) / tokens;
            const truncatedBody = body.slice(0, Math.max(0, Math.floor(body.length * ratio))) + '\n…[truncated]';
            const truncTokens = countTokens(truncatedBody);
            sections.push({ title, body: truncatedBody, entryIds, tokens: truncTokens, truncated: true });
            used += truncTokens;
            return true;
        }
        sections.push({ title, body, entryIds, tokens });
        used += tokens;
        return true;
    }
    for (const plan of sectionPlans) {
        if (plan.title === 'Project Overview' && r.projectOverview) {
            tryAddSection('Project Overview', r.projectOverview.trim(), []);
            continue;
        }
        if (plan.entries.length === 0) {
            continue;
        }
        const body = formatSectionBody(plan.entries, {
            includeExamples: r.includeExamples,
            maxContentChars: 1500,
        });
        const ids = plan.entries.map((e) => e.id);
        if (tryAddSection(plan.title, body, ids)) {
            includedEntries.push(...plan.entries);
        }
    }
    // Action hints: aggregate from every included entry and emit a single
    // composite "Agent Actions" section. Entries of sections omitted for
    // budget are excluded so the hints only refer to content that is present.
    // Skipped when no entry contributes.
    const aggregated = aggregateActionHints(includedEntries);
    const hintsBody = formatAggregatedHints(aggregated, { level: '###', compact: true });
    if (hintsBody && hintsBody.length > 0) {
        tryAddSection('Agent Actions', hintsBody, aggregated.contributingEntries);
    }
    const fullBody = sections
        .map((s) => `## ${s.title}${s.truncated ? ' (truncated)' : ''}\n\n${s.body}`)
        .join('\n\n');
    return {
        request: r,
        sections,
        totalTokens: used,
        maxTokens,
        omittedSections: omitted,
        body: fullBody,
    };
}
|
|
97
|
+
export { formatEntryForContext };
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/** Describes the task a context is built for, plus filters, section toggles and the token budget. */
export interface IContextRequest {
    /** Task description; used as the search query when selecting entries. */
    task: string;
    /** Optional framework name; appended to the search scope. */
    framework?: string;
    /** Optional area name; appended to the search scope. */
    area?: string;
    /** Tags passed to the knowledge search. */
    tags?: readonly string[];
    /** Scope values passed to the knowledge search. */
    scope?: readonly string[];
    /** Passed through to the knowledge search as `appliesWhen`. */
    appliesWhen?: readonly string[];
    /** Token budget for the whole context (default 4000). */
    maxTokens?: number;
    /** Append entry examples when formatting sections (default true). */
    includeExamples?: boolean;
    /** Emit the "Relevant Templates" section (default true). */
    includeTemplates?: boolean;
    /** Emit the "Relevant Rules" section (default true). */
    includeRules?: boolean;
    /** Emit the "Relevant Path Conventions" section (default true). */
    includePaths?: boolean;
    /** Collect entries of unmapped types and emit them as "Reference Docs" (default false). */
    includeDocs?: boolean;
    /** Emit the "Project Overview" section when `projectOverview` is set (default true). */
    includeOverview?: boolean;
    /** Emit the "Important Warnings" section when warnings were found (default true). */
    includeWarnings?: boolean;
    /** Emit the "Commands" section (default false). */
    includeCommands?: boolean;
    /** Free text used (trimmed) as the body of the "Project Overview" section. */
    projectOverview?: string;
}
|
|
19
|
+
export declare const DEFAULT_CONTEXT_REQUEST: Required<Pick<IContextRequest, 'maxTokens' | 'includeExamples' | 'includeTemplates' | 'includeRules' | 'includePaths' | 'includeDocs' | 'includeOverview' | 'includeWarnings' | 'includeCommands'>>;
|
|
20
|
+
//# sourceMappingURL=context-request.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"context-request.d.ts","sourceRoot":"","sources":["../src/context-request.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IAC1B,WAAW,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,eAAO,MAAM,uBAAuB,EAAE,QAAQ,CAC5C,IAAI,CACF,eAAe,EACb,WAAW,GACX,iBAAiB,GACjB,kBAAkB,GAClB,cAAc,GACd,cAAc,GACd,aAAa,GACb,iBAAiB,GACjB,iBAAiB,GACjB,iBAAiB,CACpB,CAWF,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Values applied for request fields the caller leaves out: the token budget
 * and the per-section include flags.
 */
export const DEFAULT_CONTEXT_REQUEST = {
    // Overall token budget for the built context.
    maxTokens: 4000,
    // Switched on unless the caller opts out.
    includeExamples: true,
    includeTemplates: true,
    includeRules: true,
    includePaths: true,
    // Reference docs are opt-in.
    includeDocs: false,
    includeOverview: true,
    includeWarnings: true,
    // Commands are opt-in.
    includeCommands: false,
};
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { IContextSection } from './context-section.js';
|
|
2
|
+
import type { IContextRequest } from './context-request.js';
|
|
3
|
+
/** Outcome of building a context: what was emitted, what was left out, and the token accounting. */
export interface IContextResult {
    /** The request merged over `DEFAULT_CONTEXT_REQUEST`, as used for the build. */
    request: IContextRequest;
    /** Sections that were emitted, in the order they appear in `body`. */
    sections: readonly IContextSection[];
    /** Sum of the `tokens` values of the emitted sections. */
    totalTokens: number;
    /** Token budget that was applied. */
    maxTokens: number;
    /** Titles of sections skipped because adding them would exceed the budget. */
    omittedSections: readonly string[];
    /** Combined render of all sections (in order). */
    body: string;
}
|
|
12
|
+
//# sourceMappingURL=context-result.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"context-result.d.ts","sourceRoot":"","sources":["../src/context-result.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAE5D,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,eAAe,CAAC;IACzB,QAAQ,EAAE,SAAS,eAAe,EAAE,CAAC;IACrC,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,SAAS,MAAM,EAAE,CAAC;IACnC,kDAAkD;IAClD,IAAI,EAAE,MAAM,CAAC;CACd"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export interface IContextSection {
|
|
2
|
+
title: string;
|
|
3
|
+
/** Entry IDs included (for traceability). */
|
|
4
|
+
entryIds: readonly string[];
|
|
5
|
+
/** Compact, AI-ready body. */
|
|
6
|
+
body: string;
|
|
7
|
+
/** Approximate token cost. */
|
|
8
|
+
tokens: number;
|
|
9
|
+
/** Marker if section was truncated due to budget. */
|
|
10
|
+
truncated?: boolean;
|
|
11
|
+
}
|
|
12
|
+
//# sourceMappingURL=context-section.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"context-section.d.ts","sourceRoot":"","sources":["../src/context-section.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,6CAA6C;IAC7C,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;IAC5B,8BAA8B;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,8BAA8B;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,qDAAqD;IACrD,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export * from './context-request.js';
|
|
2
|
+
export * from './context-result.js';
|
|
3
|
+
export * from './context-section.js';
|
|
4
|
+
export * from './context-builder.js';
|
|
5
|
+
export * from './token-estimator.js';
|
|
6
|
+
export * from './tokenizer.js';
|
|
7
|
+
export * from './relevance-selector.js';
|
|
8
|
+
export * from './ai-context-formatter.js';
|
|
9
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,sBAAsB,CAAC;AACrC,cAAc,qBAAqB,CAAC;AACpC,cAAc,sBAAsB,CAAC;AACrC,cAAc,sBAAsB,CAAC;AACrC,cAAc,sBAAsB,CAAC;AACrC,cAAc,gBAAgB,CAAC;AAC/B,cAAc,yBAAyB,CAAC;AACxC,cAAc,2BAA2B,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export * from "./context-request.js";
|
|
2
|
+
export * from "./context-result.js";
|
|
3
|
+
export * from "./context-section.js";
|
|
4
|
+
export * from "./context-builder.js";
|
|
5
|
+
export * from "./token-estimator.js";
|
|
6
|
+
export * from "./tokenizer.js";
|
|
7
|
+
export * from "./relevance-selector.js";
|
|
8
|
+
export * from "./ai-context-formatter.js";
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { IKnowledgeEntry } from '@shrkcrft/knowledge';
|
|
2
|
+
import type { IContextRequest } from './context-request.js';
|
|
3
|
+
/**
 * Search results grouped by entry type. Each list keeps the order in which
 * the search returned the entries and is capped at `limitPerSection`.
 */
export interface SelectedEntries {
    /** Entries of type `rule`. */
    rules: IKnowledgeEntry[];
    /** Entries of type `path`. */
    paths: IKnowledgeEntry[];
    /** Entries of type `template`. */
    templates: IKnowledgeEntry[];
    /** Entries of type `architecture`. */
    architecture: IKnowledgeEntry[];
    /** Entries of type `technical`. */
    technical: IKnowledgeEntry[];
    /** Entries of type `warning`. */
    warnings: IKnowledgeEntry[];
    /** Entries of type `command`. */
    commands: IKnowledgeEntry[];
    /** Entries of type `testing`. */
    testing: IKnowledgeEntry[];
    /** Entries of type `security`. */
    security: IKnowledgeEntry[];
    /** Entries of any other type; only filled when the request sets `includeDocs`. */
    docs: IKnowledgeEntry[];
    /** Entries of type `task`. */
    tasks: IKnowledgeEntry[];
}
|
|
16
|
+
export declare function selectRelevantEntries(allEntries: readonly IKnowledgeEntry[], request: IContextRequest, limitPerSection?: number): SelectedEntries;
|
|
17
|
+
//# sourceMappingURL=relevance-selector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"relevance-selector.d.ts","sourceRoot":"","sources":["../src/relevance-selector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAE3D,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAE5D,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,eAAe,EAAE,CAAC;IACzB,KAAK,EAAE,eAAe,EAAE,CAAC;IACzB,SAAS,EAAE,eAAe,EAAE,CAAC;IAC7B,YAAY,EAAE,eAAe,EAAE,CAAC;IAChC,SAAS,EAAE,eAAe,EAAE,CAAC;IAC7B,QAAQ,EAAE,eAAe,EAAE,CAAC;IAC5B,QAAQ,EAAE,eAAe,EAAE,CAAC;IAC5B,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3B,QAAQ,EAAE,eAAe,EAAE,CAAC;IAC5B,IAAI,EAAE,eAAe,EAAE,CAAC;IACxB,KAAK,EAAE,eAAe,EAAE,CAAC;CAC1B;AAeD,wBAAgB,qBAAqB,CACnC,UAAU,EAAE,SAAS,eAAe,EAAE,EACtC,OAAO,EAAE,eAAe,EACxB,eAAe,SAAI,GAClB,eAAe,CAuCjB"}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { KnowledgeIndex } from '@shrkcrft/knowledge';
|
|
2
|
+
/** Maps a lower-cased entry type to the name of the bucket that collects it. */
const TYPE_BUCKETS = {
    rule: 'rules',
    path: 'paths',
    template: 'templates',
    architecture: 'architecture',
    technical: 'technical',
    warning: 'warnings',
    command: 'commands',
    testing: 'testing',
    security: 'security',
    task: 'tasks',
};
/**
 * Search the knowledge entries for a request and group the hits by type.
 *
 * The search query is `request.task`; `request.framework` and `request.area`
 * are added to the scope. Hits are visited in the order the search returns
 * them and each bucket stops accepting entries once it holds
 * `limitPerSection` items. Entries whose type has no bucket go to `docs`, and
 * only when `request.includeDocs` is set.
 *
 * @param allEntries      Entries to index and search.
 * @param request         Request providing the query and filters.
 * @param limitPerSection Maximum number of entries kept per bucket (default 5).
 * @returns The filled buckets.
 */
export function selectRelevantEntries(allEntries, request, limitPerSection = 5) {
    const index = new KnowledgeIndex(allEntries);
    const tags = [...(request.tags ?? [])];
    const scope = [...(request.scope ?? [])];
    if (request.framework) {
        scope.push(request.framework);
    }
    if (request.area) {
        scope.push(request.area);
    }
    const matches = index.search({
        query: request.task,
        scope,
        tags,
        appliesWhen: request.appliesWhen,
    });
    const buckets = {
        rules: [],
        paths: [],
        templates: [],
        architecture: [],
        technical: [],
        warnings: [],
        commands: [],
        testing: [],
        security: [],
        docs: [],
        tasks: [],
    };
    for (const match of matches) {
        const typeKey = String(match.entry.type).toLowerCase();
        // Own-property lookup only: a type such as "constructor" or
        // "__proto__" would otherwise resolve to an inherited member, be
        // treated as a bucket name, and crash on the missing bucket.
        const bucketKey = Object.prototype.hasOwnProperty.call(TYPE_BUCKETS, typeKey)
            ? TYPE_BUCKETS[typeKey]
            : undefined;
        let target;
        if (bucketKey) {
            target = buckets[bucketKey];
        }
        else if (request.includeDocs) {
            target = buckets.docs;
        }
        if (target && target.length < limitPerSection) {
            target.push(match.entry);
        }
    }
    return buckets;
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Approximate token count. Average English token ≈ 4 chars.
|
|
3
|
+
* Good enough for a v1 budget gate; not a substitute for a real tokenizer.
|
|
4
|
+
*/
|
|
5
|
+
export declare function estimateTokens(text: string): number;
|
|
6
|
+
export declare function fitsBudget(currentTokens: number, addTokens: number, max: number): boolean;
|
|
7
|
+
//# sourceMappingURL=token-estimator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"token-estimator.d.ts","sourceRoot":"","sources":["../src/token-estimator.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAMnD;AAED,wBAAgB,UAAU,CAAC,aAAa,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAEzF"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
 * Approximate token count. Average English token ≈ 4 chars.
 * Good enough for a v1 budget gate; not a substitute for a real tokenizer.
 *
 * Two heuristics are computed — characters / 4 and words × 1.3 — and the
 * larger one is returned so the estimate leans high rather than low.
 *
 * @param text Text to measure; empty input yields 0.
 * @returns The estimated number of tokens (an integer).
 */
export function estimateTokens(text) {
    if (!text)
        return 0;
    // Use a slightly conservative ratio so we stay under budget.
    const byChars = Math.ceil(text.length / 4);
    // Count runs of non-whitespace. Splitting the trimmed text instead would
    // report one word for whitespace-only input, because "".split() yields [""].
    const words = text.match(/\S+/g)?.length ?? 0;
    return Math.max(byChars, Math.ceil(words * 1.3));
}
|
|
13
|
+
/**
 * Check whether adding `addTokens` to `currentTokens` stays within `max`.
 *
 * @returns true when the combined total is less than or equal to `max`.
 */
export function fitsBudget(currentTokens, addTokens, max) {
    const projectedTotal = currentTokens + addTokens;
    return projectedTotal <= max;
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
export interface ITokenizer {
|
|
2
|
+
/** Human-readable name, surfaced in diagnostics. */
|
|
3
|
+
readonly name: string;
|
|
4
|
+
/** Total token count for `text`. Must be deterministic for a given input. */
|
|
5
|
+
countTokens(text: string): number;
|
|
6
|
+
}
|
|
7
|
+
export declare function getTokenizer(): ITokenizer;
|
|
8
|
+
export declare function setTokenizer(tokenizer: ITokenizer): void;
|
|
9
|
+
export declare function resetTokenizer(): void;
|
|
10
|
+
export declare function countTokens(text: string): number;
|
|
11
|
+
export interface ITokenizerInitOptions {
|
|
12
|
+
/**
|
|
13
|
+
* Which BPE encoding to load. Defaults to `cl100k_base` (GPT-4 / GPT-3.5);
|
|
14
|
+
* pass `o200k_base` for GPT-4o-class models.
|
|
15
|
+
*/
|
|
16
|
+
encoding?: 'cl100k_base' | 'o200k_base';
|
|
17
|
+
/**
|
|
18
|
+
* If true, throw on load failure instead of falling back silently. Useful
|
|
19
|
+
* in tests; not recommended in production paths.
|
|
20
|
+
*/
|
|
21
|
+
strict?: boolean;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Asynchronously upgrade the active tokenizer from the estimator to a real
|
|
25
|
+
* BPE encoder. Resolves to `true` if the swap happened; resolves to `false`
|
|
26
|
+
* (or rejects, when `strict`) if the tokenizer module could not be loaded.
|
|
27
|
+
*/
|
|
28
|
+
export declare function initTokenizer(options?: ITokenizerInitOptions): Promise<boolean>;
|
|
29
|
+
//# sourceMappingURL=tokenizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../src/tokenizer.ts"],"names":[],"mappings":"AAiBA,MAAM,WAAW,UAAU;IACzB,oDAAoD;IACpD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,6EAA6E;IAC7E,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAAC;CACnC;AAyBD,wBAAgB,YAAY,IAAI,UAAU,CAEzC;AAED,wBAAgB,YAAY,CAAC,SAAS,EAAE,UAAU,GAAG,IAAI,CAExD;AAED,wBAAgB,cAAc,IAAI,IAAI,CAErC;AAED,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEhD;AAED,MAAM,WAAW,qBAAqB;IACpC;;;OAGG;IACH,QAAQ,CAAC,EAAE,aAAa,GAAG,YAAY,CAAC;IACxC;;;OAGG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED;;;;GAIG;AACH,wBAAsB,aAAa,CAAC,OAAO,GAAE,qBAA0B,GAAG,OAAO,CAAC,OAAO,CAAC,CAgBzF"}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
// Import-specifier helper (looks like a compiler-emitted helper — TODO confirm).
// Reuses an existing `this.__rewriteRelativeImportExtension` when one is present.
// For a string that starts with "./" or "../" it rewrites a trailing TypeScript
// extension to its JavaScript counterpart; any other value is returned unchanged.
var __rewriteRelativeImportExtension = (this && this.__rewriteRelativeImportExtension) || function (path, preserveJsx) {
    // Only relative specifiers are candidates for rewriting.
    if (typeof path === "string" && /^\.\.?\//.test(path)) {
        return path.replace(/\.(tsx)$|((?:\.d)?)((?:\.[^./]+?)?)\.([cm]?)ts$/i, function (m, tsx, d, ext, cm) {
            // ".tsx" becomes ".jsx" or ".js"; a ".d.ts"-style match is kept as-is unless it
            // carries both an extra extension and a c/m marker; otherwise "ts" becomes "js"
            // with the c/m marker lower-cased.
            return tsx ? preserveJsx ? ".jsx" : ".js" : d && (!ext || !cm) ? m : (d + ext + "." + cm.toLowerCase() + "js");
        });
    }
    return path;
};
|
|
9
|
+
/**
|
|
10
|
+
* Token counting abstraction.
|
|
11
|
+
*
|
|
12
|
+
* The context builder needs to fit content under a budget — wrong counts mean
|
|
13
|
+
* either truncated context (under-counted) or wasted budget (over-counted).
|
|
14
|
+
* Real BPE tokenizers are accurate; the legacy `estimateTokens` is a 4-char
|
|
15
|
+
* heuristic kept as a fallback so the package still loads in environments
|
|
16
|
+
* where the tokenizer can't be imported.
|
|
17
|
+
*
|
|
18
|
+
* Default flow:
|
|
19
|
+
* 1. On import, the estimator is the active tokenizer (always works).
|
|
20
|
+
* 2. Call `await initTokenizer()` once at startup to swap in the real
|
|
21
|
+
* tokenizer. If that fails for any reason, the estimator stays active.
|
|
22
|
+
* 3. `countTokens(text)` always returns a number, synchronously.
|
|
23
|
+
*/
|
|
24
|
+
import { estimateTokens } from "./token-estimator.js";
|
|
25
|
+
/** Tokenizer that delegates to the `estimateTokens` heuristic. */
class EstimatorTokenizer {
    /** Name reported for this tokenizer. */
    name = 'estimator';
    /** Returns the heuristic token estimate for `text`. */
    countTokens(text) {
        const approximate = estimateTokens(text);
        return approximate;
    }
}
|
|
31
|
+
/** Tokenizer that counts tokens by running a supplied `encode` function. */
class GptTokenizer {
    /** Function that turns text into an array-like of tokens. */
    encode;
    /** Name reported for this tokenizer. */
    name;
    /**
     * @param name   Name to report for this tokenizer.
     * @param encode Encoder whose result length is the token count.
     */
    constructor(name, encode) {
        this.encode = encode;
        this.name = name;
    }
    /** Returns 0 for empty input, otherwise the length of the encoded result. */
    countTokens(text) {
        return text ? this.encode(text).length : 0;
    }
}
|
|
44
|
+
/** Tokenizer currently used by `countTokens`; starts out as the estimator. */
let activeTokenizer = new EstimatorTokenizer();
/** Returns the tokenizer that is currently active. */
export function getTokenizer() {
    return activeTokenizer;
}
/** Makes `tokenizer` the active tokenizer for all later `countTokens` calls. */
export function setTokenizer(tokenizer) {
    activeTokenizer = tokenizer;
}
/** Switches back to a fresh estimator-based tokenizer. */
export function resetTokenizer() {
    setTokenizer(new EstimatorTokenizer());
}
/** Counts the tokens of `text` with the active tokenizer. */
export function countTokens(text) {
    return getTokenizer().countTokens(text);
}
|
|
57
|
+
/**
 * Asynchronously upgrade the active tokenizer from the estimator to a real
 * BPE encoder. Resolves to `true` if the swap happened; resolves to `false`
 * (or rejects, when `strict`) if the tokenizer module could not be loaded or
 * does not export an `encode` function.
 *
 * @param options `encoding` selects the BPE table (`o200k_base`, otherwise
 *                `cl100k_base`); `strict` turns failures into rejections.
 * @returns Whether the real tokenizer is now active.
 */
export async function initTokenizer(options = {}) {
    // Resolve to one of the two supported encodings up front so the
    // tokenizer's diagnostic name always matches the module actually loaded,
    // even if an untyped caller passes an unrecognised value.
    const encoding = options.encoding === 'o200k_base' ? 'o200k_base' : 'cl100k_base';
    const moduleName = encoding === 'o200k_base' ? 'gpt-tokenizer/encoding/o200k_base' : 'gpt-tokenizer/encoding/cl100k_base';
    try {
        const mod = (await import(__rewriteRelativeImportExtension(moduleName)));
        if (typeof mod.encode !== 'function') {
            // Handled by the catch below: rethrown when strict, false otherwise.
            throw new Error(`gpt-tokenizer module ${moduleName} missing encode()`);
        }
        setTokenizer(new GptTokenizer(`gpt-tokenizer:${encoding}`, mod.encode));
        return true;
    }
    catch (err) {
        // Best effort by design: without `strict` the estimator stays active.
        if (options.strict) {
            throw err;
        }
        return false;
    }
}
|
package/package.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@shrkcrft/context",
|
|
3
|
+
"version": "0.1.0-alpha.1",
|
|
4
|
+
"description": "SharkCraft AI context builder: token-budgeted relevance retrieval for tasks.",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"author": "SharkCraft contributors",
|
|
7
|
+
"type": "module",
|
|
8
|
+
"main": "./dist/index.js",
|
|
9
|
+
"types": "./dist/index.d.ts",
|
|
10
|
+
"exports": {
|
|
11
|
+
".": {
|
|
12
|
+
"types": "./dist/index.d.ts",
|
|
13
|
+
"import": "./dist/index.js",
|
|
14
|
+
"default": "./dist/index.js"
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"files": [
|
|
18
|
+
"dist",
|
|
19
|
+
"README.md",
|
|
20
|
+
"LICENSE"
|
|
21
|
+
],
|
|
22
|
+
"repository": {
|
|
23
|
+
"type": "git",
|
|
24
|
+
"url": "git+https://github.com/sharkcraft/sharkcraft.git",
|
|
25
|
+
"directory": "packages/context"
|
|
26
|
+
},
|
|
27
|
+
"homepage": "https://github.com/sharkcraft/sharkcraft",
|
|
28
|
+
"bugs": {
|
|
29
|
+
"url": "https://github.com/sharkcraft/sharkcraft/issues"
|
|
30
|
+
},
|
|
31
|
+
"keywords": [
|
|
32
|
+
"sharkcraft",
|
|
33
|
+
"context",
|
|
34
|
+
"ai",
|
|
35
|
+
"token-budget",
|
|
36
|
+
"mcp"
|
|
37
|
+
],
|
|
38
|
+
"engines": {
|
|
39
|
+
"bun": ">=1.1.0",
|
|
40
|
+
"node": ">=18"
|
|
41
|
+
},
|
|
42
|
+
"scripts": {
|
|
43
|
+
"typecheck": "tsc --noEmit -p tsconfig.json"
|
|
44
|
+
},
|
|
45
|
+
"dependencies": {
|
|
46
|
+
"@shrkcrft/core": "^0.1.0-alpha.1",
|
|
47
|
+
"@shrkcrft/knowledge": "^0.1.0-alpha.1",
|
|
48
|
+
"@shrkcrft/paths": "^0.1.0-alpha.1",
|
|
49
|
+
"@shrkcrft/rules": "^0.1.0-alpha.1",
|
|
50
|
+
"@shrkcrft/templates": "^0.1.0-alpha.1",
|
|
51
|
+
"gpt-tokenizer": "^3.4.0"
|
|
52
|
+
},
|
|
53
|
+
"publishConfig": {
|
|
54
|
+
"access": "public"
|
|
55
|
+
}
|
|
56
|
+
}
|