chub-dev 0.2.0-beta.2 → 0.2.0-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/commands/annotate.js +83 -0
- package/src/commands/build.js +9 -0
- package/src/commands/get.js +64 -39
- package/src/index.js +14 -13
- package/src/lib/annotations.js +57 -0
- package/src/lib/bm25.js +170 -0
- package/src/lib/cache.js +14 -0
- package/src/lib/config.js +1 -1
- package/src/lib/registry.js +103 -20
- package/dist/anthropic/docs/sdk/javascript/DOC.md +0 -499
- package/dist/anthropic/docs/sdk/python/DOC.md +0 -382
- package/dist/openai/docs/chat/javascript/DOC.md +0 -350
- package/dist/openai/docs/chat/python/DOC.md +0 -526
- package/dist/pinecone/docs/sdk/javascript/DOC.md +0 -984
- package/dist/pinecone/docs/sdk/python/DOC.md +0 -1395
- package/dist/registry.json +0 -276
- package/dist/resend/docs/sdk/DOC.md +0 -1271
- package/dist/stripe/docs/api/DOC.md +0 -1726
- package/dist/supabase/docs/sdk/DOC.md +0 -1606
- package/dist/twilio/docs/sdk/python/DOC.md +0 -469
- package/dist/twilio/docs/sdk/typescript/DOC.md +0 -946
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "chub-dev",
|
|
3
|
-
"version": "0.2.0-beta.2",
|
|
3
|
+
"version": "0.2.0-beta.4",
|
|
4
4
|
"description": "CLI for Context Hub - search and retrieve LLM-optimized docs and skills",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -46,6 +46,6 @@
|
|
|
46
46
|
"yaml": "^2.3.0"
|
|
47
47
|
},
|
|
48
48
|
"devDependencies": {
|
|
49
|
-
"vitest": "^
|
|
49
|
+
"vitest": "^4.0.18"
|
|
50
50
|
}
|
|
51
51
|
}
|
|
package/src/commands/annotate.js
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import chalk from 'chalk';
|
|
2
|
+
import { readAnnotation, writeAnnotation, clearAnnotation, listAnnotations } from '../lib/annotations.js';
|
|
3
|
+
import { output, error, info } from '../lib/output.js';
|
|
4
|
+
|
|
5
|
+
export function registerAnnotateCommand(program) {
|
|
6
|
+
program
|
|
7
|
+
.command('annotate [id] [note]')
|
|
8
|
+
.description('Attach agent notes to a doc or skill')
|
|
9
|
+
.option('--clear', 'Remove annotation for this entry')
|
|
10
|
+
.option('--list', 'List all annotations')
|
|
11
|
+
.action((id, note, opts) => {
|
|
12
|
+
const globalOpts = program.optsWithGlobals();
|
|
13
|
+
|
|
14
|
+
if (opts.list) {
|
|
15
|
+
const annotations = listAnnotations();
|
|
16
|
+
output(
|
|
17
|
+
annotations,
|
|
18
|
+
(data) => {
|
|
19
|
+
if (data.length === 0) {
|
|
20
|
+
console.log('No annotations.');
|
|
21
|
+
return;
|
|
22
|
+
}
|
|
23
|
+
for (const a of data) {
|
|
24
|
+
console.log(`${chalk.bold(a.id)} ${chalk.dim(`(${a.updatedAt})`)}`);
|
|
25
|
+
console.log(` ${a.note}`);
|
|
26
|
+
console.log();
|
|
27
|
+
}
|
|
28
|
+
},
|
|
29
|
+
globalOpts
|
|
30
|
+
);
|
|
31
|
+
return;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
if (!id) {
|
|
35
|
+
error('Usage: chub annotate <id> <note> | chub annotate <id> --clear | chub annotate --list', globalOpts);
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
if (opts.clear) {
|
|
39
|
+
const removed = clearAnnotation(id);
|
|
40
|
+
output(
|
|
41
|
+
{ id, cleared: removed },
|
|
42
|
+
(data) => {
|
|
43
|
+
if (data.cleared) {
|
|
44
|
+
console.log(`Annotation cleared for ${chalk.bold(id)}.`);
|
|
45
|
+
} else {
|
|
46
|
+
console.log(`No annotation found for ${chalk.bold(id)}.`);
|
|
47
|
+
}
|
|
48
|
+
},
|
|
49
|
+
globalOpts
|
|
50
|
+
);
|
|
51
|
+
return;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
if (!note) {
|
|
55
|
+
// Show existing annotation
|
|
56
|
+
const existing = readAnnotation(id);
|
|
57
|
+
if (existing) {
|
|
58
|
+
output(
|
|
59
|
+
existing,
|
|
60
|
+
(data) => {
|
|
61
|
+
console.log(`${chalk.bold(data.id)} ${chalk.dim(`(${data.updatedAt})`)}`);
|
|
62
|
+
console.log(data.note);
|
|
63
|
+
},
|
|
64
|
+
globalOpts
|
|
65
|
+
);
|
|
66
|
+
} else {
|
|
67
|
+
output(
|
|
68
|
+
{ id, note: null },
|
|
69
|
+
() => console.log(`No annotation for ${chalk.bold(id)}.`),
|
|
70
|
+
globalOpts
|
|
71
|
+
);
|
|
72
|
+
}
|
|
73
|
+
return;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
const data = writeAnnotation(id, note);
|
|
77
|
+
output(
|
|
78
|
+
data,
|
|
79
|
+
(d) => console.log(`Annotation saved for ${chalk.bold(d.id)}.`),
|
|
80
|
+
globalOpts
|
|
81
|
+
);
|
|
82
|
+
});
|
|
83
|
+
}
|
package/src/commands/build.js
CHANGED
|
@@ -4,6 +4,7 @@ import chalk from 'chalk';
|
|
|
4
4
|
import { parseFrontmatter } from '../lib/frontmatter.js';
|
|
5
5
|
import { info } from '../lib/output.js';
|
|
6
6
|
import { trackEvent } from '../lib/analytics.js';
|
|
7
|
+
import { buildIndex } from '../lib/bm25.js';
|
|
7
8
|
|
|
8
9
|
/**
|
|
9
10
|
* Recursively find all DOC.md and SKILL.md files under a directory.
|
|
@@ -301,6 +302,14 @@ export function registerBuildCommand(program) {
|
|
|
301
302
|
mkdirSync(outputDir, { recursive: true });
|
|
302
303
|
writeFileSync(join(outputDir, 'registry.json'), JSON.stringify(registry, null, 2));
|
|
303
304
|
|
|
305
|
+
// Build and write BM25 search index
|
|
306
|
+
const allEntries = [
|
|
307
|
+
...allDocs.map((d) => ({ ...d, _type: 'doc' })),
|
|
308
|
+
...allSkills.map((s) => ({ ...s, _type: 'skill' })),
|
|
309
|
+
];
|
|
310
|
+
const searchIndex = buildIndex(allEntries);
|
|
311
|
+
writeFileSync(join(outputDir, 'search-index.json'), JSON.stringify(searchIndex));
|
|
312
|
+
|
|
304
313
|
// Copy content tree
|
|
305
314
|
for (const authorEntry of topLevel) {
|
|
306
315
|
const src = join(contentDir, authorEntry.name);
|
package/src/commands/get.js
CHANGED
|
@@ -5,19 +5,17 @@ import { getEntry, resolveDocPath, resolveEntryFile } from '../lib/registry.js';
|
|
|
5
5
|
import { fetchDoc, fetchDocFull } from '../lib/cache.js';
|
|
6
6
|
import { output, error, info } from '../lib/output.js';
|
|
7
7
|
import { trackEvent } from '../lib/analytics.js';
|
|
8
|
+
import { readAnnotation } from '../lib/annotations.js';
|
|
8
9
|
|
|
9
10
|
/**
|
|
10
|
-
*
|
|
11
|
-
* @param {string} type - "doc" or "skill"
|
|
12
|
-
* @param {string[]} ids - one or more entry ids
|
|
13
|
-
* @param {object} opts - command options (lang, version, output, full)
|
|
14
|
-
* @param {object} globalOpts - global options (json)
|
|
11
|
+
* Fetch one or more entries by ID. Auto-detects doc vs skill per entry.
|
|
15
12
|
*/
|
|
16
|
-
async function fetchEntries(type, ids, opts, globalOpts) {
|
|
13
|
+
async function fetchEntries(ids, opts, globalOpts) {
|
|
17
14
|
const results = [];
|
|
18
15
|
|
|
19
16
|
for (const id of ids) {
|
|
20
|
-
|
|
17
|
+
// Search both docs and skills — auto-detect type
|
|
18
|
+
const result = getEntry(id);
|
|
21
19
|
|
|
22
20
|
if (result.ambiguous) {
|
|
23
21
|
error(
|
|
@@ -27,16 +25,24 @@ async function fetchEntries(type, ids, opts, globalOpts) {
|
|
|
27
25
|
}
|
|
28
26
|
|
|
29
27
|
if (!result.entry) {
|
|
30
|
-
error(`Entry "${id}" not found
|
|
28
|
+
error(`Entry "${id}" not found.`, globalOpts);
|
|
31
29
|
}
|
|
32
30
|
|
|
33
31
|
const entry = result.entry;
|
|
32
|
+
const type = entry.languages ? 'doc' : 'skill';
|
|
34
33
|
const resolved = resolveDocPath(entry, opts.lang, opts.version);
|
|
35
34
|
|
|
36
35
|
if (!resolved) {
|
|
37
36
|
error(`Could not resolve path for "${id}" ${opts.lang || ''} ${opts.version || ''}`.trim(), globalOpts);
|
|
38
37
|
}
|
|
39
38
|
|
|
39
|
+
if (resolved.versionNotFound) {
|
|
40
|
+
error(
|
|
41
|
+
`Version "${resolved.requested}" not found for "${id}". Available versions: ${resolved.available.join(', ')}`,
|
|
42
|
+
globalOpts
|
|
43
|
+
);
|
|
44
|
+
}
|
|
45
|
+
|
|
40
46
|
if (resolved.needsLanguage) {
|
|
41
47
|
error(
|
|
42
48
|
`Multiple languages available for "${id}": ${resolved.available.join(', ')}. Specify --lang.`,
|
|
@@ -49,13 +55,32 @@ async function fetchEntries(type, ids, opts, globalOpts) {
|
|
|
49
55
|
error(`"${id}" ${entryFile.error}`, globalOpts);
|
|
50
56
|
}
|
|
51
57
|
|
|
58
|
+
// Determine which reference files exist (beyond DOC.md/SKILL.md)
|
|
59
|
+
const entryFileName = type === 'skill' ? 'SKILL.md' : 'DOC.md';
|
|
60
|
+
const refFiles = resolved.files.filter((f) => f !== entryFileName);
|
|
61
|
+
|
|
52
62
|
try {
|
|
53
|
-
if (opts.
|
|
63
|
+
if (opts.file) {
|
|
64
|
+
// --file mode: fetch specific file(s) by path
|
|
65
|
+
const requested = opts.file.split(',').map((f) => f.trim());
|
|
66
|
+
const invalid = requested.filter((f) => !resolved.files.includes(f));
|
|
67
|
+
if (invalid.length > 0) {
|
|
68
|
+
const available = refFiles.length > 0 ? refFiles.join(', ') : '(none)';
|
|
69
|
+
error(`File "${invalid[0]}" not found in ${id}. Available: ${available}`, globalOpts);
|
|
70
|
+
}
|
|
71
|
+
if (requested.length === 1) {
|
|
72
|
+
const content = await fetchDoc(resolved.source, join(resolved.path, requested[0]));
|
|
73
|
+
results.push({ id: entry.id, type, content, path: join(resolved.path, requested[0]) });
|
|
74
|
+
} else {
|
|
75
|
+
const allFiles = await fetchDocFull(resolved.source, resolved.path, requested);
|
|
76
|
+
results.push({ id: entry.id, type, files: allFiles, path: resolved.path });
|
|
77
|
+
}
|
|
78
|
+
} else if (opts.full && resolved.files.length > 0) {
|
|
54
79
|
const allFiles = await fetchDocFull(resolved.source, resolved.path, resolved.files);
|
|
55
|
-
results.push({ id: entry.id, files: allFiles, path: resolved.path });
|
|
80
|
+
results.push({ id: entry.id, type, files: allFiles, path: resolved.path });
|
|
56
81
|
} else {
|
|
57
82
|
const content = await fetchDoc(resolved.source, entryFile.filePath);
|
|
58
|
-
results.push({ id: entry.id, content, path: entryFile.filePath });
|
|
83
|
+
results.push({ id: entry.id, type, content, path: entryFile.filePath, additionalFiles: refFiles });
|
|
59
84
|
}
|
|
60
85
|
} catch (err) {
|
|
61
86
|
error(err.message, globalOpts);
|
|
@@ -64,7 +89,7 @@ async function fetchEntries(type, ids, opts, globalOpts) {
|
|
|
64
89
|
|
|
65
90
|
// Track fetches
|
|
66
91
|
for (const r of results) {
|
|
67
|
-
trackEvent(type === 'doc' ? 'doc_fetched' : 'skill_fetched', {
|
|
92
|
+
trackEvent(r.type === 'doc' ? 'doc_fetched' : 'skill_fetched', {
|
|
68
93
|
entry_id: r.id,
|
|
69
94
|
full: !!opts.full,
|
|
70
95
|
lang: opts.lang || undefined,
|
|
@@ -74,7 +99,6 @@ async function fetchEntries(type, ids, opts, globalOpts) {
|
|
|
74
99
|
// Output
|
|
75
100
|
if (opts.output) {
|
|
76
101
|
if (opts.full) {
|
|
77
|
-
// --full -o: write individual files preserving directory structure
|
|
78
102
|
for (const r of results) {
|
|
79
103
|
if (r.files) {
|
|
80
104
|
const baseDir = ids.length > 1 ? join(opts.output, r.id) : opts.output;
|
|
@@ -111,18 +135,32 @@ async function fetchEntries(type, ids, opts, globalOpts) {
|
|
|
111
135
|
}
|
|
112
136
|
}
|
|
113
137
|
if (globalOpts.json) {
|
|
114
|
-
console.log(JSON.stringify(results.map((r) => ({ id: r.id, path: opts.output }))));
|
|
138
|
+
console.log(JSON.stringify(results.map((r) => ({ id: r.id, type: r.type, path: opts.output }))));
|
|
115
139
|
}
|
|
116
140
|
} else {
|
|
117
|
-
// stdout
|
|
118
141
|
if (results.length === 1 && !results[0].files) {
|
|
142
|
+
const r = results[0];
|
|
143
|
+
const extraFiles = r.additionalFiles || [];
|
|
144
|
+
const annotation = readAnnotation(r.id);
|
|
145
|
+
const jsonData = { id: r.id, type: r.type, content: r.content, path: r.path };
|
|
146
|
+
if (extraFiles.length > 0) jsonData.additionalFiles = extraFiles;
|
|
147
|
+
if (annotation) jsonData.annotation = annotation;
|
|
119
148
|
output(
|
|
120
|
-
|
|
121
|
-
(data) =>
|
|
149
|
+
jsonData,
|
|
150
|
+
(data) => {
|
|
151
|
+
process.stdout.write(data.content);
|
|
152
|
+
if (annotation) {
|
|
153
|
+
process.stdout.write(`\n\n---\n[Agent note — ${annotation.updatedAt}]\n${annotation.note}\n`);
|
|
154
|
+
}
|
|
155
|
+
if (extraFiles.length > 0) {
|
|
156
|
+
const fileList = extraFiles.map((f) => ` ${f}`).join('\n');
|
|
157
|
+
const example = `chub get ${r.id} --file ${extraFiles[0]}`;
|
|
158
|
+
process.stdout.write(`\n\n---\nAdditional files available (use --file to fetch):\n${fileList}\nExample: ${example}\n`);
|
|
159
|
+
}
|
|
160
|
+
},
|
|
122
161
|
globalOpts
|
|
123
162
|
);
|
|
124
163
|
} else {
|
|
125
|
-
// Concatenate all content (--full to stdout, or multiple entries)
|
|
126
164
|
const parts = results.flatMap((r) => {
|
|
127
165
|
if (r.files) {
|
|
128
166
|
return r.files.map((f) => `# FILE: ${f.name}\n\n${f.content}`);
|
|
@@ -131,7 +169,7 @@ async function fetchEntries(type, ids, opts, globalOpts) {
|
|
|
131
169
|
});
|
|
132
170
|
const combined = parts.join('\n\n---\n\n');
|
|
133
171
|
output(
|
|
134
|
-
results.map((r) => ({ id: r.id, path: r.path })),
|
|
172
|
+
results.map((r) => ({ id: r.id, type: r.type, path: r.path })),
|
|
135
173
|
() => process.stdout.write(combined),
|
|
136
174
|
globalOpts
|
|
137
175
|
);
|
|
@@ -140,29 +178,16 @@ async function fetchEntries(type, ids, opts, globalOpts) {
|
|
|
140
178
|
}
|
|
141
179
|
|
|
142
180
|
export function registerGetCommand(program) {
|
|
143
|
-
|
|
144
|
-
.command('get')
|
|
145
|
-
.description('
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
.command('docs <ids...>')
|
|
149
|
-
.description('Fetch documentation content')
|
|
150
|
-
.option('--lang <language>', 'Language variant')
|
|
151
|
-
.option('--version <version>', 'Specific version')
|
|
152
|
-
.option('-o, --output <path>', 'Write to file or directory')
|
|
153
|
-
.option('--full', 'Fetch all files (not just entry point)')
|
|
154
|
-
.action(async (ids, opts) => {
|
|
155
|
-
const globalOpts = program.optsWithGlobals();
|
|
156
|
-
await fetchEntries('doc', ids, opts, globalOpts);
|
|
157
|
-
});
|
|
158
|
-
|
|
159
|
-
get
|
|
160
|
-
.command('skills <ids...>')
|
|
161
|
-
.description('Fetch skill content')
|
|
181
|
+
program
|
|
182
|
+
.command('get <ids...>')
|
|
183
|
+
.description('Fetch docs or skills by ID (auto-detects type)')
|
|
184
|
+
.option('--lang <language>', 'Language variant (for docs)')
|
|
185
|
+
.option('--version <version>', 'Specific version (for docs)')
|
|
162
186
|
.option('-o, --output <path>', 'Write to file or directory')
|
|
163
187
|
.option('--full', 'Fetch all files (not just entry point)')
|
|
188
|
+
.option('--file <paths>', 'Fetch specific file(s) by path (comma-separated)')
|
|
164
189
|
.action(async (ids, opts) => {
|
|
165
190
|
const globalOpts = program.optsWithGlobals();
|
|
166
|
-
await fetchEntries('skill', ids, opts, globalOpts);
|
|
191
|
+
await fetchEntries(ids, opts, globalOpts);
|
|
167
192
|
});
|
|
168
193
|
}
|
package/src/index.js
CHANGED
|
@@ -10,6 +10,7 @@ import { registerSearchCommand } from './commands/search.js';
|
|
|
10
10
|
import { registerGetCommand } from './commands/get.js';
|
|
11
11
|
import { registerBuildCommand } from './commands/build.js';
|
|
12
12
|
import { registerFeedbackCommand } from './commands/feedback.js';
|
|
13
|
+
import { registerAnnotateCommand } from './commands/annotate.js';
|
|
13
14
|
import { trackEvent, shutdownAnalytics } from './lib/analytics.js';
|
|
14
15
|
|
|
15
16
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
@@ -26,17 +27,16 @@ ${chalk.bold.underline('Getting Started')}
|
|
|
26
27
|
${chalk.dim('$')} chub search ${chalk.dim('# list everything available')}
|
|
27
28
|
${chalk.dim('$')} chub search "stripe" ${chalk.dim('# fuzzy search')}
|
|
28
29
|
${chalk.dim('$')} chub search stripe/payments ${chalk.dim('# exact id → full detail')}
|
|
29
|
-
${chalk.dim('$')} chub get
|
|
30
|
-
${chalk.dim('$')} chub get
|
|
31
|
-
${chalk.dim('$')} chub get
|
|
32
|
-
${chalk.dim('$')} chub get
|
|
33
|
-
${chalk.dim('$')} chub get
|
|
30
|
+
${chalk.dim('$')} chub get stripe/api ${chalk.dim('# print doc to terminal')}
|
|
31
|
+
${chalk.dim('$')} chub get stripe/api -o doc.md ${chalk.dim('# save to file')}
|
|
32
|
+
${chalk.dim('$')} chub get openai/chat --lang py ${chalk.dim('# specific language')}
|
|
33
|
+
${chalk.dim('$')} chub get pw-community/login-flows ${chalk.dim('# fetch a skill')}
|
|
34
|
+
${chalk.dim('$')} chub get openai/chat stripe/api ${chalk.dim('# fetch multiple')}
|
|
34
35
|
|
|
35
36
|
${chalk.bold.underline('Commands')}
|
|
36
37
|
|
|
37
38
|
${chalk.bold('search')} [query] Search docs and skills (no query = list all)
|
|
38
|
-
${chalk.bold('get
|
|
39
|
-
${chalk.bold('get skills')} <ids...> Fetch skill content
|
|
39
|
+
${chalk.bold('get')} <ids...> Fetch docs or skills by ID
|
|
40
40
|
${chalk.bold('update')} Refresh the cached registry
|
|
41
41
|
${chalk.bold('cache')} status|clear Manage the local cache
|
|
42
42
|
${chalk.bold('build')} <content-dir> Build registry from content directory
|
|
@@ -56,10 +56,10 @@ ${chalk.bold.underline('Agent Piping Patterns')}
|
|
|
56
56
|
|
|
57
57
|
${chalk.dim('# Search → pick → fetch → save')}
|
|
58
58
|
${chalk.dim('$')} ID=$(chub search "stripe" --json | jq -r '.results[0].id')
|
|
59
|
-
${chalk.dim('$')} chub get
|
|
59
|
+
${chalk.dim('$')} chub get "$ID" --lang js -o .context/stripe.md
|
|
60
60
|
|
|
61
|
-
${chalk.dim('# Fetch multiple
|
|
62
|
-
${chalk.dim('$')} chub get
|
|
61
|
+
${chalk.dim('# Fetch multiple at once')}
|
|
62
|
+
${chalk.dim('$')} chub get openai/chat stripe/api -o .context/
|
|
63
63
|
|
|
64
64
|
${chalk.bold.underline('Multi-Source Config')} ${chalk.dim('(~/.chub/config.yaml)')}
|
|
65
65
|
|
|
@@ -69,7 +69,7 @@ ${chalk.bold.underline('Multi-Source Config')} ${chalk.dim('(~/.chub/config.yaml
|
|
|
69
69
|
${chalk.dim(' - name: internal')}
|
|
70
70
|
${chalk.dim(' path: /path/to/local/docs')}
|
|
71
71
|
|
|
72
|
-
${chalk.dim('# On id collision, use source: prefix: chub get
|
|
72
|
+
${chalk.dim('# On id collision, use source: prefix: chub get internal:openai/chat')}
|
|
73
73
|
`);
|
|
74
74
|
}
|
|
75
75
|
|
|
@@ -78,14 +78,14 @@ const program = new Command();
|
|
|
78
78
|
program
|
|
79
79
|
.name('chub')
|
|
80
80
|
.description('Context Hub - search and retrieve LLM-optimized docs and skills')
|
|
81
|
-
.version(pkg.version)
|
|
81
|
+
.version(pkg.version, '-V, --cli-version')
|
|
82
82
|
.option('--json', 'Output as JSON (machine-readable)')
|
|
83
83
|
.action(() => {
|
|
84
84
|
printUsage();
|
|
85
85
|
});
|
|
86
86
|
|
|
87
87
|
// Commands that don't need registry
|
|
88
|
-
const SKIP_REGISTRY = ['update', 'cache', 'build', 'feedback', 'help'];
|
|
88
|
+
const SKIP_REGISTRY = ['update', 'cache', 'build', 'feedback', 'annotate', 'help'];
|
|
89
89
|
|
|
90
90
|
program.hook('preAction', async (thisCommand) => {
|
|
91
91
|
const cmdName = thisCommand.args?.[0] || thisCommand.name();
|
|
@@ -112,6 +112,7 @@ registerSearchCommand(program);
|
|
|
112
112
|
registerGetCommand(program);
|
|
113
113
|
registerBuildCommand(program);
|
|
114
114
|
registerFeedbackCommand(program);
|
|
115
|
+
registerAnnotateCommand(program);
|
|
115
116
|
|
|
116
117
|
program.parse();
|
|
117
118
|
|
|
package/src/lib/annotations.js
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { readFileSync, writeFileSync, mkdirSync, unlinkSync, readdirSync } from 'node:fs';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
import { getChubDir } from './config.js';
|
|
4
|
+
|
|
5
|
+
function getAnnotationsDir() {
|
|
6
|
+
return join(getChubDir(), 'annotations');
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
function annotationPath(entryId) {
|
|
10
|
+
const safe = entryId.replace(/\//g, '--');
|
|
11
|
+
return join(getAnnotationsDir(), `${safe}.json`);
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
export function readAnnotation(entryId) {
|
|
15
|
+
try {
|
|
16
|
+
return JSON.parse(readFileSync(annotationPath(entryId), 'utf8'));
|
|
17
|
+
} catch {
|
|
18
|
+
return null;
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
export function writeAnnotation(entryId, note) {
|
|
23
|
+
const dir = getAnnotationsDir();
|
|
24
|
+
mkdirSync(dir, { recursive: true });
|
|
25
|
+
const data = {
|
|
26
|
+
id: entryId,
|
|
27
|
+
note,
|
|
28
|
+
updatedAt: new Date().toISOString(),
|
|
29
|
+
};
|
|
30
|
+
writeFileSync(annotationPath(entryId), JSON.stringify(data, null, 2));
|
|
31
|
+
return data;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export function clearAnnotation(entryId) {
|
|
35
|
+
try {
|
|
36
|
+
unlinkSync(annotationPath(entryId));
|
|
37
|
+
return true;
|
|
38
|
+
} catch {
|
|
39
|
+
return false;
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
export function listAnnotations() {
|
|
44
|
+
const dir = getAnnotationsDir();
|
|
45
|
+
try {
|
|
46
|
+
const files = readdirSync(dir).filter((f) => f.endsWith('.json'));
|
|
47
|
+
return files.map((f) => {
|
|
48
|
+
try {
|
|
49
|
+
return JSON.parse(readFileSync(join(dir, f), 'utf8'));
|
|
50
|
+
} catch {
|
|
51
|
+
return null;
|
|
52
|
+
}
|
|
53
|
+
}).filter(Boolean);
|
|
54
|
+
} catch {
|
|
55
|
+
return [];
|
|
56
|
+
}
|
|
57
|
+
}
|
package/src/lib/bm25.js
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BM25 search implementation for Context Hub.
|
|
3
|
+
* Index is built at `chub build` time, scoring happens at search time.
|
|
4
|
+
* Tokenizer is shared between build and search to ensure consistency.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
const STOP_WORDS = new Set([
|
|
8
|
+
'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from',
|
|
9
|
+
'has', 'have', 'in', 'is', 'it', 'its', 'of', 'on', 'or', 'that',
|
|
10
|
+
'the', 'to', 'was', 'were', 'will', 'with', 'this', 'but', 'not',
|
|
11
|
+
'you', 'your', 'can', 'do', 'does', 'how', 'if', 'may', 'no',
|
|
12
|
+
'so', 'than', 'too', 'very', 'just', 'about', 'into', 'over',
|
|
13
|
+
'such', 'then', 'them', 'these', 'those', 'through', 'under',
|
|
14
|
+
'use', 'using', 'used',
|
|
15
|
+
]);
|
|
16
|
+
|
|
17
|
+
// BM25 default parameters
|
|
18
|
+
const DEFAULT_K1 = 1.5;
|
|
19
|
+
const DEFAULT_B = 0.75;
|
|
20
|
+
|
|
21
|
+
// Field weights for multi-field scoring
|
|
22
|
+
const FIELD_WEIGHTS = {
|
|
23
|
+
name: 3.0,
|
|
24
|
+
tags: 2.0,
|
|
25
|
+
description: 1.0,
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Tokenize text into lowercase terms with stop word removal.
|
|
30
|
+
* Must be used identically at build time and search time.
|
|
31
|
+
*/
|
|
32
|
+
export function tokenize(text) {
|
|
33
|
+
if (!text) return [];
|
|
34
|
+
return text
|
|
35
|
+
.toLowerCase()
|
|
36
|
+
.replace(/[^a-z0-9\s-]/g, ' ')
|
|
37
|
+
.split(/[\s-]+/)
|
|
38
|
+
.filter((t) => t.length > 1 && !STOP_WORDS.has(t));
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* Build a BM25 search index from registry entries.
|
|
43
|
+
* Called during `chub build`.
|
|
44
|
+
*
|
|
45
|
+
* @param {Array} entries - Combined docs and skills from registry
|
|
46
|
+
* @returns {Object} The search index
|
|
47
|
+
*/
|
|
48
|
+
export function buildIndex(entries) {
|
|
49
|
+
const documents = [];
|
|
50
|
+
const dfMap = {}; // document frequency per term (across all fields)
|
|
51
|
+
const fieldLengths = { name: [], description: [], tags: [] };
|
|
52
|
+
|
|
53
|
+
for (const entry of entries) {
|
|
54
|
+
const nameTokens = tokenize(entry.name);
|
|
55
|
+
const descTokens = tokenize(entry.description || '');
|
|
56
|
+
const tagTokens = (entry.tags || []).flatMap((t) => tokenize(t));
|
|
57
|
+
|
|
58
|
+
documents.push({
|
|
59
|
+
id: entry.id,
|
|
60
|
+
tokens: {
|
|
61
|
+
name: nameTokens,
|
|
62
|
+
description: descTokens,
|
|
63
|
+
tags: tagTokens,
|
|
64
|
+
},
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
fieldLengths.name.push(nameTokens.length);
|
|
68
|
+
fieldLengths.description.push(descTokens.length);
|
|
69
|
+
fieldLengths.tags.push(tagTokens.length);
|
|
70
|
+
|
|
71
|
+
// Count document frequency — a term counts once per document (union of all fields)
|
|
72
|
+
const allTerms = new Set([...nameTokens, ...descTokens, ...tagTokens]);
|
|
73
|
+
for (const term of allTerms) {
|
|
74
|
+
dfMap[term] = (dfMap[term] || 0) + 1;
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
const N = documents.length;
|
|
79
|
+
|
|
80
|
+
// Compute IDF for each term
|
|
81
|
+
const idf = {};
|
|
82
|
+
for (const [term, df] of Object.entries(dfMap)) {
|
|
83
|
+
idf[term] = Math.log((N - df + 0.5) / (df + 0.5) + 1);
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
// Compute average field lengths
|
|
87
|
+
const avg = (arr) => arr.length === 0 ? 0 : arr.reduce((a, b) => a + b, 0) / arr.length;
|
|
88
|
+
const avgFieldLengths = {
|
|
89
|
+
name: avg(fieldLengths.name),
|
|
90
|
+
description: avg(fieldLengths.description),
|
|
91
|
+
tags: avg(fieldLengths.tags),
|
|
92
|
+
};
|
|
93
|
+
|
|
94
|
+
return {
|
|
95
|
+
version: '1.0.0',
|
|
96
|
+
algorithm: 'bm25',
|
|
97
|
+
params: { k1: DEFAULT_K1, b: DEFAULT_B },
|
|
98
|
+
totalDocs: N,
|
|
99
|
+
avgFieldLengths,
|
|
100
|
+
idf,
|
|
101
|
+
documents,
|
|
102
|
+
};
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* Compute BM25 score for a single field.
|
|
107
|
+
*/
|
|
108
|
+
function scoreField(queryTerms, fieldTokens, idf, avgFieldLen, k1, b) {
|
|
109
|
+
if (fieldTokens.length === 0) return 0;
|
|
110
|
+
|
|
111
|
+
// Build term frequency map for this field
|
|
112
|
+
const tf = {};
|
|
113
|
+
for (const t of fieldTokens) {
|
|
114
|
+
tf[t] = (tf[t] || 0) + 1;
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
let score = 0;
|
|
118
|
+
const dl = fieldTokens.length;
|
|
119
|
+
|
|
120
|
+
for (const term of queryTerms) {
|
|
121
|
+
const termFreq = tf[term] || 0;
|
|
122
|
+
if (termFreq === 0) continue;
|
|
123
|
+
|
|
124
|
+
const termIdf = idf[term] || 0;
|
|
125
|
+
const numerator = termFreq * (k1 + 1);
|
|
126
|
+
const denominator = termFreq + k1 * (1 - b + b * (dl / (avgFieldLen || 1)));
|
|
127
|
+
score += termIdf * (numerator / denominator);
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
return score;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
/**
|
|
134
|
+
* Search the BM25 index with a query string.
|
|
135
|
+
*
|
|
136
|
+
* @param {string} query - The search query
|
|
137
|
+
* @param {Object} index - The pre-built BM25 index
|
|
138
|
+
* @param {Object} opts - Options: { limit }
|
|
139
|
+
* @returns {Array} Sorted results: [{ id, score }]
|
|
140
|
+
*/
|
|
141
|
+
export function search(query, index, opts = {}) {
|
|
142
|
+
const queryTerms = tokenize(query);
|
|
143
|
+
if (queryTerms.length === 0) return [];
|
|
144
|
+
|
|
145
|
+
const { k1, b } = index.params;
|
|
146
|
+
const results = [];
|
|
147
|
+
|
|
148
|
+
for (const doc of index.documents) {
|
|
149
|
+
let totalScore = 0;
|
|
150
|
+
|
|
151
|
+
for (const [field, weight] of Object.entries(FIELD_WEIGHTS)) {
|
|
152
|
+
const fieldTokens = doc.tokens[field] || [];
|
|
153
|
+
const avgLen = index.avgFieldLengths[field] || 1;
|
|
154
|
+
const fieldScore = scoreField(queryTerms, fieldTokens, index.idf, avgLen, k1, b);
|
|
155
|
+
totalScore += fieldScore * weight;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
if (totalScore > 0) {
|
|
159
|
+
results.push({ id: doc.id, score: totalScore });
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
results.sort((a, b) => b.score - a.score);
|
|
164
|
+
|
|
165
|
+
if (opts.limit) {
|
|
166
|
+
return results.slice(0, opts.limit);
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
return results;
|
|
170
|
+
}
|
package/src/lib/cache.js
CHANGED
|
@@ -225,6 +225,20 @@ export function loadSourceRegistry(source) {
|
|
|
225
225
|
return JSON.parse(readFileSync(regPath, 'utf8'));
|
|
226
226
|
}
|
|
227
227
|
|
|
228
|
+
/**
|
|
229
|
+
* Load BM25 search index for a single source (if available).
|
|
230
|
+
*/
|
|
231
|
+
export function loadSearchIndex(source) {
|
|
232
|
+
const basePath = source.path || getSourceDir(source.name);
|
|
233
|
+
const indexPath = join(basePath, 'search-index.json');
|
|
234
|
+
if (!existsSync(indexPath)) return null;
|
|
235
|
+
try {
|
|
236
|
+
return JSON.parse(readFileSync(indexPath, 'utf8'));
|
|
237
|
+
} catch {
|
|
238
|
+
return null;
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
|
|
228
242
|
/**
|
|
229
243
|
* Get cache stats.
|
|
230
244
|
*/
|