chub-dev 0.2.0-beta.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -0
- package/bin/chub-mcp +2 -0
- package/package.json +10 -5
- package/skills/get-api-docs/SKILL.md +81 -0
- package/src/commands/annotate.js +1 -1
- package/src/commands/build.js +12 -4
- package/src/commands/feedback.js +12 -9
- package/src/commands/get.js +32 -11
- package/src/commands/help.js +34 -0
- package/src/commands/search.js +17 -8
- package/src/index.js +31 -65
- package/src/lib/analytics.js +13 -2
- package/src/lib/bm25.js +185 -52
- package/src/lib/cache.js +94 -17
- package/src/lib/config.js +14 -1
- package/src/lib/help.js +158 -0
- package/src/lib/identity.js +12 -1
- package/src/lib/registry.js +236 -63
- package/src/lib/telemetry.js +7 -1
- package/src/lib/welcome.js +42 -0
- package/src/mcp/server.js +184 -0
- package/src/mcp/stdio-lifecycle.js +54 -0
- package/src/mcp/tools.js +286 -0
package/src/lib/config.js
CHANGED
|
@@ -5,14 +5,19 @@ import { parse as parseYaml } from 'yaml';
|
|
|
5
5
|
|
|
6
6
|
const DEFAULT_CDN_URL = 'https://cdn.aichub.org/v1';
|
|
7
7
|
const DEFAULT_TELEMETRY_URL = 'https://api.aichub.org/v1';
|
|
8
|
+
const DEFAULT_HELP_URL = 'https://cdn.aichub.org/v1/help.json';
|
|
9
|
+
const DEFAULT_HELP_TIMEOUT_MS = 2000;
|
|
8
10
|
|
|
9
11
|
const DEFAULTS = {
|
|
10
12
|
output_dir: '.context',
|
|
11
|
-
refresh_interval:
|
|
13
|
+
refresh_interval: 21600,
|
|
12
14
|
output_format: 'human',
|
|
13
15
|
source: 'official,maintainer,community',
|
|
14
16
|
telemetry: true,
|
|
17
|
+
feedback: true,
|
|
15
18
|
telemetry_url: DEFAULT_TELEMETRY_URL,
|
|
19
|
+
help_url: DEFAULT_HELP_URL,
|
|
20
|
+
help_timeout_ms: DEFAULT_HELP_TIMEOUT_MS,
|
|
16
21
|
};
|
|
17
22
|
|
|
18
23
|
let _config = null;
|
|
@@ -50,7 +55,15 @@ export function loadConfig() {
|
|
|
50
55
|
output_format: fileConfig.output_format || DEFAULTS.output_format,
|
|
51
56
|
source: fileConfig.source || DEFAULTS.source,
|
|
52
57
|
telemetry: fileConfig.telemetry !== undefined ? fileConfig.telemetry : DEFAULTS.telemetry,
|
|
58
|
+
feedback: fileConfig.feedback !== undefined ? fileConfig.feedback : DEFAULTS.feedback,
|
|
53
59
|
telemetry_url: fileConfig.telemetry_url || DEFAULTS.telemetry_url,
|
|
60
|
+
help_url: process.env.CHUB_HELP_URL
|
|
61
|
+
?? (fileConfig.help_url !== undefined ? fileConfig.help_url : DEFAULTS.help_url),
|
|
62
|
+
help_timeout_ms: Number.parseInt(
|
|
63
|
+
process.env.CHUB_HELP_TIMEOUT_MS
|
|
64
|
+
?? (fileConfig.help_timeout_ms ?? DEFAULTS.help_timeout_ms),
|
|
65
|
+
10
|
|
66
|
+
) || DEFAULTS.help_timeout_ms,
|
|
54
67
|
};
|
|
55
68
|
|
|
56
69
|
return _config;
|
package/src/lib/help.js
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import { loadConfig } from './config.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Normalize the configured help URL into a usable string, or `null` when
 * remote help should not be attempted.
 *
 * `null` is returned for `false`, `null`, `undefined`, blank strings, the
 * opt-out keywords listed below (matched case-insensitively) and any other
 * non-string value such as `true` or a number, none of which can name a URL.
 *
 * @param {unknown} value - Raw `help_url` setting.
 * @returns {string|null} Trimmed URL string, or `null` when remote help is off.
 */
function normalizeHelpUrl(value) {
  // A URL object is the only non-string input that still identifies a resource.
  if (value instanceof URL) return value.href;

  // Previously truthy non-strings (e.g. `true`) leaked through and were later
  // handed to fetch as if they were URLs.
  if (typeof value !== 'string') return null;

  const trimmed = value.trim();
  if (!trimmed) return null;

  const optOutKeywords = ['0', 'false', 'off', 'disabled', 'none', 'local'];
  return optOutKeywords.includes(trimmed.toLowerCase()) ? null : trimmed;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Append the CLI version to an http(s) help URL as the `cli_version` query
 * parameter.
 *
 * Anything that cannot be parsed as a URL, or that uses another scheme, is
 * returned exactly as given.
 *
 * @param {string} helpUrl - Configured help location.
 * @param {string} cliVersion - Version reported to the help endpoint.
 * @returns {string} URL to request.
 */
function buildRemoteHelpUrl(helpUrl, cliVersion) {
  let target = helpUrl;

  try {
    const parsed = new URL(helpUrl);
    const isWebUrl = parsed.protocol === 'http:' || parsed.protocol === 'https:';
    if (isWebUrl) {
      parsed.searchParams.set('cli_version', cliVersion);
      target = parsed.toString();
    }
  } catch {
    // Unparseable input: hand the original value back untouched.
    target = helpUrl;
  }

  return target;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Turn a remote help response body into a `{ content, ... }` record.
 *
 * The body is treated as JSON when the content type mentions `json` (compared
 * case-insensitively, since media types are case-insensitive) or when the
 * trimmed body is wrapped in `{ ... }`. Bodies that are not JSON, or that fail
 * to parse, are returned verbatim as `content`.
 *
 * @param {string} raw - Response body text.
 * @param {string|null} [contentType=''] - Value of the `content-type` header.
 * @returns {{content: string, updatedAt?: string, version?: string, minimumCliVersion?: string}}
 * @throws {Error} When the body parses as JSON but is neither a string nor an
 *   object with a string `content` property.
 */
function parseRemoteHelpPayload(raw, contentType = '') {
  const trimmed = raw.trim();
  // Tolerate an explicit null/undefined header and any letter casing.
  const mediaType = String(contentType ?? '').toLowerCase();
  const looksLikeJson = mediaType.includes('json') || (trimmed.startsWith('{') && trimmed.endsWith('}'));

  if (!looksLikeJson) {
    return { content: raw };
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Mislabelled or brace-wrapped plain text: show it as-is.
    return { content: raw };
  }

  if (typeof parsed === 'string') {
    return { content: parsed };
  }

  if (!parsed || typeof parsed.content !== 'string') {
    throw new Error('Invalid remote help payload');
  }

  // Copy only the known string metadata fields; everything else is dropped.
  const result = { content: parsed.content };
  for (const key of ['updatedAt', 'version', 'minimumCliVersion']) {
    if (typeof parsed[key] === 'string') result[key] = parsed[key];
  }

  return result;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Build the built-in help text that ships with the CLI.
 *
 * The text is a two-line banner followed by titled sections; consecutive
 * blocks are separated by one blank line.
 *
 * @param {string} cliVersion - Version interpolated into the banner line.
 * @returns {string} Newline-joined help text.
 */
export function getLocalHelpText(cliVersion) {
  const banner = [
    `chub — Context Hub CLI v${cliVersion}`,
    'Search and retrieve LLM-optimized docs and skills.',
  ];

  // Each section is [heading, body lines]; a blank line is inserted after the heading.
  const sections = [
    ['Bootstrap workflow for coding agents', [
      ' 1. Start with chub before writing code against an external API, SDK, or library.',
      ' 2. Find the best entry with `chub search`.',
      ' 3. Fetch the exact doc or skill with `chub get`.',
      ' 4. Use that content instead of guessing from training data.',
      ' 5. If you discover a gap, save it with `chub annotate`.',
      ' 6. Always leave feedback after using a doc or skill.',
    ]],
    ['Quick start', [
      ' $ chub search "openai" --json',
      ' $ chub get openai/chat --lang py',
      ' $ chub feedback openai/chat up --label accurate "Current and clear"',
    ]],
    ['Distribution surfaces', [
      ' $ npx skills add chub-dev',
      ' Copy `skills/get-api-docs/SKILL.md` into your agent\'s skill or rule directory when manual install is needed.',
      ' Use `chub-mcp` for MCP-compatible agents that prefer tools over local skill files.',
    ]],
    ['If chub is not installed yet', [
      ' $ npm install -g chub-dev',
    ]],
    ['Core commands', [
      ' search [query] Search docs and skills (no query = list all)',
      ' get <ids...> Fetch docs or skills by ID',
      ' annotate [id] [note] Save a note that appears on future fetches',
      ' feedback <id> <up|down> Rate a doc or skill after using it',
      ' update Refresh the cached registry',
      ' cache status|clear Manage the local cache',
    ]],
    ['Feedback loop', [
      ' After using a doc or skill, always run one of these:',
      ' $ chub feedback <id> up --label accurate "What was useful"',
      ' $ chub feedback <id> down --label outdated "What needs to be fixed"',
      '',
      ' Valid labels: accurate, well-structured, helpful, good-examples, outdated, inaccurate, incomplete, wrong-examples, wrong-version, poorly-structured',
    ]],
  ];

  const blocks = [
    banner,
    ...sections.map(([heading, body]) => [heading, '', ...body]),
  ];

  return blocks.map((lines) => lines.join('\n')).join('\n\n');
}
|
|
112
|
+
|
|
113
|
+
/**
 * Resolve the help text to show, preferring the remote help document and
 * falling back to the bundled text.
 *
 * The remote request is aborted after `help_timeout_ms` (2000 ms when that
 * setting is falsy). Any failure (non-2xx status, abort, network error, or an
 * invalid payload) yields the local text plus a `fallbackReason`.
 *
 * @param {string} cliVersion - Version shown in local help and sent to the remote endpoint.
 * @param {{fetchImpl?: Function}} [options] - `fetchImpl` defaults to `globalThis.fetch`.
 * @returns {Promise<object>} `{ source: 'remote', url, content, ... }` on success,
 *   otherwise `{ source: 'local', content, url, fallbackReason? }`.
 */
export async function loadHelpContent(cliVersion, { fetchImpl = globalThis.fetch } = {}) {
  const { help_url: configuredUrl, help_timeout_ms: timeoutMs } = loadConfig();
  const helpUrl = normalizeHelpUrl(configuredUrl);

  // Shared shape for every "use the bundled text" outcome.
  const localResult = (extra) => ({
    source: 'local',
    content: getLocalHelpText(cliVersion),
    url: helpUrl || null,
    ...extra,
  });

  const canFetch = Boolean(helpUrl) && typeof fetchImpl === 'function';
  if (!canFetch) {
    return localResult();
  }

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs || 2000);

  try {
    const requestUrl = buildRemoteHelpUrl(helpUrl, cliVersion);
    const response = await fetchImpl(requestUrl, {
      signal: controller.signal,
      headers: {
        accept: 'application/json, text/plain;q=0.9, text/markdown;q=0.8',
      },
    });

    if (!response.ok) {
      throw new Error(`${response.status} ${response.statusText}`);
    }

    // Optional chaining keeps minimal fetch stand-ins (no headers object) working.
    const contentType = response.headers?.get?.('content-type') || '';
    const body = await response.text();
    const payload = parseRemoteHelpPayload(body, contentType);

    return { source: 'remote', url: helpUrl, ...payload };
  } catch (err) {
    const timedOut = err?.name === 'AbortError';
    const fallbackReason = timedOut ? 'timeout' : (err?.message || 'remote_help_unavailable');
    return localResult({ fallbackReason });
  } finally {
    // Always release the timer so it cannot fire after the request settles.
    clearTimeout(timer);
  }
}
|
package/src/lib/identity.js
CHANGED
|
@@ -63,7 +63,7 @@ export async function getOrCreateClientId() {
|
|
|
63
63
|
// File doesn't exist or is unreadable
|
|
64
64
|
}
|
|
65
65
|
|
|
66
|
-
// Generate from machine UUID
|
|
66
|
+
// Generate from machine UUID — this is a first-time user
|
|
67
67
|
const uuid = getMachineUUID();
|
|
68
68
|
const hash = createHash('sha256').update(uuid).digest('hex');
|
|
69
69
|
|
|
@@ -74,9 +74,20 @@ export async function getOrCreateClientId() {
|
|
|
74
74
|
|
|
75
75
|
writeFileSync(idPath, hash, 'utf8');
|
|
76
76
|
_cachedClientId = hash;
|
|
77
|
+
_isFirstRun = true;
|
|
77
78
|
return hash;
|
|
78
79
|
}
|
|
79
80
|
|
|
81
|
+
// Stays false unless getOrCreateClientId() generates and writes a brand-new
// client id during this process.
let _isFirstRun = false;

/**
 * Report whether this process created the client id file, i.e. whether this
 * looks like the first CLI run on this machine.
 *
 * The answer is only meaningful once getOrCreateClientId() has been called;
 * before that it is always `false`.
 *
 * @returns {boolean} `true` when a new client id was generated in this process.
 */
export function isFirstRun() {
  return _isFirstRun;
}
|
|
90
|
+
|
|
80
91
|
/**
|
|
81
92
|
* Auto-detect the AI coding tool from environment variables.
|
|
82
93
|
*/
|
package/src/lib/registry.js
CHANGED
|
@@ -1,11 +1,192 @@
|
|
|
1
1
|
import { loadSourceRegistry, loadSearchIndex } from './cache.js';
|
|
2
2
|
import { loadConfig } from './config.js';
|
|
3
3
|
import { normalizeLanguage } from './normalize.js';
|
|
4
|
-
import { search as bm25Search } from './bm25.js';
|
|
4
|
+
import { buildIndexFromDocuments, compactIdentifier, search as bm25Search, tokenize } from './bm25.js';
|
|
5
5
|
|
|
6
6
|
let _merged = null;
|
|
7
7
|
let _searchIndex = null;
|
|
8
8
|
|
|
9
|
+
/**
 * Build the source-qualified key (`<source>:<id>`) used to match search-index
 * documents with registry entries.
 *
 * @param {string} sourceName - Name of the source the entry came from.
 * @param {string} entryId - Entry id within that source.
 * @returns {string} Combined lookup key.
 */
function getSearchLookupId(sourceName, entryId) {
  return ''.concat(sourceName, ':', entryId);
}
|
|
12
|
+
|
|
13
|
+
/**
 * Canonicalize user-supplied query text: falsy input becomes an empty string,
 * surrounding whitespace is removed and every internal whitespace run
 * collapses to a single space.
 *
 * @param {unknown} query - Raw query or id.
 * @returns {string} Normalized text.
 */
function normalizeQuery(query) {
  const text = query ? String(query) : '';
  return text.trim().split(/\s+/).join(' ');
}
|
|
18
|
+
|
|
19
|
+
/**
 * List the distinct compacted segments of an id or name.
 *
 * The text is split twice: first on `/` only, then on any run of `/`, `_`,
 * `.`, whitespace or `-`. Every piece goes through compactIdentifier();
 * duplicates and empty results are dropped, keeping first-seen order.
 *
 * @param {unknown} text - Id or name to split; falsy values are treated as ''.
 * @returns {string[]} Unique, non-empty compacted segments.
 */
function splitCompactSegments(text) {
  const source = String(text || '');
  const separators = ['/', /[\/_.\s-]+/];

  const unique = new Set();
  for (const separator of separators) {
    for (const piece of source.split(separator)) {
      unique.add(compactIdentifier(piece));
    }
  }

  return Array.from(unique).filter(Boolean);
}
|
|
25
|
+
|
|
26
|
+
/**
 * Edit distance between two strings, computed with two rolling rows and an
 * optional cut-off.
 *
 * When the length difference, or the smallest value in any row, exceeds
 * `maxDistance`, the function stops early and returns `maxDistance + 1`.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @param {number} [maxDistance=Infinity] - Cut-off for early exit.
 * @returns {number} The distance, or `maxDistance + 1` when the cut-off triggered.
 */
function levenshteinDistance(a, b, maxDistance = Infinity) {
  if (a === b) return 0;

  const rows = a.length;
  const cols = b.length;
  if (!rows) return cols;
  if (!cols) return rows;
  if (Math.abs(rows - cols) > maxDistance) return maxDistance + 1;

  // Row 0: turning the empty prefix of `a` into each prefix of `b`.
  let above = [];
  for (let col = 0; col <= cols; col += 1) above.push(col);
  let active = new Array(cols + 1);

  for (let row = 1; row <= rows; row += 1) {
    const symbol = a[row - 1];
    active[0] = row;
    let smallest = row;

    for (let col = 1; col <= cols; col += 1) {
      const removal = above[col] + 1;
      const insertion = active[col - 1] + 1;
      const replacement = above[col - 1] + (symbol === b[col - 1] ? 0 : 1);
      const cell = Math.min(removal, insertion, replacement);
      active[col] = cell;
      if (cell < smallest) smallest = cell;
    }

    // Row minima never decrease, so the final distance cannot recover.
    if (smallest > maxDistance) return maxDistance + 1;

    const recycled = above;
    above = active;
    active = recycled;
  }

  return above[cols];
}
|
|
55
|
+
|
|
56
|
+
/**
 * Score one compacted candidate string against a compacted query.
 *
 * Tiers are tried in order: exact match, shared prefix (length ratio >= 0.6),
 * containment (length ratio >= 0.75), then bounded edit distance for queries
 * of at least 5 characters. Every tier except `exact` is reduced by the length
 * difference and clamped at 0; the fuzzy tier also loses 20 per edit.
 *
 * @param {string} queryCompact - Compacted query text.
 * @param {string} candidateCompact - Compacted candidate text.
 * @param {{exact: number, prefix: number, contains: number, fuzzy: number}} weights - Base score per tier.
 * @returns {number} Score, or 0 when nothing matches.
 */
function scoreCompactCandidate(queryCompact, candidateCompact, weights) {
  if (!queryCompact || !candidateCompact) return 0;
  if (candidateCompact === queryCompact) return weights.exact;

  const queryLength = queryCompact.length;
  if (queryLength < 3) return 0;

  const candidateLength = candidateCompact.length;
  const lengthGap = Math.abs(candidateLength - queryLength);
  const lengthRatio = Math.min(candidateLength, queryLength) / Math.max(candidateLength, queryLength);
  const clampAtZero = (score) => Math.max(score, 0);

  const sharesPrefix = candidateCompact.startsWith(queryCompact) || queryCompact.startsWith(candidateCompact);
  if (sharesPrefix && lengthRatio >= 0.6) {
    return clampAtZero(weights.prefix - lengthGap);
  }

  const oneContainsOther = candidateCompact.includes(queryCompact) || queryCompact.includes(candidateCompact);
  if (oneContainsOther && lengthRatio >= 0.75) {
    return clampAtZero(weights.contains - lengthGap);
  }

  // Very short queries are too ambiguous for typo tolerance.
  if (queryLength < 5) return 0;

  let editBudget = 3;
  if (queryLength <= 5) {
    editBudget = 1;
  } else if (queryLength <= 8) {
    editBudget = 2;
  }

  const edits = levenshteinDistance(queryCompact, candidateCompact, editBudget);
  if (edits > editBudget) return 0;

  return clampAtZero(weights.fuzzy - (edits * 20) - lengthGap);
}
|
|
81
|
+
|
|
82
|
+
/**
 * Score how well one compacted query variant matches an entry's name and id.
 *
 * The best score wins among: the whole compacted name, the whole compacted id,
 * each id segment (plus positional bonuses) and each name segment.
 *
 * @param {{id: string, name: string}} entry - Registry entry being scored.
 * @param {string} queryCompact - Compacted query variant; fewer than 2 characters scores 0.
 * @returns {number} Best lexical score, or 0 when nothing matches.
 */
function scoreEntryLexicalVariant(entry, queryCompact) {
  if (queryCompact.length < 2) return 0;

  const nameCompact = compactIdentifier(entry.name);
  const idCompact = compactIdentifier(entry.id);
  const idSegments = splitCompactSegments(entry.id);
  const nameSegments = splitCompactSegments(entry.name);

  // idSegments is de-duplicated, so for an id such as `openai/openai` it can
  // never show the query as both first and last of several segments. Detect
  // that case on the raw `/` path instead; the old check was unreachable.
  const pathSegments = String(entry.id || '')
    .split('/')
    .map((segment) => compactIdentifier(segment))
    .filter(Boolean);
  const queryIsNamespaceAndLeaf = pathSegments.length > 1
    && pathSegments[0] === queryCompact
    && pathSegments[pathSegments.length - 1] === queryCompact;

  // Bonuses that depend only on the query, not on the segment being scored.
  const lastIdx = idSegments.length - 1;
  let queryBonus = 0;
  if (queryCompact === idSegments[0]) queryBonus += 60;
  if (queryCompact === idSegments[lastIdx]) queryBonus += 25;
  if (queryIsNamespaceAndLeaf) queryBonus += 40;

  let best = 0;

  best = Math.max(best, scoreCompactCandidate(queryCompact, nameCompact, {
    exact: 620,
    prefix: 560,
    contains: 520,
    fuzzy: 500,
  }));

  best = Math.max(best, scoreCompactCandidate(queryCompact, idCompact, {
    exact: 600,
    prefix: 540,
    contains: 500,
    fuzzy: 470,
  }));

  for (let idx = 0; idx < idSegments.length; idx++) {
    const segmentScore = scoreCompactCandidate(queryCompact, idSegments[idx], {
      exact: 580,
      prefix: 530,
      contains: 490,
      fuzzy: 460,
    });
    if (segmentScore === 0) continue;

    // Segments at either end of the id are worth slightly more.
    let bonus = queryBonus;
    if (idx === 0) bonus += 10;
    if (idx === lastIdx) bonus += 10;

    best = Math.max(best, segmentScore + bonus);
  }

  for (const segment of nameSegments) {
    best = Math.max(best, scoreCompactCandidate(queryCompact, segment, {
      exact: 560,
      prefix: 520,
      contains: 480,
      fuzzy: 450,
    }));
  }

  return best;
}
|
|
141
|
+
|
|
142
|
+
/**
 * Best lexical score for an entry across the whole query and each rescue term.
 *
 * Every text is compacted with compactIdentifier(); duplicates and variants
 * shorter than 2 characters are ignored.
 *
 * @param {object} entry - Registry entry being scored.
 * @param {string} normalizedQuery - Normalized query text.
 * @param {string[]} [rescueTerms=[]] - Extra query terms to try individually.
 * @returns {number} Highest variant score, or 0 when no variant qualifies.
 */
function scoreEntryLexicalBoost(entry, normalizedQuery, rescueTerms = []) {
  const variants = new Set();
  for (const text of [normalizedQuery, ...rescueTerms]) {
    const compact = compactIdentifier(text);
    if (compact.length >= 2) variants.add(compact);
  }

  let top = 0;
  for (const variant of variants) {
    top = Math.max(top, scoreEntryLexicalVariant(entry, variant));
  }
  return top;
}
|
|
154
|
+
|
|
155
|
+
/**
 * List the query terms that have no postings in the merged search index.
 *
 * Returns an empty list when no index, or no inverted index, is loaded.
 *
 * @param {string} normalizedQuery - Normalized query text, split with tokenize().
 * @returns {string[]} Terms absent from the inverted index.
 */
function getMissingQueryTerms(normalizedQuery) {
  const invertedIndex = _searchIndex?.invertedIndex;
  if (!invertedIndex) {
    return [];
  }

  // Only own keys count. A bare `invertedIndex[term]` also resolves inherited
  // members, so a term like "constructor" (a function whose length is 1) used
  // to look as if it were indexed.
  const hasPostings = (term) => Object.prototype.hasOwnProperty.call(invertedIndex, term)
    && Boolean(invertedIndex[term]?.length);

  return tokenize(normalizedQuery).filter((term) => !hasPostings(term));
}
|
|
162
|
+
|
|
163
|
+
/**
 * Decide whether lexical scoring should run over every entry instead of only
 * the entries already matched.
 *
 * A full scan is used when there is no search index or no matches yet.
 * Otherwise it is used only for queries of two or more terms where at least
 * one term is missing from the inverted index.
 *
 * @param {string} normalizedQuery - Normalized query text.
 * @param {Map<string, object>} resultByKey - Matches collected so far.
 * @returns {boolean} `true` to scan all entries.
 */
function shouldRunGlobalLexicalScan(normalizedQuery, resultByKey) {
  const hasIndex = Boolean(_searchIndex);
  if (!hasIndex || resultByKey.size === 0) {
    return true;
  }

  const canInspectTerms = Boolean(_searchIndex.invertedIndex)
    && tokenize(normalizedQuery).length >= 2;

  return canInspectTerms && getMissingQueryTerms(normalizedQuery).length > 0;
}
|
|
179
|
+
|
|
180
|
+
/**
 * Return a shallow copy of a search index whose document ids are rewritten to
 * the source-qualified form produced by getSearchLookupId().
 *
 * The input index and its documents are not modified; a missing `documents`
 * list is treated as empty.
 *
 * @param {object} index - Search index loaded for one source.
 * @param {string} sourceName - Name of that source.
 * @returns {object} Copy of `index` with re-keyed `documents`.
 */
function namespaceSearchIndex(index, sourceName) {
  const withSourcePrefix = (doc) => {
    const namespacedId = getSearchLookupId(sourceName, doc.id);
    return { ...doc, id: namespacedId };
  };

  const documents = (index.documents || []).map(withSourcePrefix);
  return { ...index, documents };
}
|
|
189
|
+
|
|
9
190
|
/**
|
|
10
191
|
* Load and merge entries from all configured sources.
|
|
11
192
|
* Returns { docs: [...], skills: [...] } with each entry tagged with _source/_sourceObj.
|
|
@@ -24,7 +205,7 @@ function getMerged() {
|
|
|
24
205
|
|
|
25
206
|
// Load BM25 search index if available
|
|
26
207
|
const idx = loadSearchIndex(source);
|
|
27
|
-
if (idx) searchIndexes.push(idx);
|
|
208
|
+
if (idx) searchIndexes.push(namespaceSearchIndex(idx, source.name));
|
|
28
209
|
|
|
29
210
|
// Support both new format (docs/skills) and old format (entries)
|
|
30
211
|
if (registry.docs) {
|
|
@@ -56,47 +237,13 @@ function getMerged() {
|
|
|
56
237
|
// Merge search indexes (combine documents and recompute IDF)
|
|
57
238
|
if (searchIndexes.length > 0) {
|
|
58
239
|
if (searchIndexes.length === 1) {
|
|
59
|
-
|
|
240
|
+
const [singleIndex] = searchIndexes;
|
|
241
|
+
_searchIndex = singleIndex.invertedIndex
|
|
242
|
+
? singleIndex
|
|
243
|
+
: buildIndexFromDocuments(singleIndex.documents, singleIndex.params);
|
|
60
244
|
} else {
|
|
61
|
-
// Merge multiple indexes: combine documents, recompute global IDF
|
|
62
245
|
const allDocuments = searchIndexes.flatMap((idx) => idx.documents);
|
|
63
|
-
|
|
64
|
-
const dfMap = {};
|
|
65
|
-
const fieldLengths = { name: [], description: [], tags: [] };
|
|
66
|
-
|
|
67
|
-
for (const doc of allDocuments) {
|
|
68
|
-
const allTerms = new Set([
|
|
69
|
-
...(doc.tokens.name || []),
|
|
70
|
-
...(doc.tokens.description || []),
|
|
71
|
-
...(doc.tokens.tags || []),
|
|
72
|
-
]);
|
|
73
|
-
for (const term of allTerms) {
|
|
74
|
-
dfMap[term] = (dfMap[term] || 0) + 1;
|
|
75
|
-
}
|
|
76
|
-
fieldLengths.name.push((doc.tokens.name || []).length);
|
|
77
|
-
fieldLengths.description.push((doc.tokens.description || []).length);
|
|
78
|
-
fieldLengths.tags.push((doc.tokens.tags || []).length);
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
const idf = {};
|
|
82
|
-
for (const [term, df] of Object.entries(dfMap)) {
|
|
83
|
-
idf[term] = Math.log((N - df + 0.5) / (df + 0.5) + 1);
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
const avg = (arr) => arr.length === 0 ? 0 : arr.reduce((a, b) => a + b, 0) / arr.length;
|
|
87
|
-
_searchIndex = {
|
|
88
|
-
version: '1.0.0',
|
|
89
|
-
algorithm: 'bm25',
|
|
90
|
-
params: searchIndexes[0].params,
|
|
91
|
-
totalDocs: N,
|
|
92
|
-
avgFieldLengths: {
|
|
93
|
-
name: avg(fieldLengths.name),
|
|
94
|
-
description: avg(fieldLengths.description),
|
|
95
|
-
tags: avg(fieldLengths.tags),
|
|
96
|
-
},
|
|
97
|
-
idf,
|
|
98
|
-
documents: allDocuments,
|
|
99
|
-
};
|
|
246
|
+
_searchIndex = buildIndexFromDocuments(allDocuments, searchIndexes[0].params);
|
|
100
247
|
}
|
|
101
248
|
}
|
|
102
249
|
|
|
@@ -178,6 +325,7 @@ export function getDisplayId(entry) {
|
|
|
178
325
|
* Uses BM25 when a search index is available, falls back to keyword matching.
|
|
179
326
|
*/
|
|
180
327
|
export function searchEntries(query, filters = {}) {
|
|
328
|
+
const normalizedQuery = normalizeQuery(query);
|
|
181
329
|
const entries = applySourceFilter(getAllEntries());
|
|
182
330
|
|
|
183
331
|
// Deduplicate: same id+source appearing as both doc and skill → show once
|
|
@@ -194,26 +342,29 @@ export function searchEntries(query, filters = {}) {
|
|
|
194
342
|
// Build entry lookup by id
|
|
195
343
|
const entryById = new Map();
|
|
196
344
|
for (const entry of deduped) {
|
|
197
|
-
entryById.set(entry.id, entry);
|
|
345
|
+
entryById.set(getSearchLookupId(entry._source, entry.id), entry);
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
if (!normalizedQuery) {
|
|
349
|
+
return applyFilters(deduped, filters).map((entry) => ({ ...entry, _score: 0 }));
|
|
198
350
|
}
|
|
199
351
|
|
|
200
|
-
|
|
352
|
+
const resultByKey = new Map();
|
|
201
353
|
|
|
202
354
|
if (_searchIndex) {
|
|
203
355
|
// BM25 search
|
|
204
|
-
const
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
.filter(Boolean);
|
|
356
|
+
for (const match of bm25Search(normalizedQuery, _searchIndex)) {
|
|
357
|
+
const entry = entryById.get(match.id);
|
|
358
|
+
if (!entry) continue;
|
|
359
|
+
const key = getSearchLookupId(entry._source, entry.id);
|
|
360
|
+
resultByKey.set(key, { entry, score: match.score });
|
|
361
|
+
}
|
|
211
362
|
} else {
|
|
212
363
|
// Fallback: keyword matching
|
|
213
|
-
const q =
|
|
364
|
+
const q = normalizedQuery.toLowerCase();
|
|
214
365
|
const words = q.split(/\s+/);
|
|
215
366
|
|
|
216
|
-
|
|
367
|
+
for (const entry of deduped) {
|
|
217
368
|
let score = 0;
|
|
218
369
|
|
|
219
370
|
if (entry.id === q) score += 100;
|
|
@@ -230,12 +381,35 @@ export function searchEntries(query, filters = {}) {
|
|
|
230
381
|
if (entry.tags?.some((t) => t.toLowerCase().includes(word))) score += 15;
|
|
231
382
|
}
|
|
232
383
|
|
|
233
|
-
|
|
234
|
-
|
|
384
|
+
if (score > 0) {
|
|
385
|
+
const key = getSearchLookupId(entry._source, entry.id);
|
|
386
|
+
resultByKey.set(key, { entry, score });
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
}
|
|
235
390
|
|
|
236
|
-
|
|
391
|
+
const lexicalCandidates = !shouldRunGlobalLexicalScan(normalizedQuery, resultByKey)
|
|
392
|
+
? [...new Set([...resultByKey.values()].map(({ entry }) => entry))]
|
|
393
|
+
: deduped;
|
|
394
|
+
const rescueTerms = resultByKey.size > 0
|
|
395
|
+
? getMissingQueryTerms(normalizedQuery).filter((term) => term.length >= 5)
|
|
396
|
+
: [];
|
|
397
|
+
|
|
398
|
+
for (const entry of lexicalCandidates) {
|
|
399
|
+
const boost = scoreEntryLexicalBoost(entry, normalizedQuery, rescueTerms);
|
|
400
|
+
if (boost === 0) continue;
|
|
401
|
+
|
|
402
|
+
const key = getSearchLookupId(entry._source, entry.id);
|
|
403
|
+
const current = resultByKey.get(key);
|
|
404
|
+
if (current) {
|
|
405
|
+
current.score += boost;
|
|
406
|
+
} else {
|
|
407
|
+
resultByKey.set(key, { entry, score: boost });
|
|
408
|
+
}
|
|
237
409
|
}
|
|
238
410
|
|
|
411
|
+
let results = [...resultByKey.values()];
|
|
412
|
+
|
|
239
413
|
const filtered = applyFilters(results.map((r) => r.entry), filters);
|
|
240
414
|
const filteredSet = new Set(filtered);
|
|
241
415
|
results = results.filter((r) => filteredSet.has(r.entry));
|
|
@@ -249,6 +423,7 @@ export function searchEntries(query, filters = {}) {
|
|
|
249
423
|
* type: "doc" or "skill". If null, searches both.
|
|
250
424
|
*/
|
|
251
425
|
export function getEntry(idOrNamespacedId, type = null) {
|
|
426
|
+
const normalizedId = normalizeQuery(idOrNamespacedId);
|
|
252
427
|
const { docs, skills } = getMerged();
|
|
253
428
|
let pool;
|
|
254
429
|
if (type === 'doc') pool = applySourceFilter(docs);
|
|
@@ -256,16 +431,16 @@ export function getEntry(idOrNamespacedId, type = null) {
|
|
|
256
431
|
else pool = applySourceFilter([...docs, ...skills]);
|
|
257
432
|
|
|
258
433
|
// Check for source:id format (colon separates source from id)
|
|
259
|
-
if (
|
|
260
|
-
const colonIdx =
|
|
261
|
-
const sourceName =
|
|
262
|
-
const id =
|
|
434
|
+
if (normalizedId.includes(':')) {
|
|
435
|
+
const colonIdx = normalizedId.indexOf(':');
|
|
436
|
+
const sourceName = normalizedId.slice(0, colonIdx);
|
|
437
|
+
const id = normalizedId.slice(colonIdx + 1);
|
|
263
438
|
const entry = pool.find((e) => e._source === sourceName && e.id === id);
|
|
264
439
|
return entry ? { entry, ambiguous: false } : { entry: null, ambiguous: false };
|
|
265
440
|
}
|
|
266
441
|
|
|
267
442
|
// Bare id (may contain slashes like author/name)
|
|
268
|
-
const matches = pool.filter((e) => e.id ===
|
|
443
|
+
const matches = pool.filter((e) => e.id === normalizedId);
|
|
269
444
|
if (matches.length === 0) return { entry: null, ambiguous: false };
|
|
270
445
|
if (matches.length === 1) return { entry: matches[0], ambiguous: false };
|
|
271
446
|
|
|
@@ -317,9 +492,7 @@ export function resolveDocPath(entry, language, version) {
|
|
|
317
492
|
let langObj = null;
|
|
318
493
|
if (lang) {
|
|
319
494
|
langObj = entry.languages.find((l) => l.language === lang);
|
|
320
|
-
} else
|
|
321
|
-
langObj = entry.languages[0];
|
|
322
|
-
} else if (entry.languages.length > 1) {
|
|
495
|
+
} else {
|
|
323
496
|
return {
|
|
324
497
|
needsLanguage: true,
|
|
325
498
|
available: entry.languages.map((l) => l.language),
|
package/src/lib/telemetry.js
CHANGED
|
@@ -8,6 +8,12 @@ export function isTelemetryEnabled() {
|
|
|
8
8
|
return config.telemetry !== false;
|
|
9
9
|
}
|
|
10
10
|
|
|
11
|
+
/**
 * Tell whether feedback submission is allowed.
 *
 * The `CHUB_FEEDBACK` environment variable set to `0` or `false` disables it
 * outright; otherwise it is enabled unless the loaded config has
 * `feedback: false`.
 *
 * @returns {boolean} `true` when feedback may be sent.
 */
export function isFeedbackEnabled() {
  const override = process.env.CHUB_FEEDBACK;
  if (override === '0' || override === 'false') return false;

  return loadConfig().feedback !== false;
}
|
|
16
|
+
|
|
11
17
|
export function getTelemetryUrl() {
|
|
12
18
|
const url = process.env.CHUB_TELEMETRY_URL;
|
|
13
19
|
if (url) return url;
|
|
@@ -33,7 +39,7 @@ export function getTelemetryUrl() {
|
|
|
33
39
|
* @param {string} [opts.source] - Registry source name
|
|
34
40
|
*/
|
|
35
41
|
export async function sendFeedback(entryId, entryType, rating, opts = {}) {
|
|
36
|
-
if (!
|
|
42
|
+
if (!isFeedbackEnabled()) return { status: 'skipped', reason: 'feedback_disabled' };
|
|
37
43
|
|
|
38
44
|
const { getOrCreateClientId, detectAgent, detectAgentVersion } = await import('./identity.js');
|
|
39
45
|
const clientId = await getOrCreateClientId();
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { existsSync, writeFileSync, mkdirSync } from 'node:fs';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
import chalk from 'chalk';
|
|
4
|
+
import { getChubDir } from './config.js';
|
|
5
|
+
|
|
6
|
+
const WELCOME_MARKER = '.welcome_shown';
|
|
7
|
+
|
|
8
|
+
/**
 * Print the one-time welcome / terms notice on stderr.
 *
 * Nothing is printed for JSON output, when stdout or stderr is not a TTY, or
 * when the marker file already exists in the chub directory. After printing,
 * the marker file is written (best effort) so the notice is not repeated.
 *
 * @param {{json?: boolean}} [opts] - Pass `json: true` to suppress the notice.
 * @returns {void}
 */
export function showWelcomeIfNeeded(opts = {}) {
  if (opts.json) return;

  const isInteractive = process.stdout.isTTY && process.stderr.isTTY;
  if (!isInteractive) return;

  const chubDir = getChubDir();
  const markerPath = join(chubDir, WELCOME_MARKER);
  if (existsSync(markerPath)) return;

  const configPath = join(chubDir, 'config.yaml');
  const notice = `
${chalk.bold('Welcome to Context Hub (chub)!')} Chub helps your AI coding agents make API calls correctly, by providing \
the latest documentation.

By using chub, you agree to the Terms of Service at ${chalk.underline('https://www.aichub.org/tos.html')}

Chub asks agents to provide feedback on documentation, and this feedback is used to improve docs for the developer \
community. If you wish to disable this feedback, add ${chalk.bold('"feedback: false"')} to ${chalk.bold(configPath)}. See \
${chalk.underline('https://github.com/andrewyng/context-hub')} for details.
`;

  // stderr keeps the notice out of JSON or piped stdout.
  console.error(notice);

  try {
    if (!existsSync(chubDir)) {
      mkdirSync(chubDir, { recursive: true });
    }
    const shownAt = new Date().toISOString();
    writeFileSync(markerPath, shownAt, 'utf8');
  } catch {
    // Best effort: an unwritable marker must not block the CLI.
  }
}
|