chub-dev 0.2.0-beta.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.js CHANGED
@@ -1,4 +1,3 @@
1
- import chalk from 'chalk';
2
1
  import { Command } from 'commander';
3
2
  import { readFileSync } from 'node:fs';
4
3
  import { fileURLToPath } from 'node:url';
@@ -11,67 +10,15 @@ import { registerGetCommand } from './commands/get.js';
11
10
  import { registerBuildCommand } from './commands/build.js';
12
11
  import { registerFeedbackCommand } from './commands/feedback.js';
13
12
  import { registerAnnotateCommand } from './commands/annotate.js';
14
- import { trackEvent, shutdownAnalytics } from './lib/analytics.js';
13
+ import { registerHelpCommand } from './commands/help.js';
14
+ import { trackEvent, shutdownAnalytics, setCliVersion } from './lib/analytics.js';
15
+ import { error } from './lib/output.js';
16
+ import { showWelcomeIfNeeded } from './lib/welcome.js';
17
+ import { getLocalHelpText } from './lib/help.js';
15
18
 
16
19
  const __dirname = dirname(fileURLToPath(import.meta.url));
17
20
  const pkg = JSON.parse(readFileSync(join(__dirname, '..', 'package.json'), 'utf8'));
18
-
19
- function printUsage() {
20
- console.log(`
21
- ${chalk.bold('chub')} — Context Hub CLI v${pkg.version}
22
- Search and retrieve LLM-optimized docs and skills.
23
-
24
- ${chalk.bold.underline('Getting Started')}
25
-
26
- ${chalk.dim('$')} chub update ${chalk.dim('# download the registry')}
27
- ${chalk.dim('$')} chub search ${chalk.dim('# list everything available')}
28
- ${chalk.dim('$')} chub search "stripe" ${chalk.dim('# fuzzy search')}
29
- ${chalk.dim('$')} chub search stripe/payments ${chalk.dim('# exact id → full detail')}
30
- ${chalk.dim('$')} chub get stripe/api ${chalk.dim('# print doc to terminal')}
31
- ${chalk.dim('$')} chub get stripe/api -o doc.md ${chalk.dim('# save to file')}
32
- ${chalk.dim('$')} chub get openai/chat --lang py ${chalk.dim('# specific language')}
33
- ${chalk.dim('$')} chub get pw-community/login-flows ${chalk.dim('# fetch a skill')}
34
- ${chalk.dim('$')} chub get openai/chat stripe/api ${chalk.dim('# fetch multiple')}
35
-
36
- ${chalk.bold.underline('Commands')}
37
-
38
- ${chalk.bold('search')} [query] Search docs and skills (no query = list all)
39
- ${chalk.bold('get')} <ids...> Fetch docs or skills by ID
40
- ${chalk.bold('update')} Refresh the cached registry
41
- ${chalk.bold('cache')} status|clear Manage the local cache
42
- ${chalk.bold('build')} <content-dir> Build registry from content directory
43
-
44
- ${chalk.bold.underline('Flags')}
45
-
46
- --json Structured JSON output (for agents and piping)
47
- --tags <csv> Filter by tags (e.g. docs, skill, openai, browser)
48
- --lang <language> Language variant (js, py, ts)
49
- --full Fetch all files, not just the entry point
50
- -o, --output <path> Write content to file or directory
51
-
52
- ${chalk.bold.underline('Agent Piping Patterns')}
53
-
54
- ${chalk.dim('# Get the top result id')}
55
- ${chalk.dim('$')} chub search "stripe" --json | jq -r '.results[0].id'
56
-
57
- ${chalk.dim('# Search → pick → fetch → save')}
58
- ${chalk.dim('$')} ID=$(chub search "stripe" --json | jq -r '.results[0].id')
59
- ${chalk.dim('$')} chub get "$ID" --lang js -o .context/stripe.md
60
-
61
- ${chalk.dim('# Fetch multiple at once')}
62
- ${chalk.dim('$')} chub get openai/chat stripe/api -o .context/
63
-
64
- ${chalk.bold.underline('Multi-Source Config')} ${chalk.dim('(~/.chub/config.yaml)')}
65
-
66
- ${chalk.dim('sources:')}
67
- ${chalk.dim(' - name: community')}
68
- ${chalk.dim(' url: https://cdn.aichub.org/v1')}
69
- ${chalk.dim(' - name: internal')}
70
- ${chalk.dim(' path: /path/to/local/docs')}
71
-
72
- ${chalk.dim('# On id collision, use source: prefix: chub get internal:openai/chat')}
73
- `);
74
- }
21
+ setCliVersion(pkg.version);
75
22
 
76
23
  const program = new Command();
77
24
 
@@ -79,19 +26,38 @@ program
79
26
  .name('chub')
80
27
  .description('Context Hub - search and retrieve LLM-optimized docs and skills')
81
28
  .version(pkg.version, '-V, --cli-version')
29
+ .addHelpCommand(false)
82
30
  .option('--json', 'Output as JSON (machine-readable)')
83
31
  .action(() => {
84
- printUsage();
32
+ console.log(getLocalHelpText(pkg.version));
85
33
  });
86
34
 
87
35
  // Commands that don't need registry
88
36
  const SKIP_REGISTRY = ['update', 'cache', 'build', 'feedback', 'annotate', 'help'];
89
37
 
90
38
  program.hook('preAction', async (thisCommand) => {
39
+ const globalOpts = thisCommand.optsWithGlobals?.() || {};
40
+ showWelcomeIfNeeded(globalOpts);
41
+
91
42
  const cmdName = thisCommand.args?.[0] || thisCommand.name();
92
- // Track command usage (fire-and-forget, never blocks)
93
43
  if (cmdName !== 'chub') {
94
- trackEvent('command_run', { command: cmdName }).catch(() => {});
44
+ // Only initialize identity and track if telemetry is enabled
45
+ // Respects CHUB_TELEMETRY=0 — no client_id file created, no events sent
46
+ try {
47
+ const { isTelemetryEnabled } = await import('./lib/telemetry.js');
48
+ if (isTelemetryEnabled()) {
49
+ const { getOrCreateClientId, isFirstRun } = await import('./lib/identity.js');
50
+ await getOrCreateClientId();
51
+
52
+ // Fire-and-forget — don't block command on PostHog network I/O
53
+ trackEvent('command_run', { command: cmdName }).catch(() => {});
54
+ if (isFirstRun()) {
55
+ trackEvent('first_run', { command: cmdName }).catch(() => {});
56
+ }
57
+ }
58
+ } catch {
59
+ // Identity/telemetry failure — silently skip, don't block the command
60
+ }
95
61
  }
96
62
  if (SKIP_REGISTRY.includes(cmdName)) return;
97
63
  if (thisCommand.parent?.name() === 'cache') return;
@@ -100,9 +66,8 @@ program.hook('preAction', async (thisCommand) => {
100
66
  try {
101
67
  await ensureRegistry();
102
68
  } catch (err) {
103
- process.stderr.write(`Warning: Could not load registry: ${err.message}\n`);
104
- process.stderr.write(`Run \`chub update\` to initialize.\n`);
105
- process.exit(1);
69
+ await trackEvent('command_error', { command: cmdName, error_type: 'registry_unavailable' });
70
+ error(`Registry not available: ${err.message}. Run \`chub update\` to refresh remote registries, or check that local source paths in ~/.chub/config.yaml are correct.`, globalOpts);
106
71
  }
107
72
  });
108
73
 
@@ -113,6 +78,7 @@ registerGetCommand(program);
113
78
  registerBuildCommand(program);
114
79
  registerFeedbackCommand(program);
115
80
  registerAnnotateCommand(program);
81
+ registerHelpCommand(program, pkg.version);
116
82
 
117
83
  program.parse();
118
84
 
@@ -4,13 +4,14 @@
4
4
  * Tracks: command usage, search patterns, doc/skill popularity, errors.
5
5
  * Does NOT track feedback ratings (those go to the custom API via telemetry.js).
6
6
  *
7
- * Respects the same telemetry opt-out: `telemetry: false` in config or CHUB_TELEMETRY=0.
7
+ * Respects telemetry opt-out: `telemetry: false` in config or CHUB_TELEMETRY=0.
8
+ * Feedback has a separate opt-out: `feedback: false` in config or CHUB_FEEDBACK=0.
8
9
  */
9
10
 
10
11
  import { isTelemetryEnabled } from './telemetry.js';
11
12
 
12
13
  // PostHog project API key (public — standard for client-side analytics)
13
- const POSTHOG_KEY = 'phc_cUPXY1tAUkIOU9perzGcFYEtFQeCgUhUO6ejT79YLIk';
14
+ const POSTHOG_KEY = 'phc_tO9mXIgcCuBccfN2Ut0quf6UFsd06u3Y6g1kqMaYdQX';
14
15
  const POSTHOG_HOST = 'https://us.i.posthog.com';
15
16
 
16
17
  let _posthog = null;
@@ -65,6 +66,7 @@ export async function trackEvent(event, properties = {}) {
65
66
  ...properties,
66
67
  platform: process.platform,
67
68
  node_version: process.version,
69
+ cli_version: _cliVersion || undefined,
68
70
  },
69
71
  });
70
72
 
@@ -75,6 +77,15 @@ export async function trackEvent(event, properties = {}) {
75
77
  }
76
78
  }
77
79
 
80
+ let _cliVersion;
81
+ /**
82
+ * Set the CLI version for inclusion in all events.
83
+ * Called once from index.js at startup.
84
+ */
85
+ export function setCliVersion(version) {
86
+ _cliVersion = version;
87
+ }
88
+
78
89
  /**
79
90
  * Shut down the PostHog client gracefully.
80
91
  * Call this before process exit if possible.
package/src/lib/bm25.js CHANGED
@@ -20,11 +20,32 @@ const DEFAULT_B = 0.75;
20
20
 
21
21
  // Field weights for multi-field scoring
22
22
  const FIELD_WEIGHTS = {
23
+ id: 4.0,
23
24
  name: 3.0,
24
25
  tags: 2.0,
25
26
  description: 1.0,
26
27
  };
27
28
 
29
+ function getDefaultParams() {
30
+ return { k1: DEFAULT_K1, b: DEFAULT_B };
31
+ }
32
+
33
+ function isSearchableToken(token) {
34
+ return (token.length > 1 || /^\d+$/.test(token)) && !STOP_WORDS.has(token);
35
+ }
36
+
37
+ export function compactIdentifier(text) {
38
+ return String(text || '')
39
+ .toLowerCase()
40
+ .replace(/[^a-z0-9]/g, '');
41
+ }
42
+
43
+ function splitAlphaNumeric(text) {
44
+ return text
45
+ .replace(/([a-z])(\d)/g, '$1 $2')
46
+ .replace(/(\d)([a-z])/g, '$1 $2');
47
+ }
48
+
28
49
  /**
29
50
  * Tokenize text into lowercase terms with stop word removal.
30
51
  * Must be used identically at build time and search time.
@@ -35,73 +56,139 @@ export function tokenize(text) {
35
56
  .toLowerCase()
36
57
  .replace(/[^a-z0-9\s-]/g, ' ')
37
58
  .split(/[\s-]+/)
38
- .filter((t) => t.length > 1 && !STOP_WORDS.has(t));
59
+ .filter(isSearchableToken);
39
60
  }
40
61
 
41
62
  /**
42
- * Build a BM25 search index from registry entries.
43
- * Called during `chub build`.
44
- *
45
- * @param {Array} entries - Combined docs and skills from registry
46
- * @returns {Object} The search index
63
+ * Tokenize identifiers more aggressively than free text so package ids
64
+ * still match joined/split variants like "nodefetch" and "auth 0".
47
65
  */
48
- export function buildIndex(entries) {
49
- const documents = [];
50
- const dfMap = {}; // document frequency per term (across all fields)
51
- const fieldLengths = { name: [], description: [], tags: [] };
66
+ export function tokenizeIdentifier(text) {
67
+ if (!text) return [];
52
68
 
53
- for (const entry of entries) {
54
- const nameTokens = tokenize(entry.name);
55
- const descTokens = tokenize(entry.description || '');
56
- const tagTokens = (entry.tags || []).flatMap((t) => tokenize(t));
69
+ const tokens = new Set(tokenize(text));
70
+ const raw = String(text);
71
+ const compact = compactIdentifier(raw);
72
+ const segments = new Set([
73
+ ...raw.split('/').map((segment) => compactIdentifier(segment)),
74
+ ...raw.split(/[\/_.\s-]+/).map((segment) => compactIdentifier(segment)),
75
+ ]);
57
76
 
58
- documents.push({
59
- id: entry.id,
60
- tokens: {
61
- name: nameTokens,
62
- description: descTokens,
63
- tags: tagTokens,
64
- },
65
- });
77
+ if (isSearchableToken(compact)) {
78
+ tokens.add(compact);
79
+ }
80
+
81
+ for (const token of tokenize(splitAlphaNumeric(compact))) {
82
+ tokens.add(token);
83
+ }
84
+
85
+ for (const segment of segments) {
86
+ if (!segment) continue;
87
+ if (isSearchableToken(segment)) {
88
+ tokens.add(segment);
89
+ }
90
+ for (const token of tokenize(splitAlphaNumeric(segment))) {
91
+ tokens.add(token);
92
+ }
93
+ }
94
+
95
+ return [...tokens];
96
+ }
97
+
98
+ function buildInvertedIndex(documents) {
99
+ const invertedIndex = Object.create(null);
100
+
101
+ for (const [docIndex, doc] of documents.entries()) {
102
+ const allTerms = new Set([
103
+ ...(doc.tokens.id || []),
104
+ ...(doc.tokens.name || []),
105
+ ...(doc.tokens.description || []),
106
+ ...(doc.tokens.tags || []),
107
+ ]);
108
+
109
+ for (const term of allTerms) {
110
+ if (!invertedIndex[term]) invertedIndex[term] = [];
111
+ invertedIndex[term].push(docIndex);
112
+ }
113
+ }
114
+
115
+ return invertedIndex;
116
+ }
117
+
118
+ export function buildIndexFromDocuments(documents, params = getDefaultParams()) {
119
+ const dfMap = Object.create(null); // document frequency per term (across all fields)
120
+ const fieldLengths = { id: [], name: [], description: [], tags: [] };
121
+
122
+ for (const doc of documents) {
123
+ const idTokens = doc.tokens.id || [];
124
+ const nameTokens = doc.tokens.name || [];
125
+ const descTokens = doc.tokens.description || [];
126
+ const tagTokens = doc.tokens.tags || [];
66
127
 
128
+ fieldLengths.id.push(idTokens.length);
67
129
  fieldLengths.name.push(nameTokens.length);
68
130
  fieldLengths.description.push(descTokens.length);
69
131
  fieldLengths.tags.push(tagTokens.length);
70
132
 
71
- // Count document frequency — a term counts once per document (union of all fields)
72
- const allTerms = new Set([...nameTokens, ...descTokens, ...tagTokens]);
133
+ const allTerms = new Set([...idTokens, ...nameTokens, ...descTokens, ...tagTokens]);
73
134
  for (const term of allTerms) {
74
135
  dfMap[term] = (dfMap[term] || 0) + 1;
75
136
  }
76
137
  }
77
138
 
78
139
  const N = documents.length;
79
-
80
- // Compute IDF for each term
81
- const idf = {};
140
+ const idf = Object.create(null);
82
141
  for (const [term, df] of Object.entries(dfMap)) {
83
142
  idf[term] = Math.log((N - df + 0.5) / (df + 0.5) + 1);
84
143
  }
85
144
 
86
- // Compute average field lengths
87
145
  const avg = (arr) => arr.length === 0 ? 0 : arr.reduce((a, b) => a + b, 0) / arr.length;
88
- const avgFieldLengths = {
89
- name: avg(fieldLengths.name),
90
- description: avg(fieldLengths.description),
91
- tags: avg(fieldLengths.tags),
92
- };
93
-
94
146
  return {
95
147
  version: '1.0.0',
96
148
  algorithm: 'bm25',
97
- params: { k1: DEFAULT_K1, b: DEFAULT_B },
149
+ params,
98
150
  totalDocs: N,
99
- avgFieldLengths,
151
+ avgFieldLengths: {
152
+ id: avg(fieldLengths.id),
153
+ name: avg(fieldLengths.name),
154
+ description: avg(fieldLengths.description),
155
+ tags: avg(fieldLengths.tags),
156
+ },
100
157
  idf,
101
158
  documents,
159
+ invertedIndex: buildInvertedIndex(documents),
102
160
  };
103
161
  }
104
162
 
163
+ /**
164
+ * Build a BM25 search index from registry entries.
165
+ * Called during `chub build`.
166
+ *
167
+ * @param {Array} entries - Combined docs and skills from registry
168
+ * @returns {Object} The search index
169
+ */
170
+ export function buildIndex(entries) {
171
+ const documents = [];
172
+
173
+ for (const entry of entries) {
174
+ const idTokens = tokenizeIdentifier(entry.id);
175
+ const nameTokens = tokenize(entry.name);
176
+ const descTokens = tokenize(entry.description || '');
177
+ const tagTokens = (entry.tags || []).flatMap((t) => tokenize(t));
178
+
179
+ documents.push({
180
+ id: entry.id,
181
+ tokens: {
182
+ id: idTokens,
183
+ name: nameTokens,
184
+ description: descTokens,
185
+ tags: tagTokens,
186
+ },
187
+ });
188
+ }
189
+ return buildIndexFromDocuments(documents);
190
+ }
191
+
105
192
  /**
106
193
  * Compute BM25 score for a single field.
107
194
  */
@@ -109,7 +196,7 @@ function scoreField(queryTerms, fieldTokens, idf, avgFieldLen, k1, b) {
109
196
  if (fieldTokens.length === 0) return 0;
110
197
 
111
198
  // Build term frequency map for this field
112
- const tf = {};
199
+ const tf = Object.create(null);
113
200
  for (const t of fieldTokens) {
114
201
  tf[t] = (tf[t] || 0) + 1;
115
202
  }
@@ -130,22 +217,46 @@ function scoreField(queryTerms, fieldTokens, idf, avgFieldLen, k1, b) {
130
217
  return score;
131
218
  }
132
219
 
133
- /**
134
- * Search the BM25 index with a query string.
135
- *
136
- * @param {string} query - The search query
137
- * @param {Object} index - The pre-built BM25 index
138
- * @param {Object} opts - Options: { limit }
139
- * @returns {Array} Sorted results: [{ id, score }]
140
- */
141
- export function search(query, index, opts = {}) {
220
+ function getCandidateDocIndexes(queryTerms, index) {
221
+ if (!index.invertedIndex) {
222
+ return index.documents.map((_, docIndex) => docIndex);
223
+ }
224
+
225
+ const candidateIndexes = new Set();
226
+ for (const term of new Set(queryTerms)) {
227
+ const postings = index.invertedIndex[term];
228
+ if (!postings) continue;
229
+ for (const docIndex of postings) {
230
+ candidateIndexes.add(docIndex);
231
+ }
232
+ }
233
+
234
+ return [...candidateIndexes];
235
+ }
236
+
237
+ function runSearch(query, index, opts = {}) {
142
238
  const queryTerms = tokenize(query);
143
- if (queryTerms.length === 0) return [];
239
+ const totalDocs = index.documents.length;
240
+
241
+ if (queryTerms.length === 0) {
242
+ return {
243
+ results: [],
244
+ stats: {
245
+ totalDocs,
246
+ candidateDocCount: 0,
247
+ scoredDocCount: 0,
248
+ matchedDocCount: 0,
249
+ usedInvertedIndex: !!index.invertedIndex,
250
+ },
251
+ };
252
+ }
144
253
 
145
254
  const { k1, b } = index.params;
146
255
  const results = [];
256
+ const candidateDocIndexes = getCandidateDocIndexes(queryTerms, index);
147
257
 
148
- for (const doc of index.documents) {
258
+ for (const docIndex of candidateDocIndexes) {
259
+ const doc = index.documents[docIndex];
149
260
  let totalScore = 0;
150
261
 
151
262
  for (const [field, weight] of Object.entries(FIELD_WEIGHTS)) {
@@ -161,10 +272,32 @@ export function search(query, index, opts = {}) {
161
272
  }
162
273
 
163
274
  results.sort((a, b) => b.score - a.score);
275
+ const limitedResults = opts.limit ? results.slice(0, opts.limit) : results;
164
276
 
165
- if (opts.limit) {
166
- return results.slice(0, opts.limit);
167
- }
277
+ return {
278
+ results: limitedResults,
279
+ stats: {
280
+ totalDocs,
281
+ candidateDocCount: candidateDocIndexes.length,
282
+ scoredDocCount: candidateDocIndexes.length,
283
+ matchedDocCount: results.length,
284
+ usedInvertedIndex: !!index.invertedIndex,
285
+ },
286
+ };
287
+ }
288
+
289
+ /**
290
+ * Search the BM25 index with a query string.
291
+ *
292
+ * @param {string} query - The search query
293
+ * @param {Object} index - The pre-built BM25 index
294
+ * @param {Object} opts - Options: { limit }
295
+ * @returns {Array} Sorted results: [{ id, score }]
296
+ */
297
+ export function search(query, index, opts = {}) {
298
+ return runSearch(query, index, opts).results;
299
+ }
168
300
 
169
- return results;
301
+ export function searchWithStats(query, index, opts = {}) {
302
+ return runSearch(query, index, opts);
170
303
  }
package/src/lib/cache.js CHANGED
@@ -31,6 +31,10 @@ function getSourceRegistryPath(sourceName) {
31
31
  return join(getSourceDir(sourceName), 'registry.json');
32
32
  }
33
33
 
34
+ function getSourceSearchIndexPath(sourceName) {
35
+ return join(getSourceDir(sourceName), 'search-index.json');
36
+ }
37
+
34
38
  function readMeta(sourceName) {
35
39
  try {
36
40
  return JSON.parse(readFileSync(getSourceMetaPath(sourceName), 'utf8'));
@@ -47,38 +51,99 @@ function writeMeta(sourceName, meta) {
47
51
 
48
52
  function isSourceCacheFresh(sourceName) {
49
53
  const meta = readMeta(sourceName);
50
- if (!meta.lastUpdated) return false;
54
+ if (!meta.lastUpdated && meta.lastUpdated !== 0) return false;
51
55
  const config = loadConfig();
52
56
  const age = (Date.now() - meta.lastUpdated) / 1000;
53
57
  return age < config.refresh_interval;
54
58
  }
55
59
 
56
- /**
57
- * Fetch registry for a single remote source.
58
- */
59
- async function fetchRemoteRegistry(source, force = false) {
60
- if (!force && isSourceCacheFresh(source.name) && existsSync(getSourceRegistryPath(source.name))) {
61
- return;
60
+ function isTimestampFresh(timestamp) {
61
+ if (timestamp === undefined || timestamp === null) return false;
62
+ const config = loadConfig();
63
+ const age = (Date.now() - timestamp) / 1000;
64
+ return age < config.refresh_interval;
65
+ }
66
+
67
+ function hasFreshSearchIndexState(sourceName) {
68
+ if (existsSync(getSourceSearchIndexPath(sourceName))) {
69
+ return true;
62
70
  }
63
71
 
64
- const url = `${source.url}/registry.json`;
72
+ const meta = readMeta(sourceName);
73
+ return meta.searchIndexAvailable === false && isTimestampFresh(meta.searchIndexCheckedAt);
74
+ }
75
+
76
+ function shouldFetchRemoteRegistry(sourceName, force = false) {
77
+ if (force) return true;
78
+ return !(
79
+ isSourceCacheFresh(sourceName)
80
+ && existsSync(getSourceRegistryPath(sourceName))
81
+ && hasFreshSearchIndexState(sourceName)
82
+ );
83
+ }
84
+
85
+ async function fetchRemoteText(url) {
65
86
  const controller = new AbortController();
66
87
  const timeout = setTimeout(() => controller.abort(), 30000);
67
- let res;
68
88
  try {
69
- res = await fetch(url, { signal: controller.signal });
89
+ const res = await fetch(url, { signal: controller.signal });
90
+ if (!res.ok) {
91
+ throw new Error(`${res.status} ${res.statusText}`);
92
+ }
93
+ return await res.text();
70
94
  } finally {
71
95
  clearTimeout(timeout);
72
96
  }
73
- if (!res.ok) {
74
- throw new Error(`Failed to fetch registry from ${source.name}: ${res.status} ${res.statusText}`);
97
+ }
98
+
99
+ /**
100
+ * Fetch registry for a single remote source.
101
+ */
102
+ async function fetchRemoteRegistry(source, force = false) {
103
+ if (!shouldFetchRemoteRegistry(source.name, force)) {
104
+ return;
105
+ }
106
+
107
+ const registryUrl = `${source.url}/registry.json`;
108
+ let registryText;
109
+ try {
110
+ registryText = await fetchRemoteText(registryUrl);
111
+ } catch (err) {
112
+ throw new Error(`Failed to fetch registry from ${source.name}: ${err.message}`);
75
113
  }
76
114
 
77
- const data = await res.text();
78
115
  const dir = getSourceDir(source.name);
79
116
  mkdirSync(dir, { recursive: true });
80
- writeFileSync(getSourceRegistryPath(source.name), data);
81
- writeMeta(source.name, { ...readMeta(source.name), lastUpdated: Date.now() });
117
+ writeFileSync(getSourceRegistryPath(source.name), registryText);
118
+
119
+ const searchIndexUrl = `${source.url}/search-index.json`;
120
+ const searchIndexCheckedAt = Date.now();
121
+ let searchIndexAvailable;
122
+ try {
123
+ const searchIndexText = await fetchRemoteText(searchIndexUrl);
124
+ writeFileSync(getSourceSearchIndexPath(source.name), searchIndexText);
125
+ searchIndexAvailable = true;
126
+ } catch (err) {
127
+ // Avoid serving a stale local search index after a registry refresh.
128
+ rmSync(getSourceSearchIndexPath(source.name), { force: true });
129
+ if (err.message?.startsWith('404 ')) {
130
+ searchIndexAvailable = false;
131
+ }
132
+ }
133
+
134
+ const nextMeta = {
135
+ ...readMeta(source.name),
136
+ lastUpdated: Date.now(),
137
+ };
138
+ delete nextMeta.searchIndexAvailable;
139
+ delete nextMeta.searchIndexCheckedAt;
140
+
141
+ if (searchIndexAvailable !== undefined) {
142
+ nextMeta.searchIndexAvailable = searchIndexAvailable;
143
+ nextMeta.searchIndexCheckedAt = searchIndexCheckedAt;
144
+ }
145
+
146
+ writeMeta(source.name, nextMeta);
82
147
  }
83
148
 
84
149
  /**
@@ -141,6 +206,14 @@ export async function fetchFullBundle(sourceName) {
141
206
  writeFileSync(getSourceRegistryPath(sourceName), regData);
142
207
  }
143
208
 
209
+ const extractedSearchIndex = join(dataDir, 'search-index.json');
210
+ if (existsSync(extractedSearchIndex)) {
211
+ const searchIndexData = readFileSync(extractedSearchIndex, 'utf8');
212
+ writeFileSync(getSourceSearchIndexPath(sourceName), searchIndexData);
213
+ } else {
214
+ rmSync(getSourceSearchIndexPath(sourceName), { force: true });
215
+ }
216
+
144
217
  writeMeta(sourceName, { ...readMeta(sourceName), lastUpdated: Date.now(), fullBundle: true });
145
218
  rmSync(tmpPath, { force: true });
146
219
  }
@@ -187,7 +260,7 @@ export async function fetchDoc(source, docPath) {
187
260
  const content = await res.text();
188
261
 
189
262
  // Cache locally
190
- const dir = cachedPath.substring(0, cachedPath.lastIndexOf('/'));
263
+ const dir = dirname(cachedPath);
191
264
  mkdirSync(dir, { recursive: true });
192
265
  writeFileSync(cachedPath, content);
193
266
 
@@ -327,7 +400,7 @@ export async function ensureRegistry() {
327
400
  // Auto-refresh stale remote registries (best-effort)
328
401
  for (const source of config.sources) {
329
402
  if (source.path) continue;
330
- if (!isSourceCacheFresh(source.name)) {
403
+ if (shouldFetchRemoteRegistry(source.name)) {
331
404
  try { await fetchRemoteRegistry(source); } catch { /* use stale */ }
332
405
  }
333
406
  }
@@ -341,6 +414,10 @@ export async function ensureRegistry() {
341
414
  const defaultDir = getSourceDir('default');
342
415
  mkdirSync(defaultDir, { recursive: true });
343
416
  writeFileSync(getSourceRegistryPath('default'), readFileSync(bundledRegistry, 'utf8'));
417
+ const bundledSearchIndex = join(getBundledDir(), 'search-index.json');
418
+ if (existsSync(bundledSearchIndex)) {
419
+ writeFileSync(getSourceSearchIndexPath('default'), readFileSync(bundledSearchIndex, 'utf8'));
420
+ }
344
421
  writeMeta('default', { lastUpdated: 0, bundledSeed: true }); // lastUpdated=0 → stale, so chub update will refresh
345
422
  return;
346
423
  }