chub-dev 0.1.0 → 0.1.2-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. package/README.md +55 -0
  2. package/bin/chub-mcp +2 -0
  3. package/dist/airtable/docs/database/javascript/DOC.md +1437 -0
  4. package/dist/airtable/docs/database/python/DOC.md +1735 -0
  5. package/dist/amplitude/docs/analytics/javascript/DOC.md +1282 -0
  6. package/dist/amplitude/docs/analytics/python/DOC.md +1199 -0
  7. package/dist/anthropic/docs/claude-api/javascript/DOC.md +503 -0
  8. package/dist/anthropic/docs/claude-api/python/DOC.md +389 -0
  9. package/dist/asana/docs/tasks/DOC.md +1396 -0
  10. package/dist/assemblyai/docs/transcription/DOC.md +1043 -0
  11. package/dist/atlassian/docs/confluence/javascript/DOC.md +1347 -0
  12. package/dist/atlassian/docs/confluence/python/DOC.md +1604 -0
  13. package/dist/auth0/docs/identity/javascript/DOC.md +968 -0
  14. package/dist/auth0/docs/identity/python/DOC.md +1199 -0
  15. package/dist/aws/docs/s3/javascript/DOC.md +1773 -0
  16. package/dist/aws/docs/s3/python/DOC.md +1807 -0
  17. package/dist/binance/docs/trading/javascript/DOC.md +1315 -0
  18. package/dist/binance/docs/trading/python/DOC.md +1454 -0
  19. package/dist/braintree/docs/gateway/javascript/DOC.md +1278 -0
  20. package/dist/braintree/docs/gateway/python/DOC.md +1179 -0
  21. package/dist/chromadb/docs/embeddings-db/javascript/DOC.md +1263 -0
  22. package/dist/chromadb/docs/embeddings-db/python/DOC.md +1707 -0
  23. package/dist/clerk/docs/auth/javascript/DOC.md +1220 -0
  24. package/dist/clerk/docs/auth/python/DOC.md +274 -0
  25. package/dist/cloudflare/docs/workers/javascript/DOC.md +918 -0
  26. package/dist/cloudflare/docs/workers/python/DOC.md +994 -0
  27. package/dist/cockroachdb/docs/distributed-db/DOC.md +1500 -0
  28. package/dist/cohere/docs/llm/DOC.md +1335 -0
  29. package/dist/datadog/docs/monitoring/javascript/DOC.md +1740 -0
  30. package/dist/datadog/docs/monitoring/python/DOC.md +1815 -0
  31. package/dist/deepgram/docs/speech/javascript/DOC.md +885 -0
  32. package/dist/deepgram/docs/speech/python/DOC.md +685 -0
  33. package/dist/deepl/docs/translation/javascript/DOC.md +887 -0
  34. package/dist/deepl/docs/translation/python/DOC.md +944 -0
  35. package/dist/deepseek/docs/llm/DOC.md +1220 -0
  36. package/dist/directus/docs/headless-cms/javascript/DOC.md +1128 -0
  37. package/dist/directus/docs/headless-cms/python/DOC.md +1276 -0
  38. package/dist/discord/docs/bot/javascript/DOC.md +1090 -0
  39. package/dist/discord/docs/bot/python/DOC.md +1130 -0
  40. package/dist/elasticsearch/docs/search/DOC.md +1634 -0
  41. package/dist/elevenlabs/docs/text-to-speech/javascript/DOC.md +336 -0
  42. package/dist/elevenlabs/docs/text-to-speech/python/DOC.md +552 -0
  43. package/dist/firebase/docs/auth/DOC.md +1015 -0
  44. package/dist/gemini/docs/genai/javascript/DOC.md +691 -0
  45. package/dist/gemini/docs/genai/python/DOC.md +555 -0
  46. package/dist/github/docs/octokit/DOC.md +1560 -0
  47. package/dist/google/docs/bigquery/javascript/DOC.md +1688 -0
  48. package/dist/google/docs/bigquery/python/DOC.md +1503 -0
  49. package/dist/hubspot/docs/crm/javascript/DOC.md +1805 -0
  50. package/dist/hubspot/docs/crm/python/DOC.md +2033 -0
  51. package/dist/huggingface/docs/transformers/DOC.md +948 -0
  52. package/dist/intercom/docs/messaging/javascript/DOC.md +1844 -0
  53. package/dist/intercom/docs/messaging/python/DOC.md +1797 -0
  54. package/dist/jira/docs/issues/javascript/DOC.md +1420 -0
  55. package/dist/jira/docs/issues/python/DOC.md +1492 -0
  56. package/dist/kafka/docs/streaming/javascript/DOC.md +1671 -0
  57. package/dist/kafka/docs/streaming/python/DOC.md +1464 -0
  58. package/dist/landingai-ade/docs/api/DOC.md +620 -0
  59. package/dist/landingai-ade/docs/sdk/python/DOC.md +489 -0
  60. package/dist/landingai-ade/docs/sdk/typescript/DOC.md +542 -0
  61. package/dist/landingai-ade/skills/SKILL.md +489 -0
  62. package/dist/launchdarkly/docs/feature-flags/javascript/DOC.md +1191 -0
  63. package/dist/launchdarkly/docs/feature-flags/python/DOC.md +1671 -0
  64. package/dist/linear/docs/tracker/DOC.md +1554 -0
  65. package/dist/livekit/docs/realtime/javascript/DOC.md +303 -0
  66. package/dist/livekit/docs/realtime/python/DOC.md +163 -0
  67. package/dist/mailchimp/docs/marketing/DOC.md +1420 -0
  68. package/dist/meilisearch/docs/search/DOC.md +1241 -0
  69. package/dist/microsoft/docs/onedrive/javascript/DOC.md +1421 -0
  70. package/dist/microsoft/docs/onedrive/python/DOC.md +1549 -0
  71. package/dist/mongodb/docs/atlas/DOC.md +2041 -0
  72. package/dist/notion/docs/workspace-api/javascript/DOC.md +1435 -0
  73. package/dist/notion/docs/workspace-api/python/DOC.md +1400 -0
  74. package/dist/okta/docs/identity/javascript/DOC.md +1171 -0
  75. package/dist/okta/docs/identity/python/DOC.md +1401 -0
  76. package/dist/openai/docs/chat/javascript/DOC.md +407 -0
  77. package/dist/openai/docs/chat/python/DOC.md +568 -0
  78. package/dist/paypal/docs/checkout/DOC.md +278 -0
  79. package/dist/pinecone/docs/sdk/javascript/DOC.md +984 -0
  80. package/dist/pinecone/docs/sdk/python/DOC.md +1395 -0
  81. package/dist/plaid/docs/banking/javascript/DOC.md +1163 -0
  82. package/dist/plaid/docs/banking/python/DOC.md +1203 -0
  83. package/dist/playwright-community/skills/login-flows/SKILL.md +108 -0
  84. package/dist/postmark/docs/transactional-email/DOC.md +1168 -0
  85. package/dist/prisma/docs/orm/javascript/DOC.md +1419 -0
  86. package/dist/prisma/docs/orm/python/DOC.md +1317 -0
  87. package/dist/qdrant/docs/vector-search/javascript/DOC.md +1221 -0
  88. package/dist/qdrant/docs/vector-search/python/DOC.md +1653 -0
  89. package/dist/rabbitmq/docs/message-queue/javascript/DOC.md +1193 -0
  90. package/dist/rabbitmq/docs/message-queue/python/DOC.md +1243 -0
  91. package/dist/razorpay/docs/payments/javascript/DOC.md +1219 -0
  92. package/dist/razorpay/docs/payments/python/DOC.md +1330 -0
  93. package/dist/redis/docs/key-value/javascript/DOC.md +1851 -0
  94. package/dist/redis/docs/key-value/python/DOC.md +2054 -0
  95. package/dist/registry.json +2817 -0
  96. package/dist/replicate/docs/model-hosting/DOC.md +1318 -0
  97. package/dist/resend/docs/email/DOC.md +1271 -0
  98. package/dist/salesforce/docs/crm/javascript/DOC.md +1241 -0
  99. package/dist/salesforce/docs/crm/python/DOC.md +1183 -0
  100. package/dist/search-index.json +1 -0
  101. package/dist/sendgrid/docs/email-api/javascript/DOC.md +371 -0
  102. package/dist/sendgrid/docs/email-api/python/DOC.md +656 -0
  103. package/dist/sentry/docs/error-tracking/javascript/DOC.md +1073 -0
  104. package/dist/sentry/docs/error-tracking/python/DOC.md +1309 -0
  105. package/dist/shopify/docs/storefront/DOC.md +457 -0
  106. package/dist/slack/docs/workspace/javascript/DOC.md +933 -0
  107. package/dist/slack/docs/workspace/python/DOC.md +271 -0
  108. package/dist/square/docs/payments/javascript/DOC.md +1855 -0
  109. package/dist/square/docs/payments/python/DOC.md +1728 -0
  110. package/dist/stripe/docs/api/DOC.md +1727 -0
  111. package/dist/stripe/docs/payments/DOC.md +1726 -0
  112. package/dist/stytch/docs/auth/javascript/DOC.md +1813 -0
  113. package/dist/stytch/docs/auth/python/DOC.md +1962 -0
  114. package/dist/supabase/docs/client/DOC.md +1606 -0
  115. package/dist/twilio/docs/messaging/python/DOC.md +469 -0
  116. package/dist/twilio/docs/messaging/typescript/DOC.md +946 -0
  117. package/dist/vercel/docs/platform/DOC.md +1940 -0
  118. package/dist/weaviate/docs/vector-db/javascript/DOC.md +1268 -0
  119. package/dist/weaviate/docs/vector-db/python/DOC.md +1388 -0
  120. package/dist/zendesk/docs/support/javascript/DOC.md +2150 -0
  121. package/dist/zendesk/docs/support/python/DOC.md +2297 -0
  122. package/package.json +22 -6
  123. package/skills/get-api-docs/SKILL.md +84 -0
  124. package/src/commands/annotate.js +83 -0
  125. package/src/commands/build.js +12 -1
  126. package/src/commands/feedback.js +150 -0
  127. package/src/commands/get.js +83 -42
  128. package/src/commands/search.js +7 -0
  129. package/src/index.js +43 -17
  130. package/src/lib/analytics.js +90 -0
  131. package/src/lib/annotations.js +57 -0
  132. package/src/lib/bm25.js +170 -0
  133. package/src/lib/cache.js +69 -6
  134. package/src/lib/config.js +8 -3
  135. package/src/lib/identity.js +99 -0
  136. package/src/lib/registry.js +103 -20
  137. package/src/lib/telemetry.js +86 -0
  138. package/src/mcp/server.js +177 -0
  139. package/src/mcp/tools.js +251 -0
package/src/index.js CHANGED
@@ -9,6 +9,10 @@ import { registerCacheCommand } from './commands/cache.js';
9
9
  import { registerSearchCommand } from './commands/search.js';
10
10
  import { registerGetCommand } from './commands/get.js';
11
11
  import { registerBuildCommand } from './commands/build.js';
12
+ import { registerFeedbackCommand } from './commands/feedback.js';
13
+ import { registerAnnotateCommand } from './commands/annotate.js';
14
+ import { trackEvent, shutdownAnalytics } from './lib/analytics.js';
15
+ import { error } from './lib/output.js';
12
16
 
13
17
  const __dirname = dirname(fileURLToPath(import.meta.url));
14
18
  const pkg = JSON.parse(readFileSync(join(__dirname, '..', 'package.json'), 'utf8'));
@@ -24,17 +28,31 @@ ${chalk.bold.underline('Getting Started')}
24
28
  ${chalk.dim('$')} chub search ${chalk.dim('# list everything available')}
25
29
  ${chalk.dim('$')} chub search "stripe" ${chalk.dim('# fuzzy search')}
26
30
  ${chalk.dim('$')} chub search stripe/payments ${chalk.dim('# exact id → full detail')}
27
- ${chalk.dim('$')} chub get docs stripe/payments ${chalk.dim('# print doc to terminal')}
28
- ${chalk.dim('$')} chub get docs stripe/payments -o doc.md ${chalk.dim('# save to file')}
29
- ${chalk.dim('$')} chub get docs stripe/payments --lang py ${chalk.dim('# specific language')}
30
- ${chalk.dim('$')} chub get skills pw/login-flows ${chalk.dim('# fetch a skill')}
31
- ${chalk.dim('$')} chub get docs openai/chat stripe/payments ${chalk.dim('# fetch multiple')}
31
+ ${chalk.dim('$')} chub get stripe/api ${chalk.dim('# print doc to terminal')}
32
+ ${chalk.dim('$')} chub get stripe/api -o doc.md ${chalk.dim('# save to file')}
33
+ ${chalk.dim('$')} chub get openai/chat --lang py ${chalk.dim('# specific language')}
34
+ ${chalk.dim('$')} chub get pw-community/login-flows ${chalk.dim('# fetch a skill')}
35
+ ${chalk.dim('$')} chub get openai/chat stripe/api ${chalk.dim('# fetch multiple')}
36
+
37
+ ${chalk.bold.underline('Learn & Improve')}
38
+
39
+ After using a doc, save what you learned so future sessions start smarter:
40
+
41
+ ${chalk.dim('$')} chub annotate stripe/api "Webhook needs raw body" ${chalk.dim('# persists across sessions')}
42
+ ${chalk.dim('$')} chub annotate --list ${chalk.dim('# see all saved notes')}
43
+ ${chalk.dim('$')} chub annotate stripe/api --clear ${chalk.dim('# remove a note')}
44
+
45
+ Rate docs so authors can improve them (ask the user before sending):
46
+
47
+ ${chalk.dim('$')} chub feedback stripe/api up ${chalk.dim('# worked well')}
48
+ ${chalk.dim('$')} chub feedback stripe/api down --label outdated ${chalk.dim('# needs updating')}
32
49
 
33
50
  ${chalk.bold.underline('Commands')}
34
51
 
35
52
  ${chalk.bold('search')} [query] Search docs and skills (no query = list all)
36
- ${chalk.bold('get docs')} <ids...> Fetch documentation content
37
- ${chalk.bold('get skills')} <ids...> Fetch skill content
53
+ ${chalk.bold('get')} <ids...> Fetch docs or skills by ID
54
+ ${chalk.bold('annotate')} [id] [note] Save a note — appears on future fetches
55
+ ${chalk.bold('feedback')} <id> <up|down> Rate a doc (helps authors improve it)
38
56
  ${chalk.bold('update')} Refresh the cached registry
39
57
  ${chalk.bold('cache')} status|clear Manage the local cache
40
58
  ${chalk.bold('build')} <content-dir> Build registry from content directory
@@ -54,20 +72,20 @@ ${chalk.bold.underline('Agent Piping Patterns')}
54
72
 
55
73
  ${chalk.dim('# Search → pick → fetch → save')}
56
74
  ${chalk.dim('$')} ID=$(chub search "stripe" --json | jq -r '.results[0].id')
57
- ${chalk.dim('$')} chub get docs "$ID" --lang js -o .context/stripe.md
75
+ ${chalk.dim('$')} chub get "$ID" --lang js -o .context/stripe.md
58
76
 
59
- ${chalk.dim('# Fetch multiple docs at once')}
60
- ${chalk.dim('$')} chub get docs openai/chat stripe/payments -o .context/
77
+ ${chalk.dim('# Fetch multiple at once')}
78
+ ${chalk.dim('$')} chub get openai/chat stripe/api -o .context/
61
79
 
62
80
  ${chalk.bold.underline('Multi-Source Config')} ${chalk.dim('(~/.chub/config.yaml)')}
63
81
 
64
82
  ${chalk.dim('sources:')}
65
83
  ${chalk.dim(' - name: community')}
66
- ${chalk.dim(' url: https://cdn.contexthub.dev/v1')}
84
+ ${chalk.dim(' url: https://cdn.aichub.org/v1')}
67
85
  ${chalk.dim(' - name: internal')}
68
86
  ${chalk.dim(' path: /path/to/local/docs')}
69
87
 
70
- ${chalk.dim('# On id collision, use source: prefix: chub get docs internal:openai/chat')}
88
+ ${chalk.dim('# On id collision, use source: prefix: chub get internal:openai/chat')}
71
89
  `);
72
90
  }
73
91
 
@@ -76,17 +94,21 @@ const program = new Command();
76
94
  program
77
95
  .name('chub')
78
96
  .description('Context Hub - search and retrieve LLM-optimized docs and skills')
79
- .version(pkg.version)
97
+ .version(pkg.version, '-V, --cli-version')
80
98
  .option('--json', 'Output as JSON (machine-readable)')
81
99
  .action(() => {
82
100
  printUsage();
83
101
  });
84
102
 
85
103
  // Commands that don't need registry
86
- const SKIP_REGISTRY = ['update', 'cache', 'build', 'help'];
104
+ const SKIP_REGISTRY = ['update', 'cache', 'build', 'feedback', 'annotate', 'help'];
87
105
 
88
106
  program.hook('preAction', async (thisCommand) => {
89
107
  const cmdName = thisCommand.args?.[0] || thisCommand.name();
108
+ // Track command usage (fire-and-forget, never blocks)
109
+ if (cmdName !== 'chub') {
110
+ trackEvent('command_run', { command: cmdName }).catch(() => {});
111
+ }
90
112
  if (SKIP_REGISTRY.includes(cmdName)) return;
91
113
  if (thisCommand.parent?.name() === 'cache') return;
92
114
  // Don't fetch registry for default action (no command)
@@ -94,9 +116,8 @@ program.hook('preAction', async (thisCommand) => {
94
116
  try {
95
117
  await ensureRegistry();
96
118
  } catch (err) {
97
- process.stderr.write(`Warning: Could not load registry: ${err.message}\n`);
98
- process.stderr.write(`Run \`chub update\` to initialize.\n`);
99
- process.exit(1);
119
+ const globalOpts = thisCommand.optsWithGlobals?.() || {};
120
+ error(`Registry not available: ${err.message}. Run \`chub update\` to refresh remote registries, or check that local source paths in ~/.chub/config.yaml are correct.`, globalOpts);
100
121
  }
101
122
  });
102
123
 
@@ -105,5 +126,10 @@ registerCacheCommand(program);
105
126
  registerSearchCommand(program);
106
127
  registerGetCommand(program);
107
128
  registerBuildCommand(program);
129
+ registerFeedbackCommand(program);
130
+ registerAnnotateCommand(program);
108
131
 
109
132
  program.parse();
133
+
134
+ // Flush analytics before exit (best-effort)
135
+ process.on('beforeExit', () => shutdownAnalytics().catch(() => {}));
@@ -0,0 +1,90 @@
1
+ /**
2
+ * PostHog Cloud analytics for general CLI usage tracking.
3
+ *
4
+ * Tracks: command usage, search patterns, doc/skill popularity, errors.
5
+ * Does NOT track feedback ratings (those go to the custom API via telemetry.js).
6
+ *
7
+ * Respects the same telemetry opt-out: `telemetry: false` in config or CHUB_TELEMETRY=0.
8
+ */
9
+
10
+ import { isTelemetryEnabled } from './telemetry.js';
11
+
12
+ // PostHog project API key (public — standard for client-side analytics)
13
+ const POSTHOG_KEY = 'phc_tO9mXIgcCuBccfN2Ut0quf6UFsd06u3Y6g1kqMaYdQX';
14
+ const POSTHOG_HOST = 'https://us.i.posthog.com';
15
+
16
+ let _posthog = null;
17
+ let _initFailed = false;
18
+
19
/**
 * Return the shared PostHog client, creating it on first use.
 *
 * Resolves to null when telemetry is disabled or when loading/constructing
 * the `posthog-node` client fails; that outcome is remembered so later calls
 * return null immediately.
 *
 * @returns {Promise<object|null>} The PostHog client, or null when analytics is unavailable.
 */
async function getClient() {
  if (_initFailed) return null;
  if (_posthog) return _posthog;

  if (isTelemetryEnabled()) {
    try {
      const posthogModule = await import('posthog-node');
      const { PostHog } = posthogModule;
      _posthog = new PostHog(POSTHOG_KEY, {
        host: POSTHOG_HOST,
        flushAt: 1, // Send immediately (CLI is short-lived)
        flushInterval: 0, // Don't batch
      });
      return _posthog;
    } catch {
      // e.g. posthog-node is not installed: fall through and disable analytics.
    }
  }

  // Telemetry is off or initialization failed: do not try again in this process.
  _initFailed = true;
  return null;
}
46
+
47
/**
 * Record one analytics event and flush it right away.
 *
 * The returned promise never rejects: every failure is swallowed so analytics
 * cannot disrupt the CLI. Callers that do not await it are not held up.
 *
 * @param {string} event - Event name (e.g., 'command_run', 'search', 'doc_fetched')
 * @param {object} properties - Event properties; `platform` and `node_version` are added.
 * @returns {Promise<void>}
 */
export async function trackEvent(event, properties = {}) {
  try {
    const client = await getClient();
    if (client) {
      // Loaded lazily so the identity module is only pulled in when analytics is active.
      const identity = await import('./identity.js');
      const distinctId = await identity.getOrCreateClientId();

      const payload = {
        ...properties,
        platform: process.platform,
        node_version: process.version,
      };
      client.capture({ distinctId, event, properties: payload });

      // Flush immediately since the CLI process exits soon.
      await client.flush();
    }
  } catch {
    // Silent fail: analytics should never disrupt the CLI.
  }
}
77
+
78
/**
 * Shut down the PostHog client if one was created.
 * Errors raised by the shutdown are ignored.
 *
 * @returns {Promise<void>}
 */
export async function shutdownAnalytics() {
  if (!_posthog) return;

  try {
    await _posthog.shutdown();
  } catch {
    // Best-effort: nothing useful to do if shutdown fails.
  }
}
@@ -0,0 +1,57 @@
1
+ import { readFileSync, writeFileSync, mkdirSync, unlinkSync, readdirSync } from 'node:fs';
2
+ import { join } from 'node:path';
3
+ import { getChubDir } from './config.js';
4
+
5
/**
 * Directory that holds one JSON file per annotated entry.
 *
 * @returns {string} The `annotations` folder inside the chub directory.
 */
function getAnnotationsDir() {
  const chubDir = getChubDir();
  return join(chubDir, 'annotations');
}
8
+
9
/**
 * Map an entry ID to the path of its annotation file.
 * Every `/` in the ID becomes `--` so the ID fits in a single file name.
 *
 * @param {string} entryId - Entry identifier, e.g. `stripe/api`.
 * @returns {string} Path of the `<id>.json` file inside the annotations directory.
 */
function annotationPath(entryId) {
  const fileName = `${entryId.split('/').join('--')}.json`;
  return join(getAnnotationsDir(), fileName);
}
13
+
14
/**
 * Load the saved annotation for an entry.
 *
 * @param {string} entryId - Entry identifier.
 * @returns {object|null} The parsed annotation, or null when it cannot be read or parsed.
 */
export function readAnnotation(entryId) {
  let parsed = null;
  try {
    const raw = readFileSync(annotationPath(entryId), 'utf8');
    parsed = JSON.parse(raw);
  } catch {
    parsed = null;
  }
  return parsed;
}
21
+
22
/**
 * Save (or overwrite) the annotation for an entry.
 * Creates the annotations directory when it does not exist yet.
 *
 * @param {string} entryId - Entry identifier.
 * @param {string} note - Note text to store.
 * @returns {{id: string, note: string, updatedAt: string}} The record that was written.
 */
export function writeAnnotation(entryId, note) {
  mkdirSync(getAnnotationsDir(), { recursive: true });

  const record = {
    id: entryId,
    note,
    updatedAt: new Date().toISOString(),
  };
  const serialized = JSON.stringify(record, null, 2);
  writeFileSync(annotationPath(entryId), serialized);

  return record;
}
33
+
34
/**
 * Delete the annotation file for an entry.
 *
 * @param {string} entryId - Entry identifier.
 * @returns {boolean} true when the file was removed, false when removal failed
 *   (for example because no annotation exists).
 */
export function clearAnnotation(entryId) {
  let removed = true;
  try {
    unlinkSync(annotationPath(entryId));
  } catch {
    removed = false;
  }
  return removed;
}
42
+
43
/**
 * Load every saved annotation.
 * Files that cannot be read or parsed are skipped.
 *
 * @returns {object[]} Parsed annotations; empty when the directory cannot be listed.
 */
export function listAnnotations() {
  const dir = getAnnotationsDir();

  let names;
  try {
    names = readdirSync(dir);
  } catch {
    return [];
  }

  const annotations = [];
  for (const name of names) {
    if (!name.endsWith('.json')) continue;
    try {
      const parsed = JSON.parse(readFileSync(join(dir, name), 'utf8'));
      // Drop falsy payloads (e.g. a file containing just `null`).
      if (parsed) annotations.push(parsed);
    } catch {
      // Unreadable or malformed file: ignore it.
    }
  }
  return annotations;
}
@@ -0,0 +1,170 @@
1
+ /**
2
+ * BM25 search implementation for Context Hub.
3
+ * Index is built at `chub build` time, scoring happens at search time.
4
+ * Tokenizer is shared between build and search to ensure consistency.
5
+ */
6
+
7
+ const STOP_WORDS = new Set([
8
+ 'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from',
9
+ 'has', 'have', 'in', 'is', 'it', 'its', 'of', 'on', 'or', 'that',
10
+ 'the', 'to', 'was', 'were', 'will', 'with', 'this', 'but', 'not',
11
+ 'you', 'your', 'can', 'do', 'does', 'how', 'if', 'may', 'no',
12
+ 'so', 'than', 'too', 'very', 'just', 'about', 'into', 'over',
13
+ 'such', 'then', 'them', 'these', 'those', 'through', 'under',
14
+ 'use', 'using', 'used',
15
+ ]);
16
+
17
+ // BM25 default parameters
18
+ const DEFAULT_K1 = 1.5;
19
+ const DEFAULT_B = 0.75;
20
+
21
+ // Field weights for multi-field scoring
22
+ const FIELD_WEIGHTS = {
23
+ name: 3.0,
24
+ tags: 2.0,
25
+ description: 1.0,
26
+ };
27
+
28
/**
 * Split text into lowercase search terms.
 *
 * Characters other than a-z, 0-9, whitespace and `-` are treated as spaces;
 * the text is then split on whitespace and hyphens. One-character pieces and
 * stop words are discarded. Build time and search time must both use this
 * function so their terms line up.
 *
 * @param {string} text - Raw text; any falsy value yields an empty list.
 * @returns {string[]} Terms in their original order (duplicates kept).
 */
export function tokenize(text) {
  if (!text) return [];

  const normalized = text.toLowerCase().replace(/[^a-z0-9\s-]/g, ' ');
  const terms = [];
  for (const piece of normalized.split(/[\s-]+/)) {
    if (piece.length <= 1) continue;
    if (STOP_WORDS.has(piece)) continue;
    terms.push(piece);
  }
  return terms;
}
40
+
41
/**
 * Build a BM25 search index from registry entries.
 * Called during `chub build`.
 *
 * Each entry contributes three tokenized fields (name, description, tags).
 * Document frequency counts a term once per entry, across all of its fields.
 *
 * @param {Array} entries - Combined docs and skills from registry; each entry
 *   is read for `id`, `name`, `description` and `tags`.
 * @returns {Object} The search index: `{ version, algorithm, params,
 *   totalDocs, avgFieldLengths, idf, documents }`.
 */
export function buildIndex(entries) {
  const documents = [];
  // A Map (not a plain object) so that terms such as "constructor" are not
  // confused with properties inherited from Object.prototype, which would
  // turn the count into a string and the IDF into NaN.
  const docFreq = new Map();
  const fieldLengths = { name: [], description: [], tags: [] };

  for (const entry of entries) {
    const tokens = {
      name: tokenize(entry.name),
      description: tokenize(entry.description || ''),
      tags: (entry.tags || []).flatMap((tag) => tokenize(tag)),
    };

    documents.push({ id: entry.id, tokens });

    fieldLengths.name.push(tokens.name.length);
    fieldLengths.description.push(tokens.description.length);
    fieldLengths.tags.push(tokens.tags.length);

    // A term counts once per document (union of all fields).
    const uniqueTerms = new Set([...tokens.name, ...tokens.description, ...tokens.tags]);
    for (const term of uniqueTerms) {
      docFreq.set(term, (docFreq.get(term) ?? 0) + 1);
    }
  }

  const N = documents.length;

  // IDF per term. Object.fromEntries defines own properties, so the result
  // stays a plain, JSON-serializable object even for awkward keys.
  const idf = Object.fromEntries(
    [...docFreq].map(([term, df]) => [term, Math.log((N - df + 0.5) / (df + 0.5) + 1)]),
  );

  // Average token count per field (0 when there are no documents).
  const avg = (arr) => (arr.length === 0 ? 0 : arr.reduce((a, b) => a + b, 0) / arr.length);
  const avgFieldLengths = {
    name: avg(fieldLengths.name),
    description: avg(fieldLengths.description),
    tags: avg(fieldLengths.tags),
  };

  return {
    version: '1.0.0',
    algorithm: 'bm25',
    params: { k1: DEFAULT_K1, b: DEFAULT_B },
    totalDocs: N,
    avgFieldLengths,
    idf,
    documents,
  };
}
104
+
105
/**
 * Compute the BM25 score of a single field for a set of query terms.
 *
 * @param {string[]} queryTerms - Tokenized query terms.
 * @param {string[]} fieldTokens - Tokens of the field being scored.
 * @param {Object} idf - Term -> inverse document frequency lookup.
 * @param {number} avgFieldLen - Average token count of this field across the
 *   index; a falsy value is treated as 1.
 * @param {number} k1 - BM25 k1 parameter.
 * @param {number} b - BM25 b parameter.
 * @returns {number} Sum of the per-term contributions; 0 when nothing matches.
 */
function scoreField(queryTerms, fieldTokens, idf, avgFieldLen, k1, b) {
  if (fieldTokens.length === 0) return 0;

  // Term frequencies live in a Map: with a plain object, a term such as
  // "constructor" would read Object.prototype.constructor and poison the
  // score with NaN.
  const tf = new Map();
  for (const token of fieldTokens) {
    tf.set(token, (tf.get(token) ?? 0) + 1);
  }

  const dl = fieldTokens.length;
  // Length normalization is the same for every term of this field.
  const lengthNorm = k1 * (1 - b + b * (dl / (avgFieldLen || 1)));

  let score = 0;
  for (const term of queryTerms) {
    const termFreq = tf.get(term);
    if (termFreq === undefined) continue;

    // Only trust the index's own entries; anything missing or non-numeric
    // (e.g. a null left behind by JSON-serializing NaN) counts as 0.
    const ownIdf = Object.prototype.hasOwnProperty.call(idf, term) ? idf[term] : 0;
    const termIdf = Number.isFinite(ownIdf) ? ownIdf : 0;

    const numerator = termFreq * (k1 + 1);
    const denominator = termFreq + lengthNorm;
    score += termIdf * (numerator / denominator);
  }

  return score;
}
132
+
133
/**
 * Rank the documents of a BM25 index against a query.
 *
 * Each document's score is the weighted sum of its per-field BM25 scores
 * (weights come from FIELD_WEIGHTS). Documents scoring 0 or less are left out.
 *
 * @param {string} query - The search query
 * @param {Object} index - The pre-built BM25 index
 * @param {Object} opts - Options: { limit } — a truthy limit caps the result count
 * @returns {Array} Results sorted by descending score: [{ id, score }]
 */
export function search(query, index, opts = {}) {
  const terms = tokenize(query);
  if (terms.length === 0) return [];

  const { k1, b } = index.params;
  const weightedFields = Object.entries(FIELD_WEIGHTS);

  const ranked = index.documents
    .map((doc) => {
      const score = weightedFields.reduce((sum, [field, weight]) => {
        const fieldTokens = doc.tokens[field] || [];
        const avgLen = index.avgFieldLengths[field] || 1;
        return sum + scoreField(terms, fieldTokens, index.idf, avgLen, k1, b) * weight;
      }, 0);
      return { id: doc.id, score };
    })
    .filter((hit) => hit.score > 0)
    .sort((left, right) => right.score - left.score);

  return opts.limit ? ranked.slice(0, opts.limit) : ranked;
}
package/src/lib/cache.js CHANGED
@@ -1,9 +1,20 @@
1
1
  import { existsSync, mkdirSync, readFileSync, writeFileSync, rmSync, readdirSync, statSync } from 'node:fs';
2
- import { join } from 'node:path';
2
+ import { join, dirname } from 'node:path';
3
3
  import { pipeline } from 'node:stream/promises';
4
4
  import { createWriteStream } from 'node:fs';
5
+ import { fileURLToPath } from 'node:url';
5
6
  import { getChubDir, loadConfig } from './config.js';
6
7
 
8
+ const __dirname = dirname(fileURLToPath(import.meta.url));
9
+
10
/**
 * Location of the content bundled with the npm package: the `dist` folder
 * two levels above this module's directory.
 *
 * @returns {string} Path to the bundled `dist` directory.
 */
function getBundledDir() {
  const upToPackageRoot = ['..', '..'];
  return join(__dirname, ...upToPackageRoot, 'dist');
}
17
+
7
18
  function getSourceDir(sourceName) {
8
19
  return join(getChubDir(), 'sources', sourceName);
9
20
  }
@@ -51,7 +62,14 @@ async function fetchRemoteRegistry(source, force = false) {
51
62
  }
52
63
 
53
64
  const url = `${source.url}/registry.json`;
54
- const res = await fetch(url);
65
+ const controller = new AbortController();
66
+ const timeout = setTimeout(() => controller.abort(), 30000);
67
+ let res;
68
+ try {
69
+ res = await fetch(url, { signal: controller.signal });
70
+ } finally {
71
+ clearTimeout(timeout);
72
+ }
55
73
  if (!res.ok) {
56
74
  throw new Error(`Failed to fetch registry from ${source.name}: ${res.status} ${res.statusText}`);
57
75
  }
@@ -95,7 +113,14 @@ export async function fetchFullBundle(sourceName) {
95
113
  const url = `${source.url}/bundle.tar.gz`;
96
114
  const tmpPath = join(getSourceDir(sourceName), 'bundle.tar.gz');
97
115
 
98
- const res = await fetch(url);
116
+ const controller = new AbortController();
117
+ const timeout = setTimeout(() => controller.abort(), 30000);
118
+ let res;
119
+ try {
120
+ res = await fetch(url, { signal: controller.signal });
121
+ } finally {
122
+ clearTimeout(timeout);
123
+ }
99
124
  if (!res.ok) {
100
125
  throw new Error(`Failed to fetch bundle from ${sourceName}: ${res.status} ${res.statusText}`);
101
126
  }
@@ -139,9 +164,22 @@ export async function fetchDoc(source, docPath) {
139
164
  return readFileSync(cachedPath, 'utf8');
140
165
  }
141
166
 
142
- // Fetch from CDN
167
+ // Check bundled content (shipped with npm package)
168
+ const bundledPath = join(getBundledDir(), docPath);
169
+ if (existsSync(bundledPath)) {
170
+ return readFileSync(bundledPath, 'utf8');
171
+ }
172
+
173
+ // Fetch from CDN (optional — only if source has a URL)
143
174
  const url = `${source.url}/${docPath}`;
144
- const res = await fetch(url);
175
+ const controller = new AbortController();
176
+ const timeout = setTimeout(() => controller.abort(), 30000);
177
+ let res;
178
+ try {
179
+ res = await fetch(url, { signal: controller.signal });
180
+ } finally {
181
+ clearTimeout(timeout);
182
+ }
145
183
  if (!res.ok) {
146
184
  throw new Error(`Failed to fetch ${docPath} from ${source.name}: ${res.status} ${res.statusText}`);
147
185
  }
@@ -187,6 +225,20 @@ export function loadSourceRegistry(source) {
187
225
  return JSON.parse(readFileSync(regPath, 'utf8'));
188
226
  }
189
227
 
228
/**
 * Load the BM25 search index of one source.
 *
 * Looks for `search-index.json` under `source.path` when set, otherwise under
 * the source's cache directory.
 *
 * @param {Object} source - Source descriptor; `path` and `name` are read.
 * @returns {Object|null} The parsed index, or null when the file is missing,
 *   unreadable or not valid JSON.
 */
export function loadSearchIndex(source) {
  const baseDir = source.path || getSourceDir(source.name);
  const indexFile = join(baseDir, 'search-index.json');

  try {
    // A missing file throws here and is reported as "no index".
    return JSON.parse(readFileSync(indexFile, 'utf8'));
  } catch {
    return null;
  }
}
241
+
190
242
  /**
191
243
  * Get cache stats.
192
244
  */
@@ -282,6 +334,17 @@ export async function ensureRegistry() {
282
334
  return;
283
335
  }
284
336
 
285
- // No registries at all — must download remote ones
337
+ // No registries at all — try bundled content first, then network
338
+ const bundledRegistry = join(getBundledDir(), 'registry.json');
339
+ if (existsSync(bundledRegistry)) {
340
+ // Seed cache from bundled content (ships with npm package)
341
+ const defaultDir = getSourceDir('default');
342
+ mkdirSync(defaultDir, { recursive: true });
343
+ writeFileSync(getSourceRegistryPath('default'), readFileSync(bundledRegistry, 'utf8'));
344
+ writeMeta('default', { lastUpdated: 0, bundledSeed: true }); // lastUpdated=0 → stale, so chub update will refresh
345
+ return;
346
+ }
347
+
348
+ // No bundled content either — must download from remote
286
349
  await fetchAllRegistries(true);
287
350
  }
package/src/lib/config.js CHANGED
@@ -3,19 +3,22 @@ import { join } from 'node:path';
3
3
  import { homedir } from 'node:os';
4
4
  import { parse as parseYaml } from 'yaml';
5
5
 
6
- const DEFAULT_CDN_URL = 'https://github.com/context-hub/context-hub/releases/latest/download';
6
+ const DEFAULT_CDN_URL = 'https://cdn.aichub.org/v1';
7
+ const DEFAULT_TELEMETRY_URL = 'https://api.aichub.org/v1';
7
8
 
8
9
  const DEFAULTS = {
9
10
  output_dir: '.context',
10
- refresh_interval: 86400,
11
+ refresh_interval: 21600,
11
12
  output_format: 'human',
12
13
  source: 'official,maintainer,community',
14
+ telemetry: true,
15
+ telemetry_url: DEFAULT_TELEMETRY_URL,
13
16
  };
14
17
 
15
18
  let _config = null;
16
19
 
17
20
  export function getChubDir() {
18
- return join(homedir(), '.chub');
21
+ return process.env.CHUB_DIR || join(homedir(), '.chub');
19
22
  }
20
23
 
21
24
  export function loadConfig() {
@@ -46,6 +49,8 @@ export function loadConfig() {
46
49
  refresh_interval: fileConfig.refresh_interval ?? DEFAULTS.refresh_interval,
47
50
  output_format: fileConfig.output_format || DEFAULTS.output_format,
48
51
  source: fileConfig.source || DEFAULTS.source,
52
+ telemetry: fileConfig.telemetry !== undefined ? fileConfig.telemetry : DEFAULTS.telemetry,
53
+ telemetry_url: fileConfig.telemetry_url || DEFAULTS.telemetry_url,
49
54
  };
50
55
 
51
56
  return _config;
@@ -0,0 +1,99 @@
1
+ import { createHash } from 'node:crypto';
2
+ import { execSync } from 'node:child_process';
3
+ import { platform } from 'node:os';
4
+ import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
5
+ import { join } from 'node:path';
6
+ import { getChubDir } from './config.js';
7
+
8
+ let _cachedClientId = null;
9
+
10
/**
 * Get the platform-native machine UUID.
 *
 * - macOS: `IOPlatformUUID` reported by `ioreg`.
 * - Linux: `/etc/machine-id`, falling back to `/var/lib/dbus/machine-id`.
 * - Windows: `MachineGuid` from the Cryptography registry key.
 *
 * @returns {string} A non-empty, trimmed machine identifier.
 * @throws {Error} When the platform is unsupported, the lookup fails, or the
 *   identifier is empty. An empty value is rejected because hashing it would
 *   give every such machine the same client ID.
 */
function getMachineUUID() {
  const plat = platform();
  // Capture stdout only. Without an explicit stdio setting, execSync forwards
  // the child's stderr to the user's terminal when a probe fails.
  const execOpts = { encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'] };

  let uuid = '';

  if (plat === 'darwin') {
    uuid = execSync(
      `ioreg -rd1 -c IOPlatformExpertDevice | awk -F'"' '/IOPlatformUUID/{print $4}'`,
      execOpts
    ).trim();
  } else if (plat === 'linux') {
    // Try each known location; an unreadable or empty file (seen in some
    // container images) falls through to the next one.
    for (const candidate of ['/etc/machine-id', '/var/lib/dbus/machine-id']) {
      try {
        uuid = readFileSync(candidate, 'utf8').trim();
      } catch {
        uuid = '';
      }
      if (uuid) break;
    }
  } else if (plat === 'win32') {
    const output = execSync(
      'reg query "HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Cryptography" /v MachineGuid',
      execOpts
    );
    const match = output.match(/MachineGuid\s+REG_SZ\s+(.+)/);
    if (!match) throw new Error('Could not parse MachineGuid from registry');
    uuid = match[1].trim();
  } else {
    throw new Error(`Unsupported platform: ${plat}`);
  }

  if (!uuid) {
    throw new Error('Could not determine machine UUID');
  }
  return uuid;
}
43
+
44
/**
 * Get or create a stable, anonymous client ID.
 *
 * Resolution order:
 *   1. the value memoized by an earlier call in this process;
 *   2. the `client_id` file in the chub directory, if it holds a 64-char
 *      lowercase hex string;
 *   3. the SHA-256 hex digest of the machine UUID, which is then written to
 *      that file.
 *
 * @returns {Promise<string>} The 64-character hex client ID.
 */
export async function getOrCreateClientId() {
  if (_cachedClientId) return _cachedClientId;

  const chubDir = getChubDir();
  const idPath = join(chubDir, 'client_id');

  let stored = null;
  try {
    stored = readFileSync(idPath, 'utf8').trim();
  } catch {
    // File doesn't exist or is unreadable: generate a fresh ID below.
  }
  if (stored !== null && /^[0-9a-f]{64}$/.test(stored)) {
    _cachedClientId = stored;
    return stored;
  }

  // Derive the ID from the machine UUID.
  const machineId = getMachineUUID();
  const digest = createHash('sha256').update(machineId).digest('hex');

  // Make sure the directory is there before persisting.
  if (!existsSync(chubDir)) {
    mkdirSync(chubDir, { recursive: true });
  }
  writeFileSync(idPath, digest, 'utf8');

  _cachedClientId = digest;
  return digest;
}
79
+
80
/**
 * Guess which AI coding tool is running the CLI by looking for
 * tool-specific environment variables.
 *
 * Tools are checked in the order listed below; the first one with any
 * non-empty variable set wins.
 *
 * @returns {string} The tool name, or 'unknown' when no variable matches.
 */
export function detectAgent() {
  const env = process.env;
  const signatures = [
    ['claude-code', ['CLAUDE_CODE', 'CLAUDE_SESSION_ID']],
    ['cursor', ['CURSOR_SESSION_ID', 'CURSOR_TRACE_ID']],
    ['codex', ['CODEX_HOME', 'CODEX_SESSION']],
    ['windsurf', ['WINDSURF_SESSION']],
    ['aider', ['AIDER_MODEL', 'AIDER']],
    ['cline', ['CLINE_SESSION']],
    ['copilot', ['GITHUB_COPILOT']],
  ];

  for (const [agent, variables] of signatures) {
    if (variables.some((name) => env[name])) return agent;
  }
  return 'unknown';
}
93
+
94
/**
 * Report the AI coding tool's version when the environment exposes one.
 * `CLAUDE_CODE_VERSION` takes precedence over `CURSOR_VERSION`.
 *
 * @returns {string|undefined} The version string, or undefined when neither
 *   variable holds a non-empty value.
 */
export function detectAgentVersion() {
  const { CLAUDE_CODE_VERSION, CURSOR_VERSION } = process.env;

  if (CLAUDE_CODE_VERSION) return CLAUDE_CODE_VERSION;
  if (CURSOR_VERSION) return CURSOR_VERSION;
  return undefined;
}