@adityanair98/api-oracle 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +216 -0
  3. package/dist/cli.d.ts +11 -0
  4. package/dist/cli.js +74 -0
  5. package/dist/dashboard/public/app.js +1004 -0
  6. package/dist/dashboard/public/index.html +142 -0
  7. package/dist/dashboard/public/public/app.js +1004 -0
  8. package/dist/dashboard/public/public/index.html +142 -0
  9. package/dist/dashboard/public/public/styles.css +1464 -0
  10. package/dist/dashboard/public/styles.css +1464 -0
  11. package/dist/dashboard/routes/api.d.ts +7 -0
  12. package/dist/dashboard/routes/api.js +245 -0
  13. package/dist/dashboard/server.d.ts +9 -0
  14. package/dist/dashboard/server.js +45 -0
  15. package/dist/index.d.ts +5 -0
  16. package/dist/index.js +23 -0
  17. package/dist/knowledge/db.d.ts +22 -0
  18. package/dist/knowledge/db.js +182 -0
  19. package/dist/knowledge/schema.d.ts +275 -0
  20. package/dist/knowledge/schema.js +135 -0
  21. package/dist/knowledge/scorer.d.ts +63 -0
  22. package/dist/knowledge/scorer.js +314 -0
  23. package/dist/knowledge/search.d.ts +37 -0
  24. package/dist/knowledge/search.js +111 -0
  25. package/dist/knowledge/synonyms.d.ts +36 -0
  26. package/dist/knowledge/synonyms.js +523 -0
  27. package/dist/knowledge/tfidf.d.ts +42 -0
  28. package/dist/knowledge/tfidf.js +138 -0
  29. package/dist/server.d.ts +9 -0
  30. package/dist/server.js +40 -0
  31. package/dist/tools/check-freshness.d.ts +9 -0
  32. package/dist/tools/check-freshness.js +95 -0
  33. package/dist/tools/compare-apis.d.ts +8 -0
  34. package/dist/tools/compare-apis.js +149 -0
  35. package/dist/tools/find-api.d.ts +9 -0
  36. package/dist/tools/find-api.js +120 -0
  37. package/dist/tools/get-setup-guide.d.ts +8 -0
  38. package/dist/tools/get-setup-guide.js +127 -0
  39. package/dist/updater/linter.d.ts +31 -0
  40. package/dist/updater/linter.js +219 -0
  41. package/dist/updater/report.d.ts +29 -0
  42. package/dist/updater/report.js +96 -0
  43. package/dist/updater/staleness.d.ts +39 -0
  44. package/dist/updater/staleness.js +66 -0
  45. package/dist/updater/version-tracker.d.ts +28 -0
  46. package/dist/updater/version-tracker.js +50 -0
  47. package/dist/utils/config.d.ts +11 -0
  48. package/dist/utils/config.js +13 -0
  49. package/dist/utils/logger.d.ts +20 -0
  50. package/dist/utils/logger.js +32 -0
  51. package/package.json +56 -0
  52. package/src/entries/ai/anthropic.json +95 -0
  53. package/src/entries/ai/eleven-labs.json +90 -0
  54. package/src/entries/ai/openai.json +95 -0
  55. package/src/entries/ai/replicate.json +87 -0
  56. package/src/entries/ai/resemble-ai.json +88 -0
  57. package/src/entries/ai/stability-ai.json +89 -0
  58. package/src/entries/analytics/posthog.json +88 -0
  59. package/src/entries/analytics/sentry.json +84 -0
  60. package/src/entries/auth/auth0.json +90 -0
  61. package/src/entries/auth/clerk.json +95 -0
  62. package/src/entries/cms/contentful.json +92 -0
  63. package/src/entries/cms/sanity.json +92 -0
  64. package/src/entries/cms/strapi.json +93 -0
  65. package/src/entries/commerce/medusa.json +91 -0
  66. package/src/entries/commerce/shopify-api.json +91 -0
  67. package/src/entries/communication/sendbird.json +85 -0
  68. package/src/entries/communication/stream-chat.json +94 -0
  69. package/src/entries/database/firebase.json +88 -0
  70. package/src/entries/database/neon.json +94 -0
  71. package/src/entries/database/planetscale.json +95 -0
  72. package/src/entries/database/supabase.json +94 -0
  73. package/src/entries/database/upstash.json +94 -0
  74. package/src/entries/devops/fly-io.json +90 -0
  75. package/src/entries/devops/netlify.json +90 -0
  76. package/src/entries/devops/railway.json +90 -0
  77. package/src/entries/devops/vercel.json +90 -0
  78. package/src/entries/email/mailgun.json +91 -0
  79. package/src/entries/email/postmark.json +91 -0
  80. package/src/entries/email/resend.json +89 -0
  81. package/src/entries/email/sendgrid.json +90 -0
  82. package/src/entries/forms/formspark.json +85 -0
  83. package/src/entries/forms/typeform.json +98 -0
  84. package/src/entries/infrastructure/aws-s3.json +104 -0
  85. package/src/entries/infrastructure/cloudflare-r2.json +92 -0
  86. package/src/entries/infrastructure/cloudflare-workers.json +92 -0
  87. package/src/entries/infrastructure/digital-ocean-spaces.json +87 -0
  88. package/src/entries/integration/nango.json +90 -0
  89. package/src/entries/integration/zapier.json +92 -0
  90. package/src/entries/maps/google-maps.json +89 -0
  91. package/src/entries/maps/mapbox.json +87 -0
  92. package/src/entries/media/deepgram.json +84 -0
  93. package/src/entries/media/imgix.json +84 -0
  94. package/src/entries/media/mux.json +94 -0
  95. package/src/entries/messaging/ably.json +94 -0
  96. package/src/entries/messaging/pusher.json +94 -0
  97. package/src/entries/messaging/twilio.json +94 -0
  98. package/src/entries/messaging/vonage.json +89 -0
  99. package/src/entries/notifications/knock.json +84 -0
  100. package/src/entries/notifications/novu.json +84 -0
  101. package/src/entries/notifications/onesignal.json +84 -0
  102. package/src/entries/payments/lemonsqueezy.json +91 -0
  103. package/src/entries/payments/paddle.json +90 -0
  104. package/src/entries/payments/paypal.json +91 -0
  105. package/src/entries/payments/razorpay.json +85 -0
  106. package/src/entries/payments/square.json +91 -0
  107. package/src/entries/payments/stripe.json +96 -0
  108. package/src/entries/scheduling/cal-com.json +90 -0
  109. package/src/entries/scheduling/calendly.json +90 -0
  110. package/src/entries/search/algolia.json +96 -0
  111. package/src/entries/security/arcjet.json +89 -0
  112. package/src/entries/security/snyk.json +90 -0
  113. package/src/entries/storage/cloudinary.json +93 -0
  114. package/src/entries/storage/uploadthing.json +90 -0
  115. package/src/entries/testing/browserstack.json +86 -0
  116. package/src/entries/testing/checkly.json +89 -0
  117. package/src/entries/workflow/inngest.json +88 -0
  118. package/src/entries/workflow/temporal.json +90 -0
  119. package/src/entries/workflow/trigger-dev.json +89 -0
@@ -0,0 +1,314 @@
1
+ /**
2
+ * Scoring/ranking engine — ranks API entries by relevance to a task description.
3
+ * Uses weighted multi-factor scoring across 6 dimensions.
4
+ *
5
+ * Exports: SCORING_WEIGHTS, DEFAULT_WEIGHTS, ScoreWeights, scoreEntry, rankEntries,
6
+ * tokenize, scoreUseCaseFit, scoreDevExperience, computeConfidence, confidenceLabel
7
+ */
8
/**
 * Relative importance of each of the six scoring dimensions.
 * The values are meant to add up to 1.0 so the weighted total stays in 0-1.
 */
export const SCORING_WEIGHTS = {
    // How well the entry's use cases / description match the query
    useCaseFit: 0.35,
    // Curated quality rating of the entry
    qualityScore: 0.2,
    // SDK, examples and setup ergonomics
    developerExperience: 0.15,
    // Match between pricing model and the caller's pricing preference
    pricingFit: 0.15,
    // Direct hits on name / slug / subcategory
    keywordRelevance: 0.1,
    // How recently the entry was verified
    recencyBonus: 0.05,
};

/** Backwards-compatible alias — the very same object as SCORING_WEIGHTS. */
export const DEFAULT_WEIGHTS = SCORING_WEIGHTS;
19
+ // ─── Tokenizer ────────────────────────────────────────────────────────────────
20
// Filler words that are dropped during tokenization because they carry
// no ranking signal for an API query.
const STOP_WORDS = new Set([
    "the", "and", "for", "are", "but", "not", "you", "all",
    "can", "her", "was", "one", "our", "out", "had", "has",
    "with", "this", "that", "from", "they", "have", "been",
    "more", "will", "when", "what", "your", "which", "how",
    "any", "use", "get", "let", "its", "also", "want", "need",
    "build", "add", "make", "app", "just",
]);

/**
 * Break free text into a de-duplicated set of lowercase tokens.
 *
 * Everything other than a-z, 0-9 and whitespace is treated as a separator.
 * Tokens shorter than two characters and stop words are discarded; two-letter
 * tokens are kept on purpose so that "ai", "db", "ml" and "go" survive.
 */
export function tokenize(text) {
    const kept = new Set();
    const cleaned = text.toLowerCase().replace(/[^a-z0-9\s]/g, " ");
    for (const word of cleaned.split(/\s+/)) {
        if (word.length < 2 || STOP_WORDS.has(word)) {
            continue;
        }
        kept.add(word);
    }
    return kept;
}
41
/**
 * Shared-token ratio between two token sets: |a ∩ b| / max(|a|, |b|).
 * Jaccard-like, but normalized by the larger set rather than by the union.
 * Yields 0 when either set is empty.
 */
function tokenOverlap(a, b) {
    if (a.size === 0 || b.size === 0) {
        return 0;
    }
    const shared = [...a].filter((tok) => b.has(tok)).length;
    const larger = Math.max(a.size, b.size);
    return shared / larger;
}
52
+ // ─── Category Signals ─────────────────────────────────────────────────────────
53
// Keywords whose presence in a query hints at a given entry category.
// Categories without an entry here never receive a category-match score.
const CATEGORY_SIGNALS = {
    email: ["email", "mail", "smtp", "send", "transactional", "newsletter", "inbox", "bounce", "mailer"],
    payments: ["payment", "pay", "checkout", "charge", "billing", "invoice", "subscription", "card", "upi", "merchant", "transaction"],
    ai: ["ai", "ml", "llm", "gpt", "claude", "openai", "anthropic", "model", "completion", "embedding", "chatbot", "generate", "inference", "generative"],
    storage: ["upload", "file", "image", "video", "media", "storage", "cdn", "asset", "photo", "bucket", "picture", "avatar"],
    search: ["search", "query", "index", "algolia", "fulltext", "autocomplete", "facet", "filter", "typeahead"],
    auth: ["auth", "authentication", "login", "signin", "signup", "oauth", "sso", "identity", "session", "token", "jwt", "credential"],
    messaging: ["sms", "text", "message", "messaging", "phone", "call", "voice", "telephony", "twilio", "vonage"],
    analytics: ["analytics", "tracking", "metrics", "event", "crash", "exception", "error", "monitoring", "observability", "posthog", "sentry"],
    database: ["database", "db", "sql", "nosql", "postgres", "mysql", "query", "datastore", "backend", "persist"],
    media: ["audio", "video", "stream", "transcribe", "transcode", "speech", "mux", "deepgram"],
    notifications: ["notification", "push", "alert", "bell", "notify", "apns", "fcm"],
    maps: ["map", "maps", "geo", "location", "address", "geocode", "navigation", "direction", "coordinate"],
    communication: ["chat", "conversation", "channel", "room", "thread", "realtime", "websocket", "pusher", "ably"],
};

/**
 * Score how strongly the query tokens point at `category`.
 * Each query token found among the category's signal words counts as one hit;
 * three or more hits saturate the score at 1.0. Unknown categories score 0.
 */
function scoreCategoryMatch(queryTokens, category) {
    const keywords = CATEGORY_SIGNALS[category] ?? [];
    if (keywords.length === 0) {
        return 0;
    }
    const lookup = new Set(keywords);
    const hits = [...queryTokens].filter((tok) => lookup.has(tok)).length;
    return Math.min(hits / 3, 1.0);
}
80
+ // ─── 1. Use Case Fit (multi-field) ────────────────────────────────────────────
81
// Multiplier applied to a use-case overlap depending on its declared fit.
const FIT_WEIGHTS = { perfect: 1.0, good: 0.7, partial: 0.4 };

/**
 * Blend four textual relevance signals into a 0-1 use-case-fit score:
 *   - best-matching use-case task, scaled by its fit multiplier (40%)
 *   - the bestFor statement (25%)
 *   - the first 200 characters of the description (20%)
 *   - category signal words (15%)
 *
 * An empty query token set scores 0.
 */
export function scoreUseCaseFit(entry, queryTokens) {
    if (queryTokens.size === 0) {
        return 0;
    }
    // Only the single best use case counts; a use case with an unknown fit
    // yields NaN, which never beats the running best and is thus ignored.
    const taskScore = entry.useCases.reduce((best, useCase) => {
        const candidate = tokenOverlap(queryTokens, tokenize(useCase.task)) * FIT_WEIGHTS[useCase.fit];
        return candidate > best ? candidate : best;
    }, 0);
    const purposeScore = tokenOverlap(queryTokens, tokenize(entry.bestFor));
    const descriptionHead = entry.description.slice(0, 200);
    const descriptionScore = tokenOverlap(queryTokens, tokenize(descriptionHead));
    const categoryScore = scoreCategoryMatch(queryTokens, entry.category);
    const blended = taskScore * 0.40 +
        purposeScore * 0.25 +
        descriptionScore * 0.20 +
        categoryScore * 0.15;
    return Math.min(blended, 1.0);
}
116
+ // ─── 2. Quality Score ─────────────────────────────────────────────────────────
117
/**
 * Normalize the entry's quality rating to 0-1.
 *
 * The formula maps a rating of 1 to 0 and a rating of 10 to 1. Ratings outside
 * that range are clamped, and a missing or non-numeric rating scores 0, so a
 * bad entry can never push NaN or an out-of-range value into the weighted
 * total (a NaN total would make the ranking sort comparator unreliable).
 */
function scoreQuality(entry) {
    const normalized = (entry.qualityScore - 1) / 9;
    if (!Number.isFinite(normalized)) {
        return 0;
    }
    return Math.min(Math.max(normalized, 0), 1);
}
120
+ // ─── 3. Developer Experience (sub-scored) ─────────────────────────────────────
121
/**
 * Developer-experience score built from explicit, countable factors:
 *   - TypeScript SDK: 3 pts as the primary language, 1.5 pts as a secondary one
 *   - Code examples: 2 pts for two or more, 1 pt for exactly one
 *   - auth.envVarName is a non-empty value: 1 pt
 *   - installCommand contains "npm install": 1 pt
 *   - Three or more additional SDK languages: 1 pt
 *   - constraints.requiredLanguage (only when given): 2 pts if it is the
 *     primary language, 1 pt if it is a secondary one
 *
 * The sum is divided by the maximum attainable: 8, or 10 when a required
 * language was specified. Language comparisons are case-insensitive.
 */
export function scoreDevExperience(entry, constraints) {
    const primary = entry.sdk.primaryLanguage.toLowerCase();
    const hasSecondary = (lang) => entry.sdk.otherLanguages.some((l) => l.toLowerCase() === lang);
    let points = 0;
    let ceiling = 8;

    if (primary === "typescript") {
        points += 3;
    }
    else if (hasSecondary("typescript")) {
        points += 1.5;
    }

    const exampleCount = entry.codeExamples.length;
    if (exampleCount >= 2) {
        points += 2;
    }
    else if (exampleCount === 1) {
        points += 1;
    }

    const envVar = entry.auth.envVarName;
    if (envVar && envVar.length > 0) {
        points += 1;
    }

    if (entry.sdk.installCommand.includes("npm install")) {
        points += 1;
    }

    if (entry.sdk.otherLanguages.length >= 3) {
        points += 1;
    }

    if (constraints?.requiredLanguage) {
        // The language constraint raises the ceiling even when it is not met,
        // so entries lacking the language are penalized relative to the rest.
        ceiling += 2;
        const wanted = constraints.requiredLanguage.toLowerCase();
        if (primary === wanted) {
            points += 2;
        }
        else if (hasSecondary(wanted)) {
            points += 1;
        }
    }

    return points / ceiling;
}
174
+ // ─── 4. Pricing Fit ───────────────────────────────────────────────────────────
175
/**
 * Score how well the entry's pricing model suits the caller.
 *
 * With constraints.preferFree set:
 *   free / open_source -> 1.0, freemium with a non-null freeTier -> 0.85,
 *   usage_based -> 0.60 (can be free at low usage), anything else -> 0.20.
 * Without it, easier-to-start models get a mild edge:
 *   free / open_source -> 0.70, freemium -> 0.65, usage_based -> 0.60,
 *   anything else -> 0.50 (neutral).
 */
function scorePricingFit(entry, constraints) {
    const wantsFree = Boolean(constraints?.preferFree);
    const { model } = entry.pricing;
    const costsNothing = model === "free" || model === "open_source";

    if (wantsFree) {
        if (costsNothing) {
            return 1.0;
        }
        if (model === "freemium" && entry.pricing.freeTier !== null) {
            return 0.85;
        }
        return model === "usage_based" ? 0.60 : 0.20;
    }

    if (costsNothing) {
        return 0.70;
    }
    switch (model) {
        case "freemium":
            return 0.65;
        case "usage_based":
            return 0.60;
        default:
            return 0.50;
    }
}
201
+ // ─── 5. Keyword Relevance ─────────────────────────────────────────────────────
202
/**
 * Direct keyword relevance: how much of the query overlaps the entry's
 * name (50%), slug (30%) and subcategory (20%). Hyphens in slug and
 * subcategory are treated as word separators. This rewards queries that
 * mention a tool by name (e.g. "stripe", "algolia"). Capped at 1.0.
 */
function scoreKeywordRelevance(entry, queryTokens) {
    const unhyphenate = (value) => value.replace(/-/g, " ");
    const overlapWith = (text) => tokenOverlap(queryTokens, tokenize(text));

    const byName = overlapWith(entry.name);
    const bySlug = overlapWith(unhyphenate(entry.slug));
    const bySubcategory = overlapWith(unhyphenate(entry.subcategory));

    // Name is the strongest signal: the user asked for the tool directly.
    const weighted = byName * 0.5 + bySlug * 0.3 + bySubcategory * 0.2;
    return Math.min(weighted, 1.0);
}
217
+ // ─── 6. Recency Bonus ─────────────────────────────────────────────────────────
218
const MS_PER_DAY = 86_400_000;

/**
 * Recency bonus based on entry.lastVerified.
 *
 * Entries verified within the last 30 days (or dated in the future) score 1.0.
 * The score then falls linearly to 0 at 365 days; older entries score 0.
 * A missing or unparseable date scores 0.
 *
 * The current time is read on every call rather than once at module load, so
 * scores stay correct in a long-running process.
 */
function scoreRecency(entry) {
    let verifiedMs;
    try {
        verifiedMs = new Date(entry.lastVerified).getTime();
    }
    catch {
        // Best-effort: an entry that cannot even be read contributes no bonus.
        return 0;
    }
    // Invalid dates do not throw — getTime() yields NaN — so check explicitly.
    if (Number.isNaN(verifiedMs)) {
        return 0;
    }
    const daysOld = (Date.now() - verifiedMs) / MS_PER_DAY;
    if (daysOld <= 30) {
        return 1.0;
    }
    if (daysOld <= 365) {
        return 1.0 - (daysOld - 30) / 335;
    }
    return 0;
}
238
+ // ─── Category Boost ───────────────────────────────────────────────────────────
239
/**
 * Multiply `score` by up to +12% when the entry belongs to the category
 * detected for the query. The boost is proportional to the detection
 * confidence and only applies when a category was detected with confidence
 * of at least 0.4. The boosted score is capped at 1.0; otherwise the score is
 * returned unchanged.
 */
function applyCategoryBoost(score, entry, detectedCategory, detectedConfidence) {
    const eligible = Boolean(detectedCategory) &&
        !(detectedConfidence < 0.4) &&
        entry.category === detectedCategory;
    if (!eligible) {
        return score;
    }
    const multiplier = 1 + detectedConfidence * 0.12;
    return Math.min(score * multiplier, 1.0);
}
248
+ // ─── Confidence Score ─────────────────────────────────────────────────────────
249
/**
 * Compute a confidence value (0-1, rounded to 2 decimals) for how certain we
 * are about the top result.
 *
 * Weighted combination of three signals, each limited to 0-1:
 *   - fit (40%): the top result's useCaseFit breakdown value; 0.5 or more
 *     counts as a full signal
 *   - category (35%): the category detection confidence, or 0 when no
 *     category was detected
 *   - separation (25%): score gap between the top result and the runner-up
 *     (allResults[1]); a gap of 0.2 or more counts as full. With fewer than
 *     two results a neutral 0.5 is used.
 *
 * The thresholds that turn this number into a label live in confidenceLabel
 * (>= 0.7 "high", >= 0.4 "medium", otherwise "low").
 *
 * The separation signal and the final value are clamped at 0 so an unsorted
 * or inconsistent result list cannot produce a negative confidence.
 */
export function computeConfidence(topResult, detectedCategory, detectedConfidence, allResults) {
    const useCaseFit = topResult.scoreBreakdown["useCaseFit"] ?? 0;
    const fitSignal = Math.min(useCaseFit / 0.5, 1.0);
    const catSignal = detectedCategory ? detectedConfidence : 0;
    const runnerUp = allResults.length >= 2 ? allResults[1] : undefined;
    const separation = runnerUp
        ? Math.min(Math.max((topResult.score - runnerUp.score) * 5, 0), 1.0)
        : 0.5;
    const raw = fitSignal * 0.40 + catSignal * 0.35 + separation * 0.25;
    const bounded = Math.min(Math.max(raw, 0), 1.0);
    return Math.round(bounded * 100) / 100;
}
271
/**
 * Map a numeric confidence to a label:
 * 0.7 and above is "high", 0.4 and above is "medium", anything else is "low".
 */
export function confidenceLabel(confidence) {
    return confidence >= 0.7 ? "high" : confidence >= 0.4 ? "medium" : "low";
}
279
+ // ─── Main Scoring Function ────────────────────────────────────────────────────
280
/**
 * Score a single entry against a query.
 *
 * Tokenizes context.expandedQuery when provided (otherwise the raw query),
 * computes the six raw sub-scores, combines them with `weights`, then applies
 * the category boost from the context.
 *
 * Returns the entry, its unrounded total score, and a breakdown holding each
 * raw sub-score plus the total, all rounded to two decimals.
 */
export function scoreEntry(entry, query, constraints, weights = SCORING_WEIGHTS, context) {
    const queryTokens = tokenize(context?.expandedQuery ?? query);
    const raw = {
        useCaseFit: scoreUseCaseFit(entry, queryTokens),
        qualityScore: scoreQuality(entry),
        developerExperience: scoreDevExperience(entry, constraints),
        pricingFit: scorePricingFit(entry, constraints),
        keywordRelevance: scoreKeywordRelevance(entry, queryTokens),
        recencyBonus: scoreRecency(entry),
    };
    const weightedSum = raw.useCaseFit * weights.useCaseFit +
        raw.qualityScore * weights.qualityScore +
        raw.developerExperience * weights.developerExperience +
        raw.pricingFit * weights.pricingFit +
        raw.keywordRelevance * weights.keywordRelevance +
        raw.recencyBonus * weights.recencyBonus;
    const score = applyCategoryBoost(weightedSum, entry, context?.detectedCategory ?? null, context?.detectedConfidence ?? 0);
    const toTwoDecimals = (value) => Math.round(value * 100) / 100;
    const scoreBreakdown = {
        useCaseFit: toTwoDecimals(raw.useCaseFit),
        qualityScore: toTwoDecimals(raw.qualityScore),
        developerExperience: toTwoDecimals(raw.developerExperience),
        pricingFit: toTwoDecimals(raw.pricingFit),
        keywordRelevance: toTwoDecimals(raw.keywordRelevance),
        recencyBonus: toTwoDecimals(raw.recencyBonus),
        total: toTwoDecimals(score),
    };
    return { entry, score, scoreBreakdown };
}
307
+ // ─── Ranking Function ─────────────────────────────────────────────────────────
308
/**
 * Score every entry against the query and return the `topN` highest-scoring
 * results (default 3), best first. An empty entry list yields an empty array.
 */
export function rankEntries(entries, query, constraints, topN = 3, weights = SCORING_WEIGHTS, context) {
    if (entries.length === 0) {
        return [];
    }
    return entries
        .map((entry) => scoreEntry(entry, query, constraints, weights, context))
        .sort((left, right) => right.score - left.score)
        .slice(0, topN);
}
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Search layer — builds a candidate pool with the TF-IDF index and ranks it with the scoring engine.
3
+ * Uses synonym expansion and category detection to improve result quality.
4
+ *
5
+ * Exports: SearchConstraints, RankedResult, findApis, getApiBySlug, getApisByCategory
6
+ */
7
+ import type { ApiEntry } from "./schema.js";
8
+ export interface SearchConstraints { // optional caller preferences accepted by findApis
9
+ maxPrice?: string; // NOTE(review): not read by the search or scorer code visible here — confirm where it is applied
10
+ requiredLanguage?: string; // matched case-insensitively against the entry's SDK languages by the developer-experience score
11
+ preferFree?: boolean; // when left unset, findApis falls back to the preference detected in the query text
12
+ }
13
+ export interface RankedResult { // one scored entry as returned by findApis
14
+ entry: ApiEntry; // the API entry that was scored
15
+ score: number; // unrounded total score; results are sorted by this value, highest first
16
+ scoreBreakdown: Record<string, number>; // raw score per factor plus "total", each rounded to two decimals
17
+ /** Confidence in this recommendation (0-1); only set on the top result */
18
+ confidence?: number;
19
+ /** Human-readable confidence label: "high" | "medium" | "low"; only set on the top result */
20
+ confidenceLabel?: "high" | "medium" | "low";
21
+ }
22
+ /**
23
+ * Find and rank APIs for a given task description.
24
+ *
25
+ * Pipeline (as implemented in search.js):
26
+ * 1. Detect phrases in the query — a detected preferFree is used only when the caller left constraints.preferFree unset
27
+ * 2. Expand query with synonyms
28
+ * 3. Detect category for candidate selection and the scoring boost
29
+ * 4. TF-IDF search on the expanded query builds the candidate pool (up to topN * 8 hits)
30
+ * 5. Add every entry of the detected category (confidence >= 0.4); if the pool is still smaller than topN, add all entries
31
+ * 6. Rank all candidates with scorer using expanded query + context, then attach confidence to the top result
+ *
+ * Returns an empty array for an empty or whitespace-only query. topN defaults to 3.
32
+ */
33
+ export declare function findApis(query: string, constraints?: SearchConstraints, topN?: number): RankedResult[];
34
+ /** Get a single API entry by its slug; the declared type allows null (presumably for an unknown slug — confirm in db.js) */
35
+ export declare function getApiBySlug(slug: string): ApiEntry | null;
36
+ /** Get all APIs in a category, ranked by quality score (NOTE(review): ordering comes from getByCategory in db.js — confirm there) */
37
+ export declare function getApisByCategory(category: string): ApiEntry[];
@@ -0,0 +1,111 @@
1
+ /**
2
+ * Search layer — builds a candidate pool with the TF-IDF index over entries from ./db.js and ranks it with the scoring engine.
3
+ * Uses synonym expansion and category detection to improve result quality.
4
+ *
5
+ * Exports: SearchConstraints, RankedResult, findApis, getApiBySlug, getApisByCategory
6
+ */
7
+ import { getAllEntries, getBySlug, getByCategory } from "./db.js";
8
+ import { rankEntries, computeConfidence, confidenceLabel } from "./scorer.js";
9
+ import { expandQuery, detectCategory, detectPhrases } from "./synonyms.js";
10
+ import { getTfIdfEngine } from "./tfidf.js";
11
+ import { createLogger } from "../utils/logger.js";
12
+ const logger = createLogger("search");
13
/**
 * Find and rank APIs for a given task description.
 *
 * Pipeline:
 *   1. Detect phrases in the query; a detected preferFree is applied only when
 *      the caller did not set constraints.preferFree.
 *   2. Expand the query with synonyms.
 *   3. Detect the query's category (used for candidate selection and boost).
 *   4. Build the candidate pool: TF-IDF hits for the expanded query, plus every
 *      entry of the detected category, plus all entries when the pool is still
 *      smaller than topN.
 *   5. Rank the pool with the scorer (expanded query + category context).
 *   6. Attach confidence and its label to the top result.
 *
 * @param query - Free-text task description; empty or whitespace-only yields [].
 * @param constraints - Optional preferences (e.g. requiredLanguage, preferFree).
 * @param topN - Maximum number of results to return (default 3).
 * @returns Up to topN ranked results, best first.
 */
export function findApis(query, constraints, topN = 3) {
    logger.debug("findApis called", { query, constraints, topN });
    if (!query || query.trim().length === 0) {
        logger.warn("Empty query provided to findApis");
        return [];
    }
    const trimmedQuery = query.trim();

    // Step 1: Phrase detection — auto-apply preferFree from query language.
    // An explicit caller value (including false) always wins over detection.
    const phrases = detectPhrases(trimmedQuery);
    const effectiveConstraints = {
        ...constraints,
        preferFree: constraints?.preferFree ?? phrases.preferFree,
    };

    // Step 2: Synonym expansion
    const expandedQuery = expandQuery(trimmedQuery);
    logger.debug("Query expanded", {
        original: trimmedQuery,
        expanded: expandedQuery.length > trimmedQuery.length
            ? `${expandedQuery.slice(0, 80)}...`
            : "unchanged",
    });

    // Step 3: Category detection for scoring context
    const { category: detectedCategory, confidence: detectedConfidence } = detectCategory(trimmedQuery);
    logger.debug("Category detected", { detectedCategory, detectedConfidence });
    const scoringContext = {
        expandedQuery,
        detectedCategory,
        detectedConfidence,
    };

    // Step 4: Build candidate pool via TF-IDF index.
    // The index is built lazily from all entries on the first call.
    const all = getAllEntries();
    const engine = getTfIdfEngine();
    if (!engine.isBuilt) {
        engine.build(all);
        logger.debug("TF-IDF index built", { entryCount: engine.size });
    }
    // Copy the hits so the pool can grow without mutating an array that the
    // TF-IDF engine returned (and may still hold a reference to).
    const candidates = [...engine.search(expandedQuery, topN * 8)];
    const seenSlugs = new Set(candidates.map((e) => e.slug));
    const addIfNew = (entry) => {
        if (!seenSlugs.has(entry.slug)) {
            candidates.push(entry);
            seenSlugs.add(entry.slug);
        }
    };

    // Also include all entries in the detected category (high-signal candidates)
    if (detectedCategory && detectedConfidence >= 0.4) {
        for (const entry of getByCategory(detectedCategory)) {
            addIfNew(entry);
        }
    }

    // Safety net: if still fewer candidates than requested, add remaining entries
    if (candidates.length < topN) {
        logger.debug("Broadening to all entries — too few TF-IDF candidates", {
            candidateCount: candidates.length,
        });
        for (const entry of all) {
            addIfNew(entry);
        }
    }

    // Step 5: Rank with scorer. The raw query is passed, but the scorer
    // prefers scoringContext.expandedQuery when it is present.
    const results = rankEntries(candidates, trimmedQuery, effectiveConstraints, topN, undefined, scoringContext);

    // Step 6: Attach confidence to top result
    if (results.length > 0 && results[0]) {
        const conf = computeConfidence(results[0], detectedCategory, detectedConfidence, results);
        results[0].confidence = conf;
        results[0].confidenceLabel = confidenceLabel(conf);
    }

    logger.debug("findApis results", {
        candidateCount: candidates.length,
        resultCount: results.length,
        topSlug: results[0]?.entry.slug,
        confidence: results[0]?.confidence,
        detectedCategory,
    });
    return results;
}
104
+ /** Get a single API entry by its slug — thin pass-through to getBySlug from ./db.js, whose result is returned unchanged */
105
+ export function getApiBySlug(slug) {
106
+ return getBySlug(slug);
107
+ }
108
+ /** Get all APIs in a category — thin pass-through to getByCategory from ./db.js; any quality-score ordering is decided there (NOTE(review): confirm) */
109
+ export function getApisByCategory(category) {
110
+ return getByCategory(category);
111
+ }
@@ -0,0 +1,36 @@
1
+ /**
2
+ * Synonym expansion, phrase detection, and category detection for query processing.
3
+ *
4
+ * Exports:
5
+ * SYNONYM_GROUPS — raw synonym groups for testing
6
+ * expandQuery — expands a query with synonyms
7
+ * detectPhrases — extracts modifier intent from a query
8
+ * detectCategory — maps a query to a known category with confidence
9
+ */
10
+ export declare const SYNONYM_GROUPS: readonly string[][]; // raw synonym groups (exported for testing); only the outer array is typed readonly, inner arrays are mutable
11
+ /**
12
+ * Expand a query by adding synonyms for any terms it contains.
13
+ * Example: "log in with Google" → "log in with Google auth authentication
14
+ * login signin signup register sso oauth identity..."
+ * findApis feeds the returned string to the TF-IDF search and to the scorer,
+ * and treats a result no longer than the input as "unchanged" in its debug log.
15
+ */
16
+ export declare function expandQuery(query: string): string;
17
+ export interface PhraseModifiers { // modifier flags returned by detectPhrases
18
+ /** User wants free or cheap options; findApis uses this as the fallback when constraints.preferFree is unset */
19
+ preferFree: boolean;
20
+ /** User wants open-source or self-hostable options (NOTE(review): not consumed by findApis in search.js) */
21
+ preferOpenSource: boolean;
22
+ /** User emphasizes reliability / production-grade (NOTE(review): not consumed by findApis in search.js) */
23
+ preferReliable: boolean;
24
+ /** Serverless / edge / Vercel context (NOTE(review): not consumed by findApis in search.js) */
25
+ preferServerless: boolean;
26
+ /** Reference to another API ("like Firebase but..."); presumably null when the query names none — confirm in synonyms.js */
27
+ referenceSlug: string | null;
28
+ }
29
+ /** Extract modifier intent from a query; findApis calls this with the trimmed query and reads only preferFree from the result */
30
+ export declare function detectPhrases(query: string): PhraseModifiers;
31
+ export interface CategoryMatch { // result of detectCategory
32
+ category: string | null; // detected category name; callers treat a falsy value as "no category"
33
+ confidence: number; // callers in search.js and scorer.js ignore detections below 0.4
34
+ }
35
+ /** Detect the most likely category for a query, with a confidence score; findApis passes the trimmed, unexpanded query */
36
+ export declare function detectCategory(query: string): CategoryMatch;