@mixpeek/react-searchkit 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +224 -0
- package/dist/AIAnswer.d.ts +9 -0
- package/dist/IntentCTA.d.ts +8 -0
- package/dist/PoweredBy.d.ts +9 -0
- package/dist/ResultCard.d.ts +11 -0
- package/dist/SearchButton.d.ts +9 -0
- package/dist/SearchInput.d.ts +11 -0
- package/dist/SearchKit.d.ts +35 -0
- package/dist/SearchModal.d.ts +28 -0
- package/dist/SearchResults.d.ts +14 -0
- package/dist/ShareLink.d.ts +7 -0
- package/dist/ZeroResults.d.ts +7 -0
- package/dist/api/client.d.ts +44 -0
- package/dist/filters/FacetFilter.d.ts +10 -0
- package/dist/filters/FilterPanel.d.ts +12 -0
- package/dist/filters/RangeFilter.d.ts +11 -0
- package/dist/filters/SmartFilter.d.ts +10 -0
- package/dist/filters/index.d.ts +4 -0
- package/dist/hooks/useFilters.d.ts +10 -0
- package/dist/hooks/useKeyboardShortcut.d.ts +12 -0
- package/dist/hooks/useRecentSearches.d.ts +7 -0
- package/dist/hooks/useSearch.d.ts +22 -0
- package/dist/index.d.ts +39 -0
- package/dist/searchkit.cjs.js +3 -0
- package/dist/searchkit.esm.js +1771 -0
- package/dist/searchkit.umd.js +3 -0
- package/dist/style.css +1 -0
- package/dist/types.d.ts +250 -0
- package/package.json +71 -0
- package/scripts/bootstrap.mjs +149 -0
- package/scripts/index.mjs +364 -0
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* mixpeek-index — Scrape a website and index it into a Mixpeek collection,
|
|
4
|
+
* then scaffold a retriever ready for use with <SearchKit />.
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* mixpeek-index --api-key <KEY> --url <URL> [options]
|
|
8
|
+
*
|
|
9
|
+
* Options:
|
|
10
|
+
* --api-key Mixpeek API key (required)
|
|
11
|
+
* --url Website URL to scrape and index (required)
|
|
12
|
+
* --base-url Mixpeek API base URL (default: https://api.mixpeek.com)
|
|
13
|
+
* --namespace Existing namespace ID to use (a new namespace is created when omitted)
|
|
14
|
+
* --namespace-name Name for a new namespace (creates one if --namespace not given)
|
|
15
|
+
* --slug Retriever slug for SearchKit (default: searchkit-<timestamp>)
|
|
16
|
+
* --wait Wait for indexing to complete before exiting (default: true)
|
|
17
|
+
* --poll-interval Seconds between batch status polls (default: 10)
|
|
18
|
+
* --timeout Max seconds to wait for indexing (default: 300)
|
|
19
|
+
* --help Show this help
|
|
20
|
+
*
|
|
21
|
+
* Example:
|
|
22
|
+
* mixpeek-index --api-key mxp_sk_abc123 --url https://docs.example.com
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import { parseArgs } from "node:util";
|
|
26
|
+
|
|
27
|
+
// Help text printed for --help and after an argument-parsing error.
const USAGE = `
mixpeek-index — Scrape a website and index it into Mixpeek, then scaffold a retriever.

Usage:
  mixpeek-index --api-key <KEY> --url <URL> [options]

Options:
  --api-key         Mixpeek API key (required)
  --url             Website URL to scrape and index (required)
  --base-url        API base URL (default: https://api.mixpeek.com)
  --namespace       Existing namespace ID to use
  --namespace-name  Name for a new namespace (auto-created if --namespace not set)
  --slug            Retriever name/slug for SearchKit (default: searchkit-<timestamp>)
  --wait            Wait for indexing to complete (default: true)
  --no-wait         Do not wait for indexing (needs Node.js 20.16+ / 22.4+)
  --poll-interval   Poll interval in seconds (default: 10)
  --timeout         Max wait time in seconds (default: 300)
  --help            Show this help

Example:
  mixpeek-index --api-key mxp_sk_abc123 --url https://docs.example.com --slug my-docs-search
`;

/**
 * Convert a seconds-valued CLI option into milliseconds.
 *
 * Prints an error and exits with status 1 when the value is not an integer
 * or is below `minSeconds`. Without this check a non-numeric value becomes
 * NaN, which makes the poll delay fire almost immediately and makes the
 * wait deadline comparison always false.
 *
 * @param {string} flag - Option name without the leading dashes (for messages).
 * @param {string|undefined} raw - Value received from the command line.
 * @param {string} fallback - Default used when `raw` is empty or missing.
 * @param {number} minSeconds - Smallest accepted value, in seconds.
 * @returns {number} The option value in milliseconds.
 */
function secondsOptionToMs(flag, raw, fallback, minSeconds) {
  const seconds = Number.parseInt(raw || fallback, 10);
  if (Number.isNaN(seconds) || seconds < minSeconds) {
    console.error(`Error: --${flag} must be an integer >= ${minSeconds} (got "${raw}").\n`);
    process.exit(1);
  }
  return seconds * 1000;
}

let args;
try {
  args = parseArgs({
    options: {
      "api-key": { type: "string" },
      url: { type: "string" },
      "base-url": { type: "string", default: "https://api.mixpeek.com" },
      namespace: { type: "string" },
      "namespace-name": { type: "string" },
      slug: { type: "string" },
      wait: { type: "boolean", default: true },
      "poll-interval": { type: "string", default: "10" },
      timeout: { type: "string", default: "300" },
      help: { type: "boolean", default: false },
    },
    strict: true,
    // --wait defaults to true, so the negated form (--no-wait) is the only
    // way to turn it off. Node.js releases that predate this setting are
    // expected to ignore it and keep the previous behaviour.
    allowNegative: true,
  });
} catch (e) {
  console.error(`Error: ${e.message}\n`);
  console.error(USAGE);
  process.exit(1);
}

if (args.values.help) {
  console.log(USAGE);
  process.exit(0);
}

const apiKey = args.values["api-key"];
const targetUrl = args.values["url"];

if (!apiKey) {
  console.error("Error: --api-key is required.\n");
  process.exit(1);
}
if (!targetUrl) {
  console.error("Error: --url is required.\n");
  process.exit(1);
}

// Reject an unparseable URL before any remote resource is created; the main
// flow later calls `new URL(targetUrl)` and would otherwise fail mid-way.
try {
  new URL(targetUrl);
} catch {
  console.error(`Error: --url must be an absolute URL (got "${targetUrl}").\n`);
  process.exit(1);
}

// Trailing slashes are stripped so request paths can be appended directly.
const BASE_URL = (args.values["base-url"] || "https://api.mixpeek.com").replace(/\/+$/, "");
const slug = args.values["slug"] || `searchkit-${Date.now()}`;
const shouldWait = args.values["wait"];
// The poll interval must be at least one second to avoid a tight request loop.
const pollInterval = secondsOptionToMs("poll-interval", args.values["poll-interval"], "10", 1);
// A timeout of 0 is allowed and means the wait loop is skipped.
const timeout = secondsOptionToMs("timeout", args.values["timeout"], "300", 0);
|
|
94
|
+
|
|
95
|
+
// ---------------------------------------------------------------------------
|
|
96
|
+
// Helpers
|
|
97
|
+
// ---------------------------------------------------------------------------
|
|
98
|
+
|
|
99
|
+
/**
 * Build the HTTP headers sent with each API request.
 *
 * @param {string} [nsId] - Namespace ID; when truthy it is sent as `X-Namespace`.
 * @returns {Object} JSON content type, bearer authorization built from the
 *   module-level `apiKey`, and optionally the namespace header.
 */
function headers(nsId) {
  const base = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${apiKey}`,
  };
  return nsId ? { ...base, "X-Namespace": nsId } : base;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Send a JSON request to the API and return the decoded response body.
 *
 * The body is read as text and parsed separately so that a non-JSON or empty
 * response (for example an HTML error page from a proxy, or a 204) still
 * produces an error that carries the HTTP status instead of a bare
 * JSON SyntaxError.
 *
 * @param {string} method - HTTP method.
 * @param {string} path - Request path appended to `BASE_URL`.
 * @param {Object|null} [body] - Payload serialised as JSON; omitted when falsy.
 * @param {string} [nsId] - Namespace ID forwarded to `headers()`.
 * @returns {Promise<*>} Parsed JSON body, or `{}` for an empty successful response.
 * @throws {Error} When the response status is not OK, or a successful
 *   response body is not valid JSON.
 */
async function api(method, path, body, nsId) {
  const res = await fetch(`${BASE_URL}${path}`, {
    method,
    headers: headers(nsId),
    body: body ? JSON.stringify(body) : undefined,
  });

  const raw = await res.text();
  let data = {};
  let isJson = true;
  if (raw) {
    try {
      data = JSON.parse(raw);
    } catch {
      isJson = false;
    }
  }

  if (!res.ok) {
    let msg;
    if (!raw) {
      msg = res.statusText || "empty response body";
    } else if (!isJson) {
      // Keep the message bounded; error pages can be large.
      msg = raw.slice(0, 500);
    } else {
      const detail = data?.error?.message || data?.detail;
      // `detail` may be structured (e.g. a list of validation errors);
      // stringify it so it does not render as "[object Object]".
      msg = typeof detail === "string" ? detail : JSON.stringify(detail ?? data);
    }
    throw new Error(`${method} ${path} → ${res.status}: ${msg}`);
  }

  if (!isJson) {
    throw new Error(
      `${method} ${path} → ${res.status}: expected a JSON response but received: ${raw.slice(0, 200)}`
    );
  }
  return data;
}
|
|
121
|
+
|
|
122
|
+
/**
 * Pause for a number of milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
125
|
+
|
|
126
|
+
// ---------------------------------------------------------------------------
|
|
127
|
+
// Main flow
|
|
128
|
+
// ---------------------------------------------------------------------------
|
|
129
|
+
|
|
130
|
+
/**
 * Main flow: resolve or create a namespace, create a bucket, add the target
 * URL as a bucket object, create and trigger a collection, optionally wait
 * for the batch to finish, then create a retriever and print a summary.
 *
 * Reads the module-level CLI values (`args`, `targetUrl`, `slug`,
 * `shouldWait`, `apiKey`, `BASE_URL`) and performs all requests via `api()`.
 *
 * @returns {Promise<void>}
 * @throws {TypeError} When `targetUrl` is not a parseable URL (raised before
 *   any request is sent).
 * @throws {Error} When a request fails or a response lacks an expected ID.
 */
async function run() {
  // Parse the URL first: it is needed for metadata and the summary, and doing
  // it here means an invalid URL fails before any remote resource is created.
  const { hostname } = new URL(targetUrl);

  console.log(`\n🌐 mixpeek-index — Indexing ${targetUrl}\n`);

  // ── Step 1: Resolve namespace ──────────────────────────────────────────────
  let namespaceId = args.values["namespace"];

  if (!namespaceId) {
    const nsName = args.values["namespace-name"] || `searchkit-ns-${Date.now()}`;
    console.log(`📦 Creating namespace "${nsName}"...`);
    const nsData = await api("POST", "/v1/namespaces", {
      namespace_name: nsName,
      feature_extractors: [
        { feature_extractor_name: "web_scraper", version: "v1" },
        { feature_extractor_name: "text_extractor", version: "v1" },
      ],
    });
    namespaceId = requireId(nsData.namespace_id, "namespace_id", "namespace creation");
    console.log(`   ✓ Namespace: ${namespaceId}`);
  } else {
    console.log(`   ✓ Using existing namespace: ${namespaceId}`);
  }

  // ── Step 2: Create bucket ──────────────────────────────────────────────────
  const bucketName = `${slug}-bucket`;
  console.log(`\n🗄️ Creating bucket "${bucketName}"...`);
  const bucketData = await api(
    "POST",
    "/v1/buckets",
    {
      bucket_name: bucketName,
      description: `Web content bucket for ${targetUrl}`,
      bucket_schema: {
        properties: {
          url: { type: "string" },
          title: { type: "string" },
        },
      },
    },
    namespaceId
  );
  const bucketId = requireId(bucketData.bucket_id, "bucket_id", "bucket creation");
  console.log(`   ✓ Bucket: ${bucketId}`);

  // ── Step 3: Add URL as bucket object ──────────────────────────────────────
  console.log(`\n📎 Adding ${targetUrl} as bucket object...`);
  const objData = await api(
    "POST",
    `/v1/buckets/${bucketId}/objects`,
    {
      url: targetUrl,
      metadata: { title: hostname, source: "web-scrape" },
    },
    namespaceId
  );
  // The object ID is only reported; no later step depends on it.
  const objectId = objData.object_id;
  console.log(`   ✓ Object: ${objectId}`);

  // ── Step 4: Create collection with web_scraper ─────────────────────────────
  const collectionName = `${slug}-collection`;
  console.log(`\n🗂️ Creating collection "${collectionName}"...`);
  const colData = await api(
    "POST",
    "/v1/collections",
    {
      collection_name: collectionName,
      description: `Web-scraped content from ${targetUrl}`,
      source: { type: "bucket", bucket_ids: [bucketId] },
      feature_extractor: {
        feature_extractor_name: "web_scraper",
        version: "v1",
        input_mappings: { url: "url" },
      },
    },
    namespaceId
  );
  const collectionId = requireId(colData.collection_id, "collection_id", "collection creation");
  console.log(`   ✓ Collection: ${collectionId}`);

  // ── Step 5: Trigger processing ────────────────────────────────────────────
  console.log(`\n⚙️ Triggering collection processing...`);
  const triggerData = await api(
    "POST",
    `/v1/collections/${collectionId}/trigger`,
    {},
    namespaceId
  );
  const batchId = triggerData.batch_id;
  console.log(`   ✓ Batch: ${batchId} (${triggerData.object_count} objects)`);

  // ── Step 6: Wait for completion ───────────────────────────────────────────
  let indexingConfirmed = false;
  if (shouldWait) {
    // The batch ID is only needed for polling, so only insist on it here.
    requireId(batchId, "batch_id", "collection trigger");
    indexingConfirmed = await waitForBatch(bucketId, batchId, namespaceId);
  }

  // ── Step 7: Get feature URI ───────────────────────────────────────────────
  console.log(`\n🔍 Discovering collection features...`);
  const featureUri = await discoverFeatureUri(collectionId, namespaceId);
  console.log(`   ✓ Feature URI: ${featureUri}`);

  // ── Step 8: Create retriever ──────────────────────────────────────────────
  console.log(`\n🔎 Creating retriever "${slug}"...`);
  const retData = await api(
    "POST",
    "/v1/retrievers",
    {
      retriever_name: slug,
      description: `SearchKit retriever for ${targetUrl}`,
      collection_identifiers: [collectionId],
      input_schema: {
        query: { type: "text", required: true, description: "Search query" },
        doc_type: { type: "text", required: false, description: "Filter by page type (page/code/image)" },
        keyword: { type: "text", required: false, description: "Keyword filter on content" },
      },
      stages: [
        {
          stage_name: "semantic_search",
          stage_type: "filter",
          config: {
            stage_id: "feature_search",
            parameters: {
              searches: [
                {
                  feature_uri: featureUri,
                  query: { input_mode: "text", value: "{{INPUT.query}}" },
                  top_k: 20,
                },
              ],
              final_top_k: 20,
            },
          },
        },
        {
          stage_name: "sort_by_relevance",
          stage_type: "sort",
          config: {
            stage_id: "sort_relevance",
            parameters: { score_field: "score", direction: "desc" },
          },
        },
      ],
    },
    namespaceId
  );
  // The ID is accepted either nested under `retriever` or at the top level.
  const retrieverId = requireId(
    retData.retriever?.retriever_id || retData.retriever_id,
    "retriever_id",
    "retriever creation"
  );
  console.log(`   ✓ Retriever: ${retrieverId}`);

  // ── Done ──────────────────────────────────────────────────────────────────
  // Only claim completion when the batch was actually observed as COMPLETED.
  const headline = indexingConfirmed
    ? "✅ Indexing complete!"
    : "✅ Setup complete — indexing still in progress";

  // NOTE(review): the curl example below echoes the real API key to the
  // terminal, whereas the JSX snippet uses a placeholder — confirm intended.
  console.log(`
╔════════════════════════════════════════════════════════╗
║  ${headline.padEnd(53)}║
╚════════════════════════════════════════════════════════╝

Resources created:
  namespace_id:  ${namespaceId}
  bucket_id:     ${bucketId}
  collection_id: ${collectionId}
  retriever_id:  ${retrieverId}

Use with SearchKit (React component):

  import { SearchKit } from "@mixpeek/react-searchkit";

  <SearchKit
    projectKey="${retrieverId}"
    bearerToken="<YOUR_API_KEY>"
    apiBaseUrl="${BASE_URL}"
    placeholder="Search ${hostname}..."
  />

Or test via API:

  curl -X POST ${BASE_URL}/v1/retrievers/${retrieverId}/execute \\
    -H "Authorization: Bearer ${apiKey}" \\
    -H "X-Namespace: ${namespaceId}" \\
    -H "Content-Type: application/json" \\
    -d '{"inputs": {"query": "your search query"}, "settings": {"limit": 5}}'
`);

  // Output machine-readable JSON for piping/scripting.
  // The key set (including api_key) is left unchanged so any existing
  // consumer of the __RESULT__ line keeps working.
  const output = {
    namespace_id: namespaceId,
    bucket_id: bucketId,
    collection_id: collectionId,
    retriever_id: retrieverId,
    api_key: apiKey,
    base_url: BASE_URL,
  };
  process.stdout.write(`\n__RESULT__=${JSON.stringify(output)}\n`);
}

/**
 * Return `value` if it is truthy, otherwise throw. Prevents a missing ID
 * from being interpolated into later request paths as "undefined".
 *
 * @param {*} value - ID extracted from an API response.
 * @param {string} field - Response field name, for the error message.
 * @param {string} step - Human-readable step name, for the error message.
 * @returns {*} The same `value`.
 * @throws {Error} When `value` is falsy.
 */
function requireId(value, field, step) {
  if (!value) {
    throw new Error(`Response from ${step} did not include "${field}".`);
  }
  return value;
}

/**
 * Poll the batch status until it completes, fails, or the module-level
 * `timeout` elapses. Exits the process with status 1 on FAILED/ERROR.
 *
 * @param {string} bucketId - Bucket that owns the batch.
 * @param {string} batchId - Batch to poll.
 * @param {string} namespaceId - Namespace forwarded to `api()`.
 * @returns {Promise<boolean>} `true` when COMPLETED was observed, `false` on timeout.
 */
async function waitForBatch(bucketId, batchId, namespaceId) {
  console.log(`\n⏳ Waiting for indexing to complete (timeout: ${timeout / 1000}s)...`);
  const deadline = Date.now() + timeout;
  let dotCount = 0;

  while (Date.now() < deadline) {
    await sleep(pollInterval);
    const batchInfo = await api(
      "GET",
      `/v1/buckets/${bucketId}/batches/${batchId}`,
      null,
      namespaceId
    );
    const status = batchInfo.status || "UNKNOWN";
    // "\r" rewrites the same terminal line on every poll.
    process.stdout.write(`\r   ${".".repeat((dotCount++ % 3) + 1).padEnd(3)} ${status}   `);

    if (status === "COMPLETED") {
      // `??` rather than `||` so a real count of 0 is shown as 0.
      const docCount = batchInfo.document_count ?? "?";
      console.log(`\n   ✓ Indexing complete! Documents: ${docCount}`);
      return true;
    }
    if (status === "FAILED" || status === "ERROR") {
      console.error(`\n   ✗ Batch failed with status: ${status}`);
      process.exit(1);
    }
  }

  console.warn(
    `\n   ⚠️ Timed out after ${timeout / 1000}s; batch ${batchId} has not completed yet. Continuing with retriever setup.`
  );
  return false;
}

/**
 * Look up the collection's text feature address, falling back to a default
 * URI when none is found or the lookup fails (best effort by design).
 *
 * @param {string} collectionId - Collection whose features are listed.
 * @param {string} namespaceId - Namespace forwarded to `api()`.
 * @returns {Promise<string>} Feature URI to search against.
 */
async function discoverFeatureUri(collectionId, namespaceId) {
  const defaultFeatureUri = `mixpeek://web_scraper@v1/intfloat__multilingual_e5_large_instruct`;
  try {
    const featData = await api(
      "GET",
      `/v1/collections/${collectionId}/features`,
      null,
      namespaceId
    );
    const features = featData.features || [];
    const textFeature = features.find(
      (f) => f.vector_index?.purpose === "text" || f.feature_address?.includes("multilingual")
    );
    if (textFeature?.feature_address) {
      return textFeature.feature_address;
    }
  } catch (err) {
    // Still non-fatal, but surface the reason instead of discarding it.
    console.log(`   ℹ️ Using default feature URI (${err.message})`);
  }
  return defaultFeatureUri;
}
|
|
360
|
+
|
|
361
|
+
// Entry point: start the main flow; any rejection is reported on stderr and
// the process exits with status 1.
const reportFatal = (failure) => {
  console.error(`\n❌ Error: ${failure.message}`);
  process.exit(1);
};

run().catch(reportFatal);
|