@sholajegede/bright-data-sync 0.1.1-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +182 -0
- package/dist/client/_generated/_ignore.d.ts +1 -0
- package/dist/client/_generated/_ignore.d.ts.map +1 -0
- package/dist/client/_generated/_ignore.js +3 -0
- package/dist/client/_generated/_ignore.js.map +1 -0
- package/dist/client/index.d.ts +77 -0
- package/dist/client/index.d.ts.map +1 -0
- package/dist/client/index.js +61 -0
- package/dist/client/index.js.map +1 -0
- package/dist/component/_generated/api.d.ts +34 -0
- package/dist/component/_generated/api.d.ts.map +1 -0
- package/dist/component/_generated/api.js +31 -0
- package/dist/component/_generated/api.js.map +1 -0
- package/dist/component/_generated/component.d.ts +67 -0
- package/dist/component/_generated/component.d.ts.map +1 -0
- package/dist/component/_generated/component.js +11 -0
- package/dist/component/_generated/component.js.map +1 -0
- package/dist/component/_generated/dataModel.d.ts +46 -0
- package/dist/component/_generated/dataModel.d.ts.map +1 -0
- package/dist/component/_generated/dataModel.js +11 -0
- package/dist/component/_generated/dataModel.js.map +1 -0
- package/dist/component/_generated/server.d.ts +121 -0
- package/dist/component/_generated/server.d.ts.map +1 -0
- package/dist/component/_generated/server.js +78 -0
- package/dist/component/_generated/server.js.map +1 -0
- package/dist/component/convex.config.d.ts +3 -0
- package/dist/component/convex.config.d.ts.map +1 -0
- package/dist/component/convex.config.js +3 -0
- package/dist/component/convex.config.js.map +1 -0
- package/dist/component/lib.d.ts +83 -0
- package/dist/component/lib.d.ts.map +1 -0
- package/dist/component/lib.js +314 -0
- package/dist/component/lib.js.map +1 -0
- package/dist/component/schema.d.ts +47 -0
- package/dist/component/schema.d.ts.map +1 -0
- package/dist/component/schema.js +28 -0
- package/dist/component/schema.js.map +1 -0
- package/dist/react/index.d.ts +2 -0
- package/dist/react/index.d.ts.map +1 -0
- package/dist/react/index.js +6 -0
- package/dist/react/index.js.map +1 -0
- package/package.json +102 -0
- package/src/client/_generated/_ignore.ts +1 -0
- package/src/client/index.test.ts +13 -0
- package/src/client/index.ts +100 -0
- package/src/client/setup.test.ts +26 -0
- package/src/component/_generated/api.ts +50 -0
- package/src/component/_generated/component.ts +85 -0
- package/src/component/_generated/dataModel.ts +60 -0
- package/src/component/_generated/server.ts +156 -0
- package/src/component/convex.config.ts +3 -0
- package/src/component/lib.test.ts +21 -0
- package/src/component/lib.ts +375 -0
- package/src/component/schema.ts +30 -0
- package/src/component/setup.test.ts +11 -0
- package/src/react/index.ts +7 -0
- package/src/test.ts +18 -0
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
import { v } from "convex/values";
|
|
2
|
+
import {
|
|
3
|
+
action,
|
|
4
|
+
internalMutation,
|
|
5
|
+
internalQuery,
|
|
6
|
+
query,
|
|
7
|
+
} from "./_generated/server.js";
|
|
8
|
+
import { internal } from "./_generated/api.js";
|
|
9
|
+
|
|
10
|
+
// ─── Constants ───────────────────────────────────────────────────────────────
|
|
11
|
+
|
|
12
|
+
/** Number of milliseconds in one hour; the unit both TTL defaults are expressed in. */
const ONE_HOUR_MS = 60 * 60 * 1000;

/** Lifetime applied to a cached search when the caller supplies no `ttlMs` (1 hour). */
const DEFAULT_SEARCH_TTL_MS = ONE_HOUR_MS;

/** Lifetime applied to a cached page when the caller supplies no `ttlMs` (6 hours). */
const DEFAULT_PAGE_TTL_MS = 6 * ONE_HOUR_MS;
|
|
14
|
+
|
|
15
|
+
// ─── Internal helpers ────────────────────────────────────────────────────────
|
|
16
|
+
|
|
17
|
+
/**
 * Internal lookup of the cached `searches` row for an exact query string.
 *
 * Resolves to the first row matched through the `by_query` index, or `null`
 * when no row exists for that query. Expiry is not evaluated here; callers
 * compare `expiresAt` themselves.
 */
export const getSearchByQuery = internalQuery({
  args: { query: v.string() },
  returns: v.union(
    v.null(),
    v.object({
      _id: v.id("searches"),
      _creationTime: v.number(),
      query: v.string(),
      vertical: v.optional(v.string()),
      recency: v.optional(v.string()),
      results: v.string(),
      fetchedAt: v.number(),
      expiresAt: v.number(),
    }),
  ),
  handler: async (ctx, { query }) => {
    const byQuery = ctx.db
      .query("searches")
      .withIndex("by_query", (idx) => idx.eq("query", query));
    return await byQuery.first();
  },
});
|
|
39
|
+
|
|
40
|
+
/**
 * Internal lookup of the cached `pages` row for an exact URL.
 *
 * Resolves to the first row matched through the `by_url` index, or `null`
 * when no row exists for that URL. Expiry is not evaluated here.
 */
export const getPageByUrl = internalQuery({
  args: { url: v.string() },
  returns: v.union(
    v.null(),
    v.object({
      _id: v.id("pages"),
      _creationTime: v.number(),
      url: v.string(),
      content: v.string(),
      fetchedAt: v.number(),
      expiresAt: v.number(),
    }),
  ),
  handler: async (ctx, { url }) => {
    const byUrl = ctx.db
      .query("pages")
      .withIndex("by_url", (idx) => idx.eq("url", url));
    return await byUrl.first();
  },
});
|
|
60
|
+
|
|
61
|
+
/**
 * Internal mutation that writes a search result into the `searches` cache.
 *
 * If a row already exists for `query` it is patched in place (results,
 * timestamps and both filters are overwritten); otherwise a new row is
 * inserted. `expiresAt` is the write time plus `ttlMs`, falling back to
 * `DEFAULT_SEARCH_TTL_MS` when `ttlMs` is omitted.
 *
 * @returns The id of the patched or newly inserted row.
 */
export const upsertSearch = internalMutation({
  args: {
    query: v.string(),
    vertical: v.optional(v.string()),
    recency: v.optional(v.string()),
    results: v.string(),
    ttlMs: v.optional(v.number()),
  },
  returns: v.id("searches"),
  handler: async (ctx, args) => {
    const { query, vertical, recency, results, ttlMs } = args;
    const fetchedAt = Date.now();
    const expiresAt = fetchedAt + (ttlMs ?? DEFAULT_SEARCH_TTL_MS);

    const current = await ctx.db
      .query("searches")
      .withIndex("by_query", (idx) => idx.eq("query", query))
      .first();

    // No row yet for this query: create one.
    if (!current) {
      return await ctx.db.insert("searches", {
        query,
        vertical,
        recency,
        results,
        fetchedAt,
        expiresAt,
      });
    }

    // Row exists: refresh its payload and expiry, keeping the same id.
    await ctx.db.patch("searches", current._id, {
      results,
      fetchedAt,
      expiresAt,
      vertical,
      recency,
    });
    return current._id;
  },
});
|
|
97
|
+
|
|
98
|
+
/**
 * Internal mutation that writes scraped content into the `pages` cache.
 *
 * If a row already exists for `url` its content and timestamps are patched;
 * otherwise a new row is inserted. `expiresAt` is the write time plus
 * `ttlMs`, falling back to `DEFAULT_PAGE_TTL_MS` when `ttlMs` is omitted.
 *
 * @returns The id of the patched or newly inserted row.
 */
export const upsertPage = internalMutation({
  args: {
    url: v.string(),
    content: v.string(),
    ttlMs: v.optional(v.number()),
  },
  returns: v.id("pages"),
  handler: async (ctx, args) => {
    const { url, content, ttlMs } = args;
    const fetchedAt = Date.now();
    const expiresAt = fetchedAt + (ttlMs ?? DEFAULT_PAGE_TTL_MS);

    const current = await ctx.db
      .query("pages")
      .withIndex("by_url", (idx) => idx.eq("url", url))
      .first();

    // No row yet for this URL: create one.
    if (!current) {
      return await ctx.db.insert("pages", {
        url,
        content,
        fetchedAt,
        expiresAt,
      });
    }

    // Row exists: refresh its content and expiry, keeping the same id.
    await ctx.db.patch("pages", current._id, {
      content,
      fetchedAt,
      expiresAt,
    });
    return current._id;
  },
});
|
|
128
|
+
|
|
129
|
+
/**
 * Internal mutation that removes the cached `searches` row for `query`.
 *
 * Only the first row matched through the `by_query` index is deleted; when
 * no row matches, the mutation does nothing. Always resolves to `null`.
 */
export const deleteSearchByQuery = internalMutation({
  args: { query: v.string() },
  returns: v.null(),
  handler: async (ctx, { query }) => {
    const target = await ctx.db
      .query("searches")
      .withIndex("by_query", (idx) => idx.eq("query", query))
      .first();
    if (!target) {
      return null;
    }
    await ctx.db.delete("searches", target._id);
    return null;
  },
});
|
|
141
|
+
|
|
142
|
+
/**
 * Internal mutation that removes the cached `pages` row for `url`.
 *
 * Only the first row matched through the `by_url` index is deleted; when no
 * row matches, the mutation does nothing. Always resolves to `null`.
 */
export const deletePageByUrl = internalMutation({
  args: { url: v.string() },
  returns: v.null(),
  handler: async (ctx, { url }) => {
    const target = await ctx.db
      .query("pages")
      .withIndex("by_url", (idx) => idx.eq("url", url))
      .first();
    if (!target) {
      return null;
    }
    await ctx.db.delete("pages", target._id);
    return null;
  },
});
|
|
154
|
+
|
|
155
|
+
// ─── Public queries (reactive) ───────────────────────────────────────────────
|
|
156
|
+
|
|
157
|
+
/**
 * Public query exposing the cached search entry for `query`.
 *
 * Resolves to `null` when nothing is cached. Otherwise returns the stored
 * `results` string and timestamps together with `isFresh`, which is `true`
 * while the current time is strictly before `expiresAt`. Expired entries are
 * still returned (with `isFresh: false`).
 *
 * NOTE(review): `isFresh` is computed from `Date.now()` when the handler
 * runs — confirm whether subscribers are re-notified once an entry expires
 * without the row being written.
 */
export const getSearch = query({
  args: { query: v.string() },
  returns: v.union(
    v.null(),
    v.object({
      results: v.string(),
      fetchedAt: v.number(),
      expiresAt: v.number(),
      isFresh: v.boolean(),
    }),
  ),
  handler: async (ctx, { query: searchText }) => {
    const hit = await ctx.db
      .query("searches")
      .withIndex("by_query", (idx) => idx.eq("query", searchText))
      .first();
    if (hit) {
      const { results, fetchedAt, expiresAt } = hit;
      return { results, fetchedAt, expiresAt, isFresh: Date.now() < expiresAt };
    }
    return null;
  },
});
|
|
182
|
+
|
|
183
|
+
/**
 * Public query exposing the cached page entry for `url`.
 *
 * Resolves to `null` when nothing is cached. Otherwise returns the stored
 * `content` string and timestamps together with `isFresh`, which is `true`
 * while the current time is strictly before `expiresAt`. Expired entries are
 * still returned (with `isFresh: false`).
 *
 * NOTE(review): `isFresh` is computed from `Date.now()` when the handler
 * runs — confirm whether subscribers are re-notified once an entry expires
 * without the row being written.
 */
export const getPage = query({
  args: { url: v.string() },
  returns: v.union(
    v.null(),
    v.object({
      content: v.string(),
      fetchedAt: v.number(),
      expiresAt: v.number(),
      isFresh: v.boolean(),
    }),
  ),
  handler: async (ctx, { url }) => {
    const hit = await ctx.db
      .query("pages")
      .withIndex("by_url", (idx) => idx.eq("url", url))
      .first();
    if (hit) {
      const { content, fetchedAt, expiresAt } = hit;
      return { content, fetchedAt, expiresAt, isFresh: Date.now() < expiresAt };
    }
    return null;
  },
});
|
|
208
|
+
|
|
209
|
+
// ─── Public actions ──────────────────────────────────────────────────────────
|
|
210
|
+
|
|
211
|
+
/**
 * Builds the Google search URL that the Bright Data SERP zone is asked to fetch.
 *
 * Every caller-supplied piece is percent-encoded so that a `vertical` or
 * `recency` value containing `&`, `#` or `=` cannot inject extra URL
 * parameters. For plain values such as `"nws"` or `"d"` the output is
 * identical to simple string interpolation.
 */
function buildGoogleSearchUrl(
  query: string,
  vertical?: string,
  recency?: string,
): string {
  const params = [`q=${encodeURIComponent(query)}`];
  if (vertical) params.push(`tbm=${encodeURIComponent(vertical)}`);
  if (recency) params.push(`tbs=qdr:${encodeURIComponent(recency)}`);
  return `https://www.google.com/search?${params.join("&")}`;
}

/**
 * Public action: run a search through Bright Data's SERP API with caching.
 *
 * Flow:
 *  1. Look up the cached row for `query`. It is served only when it has not
 *     expired AND was stored for the same `vertical` / `recency` filters as
 *     this request — the cache row is keyed by `query` alone, so without the
 *     filter check a result fetched for one vertical would be returned for
 *     another.
 *  2. Otherwise POST the request to Bright Data and read the body as text.
 *  3. Store the body via `upsertSearch` (which overwrites the row for this
 *     query, including its filters) and return it.
 *
 * @param args.query - Search text; also the cache key.
 * @param args.vertical - Optional value for Google's `tbm` parameter.
 * @param args.recency - Optional value appended to `tbs=qdr:`.
 * @param args.ttlMs - Optional cache lifetime forwarded to `upsertSearch`.
 * @param args.brightdataApiToken - Bearer token for the request; it is not
 *   passed to any mutation.
 * @param args.brightdataSearchZone - Zone name; defaults to `"serp_api1"`.
 * @returns The raw response text, whether it came from cache, and when it
 *   was fetched.
 * @throws Error when Bright Data responds with a non-2xx status; the message
 *   contains the status code and response body.
 */
export const search = action({
  args: {
    query: v.string(),
    vertical: v.optional(v.string()),
    recency: v.optional(v.string()),
    ttlMs: v.optional(v.number()),
    // Passed in from client wrapper — never stored
    brightdataApiToken: v.string(),
    brightdataSearchZone: v.optional(v.string()),
  },
  returns: v.object({
    results: v.string(),
    fromCache: v.boolean(),
    fetchedAt: v.number(),
  }),
  handler: async (ctx, args) => {
    // 1. Check cache
    // The explicit annotation keeps this call from depending on the inferred
    // type of the module's own generated API.
    const cached: {
      results: string;
      fetchedAt: number;
      expiresAt: number;
      vertical?: string;
      recency?: string;
    } | null = await ctx.runQuery(internal.lib.getSearchByQuery, {
      query: args.query,
    });
    if (
      cached !== null &&
      cached.vertical === args.vertical &&
      cached.recency === args.recency &&
      Date.now() < cached.expiresAt
    ) {
      return {
        results: cached.results,
        fromCache: true,
        fetchedAt: cached.fetchedAt,
      };
    }

    // 2. Fetch from Bright Data SERP API
    const zone = args.brightdataSearchZone ?? "serp_api1";
    const body = {
      zone,
      url: buildGoogleSearchUrl(args.query, args.vertical, args.recency),
      format: "json",
    };

    const response = await fetch("https://api.brightdata.com/request", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${args.brightdataApiToken}`,
      },
      body: JSON.stringify(body),
    });

    if (!response.ok) {
      throw new Error(
        `Bright Data SERP API error: ${
          response.status
        } ${await response.text()}`
      );
    }

    const results = await response.text();
    const now = Date.now();

    // 3. Store in cache
    await ctx.runMutation(internal.lib.upsertSearch, {
      query: args.query,
      vertical: args.vertical,
      recency: args.recency,
      results,
      ttlMs: args.ttlMs,
    });

    return { results, fromCache: false, fetchedAt: now };
  },
});
|
|
290
|
+
|
|
291
|
+
/**
 * Public action: fetch a page through Bright Data's Web Unlocker with caching.
 *
 * Serves the cached row for `url` while it has not expired; otherwise POSTs
 * the request to Bright Data, stores the response text via `upsertPage`, and
 * returns it.
 *
 * @param args.url - Page to fetch; also the cache key.
 * @param args.ttlMs - Optional cache lifetime forwarded to `upsertPage`.
 * @param args.brightdataApiToken - Bearer token for the request; it is not
 *   passed to any mutation.
 * @param args.brightdataWebUnlockerZone - Zone name; defaults to
 *   `"web_unlocker1"`.
 * @returns The page content, whether it came from cache, and when it was
 *   fetched.
 * @throws Error when Bright Data responds with a non-2xx status; the message
 *   contains the status code and response body.
 */
export const scrape = action({
  args: {
    url: v.string(),
    ttlMs: v.optional(v.number()),
    brightdataApiToken: v.string(),
    brightdataWebUnlockerZone: v.optional(v.string()),
  },
  returns: v.object({
    content: v.string(),
    fromCache: v.boolean(),
    fetchedAt: v.number(),
  }),
  handler: async (ctx, args) => {
    const { url: pageUrl, ttlMs, brightdataApiToken } = args;

    // Step 1: serve from cache while the stored entry is still unexpired.
    const hit: {
      content: string;
      fetchedAt: number;
      expiresAt: number;
    } | null = await ctx.runQuery(internal.lib.getPageByUrl, { url: pageUrl });
    if (hit && Date.now() < hit.expiresAt) {
      return { content: hit.content, fromCache: true, fetchedAt: hit.fetchedAt };
    }

    // Step 2: request the page through the Web Unlocker zone.
    const payload = JSON.stringify({
      zone: args.brightdataWebUnlockerZone ?? "web_unlocker1",
      url: pageUrl,
      format: "raw",
    });
    const reply = await fetch("https://api.brightdata.com/request", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${brightdataApiToken}`,
      },
      body: payload,
    });

    if (!reply.ok) {
      const detail = await reply.text();
      throw new Error(`Bright Data Web Unlocker error: ${reply.status} ${detail}`);
    }

    const content = await reply.text();
    const fetchedAt = Date.now();

    // Step 3: persist the fresh content before returning it.
    await ctx.runMutation(internal.lib.upsertPage, {
      url: pageUrl,
      content,
      ttlMs,
    });

    return { content, fromCache: false, fetchedAt };
  },
});
|
|
357
|
+
|
|
358
|
+
/**
 * Public action: drop cached entries.
 *
 * Deletes the cached search for `query` and/or the cached page for `url`,
 * depending on which arguments are supplied. Supplying neither is a no-op.
 *
 * @param args.query - Search text whose cached row should be removed.
 * @param args.url - Page URL whose cached row should be removed.
 * @returns Always `null`.
 */
export const invalidate = action({
  args: {
    query: v.optional(v.string()),
    url: v.optional(v.string()),
  },
  returns: v.null(),
  handler: async (ctx, args) => {
    // Compare against `undefined` rather than relying on truthiness: an empty
    // string is a valid cache key and must still be deletable when passed.
    if (args.query !== undefined) {
      await ctx.runMutation(internal.lib.deleteSearchByQuery, {
        query: args.query,
      });
    }
    if (args.url !== undefined) {
      await ctx.runMutation(internal.lib.deletePageByUrl, { url: args.url });
    }
    return null;
  },
});
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { defineSchema, defineTable } from "convex/server";
|
|
2
|
+
import { v } from "convex/values";
|
|
3
|
+
|
|
4
|
+
/** Cached search responses, one row per query string (looked up via `by_query`). */
const searches = defineTable({
  query: v.string(),
  vertical: v.optional(v.string()), // "web" | "news" | "shopping" etc.
  recency: v.optional(v.string()), // "d" | "w" | "m" etc.
  results: v.string(), // JSON-stringified array of results
  fetchedAt: v.number(), // Date.now()
  expiresAt: v.number(), // fetchedAt + TTL
})
  .index("by_query", ["query"])
  .index("by_expires", ["expiresAt"]);

/** Cached page bodies, one row per URL (looked up via `by_url`). */
const pages = defineTable({
  url: v.string(),
  // Response body from Web Unlocker. Originally annotated as markdown —
  // TODO confirm, since `scrape` requests format "raw".
  content: v.string(),
  fetchedAt: v.number(),
  expiresAt: v.number(),
})
  .index("by_url", ["url"])
  .index("by_expires", ["expiresAt"]);

/** Completed-request records keyed by `requestKey`. */
const processedRequests = defineTable({
  requestKey: v.string(), // hash of query+vertical+recency OR url
  type: v.union(v.literal("search"), v.literal("scrape")),
  completedAt: v.number(),
}).index("by_key", ["requestKey"]);

export default defineSchema({
  searches,
  pages,
  processedRequests,
});
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/// <reference types="vite/client" />
|
|
2
|
+
import { test } from "vitest";
|
|
3
|
+
import schema from "./schema.js";
|
|
4
|
+
import { convexTest } from "convex-test";
|
|
5
|
+
// Vite glob import of every file under this directory whose extension ends
// in "s" (e.g. `.ts`, `.js`); handed to `convexTest` in `initConvexTest`.
export const modules = import.meta.glob("./**/*.*s");
|
|
6
|
+
|
|
7
|
+
/**
 * Creates a `convexTest` instance wired to this component's schema and the
 * globbed `modules` map.
 */
export function initConvexTest() {
  return convexTest(schema, modules);
}
|
|
11
|
+
// Empty test body; presumably present so the test runner does not reject this
// helper file for containing no tests — TODO confirm.
test("setup", () => {});
|
package/src/test.ts
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/// <reference types="vite/client" />
|
|
2
|
+
import type { TestConvex } from "convex-test";
|
|
3
|
+
import type { GenericSchema, SchemaDefinition } from "convex/server";
|
|
4
|
+
import schema from "./component/schema.js";
|
|
5
|
+
// Vite glob import of every `.ts` file under ./component; passed to
// `registerComponent` by `register` below.
const modules = import.meta.glob("./component/**/*.ts");
|
|
6
|
+
|
|
7
|
+
/**
 * Attaches this component to a `convex-test` instance so tests in a host app
 * can exercise it.
 *
 * @param t - Test instance obtained from `convexTest`.
 * @param name - Name the component is mounted under; should match the name
 *   used in the host app's convex.config.ts. Defaults to `"brightDataSync"`.
 */
export function register(
  t: TestConvex<SchemaDefinition<GenericSchema, boolean>>,
  name = "brightDataSync",
): void {
  t.registerComponent(name, schema, modules);
}
|
|
18
|
+
// Default export bundling the `register` helper with the schema and module
// map it passes to `registerComponent`.
export default { register, schema, modules };
|