@sholajegede/bright-data-sync 0.1.1-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +182 -0
  3. package/dist/client/_generated/_ignore.d.ts +1 -0
  4. package/dist/client/_generated/_ignore.d.ts.map +1 -0
  5. package/dist/client/_generated/_ignore.js +3 -0
  6. package/dist/client/_generated/_ignore.js.map +1 -0
  7. package/dist/client/index.d.ts +77 -0
  8. package/dist/client/index.d.ts.map +1 -0
  9. package/dist/client/index.js +61 -0
  10. package/dist/client/index.js.map +1 -0
  11. package/dist/component/_generated/api.d.ts +34 -0
  12. package/dist/component/_generated/api.d.ts.map +1 -0
  13. package/dist/component/_generated/api.js +31 -0
  14. package/dist/component/_generated/api.js.map +1 -0
  15. package/dist/component/_generated/component.d.ts +67 -0
  16. package/dist/component/_generated/component.d.ts.map +1 -0
  17. package/dist/component/_generated/component.js +11 -0
  18. package/dist/component/_generated/component.js.map +1 -0
  19. package/dist/component/_generated/dataModel.d.ts +46 -0
  20. package/dist/component/_generated/dataModel.d.ts.map +1 -0
  21. package/dist/component/_generated/dataModel.js +11 -0
  22. package/dist/component/_generated/dataModel.js.map +1 -0
  23. package/dist/component/_generated/server.d.ts +121 -0
  24. package/dist/component/_generated/server.d.ts.map +1 -0
  25. package/dist/component/_generated/server.js +78 -0
  26. package/dist/component/_generated/server.js.map +1 -0
  27. package/dist/component/convex.config.d.ts +3 -0
  28. package/dist/component/convex.config.d.ts.map +1 -0
  29. package/dist/component/convex.config.js +3 -0
  30. package/dist/component/convex.config.js.map +1 -0
  31. package/dist/component/lib.d.ts +83 -0
  32. package/dist/component/lib.d.ts.map +1 -0
  33. package/dist/component/lib.js +314 -0
  34. package/dist/component/lib.js.map +1 -0
  35. package/dist/component/schema.d.ts +47 -0
  36. package/dist/component/schema.d.ts.map +1 -0
  37. package/dist/component/schema.js +28 -0
  38. package/dist/component/schema.js.map +1 -0
  39. package/dist/react/index.d.ts +2 -0
  40. package/dist/react/index.d.ts.map +1 -0
  41. package/dist/react/index.js +6 -0
  42. package/dist/react/index.js.map +1 -0
  43. package/package.json +102 -0
  44. package/src/client/_generated/_ignore.ts +1 -0
  45. package/src/client/index.test.ts +13 -0
  46. package/src/client/index.ts +100 -0
  47. package/src/client/setup.test.ts +26 -0
  48. package/src/component/_generated/api.ts +50 -0
  49. package/src/component/_generated/component.ts +85 -0
  50. package/src/component/_generated/dataModel.ts +60 -0
  51. package/src/component/_generated/server.ts +156 -0
  52. package/src/component/convex.config.ts +3 -0
  53. package/src/component/lib.test.ts +21 -0
  54. package/src/component/lib.ts +375 -0
  55. package/src/component/schema.ts +30 -0
  56. package/src/component/setup.test.ts +11 -0
  57. package/src/react/index.ts +7 -0
  58. package/src/test.ts +18 -0
@@ -0,0 +1,375 @@
1
+ import { v } from "convex/values";
2
+ import {
3
+ action,
4
+ internalMutation,
5
+ internalQuery,
6
+ query,
7
+ } from "./_generated/server.js";
8
+ import { internal } from "./_generated/api.js";
9
+
10
+ // ─── Constants ───────────────────────────────────────────────────────────────
11
+
12
+ const DEFAULT_SEARCH_TTL_MS = 1000 * 60 * 60; // 1 hour
13
+ const DEFAULT_PAGE_TTL_MS = 1000 * 60 * 60 * 6; // 6 hours
14
+
15
+ // ─── Internal helpers ────────────────────────────────────────────────────────
16
+
17
/**
 * Internal query: look up the cached search row for an exact query string.
 *
 * Reads the `searches` table through the `by_query` index and resolves to the
 * first matching document, or `null` when there is none. Expiry is not checked
 * here; the full row (including `expiresAt`) is handed back to the caller.
 */
export const getSearchByQuery = internalQuery({
  args: { query: v.string() },
  returns: v.union(
    v.null(),
    v.object({
      _id: v.id("searches"),
      _creationTime: v.number(),
      query: v.string(),
      vertical: v.optional(v.string()),
      recency: v.optional(v.string()),
      results: v.string(),
      fetchedAt: v.number(),
      expiresAt: v.number(),
    }),
  ),
  handler: async (ctx, args) => {
    const matching = ctx.db
      .query("searches")
      .withIndex("by_query", (idx) => idx.eq("query", args.query));
    const row = await matching.first();
    return row;
  },
});
39
+
40
/**
 * Internal query: look up the cached page row for an exact URL.
 *
 * Reads the `pages` table through the `by_url` index and resolves to the first
 * matching document, or `null` when there is none. Expiry is not checked here.
 */
export const getPageByUrl = internalQuery({
  args: { url: v.string() },
  returns: v.union(
    v.null(),
    v.object({
      _id: v.id("pages"),
      _creationTime: v.number(),
      url: v.string(),
      content: v.string(),
      fetchedAt: v.number(),
      expiresAt: v.number(),
    }),
  ),
  handler: async (ctx, args) => {
    const matching = ctx.db
      .query("pages")
      .withIndex("by_url", (idx) => idx.eq("url", args.url));
    const row = await matching.first();
    return row;
  },
});
60
+
61
/**
 * Internal mutation: insert or refresh the cached row for a search query.
 *
 * The row is located by `query` alone (index `by_query`). When one exists its
 * results, timestamps, `vertical` and `recency` are overwritten; otherwise a
 * new row is inserted. `expiresAt` is "now" plus `ttlMs`, falling back to
 * `DEFAULT_SEARCH_TTL_MS` when `ttlMs` is omitted.
 *
 * @returns the id of the row that was patched or inserted.
 */
export const upsertSearch = internalMutation({
  args: {
    query: v.string(),
    vertical: v.optional(v.string()),
    recency: v.optional(v.string()),
    results: v.string(),
    ttlMs: v.optional(v.number()),
  },
  returns: v.id("searches"),
  handler: async (ctx, args) => {
    const fetchedAt = Date.now();
    const lifetimeMs = args.ttlMs ?? DEFAULT_SEARCH_TTL_MS;
    const expiresAt = fetchedAt + lifetimeMs;

    const current = await ctx.db
      .query("searches")
      .withIndex("by_query", (idx) => idx.eq("query", args.query))
      .first();

    // No row yet for this query: create one and hand back the new id.
    if (!current) {
      const insertedId = await ctx.db.insert("searches", {
        query: args.query,
        vertical: args.vertical,
        recency: args.recency,
        results: args.results,
        fetchedAt,
        expiresAt,
      });
      return insertedId;
    }

    // Row exists: refresh its payload and expiry in place.
    await ctx.db.patch("searches", current._id, {
      results: args.results,
      fetchedAt,
      expiresAt,
      vertical: args.vertical,
      recency: args.recency,
    });
    return current._id;
  },
});
97
+
98
/**
 * Internal mutation: insert or refresh the cached row for a scraped URL.
 *
 * The row is located by `url` (index `by_url`). When one exists its content
 * and timestamps are overwritten; otherwise a new row is inserted.
 * `expiresAt` is "now" plus `ttlMs`, falling back to `DEFAULT_PAGE_TTL_MS`
 * when `ttlMs` is omitted.
 *
 * @returns the id of the row that was patched or inserted.
 */
export const upsertPage = internalMutation({
  args: {
    url: v.string(),
    content: v.string(),
    ttlMs: v.optional(v.number()),
  },
  returns: v.id("pages"),
  handler: async (ctx, args) => {
    const fetchedAt = Date.now();
    const lifetimeMs = args.ttlMs ?? DEFAULT_PAGE_TTL_MS;
    const expiresAt = fetchedAt + lifetimeMs;

    const current = await ctx.db
      .query("pages")
      .withIndex("by_url", (idx) => idx.eq("url", args.url))
      .first();

    // No row yet for this URL: create one and hand back the new id.
    if (!current) {
      const insertedId = await ctx.db.insert("pages", {
        url: args.url,
        content: args.content,
        fetchedAt,
        expiresAt,
      });
      return insertedId;
    }

    // Row exists: refresh its content and expiry in place.
    await ctx.db.patch("pages", current._id, {
      content: args.content,
      fetchedAt,
      expiresAt,
    });
    return current._id;
  },
});
128
+
129
/**
 * Internal mutation: remove the cached search row for an exact query string.
 *
 * Only the first row returned by the `by_query` index is deleted. The call is
 * a no-op when nothing matches, and always resolves to `null`.
 */
export const deleteSearchByQuery = internalMutation({
  args: { query: v.string() },
  returns: v.null(),
  handler: async (ctx, args) => {
    const target = await ctx.db
      .query("searches")
      .withIndex("by_query", (idx) => idx.eq("query", args.query))
      .first();
    if (!target) {
      return null;
    }
    await ctx.db.delete("searches", target._id);
    return null;
  },
});
141
+
142
/**
 * Internal mutation: remove the cached page row for an exact URL.
 *
 * Only the first row returned by the `by_url` index is deleted. The call is a
 * no-op when nothing matches, and always resolves to `null`.
 */
export const deletePageByUrl = internalMutation({
  args: { url: v.string() },
  returns: v.null(),
  handler: async (ctx, args) => {
    const target = await ctx.db
      .query("pages")
      .withIndex("by_url", (idx) => idx.eq("url", args.url))
      .first();
    if (!target) {
      return null;
    }
    await ctx.db.delete("pages", target._id);
    return null;
  },
});
154
+
155
+ // ─── Public queries (reactive) ───────────────────────────────────────────────
156
+
157
/**
 * Public query: read the cached results for a search query without fetching.
 *
 * Resolves to `null` when no row exists for `query`. Otherwise returns the
 * stored `results` string and timestamps plus `isFresh`, which is true while
 * the current time is strictly before `expiresAt`.
 *
 * NOTE(review): `isFresh` is computed from `Date.now()` at the moment the
 * query runs. Presumably a subscribed client will not see it flip to `false`
 * until the row itself changes — confirm against Convex query caching rules.
 */
export const getSearch = query({
  args: { query: v.string() },
  returns: v.union(
    v.null(),
    v.object({
      results: v.string(),
      fetchedAt: v.number(),
      expiresAt: v.number(),
      isFresh: v.boolean(),
    }),
  ),
  handler: async (ctx, args) => {
    const cachedRow = await ctx.db
      .query("searches")
      .withIndex("by_query", (idx) => idx.eq("query", args.query))
      .first();
    if (cachedRow === null) {
      return null;
    }
    const { results, fetchedAt, expiresAt } = cachedRow;
    const isFresh = Date.now() < expiresAt;
    return { results, fetchedAt, expiresAt, isFresh };
  },
});
182
+
183
/**
 * Public query: read the cached content for a URL without fetching.
 *
 * Resolves to `null` when no row exists for `url`. Otherwise returns the
 * stored `content` string and timestamps plus `isFresh`, which is true while
 * the current time is strictly before `expiresAt`.
 *
 * NOTE(review): `isFresh` is computed from `Date.now()` at the moment the
 * query runs. Presumably a subscribed client will not see it flip to `false`
 * until the row itself changes — confirm against Convex query caching rules.
 */
export const getPage = query({
  args: { url: v.string() },
  returns: v.union(
    v.null(),
    v.object({
      content: v.string(),
      fetchedAt: v.number(),
      expiresAt: v.number(),
      isFresh: v.boolean(),
    }),
  ),
  handler: async (ctx, args) => {
    const cachedRow = await ctx.db
      .query("pages")
      .withIndex("by_url", (idx) => idx.eq("url", args.url))
      .first();
    if (cachedRow === null) {
      return null;
    }
    const { content, fetchedAt, expiresAt } = cachedRow;
    const isFresh = Date.now() < expiresAt;
    return { content, fetchedAt, expiresAt, isFresh };
  },
});
208
+
209
+ // ─── Public actions ──────────────────────────────────────────────────────────
210
+
211
/**
 * Build the Google search URL that is handed to the Bright Data SERP zone.
 *
 * `vertical` maps to the `tbm` parameter and `recency` to `tbs=qdr:<recency>`.
 * Every caller-supplied value is percent-encoded so it cannot inject extra
 * query parameters into the URL.
 */
function buildGoogleSearchUrl(
  query: string,
  vertical?: string,
  recency?: string,
): string {
  const params = [`q=${encodeURIComponent(query)}`];
  if (vertical) params.push(`tbm=${encodeURIComponent(vertical)}`);
  if (recency) params.push(`tbs=qdr:${encodeURIComponent(recency)}`);
  return `https://www.google.com/search?${params.join("&")}`;
}

/**
 * Public action: run a Google search through the Bright Data SERP API, with a
 * TTL cache in the `searches` table.
 *
 * Flow:
 *  1. Look up the cached row for `query`. It is served only when it has not
 *     expired AND was fetched with the same `vertical` / `recency` filters.
 *  2. Otherwise POST to `https://api.brightdata.com/request` using the given
 *     zone (default `"serp_api1"`) and `format: "json"`.
 *  3. Store the raw response text via `upsertSearch` and return it.
 *
 * The cache holds one row per query string, so a request with different
 * filters overwrites the previous row rather than being answered from it.
 *
 * @param brightdataApiToken - sent as a Bearer token; it is not passed to the
 *   cache mutation.
 * @param ttlMs - cache lifetime forwarded to `upsertSearch`.
 * @returns the results string, whether it came from the cache, and the
 *   timestamp associated with the fetch.
 * @throws Error when Bright Data responds with a non-2xx status; the message
 *   contains the status code and response body.
 */
export const search = action({
  args: {
    query: v.string(),
    vertical: v.optional(v.string()),
    recency: v.optional(v.string()),
    ttlMs: v.optional(v.number()),
    // Passed in from client wrapper — never stored
    brightdataApiToken: v.string(),
    brightdataSearchZone: v.optional(v.string()),
  },
  returns: v.object({
    results: v.string(),
    fromCache: v.boolean(),
    fetchedAt: v.number(),
  }),
  handler: async (ctx, args) => {
    // 1. Check cache
    const cached = (await ctx.runQuery(internal.lib.getSearchByQuery, {
      query: args.query,
    })) as {
      _id: string;
      results: string;
      fetchedAt: number;
      expiresAt: number;
      vertical?: string;
      recency?: string;
    } | null;

    // The row is keyed by query only, so make sure it was produced with the
    // same filters before trusting it; otherwise e.g. cached "news" results
    // would be returned for a plain web search.
    const sameFilters =
      cached !== null &&
      cached.vertical === args.vertical &&
      cached.recency === args.recency;

    if (cached && sameFilters && Date.now() < cached.expiresAt) {
      return {
        results: cached.results,
        fromCache: true,
        fetchedAt: cached.fetchedAt,
      };
    }

    // 2. Fetch from Bright Data SERP API
    const zone = args.brightdataSearchZone ?? "serp_api1";
    const body = {
      zone,
      url: buildGoogleSearchUrl(args.query, args.vertical, args.recency),
      format: "json",
    };

    const response = await fetch("https://api.brightdata.com/request", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${args.brightdataApiToken}`,
      },
      body: JSON.stringify(body),
    });

    if (!response.ok) {
      throw new Error(
        `Bright Data SERP API error: ${
          response.status
        } ${await response.text()}`
      );
    }

    const results = await response.text();
    const now = Date.now();

    // 3. Store in cache
    await ctx.runMutation(internal.lib.upsertSearch, {
      query: args.query,
      vertical: args.vertical,
      recency: args.recency,
      results,
      ttlMs: args.ttlMs,
    });

    return { results, fromCache: false, fetchedAt: now };
  },
});
290
+
291
/**
 * Public action: fetch a page through the Bright Data Web Unlocker, with a
 * TTL cache in the `pages` table.
 *
 * A cached row for `url` is returned while the current time is before its
 * `expiresAt`. Otherwise the page is requested from
 * `https://api.brightdata.com/request` (zone defaults to `"web_unlocker1"`,
 * `format: "raw"`), the response text is stored via `upsertPage`, and the
 * fresh content is returned.
 *
 * @throws Error when Bright Data responds with a non-2xx status; the message
 *   contains the status code and response body.
 */
export const scrape = action({
  args: {
    url: v.string(),
    ttlMs: v.optional(v.number()),
    brightdataApiToken: v.string(),
    brightdataWebUnlockerZone: v.optional(v.string()),
  },
  returns: v.object({
    content: v.string(),
    fromCache: v.boolean(),
    fetchedAt: v.number(),
  }),
  handler: async (ctx, args) => {
    // Step 1: serve from the cache when the stored row is still valid.
    const hit = (await ctx.runQuery(internal.lib.getPageByUrl, {
      url: args.url,
    })) as {
      _id: string;
      content: string;
      fetchedAt: number;
      expiresAt: number;
    } | null;
    const stillValid = hit !== null && Date.now() < hit.expiresAt;
    if (hit && stillValid) {
      const { content, fetchedAt } = hit;
      return { content, fromCache: true, fetchedAt };
    }

    // Step 2: cache miss or expired row — go to the Web Unlocker.
    const payload = JSON.stringify({
      zone: args.brightdataWebUnlockerZone ?? "web_unlocker1",
      url: args.url,
      format: "raw",
    });
    const reply = await fetch("https://api.brightdata.com/request", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${args.brightdataApiToken}`,
      },
      body: payload,
    });

    if (!reply.ok) {
      const failureBody = await reply.text();
      throw new Error(
        `Bright Data Web Unlocker error: ${reply.status} ${failureBody}`
      );
    }

    const content = await reply.text();
    const fetchedAt = Date.now();

    // Step 3: persist the fresh copy for later callers.
    await ctx.runMutation(internal.lib.upsertPage, {
      url: args.url,
      content,
      ttlMs: args.ttlMs,
    });

    return { content, fromCache: false, fetchedAt };
  },
});
357
+
358
/**
 * Public action: drop cached entries so the next `search` / `scrape` call
 * refetches.
 *
 * Each argument is independent: pass `query` to delete the cached search row
 * for that query, `url` to delete the cached page row for that URL, or both.
 * An argument counts as "provided" whenever it is not `undefined`, so an
 * empty string is honoured (such a key can be cached, since the other actions
 * accept any string) instead of being silently skipped.
 *
 * @returns always `null`.
 */
export const invalidate = action({
  args: {
    query: v.optional(v.string()),
    url: v.optional(v.string()),
  },
  returns: v.null(),
  handler: async (ctx, args) => {
    // Compare against undefined rather than relying on truthiness: "" is a
    // valid cache key and must be deletable.
    if (args.query !== undefined) {
      await ctx.runMutation(internal.lib.deleteSearchByQuery, {
        query: args.query,
      });
    }
    if (args.url !== undefined) {
      await ctx.runMutation(internal.lib.deletePageByUrl, { url: args.url });
    }
    return null;
  },
});
@@ -0,0 +1,30 @@
1
+ import { defineSchema, defineTable } from "convex/server";
2
+ import { v } from "convex/values";
3
+
4
/** Cached search responses, one row looked up per query string. */
const searches = defineTable({
  query: v.string(),
  vertical: v.optional(v.string()), // e.g. "web" | "news" | "shopping"
  recency: v.optional(v.string()), // e.g. "d" | "w" | "m"
  results: v.string(), // serialized results payload, stored as a string
  fetchedAt: v.number(), // epoch milliseconds (Date.now())
  expiresAt: v.number(), // fetchedAt + TTL
})
  .index("by_query", ["query"])
  .index("by_expires", ["expiresAt"]);

/** Cached page bodies, one row looked up per URL. */
const pages = defineTable({
  url: v.string(),
  content: v.string(), // page content returned by Web Unlocker
  fetchedAt: v.number(), // epoch milliseconds (Date.now())
  expiresAt: v.number(), // fetchedAt + TTL
})
  .index("by_url", ["url"])
  .index("by_expires", ["expiresAt"]);

/** Record of completed requests, looked up by request key. */
const processedRequests = defineTable({
  requestKey: v.string(), // hash of query+vertical+recency OR url
  type: v.union(v.literal("search"), v.literal("scrape")),
  completedAt: v.number(),
}).index("by_key", ["requestKey"]);

export default defineSchema({
  searches,
  pages,
  processedRequests,
});
@@ -0,0 +1,11 @@
1
+ /// <reference types="vite/client" />
2
+ import { test } from "vitest";
3
+ import schema from "./schema.js";
4
+ import { convexTest } from "convex-test";
5
/**
 * Module map handed to `convexTest`. The glob matches every file under this
 * directory whose extension ends in "s".
 */
export const modules = import.meta.glob("./**/*.*s");

/** Create a `convexTest` instance wired to this component's schema and modules. */
export function initConvexTest() {
  return convexTest(schema, modules);
}

// Placeholder test; presumably present so the runner does not report this
// helper file as containing no tests — confirm.
test("setup", () => {});
@@ -0,0 +1,7 @@
1
+ "use client";
2
+
3
+ // This is where React components / hooks go.
4
+
5
/** Placeholder hook for the React entry point; currently returns an empty object. */
export const useMyComponent = () => ({});
package/src/test.ts ADDED
@@ -0,0 +1,18 @@
1
+ /// <reference types="vite/client" />
2
+ import type { TestConvex } from "convex-test";
3
+ import type { GenericSchema, SchemaDefinition } from "convex/server";
4
+ import schema from "./component/schema.js";
5
// All TypeScript sources under ./component, passed to `registerComponent`.
const modules = import.meta.glob("./component/**/*.ts");

/**
 * Attach this component to a convex-test instance so tests can call into it.
 *
 * @param t - The test convex instance, e.g. the value returned by `convexTest`.
 * @param name - The component name as registered in convex.config.ts;
 *   defaults to "brightDataSync".
 */
export function register(
  t: TestConvex<SchemaDefinition<GenericSchema, boolean>>,
  name = "brightDataSync",
): void {
  t.registerComponent(name, schema, modules);
}

export default { register, schema, modules };