@epsilon-asi/actors 0.0.7 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,573 @@
1
+ import * as cheerio from "cheerio";
2
+ import {z} from "zod/v4";
3
+
4
+ /**
5
+ * ============================================================================
6
+ * Upwork Job Details Page Parser
7
+ * ============================================================================
8
+ *
9
+ * Parses the FULL Upwork job details page HTML into a strongly typed object.
10
+ *
11
+ * Designed for:
12
+ * - Saved HTML snapshots
13
+ * - Browser automation scraping
14
+ * - Puppeteer / Playwright extraction
15
+ * - LangGraph / AI ingestion pipelines
16
+ *
17
+ * This parser intentionally avoids brittle class names wherever possible and
18
+ * instead relies on semantic structure and textual anchors.
19
+ */
20
+
21
+ /* =============================================================================
22
+ * Zod Schemas
23
+ * ========================================================================== */
24
+
25
/**
 * Schema for a single entry of the client's recent work history.
 *
 * The parser builds one of these per `[data-cy='job']` element. Every field
 * is nullable so that a missing piece of markup yields `null` instead of a
 * validation failure.
 */
export const UpworkClientReviewSchema = z.object({
  jobTitle: z.nullable(z.string()),
  feedback: z.nullable(z.string()),
  freelancerName: z.nullable(z.string()),
  rating: z.nullable(z.number()),
  engagementType: z.nullable(z.string()),
  // Kept as the matched text (currency symbol included), not a number.
  amount: z.nullable(z.string()),
  dateRange: z.nullable(z.string()),
});

/** Static type inferred from {@link UpworkClientReviewSchema}. */
export type UpworkClientReview = z.infer<typeof UpworkClientReviewSchema>;
38
+
39
/**
 * Schema for the client details attached to a job posting.
 *
 * The two verification flags are always booleans; every other scalar is
 * nullable so that absent markup is represented as `null`.
 */
export const UpworkClientSchema = z.object({
  // Verification flags.
  paymentVerified: z.boolean(),
  phoneVerified: z.boolean(),

  // Rating summary.
  rating: z.nullable(z.number()),
  reviewCount: z.nullable(z.number()),

  // Location.
  country: z.nullable(z.string()),
  city: z.nullable(z.string()),
  localTime: z.nullable(z.string()),

  // Posting and hiring statistics.
  jobsPosted: z.nullable(z.number()),
  hireRate: z.nullable(z.number()),
  openJobs: z.nullable(z.number()),
  totalSpent: z.nullable(z.string()),
  hires: z.nullable(z.number()),
  activeHires: z.nullable(z.number()),
  avgHourlyRatePaid: z.nullable(z.string()),
  totalHours: z.nullable(z.number()),
  memberSince: z.nullable(z.string()),

  // Recent work history entries; an empty array when none are found.
  recentHistory: z.array(UpworkClientReviewSchema),
});

/** Static type inferred from {@link UpworkClientSchema}. */
export type UpworkClient = z.infer<typeof UpworkClientSchema>;
76
+
77
/**
 * Schema for the complete result of parsing a job details page.
 *
 * Scalars that may be missing from the page are nullable; list-valued
 * fields are plain arrays and are empty when nothing is found.
 */
export const UpworkJobDetailsSchema = z.object({
  jobUrl: z.nullable(z.string()),
  projectType: z.nullable(z.string()),
  mandatorySkills: z.array(z.string()),

  preferredQualifications: z.object({
    location: z.nullable(z.string()),
  }),

  activity: z.object({
    // Kept as text rather than a number.
    proposals: z.nullable(z.string()),
    interviewing: z.nullable(z.number()),
    invitesSent: z.nullable(z.number()),
    unansweredInvites: z.nullable(z.number()),
  }),

  connectsRequired: z.nullable(z.number()),
  availableConnects: z.nullable(z.number()),

  client: UpworkClientSchema,

  otherOpenJobs: z.array(
    z.object({
      title: z.string(),
      type: z.nullable(z.string()),
    }),
  ),
});

/** Static type inferred from {@link UpworkJobDetailsSchema}. */
export type UpworkJobDetails = z.infer<typeof UpworkJobDetailsSchema>;
112
+
113
+ /* =============================================================================
114
+ * Utility Helpers
115
+ * ========================================================================== */
116
+
117
/**
 * Normalises whitespace in a piece of text.
 *
 * Every run of whitespace (including non-breaking spaces, which `\s`
 * matches in JavaScript) becomes a single space, and leading/trailing
 * whitespace is removed.
 *
 * @param input - Text to normalise; `null`/`undefined` are treated as "".
 * @returns The normalised text, or "" when there is nothing left.
 */
function cleanText(input?: string | null): string {
  if (input == null) {
    return "";
  }

  // Splitting on whitespace runs and dropping the empty edge tokens is
  // the same as collapsing the runs and trimming.
  const words = input.split(/\s+/).filter((word) => word.length > 0);

  return words.join(" ");
}
123
+
124
/**
 * Extracts the first number that appears in a piece of text.
 *
 * Accepts an optional leading minus sign, an optional decimal part, and
 * comma thousands separators ("1,234" or "1,234,567.89"). Commas are only
 * treated as separators when each one is followed by exactly three digits;
 * otherwise the number ends at the comma.
 *
 * @param input - Text to search; `null`, `undefined` and "" yield `null`.
 * @returns The parsed number, or `null` when the text contains no digits.
 */
function extractNumber(input?: string | null): number | null {
  if (!input) return null;

  // First alternative: comma-grouped digits (the lookahead rejects things
  // like "1,2345"). Second alternative: a plain digit run.
  const match = input.match(
    /-?(?:\d{1,3}(?:,\d{3})+(?!\d)|\d+)(?:\.\d+)?/,
  );

  if (!match) return null;

  // Number() does not understand grouping commas, so drop them first.
  return Number(match[0].replace(/,/g, ""));
}
133
+
134
/**
 * Reads the value that follows a `<strong>` label inside a container.
 *
 * Finds the first `<strong>` descendant of `$root` whose normalised text
 * starts with `label` (case-insensitive), takes the normalised text of that
 * element's parent and removes the label text from it.
 *
 * @param $root - Selection to search within; may be empty or hold several
 *   elements.
 * @param label - Label text to look for, e.g. "Location:".
 * @returns The text remaining next to the label, or `null` when no matching
 *   label exists or nothing remains once the label is removed.
 */
function extractStrongLabelValue(
  $root: cheerio.Cheerio<any>,
  label: string,
): string | null {
  const wanted = label.toLowerCase();
  const candidates = $root.find("strong");

  // Look each candidate up by index in the full candidate set so that
  // <strong> elements under every root element are considered, not only
  // those under the first one.
  const strong = candidates
    .filter((index) =>
      cleanText(candidates.eq(index).text())
        .toLowerCase()
        .startsWith(wanted),
    )
    .first();

  if (!strong.length) return null;

  const parentText = cleanText(strong.parent().text());

  // The parent text is whitespace-normalised, so the label has to be
  // normalised the same way or the removal below would not find it.
  const labelText = cleanText(strong.text());

  return cleanText(parentText.replace(labelText, "")) || null;
}
155
+
156
+ /* =============================================================================
157
+ * Main Parser
158
+ * ========================================================================== */
159
+
160
/**
 * Parses the HTML of an Upwork job details page into a validated
 * {@link UpworkJobDetails} object.
 *
 * Values that cannot be located in the markup are reported as `null`
 * (`false` for the verification flags, an empty array for lists) instead
 * of causing a failure.
 *
 * @param html - HTML source of the job details page.
 * @returns The extracted details, validated against
 *   `UpworkJobDetailsSchema`.
 * @throws If the assembled object, or any recent-history entry, fails
 *   schema validation (`parse` is used rather than `safeParse`).
 */
export function parseUpworkJobDetailsPage(
  html: string,
): UpworkJobDetails {
  const $ = cheerio.load(html);

  /* ===========================================================================
   * Local helpers
   * ========================================================================= */

  // Closest <section> around every <h5> whose normalised, lower-cased text
  // contains `heading` (which must itself be lower-case).
  const sectionByHeading = (heading: string) =>
    $("h5")
      .filter((_, el) =>
        cleanText($(el).text()).toLowerCase().includes(heading),
      )
      .closest("section");

  // First number that appears AFTER `label` in the page text. Every ancestor
  // <div> of the label also "contains" it, so the first match in document
  // order is the outermost one; slicing its text at the label keeps numbers
  // that occur earlier on the page from being picked up.
  const numberAfterLabel = (label: string): number | null => {
    const text = cleanText(
      $("div")
        .filter((_, el) => cleanText($(el).text()).includes(label))
        .first()
        .text(),
    );
    const labelAt = text.indexOf(label);

    return labelAt === -1
      ? null
      : extractNumber(text.slice(labelAt + label.length));
  };

  /* ===========================================================================
   * Project Type
   * ========================================================================= */

  const projectType =
    cleanText(
      $("li")
        .filter((_, el) =>
          cleanText($(el).text())
            .toLowerCase()
            .includes("project type:"),
        )
        .first()
        .text()
        .replace(/project type:/i, ""),
    ) || null;

  /* ===========================================================================
   * Skills
   * ========================================================================= */

  const mandatorySkills: string[] = [];

  sectionByHeading("skills and expertise")
    .find(".skills-list a")
    .each((_, el) => {
      const skill = cleanText($(el).text());

      if (skill) {
        mandatorySkills.push(skill);
      }
    });

  /* ===========================================================================
   * Preferred Qualifications
   * ========================================================================= */

  const preferredLocation = extractStrongLabelValue(
    sectionByHeading("preferred qualifications"),
    "Location:",
  );

  /* ===========================================================================
   * Activity
   * ========================================================================= */

  const activitySection = sectionByHeading("activity on this job");

  // Raw text of the `.value` node(s) inside the `.ca-item` whose text
  // contains `label` (lower-case).
  const activityValue = (label: string): string =>
    activitySection
      .find(".ca-item")
      .filter((_, el) =>
        cleanText($(el).text()).toLowerCase().includes(label),
      )
      .find(".value")
      .text();

  const proposals = cleanText(activityValue("proposals:")) || null;
  const interviewing = extractNumber(activityValue("interviewing:"));
  const invitesSent = extractNumber(activityValue("invites sent:"));
  const unansweredInvites = extractNumber(
    activityValue("unanswered invites:"),
  );

  /* ===========================================================================
   * Connects
   * ========================================================================= */

  const connectsRequired = numberAfterLabel("Send a proposal for:");
  const availableConnects = numberAfterLabel("Available Connects:");

  /* ===========================================================================
   * Job URL
   * ========================================================================= */

  const jobUrl =
    $("input[aria-label='Job link']").attr("value") ?? null;

  /* ===========================================================================
   * Client Section
   * ========================================================================= */

  const clientSection = $("[data-test='about-client-container']");

  // Normalised text of everything under `selector` in the client section.
  // Normalising first lets the single-space regexes below match even when
  // the markup separates words with newlines or repeated spaces.
  const clientField = (selector: string): string =>
    cleanText(clientSection.find(selector).text());

  const clientText = cleanText(clientSection.text()).toLowerCase();
  const paymentVerified = clientText.includes("payment method verified");
  const phoneVerified = clientText.includes("phone number verified");

  const ratingText = clientField("[data-testid='buyer-rating']");
  const rating = extractNumber(ratingText);
  // "reviews?" so that "of 1 review" is recognised as well.
  const reviewCount = extractNumber(
    ratingText.match(/of\s+(\d+)\s+reviews?/i)?.[1],
  );

  const country =
    cleanText(
      clientSection
        .find("[data-qa='client-location'] strong")
        .first()
        .text(),
    ) || null;

  const locationText = clientField("[data-qa='client-location'] div");

  // Everything before the first digit is taken as the city; the digits
  // that follow are matched separately as the local time.
  const city = locationText.split(/\d/)[0]?.trim() || null;
  const localTime =
    locationText.match(/\d{1,2}:\d{2}\s?[AP]M/i)?.[0] ?? null;

  const postingStats = clientField(
    "[data-qa='client-job-posting-stats']",
  );
  const jobsPosted = extractNumber(postingStats);
  const hireRate = extractNumber(
    postingStats.match(/(\d+)% hire rate/i)?.[1],
  );
  // Singular and plural: "1 open job" / "3 open jobs".
  const openJobs = extractNumber(
    postingStats.match(/(\d+) open jobs?/i)?.[1],
  );

  const totalSpent = clientField("[data-qa='client-spend']") || null;

  const hiresText = clientField("[data-qa='client-hires']");
  // Singular and plural: "1 hire" / "12 hires".
  const hires = extractNumber(hiresText.match(/(\d+) hires?/i)?.[1]);
  const activeHires = extractNumber(
    hiresText.match(/(\d+) active/i)?.[1],
  );

  const avgHourlyRatePaid =
    clientField("[data-qa='client-hourly-rate']") || null;

  const totalHours = extractNumber(
    clientField("[data-qa='client-hours']"),
  );

  const memberSince =
    clientField("[data-qa='client-contract-date']") || null;

  /* ===========================================================================
   * Recent History
   * ========================================================================= */

  const recentHistory: UpworkClientReview[] = [];

  $("[data-cy='job']").each((_, el) => {
    const $job = $(el);

    const jobTitle = cleanText(
      $job.find("[data-cy='job-title']").text(),
    );

    const feedback = cleanText(
      $job.find(".air3-truncation").first().text(),
    );

    const freelancerName = cleanText(
      $job.find("a[href*='/freelancers/']").first().text(),
    );

    const reviewRating = extractNumber(
      $job.find(".air3-rating-value-text").first().text(),
    );

    const stats = cleanText($job.find("[data-cy='stats']").text());

    // Text before the first "$" is the engagement type.
    const engagementType = stats.split("$")[0]?.trim() || null;

    // Any number of comma groups, so "$1,234,567.89" is kept whole.
    const amount =
      stats.match(/\$\d+(?:,\d+)*(?:\.\d+)?/)?.[0] ?? null;

    const dateRange = cleanText(
      $job.find("[data-cy='date']").text(),
    );

    recentHistory.push(
      UpworkClientReviewSchema.parse({
        jobTitle: jobTitle || null,
        feedback: feedback || null,
        freelancerName: freelancerName || null,
        rating: reviewRating,
        engagementType,
        amount,
        dateRange: dateRange || null,
      }),
    );
  });

  /* ===========================================================================
   * Other Open Jobs
   * ========================================================================= */

  const otherOpenJobs: Array<{
    title: string;
    type: string | null;
  }> = [];

  $("#otherOpenJobs li").each((_, el) => {
    const title = cleanText($(el).find("a").text());
    const type = cleanText($(el).find(".type").text());

    // Entries without a title are skipped.
    if (title) {
      otherOpenJobs.push({
        title,
        type: type || null,
      });
    }
  });

  /* ===========================================================================
   * Final Object
   * ========================================================================= */

  return UpworkJobDetailsSchema.parse({
    jobUrl,
    projectType,
    mandatorySkills,
    preferredQualifications: {
      location: preferredLocation,
    },
    activity: {
      proposals,
      interviewing,
      invitesSent,
      unansweredInvites,
    },
    connectsRequired,
    availableConnects,
    client: {
      paymentVerified,
      phoneVerified,
      rating,
      reviewCount,
      country,
      city,
      localTime,
      jobsPosted,
      hireRate,
      openJobs,
      totalSpent,
      hires,
      activeHires,
      avgHourlyRatePaid,
      totalHours,
      memberSince,
      recentHistory,
    },
    otherOpenJobs,
  });
}
573
+