@epsilon-asi/actors 0.0.7 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,300 @@
1
+ import type {ElementHandle, Page} from "puppeteer-core";
2
+ import {z} from "zod/v4";
3
+
4
+ /* -------------------------------------------------------------------------------------------------
5
+ * Zod Schemas
6
+ * ------------------------------------------------------------------------------------------------- */
7
/**
 * Pricing models a job listing can be classified under.
 *
 * Every member's runtime value is identical to its name, so a member compares
 * equal to the matching plain string (`"hourly"` / `"fixed"`).
 */
export enum JobCategoryType {
  hourly = "hourly",
  fixed = "fixed",
}
11
+
12
/** Pricing model of the job plus the raw text it was derived from. */
const JobCategorySchema = z.object({
  type: z.enum(JobCategoryType, {}).nullable(),
  raw: z.string().nullable(),
});

/** Hourly pay range; either bound may be `null`, alongside the raw text. */
const HourlyRateSchema = z.object({
  min: z.number().nullable(),
  max: z.number().nullable(),
  raw: z.string().nullable(),
});

/** Fixed-price budget; `amount` may be `null`, alongside the raw text. */
const FixedBudgetSchema = z.object({
  amount: z.number().nullable(),
  raw: z.string().nullable(),
});

/** Facts about the client that posted the job. */
const ClientSchema = z.object({
  paymentVerified: z.boolean(),
  rating: z.number().nullable(),
  totalSpent: z.string().nullable(),
  location: z.string().nullable(),
});

/**
 * Shape of a single parsed job listing.
 *
 * `id`, `title`, `url`, `description`, `skills` and `rawText` are always
 * present; the remaining fields are nullable.
 */
export const UpworkJobListingSchema = z.object({
  id: z.string(),
  title: z.string(),
  url: z.string(),
  postedAtText: z.string().nullable(),
  proposalsText: z.string().nullable(),
  description: z.string(),
  skills: z.array(z.string()),
  category: JobCategorySchema,
  hourlyRate: HourlyRateSchema.nullable(),
  fixedBudget: FixedBudgetSchema.nullable(),
  experienceLevel: z.string().nullable(),
  estimatedTime: z.string().nullable(),
  client: ClientSchema,
  rawText: z.string(),
});

/** Static type inferred from {@link UpworkJobListingSchema}. */
export type UpworkJobListing = z.infer<typeof UpworkJobListingSchema>;
63
+
64
+ /* -------------------------------------------------------------------------------------------------
65
+ * Helpers
66
+ * ------------------------------------------------------------------------------------------------- */
67
+
68
/**
 * Normalises a piece of scraped text.
 *
 * A missing value (`null` or `undefined`) is treated as the empty string.
 * Runs of whitespace are collapsed to a single space, non-breaking spaces are
 * replaced with regular spaces, and the result is trimmed.
 *
 * @param input - Text to normalise; may be absent.
 * @returns The normalised text, never `null`.
 */
function cleanText(input?: string | null): string {
  const text = input ?? "";
  const collapsed = text.replace(/\s+/g, " ");
  const withoutNbsp = collapsed.replace(/\u00a0/g, " ");

  return withoutNbsp.trim();
}
74
+
75
/**
 * Converts a money-like string (e.g. `"$1,234.50"`) into a number.
 *
 * Every character other than an ASCII digit or `.` is discarded before the
 * conversion.
 *
 * @param value - Text that may contain an amount.
 * @returns The numeric amount, or `null` when the input is missing or empty,
 *   contains no digits or dots, or what remains is not a valid number
 *   (for example `"1.2.3"`).
 */
function parseMoney(value?: string): number | null {
  const digitsAndDots = (value ?? "").replace(/[^0-9.]/g, "");

  if (digitsAndDots === "") {
    return null;
  }

  const amount = Number(digitsAndDots);

  if (Number.isNaN(amount)) {
    return null;
  }

  return amount;
}
87
+
88
/**
 * Extracts an hourly pay range such as `"$15.00 - $30.00"` from raw text.
 *
 * The two amounts may be separated by an ASCII hyphen or by a typographic
 * dash (U+2010 through U+2015, or the minus sign U+2212), with optional
 * whitespace around the separator.
 *
 * @param raw - Raw text of the hourly item, or `null` when there is none.
 * @returns `null` when `raw` is `null` or empty. Otherwise an object that
 *   always carries `raw` unchanged, with `min`/`max` set from the range, or
 *   both `null` when no `$a - $b` range is present in the text.
 */
function parseHourlyRate(
  raw: string | null,
): UpworkJobListing["hourlyRate"] {
  if (!raw) {
    return null;
  }

  const range = /\$([\d,.]+)\s*[-\u2010-\u2015\u2212]\s*\$([\d,.]+)/.exec(raw);

  // A successful match always carries both capture groups, so checking for a
  // missing match is sufficient.
  if (range === null) {
    return {min: null, max: null, raw};
  }

  return {
    min: parseMoney(range[1]),
    max: parseMoney(range[2]),
    raw,
  };
}
110
+
111
/**
 * Extracts the first dollar amount from the raw text of a fixed-price item.
 *
 * @param raw - Raw text of the fixed-price item, or `null` when there is none.
 * @returns `null` when `raw` is `null` or empty. Otherwise an object carrying
 *   `raw` unchanged and `amount`, which is `null` when the text contains no
 *   `$`-prefixed amount.
 */
function parseFixedBudget(
  raw: string | null,
): UpworkJobListing["fixedBudget"] {
  if (raw === null || raw === "") {
    return null;
  }

  const found = /\$([\d,.]+)/.exec(raw);

  let amount: number | null = null;
  if (found !== null) {
    amount = parseMoney(found[1]);
  }

  return {amount, raw};
}
125
+
126
+ /* -------------------------------------------------------------------------------------------------
127
+ * Core Parser
128
+ * ------------------------------------------------------------------------------------------------- */
129
+
130
/**
 * Normalises optional scraped text without losing the "missing" signal.
 *
 * @param input - Text that may be absent.
 * @returns `null` when `input` is `null`/`undefined` or is empty once
 *   normalised by `cleanText`; otherwise the normalised text.
 */
function cleanNullableText(input: string | null | undefined): string | null {
  if (input == null) {
    return null;
  }

  const cleaned = cleanText(input);

  return cleaned === "" ? null : cleaned;
}

/**
 * Parses one search-result `<article>` element into an `UpworkJobListing`.
 *
 * Raw strings are read from the element in a single `evaluate` round trip and
 * then normalised, parsed and validated on the Node side.
 *
 * Nullable fields (`postedAtText`, `proposalsText`, `category.raw`,
 * `experienceLevel`, `estimatedTime`, `client.totalSpent`,
 * `client.location`) are `null` when the corresponding element is absent or
 * has no text. Required string fields fall back to `""`.
 *
 * @param article - Handle to the job tile element.
 * @returns The validated listing.
 * @throws If the assembled object does not satisfy `UpworkJobListingSchema`.
 */
export async function parseUpworkJobListing(
  article: ElementHandle<Element>,
): Promise<UpworkJobListing> {
  // Puppeteer runs this callback inside the page, so it must be
  // self-contained: nothing from the enclosing module may be referenced here.
  const extracted = await article.evaluate((node) => {
    const textOf = (element: Element | null): string | null =>
      element?.textContent?.trim() ?? null;

    const getText = (selector: string): string | null =>
      textOf(node.querySelector(selector));

    const getTexts = (selector: string): string[] =>
      Array.from(node.querySelectorAll(selector))
        .map((el) => el.textContent?.trim() ?? "")
        .filter(Boolean);

    const titleAnchor = node.querySelector<HTMLAnchorElement>(
      '[data-test="job-tile-title-link UpLink"]',
    );

    const jobInfoItems = Array.from(
      node.querySelectorAll('[data-test="JobInfo"] li'),
    ).map((li) => li.textContent?.trim() ?? "");

    // The first info item mentioning each pricing model is kept verbatim.
    const hourlyItem =
      jobInfoItems.find((x) => x.toLowerCase().includes("hourly")) ?? null;
    const fixedItem =
      jobInfoItems.find((x) => x.toLowerCase().includes("fixed")) ?? null;

    return {
      id:
        node.getAttribute("data-ev-job-uid") ??
        node.getAttribute("data-test-key") ??
        "",

      title: titleAnchor?.textContent?.trim() ?? "",
      url: titleAnchor?.href ?? "",

      // NOTE(review): "pubilshed" is spelled exactly as in the original
      // selector; confirm against the live markup before changing it.
      postedAtText: getText(
        '[data-test="job-pubilshed-date"] span:first-child',
      ),
      proposalsText: getText('[data-test="proposals-tier"]'),

      description:
        getText('[data-test="UpCLineClamp JobDescription"] p') ?? "",

      skills: getTexts('[data-test="token"] span'),

      hourlyRaw: hourlyItem,
      fixedRaw: fixedItem,

      experienceLevel: getText('[data-test="experience-level"]'),
      estimatedTime: getText('[data-test="duration-label"]'),

      paymentVerified:
        node.querySelector('[data-test="payment-verified"]') !== null,

      ratingText: getText('[data-test="feedback-rating UpCRating"]'),
      spentText: getText('[data-test="total-spent"]'),
      locationText: getText('[data-test="location"]'),

      rawText: node.textContent ?? "",
    };
  });

  // Hourly wins when a tile mentions both pricing models.
  const categoryType = extracted.hourlyRaw
    ? JobCategoryType.hourly
    : extracted.fixedRaw
      ? JobCategoryType.fixed
      : null;

  // First integer or decimal number in the rating text, if any.
  const ratingMatch = extracted.ratingText?.match(/(\d+(\.\d+)?)/);

  const parsed: UpworkJobListing = {
    id: cleanText(extracted.id),
    title: cleanText(extracted.title),
    url: cleanText(extracted.url),

    postedAtText: cleanNullableText(extracted.postedAtText),
    proposalsText: cleanNullableText(extracted.proposalsText),

    description: cleanText(extracted.description),
    skills: extracted.skills.map((skill) => cleanText(skill)),

    category: {
      type: categoryType,
      raw: cleanNullableText(extracted.hourlyRaw ?? extracted.fixedRaw),
    },

    hourlyRate: parseHourlyRate(extracted.hourlyRaw),
    fixedBudget: parseFixedBudget(extracted.fixedRaw),

    experienceLevel: cleanNullableText(extracted.experienceLevel),
    estimatedTime: cleanNullableText(extracted.estimatedTime),

    client: {
      paymentVerified: extracted.paymentVerified,
      rating: ratingMatch ? Number(ratingMatch[1]) : null,
      totalSpent: cleanNullableText(extracted.spentText),
      location: cleanNullableText(extracted.locationText),
    },

    rawText: cleanText(extracted.rawText),
  };

  return UpworkJobListingSchema.parse(parsed);
}
281
+
282
+ /* -------------------------------------------------------------------------------------------------
283
+ * Multi-Listing Parser
284
+ * ------------------------------------------------------------------------------------------------- */
285
+
286
/**
 * Parses every job tile on a search-results page.
 *
 * Waits for at least one result article to appear, parses all matching
 * articles concurrently, and releases the element handles afterwards whether
 * or not parsing succeeded.
 *
 * @param page - Page that is showing (or loading) search results.
 * @returns One listing per result article, in document order.
 * @throws If no result article appears before `waitForSelector` gives up, or
 *   if any article fails to parse. When several fail, the error of the first
 *   failing article in document order is thrown.
 */
export async function parseUpworkSearchResults(
  page: Page,
): Promise<UpworkJobListing[]> {
  const resultSelector =
    '#main > section > article[data-ev-sublocation="search_results"]';

  await page.waitForSelector(resultSelector);

  const articles = await page.$$(resultSelector);

  // Let every parse settle before disposing, so no handle is released while
  // an evaluate call on it is still in flight.
  const outcomes = await Promise.allSettled(
    articles.map((article) => parseUpworkJobListing(article)),
  );

  // Best-effort cleanup: a failed dispose must not mask the parse results.
  await Promise.allSettled(articles.map((article) => article.dispose()));

  const listings: UpworkJobListing[] = [];

  for (const outcome of outcomes) {
    if (outcome.status === "rejected") {
      throw outcome.reason;
    }

    listings.push(outcome.value);
  }

  return listings;
}