@epsilon-asi/actors 0.0.7 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/RuntimeConfig.d.ts.map +1 -1
- package/dist/browser/RuntimeConfig.js +3 -0
- package/dist/browser/RuntimeConfig.js.map +1 -1
- package/dist/sites/upwork-com/upwork-com.actor.d.ts +4 -1
- package/dist/sites/upwork-com/upwork-com.actor.d.ts.map +1 -1
- package/dist/sites/upwork-com/upwork-com.actor.js +7 -8
- package/dist/sites/upwork-com/upwork-com.actor.js.map +1 -1
- package/dist/sites/upwork-com/util/parseJobDetails.d.ts +105 -0
- package/dist/sites/upwork-com/util/parseJobDetails.d.ts.map +1 -0
- package/dist/sites/upwork-com/util/parseJobDetails.js +335 -0
- package/dist/sites/upwork-com/util/parseJobDetails.js.map +1 -0
- package/dist/sites/upwork-com/util/scrapeJobListing.d.ts +41 -0
- package/dist/sites/upwork-com/util/scrapeJobListing.d.ts.map +1 -0
- package/dist/sites/upwork-com/util/scrapeJobListing.js +190 -0
- package/dist/sites/upwork-com/util/scrapeJobListing.js.map +1 -0
- package/package.json +6 -2
- package/src/browser/RuntimeConfig.ts +4 -0
- package/src/sites/upwork-com/upwork-com.actor.ts +78 -85
- package/src/sites/upwork-com/util/parseJobDetails.ts +573 -0
- package/src/sites/upwork-com/util/scrapeJobListing.ts +300 -0
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
import type {ElementHandle, Page} from "puppeteer-core";
|
|
2
|
+
import {z} from "zod/v4";
|
|
3
|
+
|
|
4
|
+
/* -------------------------------------------------------------------------------------------------
|
|
5
|
+
* Zod Schemas
|
|
6
|
+
* ------------------------------------------------------------------------------------------------- */
|
|
7
|
+
/**
 * Payment model of a job listing.
 *
 * Each member's runtime value is the same lowercase string as its name, so the
 * values can be compared directly with plain string literals.
 */
export enum JobCategoryType {
  hourly = "hourly",
  fixed = "fixed",
}
|
|
11
|
+
|
|
12
|
+
/** Payment category of a listing: the recognised type plus the text it was derived from. */
const JobCategorySchema = z.object({
  type: z.enum(JobCategoryType, {}).nullable(),
  raw: z.string().nullable(),
});

/** Hourly range; `min`/`max` may be null while `raw` still carries the source text. */
const HourlyRateSchema = z.object({
  min: z.number().nullable(),
  max: z.number().nullable(),
  raw: z.string().nullable(),
});

/** Fixed-price budget; `amount` may be null while `raw` still carries the source text. */
const FixedBudgetSchema = z.object({
  amount: z.number().nullable(),
  raw: z.string().nullable(),
});

/** Client details attached to a listing. */
const ClientSchema = z.object({
  paymentVerified: z.boolean(),
  rating: z.number().nullable(),
  totalSpent: z.string().nullable(),
  location: z.string().nullable(),
});

/**
 * Shape of one parsed job listing.
 *
 * Fields ending in `Text`, and the `raw` members, hold text as scraped rather
 * than interpreted values. `hourlyRate` and `fixedBudget` are each nullable as
 * a whole.
 */
export const UpworkJobListingSchema = z.object({
  id: z.string(),
  title: z.string(),
  url: z.string(),
  postedAtText: z.string().nullable(),
  proposalsText: z.string().nullable(),
  description: z.string(),
  skills: z.array(z.string()),
  category: JobCategorySchema,
  hourlyRate: HourlyRateSchema.nullable(),
  fixedBudget: FixedBudgetSchema.nullable(),
  experienceLevel: z.string().nullable(),
  estimatedTime: z.string().nullable(),
  client: ClientSchema,
  rawText: z.string(),
});

/** Static type inferred from {@link UpworkJobListingSchema}. */
export type UpworkJobListing = z.infer<typeof UpworkJobListingSchema>;
|
|
63
|
+
|
|
64
|
+
/* -------------------------------------------------------------------------------------------------
|
|
65
|
+
* Helpers
|
|
66
|
+
* ------------------------------------------------------------------------------------------------- */
|
|
67
|
+
|
|
68
|
+
/**
 * Normalises scraped text to a single-line, trimmed string.
 *
 * @param input - Text to clean; `null` and `undefined` are treated as empty.
 * @returns The text with every whitespace run collapsed to one space and the
 *   ends trimmed. Never returns `null`: missing input yields `""`.
 */
function cleanText(input?: string | null): string {
  const text = input ?? "";

  // Collapse runs of whitespace (newlines, tabs, repeated spaces) first.
  const collapsed = text.replace(/\s+/g, " ");

  // In JavaScript `\s` already matches U+00A0, so this pass is a safeguard
  // that normally finds nothing left to replace.
  const withoutNbsp = collapsed.replace(/\u00a0/g, " ");

  return withoutNbsp.trim();
}
|
|
74
|
+
|
|
75
|
+
/**
 * Converts a money-like string such as `"$1,234.50"` to a number.
 *
 * Every character other than an ASCII digit or `.` is discarded before the
 * conversion, so currency symbols and thousands separators are ignored.
 *
 * @param value - Text to convert; may be omitted.
 * @returns The numeric value, or `null` when the input is missing or empty,
 *   contains no digits or dots, or does not form a valid number (for example
 *   `"1.2.3"`).
 */
function parseMoney(value?: string): number | null {
  const digits = value ? value.replace(/[^0-9.]/g, "") : "";

  if (digits === "") {
    return null;
  }

  const amount = Number(digits);

  if (Number.isNaN(amount)) {
    return null;
  }

  return amount;
}
|
|
87
|
+
|
|
88
|
+
/**
 * Interprets the hourly-rate text of a listing.
 *
 * @param raw - Hourly text as scraped, or `null` when the listing has none.
 * @returns `null` when `raw` is null or empty. Otherwise an object that always
 *   echoes `raw`; `min` and `max` are filled only when the text contains a
 *   `$<low> - $<high>` range, and are both `null` when it does not.
 */
function parseHourlyRate(
  raw: string | null,
): UpworkJobListing["hourlyRate"] {
  if (!raw) {
    return null;
  }

  const range = /\$([\d,.]+)\s*-\s*\$([\d,.]+)/.exec(raw);
  const hasBothBounds = range !== null && range.length >= 3;

  const low = hasBothBounds ? parseMoney(range[1]) : null;
  const high = hasBothBounds ? parseMoney(range[2]) : null;

  return {min: low, max: high, raw};
}
|
|
110
|
+
|
|
111
|
+
/**
 * Interprets the fixed-price text of a listing.
 *
 * @param raw - Fixed-price text as scraped, or `null` when the listing has none.
 * @returns `null` when `raw` is null or empty. Otherwise an object that always
 *   echoes `raw`; `amount` is taken from the first `$<number>` found in the
 *   text and is `null` when there is none.
 */
function parseFixedBudget(
  raw: string | null,
): UpworkJobListing["fixedBudget"] {
  if (!raw) {
    return null;
  }

  const found = /\$([\d,.]+)/.exec(raw);
  const amount = found === null ? null : parseMoney(found[1]);

  return {amount, raw};
}
|
|
125
|
+
|
|
126
|
+
/* -------------------------------------------------------------------------------------------------
|
|
127
|
+
* Core Parser
|
|
128
|
+
* ------------------------------------------------------------------------------------------------- */
|
|
129
|
+
|
|
130
|
+
/**
 * Parses one search-result job tile into a validated {@link UpworkJobListing}.
 *
 * Extraction happens in two steps: raw strings are read from the DOM inside
 * the page, then normalised and interpreted on the Node side.
 *
 * Optional text fields (`postedAtText`, `proposalsText`, `experienceLevel`,
 * `estimatedTime`, `client.totalSpent`, `client.location`, `category.raw`)
 * are `null` when the corresponding element is absent, as the schema allows.
 * Required strings (`id`, `title`, `url`, `description`, `rawText`) fall back
 * to `""`.
 *
 * @param article - Handle to the listing's root element.
 * @returns The parsed listing.
 * @throws If the assembled object fails `UpworkJobListingSchema` validation,
 *   or if evaluating in the page fails.
 */
export async function parseUpworkJobListing(
  article: ElementHandle<Element>,
): Promise<UpworkJobListing> {
  // The callback is executed in the page, so it may only use `node` and the
  // helpers declared inside it -- nothing from this module's scope.
  const extracted = await article.evaluate((node) => {
    const getText = (selector: string): string | null =>
      node.querySelector(selector)?.textContent?.trim() ?? null;

    const getTexts = (selector: string): string[] =>
      Array.from(node.querySelectorAll(selector))
        .map((el) => el.textContent?.trim() ?? "")
        .filter(Boolean);

    const titleAnchor = node.querySelector<HTMLAnchorElement>(
      '[data-test="job-tile-title-link UpLink"]',
    );

    const jobInfoItems = Array.from(
      node.querySelectorAll('[data-test="JobInfo"] li'),
    ).map((li) => li.textContent?.trim() ?? "");

    // First job-info entry mentioning the keyword (case-insensitive).
    const findJobInfo = (keyword: string): string | null =>
      jobInfoItems.find((item) => item.toLowerCase().includes(keyword)) ?? null;

    return {
      id:
        node.getAttribute("data-ev-job-uid") ??
        node.getAttribute("data-test-key") ??
        "",
      title: titleAnchor?.textContent?.trim() ?? "",
      url: titleAnchor?.href ?? "",

      // NOTE(review): "pubilshed" is kept exactly as written; presumably it
      // mirrors the attribute value in the site's markup -- confirm against
      // the live page before changing it.
      postedAtText: getText('[data-test="job-pubilshed-date"] span:first-child'),
      proposalsText: getText('[data-test="proposals-tier"]'),
      description:
        getText('[data-test="UpCLineClamp JobDescription"] p') ?? "",
      skills: getTexts('[data-test="token"] span'),

      hourlyRaw: findJobInfo("hourly"),
      fixedRaw: findJobInfo("fixed"),

      experienceLevel: getText('[data-test="experience-level"]'),
      estimatedTime: getText('[data-test="duration-label"]'),

      paymentVerified:
        node.querySelector('[data-test="payment-verified"]') !== null,
      ratingText: getText('[data-test="feedback-rating UpCRating"]'),
      spentText: getText('[data-test="total-spent"]'),
      locationText: getText('[data-test="location"]'),

      rawText: node.textContent ?? "",
    };
  });

  // Keep "element not found" (null) distinct from text that was found;
  // cleanText alone would turn null into "".
  const cleanOrNull = (value: string | null): string | null =>
    value === null ? null : cleanText(value);

  // Hourly wins when a tile somehow mentions both payment models.
  let categoryType: JobCategoryType | null = null;
  if (extracted.hourlyRaw) {
    categoryType = JobCategoryType.hourly;
  } else if (extracted.fixedRaw) {
    categoryType = JobCategoryType.fixed;
  }

  // First integer or decimal number in the rating text.
  const ratingMatch = extracted.ratingText?.match(/(\d+(\.\d+)?)/);

  const parsed: UpworkJobListing = {
    id: cleanText(extracted.id),
    title: cleanText(extracted.title),
    url: cleanText(extracted.url),

    postedAtText: cleanOrNull(extracted.postedAtText),
    proposalsText: cleanOrNull(extracted.proposalsText),

    description: cleanText(extracted.description),
    skills: extracted.skills.map((skill) => cleanText(skill)),

    category: {
      type: categoryType,
      raw: cleanOrNull(extracted.hourlyRaw ?? extracted.fixedRaw),
    },

    hourlyRate: parseHourlyRate(extracted.hourlyRaw),
    fixedBudget: parseFixedBudget(extracted.fixedRaw),

    experienceLevel: cleanOrNull(extracted.experienceLevel),
    estimatedTime: cleanOrNull(extracted.estimatedTime),

    client: {
      paymentVerified: extracted.paymentVerified,
      rating: ratingMatch ? Number(ratingMatch[1]) : null,
      totalSpent: cleanOrNull(extracted.spentText),
      location: cleanOrNull(extracted.locationText),
    },

    rawText: cleanText(extracted.rawText),
  };

  // Runtime validation guards against drift between this code and the schema.
  return UpworkJobListingSchema.parse(parsed);
}
|
|
281
|
+
|
|
282
|
+
/* -------------------------------------------------------------------------------------------------
|
|
283
|
+
* Multi-Listing Parser
|
|
284
|
+
* ------------------------------------------------------------------------------------------------- */
|
|
285
|
+
|
|
286
|
+
/**
 * Parses every job tile on a search-results page.
 *
 * Waits for at least one result tile to appear, then parses all matching
 * tiles concurrently. Element handles obtained along the way are disposed
 * before returning, whether parsing succeeds or fails.
 *
 * @param page - Page already navigated to a search-results view.
 * @returns One listing per result tile, in document order.
 * @throws If no result tile appears before `waitForSelector` gives up, or if
 *   any single tile fails to parse (the first rejection is propagated).
 */
export async function parseUpworkSearchResults(
  page: Page,
): Promise<UpworkJobListing[]> {
  const articleSelector =
    '#main > section > article[data-ev-sublocation="search_results"]';

  // Only used as a readiness signal; release the returned handle right away.
  const firstMatch = await page.waitForSelector(articleSelector);
  await firstMatch?.dispose().catch(() => undefined);

  const articles = await page.$$(articleSelector);

  try {
    return await Promise.all(
      articles.map((article) => parseUpworkJobListing(article)),
    );
  } finally {
    // Best-effort cleanup: a failed dispose must not mask the parse outcome.
    await Promise.all(
      articles.map((article) => article.dispose().catch(() => undefined)),
    );
  }
}
|