@epsilon-asi/actors 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@epsilon-asi/actors",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.8",
|
|
4
4
|
"description": "A TypeScript Puppeteer actor framework using existing Chrome profiles and ghost-cursor.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -24,13 +24,14 @@
|
|
|
24
24
|
"clean": "rm -rf dist coverage"
|
|
25
25
|
},
|
|
26
26
|
"dependencies": {
|
|
27
|
+
"cheerio": "^1.2.0",
|
|
27
28
|
"ghost-cursor": "^1.4.2",
|
|
28
29
|
"puppeteer-core": "24.43.1"
|
|
29
30
|
},
|
|
30
31
|
"devDependencies": {
|
|
32
|
+
"@types/cheerio": "^0.22.35",
|
|
31
33
|
"@types/node": "^24.10.1",
|
|
32
34
|
"@vitest/coverage-v8": "^4.1.7",
|
|
33
|
-
|
|
34
35
|
"vitest": "^4.1.7"
|
|
35
36
|
},
|
|
36
37
|
"engines": {
|
|
@@ -1,97 +1,89 @@
|
|
|
1
1
|
import {defineLoginFlow} from '../../auth/LoginFlow.types.js';
|
|
2
2
|
import {defineActor} from '../../core/defineActor.js';
|
|
3
3
|
import {upworkComSelectors} from './upwork-com.selectors.js';
|
|
4
|
-
import type {
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
} from './upwork-com.types.js';
|
|
10
|
-
import {buildSearchParams, parseRate} from "./upwork-com.util.js";
|
|
4
|
+
import type {UpworkApplyToJobInput, UpworkApplyToJobResult, UpworkJobSearchFields} from './upwork-com.types.js';
|
|
5
|
+
import {parseRate} from "./upwork-com.util.js";
|
|
6
|
+
import {ScrapeDashboardInput} from "../example/index.js";
|
|
7
|
+
import {parseUpworkSearchResults, UpworkJobListing} from "./util/scrapeJobListing.js";
|
|
8
|
+
import {Page} from "puppeteer-core";
|
|
11
9
|
|
|
12
10
|
/**
 * Actor definition for upwork.com.
 *
 * Bundles the login flow (username -> continue -> password -> submit) with
 * three tasks:
 *  - `searchJobs`: opens a fixed job-search URL and returns the parsed listings.
 *  - `scrapeJobs`: parses the listings of whatever search page is currently open.
 *  - `applyToJob`: validates and normalizes the proposal input (cover letter and
 *    rate); it does not interact with the page.
 */
export const upworkComActor = defineActor({
    id: 'upwork-com',
    baseUrl: 'https://upwork.com',
    auth: defineLoginFlow({
        loginUrl: 'https://www.upwork.com/ab/account-security/login',
        selectors: {
            loggedInSignal: upworkComSelectors.login.loggedInSignal,
            errorMessage: upworkComSelectors.login.errorMessage
        },
        credentials: {id: 'upwork'},
        behavior: {
            authCheckUrl: '/',
            loggedInTimeoutMs: 5_000,
            errorTimeoutMs: 1_500,
            typing: {
                targetWordsPerMinute: 65,
                intervalJitterMs: 18
            }
        },
        steps: [
            {
                type: 'fill',
                name: 'username',
                selector: upworkComSelectors.login.username,
                credential: 'username'
            },
            {
                // The password field only appears after "continue" is clicked,
                // so this step waits for it before the next fill.
                type: 'click',
                name: 'continue to password',
                selector: upworkComSelectors.login.continueToPassword,
                waitForSelector: upworkComSelectors.login.password,
                waitForSelectorTimeoutMs: 5_000
            },
            {
                type: 'fill',
                name: 'password',
                selector: upworkComSelectors.login.password,
                credential: 'password'
            },
            {
                type: 'click',
                name: 'submit password',
                selector: upworkComSelectors.login.submit,
                submit: true,
                waitForNavigation: true,
                checkForError: false
            }
        ]
    }),
    tasks: {
        /**
         * Navigates to the job search page and returns every parsed listing.
         *
         * NOTE(review): the search filters are hard-coded in the URL below and
         * `_input` is currently ignored — confirm whether it should drive the query.
         */
        searchJobs: async (context, _input: UpworkJobSearchFields = {}): Promise<UpworkJobListing[]> => {
            // Fixed: the query previously searched for the misspelling "kuberentes".
            const path = '/nx/s/universal-search/jobs?category2_uid=531770282580668420,531770282580668419,531770282580668418&client_hires=1-9,10-&payment_verified=1&q=%27rancher%27%20or%20%27terraform%27%20or%20%27gitops%27%20or%20%27azure%27%20or%20%27microsoft%20azure%27%20or%20%27cloud%20architect%27%20or%20%27ai%20architect%27%20or%20%27forward%20deployed%20engineer%27%20or%20%27aws%27%20or%20%27aks%27%20or%20%27eks%27%20or%20%27gke%27%20or%20%27cloud%20engineer%27%20or%20devops%20or%20kubernetes%20or%20%27platform%20engineer%27%20or%20%27infrastructure%20engineer%27%20or%20"google%20cloud%20platform"%20or%20"GCP"%20or%20"langsmith"%20or%20"langgraph"%20or%20"gemini%20enterprise"&sort=recency&user_location_match=1';

            await context.nav.goto(path, {});

            return await parseUpworkSearchResults(context.session.page as Page);
        },
        /**
         * Parses the search results of the page that is already open (no navigation).
         *
         * Fixed: the parsed listings were awaited but never returned, so the task
         * always resolved to `undefined`. `_input` is currently unused.
         */
        scrapeJobs: async (context, _input: ScrapeDashboardInput = {}): Promise<UpworkJobListing[]> => {
            return await parseUpworkSearchResults(context.session.page as Page);
        },
        /**
         * Validates a proposal and returns its normalized form.
         *
         * @throws Error when the trimmed cover letter is empty, or when the parsed
         *         rate is not a finite positive number.
         */
        applyToJob: async (_context, input: UpworkApplyToJobInput): Promise<UpworkApplyToJobResult> => {
            const coverLetter = input.coverLetter.trim();
            if (coverLetter.length === 0) {
                throw new Error('coverLetter must be a non-empty string.');
            }

            const rate = parseRate(input.rate);
            if (!Number.isFinite(rate) || rate <= 0) {
                throw new Error('rate must be a positive number.');
            }

            return {
                coverLetter,
                rate
            };
        }
    }
});
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
const likeScript = `reactions.action_reactions(this, 1632181); `
|
|
95
|
-
const commentScript = `activity.comment_save(1630927, this);`
|
|
96
|
-
|
|
97
|
-
|
|
@@ -0,0 +1,573 @@
|
|
|
1
|
+
import * as cheerio from "cheerio";
|
|
2
|
+
import {z} from "zod/v4";
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* ============================================================================
|
|
6
|
+
* Upwork Job Details Page Parser
|
|
7
|
+
* ============================================================================
|
|
8
|
+
*
|
|
9
|
+
* Parses the FULL Upwork job details page HTML into a strongly typed object.
|
|
10
|
+
*
|
|
11
|
+
* Designed for:
|
|
12
|
+
* - Saved HTML snapshots
|
|
13
|
+
* - Browser automation scraping
|
|
14
|
+
* - Puppeteer / Playwright extraction
|
|
15
|
+
* - LangGraph / AI ingestion pipelines
|
|
16
|
+
*
|
|
17
|
+
* This parser intentionally avoids brittle class names wherever possible and
|
|
18
|
+
* instead relies on semantic structure and textual anchors.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
/* =============================================================================
|
|
22
|
+
* Zod Schemas
|
|
23
|
+
* ========================================================================== */
|
|
24
|
+
|
|
25
|
+
/**
 * One entry of a client's recent work history as shown on a job details page.
 * Every field is nullable because any of them may be absent from the markup.
 */
const clientReviewShape = {
    jobTitle: z.nullable(z.string()),
    feedback: z.nullable(z.string()),
    freelancerName: z.nullable(z.string()),
    rating: z.nullable(z.number()),
    engagementType: z.nullable(z.string()),
    amount: z.nullable(z.string()),
    dateRange: z.nullable(z.string()),
};

export const UpworkClientReviewSchema = z.object(clientReviewShape);

/** Static type inferred from {@link UpworkClientReviewSchema}. */
export type UpworkClientReview = z.infer<typeof UpworkClientReviewSchema>;
|
|
38
|
+
|
|
39
|
+
/**
 * The "about the client" panel of a job details page: verification flags,
 * rating, location, hiring statistics and recent work history.
 */
const clientShape = {
    // Verification badges
    paymentVerified: z.boolean(),
    phoneVerified: z.boolean(),

    // Feedback summary
    rating: z.nullable(z.number()),
    reviewCount: z.nullable(z.number()),

    // Location
    country: z.nullable(z.string()),
    city: z.nullable(z.string()),
    localTime: z.nullable(z.string()),

    // Posting / hiring statistics
    jobsPosted: z.nullable(z.number()),
    hireRate: z.nullable(z.number()),
    openJobs: z.nullable(z.number()),
    totalSpent: z.nullable(z.string()),
    hires: z.nullable(z.number()),
    activeHires: z.nullable(z.number()),
    avgHourlyRatePaid: z.nullable(z.string()),
    totalHours: z.nullable(z.number()),
    memberSince: z.nullable(z.string()),

    recentHistory: z.array(UpworkClientReviewSchema),
};

export const UpworkClientSchema = z.object(clientShape);

/** Static type inferred from {@link UpworkClientSchema}. */
export type UpworkClient = z.infer<typeof UpworkClientSchema>;
|
|
76
|
+
|
|
77
|
+
/** Activity counters shown for a job posting. */
const jobActivityShape = {
    proposals: z.nullable(z.string()),
    interviewing: z.nullable(z.number()),
    invitesSent: z.nullable(z.number()),
    unansweredInvites: z.nullable(z.number()),
};

/** A sibling job posting of the same client. */
const otherOpenJobShape = {
    title: z.string(),
    type: z.nullable(z.string()),
};

/**
 * Complete result of parsing a job details page: the posting itself, its
 * activity, the Connects figures, the client panel and the client's other
 * open jobs.
 */
export const UpworkJobDetailsSchema = z.object({
    jobUrl: z.nullable(z.string()),
    projectType: z.nullable(z.string()),
    mandatorySkills: z.array(z.string()),
    preferredQualifications: z.object({
        location: z.nullable(z.string()),
    }),
    activity: z.object(jobActivityShape),
    connectsRequired: z.nullable(z.number()),
    availableConnects: z.nullable(z.number()),
    client: UpworkClientSchema,
    otherOpenJobs: z.array(z.object(otherOpenJobShape)),
});

/** Static type inferred from {@link UpworkJobDetailsSchema}. */
export type UpworkJobDetails = z.infer<typeof UpworkJobDetailsSchema>;
|
|
112
|
+
|
|
113
|
+
/* =============================================================================
|
|
114
|
+
* Utility Helpers
|
|
115
|
+
* ========================================================================== */
|
|
116
|
+
|
|
117
|
+
/**
 * Normalizes scraped text: collapses every run of whitespace into a single
 * space and trims both ends.
 *
 * The ECMAScript `\s` class already matches U+00A0 (no-break space), so the
 * former separate `\u00a0` replacement could never match anything and has
 * been removed.
 *
 * @param input Raw text; `null` and `undefined` are treated as empty.
 * @returns The normalized text, or `""` when there is nothing left.
 */
function cleanText(input?: string | null): string {
    return (input ?? "").replace(/\s+/g, " ").trim();
}
|
|
123
|
+
|
|
124
|
+
/**
 * Extracts the first number that appears in a piece of text.
 *
 * Accepts an optional leading minus sign, an optional decimal part and
 * comma-grouped thousands, so `"1,234 hours"` yields `1234` rather than `1`.
 * A comma is only treated as a separator when it is followed by exactly three
 * digits; in `"5, 10"` the result is still `5`.
 *
 * @param input Text to scan; `null`, `undefined` and `""` yield `null`.
 * @returns The parsed number, or `null` when the text contains no digits.
 */
function extractNumber(input?: string | null): number | null {
    if (!input) return null;

    // First alternative: properly grouped thousands ("12,345.67").
    // Second alternative: a plain integer or decimal ("4.95").
    const match = input.match(
        /-?\d{1,3}(?:,\d{3})+(?!\d)(?:\.\d+)?|-?\d+(?:\.\d+)?/,
    );

    if (!match) return null;

    return Number(match[0].replace(/,/g, ""));
}
|
|
133
|
+
|
|
134
|
+
/**
 * Reads the value that follows a `<strong>` label, e.g. the "United States"
 * in `<li><strong>Location:</strong> United States</li>`.
 *
 * The first `<strong>` under `$root` whose normalized text starts with
 * `label` (case-insensitive) is selected; the value is the normalized text of
 * its parent with the label text removed.
 *
 * Changes from the previous version:
 *  - candidates are wrapped via `.eq(index)` instead of `$root.eq(0).find(el)`,
 *    so labels under any element of `$root` are found, not only the first;
 *  - the label text is normalized before it is removed from the (already
 *    normalized) parent text, so stray whitespace no longer leaves it behind;
 *  - an empty value is reported as `null`, not `""`.
 *
 * @param $root Cheerio selection to search in.
 * @param label Label text to look for, e.g. `"Location:"`.
 * @returns The value text, or `null` when the label is missing or has no value.
 */
function extractStrongLabelValue(
    $root: cheerio.Cheerio<any>,
    label: string,
): string | null {
    const wanted = label.toLowerCase();
    const candidates = $root.find("strong");

    const strong = candidates
        .filter((index) =>
            cleanText(candidates.eq(index).text())
                .toLowerCase()
                .startsWith(wanted),
        )
        .first();

    if (!strong.length) return null;

    const labelText = cleanText(strong.text());
    const parentText = cleanText(strong.parent().text());

    return cleanText(parentText.replace(labelText, "")) || null;
}
|
|
155
|
+
|
|
156
|
+
/* =============================================================================
|
|
157
|
+
* Main Parser
|
|
158
|
+
* ========================================================================== */
|
|
159
|
+
|
|
160
|
+
/**
 * Parses the HTML of an Upwork job details page into an `UpworkJobDetails`
 * object.
 *
 * Fields that cannot be located are reported as `null` (or an empty array);
 * the two verification flags are `false` when their text is absent.
 *
 * Changes from the previous version:
 *  - `connectsRequired` / `availableConnects` read the number that follows the
 *    label inside the innermost matching `<div>`. Previously the text of every
 *    ancestor `<div>` was concatenated and the first number on the page won.
 *  - Count patterns (reviews, open jobs, hires, active hires) and the review
 *    amount accept comma-grouped thousands.
 *  - Repeated lookups are factored into local helpers.
 *
 * @param html Full HTML source of the job details page.
 * @returns The parsed details, validated against `UpworkJobDetailsSchema`.
 * @throws If the assembled object fails schema validation.
 */
export function parseUpworkJobDetailsPage(
    html: string,
): UpworkJobDetails {
    const $ = cheerio.load(html);

    /* -------------------------------- helpers -------------------------------- */

    /** The `<section>` enclosing the `<h5>` whose text contains `heading` (lower-case). */
    const sectionByHeading = (heading: string) =>
        $("h5")
            .filter((_, el) =>
                cleanText($(el).text()).toLowerCase().includes(heading),
            )
            .closest("section");

    /** First number following `label` inside the innermost `<div>` that contains it. */
    const numberAfterLabel = (label: string): number | null => {
        // Every ancestor <div> also contains the label; the last match in
        // document order is the innermost one.
        const text = cleanText(
            $("div")
                .filter((_, el) => cleanText($(el).text()).includes(label))
                .last()
                .text(),
        );
        const labelAt = text.indexOf(label);

        return labelAt === -1
            ? null
            : extractNumber(text.slice(labelAt + label.length));
    };

    /** Number captured by group 1 of `pattern`, with thousands separators removed. */
    const capturedCount = (text: string, pattern: RegExp): number | null => {
        const digits = text.match(pattern)?.[1];

        return digits ? extractNumber(digits.replace(/,/g, "")) : null;
    };

    /* ------------------------------ project type ----------------------------- */

    const projectType =
        cleanText(
            $("li")
                .filter((_, el) =>
                    cleanText($(el).text())
                        .toLowerCase()
                        .includes("project type:"),
                )
                .first()
                .text()
                .replace(/project type:/i, ""),
        ) || null;

    /* --------------------------------- skills -------------------------------- */

    const mandatorySkills = sectionByHeading("skills and expertise")
        .find(".skills-list a")
        .toArray()
        .map((el) => cleanText($(el).text()))
        .filter((skill) => skill.length > 0);

    /* ------------------------ preferred qualifications ----------------------- */

    const preferredLocation = extractStrongLabelValue(
        sectionByHeading("preferred qualifications"),
        "Location:",
    );

    /* -------------------------------- activity ------------------------------- */

    const activitySection = sectionByHeading("activity on this job");

    /** Raw `.value` text of the activity item whose text contains `label` (lower-case). */
    const activityValue = (label: string): string =>
        activitySection
            .find(".ca-item")
            .filter((_, el) =>
                cleanText($(el).text()).toLowerCase().includes(label),
            )
            .find(".value")
            .text();

    // Proposals stays a string: the page shows a range such as "5 to 10".
    const proposals = cleanText(activityValue("proposals:")) || null;
    const interviewing = extractNumber(activityValue("interviewing:"));
    const invitesSent = extractNumber(activityValue("invites sent:"));
    const unansweredInvites = extractNumber(
        activityValue("unanswered invites:"),
    );

    /* -------------------------------- connects ------------------------------- */

    const connectsRequired = numberAfterLabel("Send a proposal for:");
    const availableConnects = numberAfterLabel("Available Connects:");

    /* --------------------------------- job url ------------------------------- */

    const jobUrl = $("input[aria-label='Job link']").attr("value") ?? null;

    /* ------------------------------ client panel ----------------------------- */

    const clientSection = $("[data-test='about-client-container']");
    const clientText = cleanText(clientSection.text()).toLowerCase();

    /** Normalized text of the elements matching `selector` inside the client panel. */
    const clientField = (selector: string): string =>
        cleanText(clientSection.find(selector).text());

    const paymentVerified = clientText.includes("payment method verified");
    const phoneVerified = clientText.includes("phone number verified");

    const buyerRatingText = clientField("[data-testid='buyer-rating']");
    const rating = extractNumber(buyerRatingText);
    const reviewCount = capturedCount(
        buyerRatingText,
        /of\s+([\d,]+)\s+reviews/i,
    );

    const country =
        cleanText(
            clientSection
                .find("[data-qa='client-location'] strong")
                .first()
                .text(),
        ) || null;

    // The location line holds the city followed by the local time, so
    // everything before the first digit is taken as the city.
    const locationText = clientField("[data-qa='client-location'] div");
    const city = locationText.split(/\d/)[0]?.trim() || null;
    const localTime =
        locationText.match(/\d{1,2}:\d{2}\s?[AP]M/i)?.[0] ?? null;

    const postingStatsText = clientField(
        "[data-qa='client-job-posting-stats']",
    );
    const jobsPosted = extractNumber(postingStatsText);
    const hireRate = capturedCount(postingStatsText, /(\d+)% hire rate/i);
    const openJobs = capturedCount(postingStatsText, /([\d,]+) open jobs/i);

    const totalSpent = clientField("[data-qa='client-spend']") || null;

    const hiresText = clientField("[data-qa='client-hires']");
    const hires = capturedCount(hiresText, /([\d,]+) hires/i);
    const activeHires = capturedCount(hiresText, /([\d,]+) active/i);

    const avgHourlyRatePaid =
        clientField("[data-qa='client-hourly-rate']") || null;
    const totalHours = extractNumber(clientField("[data-qa='client-hours']"));
    const memberSince =
        clientField("[data-qa='client-contract-date']") || null;

    /* ------------------------------ recent history --------------------------- */

    const recentHistory: UpworkClientReview[] = $("[data-cy='job']")
        .toArray()
        .map((el) => {
            const $job = $(el);
            const stats = cleanText($job.find("[data-cy='stats']").text());

            return UpworkClientReviewSchema.parse({
                jobTitle:
                    cleanText($job.find("[data-cy='job-title']").text()) ||
                    null,
                feedback:
                    cleanText($job.find(".air3-truncation").first().text()) ||
                    null,
                freelancerName:
                    cleanText(
                        $job.find("a[href*='/freelancers/']").first().text(),
                    ) || null,
                rating: extractNumber(
                    $job.find(".air3-rating-value-text").first().text(),
                ),
                // Text before the first "$" names the engagement type.
                engagementType: stats.split("$")[0]?.trim() || null,
                // Any number of thousands groups is allowed ("$1,234,567.89").
                amount:
                    stats.match(/\$\d+(?:,\d{3})*(?:\.\d+)?/)?.[0] ?? null,
                dateRange:
                    cleanText($job.find("[data-cy='date']").text()) || null,
            });
        });

    /* ------------------------------ other open jobs -------------------------- */

    const otherOpenJobs: UpworkJobDetails["otherOpenJobs"] = [];

    $("#otherOpenJobs li").each((_, el) => {
        const title = cleanText($(el).find("a").text());

        // Entries without a title carry no usable information.
        if (title) {
            otherOpenJobs.push({
                title,
                type: cleanText($(el).find(".type").text()) || null,
            });
        }
    });

    /* -------------------------------- final object --------------------------- */

    return UpworkJobDetailsSchema.parse({
        jobUrl,
        projectType,
        mandatorySkills,
        preferredQualifications: {
            location: preferredLocation,
        },
        activity: {
            proposals,
            interviewing,
            invitesSent,
            unansweredInvites,
        },
        connectsRequired,
        availableConnects,
        client: {
            paymentVerified,
            phoneVerified,
            rating,
            reviewCount,
            country,
            city,
            localTime,
            jobsPosted,
            hireRate,
            openJobs,
            totalSpent,
            hires,
            activeHires,
            avgHourlyRatePaid,
            totalHours,
            memberSince,
            recentHistory,
        },
        otherOpenJobs,
    });
}
|
|
573
|
+
|
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
import type {ElementHandle, Page} from "puppeteer-core";
|
|
2
|
+
import {z} from "zod/v4";
|
|
3
|
+
|
|
4
|
+
/* -------------------------------------------------------------------------------------------------
|
|
5
|
+
* Zod Schemas
|
|
6
|
+
* ------------------------------------------------------------------------------------------------- */
|
|
7
|
+
/**
 * Pricing model of a job listing. Each member's value equals its name, so the
 * enum values line up with the plain strings "hourly" and "fixed".
 */
export enum JobCategoryType {
    hourly = "hourly",
    fixed = "fixed",
}
|
|
11
|
+
|
|
12
|
+
/** Hourly pay range of a listing; `raw` keeps the text the numbers came from. */
const hourlyRateShape = {
    min: z.nullable(z.number()),
    max: z.nullable(z.number()),
    raw: z.nullable(z.string()),
};

/** Fixed-price budget of a listing; `raw` keeps the text the number came from. */
const fixedBudgetShape = {
    amount: z.nullable(z.number()),
    raw: z.nullable(z.string()),
};

/** Client summary shown on a search-result tile. */
const listingClientShape = {
    paymentVerified: z.boolean(),
    rating: z.nullable(z.number()),
    totalSpent: z.nullable(z.string()),
    location: z.nullable(z.string()),
};

/**
 * One job tile from an Upwork search-results page.
 */
export const UpworkJobListingSchema = z.object({
    id: z.string(),
    title: z.string(),
    url: z.string(),
    postedAtText: z.nullable(z.string()),
    proposalsText: z.nullable(z.string()),
    description: z.string(),
    skills: z.array(z.string()),
    category: z.object({
        type: z.nullable(z.enum(JobCategoryType)),
        raw: z.nullable(z.string()),
    }),
    hourlyRate: z.nullable(z.object(hourlyRateShape)),
    fixedBudget: z.nullable(z.object(fixedBudgetShape)),
    experienceLevel: z.nullable(z.string()),
    estimatedTime: z.nullable(z.string()),
    client: z.object(listingClientShape),
    rawText: z.string(),
});

/** Static type inferred from {@link UpworkJobListingSchema}. */
export type UpworkJobListing = z.infer<typeof UpworkJobListingSchema>;
|
|
63
|
+
|
|
64
|
+
/* -------------------------------------------------------------------------------------------------
|
|
65
|
+
* Helpers
|
|
66
|
+
* ------------------------------------------------------------------------------------------------- */
|
|
67
|
+
|
|
68
|
+
/**
 * Normalizes scraped text: collapses every run of whitespace into a single
 * space and trims both ends.
 *
 * The ECMAScript `\s` class already matches U+00A0 (no-break space), so the
 * former separate `\u00a0` replacement could never match anything and has
 * been removed.
 *
 * @param input Raw text; `null` and `undefined` are treated as empty.
 * @returns The normalized text, or `""` when there is nothing left.
 */
function cleanText(input?: string | null): string {
    return (input ?? "").replace(/\s+/g, " ").trim();
}
|
|
74
|
+
|
|
75
|
+
/**
 * Converts a money string such as `"$1,500.00"` into a number.
 *
 * Everything except digits and dots is discarded before conversion.
 *
 * @param value Text holding the amount; may be omitted.
 * @returns The numeric amount, or `null` when the input is missing/empty,
 *          contains no digits or dots, or does not form a valid number.
 */
function parseMoney(value?: string): number | null {
    const digitsAndDots = (value ?? "").replace(/[^0-9.]/g, "");

    if (digitsAndDots.length === 0) {
        return null;
    }

    const amount = Number(digitsAndDots);

    if (Number.isNaN(amount)) {
        return null;
    }

    return amount;
}
|
|
87
|
+
|
|
88
|
+
/**
 * Parses the hourly-rate text of a listing, e.g. `"Hourly: $15.00 - $30.00"`.
 *
 * Recognised forms:
 *  - a range separated by a hyphen, en dash or em dash -> `min` and `max`;
 *  - a single amount (`"Hourly: $40.00"`) -> `min` and `max` are both set to it.
 *
 * Previously only hyphen-separated ranges were understood; every other form
 * produced `min: null, max: null`.
 *
 * @param raw Text of the hourly job-info item, or `null` when there is none.
 * @returns `null` for missing/empty input; otherwise the parsed bounds plus
 *          the untouched `raw` text. Bounds are `null` when no `$` amount is
 *          present.
 */
function parseHourlyRate(
    raw: string | null,
): UpworkJobListing["hourlyRate"] {
    if (!raw) {
        return null;
    }

    // Group 1: lower bound. Group 2 (optional): upper bound after a dash.
    const matches = raw.match(
        /\$([\d,.]+)(?:\s*[-\u2013\u2014]\s*\$([\d,.]+))?/,
    );

    if (!matches) {
        return {
            min: null,
            max: null,
            raw,
        };
    }

    const min = parseMoney(matches[1]);
    // A single quoted rate is both the lower and the upper bound.
    const max = matches[2] === undefined ? min : parseMoney(matches[2]);

    return {
        min,
        max,
        raw,
    };
}
|
|
110
|
+
|
|
111
|
+
/**
 * Parses the fixed-price text of a listing, e.g. `"Fixed price - Est. budget: $500.00"`.
 *
 * @param raw Text of the fixed-price job-info item, or `null` when there is none.
 * @returns `null` for missing/empty input; otherwise the first `$` amount found
 *          (`null` when there is none) together with the untouched `raw` text.
 */
function parseFixedBudget(
    raw: string | null,
): UpworkJobListing["fixedBudget"] {
    if (raw === null || raw === "") {
        return null;
    }

    const captured = /\$([\d,.]+)/.exec(raw);
    const amount = captured === null ? null : parseMoney(captured[1]);

    return {amount, raw};
}
|
|
125
|
+
|
|
126
|
+
/* -------------------------------------------------------------------------------------------------
|
|
127
|
+
* Core Parser
|
|
128
|
+
* ------------------------------------------------------------------------------------------------- */
|
|
129
|
+
|
|
130
|
+
/**
 * Parses one search-result `<article>` into an `UpworkJobListing`.
 *
 * Raw strings are collected in a single `article.evaluate` call; the helpers
 * used there are declared inside the callback, which does not reference
 * anything from the enclosing module scope. Normalization and number parsing
 * happen afterwards.
 *
 * Fixed: optional text fields (`postedAtText`, `proposalsText`,
 * `category.raw`, `experienceLevel`, `estimatedTime`, `client.totalSpent`,
 * `client.location`) are now `null` when the element is missing or empty.
 * They used to go through `cleanText`, which turns `null` into `""`, so a
 * missing value was indistinguishable from an empty one and the schema's
 * `nullable()` was never exercised.
 *
 * @param article Handle to the listing's `<article>` element.
 * @returns The parsed listing, validated against `UpworkJobListingSchema`.
 * @throws If the assembled object fails schema validation.
 */
export async function parseUpworkJobListing(
    article: ElementHandle<Element>,
): Promise<UpworkJobListing> {
    const extracted = await article.evaluate((node) => {
        const getText = (selector: string): string | null =>
            node.querySelector(selector)?.textContent?.trim() ?? null;

        const getTexts = (selector: string): string[] =>
            Array.from(node.querySelectorAll(selector))
                .map((el) => el.textContent?.trim() ?? "")
                .filter(Boolean);

        const titleAnchor = node.querySelector(
            '[data-test="job-tile-title-link UpLink"]',
        ) as HTMLAnchorElement | null;

        const jobInfoItems = Array.from(
            node.querySelectorAll('[data-test="JobInfo"] li'),
        ).map((li) => li.textContent?.trim() ?? "");

        return {
            id:
                node.getAttribute("data-ev-job-uid") ??
                node.getAttribute("data-test-key") ??
                "",
            title: titleAnchor?.textContent?.trim() ?? "",
            url: titleAnchor?.href ?? "",
            postedAtText: getText(
                '[data-test="job-pubilshed-date"] span:first-child',
            ),
            proposalsText: getText('[data-test="proposals-tier"]'),
            description:
                getText('[data-test="UpCLineClamp JobDescription"] p') ?? "",
            skills: getTexts('[data-test="token"] span'),
            // The job-info item mentioning "hourly" / "fixed" decides the
            // category and carries the rate or budget text.
            hourlyRaw:
                jobInfoItems.find((x) => x.toLowerCase().includes("hourly")) ??
                null,
            fixedRaw:
                jobInfoItems.find((x) => x.toLowerCase().includes("fixed")) ??
                null,
            experienceLevel: getText('[data-test="experience-level"]'),
            estimatedTime: getText('[data-test="duration-label"]'),
            paymentVerified:
                node.querySelector('[data-test="payment-verified"]') !== null,
            ratingText: getText('[data-test="feedback-rating UpCRating"]'),
            spentText: getText('[data-test="total-spent"]'),
            locationText: getText('[data-test="location"]'),
            rawText: node.textContent ?? "",
        };
    });

    /** Normalized text, or `null` when nothing is left after normalization. */
    const textOrNull = (value: string | null): string | null =>
        cleanText(value) || null;

    // Hourly wins when both kinds of item are present, as before.
    let categoryType: JobCategoryType | null = null;
    if (extracted.hourlyRaw) {
        categoryType = JobCategoryType.hourly;
    } else if (extracted.fixedRaw) {
        categoryType = JobCategoryType.fixed;
    }

    const ratingMatch = extracted.ratingText?.match(/(\d+(\.\d+)?)/);

    const parsed: UpworkJobListing = {
        id: cleanText(extracted.id),
        title: cleanText(extracted.title),
        url: cleanText(extracted.url),
        postedAtText: textOrNull(extracted.postedAtText),
        proposalsText: textOrNull(extracted.proposalsText),
        description: cleanText(extracted.description),
        skills: extracted.skills.map((skill) => cleanText(skill)),
        category: {
            type: categoryType,
            raw: textOrNull(extracted.hourlyRaw ?? extracted.fixedRaw),
        },
        hourlyRate: parseHourlyRate(extracted.hourlyRaw),
        fixedBudget: parseFixedBudget(extracted.fixedRaw),
        experienceLevel: textOrNull(extracted.experienceLevel),
        estimatedTime: textOrNull(extracted.estimatedTime),
        client: {
            paymentVerified: extracted.paymentVerified,
            rating: ratingMatch ? Number(ratingMatch[1]) : null,
            totalSpent: textOrNull(extracted.spentText),
            location: textOrNull(extracted.locationText),
        },
        rawText: cleanText(extracted.rawText),
    };

    return UpworkJobListingSchema.parse(parsed);
}
|
|
281
|
+
|
|
282
|
+
/* -------------------------------------------------------------------------------------------------
|
|
283
|
+
* Multi-Listing Parser
|
|
284
|
+
* ------------------------------------------------------------------------------------------------- */
|
|
285
|
+
|
|
286
|
+
/**
 * Parses every job listing on an Upwork search-results page.
 *
 * Waits until at least one result `<article>` is present, then parses all of
 * them concurrently.
 *
 * Changes from the previous version:
 *  - the element handles are disposed once parsing has finished (successfully
 *    or not), so they no longer keep their DOM nodes referenced;
 *  - the selector is defined once instead of being repeated.
 *
 * @param page Page currently showing search results.
 * @returns The parsed listings, in the order `page.$$` returned the articles.
 * @throws Whatever `page.waitForSelector` rejects with when no result article
 *         appears (NOTE(review): presumably Puppeteer's timeout error —
 *         confirm), or the first error raised while parsing a listing.
 */
export async function parseUpworkSearchResults(
    page: Page,
): Promise<UpworkJobListing[]> {
    const articleSelector =
        '#main > section > article[data-ev-sublocation="search_results"]';

    await page.waitForSelector(articleSelector);

    const articles = await page.$$(articleSelector);

    try {
        return await Promise.all(
            articles.map((article) => parseUpworkJobListing(article)),
        );
    } finally {
        // Best-effort cleanup: a failed dispose must not hide the parse result
        // or the original parse error.
        await Promise.allSettled(
            articles.map((article) => article.dispose()),
        );
    }
}
|