linkedin-agent 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,256 @@
1
+ "use strict";
2
// __createBinding(o, m, k, k2): exposes property `k` of module object `m` on
// target `o` under the name `k2` (which defaults to `k`).
// - Reuses an existing `this.__createBinding` when one is already present.
// - When Object.create exists, the property is installed with
//   Object.defineProperty. The descriptor found on `m` is reused unless it is
//   missing, is an accessor on a module without `__esModule`, or is a
//   writable/configurable data property; in those cases an enumerable getter
//   that reads `m[k]` on every access is installed instead.
// - Without Object.create, the value is copied once with a plain assignment.
// NOTE(review): this looks like the interop helper emitted by the TypeScript
// compiler rather than hand-written code; confirm before editing by hand.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// __setModuleDefault(o, v): stores `v` as the "default" property of `o`.
// - Reuses an existing `this.__setModuleDefault` when one is already present.
// - When Object.create exists, "default" is defined as an enumerable property
//   via Object.defineProperty; otherwise it is set with a plain assignment.
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
// __importStar(mod): produces a namespace-style object for a required module.
// - Reuses an existing `this.__importStar` when one is already present.
// - A module flagged with `__esModule` is returned unchanged.
// - Otherwise a new object is built: every own property name of `mod` except
//   "default" is bound onto it with __createBinding, and `mod` itself becomes
//   its "default" via __setModuleDefault. A null/undefined `mod` yields an
//   object that only carries "default".
// - `ownKeys` replaces itself on first call with Object.getOwnPropertyNames,
//   or with a for-in/hasOwnProperty fallback when that function is unavailable.
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.getLinkedInPosts = getLinkedInPosts;
37
+ const fs = __importStar(require("fs"));
38
+ const path = __importStar(require("path"));
39
+ const browser_1 = require("./browser");
40
/**
 * Extracts post records from a parsed Voyager API response body.
 *
 * The payload is expected to carry an `included` array. Entries whose `$type`
 * is `com.linkedin.voyager.dash.feed.SocialActivityCounts` provide the
 * like/comment/share counters keyed by activity URN; entries that carry
 * `commentary.text.text` are treated as posts.
 *
 * Malformed entries are skipped one at a time, so a single unexpected item
 * never discards the valid posts around it.
 *
 * @param {*} data - Parsed JSON body of a response (any value is tolerated).
 * @returns {Array<{id: string, text: string, publishedAt: string,
 *   numLikes: number, numComments: number, numShares: number, url: string}>}
 *   Extracted posts in payload order; empty when `data.included` is not an array.
 */
function extractPosts(data) {
    const posts = [];
    const included = data?.included;
    if (!Array.isArray(included))
        return posts;
    // Step 1: Build a map of activity URN -> social counts
    const countsMap = new Map();
    for (const item of included) {
        if (item?.$type === "com.linkedin.voyager.dash.feed.SocialActivityCounts" && item?.urn) {
            countsMap.set(item.urn, {
                numLikes: item.numLikes || 0,
                numComments: item.numComments || 0,
                numShares: item.numShares || 0,
            });
        }
    }
    // Step 2: Find post items with commentary
    for (const item of included) {
        const commentary = item?.commentary?.text?.text;
        // Callers slice the text for logging, so only real, non-empty strings qualify.
        if (typeof commentary !== "string" || commentary === "")
            continue;
        // Extract activity URN from preDashEntityUrn or entityUrn
        const dashUrn = item.preDashEntityUrn || item.entityUrn;
        // A non-string URN cannot be matched; skip this item instead of throwing.
        if (typeof dashUrn !== "string")
            continue;
        const activityMatch = dashUrn.match(/activity:(\d+)/);
        if (!activityMatch)
            continue;
        const activityUrn = `urn:li:activity:${activityMatch[1]}`;
        // Match with social counts; posts without a counts entry default to zero.
        const counts = countsMap.get(activityUrn) || { numLikes: 0, numComments: 0, numShares: 0 };
        // The published time is read from the actor's subDescription text.
        const publishedAt = item.actor?.subDescription?.text || "";
        posts.push({
            id: activityUrn,
            text: commentary,
            publishedAt,
            numLikes: counts.numLikes,
            numComments: counts.numComments,
            numShares: counts.numShares,
            url: `https://www.linkedin.com/feed/update/${activityUrn}/`,
        });
    }
    return posts;
}
89
/**
 * Pulls the profile slug out of a LinkedIn profile URL such as
 * `https://www.linkedin.com/in/<username>/`.
 *
 * The slug ends at the first `/`, `?`, `#` or whitespace character, so
 * trailing slashes, query strings and fragments are never part of the result.
 *
 * @param {string} profileUrl - URL containing `linkedin.com/in/<username>`.
 * @returns {string} The username segment.
 * @throws {Error} When the value is not a string or holds no profile path.
 */
function extractUsernameFromUrl(profileUrl) {
    const match = typeof profileUrl === "string"
        ? profileUrl.match(/linkedin\.com\/in\/([^/?#\s]+)/)
        : null;
    if (!match) {
        throw new Error(`Invalid LinkedIn profile URL: ${String(profileUrl)}`);
    }
    return match[1];
}
96
/**
 * Detects a profile username using the given browser page.
 *
 * Strategy:
 *  1. Make sure the page is on the feed, then return the slug of the first
 *     anchor whose href contains `/in/`.
 *  2. If no such link exists, open `https://www.linkedin.com/me/` and read
 *     the slug from the URL the page ends up on.
 *
 * NOTE(review): step 1 takes the first `/in/` link in document order. That
 * is presumably the sidebar profile card, but nothing here verifies it
 * belongs to the logged-in member; confirm against the live feed markup.
 *
 * @param {object} page - Page object exposing `url`, `goto`,
 *   `waitForTimeout`, `evaluate` and `waitForURL` (assumed to be a
 *   Playwright Page; verify against `./browser`).
 * @returns {Promise<string>} The detected username.
 * @throws {Error} When neither strategy yields a username.
 */
async function getProfileUsername(page) {
    // Only navigate when the page is not already showing the feed.
    if (!page.url().includes("/feed")) {
        await page.goto("https://www.linkedin.com/feed/", { waitUntil: "domcontentloaded" });
        await page.waitForTimeout(3000);
    }
    // Method 1: first profile link found in the rendered page.
    const username = await page.evaluate(() => {
        const links = Array.from(document.querySelectorAll('a[href*="/in/"]'));
        for (const link of links) {
            const href = link.getAttribute("href") || "";
            const match = href.match(/\/in\/([^/?#]+)/);
            if (match)
                return match[1];
        }
        return null;
    });
    if (username)
        return username;
    // Method 2: navigate to /me/ and read the username from the resulting URL.
    await page.goto("https://www.linkedin.com/me/", { waitUntil: "commit" });
    try {
        await page.waitForURL(/linkedin\.com\/in\/[^/]+/, { timeout: 15_000 });
    }
    catch {
        // The wait failing is not fatal: the current URL is still inspected below.
    }
    // Stop at `?` and `#` so query strings and fragments never leak into the slug.
    const match = page.url().match(/linkedin\.com\/in\/([^/?#]+)/);
    if (match)
        return match[1];
    throw new Error("Could not detect profile username. Please provide a profile URL with --profile.");
}
136
/**
 * Repeatedly scrolls the page to the bottom so that more content can load.
 *
 * Scrolling ends when one of these happens:
 *  - `shouldStop()` returns a truthy value (checked before every scroll),
 *  - the document height stayed the same for `maxStalledChecks` checks in a row,
 *  - `maxScrolls` scrolls were performed.
 *
 * @param {object} page - Page object exposing `evaluate` and `waitForTimeout`.
 * @param {number} [maxScrolls=100] - Upper bound on scroll operations.
 * @param {() => boolean} [shouldStop] - Optional early-exit predicate.
 * @param {object} [options] - Optional tuning knobs.
 * @param {number} [options.delayMs=2000] - Pause after each scroll, in milliseconds.
 * @param {number} [options.maxStalledChecks=3] - Consecutive unchanged-height
 *   checks after which the page is considered fully loaded.
 * @returns {Promise<void>}
 */
async function autoScroll(page, maxScrolls = 100, shouldStop, options = {}) {
    const { delayMs = 2000, maxStalledChecks = 3 } = options;
    let previousHeight = 0;
    let stalledChecks = 0;
    for (let i = 0; i < maxScrolls; i++) {
        if (shouldStop?.()) {
            console.log(`Reached post limit. (${i} scrolls)`);
            return;
        }
        const currentHeight = await page.evaluate(() => document.body.scrollHeight);
        if (currentHeight === previousHeight) {
            stalledChecks++;
            if (stalledChecks >= maxStalledChecks) {
                console.log(`No more posts to load. (${i} scrolls)`);
                return;
            }
        }
        else {
            // The page grew, so start counting stalled checks from zero again.
            stalledChecks = 0;
        }
        previousHeight = currentHeight;
        await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
        // Pause after scrolling before the height is measured again.
        await page.waitForTimeout(delayMs);
        if (i % 5 === 0) {
            console.log(` ↓ Scrolling... (${i}/${maxScrolls})`);
        }
    }
    console.log(`Reached max scrolls (${maxScrolls}).`);
}
164
/**
 * Loads posts saved by an earlier run.
 *
 * Returns an empty list when the file is missing, is not valid JSON, or does
 * not hold an array; entries that are not objects are dropped. Callers can
 * therefore merge the result without further validation.
 *
 * @param {string} outputPath - Path of the JSON file to read.
 * @returns {Array<object>} Previously saved post objects (possibly empty).
 */
function readExistingPosts(outputPath) {
    if (!fs.existsSync(outputPath))
        return [];
    try {
        const parsed = JSON.parse(fs.readFileSync(outputPath, "utf-8"));
        if (!Array.isArray(parsed))
            return [];
        return parsed.filter((entry) => entry !== null && typeof entry === "object");
    }
    catch {
        // Unreadable or corrupted file: treat it as empty so it gets overwritten.
        return [];
    }
}
/**
 * Collects posts from a profile's recent-activity page and saves them as JSON.
 *
 * Posts are gathered by listening to `/voyager/api` JSON responses while the
 * page is scrolled. Results are written to
 * `posts_<username>_<YYYY-MM-DD>.json` inside the output directory; when that
 * file already exists, its entries are merged with the new ones by post id
 * (new data wins).
 *
 * @param {object} [options]
 * @param {string} [options.output] - Output directory (defaults to the current working directory).
 * @param {number} [options.maxScrolls] - Maximum number of scrolls (defaults to 100).
 * @param {string} [options.profile] - Profile URL; when omitted the username is detected from the session.
 * @param {number} [options.limit] - Stop scrolling once this many posts were collected.
 * @returns {Promise<Array<object>>} The merged list of posts that was written to disk.
 */
async function getLinkedInPosts(options = {}) {
    const outputDir = options.output || process.cwd();
    const maxScrolls = options.maxScrolls || 100;
    fs.mkdirSync(outputDir, { recursive: true });
    console.log("šŸš€ Starting LinkedIn post scraper...\n");
    const context = await (0, browser_1.launchBrowser)();
    try {
        const page = context.pages()[0] || await context.newPage();
        await (0, browser_1.ensureLoggedIn)(page);
        // Get profile username
        const username = options.profile
            ? extractUsernameFromUrl(options.profile)
            : await getProfileUsername(page);
        console.log(`šŸ‘¤ Profile: ${username}\n`);
        // Collect posts via network interception
        const allPosts = new Map();
        page.on("response", async (response) => {
            if (!response.url().includes("/voyager/api"))
                return;
            const contentType = response.headers()["content-type"] || "";
            if (!contentType.includes("json"))
                return;
            try {
                const json = await response.json();
                for (const post of extractPosts(json)) {
                    if (post.id && post.text && !allPosts.has(post.id)) {
                        allPosts.set(post.id, post);
                        console.log(` šŸ“ Found post #${allPosts.size}: "${post.text.substring(0, 50)}..."`);
                    }
                }
            }
            catch {
                // Not JSON or parse error - skip
            }
        });
        // Navigate to activity page
        const activityUrl = `https://www.linkedin.com/in/${username}/recent-activity/all/`;
        console.log(`šŸ“‚ Navigating to: ${activityUrl}\n`);
        await page.goto(activityUrl);
        await page.waitForLoadState("domcontentloaded");
        // Scroll to load all posts
        const postLimit = options.limit;
        console.log("šŸ“œ Loading posts...\n");
        await autoScroll(page, maxScrolls, postLimit ? () => allPosts.size >= postLimit : undefined);
        // Wait a bit for any remaining responses
        await page.waitForTimeout(3000);
        // Save results (merge with existing data)
        const collected = Array.from(allPosts.values());
        const newPosts = postLimit ? collected.slice(0, postLimit) : collected;
        const outputPath = path.join(outputDir, `posts_${username}_${new Date().toISOString().split("T")[0]}.json`);
        // The count comes from the validated list, so a corrupted or non-array
        // file contributes zero instead of a bogus number.
        const existing = readExistingPosts(outputPath);
        const existingCount = existing.length;
        const mergedMap = new Map();
        for (const p of existing)
            mergedMap.set(p.id, p);
        for (const p of newPosts)
            mergedMap.set(p.id, p);
        const merged = Array.from(mergedMap.values());
        fs.writeFileSync(outputPath, JSON.stringify(merged, null, 2), "utf-8");
        const newCount = merged.length - existingCount;
        if (existingCount > 0) {
            console.log(`\nāœ… Done! Merged ${newPosts.length} posts with ${existingCount} existing. Total: ${merged.length} (${newCount > 0 ? `+${newCount} new` : "no new"}).`);
        }
        else {
            console.log(`\nāœ… Done! Collected ${merged.length} posts.`);
        }
        console.log(`šŸ“ Saved to: ${outputPath}\n`);
        const posts = merged;
        // Print summary; entries loaded from disk may lack counters, so default to 0.
        const totalLikes = posts.reduce((sum, p) => sum + (p.numLikes || 0), 0);
        const totalComments = posts.reduce((sum, p) => sum + (p.numComments || 0), 0);
        console.log(`šŸ“Š Summary:`);
        console.log(` Posts: ${posts.length}`);
        console.log(` Likes: ${totalLikes}`);
        console.log(` Comments: ${totalComments}`);
        return posts;
    }
    finally {
        // Always release the browser context, including on failure.
        await context.close();
    }
}
package/package.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "name": "linkedin-agent",
3
+ "version": "1.0.0",
4
+ "description": "LinkedIn automation — scrape posts, publish, edit, delete via REST API",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "bin": {
8
+ "linkedin-agent": "dist/cli.js"
9
+ },
10
+ "files": [
11
+ "dist",
12
+ "assets",
13
+ "scripts"
14
+ ],
15
+ "scripts": {
16
+ "build": "tsc",
17
+ "prepublishOnly": "npm run build",
18
+ "postinstall": "node scripts/postinstall.js",
19
+ "scrape": "ts-node src/scraper.ts"
20
+ },
21
+ "keywords": [
22
+ "linkedin",
23
+ "scraper",
24
+ "posts",
25
+ "automation",
26
+ "playwright"
27
+ ],
28
+ "author": "",
29
+ "license": "ISC",
30
+ "type": "commonjs",
31
+ "repository": {
32
+ "type": "git",
33
+ "url": "git+https://github.com/product-engineer-community/linkedin-agent.git"
34
+ },
35
+ "homepage": "https://github.com/product-engineer-community/linkedin-agent#readme",
36
+ "bugs": {
37
+ "url": "https://github.com/product-engineer-community/linkedin-agent/issues"
38
+ },
39
+ "dependencies": {
40
+ "playwright": "^1.58.2"
41
+ },
42
+ "devDependencies": {
43
+ "@types/node": "^25.2.3",
44
+ "ts-node": "^10.9.2",
45
+ "typescript": "^5.9.3"
46
+ }
47
+ }
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env node
2
+ const { execSync } = require("child_process");
3
+
4
// Make sure the Chromium build used by Playwright is present after install.
//
// `playwright install --dry-run` only prints what would be installed and does
// not fail when the browser is missing, so it cannot serve as an "already
// installed" probe. The install command itself skips browsers that are already
// downloaded, so it is simply run every time.
try {
  console.log("Ensuring Playwright Chromium is installed...");
  execSync("npx playwright install chromium", { stdio: "inherit" });
} catch (err) {
  const reason = err instanceof Error ? err.message : String(err);
  console.error(`Failed to install Playwright Chromium: ${reason}`);
  console.error('Run "npx playwright install chromium" manually and try again.');
  // Keep signalling failure to the package manager, as an uncaught error would.
  process.exitCode = 1;
}