linkedin-automation-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314) hide show
  1. package/.env.example +12 -0
  2. package/.github/workflows/ci.yml +66 -0
  3. package/.github/workflows/publish.yml +48 -0
  4. package/.husky/pre-commit +6 -0
  5. package/.prettierignore +4 -0
  6. package/.prettierrc +10 -0
  7. package/AGENTS.md +294 -0
  8. package/CHANGELOG.md +40 -0
  9. package/GIT_RELEASE.md +167 -0
  10. package/LICENSE +21 -0
  11. package/Makefile +30 -0
  12. package/NPM_PUBLISHING.md +230 -0
  13. package/PYEOF +0 -0
  14. package/README.md +295 -0
  15. package/TESTING-GUIDE.md +151 -0
  16. package/cmd/linkedin/main.go +9 -0
  17. package/dist/agent/action-executor.d.ts +81 -0
  18. package/dist/agent/action-executor.d.ts.map +1 -0
  19. package/dist/agent/action-executor.js +170 -0
  20. package/dist/agent/action-executor.js.map +1 -0
  21. package/dist/agent/action-executor.test.d.ts +2 -0
  22. package/dist/agent/action-executor.test.d.ts.map +1 -0
  23. package/dist/agent/action-executor.test.js +366 -0
  24. package/dist/agent/action-executor.test.js.map +1 -0
  25. package/dist/agent/claude-client.d.ts +74 -0
  26. package/dist/agent/claude-client.d.ts.map +1 -0
  27. package/dist/agent/claude-client.js +314 -0
  28. package/dist/agent/claude-client.js.map +1 -0
  29. package/dist/agent/claude-client.test.d.ts +2 -0
  30. package/dist/agent/claude-client.test.d.ts.map +1 -0
  31. package/dist/agent/claude-client.test.js +590 -0
  32. package/dist/agent/claude-client.test.js.map +1 -0
  33. package/dist/agent/dom-extractor.d.ts +50 -0
  34. package/dist/agent/dom-extractor.d.ts.map +1 -0
  35. package/dist/agent/dom-extractor.js +374 -0
  36. package/dist/agent/dom-extractor.js.map +1 -0
  37. package/dist/agent/dom-extractor.test.d.ts +7 -0
  38. package/dist/agent/dom-extractor.test.d.ts.map +1 -0
  39. package/dist/agent/dom-extractor.test.js +504 -0
  40. package/dist/agent/dom-extractor.test.js.map +1 -0
  41. package/dist/agent/extension-client.d.ts +75 -0
  42. package/dist/agent/extension-client.d.ts.map +1 -0
  43. package/dist/agent/extension-client.js +245 -0
  44. package/dist/agent/extension-client.js.map +1 -0
  45. package/dist/agent/index.d.ts +8 -0
  46. package/dist/agent/index.d.ts.map +1 -0
  47. package/dist/agent/index.js +16 -0
  48. package/dist/agent/index.js.map +1 -0
  49. package/dist/agent/page-agent.d.ts +76 -0
  50. package/dist/agent/page-agent.d.ts.map +1 -0
  51. package/dist/agent/page-agent.js +236 -0
  52. package/dist/agent/page-agent.js.map +1 -0
  53. package/dist/agent/types.d.ts +236 -0
  54. package/dist/agent/types.d.ts.map +1 -0
  55. package/dist/agent/types.js +37 -0
  56. package/dist/agent/types.js.map +1 -0
  57. package/dist/cli/agent-commands.d.ts +3 -0
  58. package/dist/cli/agent-commands.d.ts.map +1 -0
  59. package/dist/cli/agent-commands.js +250 -0
  60. package/dist/cli/agent-commands.js.map +1 -0
  61. package/dist/cli/auth.d.ts +3 -0
  62. package/dist/cli/auth.d.ts.map +1 -0
  63. package/dist/cli/auth.js +288 -0
  64. package/dist/cli/auth.js.map +1 -0
  65. package/dist/cli/company.d.ts +3 -0
  66. package/dist/cli/company.d.ts.map +1 -0
  67. package/dist/cli/company.js +55 -0
  68. package/dist/cli/company.js.map +1 -0
  69. package/dist/cli/connection.d.ts +3 -0
  70. package/dist/cli/connection.d.ts.map +1 -0
  71. package/dist/cli/connection.js +79 -0
  72. package/dist/cli/connection.js.map +1 -0
  73. package/dist/cli/index.d.ts +7 -0
  74. package/dist/cli/index.d.ts.map +1 -0
  75. package/dist/cli/index.js +17 -0
  76. package/dist/cli/index.js.map +1 -0
  77. package/dist/cli/messages.d.ts +3 -0
  78. package/dist/cli/messages.d.ts.map +1 -0
  79. package/dist/cli/messages.js +268 -0
  80. package/dist/cli/messages.js.map +1 -0
  81. package/dist/cli/profile.d.ts +3 -0
  82. package/dist/cli/profile.d.ts.map +1 -0
  83. package/dist/cli/profile.js +81 -0
  84. package/dist/cli/profile.js.map +1 -0
  85. package/dist/cli/profile.test.d.ts +2 -0
  86. package/dist/cli/profile.test.d.ts.map +1 -0
  87. package/dist/cli/profile.test.js +15 -0
  88. package/dist/cli/profile.test.js.map +1 -0
  89. package/dist/cli/reply.d.ts +3 -0
  90. package/dist/cli/reply.d.ts.map +1 -0
  91. package/dist/cli/reply.js +129 -0
  92. package/dist/cli/reply.js.map +1 -0
  93. package/dist/core/audit.d.ts +17 -0
  94. package/dist/core/audit.d.ts.map +1 -0
  95. package/dist/core/audit.js +121 -0
  96. package/dist/core/audit.js.map +1 -0
  97. package/dist/core/audit.test.d.ts +2 -0
  98. package/dist/core/audit.test.d.ts.map +1 -0
  99. package/dist/core/audit.test.js +142 -0
  100. package/dist/core/audit.test.js.map +1 -0
  101. package/dist/core/browser-cookies.d.ts +19 -0
  102. package/dist/core/browser-cookies.d.ts.map +1 -0
  103. package/dist/core/browser-cookies.js +181 -0
  104. package/dist/core/browser-cookies.js.map +1 -0
  105. package/dist/core/browser.d.ts +50 -0
  106. package/dist/core/browser.d.ts.map +1 -0
  107. package/dist/core/browser.js +318 -0
  108. package/dist/core/browser.js.map +1 -0
  109. package/dist/core/config.d.ts +20 -0
  110. package/dist/core/config.d.ts.map +1 -0
  111. package/dist/core/config.js +103 -0
  112. package/dist/core/config.js.map +1 -0
  113. package/dist/core/config.test.d.ts +2 -0
  114. package/dist/core/config.test.d.ts.map +1 -0
  115. package/dist/core/config.test.js +111 -0
  116. package/dist/core/config.test.js.map +1 -0
  117. package/dist/core/storage.d.ts +19 -0
  118. package/dist/core/storage.d.ts.map +1 -0
  119. package/dist/core/storage.js +124 -0
  120. package/dist/core/storage.js.map +1 -0
  121. package/dist/core/storage.test.d.ts +2 -0
  122. package/dist/core/storage.test.d.ts.map +1 -0
  123. package/dist/core/storage.test.js +142 -0
  124. package/dist/core/storage.test.js.map +1 -0
  125. package/dist/index.d.ts +3 -0
  126. package/dist/index.d.ts.map +1 -0
  127. package/dist/index.js +63 -0
  128. package/dist/index.js.map +1 -0
  129. package/dist/linkedin/auth.d.ts +22 -0
  130. package/dist/linkedin/auth.d.ts.map +1 -0
  131. package/dist/linkedin/auth.js +167 -0
  132. package/dist/linkedin/auth.js.map +1 -0
  133. package/dist/linkedin/company-extractor.d.ts +36 -0
  134. package/dist/linkedin/company-extractor.d.ts.map +1 -0
  135. package/dist/linkedin/company-extractor.js +211 -0
  136. package/dist/linkedin/company-extractor.js.map +1 -0
  137. package/dist/linkedin/company-extractor.test.d.ts +2 -0
  138. package/dist/linkedin/company-extractor.test.d.ts.map +1 -0
  139. package/dist/linkedin/company-extractor.test.js +52 -0
  140. package/dist/linkedin/company-extractor.test.js.map +1 -0
  141. package/dist/linkedin/connector.d.ts +45 -0
  142. package/dist/linkedin/connector.d.ts.map +1 -0
  143. package/dist/linkedin/connector.js +245 -0
  144. package/dist/linkedin/connector.js.map +1 -0
  145. package/dist/linkedin/message-sender.d.ts +32 -0
  146. package/dist/linkedin/message-sender.d.ts.map +1 -0
  147. package/dist/linkedin/message-sender.js +112 -0
  148. package/dist/linkedin/message-sender.js.map +1 -0
  149. package/dist/linkedin/messages.d.ts +78 -0
  150. package/dist/linkedin/messages.d.ts.map +1 -0
  151. package/dist/linkedin/messages.js +745 -0
  152. package/dist/linkedin/messages.js.map +1 -0
  153. package/dist/linkedin/profile.d.ts +37 -0
  154. package/dist/linkedin/profile.d.ts.map +1 -0
  155. package/dist/linkedin/profile.js +268 -0
  156. package/dist/linkedin/profile.js.map +1 -0
  157. package/dist/linkedin/profile.test.d.ts +2 -0
  158. package/dist/linkedin/profile.test.d.ts.map +1 -0
  159. package/dist/linkedin/profile.test.js +68 -0
  160. package/dist/linkedin/profile.test.js.map +1 -0
  161. package/dist/linkedin/reply.d.ts +21 -0
  162. package/dist/linkedin/reply.d.ts.map +1 -0
  163. package/dist/linkedin/reply.js +76 -0
  164. package/dist/linkedin/reply.js.map +1 -0
  165. package/dist/linkedin/selector-engine.d.ts +69 -0
  166. package/dist/linkedin/selector-engine.d.ts.map +1 -0
  167. package/dist/linkedin/selector-engine.js +339 -0
  168. package/dist/linkedin/selector-engine.js.map +1 -0
  169. package/dist/linkedin/selector-engine.test.d.ts +2 -0
  170. package/dist/linkedin/selector-engine.test.d.ts.map +1 -0
  171. package/dist/linkedin/selector-engine.test.js +135 -0
  172. package/dist/linkedin/selector-engine.test.js.map +1 -0
  173. package/dist/linkedin/selectors.d.ts +65 -0
  174. package/dist/linkedin/selectors.d.ts.map +1 -0
  175. package/dist/linkedin/selectors.js +261 -0
  176. package/dist/linkedin/selectors.js.map +1 -0
  177. package/dist/templates/engine.d.ts +37 -0
  178. package/dist/templates/engine.d.ts.map +1 -0
  179. package/dist/templates/engine.js +215 -0
  180. package/dist/templates/engine.js.map +1 -0
  181. package/dist/templates/engine.test.d.ts +2 -0
  182. package/dist/templates/engine.test.d.ts.map +1 -0
  183. package/dist/templates/engine.test.js +212 -0
  184. package/dist/templates/engine.test.js.map +1 -0
  185. package/dist/templates/index.d.ts +2 -0
  186. package/dist/templates/index.d.ts.map +1 -0
  187. package/dist/templates/index.js +7 -0
  188. package/dist/templates/index.js.map +1 -0
  189. package/dist/types/index.d.ts +113 -0
  190. package/dist/types/index.d.ts.map +1 -0
  191. package/dist/types/index.js +3 -0
  192. package/dist/types/index.js.map +1 -0
  193. package/dist/types/index.test.d.ts +2 -0
  194. package/dist/types/index.test.d.ts.map +1 -0
  195. package/dist/types/index.test.js +90 -0
  196. package/dist/types/index.test.js.map +1 -0
  197. package/dist/utils/paths.d.ts +8 -0
  198. package/dist/utils/paths.d.ts.map +1 -0
  199. package/dist/utils/paths.js +68 -0
  200. package/dist/utils/paths.js.map +1 -0
  201. package/dist/utils/rate-limiter.d.ts +22 -0
  202. package/dist/utils/rate-limiter.d.ts.map +1 -0
  203. package/dist/utils/rate-limiter.js +57 -0
  204. package/dist/utils/rate-limiter.js.map +1 -0
  205. package/dist/utils/retry.d.ts +18 -0
  206. package/dist/utils/retry.d.ts.map +1 -0
  207. package/dist/utils/retry.js +49 -0
  208. package/dist/utils/retry.js.map +1 -0
  209. package/docs/connection-command.md +52 -0
  210. package/docs/plans/2025-03-03-linkedin-cli-design.md +280 -0
  211. package/docs/plans/2025-03-03-linkedin-cli-implementation-plan.md +2087 -0
  212. package/docs/plans/2025-03-03-linkedin-cli-implementation.md +2420 -0
  213. package/docs/plans/2026-02-19-linkedin-connection-feature.md +596 -0
  214. package/docs/plans/2026-02-28-messages-send-feature.md +480 -0
  215. package/docs/plans/2026-02-28-messages-show-design.md +243 -0
  216. package/docs/plans/2026-03-03-linkedin-cli-oss-publishing-design.md +394 -0
  217. package/docs/plans/2026-03-03-linkedin-cli-oss-publishing-plan.md +1592 -0
  218. package/docs/superpowers/plans/2026-03-13-linkedin-automation-resilience-migration.md +425 -0
  219. package/docs/superpowers/plans/2026-03-13-playwright-fara-migration.md +1112 -0
  220. package/docs/superpowers/plans/2026-03-14-page-agent-plan.md +1598 -0
  221. package/docs/superpowers/plans/2026-03-15-company-profile-extraction.md +591 -0
  222. package/docs/superpowers/plans/2026-03-15-profile-extraction-plan.md +943 -0
  223. package/docs/superpowers/specs/2026-03-14-company-profile-extraction-design.md +371 -0
  224. package/docs/superpowers/specs/2026-03-14-page-agent-design.md +385 -0
  225. package/docs/superpowers/specs/2026-03-15-profile-extraction-design.md +409 -0
  226. package/eslint.config.mjs +58 -0
  227. package/go.mod +9 -0
  228. package/go.sum +10 -0
  229. package/import-cookies.js +376 -0
  230. package/internal/cmd/actions.go +123 -0
  231. package/internal/cmd/auth.go +108 -0
  232. package/internal/cmd/connect.go +42 -0
  233. package/internal/cmd/message.go +44 -0
  234. package/internal/cmd/people.go +454 -0
  235. package/internal/cmd/profiles.go +121 -0
  236. package/internal/cmd/root.go +89 -0
  237. package/internal/cmd/sequence.go +192 -0
  238. package/internal/config/config.go +187 -0
  239. package/internal/config/config_test.go +121 -0
  240. package/internal/config/profile.go +65 -0
  241. package/internal/linkedin/navigator.go +195 -0
  242. package/internal/linkedin/selectors.go +39 -0
  243. package/internal/linkedin/validator.go +69 -0
  244. package/internal/pinchtab/client.go +183 -0
  245. package/internal/pinchtab/client_test.go +67 -0
  246. package/internal/pinchtab/types.go +50 -0
  247. package/internal/ratelimit/limiter.go +115 -0
  248. package/internal/ratelimit/limits.go +32 -0
  249. package/package.json +67 -0
  250. package/release.sh +66 -0
  251. package/scripts/debug-linkedin.js +156 -0
  252. package/scripts/debug-login.js +193 -0
  253. package/scripts/extract-from-edge.js +96 -0
  254. package/scripts/import-cookies.js +101 -0
  255. package/scripts/poc-show-data.js +205 -0
  256. package/scripts/proof-of-access.js +87 -0
  257. package/scripts/prove-connection.js +110 -0
  258. package/scripts/show-linkedin-data.js +173 -0
  259. package/src/agent/action-executor.test.ts +464 -0
  260. package/src/agent/action-executor.ts +203 -0
  261. package/src/agent/claude-client.test.ts +707 -0
  262. package/src/agent/claude-client.ts +422 -0
  263. package/src/agent/dom-extractor.test.ts +574 -0
  264. package/src/agent/dom-extractor.ts +437 -0
  265. package/src/agent/extension-client.ts +306 -0
  266. package/src/agent/index.ts +28 -0
  267. package/src/agent/page-agent.ts +292 -0
  268. package/src/agent/types.ts +288 -0
  269. package/src/cli/agent-commands.ts +274 -0
  270. package/src/cli/auth.ts +343 -0
  271. package/src/cli/company.ts +66 -0
  272. package/src/cli/connection.ts +89 -0
  273. package/src/cli/index.ts +7 -0
  274. package/src/cli/messages.ts +338 -0
  275. package/src/cli/profile.test.ts +14 -0
  276. package/src/cli/profile.ts +95 -0
  277. package/src/cli/reply.ts +110 -0
  278. package/src/core/audit.test.ts +134 -0
  279. package/src/core/audit.ts +98 -0
  280. package/src/core/browser-cookies.ts +203 -0
  281. package/src/core/browser.ts +304 -0
  282. package/src/core/config.test.ts +90 -0
  283. package/src/core/config.ts +81 -0
  284. package/src/core/storage.test.ts +129 -0
  285. package/src/core/storage.ts +100 -0
  286. package/src/index.ts +70 -0
  287. package/src/linkedin/auth.ts +218 -0
  288. package/src/linkedin/company-extractor.test.ts +58 -0
  289. package/src/linkedin/company-extractor.ts +222 -0
  290. package/src/linkedin/connector.ts +336 -0
  291. package/src/linkedin/message-sender.ts +141 -0
  292. package/src/linkedin/messages.ts +894 -0
  293. package/src/linkedin/profile.test.ts +79 -0
  294. package/src/linkedin/profile.ts +314 -0
  295. package/src/linkedin/reply.ts +96 -0
  296. package/src/linkedin/selector-engine.test.ts +167 -0
  297. package/src/linkedin/selector-engine.ts +393 -0
  298. package/src/linkedin/selectors.ts +268 -0
  299. package/src/templates/defaults/followup.txt +14 -0
  300. package/src/templates/defaults/meeting.txt +16 -0
  301. package/src/templates/defaults/welcome.txt +14 -0
  302. package/src/templates/engine.test.ts +228 -0
  303. package/src/templates/engine.ts +208 -0
  304. package/src/templates/index.ts +1 -0
  305. package/src/types/index.test.ts +94 -0
  306. package/src/types/index.ts +143 -0
  307. package/src/types/sql.js.d.ts +23 -0
  308. package/src/utils/paths.ts +33 -0
  309. package/src/utils/rate-limiter.ts +75 -0
  310. package/src/utils/retry.ts +78 -0
  311. package/test-cli.sh +85 -0
  312. package/test-real-data.sh +97 -0
  313. package/tsconfig.json +23 -0
  314. package/vitest.config.ts +35 -0
@@ -0,0 +1,591 @@
1
+ # Company Profile Extraction Implementation Plan
2
+
3
+ > **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking.
4
+
5
+ **Goal:** Add `linkedin-cli company info <url>` command to extract structured data from LinkedIn company profile pages.
6
+
7
+ **Architecture:** Playwright-based extraction with multi-layer selector fallbacks. No auth required — company pages are public, but LinkedIn may show an authwall popup, which the extractor dismisses before reading any fields. Outputs JSON to stdout.
8
+
9
+ **Tech Stack:** TypeScript, Playwright, Commander.js (existing)
10
+
11
+ ---
12
+
13
+ ## File Structure
14
+
15
+ | File | Action | Purpose |
16
+ |------|--------|---------|
17
+ | `src/types/index.ts` | Modify | Add `CompanyProfile` interface |
18
+ | `src/linkedin/selectors.ts` | Modify | Add `company` selector category |
19
+ | `src/linkedin/company-extractor.ts` | Create | `CompanyExtractor` class with extraction logic |
20
+ | `src/cli/company.ts` | Create | Command registration (`company info`) |
21
+ | `src/cli/index.ts` | Modify | Export and register `registerCompanyCommands` |
22
+ | `src/linkedin/company-extractor.test.ts` | Create | Unit tests for extractor |
23
+
24
+ ---
25
+
26
+ ## Chunk 1: Types and Selectors
27
+
28
+ ### Task 1: Add CompanyProfile Interface
29
+
30
+ **Files:**
31
+ - Modify: `src/types/index.ts`
32
+
33
+ - [ ] **Step 1: Add CompanyProfile interface to types/index.ts**
34
+
35
+ Add after the `AuditLog` interface (around line 82):
36
+
37
+ ```typescript
38
+ // Company profile types
39
+ export interface CompanyProfile {
40
+ name: string;
41
+ linkedin_url: string;
42
+ website: string | null;
43
+ industry: string | null;
44
+ company_size: string | null;
45
+ headquarters: string | null;
46
+ founded: string | null;
47
+ specialties: string[] | null;
48
+ type: string | null;
49
+ follower_count: number | null;
50
+ }
51
+ ```
52
+
53
+ - [ ] **Step 2: Verify TypeScript compiles**
54
+
55
+ Run: `npx tsc --noEmit`
56
+ Expected: No errors
57
+
58
+ - [ ] **Step 3: Commit**
59
+
60
+ ```bash
61
+ git add src/types/index.ts
62
+ git commit -m "feat: add CompanyProfile interface for company extraction"
63
+ ```
64
+
65
+ ---
66
+
67
+ ### Task 2: Add Company Selectors
68
+
69
+ **Files:**
70
+ - Modify: `src/linkedin/selectors.ts`
71
+
72
+ - [ ] **Step 1: Add company selector category to SELECTORS**
73
+
74
+ Add after the `connection` object (before the closing brace, around line 135):
75
+
76
+ ```typescript
77
+ // Company profile extraction
78
+ company: {
79
+ name: [
80
+ 'h1.text-heading-xlarge',
81
+ '.org-top-card-primary-content h1',
82
+ 'h1.org-top-card-summary__title',
83
+ ],
84
+ website: [
85
+ 'a[data-testid="website-link"]',
86
+ '.org-about-us-module__website a',
87
+ 'dd a[href^="http"]:not([href*="linkedin"])',
88
+ ],
89
+ industry: [
90
+ 'dt:has-text("Industry") + dd',
91
+ '.org-about-company-module__dl dt:has-text("Industry") + dd',
92
+ ],
93
+ company_size: [
94
+ 'dt:has-text("Company size") + dd',
95
+ 'dt:has-text("Employees") + dd',
96
+ '.org-about-company-module__dl dt:has-text("Company size") + dd',
97
+ ],
98
+ headquarters: [
99
+ 'dt:has-text("Headquarters") + dd',
100
+ '.org-about-company-module__dl dt:has-text("Headquarters") + dd',
101
+ ],
102
+ founded: [
103
+ 'dt:has-text("Founded") + dd',
104
+ '.org-about-company-module__dl dt:has-text("Founded") + dd',
105
+ ],
106
+ specialties: [
107
+ 'dt:has-text("Specialties") + dd',
108
+ '.org-about-company-module__dl dt:has-text("Specialties") + dd',
109
+ ],
110
+ type: [
111
+ 'dt:has-text("Company type") + dd',
112
+ 'dt:has-text("Type") + dd',
113
+ '.org-about-company-module__dl dt:has-text("Company type") + dd',
114
+ ],
115
+ follower_count: [
116
+ '.org-top-card-primary-content__followers-count',
117
+ '.org-top-card-module__followers-count',
118
+ 'span:has-text("followers")',
119
+ '.org-top-card-summary__followers',
120
+ ],
121
+ // Authwall dismiss button
122
+ dismissAuthwall: [
123
+ 'button[aria-label="Dismiss"]',
124
+ 'button[aria-label="Close"]',
125
+ '.artdeco-modal__dismiss',
126
+ 'button.artdeco-modal__dismiss',
127
+ ],
128
+ },
129
+ ```
130
+
131
+ - [ ] **Step 2: Verify TypeScript compiles**
132
+
133
+ Run: `npx tsc --noEmit`
134
+ Expected: No errors
135
+
136
+ - [ ] **Step 3: Commit**
137
+
138
+ ```bash
139
+ git add src/linkedin/selectors.ts
140
+ git commit -m "feat: add company selectors for profile extraction"
141
+ ```
142
+
143
+ ---
144
+
145
+ ## Chunk 2: Company Extractor
146
+
147
+ ### Task 3: Create CompanyExtractor Class
148
+
149
+ **Files:**
150
+ - Create: `src/linkedin/company-extractor.ts`
151
+ - Create: `src/linkedin/company-extractor.test.ts`
152
+
153
+ - [ ] **Step 1: Write the failing test for URL validation**
154
+
155
+ Create `src/linkedin/company-extractor.test.ts`:
156
+
157
+ ```typescript
158
+ import { describe, it, expect } from 'vitest';
159
+ import { isValidCompanyUrl } from './company-extractor';
160
+
161
+ describe('isValidCompanyUrl', () => {
162
+ it('should accept valid HTTPS company URLs', () => {
163
+ expect(isValidCompanyUrl('https://www.linkedin.com/company/openai/')).toBe(true);
164
+ expect(isValidCompanyUrl('https://www.linkedin.com/company/microsoft')).toBe(true);
165
+ expect(isValidCompanyUrl('https://www.linkedin.com/company/123company/')).toBe(true);
166
+ });
167
+
168
+ it('should reject invalid URLs', () => {
169
+ expect(isValidCompanyUrl('http://www.linkedin.com/company/openai/')).toBe(false);
170
+ expect(isValidCompanyUrl('https://linkedin.com/company/openai/')).toBe(false);
171
+ expect(isValidCompanyUrl('https://www.linkedin.com/in/openai/')).toBe(false);
172
+ expect(isValidCompanyUrl('not-a-url')).toBe(false);
173
+ expect(isValidCompanyUrl('')).toBe(false);
174
+ });
175
+ });
176
+ ```
177
+
178
+ - [ ] **Step 2: Run test to verify it fails**
179
+
180
+ Run: `npx vitest run src/linkedin/company-extractor.test.ts`
181
+ Expected: FAIL - the import of `./company-extractor` cannot be resolved (the module and `isValidCompanyUrl` do not exist yet)
182
+
183
+ - [ ] **Step 3: Create CompanyExtractor with URL validation**
184
+
185
+ Create `src/linkedin/company-extractor.ts`:
186
+
187
+ ```typescript
188
+ /**
189
+ * Company Profile Extractor
190
+ *
191
+ * Extracts structured data from LinkedIn company profile pages.
192
+ * No authentication required - company pages are public.
193
+ */
194
+
195
+ import type { Page } from 'playwright';
196
+ import type { CompanyProfile } from '../types';
197
+ import { SELECTORS } from './selectors';
198
+
199
+ /** URL pattern for valid LinkedIn company pages */
200
+ const COMPANY_URL_PATTERN = /^https:\/\/www\.linkedin\.com\/company\/[^\/]+\/?$/;
201
+
202
+ /**
203
+ * Validate if a URL is a valid LinkedIn company URL
204
+ */
205
+ export function isValidCompanyUrl(url: string): boolean {
206
+ return COMPANY_URL_PATTERN.test(url);
207
+ }
208
+
209
+ /**
210
+ * Parse follower count string to number
211
+ * Handles formats like "2.5M followers", "12K", "1,234"
212
+ */
213
+ export function parseFollowerCount(text: string | null): number | null {
214
+ if (!text) return null;
215
+
216
+ // Extract numeric part with optional K/M suffix
217
+ const match = text.match(/([\d,.]+)\s*([KMkm]?)/);
218
+ if (!match) return null;
219
+
220
+ let num = parseFloat(match[1].replace(/,/g, ''));
221
+ const suffix = match[2].toUpperCase();
222
+
223
+ if (suffix === 'K') num *= 1000;
224
+ else if (suffix === 'M') num *= 1000000;
225
+
226
+ return Math.round(num);
227
+ }
228
+
229
+ /**
230
+ * Parse specialties string to array
231
+ * Splits by comma and trims whitespace
232
+ */
233
+ export function parseSpecialties(text: string | null): string[] | null {
234
+ if (!text) return null;
235
+
236
+ const specialties = text
237
+ .split(',')
238
+ .map((s) => s.trim())
239
+ .filter((s) => s.length > 0);
240
+
241
+ return specialties.length > 0 ? specialties : null;
242
+ }
243
+
244
+ /**
245
+ * Extract a field using multiple selector fallbacks
246
+ */
247
+ async function extractField(
248
+ page: Page,
249
+ selectors: readonly string[]
250
+ ): Promise<string | null> {
251
+ for (const selector of selectors) {
252
+ try {
253
+ const element = page.locator(selector).first();
254
+ const text = await element.textContent({ timeout: 2000 });
255
+ if (text?.trim()) {
256
+ return text.trim();
257
+ }
258
+ } catch {
259
+ // Try next selector
260
+ }
261
+ }
262
+ return null;
263
+ }
264
+
265
+ /**
266
+ * Extract href from a link element using selector fallbacks
267
+ */
268
+ async function extractHref(
269
+ page: Page,
270
+ selectors: readonly string[]
271
+ ): Promise<string | null> {
272
+ for (const selector of selectors) {
273
+ try {
274
+ const element = page.locator(selector).first();
275
+ const href = await element.getAttribute('href', { timeout: 2000 });
276
+ if (href?.trim()) {
277
+ return href.trim();
278
+ }
279
+ } catch {
280
+ // Try next selector
281
+ }
282
+ }
283
+ return null;
284
+ }
285
+
286
+ /**
287
+ * Dismiss authwall popup if present
288
+ */
289
+ async function dismissAuthwall(page: Page): Promise<void> {
290
+ const selectors = SELECTORS.company.dismissAuthwall;
291
+
292
+ for (const selector of selectors) {
293
+ try {
294
+ const btn = page.locator(selector).first();
295
+ if (await btn.isVisible({ timeout: 1000 })) {
296
+ await btn.click();
297
+ await page.waitForTimeout(500);
298
+ return;
299
+ }
300
+ } catch {
301
+ // Try next selector
302
+ }
303
+ }
304
+ }
305
+
306
+ /**
307
+ * Company Profile Extractor
308
+ *
309
+ * Extracts structured data from LinkedIn company profile pages.
310
+ */
311
+ export class CompanyExtractor {
312
+ constructor(private page: Page) {}
313
+
314
+ /**
315
+ * Extract company profile data from a LinkedIn company URL
316
+ */
317
+ async extract(url: string): Promise<CompanyProfile> {
318
+ // Navigate to company page
319
+ await this.page.goto(url, {
320
+ waitUntil: 'domcontentloaded',
321
+ timeout: 30000,
322
+ });
323
+
324
+ // Wait for page to load
325
+ await this.page.waitForTimeout(3000);
326
+
327
+ // Dismiss authwall popup if present
328
+ await dismissAuthwall(this.page);
329
+
330
+ // Extract all fields
331
+ const name = await extractField(this.page, SELECTORS.company.name);
332
+ const website = await extractHref(this.page, SELECTORS.company.website);
333
+ const industry = await extractField(this.page, SELECTORS.company.industry);
334
+ const company_size = await extractField(this.page, SELECTORS.company.company_size);
335
+ const headquarters = await extractField(this.page, SELECTORS.company.headquarters);
336
+ const founded = await extractField(this.page, SELECTORS.company.founded);
337
+ const specialtiesRaw = await extractField(this.page, SELECTORS.company.specialties);
338
+ const type = await extractField(this.page, SELECTORS.company.type);
339
+ const followerRaw = await extractField(this.page, SELECTORS.company.follower_count);
340
+
341
+ // Build profile object
342
+ const profile: CompanyProfile = {
343
+ name: name || '',
344
+ linkedin_url: url,
345
+ website,
346
+ industry,
347
+ company_size,
348
+ headquarters,
349
+ founded,
350
+ specialties: parseSpecialties(specialtiesRaw),
351
+ type,
352
+ follower_count: parseFollowerCount(followerRaw),
353
+ };
354
+
355
+ return profile;
356
+ }
357
+ }
358
+ ```
359
+
360
+ - [ ] **Step 4: Run test to verify it passes**
361
+
362
+ Run: `npx vitest run src/linkedin/company-extractor.test.ts`
363
+ Expected: PASS
364
+
365
+ - [ ] **Step 5: Write tests for parseFollowerCount and parseSpecialties**
366
+
367
+ Add to `src/linkedin/company-extractor.test.ts`:
368
+
369
+ ```typescript
370
+ import { parseFollowerCount, parseSpecialties } from './company-extractor';
371
+
372
+ describe('parseFollowerCount', () => {
373
+ it('should parse K suffix', () => {
374
+ expect(parseFollowerCount('12K followers')).toBe(12000);
375
+ expect(parseFollowerCount('5k')).toBe(5000);
376
+ });
377
+
378
+ it('should parse M suffix', () => {
379
+ expect(parseFollowerCount('2.5M followers')).toBe(2500000);
380
+ expect(parseFollowerCount('1.2m')).toBe(1200000);
381
+ });
382
+
383
+ it('should parse plain numbers with commas', () => {
384
+ expect(parseFollowerCount('1,234 followers')).toBe(1234);
385
+ expect(parseFollowerCount('500+ followers')).toBe(500);
386
+ });
387
+
388
+ it('should return null for invalid input', () => {
389
+ expect(parseFollowerCount(null)).toBe(null);
390
+ expect(parseFollowerCount('')).toBe(null);
391
+ expect(parseFollowerCount('no numbers')).toBe(null);
392
+ });
393
+ });
394
+
395
+ describe('parseSpecialties', () => {
396
+ it('should split by comma and trim', () => {
397
+ expect(parseSpecialties('AI, ML, Research')).toEqual(['AI', 'ML', 'Research']);
398
+ expect(parseSpecialties('Artificial Intelligence')).toEqual(['Artificial Intelligence']);
399
+ });
400
+
401
+ it('should handle whitespace', () => {
402
+ expect(parseSpecialties(' AI , ML ')).toEqual(['AI', 'ML']);
403
+ });
404
+
405
+ it('should return null for empty input', () => {
406
+ expect(parseSpecialties(null)).toBe(null);
407
+ expect(parseSpecialties('')).toBe(null);
408
+ expect(parseSpecialties(' ')).toBe(null);
409
+ });
410
+ });
411
+ ```
412
+
413
+ - [ ] **Step 6: Run all tests**
414
+
415
+ Run: `npx vitest run src/linkedin/company-extractor.test.ts`
416
+ Expected: All tests PASS
417
+
418
+ **Note:** Integration tests for `CompanyExtractor.extract()` that mock Playwright pages are out of scope for this implementation. The extraction logic will be validated via manual testing (Task 5).
419
+
420
+ - [ ] **Step 7: Commit**
421
+
422
+ ```bash
423
+ git add src/linkedin/company-extractor.ts src/linkedin/company-extractor.test.ts
424
+ git commit -m "feat: add CompanyExtractor class with parsing utilities"
425
+ ```
426
+
427
+ ---
428
+
429
+ ## Chunk 3: CLI Command
430
+
431
+ ### Task 4: Create Company Command
432
+
433
+ **Files:**
434
+ - Create: `src/cli/company.ts`
435
+ - Modify: `src/cli/index.ts`
436
+
437
+ - [ ] **Step 1: Create company.ts command file**
438
+
439
+ Create `src/cli/company.ts`:
440
+
441
+ ```typescript
442
+ import { Command } from 'commander';
443
+ import chalk from 'chalk';
444
+ import { BrowserController } from '../core/browser';
445
+ import { CompanyExtractor, isValidCompanyUrl } from '../linkedin/company-extractor';
446
+ import { getConfig } from '../core/config';
447
+ import { getAuditLogger } from '../core/audit';
448
+
449
+ export function registerCompanyCommands(program: Command): void {
450
+ const company = program
451
+ .command('company')
452
+ .description('Extract information from LinkedIn company profiles');
453
+
454
+ company
455
+ .command('info')
456
+ .description('Extract company profile information')
457
+ .argument('<url>', 'LinkedIn company URL (e.g., https://www.linkedin.com/company/openai/)')
458
+ .option('--headless', 'Run browser in headless mode')
459
+ .option('--debug', 'Enable debug mode with screenshots and HTML snapshots', false)
460
+ .action(async (url, options) => {
461
+ const config = getConfig();
462
+ const logger = getAuditLogger();
463
+ let browser: BrowserController | null = null;
464
+
465
+ try {
466
+ // Validate URL
467
+ if (!isValidCompanyUrl(url)) {
468
+ console.error(chalk.red('✗ Invalid company URL'));
469
+ console.error(chalk.gray('Expected format: https://www.linkedin.com/company/<name>/'));
470
+ process.exit(1);
471
+ }
472
+
473
+ console.error(chalk.gray(`Extracting company info from ${url}...`));
474
+
475
+ // Launch browser
476
+ // Note: --debug flag enables screenshots and HTML snapshots via BrowserController
477
+ browser = new BrowserController({
478
+ headless: options.headless !== undefined ? options.headless : config.getValue('headless'),
479
+ debug: options.debug,
480
+ });
481
+
482
+ await browser.launch();
483
+ const page = browser.getPage();
484
+
485
+ if (!page) {
486
+ throw new Error('Failed to get page from browser');
487
+ }
488
+
489
+ // Extract company profile
490
+ const extractor = new CompanyExtractor(page);
491
+ const profile = await extractor.extract(url);
492
+
493
+ // Log successful extraction
494
+ const fieldsExtracted = Object.keys(profile).filter(k => profile[k as keyof typeof profile] !== null);
495
+ logger.log('company.info', { url, fieldsExtracted }, true);
496
+
497
+ // Output JSON to stdout
498
+ console.log(JSON.stringify(profile, null, 2));
499
+
500
+ await browser.close();
501
+ } catch (error) {
502
+ if (browser) {
503
+ await browser.close();
504
+ }
505
+
506
+ // Log failed extraction
507
+ logger.log('company.info', { url, error: error instanceof Error ? error.message : String(error) }, false);
508
+
509
+ console.error(
510
+ chalk.red('✗ Failed to extract company data:'),
511
+ error instanceof Error ? error.message : error
512
+ );
513
+ process.exit(1);
514
+ }
515
+ });
516
+ }
517
+ ```
518
+
519
+ - [ ] **Step 2: Verify TypeScript compiles**
520
+
521
+ Run: `npx tsc --noEmit`
522
+ Expected: No errors
523
+
524
+ - [ ] **Step 3: Register command in CLI index**
525
+
526
+ Read `src/cli/index.ts` and add the import and registration.
527
+
528
+ First, check what's in the file:
529
+ ```bash
530
+ cat src/cli/index.ts
531
+ ```
532
+
533
+ Then add:
534
+ 1. Import: `import { registerCompanyCommands } from './company';`
535
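+ 2. Call: `registerCompanyCommands(program);` (after other register calls). If `src/cli/index.ts` only re-exports the register functions, add the export there and make the call in the file that constructs the Commander `program`.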
536
+
537
+ - [ ] **Step 4: Build the CLI**
538
+
539
+ Run: `npm run build`
540
+ Expected: Build succeeds
541
+
542
+ - [ ] **Step 5: Test the command manually**
543
+
544
+ Run: `node dist/cli/index.js company info https://www.linkedin.com/company/openai/`
545
+ Expected: JSON output with company data
546
+
547
+ - [ ] **Step 6: Commit**
548
+
549
+ ```bash
550
+ git add src/cli/company.ts src/cli/index.ts
551
+ git commit -m "feat: add company info CLI command"
552
+ ```
553
+
554
+ ---
555
+
556
+ ### Task 5: Final Integration Test
557
+
558
+ **Files:**
559
+ - None (manual testing)
560
+
561
+ - [ ] **Step 1: Test with OpenAI company page**
562
+
563
+ Run: `node dist/cli/index.js company info https://www.linkedin.com/company/openai/`
564
+ Expected: JSON with OpenAI data
565
+
566
+ - [ ] **Step 2: Test with Microsoft company page**
567
+
568
+ Run: `node dist/cli/index.js company info https://www.linkedin.com/company/microsoft/`
569
+ Expected: JSON with Microsoft data
570
+
571
+ - [ ] **Step 3: Test invalid URL**
572
+
573
+ Run: `node dist/cli/index.js company info https://www.linkedin.com/in/openai/`
574
+ Expected: Error message "✗ Invalid company URL" on stderr, no JSON output, and exit code 1
575
+
576
+ - [ ] **Step 4: Test headless mode**
577
+
578
+ Run: `node dist/cli/index.js company info https://www.linkedin.com/company/openai/ --headless`
579
+ Expected: JSON output (browser runs headless)
580
+
581
+ ---
582
+
583
+ ## Summary
584
+
585
+ | Task | Description | Files |
586
+ |------|-------------|-------|
587
+ | 1 | Add CompanyProfile interface | `src/types/index.ts` |
588
+ | 2 | Add company selectors | `src/linkedin/selectors.ts` |
589
+ | 3 | Create CompanyExtractor class | `src/linkedin/company-extractor.ts`, `src/linkedin/company-extractor.test.ts` |
590
+ | 4 | Create company CLI command | `src/cli/company.ts`, `src/cli/index.ts` |
591
+ | 5 | Integration testing | Manual |