linkedin-automation-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314) hide show
  1. package/.env.example +12 -0
  2. package/.github/workflows/ci.yml +66 -0
  3. package/.github/workflows/publish.yml +48 -0
  4. package/.husky/pre-commit +6 -0
  5. package/.prettierignore +4 -0
  6. package/.prettierrc +10 -0
  7. package/AGENTS.md +294 -0
  8. package/CHANGELOG.md +40 -0
  9. package/GIT_RELEASE.md +167 -0
  10. package/LICENSE +21 -0
  11. package/Makefile +30 -0
  12. package/NPM_PUBLISHING.md +230 -0
  13. package/PYEOF +0 -0
  14. package/README.md +295 -0
  15. package/TESTING-GUIDE.md +151 -0
  16. package/cmd/linkedin/main.go +9 -0
  17. package/dist/agent/action-executor.d.ts +81 -0
  18. package/dist/agent/action-executor.d.ts.map +1 -0
  19. package/dist/agent/action-executor.js +170 -0
  20. package/dist/agent/action-executor.js.map +1 -0
  21. package/dist/agent/action-executor.test.d.ts +2 -0
  22. package/dist/agent/action-executor.test.d.ts.map +1 -0
  23. package/dist/agent/action-executor.test.js +366 -0
  24. package/dist/agent/action-executor.test.js.map +1 -0
  25. package/dist/agent/claude-client.d.ts +74 -0
  26. package/dist/agent/claude-client.d.ts.map +1 -0
  27. package/dist/agent/claude-client.js +314 -0
  28. package/dist/agent/claude-client.js.map +1 -0
  29. package/dist/agent/claude-client.test.d.ts +2 -0
  30. package/dist/agent/claude-client.test.d.ts.map +1 -0
  31. package/dist/agent/claude-client.test.js +590 -0
  32. package/dist/agent/claude-client.test.js.map +1 -0
  33. package/dist/agent/dom-extractor.d.ts +50 -0
  34. package/dist/agent/dom-extractor.d.ts.map +1 -0
  35. package/dist/agent/dom-extractor.js +374 -0
  36. package/dist/agent/dom-extractor.js.map +1 -0
  37. package/dist/agent/dom-extractor.test.d.ts +7 -0
  38. package/dist/agent/dom-extractor.test.d.ts.map +1 -0
  39. package/dist/agent/dom-extractor.test.js +504 -0
  40. package/dist/agent/dom-extractor.test.js.map +1 -0
  41. package/dist/agent/extension-client.d.ts +75 -0
  42. package/dist/agent/extension-client.d.ts.map +1 -0
  43. package/dist/agent/extension-client.js +245 -0
  44. package/dist/agent/extension-client.js.map +1 -0
  45. package/dist/agent/index.d.ts +8 -0
  46. package/dist/agent/index.d.ts.map +1 -0
  47. package/dist/agent/index.js +16 -0
  48. package/dist/agent/index.js.map +1 -0
  49. package/dist/agent/page-agent.d.ts +76 -0
  50. package/dist/agent/page-agent.d.ts.map +1 -0
  51. package/dist/agent/page-agent.js +236 -0
  52. package/dist/agent/page-agent.js.map +1 -0
  53. package/dist/agent/types.d.ts +236 -0
  54. package/dist/agent/types.d.ts.map +1 -0
  55. package/dist/agent/types.js +37 -0
  56. package/dist/agent/types.js.map +1 -0
  57. package/dist/cli/agent-commands.d.ts +3 -0
  58. package/dist/cli/agent-commands.d.ts.map +1 -0
  59. package/dist/cli/agent-commands.js +250 -0
  60. package/dist/cli/agent-commands.js.map +1 -0
  61. package/dist/cli/auth.d.ts +3 -0
  62. package/dist/cli/auth.d.ts.map +1 -0
  63. package/dist/cli/auth.js +288 -0
  64. package/dist/cli/auth.js.map +1 -0
  65. package/dist/cli/company.d.ts +3 -0
  66. package/dist/cli/company.d.ts.map +1 -0
  67. package/dist/cli/company.js +55 -0
  68. package/dist/cli/company.js.map +1 -0
  69. package/dist/cli/connection.d.ts +3 -0
  70. package/dist/cli/connection.d.ts.map +1 -0
  71. package/dist/cli/connection.js +79 -0
  72. package/dist/cli/connection.js.map +1 -0
  73. package/dist/cli/index.d.ts +7 -0
  74. package/dist/cli/index.d.ts.map +1 -0
  75. package/dist/cli/index.js +17 -0
  76. package/dist/cli/index.js.map +1 -0
  77. package/dist/cli/messages.d.ts +3 -0
  78. package/dist/cli/messages.d.ts.map +1 -0
  79. package/dist/cli/messages.js +268 -0
  80. package/dist/cli/messages.js.map +1 -0
  81. package/dist/cli/profile.d.ts +3 -0
  82. package/dist/cli/profile.d.ts.map +1 -0
  83. package/dist/cli/profile.js +81 -0
  84. package/dist/cli/profile.js.map +1 -0
  85. package/dist/cli/profile.test.d.ts +2 -0
  86. package/dist/cli/profile.test.d.ts.map +1 -0
  87. package/dist/cli/profile.test.js +15 -0
  88. package/dist/cli/profile.test.js.map +1 -0
  89. package/dist/cli/reply.d.ts +3 -0
  90. package/dist/cli/reply.d.ts.map +1 -0
  91. package/dist/cli/reply.js +129 -0
  92. package/dist/cli/reply.js.map +1 -0
  93. package/dist/core/audit.d.ts +17 -0
  94. package/dist/core/audit.d.ts.map +1 -0
  95. package/dist/core/audit.js +121 -0
  96. package/dist/core/audit.js.map +1 -0
  97. package/dist/core/audit.test.d.ts +2 -0
  98. package/dist/core/audit.test.d.ts.map +1 -0
  99. package/dist/core/audit.test.js +142 -0
  100. package/dist/core/audit.test.js.map +1 -0
  101. package/dist/core/browser-cookies.d.ts +19 -0
  102. package/dist/core/browser-cookies.d.ts.map +1 -0
  103. package/dist/core/browser-cookies.js +181 -0
  104. package/dist/core/browser-cookies.js.map +1 -0
  105. package/dist/core/browser.d.ts +50 -0
  106. package/dist/core/browser.d.ts.map +1 -0
  107. package/dist/core/browser.js +318 -0
  108. package/dist/core/browser.js.map +1 -0
  109. package/dist/core/config.d.ts +20 -0
  110. package/dist/core/config.d.ts.map +1 -0
  111. package/dist/core/config.js +103 -0
  112. package/dist/core/config.js.map +1 -0
  113. package/dist/core/config.test.d.ts +2 -0
  114. package/dist/core/config.test.d.ts.map +1 -0
  115. package/dist/core/config.test.js +111 -0
  116. package/dist/core/config.test.js.map +1 -0
  117. package/dist/core/storage.d.ts +19 -0
  118. package/dist/core/storage.d.ts.map +1 -0
  119. package/dist/core/storage.js +124 -0
  120. package/dist/core/storage.js.map +1 -0
  121. package/dist/core/storage.test.d.ts +2 -0
  122. package/dist/core/storage.test.d.ts.map +1 -0
  123. package/dist/core/storage.test.js +142 -0
  124. package/dist/core/storage.test.js.map +1 -0
  125. package/dist/index.d.ts +3 -0
  126. package/dist/index.d.ts.map +1 -0
  127. package/dist/index.js +63 -0
  128. package/dist/index.js.map +1 -0
  129. package/dist/linkedin/auth.d.ts +22 -0
  130. package/dist/linkedin/auth.d.ts.map +1 -0
  131. package/dist/linkedin/auth.js +167 -0
  132. package/dist/linkedin/auth.js.map +1 -0
  133. package/dist/linkedin/company-extractor.d.ts +36 -0
  134. package/dist/linkedin/company-extractor.d.ts.map +1 -0
  135. package/dist/linkedin/company-extractor.js +211 -0
  136. package/dist/linkedin/company-extractor.js.map +1 -0
  137. package/dist/linkedin/company-extractor.test.d.ts +2 -0
  138. package/dist/linkedin/company-extractor.test.d.ts.map +1 -0
  139. package/dist/linkedin/company-extractor.test.js +52 -0
  140. package/dist/linkedin/company-extractor.test.js.map +1 -0
  141. package/dist/linkedin/connector.d.ts +45 -0
  142. package/dist/linkedin/connector.d.ts.map +1 -0
  143. package/dist/linkedin/connector.js +245 -0
  144. package/dist/linkedin/connector.js.map +1 -0
  145. package/dist/linkedin/message-sender.d.ts +32 -0
  146. package/dist/linkedin/message-sender.d.ts.map +1 -0
  147. package/dist/linkedin/message-sender.js +112 -0
  148. package/dist/linkedin/message-sender.js.map +1 -0
  149. package/dist/linkedin/messages.d.ts +78 -0
  150. package/dist/linkedin/messages.d.ts.map +1 -0
  151. package/dist/linkedin/messages.js +745 -0
  152. package/dist/linkedin/messages.js.map +1 -0
  153. package/dist/linkedin/profile.d.ts +37 -0
  154. package/dist/linkedin/profile.d.ts.map +1 -0
  155. package/dist/linkedin/profile.js +268 -0
  156. package/dist/linkedin/profile.js.map +1 -0
  157. package/dist/linkedin/profile.test.d.ts +2 -0
  158. package/dist/linkedin/profile.test.d.ts.map +1 -0
  159. package/dist/linkedin/profile.test.js +68 -0
  160. package/dist/linkedin/profile.test.js.map +1 -0
  161. package/dist/linkedin/reply.d.ts +21 -0
  162. package/dist/linkedin/reply.d.ts.map +1 -0
  163. package/dist/linkedin/reply.js +76 -0
  164. package/dist/linkedin/reply.js.map +1 -0
  165. package/dist/linkedin/selector-engine.d.ts +69 -0
  166. package/dist/linkedin/selector-engine.d.ts.map +1 -0
  167. package/dist/linkedin/selector-engine.js +339 -0
  168. package/dist/linkedin/selector-engine.js.map +1 -0
  169. package/dist/linkedin/selector-engine.test.d.ts +2 -0
  170. package/dist/linkedin/selector-engine.test.d.ts.map +1 -0
  171. package/dist/linkedin/selector-engine.test.js +135 -0
  172. package/dist/linkedin/selector-engine.test.js.map +1 -0
  173. package/dist/linkedin/selectors.d.ts +65 -0
  174. package/dist/linkedin/selectors.d.ts.map +1 -0
  175. package/dist/linkedin/selectors.js +261 -0
  176. package/dist/linkedin/selectors.js.map +1 -0
  177. package/dist/templates/engine.d.ts +37 -0
  178. package/dist/templates/engine.d.ts.map +1 -0
  179. package/dist/templates/engine.js +215 -0
  180. package/dist/templates/engine.js.map +1 -0
  181. package/dist/templates/engine.test.d.ts +2 -0
  182. package/dist/templates/engine.test.d.ts.map +1 -0
  183. package/dist/templates/engine.test.js +212 -0
  184. package/dist/templates/engine.test.js.map +1 -0
  185. package/dist/templates/index.d.ts +2 -0
  186. package/dist/templates/index.d.ts.map +1 -0
  187. package/dist/templates/index.js +7 -0
  188. package/dist/templates/index.js.map +1 -0
  189. package/dist/types/index.d.ts +113 -0
  190. package/dist/types/index.d.ts.map +1 -0
  191. package/dist/types/index.js +3 -0
  192. package/dist/types/index.js.map +1 -0
  193. package/dist/types/index.test.d.ts +2 -0
  194. package/dist/types/index.test.d.ts.map +1 -0
  195. package/dist/types/index.test.js +90 -0
  196. package/dist/types/index.test.js.map +1 -0
  197. package/dist/utils/paths.d.ts +8 -0
  198. package/dist/utils/paths.d.ts.map +1 -0
  199. package/dist/utils/paths.js +68 -0
  200. package/dist/utils/paths.js.map +1 -0
  201. package/dist/utils/rate-limiter.d.ts +22 -0
  202. package/dist/utils/rate-limiter.d.ts.map +1 -0
  203. package/dist/utils/rate-limiter.js +57 -0
  204. package/dist/utils/rate-limiter.js.map +1 -0
  205. package/dist/utils/retry.d.ts +18 -0
  206. package/dist/utils/retry.d.ts.map +1 -0
  207. package/dist/utils/retry.js +49 -0
  208. package/dist/utils/retry.js.map +1 -0
  209. package/docs/connection-command.md +52 -0
  210. package/docs/plans/2025-03-03-linkedin-cli-design.md +280 -0
  211. package/docs/plans/2025-03-03-linkedin-cli-implementation-plan.md +2087 -0
  212. package/docs/plans/2025-03-03-linkedin-cli-implementation.md +2420 -0
  213. package/docs/plans/2026-02-19-linkedin-connection-feature.md +596 -0
  214. package/docs/plans/2026-02-28-messages-send-feature.md +480 -0
  215. package/docs/plans/2026-02-28-messages-show-design.md +243 -0
  216. package/docs/plans/2026-03-03-linkedin-cli-oss-publishing-design.md +394 -0
  217. package/docs/plans/2026-03-03-linkedin-cli-oss-publishing-plan.md +1592 -0
  218. package/docs/superpowers/plans/2026-03-13-linkedin-automation-resilience-migration.md +425 -0
  219. package/docs/superpowers/plans/2026-03-13-playwright-fara-migration.md +1112 -0
  220. package/docs/superpowers/plans/2026-03-14-page-agent-plan.md +1598 -0
  221. package/docs/superpowers/plans/2026-03-15-company-profile-extraction.md +591 -0
  222. package/docs/superpowers/plans/2026-03-15-profile-extraction-plan.md +943 -0
  223. package/docs/superpowers/specs/2026-03-14-company-profile-extraction-design.md +371 -0
  224. package/docs/superpowers/specs/2026-03-14-page-agent-design.md +385 -0
  225. package/docs/superpowers/specs/2026-03-15-profile-extraction-design.md +409 -0
  226. package/eslint.config.mjs +58 -0
  227. package/go.mod +9 -0
  228. package/go.sum +10 -0
  229. package/import-cookies.js +376 -0
  230. package/internal/cmd/actions.go +123 -0
  231. package/internal/cmd/auth.go +108 -0
  232. package/internal/cmd/connect.go +42 -0
  233. package/internal/cmd/message.go +44 -0
  234. package/internal/cmd/people.go +454 -0
  235. package/internal/cmd/profiles.go +121 -0
  236. package/internal/cmd/root.go +89 -0
  237. package/internal/cmd/sequence.go +192 -0
  238. package/internal/config/config.go +187 -0
  239. package/internal/config/config_test.go +121 -0
  240. package/internal/config/profile.go +65 -0
  241. package/internal/linkedin/navigator.go +195 -0
  242. package/internal/linkedin/selectors.go +39 -0
  243. package/internal/linkedin/validator.go +69 -0
  244. package/internal/pinchtab/client.go +183 -0
  245. package/internal/pinchtab/client_test.go +67 -0
  246. package/internal/pinchtab/types.go +50 -0
  247. package/internal/ratelimit/limiter.go +115 -0
  248. package/internal/ratelimit/limits.go +32 -0
  249. package/package.json +67 -0
  250. package/release.sh +66 -0
  251. package/scripts/debug-linkedin.js +156 -0
  252. package/scripts/debug-login.js +193 -0
  253. package/scripts/extract-from-edge.js +96 -0
  254. package/scripts/import-cookies.js +101 -0
  255. package/scripts/poc-show-data.js +205 -0
  256. package/scripts/proof-of-access.js +87 -0
  257. package/scripts/prove-connection.js +110 -0
  258. package/scripts/show-linkedin-data.js +173 -0
  259. package/src/agent/action-executor.test.ts +464 -0
  260. package/src/agent/action-executor.ts +203 -0
  261. package/src/agent/claude-client.test.ts +707 -0
  262. package/src/agent/claude-client.ts +422 -0
  263. package/src/agent/dom-extractor.test.ts +574 -0
  264. package/src/agent/dom-extractor.ts +437 -0
  265. package/src/agent/extension-client.ts +306 -0
  266. package/src/agent/index.ts +28 -0
  267. package/src/agent/page-agent.ts +292 -0
  268. package/src/agent/types.ts +288 -0
  269. package/src/cli/agent-commands.ts +274 -0
  270. package/src/cli/auth.ts +343 -0
  271. package/src/cli/company.ts +66 -0
  272. package/src/cli/connection.ts +89 -0
  273. package/src/cli/index.ts +7 -0
  274. package/src/cli/messages.ts +338 -0
  275. package/src/cli/profile.test.ts +14 -0
  276. package/src/cli/profile.ts +95 -0
  277. package/src/cli/reply.ts +110 -0
  278. package/src/core/audit.test.ts +134 -0
  279. package/src/core/audit.ts +98 -0
  280. package/src/core/browser-cookies.ts +203 -0
  281. package/src/core/browser.ts +304 -0
  282. package/src/core/config.test.ts +90 -0
  283. package/src/core/config.ts +81 -0
  284. package/src/core/storage.test.ts +129 -0
  285. package/src/core/storage.ts +100 -0
  286. package/src/index.ts +70 -0
  287. package/src/linkedin/auth.ts +218 -0
  288. package/src/linkedin/company-extractor.test.ts +58 -0
  289. package/src/linkedin/company-extractor.ts +222 -0
  290. package/src/linkedin/connector.ts +336 -0
  291. package/src/linkedin/message-sender.ts +141 -0
  292. package/src/linkedin/messages.ts +894 -0
  293. package/src/linkedin/profile.test.ts +79 -0
  294. package/src/linkedin/profile.ts +314 -0
  295. package/src/linkedin/reply.ts +96 -0
  296. package/src/linkedin/selector-engine.test.ts +167 -0
  297. package/src/linkedin/selector-engine.ts +393 -0
  298. package/src/linkedin/selectors.ts +268 -0
  299. package/src/templates/defaults/followup.txt +14 -0
  300. package/src/templates/defaults/meeting.txt +16 -0
  301. package/src/templates/defaults/welcome.txt +14 -0
  302. package/src/templates/engine.test.ts +228 -0
  303. package/src/templates/engine.ts +208 -0
  304. package/src/templates/index.ts +1 -0
  305. package/src/types/index.test.ts +94 -0
  306. package/src/types/index.ts +143 -0
  307. package/src/types/sql.js.d.ts +23 -0
  308. package/src/utils/paths.ts +33 -0
  309. package/src/utils/rate-limiter.ts +75 -0
  310. package/src/utils/retry.ts +78 -0
  311. package/test-cli.sh +85 -0
  312. package/test-real-data.sh +97 -0
  313. package/tsconfig.json +23 -0
  314. package/vitest.config.ts +35 -0
@@ -0,0 +1,371 @@
1
+ # Company Profile Extraction Feature
2
+
3
+ **Date:** 2026-03-14
4
+ **Status:** Draft
5
+ **Author:** Claude
6
+
7
+ ## Overview
8
+
9
+ Add a new `company info` command to extract structured data from LinkedIn company profile pages using Playwright selectors.
10
+
11
+ ## Command Interface
12
+
13
+ ```bash
14
+ linkedin-cli company info <url> [--headless] [--debug]
15
+ ```
16
+
17
+ ### Arguments
18
+
19
+ | Argument | Required | Description |
20
+ |----------|----------|-------------|
21
+ | `url` | Yes | LinkedIn company URL (e.g., `https://www.linkedin.com/company/openai/`) |
22
+ | `--headless` | No | Run browser in headless mode. When omitted, the `headless` value from config is used |
23
+ | `--debug` | No | Enable debug mode with screenshots and HTML snapshots |
24
+
25
+ ### URL Validation
26
+
27
+ Valid pattern: `^https:\/\/www\.linkedin\.com\/company\/[^\/]+\/?$`
28
+
29
+ Requires HTTPS for security. Note: The existing `connection.ts` pattern allows HTTP, but we enforce HTTPS here for new commands.
30
+
31
+ ### Example Usage
32
+
33
+ ```bash
34
+ # Extract company info (uses config headless setting)
35
+ linkedin-cli company info https://www.linkedin.com/company/openai/
36
+
37
+ # Force headless mode
38
+ linkedin-cli company info https://www.linkedin.com/company/openai/ --headless
39
+
40
+ # Debug mode for troubleshooting
41
+ linkedin-cli company info https://www.linkedin.com/company/openai/ --debug
42
+ ```
43
+
44
+ ## Output Format
45
+
46
+ JSON to stdout with the following structure:
47
+
48
+ ```json
49
+ {
50
+ "name": "OpenAI",
51
+ "linkedin_url": "https://www.linkedin.com/company/openai/",
52
+ "website": "https://openai.com",
53
+ "industry": "Research Services",
54
+ "company_size": "1,001-5,000 employees",
55
+ "headquarters": "San Francisco, California",
56
+ "founded": "2015",
57
+ "specialties": ["Artificial Intelligence", "Machine Learning", "Research"],
58
+ "type": "Privately Held",
59
+ "follower_count": 2500000
60
+ }
61
+ ```
62
+
63
+ ### Field Specifications
64
+
65
+ | Field | Type | Source Location | Nullable |
66
+ |-------|------|-----------------|----------|
67
+ | `name` | string | Top card heading | No |
68
+ | `linkedin_url` | string | Input URL | No |
69
+ | `website` | string \| null | About section | Yes |
70
+ | `industry` | string \| null | About section | Yes |
71
+ | `company_size` | string \| null | About section | Yes |
72
+ | `headquarters` | string \| null | About section | Yes |
73
+ | `founded` | string \| null | About section | Yes |
74
+ | `specialties` | string[] \| null | About section | Yes |
75
+ | `type` | string \| null | About section | Yes |
76
+ | `follower_count` | number \| null | Sidebar/top card | Yes |
77
+
78
+ ## Architecture
79
+
80
+ ### File Structure
81
+
82
+ ```
83
+ src/
84
+ ├── cli/
85
+ │ ├── index.ts # Add: export { registerCompanyCommands }
86
+ │ └── company.ts # New: Command registration
87
+ ├── linkedin/
88
+ │ ├── selectors.ts # Modify: Add company section to SELECTORS
89
+ │ └── company-extractor.ts # New: CompanyExtractor class
90
+ └── types/
91
+ └── index.ts # Add: CompanyProfile interface
92
+ ```
93
+
94
+ ### Components
95
+
96
+ #### 1. `src/cli/company.ts`
97
+
98
+ Command registration following existing patterns (`connection.ts`, `messages.ts`):
99
+
100
+ - Validates company URL pattern
101
+ - Launches browser via `BrowserController` with headless from options or config
102
+ - **No auth required** - company pages are public (just dismiss authwall popup)
103
+ - Calls `CompanyExtractor.extract()`
104
+ - Logs action via `getAuditLogger()`
105
+ - Outputs JSON to stdout
106
+ - Handles errors with appropriate exit codes
107
+
108
+ **Browser instantiation pattern:**
109
+ ```typescript
110
+ browser = new BrowserController({
111
+ headless: options.headless !== undefined ? options.headless : config.getValue('headless'),
112
+ debug: options.debug,
113
+ });
114
+ ```
115
+
116
+ **Browser cleanup pattern:**
117
+ ```typescript
118
+ try {
119
+ // ... extraction logic
120
+ await browser.close();
121
+ } catch (error) {
122
+ // Always close browser on error
123
+ if (browser) await browser.close();
124
+ throw error;
125
+ }
126
+ ```
127
+
128
+ #### 2. `src/linkedin/company-extractor.ts`
129
+
130
+ Main extraction logic:
131
+
132
+ ```typescript
133
+ export interface CompanyProfile {
134
+ name: string;
135
+ linkedin_url: string;
136
+ website: string | null;
137
+ industry: string | null;
138
+ company_size: string | null;
139
+ headquarters: string | null;
140
+ founded: string | null;
141
+ specialties: string[] | null;
142
+ type: string | null;
143
+ follower_count: number | null;
144
+ }
145
+
146
+ export class CompanyExtractor {
147
+ constructor(private page: Page) {}
148
+
149
+ async extract(url: string): Promise<CompanyProfile>;
150
+ }
151
+ ```
152
+
153
+ ### Data Flow
154
+
155
+ ```
156
+ User Input (URL)
157
+
158
+
159
+ ┌─────────────────┐
160
+ │ URL Validation │
161
+ └────────┬────────┘
162
+
163
+
164
+ ┌─────────────────┐
165
+ │ Browser Launch │
166
+ └────────┬────────┘
167
+
168
+
169
+ ┌─────────────────┐
170
+ │ Navigate to URL │
171
+ └────────┬────────┘
172
+
173
+
174
+ ┌─────────────────┐
175
+ │Dismiss Authwall │
176
+ └────────┬────────┘
177
+
178
+
179
+ ┌─────────────────┐
180
+ │ Extract Fields │
181
+ └────────┬────────┘
182
+
183
+
184
+ ┌─────────────────┐
185
+ │ JSON Output │
186
+ └─────────────────┘
187
+ ```
188
+
189
+ **Note:** No authentication required. LinkedIn company pages are public. An authwall popup may appear but can be dismissed.
190
+
191
+ ## Selector Strategy
192
+
193
+ LinkedIn company pages store data in the About section as key-value pairs. The extractor uses multi-layer fallback selectors for resilience, matching the existing `SELECTORS` pattern in `selectors.ts`.
194
+
195
+ ### `SELECTORS.company` Section
196
+
197
+ Add nested `company` section to existing `SELECTORS` in `src/linkedin/selectors.ts`:
198
+
199
+ ```typescript
200
+ // Add to existing SELECTORS object
201
+ company: {
202
+ name: [
203
+ 'h1.text-heading-xlarge',
204
+ '.org-top-card-primary-content h1',
205
+ 'section[data-testid="about-us"] h1',
206
+ ],
207
+ website: [
208
+ 'a[data-testid="website-link"]',
209
+ '.org-about-us-module__website a',
210
+ 'a[href*="company"][target="_blank"]',
211
+ ],
212
+ industry: [
213
+ 'dt:has-text("Industry") + dd',
214
+ '.org-about-company-module__dl dt:has-text("Industry") + dd',
215
+ ],
216
+ company_size: [
217
+ 'dt:has-text("Company size") + dd',
218
+ 'dt:has-text("Employees") + dd',
219
+ '.org-about-company-module__dl dt:has-text("Company size") + dd',
220
+ ],
221
+ headquarters: [
222
+ 'dt:has-text("Headquarters") + dd',
223
+ '.org-about-company-module__dl dt:has-text("Headquarters") + dd',
224
+ ],
225
+ founded: [
226
+ 'dt:has-text("Founded") + dd',
227
+ '.org-about-company-module__dl dt:has-text("Founded") + dd',
228
+ ],
229
+ specialties: [
230
+ 'dt:has-text("Specialties") + dd',
231
+ '.org-about-company-module__dl dt:has-text("Specialties") + dd',
232
+ ],
233
+ type: [
234
+ 'dt:has-text("Company type") + dd',
235
+ 'dt:has-text("Type") + dd',
236
+ '.org-about-company-module__dl dt:has-text("Company type") + dd',
237
+ ],
238
+ follower_count: [
239
+ '.org-top-card-primary-content__followers-count',
240
+ '.org-top-card-module__followers-count',
241
+ 'span:has-text("followers")',
242
+ ],
243
+ }
244
+ ```
245
+
246
+ This follows the existing nested structure (`login`, `messages`, `connection`) for consistency.
247
+
248
+ ### Fallback Strategy
249
+
250
+ 1. Try each selector in array order
251
+ 2. Use `page.locator(selector).first()` to handle multiple matches
252
+ 3. Return `null` if no selector matches
253
+
254
+ ## Parsing Rules
255
+
256
+ ### Follower Count Parsing
257
+
258
+ Convert string formats to number:
259
+
260
+ | Input | Output |
261
+ |-------|--------|
262
+ | `"2.5M followers"` | `2500000` |
263
+ | `"12K followers"` | `12000` |
264
+ | `"1,234 followers"` | `1234` |
265
+ | `"500+ followers"` | `500` |
266
+
267
+ Algorithm:
268
+ 1. Extract numeric part and optional suffix: `/([\d,.]+)\s*([KkMm]?)/`
269
+ 2. Remove commas
270
+ 3. Apply multiplier: K/k = 1000, M/m = 1000000
271
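+ 4. Round to the nearest integer and return it (decimal inputs such as `1.1K` can otherwise yield a non-integer float)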
272
+
273
+ ### Specialties Parsing
274
+
275
+ Split by comma, trim whitespace:
276
+
277
+ | Input | Output |
278
+ |-------|--------|
279
+ | `"AI, ML, Research"` | `["AI", "ML", "Research"]` |
280
+ | `"Artificial Intelligence"` | `["Artificial Intelligence"]` |
281
+
282
+ ## Error Handling
283
+
284
+ | Error | Exit Code | Message |
285
+ |-------|-----------|---------|
286
+ | Invalid URL | 1 | "Invalid company URL. Expected format: https://www.linkedin.com/company/<name>/" |
287
+ | Navigation timeout | 1 | "Failed to load company page (timeout: 30000ms)" |
288
+ | Extraction failure | 1 | "Failed to extract company data: <reason>" |
289
+ | Success | 0 | JSON output |
290
+
291
+ ## Popup Dismissal
292
+
293
+ LinkedIn may show an authwall popup on company pages. The extractor should dismiss it:
294
+
295
+ ```typescript
296
+ const DISMISS_SELECTORS = [
297
+ 'button[aria-label="Dismiss"]',
298
+ 'button[aria-label="Close"]',
299
+ '.artdeco-modal__dismiss',
300
+ 'button.artdeco-modal__dismiss',
301
+ ];
302
+
303
+ async function dismissAuthwall(page: Page): Promise<void> {
304
+ for (const selector of DISMISS_SELECTORS) {
305
+ try {
306
+ const btn = page.locator(selector).first();
307
+ if (await btn.isVisible({ timeout: 1000 })) {
308
+ await btn.click();
309
+ await page.waitForTimeout(500);
310
+ return;
311
+ }
312
+ } catch {
313
+ // Try next selector
314
+ }
315
+ }
316
+ }
317
+ ```
318
+
319
+ ### Timeout Configuration
320
+
321
+ - Navigation timeout: 30000ms (matches existing `page.goto()` calls)
322
+ - Selector wait timeout: 5000ms per field
323
+
324
+ ## Audit Logging
325
+
326
+ Log extraction attempts using existing `getAuditLogger()`:
327
+
328
+ ```typescript
329
+ // On success
330
+ logger.log('company.info', { url, fieldsExtracted: Object.keys(profile).filter(k => profile[k] !== null) }, true);
331
+
332
+ // On failure
333
+ logger.log('company.info', { url, error: error.message }, false);
334
+ ```
335
+
336
+ ## Testing Strategy
337
+
338
+ ### Unit Tests
339
+
340
+ - URL validation (valid/invalid patterns)
341
+ - Selector fallback logic
342
+ - Follower count parsing (various formats)
343
+ - Specialties parsing (comma-separated string → array)
344
+
345
+ ### Integration Tests
346
+
347
+ - Mock Playwright page with sample HTML
348
+ - Test extraction with fixture data
349
+
350
+ ### Manual Testing
351
+
352
+ ```bash
353
+ # Test with real company pages
354
+ linkedin-cli company info https://www.linkedin.com/company/openai/
355
+ linkedin-cli company info https://www.linkedin.com/company/microsoft/
356
+ ```
357
+
358
+ ## Dependencies
359
+
360
+ - Existing: `BrowserController`, `getConfig()`, `getAuditLogger()`
361
+ - No new external dependencies required
362
+ - **No auth required** - company pages are public
363
+
364
+ ## Implementation Notes
365
+
366
+ 1. Reuse existing browser infrastructure (no auth needed)
367
+ 2. Follow existing command patterns from `connection.ts`
368
+ 3. Add `company` section to existing `SELECTORS` in `selectors.ts`
369
+ 4. Dismiss authwall popup before extraction
370
+ 5. Use `page.waitForSelector()` with timeout for reliability
371
+ 6. Implement robust parsing for follower count with K/M suffixes