linkedin-automation-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314) hide show
  1. package/.env.example +12 -0
  2. package/.github/workflows/ci.yml +66 -0
  3. package/.github/workflows/publish.yml +48 -0
  4. package/.husky/pre-commit +6 -0
  5. package/.prettierignore +4 -0
  6. package/.prettierrc +10 -0
  7. package/AGENTS.md +294 -0
  8. package/CHANGELOG.md +40 -0
  9. package/GIT_RELEASE.md +167 -0
  10. package/LICENSE +21 -0
  11. package/Makefile +30 -0
  12. package/NPM_PUBLISHING.md +230 -0
  13. package/PYEOF +0 -0
  14. package/README.md +295 -0
  15. package/TESTING-GUIDE.md +151 -0
  16. package/cmd/linkedin/main.go +9 -0
  17. package/dist/agent/action-executor.d.ts +81 -0
  18. package/dist/agent/action-executor.d.ts.map +1 -0
  19. package/dist/agent/action-executor.js +170 -0
  20. package/dist/agent/action-executor.js.map +1 -0
  21. package/dist/agent/action-executor.test.d.ts +2 -0
  22. package/dist/agent/action-executor.test.d.ts.map +1 -0
  23. package/dist/agent/action-executor.test.js +366 -0
  24. package/dist/agent/action-executor.test.js.map +1 -0
  25. package/dist/agent/claude-client.d.ts +74 -0
  26. package/dist/agent/claude-client.d.ts.map +1 -0
  27. package/dist/agent/claude-client.js +314 -0
  28. package/dist/agent/claude-client.js.map +1 -0
  29. package/dist/agent/claude-client.test.d.ts +2 -0
  30. package/dist/agent/claude-client.test.d.ts.map +1 -0
  31. package/dist/agent/claude-client.test.js +590 -0
  32. package/dist/agent/claude-client.test.js.map +1 -0
  33. package/dist/agent/dom-extractor.d.ts +50 -0
  34. package/dist/agent/dom-extractor.d.ts.map +1 -0
  35. package/dist/agent/dom-extractor.js +374 -0
  36. package/dist/agent/dom-extractor.js.map +1 -0
  37. package/dist/agent/dom-extractor.test.d.ts +7 -0
  38. package/dist/agent/dom-extractor.test.d.ts.map +1 -0
  39. package/dist/agent/dom-extractor.test.js +504 -0
  40. package/dist/agent/dom-extractor.test.js.map +1 -0
  41. package/dist/agent/extension-client.d.ts +75 -0
  42. package/dist/agent/extension-client.d.ts.map +1 -0
  43. package/dist/agent/extension-client.js +245 -0
  44. package/dist/agent/extension-client.js.map +1 -0
  45. package/dist/agent/index.d.ts +8 -0
  46. package/dist/agent/index.d.ts.map +1 -0
  47. package/dist/agent/index.js +16 -0
  48. package/dist/agent/index.js.map +1 -0
  49. package/dist/agent/page-agent.d.ts +76 -0
  50. package/dist/agent/page-agent.d.ts.map +1 -0
  51. package/dist/agent/page-agent.js +236 -0
  52. package/dist/agent/page-agent.js.map +1 -0
  53. package/dist/agent/types.d.ts +236 -0
  54. package/dist/agent/types.d.ts.map +1 -0
  55. package/dist/agent/types.js +37 -0
  56. package/dist/agent/types.js.map +1 -0
  57. package/dist/cli/agent-commands.d.ts +3 -0
  58. package/dist/cli/agent-commands.d.ts.map +1 -0
  59. package/dist/cli/agent-commands.js +250 -0
  60. package/dist/cli/agent-commands.js.map +1 -0
  61. package/dist/cli/auth.d.ts +3 -0
  62. package/dist/cli/auth.d.ts.map +1 -0
  63. package/dist/cli/auth.js +288 -0
  64. package/dist/cli/auth.js.map +1 -0
  65. package/dist/cli/company.d.ts +3 -0
  66. package/dist/cli/company.d.ts.map +1 -0
  67. package/dist/cli/company.js +55 -0
  68. package/dist/cli/company.js.map +1 -0
  69. package/dist/cli/connection.d.ts +3 -0
  70. package/dist/cli/connection.d.ts.map +1 -0
  71. package/dist/cli/connection.js +79 -0
  72. package/dist/cli/connection.js.map +1 -0
  73. package/dist/cli/index.d.ts +7 -0
  74. package/dist/cli/index.d.ts.map +1 -0
  75. package/dist/cli/index.js +17 -0
  76. package/dist/cli/index.js.map +1 -0
  77. package/dist/cli/messages.d.ts +3 -0
  78. package/dist/cli/messages.d.ts.map +1 -0
  79. package/dist/cli/messages.js +268 -0
  80. package/dist/cli/messages.js.map +1 -0
  81. package/dist/cli/profile.d.ts +3 -0
  82. package/dist/cli/profile.d.ts.map +1 -0
  83. package/dist/cli/profile.js +81 -0
  84. package/dist/cli/profile.js.map +1 -0
  85. package/dist/cli/profile.test.d.ts +2 -0
  86. package/dist/cli/profile.test.d.ts.map +1 -0
  87. package/dist/cli/profile.test.js +15 -0
  88. package/dist/cli/profile.test.js.map +1 -0
  89. package/dist/cli/reply.d.ts +3 -0
  90. package/dist/cli/reply.d.ts.map +1 -0
  91. package/dist/cli/reply.js +129 -0
  92. package/dist/cli/reply.js.map +1 -0
  93. package/dist/core/audit.d.ts +17 -0
  94. package/dist/core/audit.d.ts.map +1 -0
  95. package/dist/core/audit.js +121 -0
  96. package/dist/core/audit.js.map +1 -0
  97. package/dist/core/audit.test.d.ts +2 -0
  98. package/dist/core/audit.test.d.ts.map +1 -0
  99. package/dist/core/audit.test.js +142 -0
  100. package/dist/core/audit.test.js.map +1 -0
  101. package/dist/core/browser-cookies.d.ts +19 -0
  102. package/dist/core/browser-cookies.d.ts.map +1 -0
  103. package/dist/core/browser-cookies.js +181 -0
  104. package/dist/core/browser-cookies.js.map +1 -0
  105. package/dist/core/browser.d.ts +50 -0
  106. package/dist/core/browser.d.ts.map +1 -0
  107. package/dist/core/browser.js +318 -0
  108. package/dist/core/browser.js.map +1 -0
  109. package/dist/core/config.d.ts +20 -0
  110. package/dist/core/config.d.ts.map +1 -0
  111. package/dist/core/config.js +103 -0
  112. package/dist/core/config.js.map +1 -0
  113. package/dist/core/config.test.d.ts +2 -0
  114. package/dist/core/config.test.d.ts.map +1 -0
  115. package/dist/core/config.test.js +111 -0
  116. package/dist/core/config.test.js.map +1 -0
  117. package/dist/core/storage.d.ts +19 -0
  118. package/dist/core/storage.d.ts.map +1 -0
  119. package/dist/core/storage.js +124 -0
  120. package/dist/core/storage.js.map +1 -0
  121. package/dist/core/storage.test.d.ts +2 -0
  122. package/dist/core/storage.test.d.ts.map +1 -0
  123. package/dist/core/storage.test.js +142 -0
  124. package/dist/core/storage.test.js.map +1 -0
  125. package/dist/index.d.ts +3 -0
  126. package/dist/index.d.ts.map +1 -0
  127. package/dist/index.js +63 -0
  128. package/dist/index.js.map +1 -0
  129. package/dist/linkedin/auth.d.ts +22 -0
  130. package/dist/linkedin/auth.d.ts.map +1 -0
  131. package/dist/linkedin/auth.js +167 -0
  132. package/dist/linkedin/auth.js.map +1 -0
  133. package/dist/linkedin/company-extractor.d.ts +36 -0
  134. package/dist/linkedin/company-extractor.d.ts.map +1 -0
  135. package/dist/linkedin/company-extractor.js +211 -0
  136. package/dist/linkedin/company-extractor.js.map +1 -0
  137. package/dist/linkedin/company-extractor.test.d.ts +2 -0
  138. package/dist/linkedin/company-extractor.test.d.ts.map +1 -0
  139. package/dist/linkedin/company-extractor.test.js +52 -0
  140. package/dist/linkedin/company-extractor.test.js.map +1 -0
  141. package/dist/linkedin/connector.d.ts +45 -0
  142. package/dist/linkedin/connector.d.ts.map +1 -0
  143. package/dist/linkedin/connector.js +245 -0
  144. package/dist/linkedin/connector.js.map +1 -0
  145. package/dist/linkedin/message-sender.d.ts +32 -0
  146. package/dist/linkedin/message-sender.d.ts.map +1 -0
  147. package/dist/linkedin/message-sender.js +112 -0
  148. package/dist/linkedin/message-sender.js.map +1 -0
  149. package/dist/linkedin/messages.d.ts +78 -0
  150. package/dist/linkedin/messages.d.ts.map +1 -0
  151. package/dist/linkedin/messages.js +745 -0
  152. package/dist/linkedin/messages.js.map +1 -0
  153. package/dist/linkedin/profile.d.ts +37 -0
  154. package/dist/linkedin/profile.d.ts.map +1 -0
  155. package/dist/linkedin/profile.js +268 -0
  156. package/dist/linkedin/profile.js.map +1 -0
  157. package/dist/linkedin/profile.test.d.ts +2 -0
  158. package/dist/linkedin/profile.test.d.ts.map +1 -0
  159. package/dist/linkedin/profile.test.js +68 -0
  160. package/dist/linkedin/profile.test.js.map +1 -0
  161. package/dist/linkedin/reply.d.ts +21 -0
  162. package/dist/linkedin/reply.d.ts.map +1 -0
  163. package/dist/linkedin/reply.js +76 -0
  164. package/dist/linkedin/reply.js.map +1 -0
  165. package/dist/linkedin/selector-engine.d.ts +69 -0
  166. package/dist/linkedin/selector-engine.d.ts.map +1 -0
  167. package/dist/linkedin/selector-engine.js +339 -0
  168. package/dist/linkedin/selector-engine.js.map +1 -0
  169. package/dist/linkedin/selector-engine.test.d.ts +2 -0
  170. package/dist/linkedin/selector-engine.test.d.ts.map +1 -0
  171. package/dist/linkedin/selector-engine.test.js +135 -0
  172. package/dist/linkedin/selector-engine.test.js.map +1 -0
  173. package/dist/linkedin/selectors.d.ts +65 -0
  174. package/dist/linkedin/selectors.d.ts.map +1 -0
  175. package/dist/linkedin/selectors.js +261 -0
  176. package/dist/linkedin/selectors.js.map +1 -0
  177. package/dist/templates/engine.d.ts +37 -0
  178. package/dist/templates/engine.d.ts.map +1 -0
  179. package/dist/templates/engine.js +215 -0
  180. package/dist/templates/engine.js.map +1 -0
  181. package/dist/templates/engine.test.d.ts +2 -0
  182. package/dist/templates/engine.test.d.ts.map +1 -0
  183. package/dist/templates/engine.test.js +212 -0
  184. package/dist/templates/engine.test.js.map +1 -0
  185. package/dist/templates/index.d.ts +2 -0
  186. package/dist/templates/index.d.ts.map +1 -0
  187. package/dist/templates/index.js +7 -0
  188. package/dist/templates/index.js.map +1 -0
  189. package/dist/types/index.d.ts +113 -0
  190. package/dist/types/index.d.ts.map +1 -0
  191. package/dist/types/index.js +3 -0
  192. package/dist/types/index.js.map +1 -0
  193. package/dist/types/index.test.d.ts +2 -0
  194. package/dist/types/index.test.d.ts.map +1 -0
  195. package/dist/types/index.test.js +90 -0
  196. package/dist/types/index.test.js.map +1 -0
  197. package/dist/utils/paths.d.ts +8 -0
  198. package/dist/utils/paths.d.ts.map +1 -0
  199. package/dist/utils/paths.js +68 -0
  200. package/dist/utils/paths.js.map +1 -0
  201. package/dist/utils/rate-limiter.d.ts +22 -0
  202. package/dist/utils/rate-limiter.d.ts.map +1 -0
  203. package/dist/utils/rate-limiter.js +57 -0
  204. package/dist/utils/rate-limiter.js.map +1 -0
  205. package/dist/utils/retry.d.ts +18 -0
  206. package/dist/utils/retry.d.ts.map +1 -0
  207. package/dist/utils/retry.js +49 -0
  208. package/dist/utils/retry.js.map +1 -0
  209. package/docs/connection-command.md +52 -0
  210. package/docs/plans/2025-03-03-linkedin-cli-design.md +280 -0
  211. package/docs/plans/2025-03-03-linkedin-cli-implementation-plan.md +2087 -0
  212. package/docs/plans/2025-03-03-linkedin-cli-implementation.md +2420 -0
  213. package/docs/plans/2026-02-19-linkedin-connection-feature.md +596 -0
  214. package/docs/plans/2026-02-28-messages-send-feature.md +480 -0
  215. package/docs/plans/2026-02-28-messages-show-design.md +243 -0
  216. package/docs/plans/2026-03-03-linkedin-cli-oss-publishing-design.md +394 -0
  217. package/docs/plans/2026-03-03-linkedin-cli-oss-publishing-plan.md +1592 -0
  218. package/docs/superpowers/plans/2026-03-13-linkedin-automation-resilience-migration.md +425 -0
  219. package/docs/superpowers/plans/2026-03-13-playwright-fara-migration.md +1112 -0
  220. package/docs/superpowers/plans/2026-03-14-page-agent-plan.md +1598 -0
  221. package/docs/superpowers/plans/2026-03-15-company-profile-extraction.md +591 -0
  222. package/docs/superpowers/plans/2026-03-15-profile-extraction-plan.md +943 -0
  223. package/docs/superpowers/specs/2026-03-14-company-profile-extraction-design.md +371 -0
  224. package/docs/superpowers/specs/2026-03-14-page-agent-design.md +385 -0
  225. package/docs/superpowers/specs/2026-03-15-profile-extraction-design.md +409 -0
  226. package/eslint.config.mjs +58 -0
  227. package/go.mod +9 -0
  228. package/go.sum +10 -0
  229. package/import-cookies.js +376 -0
  230. package/internal/cmd/actions.go +123 -0
  231. package/internal/cmd/auth.go +108 -0
  232. package/internal/cmd/connect.go +42 -0
  233. package/internal/cmd/message.go +44 -0
  234. package/internal/cmd/people.go +454 -0
  235. package/internal/cmd/profiles.go +121 -0
  236. package/internal/cmd/root.go +89 -0
  237. package/internal/cmd/sequence.go +192 -0
  238. package/internal/config/config.go +187 -0
  239. package/internal/config/config_test.go +121 -0
  240. package/internal/config/profile.go +65 -0
  241. package/internal/linkedin/navigator.go +195 -0
  242. package/internal/linkedin/selectors.go +39 -0
  243. package/internal/linkedin/validator.go +69 -0
  244. package/internal/pinchtab/client.go +183 -0
  245. package/internal/pinchtab/client_test.go +67 -0
  246. package/internal/pinchtab/types.go +50 -0
  247. package/internal/ratelimit/limiter.go +115 -0
  248. package/internal/ratelimit/limits.go +32 -0
  249. package/package.json +67 -0
  250. package/release.sh +66 -0
  251. package/scripts/debug-linkedin.js +156 -0
  252. package/scripts/debug-login.js +193 -0
  253. package/scripts/extract-from-edge.js +96 -0
  254. package/scripts/import-cookies.js +101 -0
  255. package/scripts/poc-show-data.js +205 -0
  256. package/scripts/proof-of-access.js +87 -0
  257. package/scripts/prove-connection.js +110 -0
  258. package/scripts/show-linkedin-data.js +173 -0
  259. package/src/agent/action-executor.test.ts +464 -0
  260. package/src/agent/action-executor.ts +203 -0
  261. package/src/agent/claude-client.test.ts +707 -0
  262. package/src/agent/claude-client.ts +422 -0
  263. package/src/agent/dom-extractor.test.ts +574 -0
  264. package/src/agent/dom-extractor.ts +437 -0
  265. package/src/agent/extension-client.ts +306 -0
  266. package/src/agent/index.ts +28 -0
  267. package/src/agent/page-agent.ts +292 -0
  268. package/src/agent/types.ts +288 -0
  269. package/src/cli/agent-commands.ts +274 -0
  270. package/src/cli/auth.ts +343 -0
  271. package/src/cli/company.ts +66 -0
  272. package/src/cli/connection.ts +89 -0
  273. package/src/cli/index.ts +7 -0
  274. package/src/cli/messages.ts +338 -0
  275. package/src/cli/profile.test.ts +14 -0
  276. package/src/cli/profile.ts +95 -0
  277. package/src/cli/reply.ts +110 -0
  278. package/src/core/audit.test.ts +134 -0
  279. package/src/core/audit.ts +98 -0
  280. package/src/core/browser-cookies.ts +203 -0
  281. package/src/core/browser.ts +304 -0
  282. package/src/core/config.test.ts +90 -0
  283. package/src/core/config.ts +81 -0
  284. package/src/core/storage.test.ts +129 -0
  285. package/src/core/storage.ts +100 -0
  286. package/src/index.ts +70 -0
  287. package/src/linkedin/auth.ts +218 -0
  288. package/src/linkedin/company-extractor.test.ts +58 -0
  289. package/src/linkedin/company-extractor.ts +222 -0
  290. package/src/linkedin/connector.ts +336 -0
  291. package/src/linkedin/message-sender.ts +141 -0
  292. package/src/linkedin/messages.ts +894 -0
  293. package/src/linkedin/profile.test.ts +79 -0
  294. package/src/linkedin/profile.ts +314 -0
  295. package/src/linkedin/reply.ts +96 -0
  296. package/src/linkedin/selector-engine.test.ts +167 -0
  297. package/src/linkedin/selector-engine.ts +393 -0
  298. package/src/linkedin/selectors.ts +268 -0
  299. package/src/templates/defaults/followup.txt +14 -0
  300. package/src/templates/defaults/meeting.txt +16 -0
  301. package/src/templates/defaults/welcome.txt +14 -0
  302. package/src/templates/engine.test.ts +228 -0
  303. package/src/templates/engine.ts +208 -0
  304. package/src/templates/index.ts +1 -0
  305. package/src/types/index.test.ts +94 -0
  306. package/src/types/index.ts +143 -0
  307. package/src/types/sql.js.d.ts +23 -0
  308. package/src/utils/paths.ts +33 -0
  309. package/src/utils/rate-limiter.ts +75 -0
  310. package/src/utils/retry.ts +78 -0
  311. package/test-cli.sh +85 -0
  312. package/test-real-data.sh +97 -0
  313. package/tsconfig.json +23 -0
  314. package/vitest.config.ts +35 -0
@@ -0,0 +1,1112 @@
1
+ # Playwright MCP + Fara-7B Migration Plan
2
+
3
+ > **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking.
4
+
5
+ **Goal:** Replace PinchTab with Playwright MCP + Fara-7B vision model so the CLI can visually identify and click the correct Connect button on LinkedIn profiles — solving the 9-button disambiguation problem.
6
+
7
+ **Architecture:** Go CLI orchestrates two HTTP services: (1) Playwright MCP server (`npx @playwright/mcp@latest --port 3000 --caps vision`) for browser control (navigate, screenshot, click-at-coordinates), and (2) Fara-7B served via Ollama for visual target identification (screenshot → pixel coordinates). The CLI takes a screenshot, sends it to Fara asking "where is the Connect button?", gets back (x,y) coordinates, and tells Playwright to click there.
8
+
9
+ **Tech Stack:** Go 1.21+, Playwright MCP (Node.js, via npx), Ollama + Fara-7B GGUF (local vision model), Cobra CLI (existing)
10
+
11
+ **Hardware:** Apple Silicon M1/M2 16GB — Fara-7B Q4_K_M (~5GB RAM, ~5-8s per inference)
12
+
13
+ ---
14
+
15
+ ## Why This Migration
16
+
17
+ The current PinchTab-based approach has a **fundamental limitation**: its snapshot model provides only `{role, name, ref}` per DOM node — **no coordinates, no position, no bounding boxes**. When LinkedIn shows 9 "Connect" buttons (1 for the profile, 8 for recommended people), the code returns the first DOM match, which is often the wrong button.
18
+
19
+ This is unsolvable with DOM heuristics. A human solves it by **looking at the page**. Fara-7B does the same — it takes a screenshot and returns pixel coordinates for the correct button.
20
+
21
+ ## File Structure
22
+
23
+ ### New files
24
+ - `internal/playwright/client.go` — Playwright MCP HTTP client (navigate, screenshot, click, type)
25
+ - `internal/playwright/types.go` — MCP request/response types
26
+ - `internal/vision/client.go` — Fara-7B OpenAI-compatible API client
27
+ - `internal/vision/types.go` — Vision request/response types
28
+ - `internal/browser/browser.go` — Browser interface combining Playwright + Vision
29
+ - `internal/browser/actions.go` — High-level actions: ConnectToProfile, SendMessage, etc.
30
+
31
+ ### Modified files
32
+ - `internal/cmd/root.go` — Add flags for Playwright/Fara endpoints
33
+ - `internal/cmd/connect.go` — Use new browser actions instead of PinchTab navigator
34
+ - `internal/cmd/message.go` — Same
35
+ - `internal/cmd/auth.go` — Use Playwright MCP for auth flow
36
+ - `internal/cmd/actions.go` — Update to use new browser actions
37
+ - `go.mod` — No new Go dependencies needed (only stdlib HTTP)
38
+ - `README.md` — New prerequisites (Ollama, Fara-7B, Playwright MCP)
39
+
40
+ ### Preserved (unchanged)
41
+ - `internal/config/` — Profile management
42
+ - `internal/ratelimit/` — Rate limiting
43
+ - `internal/linkedin/validator.go` — URL validation
44
+ - `cmd/linkedin/main.go` — Entry point
45
+
46
+ ### Deprecated (remove after migration)
47
+ - `internal/pinchtab/` — Entire package
48
+ - `internal/linkedin/navigator.go` — PinchTab-based navigator
49
+ - `internal/linkedin/selectors.go` — CSS selectors (no longer needed)
50
+ - `internal/linkedin/detector.go` — DOM-based detection (no longer needed)
51
+
52
+ ---
53
+
54
+ ## Chunk 1: Playwright MCP Client
55
+
56
+ ### Task 1.1: Playwright MCP Types
57
+
58
+ **Files:**
59
+ - Create: `internal/playwright/types.go`
60
+
61
+ - [ ] **Step 1: Define MCP JSON-RPC types for Playwright server communication**
62
+
63
+ The Playwright MCP server in HTTP mode (`--port`) uses the MCP Streamable HTTP transport: JSON-RPC 2.0 messages sent via HTTP POST to the `/mcp` endpoint.
64
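+ We need types for tool calls (navigate, screenshot, click) and their responses. Because `MCPResponse.Result` is declared as `json.RawMessage`, `types.go` must also `import "encoding/json"`; without that import the compilation check in Step 2 fails.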
65
+
66
+ ```go
67
+ package playwright
68
+
69
+ // MCPRequest is a JSON-RPC 2.0 request for MCP tool calls
70
+ type MCPRequest struct {
71
+ JSONRPC string `json:"jsonrpc"`
72
+ ID int `json:"id"`
73
+ Method string `json:"method"`
74
+ Params interface{} `json:"params"`
75
+ }
76
+
77
+ // MCPResponse is a JSON-RPC 2.0 response
78
+ type MCPResponse struct {
79
+ JSONRPC string `json:"jsonrpc"`
80
+ ID int `json:"id"`
81
+ Result json.RawMessage `json:"result,omitempty"`
82
+ Error *MCPError `json:"error,omitempty"`
83
+ }
84
+
85
+ // MCPError represents a JSON-RPC error
86
+ type MCPError struct {
87
+ Code int `json:"code"`
88
+ Message string `json:"message"`
89
+ }
90
+
91
+ // ToolCallParams wraps tool name and arguments for MCP tools/call
92
+ type ToolCallParams struct {
93
+ Name string `json:"name"`
94
+ Arguments map[string]interface{} `json:"arguments,omitempty"`
95
+ }
96
+
97
+ // ToolResult contains the tool execution result
98
+ type ToolResult struct {
99
+ Content []ContentBlock `json:"content"`
100
+ IsError bool `json:"isError,omitempty"`
101
+ }
102
+
103
+ // ContentBlock is a text or image block in tool results
104
+ type ContentBlock struct {
105
+ Type string `json:"type"`
106
+ Text string `json:"text,omitempty"`
107
+ Data string `json:"data,omitempty"`
108
+ MimeType string `json:"mimeType,omitempty"`
109
+ }
110
+ ```
111
+
112
+ - [ ] **Step 2: Verify compilation**
113
+
114
+ Run: `go build ./internal/playwright/`
115
+ Expected: no errors
116
+
117
+ - [ ] **Step 3: Commit**
118
+
119
+ ```bash
120
+ git add internal/playwright/types.go
121
+ git commit -m "feat: add Playwright MCP JSON-RPC types"
122
+ ```
123
+
124
+ ### Task 1.2: Playwright MCP Client
125
+
126
+ **Files:**
127
+ - Create: `internal/playwright/client.go`
128
+ - Test: `internal/playwright/client_test.go`
129
+
130
+ - [ ] **Step 1: Write test for client initialization and tool call construction**
131
+
132
+ ```go
133
+ package playwright
134
+
135
+ import (
136
+ "testing"
137
+ )
138
+
139
+ func TestNewClient(t *testing.T) {
140
+ c := NewClient("http://localhost:3000")
141
+ if c.baseURL != "http://localhost:3000" {
142
+ t.Errorf("expected base URL http://localhost:3000, got %s", c.baseURL)
143
+ }
144
+ }
145
+
146
+ func TestBuildToolCallRequest(t *testing.T) {
147
+ c := NewClient("http://localhost:3000")
148
+ req := c.buildToolCall("browser_navigate", map[string]interface{}{
149
+ "url": "https://linkedin.com",
150
+ })
151
+ if req.Method != "tools/call" {
152
+ t.Errorf("expected method tools/call, got %s", req.Method)
153
+ }
154
+ }
155
+ ```
156
+
157
+ - [ ] **Step 2: Run test to verify it fails**
158
+
159
+ Run: `go test -v -run TestNewClient ./internal/playwright/`
160
+ Expected: FAIL — NewClient not defined
161
+
162
+ - [ ] **Step 3: Implement Client struct with Navigate, Screenshot, Click, Snapshot, Type, and WaitForText methods**
163
+
164
+ ```go
165
+ package playwright
166
+
167
+ import (
168
+ "bytes"
169
+ "encoding/json"
170
+ "fmt"
171
+ "io"
172
+ "net/http"
173
+ "sync"
174
+ "time"
175
+ )
176
+
177
+ // Client communicates with Playwright MCP server over HTTP
178
+ type Client struct {
179
+ baseURL string
180
+ sessionID string
181
+ client *http.Client
182
+ mu sync.Mutex
183
+ nextID int
184
+ }
185
+
186
+ // NewClient creates a Playwright MCP client
187
+ func NewClient(baseURL string) *Client {
188
+ return &Client{
189
+ baseURL: baseURL,
190
+ client: &http.Client{
191
+ Timeout: 60 * time.Second,
192
+ },
193
+ nextID: 1,
194
+ }
195
+ }
196
+
197
+ func (c *Client) buildToolCall(name string, args map[string]interface{}) *MCPRequest {
198
+ c.mu.Lock()
199
+ id := c.nextID
200
+ c.nextID++
201
+ c.mu.Unlock()
202
+
203
+ return &MCPRequest{
204
+ JSONRPC: "2.0",
205
+ ID: id,
206
+ Method: "tools/call",
207
+ Params: ToolCallParams{
208
+ Name: name,
209
+ Arguments: args,
210
+ },
211
+ }
212
+ }
213
+
214
+ func (c *Client) callTool(name string, args map[string]interface{}) (*ToolResult, error) {
215
+ req := c.buildToolCall(name, args)
216
+ body, err := json.Marshal(req)
217
+ if err != nil {
218
+ return nil, fmt.Errorf("failed to marshal request: %w", err)
219
+ }
220
+
221
+ httpReq, err := http.NewRequest("POST", c.baseURL+"/mcp", bytes.NewReader(body))
222
+ if err != nil {
223
+ return nil, fmt.Errorf("failed to create request: %w", err)
224
+ }
225
+ httpReq.Header.Set("Content-Type", "application/json")
226
+ httpReq.Header.Set("Accept", "application/json")
227
+ if c.sessionID != "" {
228
+ httpReq.Header.Set("Mcp-Session-Id", c.sessionID)
229
+ }
230
+
231
+ resp, err := c.client.Do(httpReq)
232
+ if err != nil {
233
+ return nil, fmt.Errorf("failed to call tool %s: %w", name, err)
234
+ }
235
+ defer resp.Body.Close()
236
+
237
+ // Capture session ID from response
238
+ if sid := resp.Header.Get("Mcp-Session-Id"); sid != "" {
239
+ c.sessionID = sid
240
+ }
241
+
242
+ respBody, err := io.ReadAll(resp.Body)
243
+ if err != nil {
244
+ return nil, fmt.Errorf("failed to read response: %w", err)
245
+ }
246
+
247
+ var mcpResp MCPResponse
248
+ if err := json.Unmarshal(respBody, &mcpResp); err != nil {
249
+ return nil, fmt.Errorf("failed to parse response: %w", err)
250
+ }
251
+
252
+ if mcpResp.Error != nil {
253
+ return nil, fmt.Errorf("MCP error %d: %s", mcpResp.Error.Code, mcpResp.Error.Message)
254
+ }
255
+
256
+ var result ToolResult
257
+ if err := json.Unmarshal(mcpResp.Result, &result); err != nil {
258
+ return nil, fmt.Errorf("failed to parse tool result: %w", err)
259
+ }
260
+
261
+ return &result, nil
262
+ }
263
+
264
+ // Navigate loads a URL in the browser
265
+ func (c *Client) Navigate(url string) error {
266
+ _, err := c.callTool("browser_navigate", map[string]interface{}{
267
+ "url": url,
268
+ })
269
+ return err
270
+ }
271
+
272
+ // Screenshot takes a screenshot and returns base64-encoded PNG
273
+ func (c *Client) Screenshot() (string, error) {
274
+ result, err := c.callTool("browser_take_screenshot", nil)
275
+ if err != nil {
276
+ return "", err
277
+ }
278
+
279
+ for _, block := range result.Content {
280
+ if block.Type == "image" && block.Data != "" {
281
+ return block.Data, nil
282
+ }
283
+ }
284
+
285
+ return "", fmt.Errorf("no image data in screenshot response")
286
+ }
287
+
288
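+ // Click clicks at specific pixel coordinates (vision mode). NOTE(review): this sends a synthetic `ref` to `browser_click`, which normally expects a ref taken from a prior snapshot — confirm which coordinate-click tool the server exposes under `--caps vision` before relying on this.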
289
+ func (c *Client) Click(x, y int) error {
290
+ _, err := c.callTool("browser_click", map[string]interface{}{
291
+ "element": fmt.Sprintf("coordinate [%d, %d]", x, y),
292
+ "ref": fmt.Sprintf("coord_%d_%d", x, y),
293
+ })
294
+ return err
295
+ }
296
+
297
+ // Snapshot takes an accessibility tree snapshot (for fallback/verification)
298
+ func (c *Client) Snapshot() (string, error) {
299
+ result, err := c.callTool("browser_snapshot", nil)
300
+ if err != nil {
301
+ return "", err
302
+ }
303
+
304
+ for _, block := range result.Content {
305
+ if block.Type == "text" {
306
+ return block.Text, nil
307
+ }
308
+ }
309
+
310
+ return "", fmt.Errorf("no text in snapshot response")
311
+ }
312
+
313
+ // Type types text into the focused element
314
+ func (c *Client) Type(text string) error {
315
+ _, err := c.callTool("browser_type", map[string]interface{}{
316
+ "text": text,
317
+ })
318
+ return err
319
+ }
320
+
321
+ // WaitForText waits for specific text to appear on page
322
+ func (c *Client) WaitForText(text string, timeoutMs int) error {
323
+ _, err := c.callTool("browser_wait_for_text", map[string]interface{}{
324
+ "text": text,
325
+ "timeout": timeoutMs,
326
+ })
327
+ return err
328
+ }
329
+ ```
330
+
331
+ - [ ] **Step 4: Run tests**
332
+
333
+ Run: `go test -v ./internal/playwright/`
334
+ Expected: PASS
335
+
336
+ - [ ] **Step 5: Commit**
337
+
338
+ ```bash
339
+ git add internal/playwright/
340
+ git commit -m "feat: add Playwright MCP HTTP client"
341
+ ```
342
+
343
+ ---
344
+
345
+ ## Chunk 2: Fara-7B Vision Client
346
+
347
+ ### Task 2.1: Vision Types
348
+
349
+ **Files:**
350
+ - Create: `internal/vision/types.go`
351
+
352
+ - [ ] **Step 1: Define OpenAI-compatible types for Fara-7B communication**
353
+
354
+ ```go
355
+ package vision
356
+
357
+ // ChatRequest is an OpenAI-compatible chat completion request
358
+ type ChatRequest struct {
359
+ Model string `json:"model"`
360
+ Messages []Message `json:"messages"`
361
+ }
362
+
363
+ // Message contains role and content for chat
364
+ type Message struct {
365
+ Role string `json:"role"`
366
+ Content []Content `json:"content"`
367
+ }
368
+
369
+ // Content is a text or image block
370
+ type Content struct {
371
+ Type string `json:"type"`
372
+ Text string `json:"text,omitempty"`
373
+ ImageURL *ImageURL `json:"image_url,omitempty"`
374
+ }
375
+
376
+ // ImageURL wraps a base64 data URL
377
+ type ImageURL struct {
378
+ URL string `json:"url"`
379
+ }
380
+
381
+ // ChatResponse is the Ollama/OpenAI chat completion response
382
+ type ChatResponse struct {
383
+ Choices []Choice `json:"choices"`
384
+ }
385
+
386
+ // Choice contains a message from the model
387
+ type Choice struct {
388
+ Message ResponseMessage `json:"message"`
389
+ }
390
+
391
+ // ResponseMessage is the model's response
392
+ type ResponseMessage struct {
393
+ Content string `json:"content"`
394
+ ToolCalls []ToolCall `json:"tool_calls,omitempty"`
395
+ }
396
+
397
+ // ToolCall represents a function call from Fara
398
+ type ToolCall struct {
399
+ Function FunctionCall `json:"function"`
400
+ }
401
+
402
+ // FunctionCall contains the action details
403
+ type FunctionCall struct {
404
+ Name string `json:"name"`
405
+ Arguments string `json:"arguments"`
406
+ }
407
+
408
+ // FaraAction is the parsed action from Fara's response
409
+ type FaraAction struct {
410
+ Action string `json:"action"`
411
+ Coordinate [2]int `json:"coordinate"`
412
+ Text string `json:"text,omitempty"`
413
+ }
414
+ ```
415
+
416
+ - [ ] **Step 2: Verify compilation**
417
+
418
+ Run: `go build ./internal/vision/`
419
+ Expected: no errors
420
+
421
+ - [ ] **Step 3: Commit**
422
+
423
+ ```bash
424
+ git add internal/vision/types.go
425
+ git commit -m "feat: add Fara-7B vision model types"
426
+ ```
427
+
428
+ ### Task 2.2: Vision Client
429
+
430
+ **Files:**
431
+ - Create: `internal/vision/client.go`
432
+ - Test: `internal/vision/client_test.go`
433
+
434
+ - [ ] **Step 1: Write test for client initialization and coordinate parsing**
435
+
436
+ ```go
437
+ package vision
438
+
439
+ import (
440
+ "testing"
441
+ )
442
+
443
+ func TestNewClient(t *testing.T) {
444
+ c := NewClient("http://localhost:11434", "fara-7b")
445
+ if c.baseURL != "http://localhost:11434" {
446
+ t.Errorf("unexpected base URL: %s", c.baseURL)
447
+ }
448
+ if c.model != "fara-7b" {
449
+ t.Errorf("unexpected model: %s", c.model)
450
+ }
451
+ }
452
+
453
+ func TestParseCoordinates(t *testing.T) {
454
+ tests := []struct {
455
+ name string
456
+ input string
457
+ wantX int
458
+ wantY int
459
+ wantErr bool
460
+ }{
461
+ {
462
+ name: "tool call format",
463
+ input: `{"action": "left_click", "coordinate": [850, 120]}`,
464
+ wantX: 850, wantY: 120,
465
+ },
466
+ {
467
+ name: "coordinate in text",
468
+ input: `I will click at coordinate [423, 256]`,
469
+ wantX: 423, wantY: 256,
470
+ },
471
+ {
472
+ name: "no coordinate",
473
+ input: "I don't see a Connect button",
474
+ wantErr: true,
475
+ },
476
+ }
477
+
478
+ for _, tt := range tests {
479
+ t.Run(tt.name, func(t *testing.T) {
480
+ x, y, err := parseCoordinates(tt.input)
481
+ if tt.wantErr {
482
+ if err == nil {
483
+ t.Error("expected error, got nil")
484
+ }
485
+ return
486
+ }
487
+ if err != nil {
488
+ t.Fatalf("unexpected error: %v", err)
489
+ }
490
+ if x != tt.wantX || y != tt.wantY {
491
+ t.Errorf("got (%d, %d), want (%d, %d)", x, y, tt.wantX, tt.wantY)
492
+ }
493
+ })
494
+ }
495
+ }
496
+ ```
497
+
498
+ - [ ] **Step 2: Run test to verify it fails**
499
+
500
+ Run: `go test -v -run TestParseCoordinates ./internal/vision/`
501
+ Expected: FAIL — NewClient and parseCoordinates not defined
502
+
503
+ - [ ] **Step 3: Implement vision client**
504
+
505
+ ```go
506
+ package vision
507
+
508
+ import (
509
+ "bytes"
510
+ "encoding/json"
511
+ "fmt"
512
+ "io"
513
+ "net/http"
514
+ "regexp"
515
+ "strconv"
516
+ "time"
517
+ )
518
+
519
+ // Client communicates with Fara-7B via OpenAI-compatible API
520
+ type Client struct {
521
+ baseURL string
522
+ model string
523
+ client *http.Client
524
+ }
525
+
526
+ // NewClient creates a vision client for Fara-7B
527
+ func NewClient(baseURL string, model string) *Client {
528
+ return &Client{
529
+ baseURL: baseURL,
530
+ model: model,
531
+ client: &http.Client{
532
+ Timeout: 120 * time.Second, // Vision inference can be slow
533
+ },
534
+ }
535
+ }
536
+
537
+ // IdentifyElement sends a screenshot to Fara and gets click coordinates
538
+ func (c *Client) IdentifyElement(screenshotBase64 string, task string) (int, int, error) {
539
+ req := ChatRequest{
540
+ Model: c.model,
541
+ Messages: []Message{
542
+ {
543
+ Role: "user",
544
+ Content: []Content{
545
+ {Type: "text", Text: task},
546
+ {
547
+ Type: "image_url",
548
+ ImageURL: &ImageURL{
549
+ URL: "data:image/png;base64," + screenshotBase64,
550
+ },
551
+ },
552
+ },
553
+ },
554
+ },
555
+ }
556
+
557
+ body, err := json.Marshal(req)
558
+ if err != nil {
559
+ return 0, 0, fmt.Errorf("failed to marshal vision request: %w", err)
560
+ }
561
+
562
+ resp, err := c.client.Post(
563
+ c.baseURL+"/v1/chat/completions",
564
+ "application/json",
565
+ bytes.NewReader(body),
566
+ )
567
+ if err != nil {
568
+ return 0, 0, fmt.Errorf("failed to call vision model: %w", err)
569
+ }
570
+ defer resp.Body.Close()
571
+
572
+ respBody, err := io.ReadAll(resp.Body)
573
+ if err != nil {
574
+ return 0, 0, fmt.Errorf("failed to read vision response: %w", err)
575
+ }
576
+
577
+ if resp.StatusCode != http.StatusOK {
578
+ return 0, 0, fmt.Errorf("vision model returned %d: %s", resp.StatusCode, string(respBody))
579
+ }
580
+
581
+ var chatResp ChatResponse
582
+ if err := json.Unmarshal(respBody, &chatResp); err != nil {
583
+ return 0, 0, fmt.Errorf("failed to parse vision response: %w", err)
584
+ }
585
+
586
+ if len(chatResp.Choices) == 0 {
587
+ return 0, 0, fmt.Errorf("vision model returned no choices")
588
+ }
589
+
590
+ content := chatResp.Choices[0].Message.Content
591
+ return parseCoordinates(content)
592
+ }
593
+
594
+ // VerifyState sends a screenshot with a question about page state and returns the model's raw text answer
595
+ func (c *Client) VerifyState(screenshotBase64 string, question string) (string, error) {
596
+ req := ChatRequest{
597
+ Model: c.model,
598
+ Messages: []Message{
599
+ {
600
+ Role: "user",
601
+ Content: []Content{
602
+ {Type: "text", Text: question + " Answer concisely."},
603
+ {
604
+ Type: "image_url",
605
+ ImageURL: &ImageURL{
606
+ URL: "data:image/png;base64," + screenshotBase64,
607
+ },
608
+ },
609
+ },
610
+ },
611
+ },
612
+ }
613
+
614
+ body, err := json.Marshal(req)
615
+ if err != nil {
616
+ return "", fmt.Errorf("failed to marshal: %w", err)
617
+ }
618
+
619
+ resp, err := c.client.Post(
620
+ c.baseURL+"/v1/chat/completions",
621
+ "application/json",
622
+ bytes.NewReader(body),
623
+ )
624
+ if err != nil {
625
+ return "", fmt.Errorf("failed to call vision model: %w", err)
626
+ }
627
+ defer resp.Body.Close()
628
+
629
+ respBody, err := io.ReadAll(resp.Body)
630
+ if err != nil {
631
+ return "", err
632
+ }
633
+
634
+ var chatResp ChatResponse
635
+ if err := json.Unmarshal(respBody, &chatResp); err != nil {
636
+ return "", err
637
+ }
638
+
639
+ if len(chatResp.Choices) == 0 {
640
+ return "", fmt.Errorf("no choices in response")
641
+ }
642
+
643
+ return chatResp.Choices[0].Message.Content, nil
644
+ }
645
+
646
+ // Health checks if the vision model is available
647
+ func (c *Client) Health() error {
648
+ resp, err := c.client.Get(c.baseURL + "/v1/models")
649
+ if err != nil {
650
+ return fmt.Errorf("vision model not reachable: %w", err)
651
+ }
652
+ defer resp.Body.Close()
653
+
654
+ if resp.StatusCode != http.StatusOK {
655
+ return fmt.Errorf("vision model returned status %d", resp.StatusCode)
656
+ }
657
+ return nil
658
+ }
659
+
660
+ var coordRegex = regexp.MustCompile(`\[(\d+),\s*(\d+)\]`)
661
+
662
+ func parseCoordinates(text string) (int, int, error) {
663
+ matches := coordRegex.FindStringSubmatch(text)
664
+ if len(matches) < 3 {
665
+ return 0, 0, fmt.Errorf("no coordinates found in response: %s", text)
666
+ }
667
+
668
+ x, err := strconv.Atoi(matches[1])
669
+ if err != nil {
670
+ return 0, 0, fmt.Errorf("invalid x coordinate: %w", err)
671
+ }
672
+
673
+ y, err := strconv.Atoi(matches[2])
674
+ if err != nil {
675
+ return 0, 0, fmt.Errorf("invalid y coordinate: %w", err)
676
+ }
677
+
678
+ return x, y, nil
679
+ }
680
+ ```
681
+
682
+ - [ ] **Step 4: Run tests**
683
+
684
+ Run: `go test -v ./internal/vision/`
685
+ Expected: PASS
686
+
687
+ - [ ] **Step 5: Commit**
688
+
689
+ ```bash
690
+ git add internal/vision/
691
+ git commit -m "feat: add Fara-7B vision client with coordinate parsing"
692
+ ```
693
+
694
+ ---
695
+
696
+ ## Chunk 3: Browser Actions (Combining Playwright + Vision)
697
+
698
+ ### Task 3.1: Browser Interface
699
+
700
+ **Files:**
701
+ - Create: `internal/browser/browser.go`
702
+ - Create: `internal/browser/actions.go`
703
+ - Test: `internal/browser/actions_test.go`
704
+
705
+ - [ ] **Step 1: Define Browser struct that combines Playwright + Vision clients**
706
+
707
+ ```go
708
+ package browser
709
+
710
+ import (
711
+ "fmt"
712
+ "time"
713
+
714
+ "github.com/thaddeus-git/linkedin-cli/internal/playwright"
715
+ "github.com/thaddeus-git/linkedin-cli/internal/vision"
716
+ )
717
+
718
+ // Browser combines Playwright (hands) and Vision (eyes) for web automation
719
+ type Browser struct {
720
+ pw *playwright.Client
721
+ vis *vision.Client
722
+ dryRun bool
723
+ }
724
+
725
+ // NewBrowser creates a browser with both automation and vision capabilities
726
+ func NewBrowser(playwrightURL, visionURL, visionModel string, dryRun bool) *Browser {
727
+ return &Browser{
728
+ pw: playwright.NewClient(playwrightURL),
729
+ vis: vision.NewClient(visionURL, visionModel),
730
+ dryRun: dryRun,
731
+ }
732
+ }
733
+
734
+ // Health checks that the Vision model is available; the Playwright MCP check is still a TODO
735
+ func (b *Browser) Health() error {
736
+ // TODO: Add Playwright health check (initialize/list tools)
737
+ if err := b.vis.Health(); err != nil {
738
+ return fmt.Errorf("vision model: %w", err)
739
+ }
740
+ return nil
741
+ }
742
+ ```
743
+
744
+ - [ ] **Step 2: Implement ConnectToProfile action — the core flow**
745
+
746
+ ```go
747
+ package browser
748
+
749
+ // ConnectResult describes the outcome of a connection attempt
750
+ type ConnectResult struct {
751
+ Success bool
752
+ Status string // "sent", "pending", "already_connected", "not_found", "error", "unclear", "dry_run"
753
+ Message string
754
+ Screenshots []string // base64 screenshots for debugging
755
+ }
756
+
757
+ // ConnectToProfile navigates to a profile and clicks Connect
758
+ func (b *Browser) ConnectToProfile(profileURL string, note string) (*ConnectResult, error) {
759
+ result := &ConnectResult{}
760
+
761
+ if b.dryRun {
762
+ result.Success = true
763
+ result.Status = "dry_run"
764
+ result.Message = fmt.Sprintf("Would connect to %s", profileURL)
765
+ return result, nil
766
+ }
767
+
768
+ // Step 1: Navigate to profile
769
+ if err := b.pw.Navigate(profileURL); err != nil {
770
+ return nil, fmt.Errorf("failed to navigate to profile: %w", err)
771
+ }
772
+ time.Sleep(3 * time.Second) // Wait for page load
773
+
774
+ // Step 2: Take screenshot
775
+ screenshot, err := b.pw.Screenshot()
776
+ if err != nil {
777
+ return nil, fmt.Errorf("failed to take screenshot: %w", err)
778
+ }
779
+ result.Screenshots = append(result.Screenshots, screenshot)
780
+
781
+ // Step 3: Ask Fara to find the Connect button
782
+ x, y, err := b.vis.IdentifyElement(screenshot,
783
+ "Click the Connect button for this person's LinkedIn profile. "+
784
+ "The Connect button is in the main profile header area near the profile photo, "+
785
+ "NOT in the 'People also viewed' or 'People you may know' sidebar sections.")
786
+ if err != nil {
787
+ // Connect button might be hidden under "More" menu
788
+ return b.tryMoreMenuConnect(screenshot, note, result)
789
+ }
790
+
791
+ // Step 4: Click at the identified coordinates
792
+ if err := b.pw.Click(x, y); err != nil {
793
+ return nil, fmt.Errorf("failed to click Connect at (%d, %d): %w", x, y, err)
794
+ }
795
+ time.Sleep(2 * time.Second)
796
+
797
+ // Step 5: Handle optional note and Send confirmation
798
+ return b.handleConnectDialog(note, result)
799
+ }
800
+
801
+ // tryMoreMenuConnect handles the case where Connect is hidden under "... More"
802
+ func (b *Browser) tryMoreMenuConnect(screenshot string, note string, result *ConnectResult) (*ConnectResult, error) {
803
+ // Ask Fara to find the "More" button
804
+ x, y, err := b.vis.IdentifyElement(screenshot,
805
+ "Click the 'More' or '...' button in the profile header area. "+
806
+ "This is typically a three-dot menu button near the profile actions.")
807
+ if err != nil {
808
+ result.Status = "not_found"
809
+ result.Message = "Connect button not found on this profile"
810
+ return result, nil
811
+ }
812
+
813
+ // Click More menu
814
+ if err := b.pw.Click(x, y); err != nil {
815
+ return nil, fmt.Errorf("failed to click More menu: %w", err)
816
+ }
817
+ time.Sleep(1500 * time.Millisecond)
818
+
819
+ // Take new screenshot after menu opens
820
+ screenshot2, err := b.pw.Screenshot()
821
+ if err != nil {
822
+ return nil, fmt.Errorf("failed to screenshot after More menu: %w", err)
823
+ }
824
+ result.Screenshots = append(result.Screenshots, screenshot2)
825
+
826
+ // Now find Connect in the opened menu
827
+ x, y, err = b.vis.IdentifyElement(screenshot2,
828
+ "Click the Connect option in the dropdown menu that is currently open.")
829
+ if err != nil {
830
+ result.Status = "not_found"
831
+ result.Message = "Connect not found in More menu either"
832
+ return result, nil
833
+ }
834
+
835
+ if err := b.pw.Click(x, y); err != nil {
836
+ return nil, fmt.Errorf("failed to click Connect in menu: %w", err)
837
+ }
838
+ time.Sleep(2 * time.Second)
839
+
840
+ return b.handleConnectDialog(note, result)
841
+ }
842
+
843
+ // handleConnectDialog handles the note input and Send confirmation dialog
844
+ func (b *Browser) handleConnectDialog(note string, result *ConnectResult) (*ConnectResult, error) {
845
+ // Take screenshot to see if a dialog appeared
846
+ screenshot, err := b.pw.Screenshot()
847
+ if err != nil {
848
+ return nil, fmt.Errorf("failed to screenshot dialog: %w", err)
849
+ }
850
+ result.Screenshots = append(result.Screenshots, screenshot)
851
+
852
+ // Check if there's a note input and we want to add a note
853
+ if note != "" {
854
+ x, y, err := b.vis.IdentifyElement(screenshot,
855
+ "Click the 'Add a note' button if visible, or find the text input field for adding a personal note.")
856
+ if err == nil {
857
+ if err := b.pw.Click(x, y); err == nil {
858
+ time.Sleep(500 * time.Millisecond)
859
+ b.pw.Type(note)
860
+ time.Sleep(500 * time.Millisecond)
861
+ }
862
+ }
863
+ }
864
+
865
+ // Take screenshot and find Send button
866
+ screenshot, err = b.pw.Screenshot()
867
+ if err != nil {
868
+ return nil, err
869
+ }
870
+
871
+ x, y, err := b.vis.IdentifyElement(screenshot,
872
+ "Click the 'Send' button to send the connection request. "+
873
+ "This is the primary action button in the dialog/modal.")
874
+ if err == nil {
875
+ if err := b.pw.Click(x, y); err != nil {
876
+ return nil, fmt.Errorf("failed to click Send: %w", err)
877
+ }
878
+ time.Sleep(2 * time.Second)
879
+ }
880
+
881
+ // Step 6: Verify the connection was sent
882
+ return b.verifyConnectSent(result)
883
+ }
884
+
885
+ // verifyConnectSent checks if the connection request was actually sent
886
+ func (b *Browser) verifyConnectSent(result *ConnectResult) (*ConnectResult, error) {
887
+ screenshot, err := b.pw.Screenshot()
888
+ if err != nil {
889
+ return nil, err
890
+ }
891
+ result.Screenshots = append(result.Screenshots, screenshot)
892
+
893
+ answer, err := b.vis.VerifyState(screenshot,
894
+ "Look at this LinkedIn profile page. Is there a 'Pending' indicator showing that a connection request was sent? "+
895
+ "Or is the Connect button still visible? "+
896
+ "Answer with one of: PENDING, CONNECT_VISIBLE, ALREADY_CONNECTED, UNCLEAR")
897
+ if err != nil {
898
+ result.Status = "error"
899
+ result.Message = fmt.Sprintf("Failed to verify: %v", err)
900
+ return result, nil
901
+ }
902
+
903
+ switch {
904
+ case contains(answer, "PENDING"):
905
+ result.Success = true
906
+ result.Status = "sent"
907
+ result.Message = "Connection request sent successfully"
908
+ case contains(answer, "ALREADY_CONNECTED"):
909
+ result.Status = "already_connected"
910
+ result.Message = "Already connected with this person"
911
+ case contains(answer, "CONNECT_VISIBLE"):
912
+ result.Status = "error"
913
+ result.Message = "Connect button still visible — request may not have been sent"
914
+ default:
915
+ result.Status = "unclear"
916
+ result.Message = fmt.Sprintf("Verification unclear: %s", answer)
917
+ }
918
+
919
+ return result, nil
920
+ }
921
+
922
+ func contains(s, substr string) bool {
923
+ return len(s) >= len(substr) && (s == substr || len(s) > 0 && containsCI(s, substr))
924
+ }
925
+
926
+ func containsCI(s, substr string) bool {
927
+ // Case-insensitive contains
928
+ import "strings"
929
+ return strings.Contains(strings.ToUpper(s), strings.ToUpper(substr))
930
+ }
931
+ ```
932
+
933
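+ NOTE: The `containsCI` function above does not compile — Go does not allow an `import` declaration inside a function body. During implementation, move `import "strings"` into the top-level import block of `actions.go`, which must also import `fmt` and `time` (both are used by the snippet above but never imported). The length checks in `contains` are redundant, so it can simply delegate to `containsCI`: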
934
+
935
+ ```go
936
+ import "strings"
937
+
938
+ func containsCI(s, substr string) bool {
939
+ return strings.Contains(strings.ToUpper(s), strings.ToUpper(substr))
940
+ }
941
+ ```
942
+
943
+ - [ ] **Step 3: Verify compilation**
944
+
945
+ Run: `go build ./internal/browser/`
946
+ Expected: no errors
947
+
948
+ - [ ] **Step 4: Commit**
949
+
950
+ ```bash
951
+ git add internal/browser/
952
+ git commit -m "feat: add browser actions combining Playwright + Fara vision"
953
+ ```
954
+
955
+ ---
956
+
957
+ ## Chunk 4: CLI Integration
958
+
959
+ ### Task 4.1: Update Root Command with New Endpoints
960
+
961
+ **Files:**
962
+ - Modify: `internal/cmd/root.go`
963
+
964
+ - [ ] **Step 1: Add flags for Playwright and Vision endpoints**
965
+
966
+ Add to root.go:
967
+ - `--playwright-url` flag (default: `http://localhost:3000`, env: `PLAYWRIGHT_URL`)
968
+ - `--vision-url` flag (default: `http://localhost:11434`, env: `VISION_URL`)
969
+ - `--vision-model` flag (default: `fara-7b`, env: `VISION_MODEL`)
970
+ - Helper function `getBrowser()` that creates a `browser.Browser` instance
971
+
972
+ - [ ] **Step 2: Verify build**
973
+
974
+ Run: `go build ./cmd/linkedin`
975
+
976
+ - [ ] **Step 3: Commit**
977
+
978
+ ### Task 4.2: Update Connect Command
979
+
980
+ **Files:**
981
+ - Modify: `internal/cmd/connect.go` (or `actions.go` depending on current structure)
982
+
983
+ - [ ] **Step 1: Replace PinchTab navigator calls with browser.ConnectToProfile**
984
+
985
+ The connect command should:
986
+ 1. Create browser via `getBrowser()`
987
+ 2. Call `browser.ConnectToProfile(url, note)`
988
+ 3. Report result based on ConnectResult.Status
989
+ 4. Save screenshot on failure for debugging
990
+
991
+ - [ ] **Step 2: Test with dry-run**
992
+
993
+ Run: `go run ./cmd/linkedin connect --profile thaddeus --url linkedin.com/in/test --dry-run`
994
+ Expected: "[dry-run] Would connect to..."
995
+
996
+ - [ ] **Step 3: Commit**
997
+
998
+ ### Task 4.3: Update Auth Command
999
+
1000
+ **Files:**
1001
+ - Modify: `internal/cmd/auth.go`
1002
+
1003
+ - [ ] **Step 1: Replace PinchTab auth flow with Playwright-based auth**
1004
+
1005
+ Auth should:
1006
+ 1. Start Playwright MCP (or verify it's running)
1007
+ 2. Navigate to linkedin.com/login
1008
+ 3. Wait for the user to finish logging in (they press Enter to continue)
1009
+ 4. Use vision to verify logged-in state
1010
+ 5. Save profile config
1011
+
1012
+ - [ ] **Step 2: Commit**
1013
+
1014
+ ---
1015
+
1016
+ ## Chunk 5: Setup & Documentation
1017
+
1018
+ ### Task 5.1: Installation Guide
1019
+
1020
+ **Files:**
1021
+ - Modify: `README.md`
1022
+
1023
+ - [ ] **Step 1: Update prerequisites**
1024
+
1025
+ ```markdown
1026
+ ## Prerequisites
1027
+
1028
+ 1. **Go 1.21+** — [Install Go](https://go.dev/doc/install)
1029
+ 2. **Node.js 18+** — [Install Node.js](https://nodejs.org/)
1030
+ 3. **Ollama** — Local model server
1031
+ ```bash
1032
+ brew install ollama
1033
+ ollama serve # Keep running in a terminal
1034
+ ollama pull bartowski/microsoft_Fara-7B-GGUF:Q4_K_M
1035
+ ```
1036
+ 4. **Start Playwright MCP** — Browser automation server
1037
+ ```bash
1038
+ npx @playwright/mcp@latest --port 3000 --caps vision \
1039
+ --user-data-dir ~/.linkedin-cli/browser-profile
1040
+ # Keep running in a terminal
1041
+ ```
1042
+ ```
1043
+
1044
+ - [ ] **Step 2: Commit**
1045
+
1046
+ ### Task 5.2: Startup Script
1047
+
1048
+ **Files:**
1049
+ - Create: `scripts/start.sh`
1050
+
1051
+ - [ ] **Step 1: Create convenience startup script**
1052
+
1053
+ ```bash
1054
+ #!/bin/bash
1055
+ # Start LinkedIn CLI services
1056
+
1057
+ echo "Starting Ollama..."
1058
+ ollama serve &
1059
+ OLLAMA_PID=$!
1060
+
1061
+ echo "Starting Playwright MCP..."
1062
+ npx @playwright/mcp@latest --port 3000 --caps vision \
1063
+ --user-data-dir ~/.linkedin-cli/browser-profile &
1064
+ PW_PID=$!
1065
+
1066
+ echo ""
1067
+ echo "Services running:"
1068
+ echo " Ollama: http://localhost:11434 (PID: $OLLAMA_PID)"
1069
+ echo " Playwright: http://localhost:3000 (PID: $PW_PID)"
1070
+ echo ""
1071
+ echo "Press Ctrl+C to stop all services"
1072
+
1073
+ trap "kill $OLLAMA_PID $PW_PID 2>/dev/null" EXIT
1074
+ wait
1075
+ ```
1076
+
1077
+ - [ ] **Step 2: Commit**
1078
+
1079
+ ---
1080
+
1081
+ ## Chunk 6: Cleanup
1082
+
1083
+ ### Task 6.1: Remove PinchTab Dependency
1084
+
1085
+ - [ ] **Step 1: Remove PinchTab imports from all files**
1086
+ - [ ] **Step 2: Delete `internal/pinchtab/` directory**
1087
+ - [ ] **Step 3: Delete PinchTab-specific navigator code**
1088
+ - [ ] **Step 4: Update go.mod (remove unused deps)**
1089
+ - [ ] **Step 5: Run full test suite**
1090
+
1091
+ Run: `go test ./... && go build ./cmd/linkedin`
1092
+ Expected: all pass
1093
+
1094
+ - [ ] **Step 6: Commit**
1095
+
1096
+ ```bash
1097
+ git commit -m "refactor: remove PinchTab dependency, complete migration to Playwright+Fara"
1098
+ ```
1099
+
1100
+ ---
1101
+
1102
+ ## Summary
1103
+
1104
+ | Component | Technology | Purpose |
1105
+ |-----------|-----------|---------|
1106
+ | Browser control | Playwright MCP (`--port 3000 --caps vision`) | Navigate, screenshot, click(x,y) |
1107
+ | Visual intelligence | Fara-7B via Ollama | Identify correct UI elements from screenshots |
1108
+ | CLI orchestration | Go + Cobra | Profiles, rate limits, config, command structure |
1109
+ | Session persistence | Playwright `--user-data-dir` | LinkedIn login survives restarts |
1110
+
1111
+ **Total estimated effort:** 1-2 weeks
1112
+ **Risk:** Fara-7B accuracy on LinkedIn (~85-90% expected). Mitigated by retry with refined prompts.