linkedin-automation-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +12 -0
- package/.github/workflows/ci.yml +66 -0
- package/.github/workflows/publish.yml +48 -0
- package/.husky/pre-commit +6 -0
- package/.prettierignore +4 -0
- package/.prettierrc +10 -0
- package/AGENTS.md +294 -0
- package/CHANGELOG.md +40 -0
- package/GIT_RELEASE.md +167 -0
- package/LICENSE +21 -0
- package/Makefile +30 -0
- package/NPM_PUBLISHING.md +230 -0
- package/PYEOF +0 -0
- package/README.md +295 -0
- package/TESTING-GUIDE.md +151 -0
- package/cmd/linkedin/main.go +9 -0
- package/dist/agent/action-executor.d.ts +81 -0
- package/dist/agent/action-executor.d.ts.map +1 -0
- package/dist/agent/action-executor.js +170 -0
- package/dist/agent/action-executor.js.map +1 -0
- package/dist/agent/action-executor.test.d.ts +2 -0
- package/dist/agent/action-executor.test.d.ts.map +1 -0
- package/dist/agent/action-executor.test.js +366 -0
- package/dist/agent/action-executor.test.js.map +1 -0
- package/dist/agent/claude-client.d.ts +74 -0
- package/dist/agent/claude-client.d.ts.map +1 -0
- package/dist/agent/claude-client.js +314 -0
- package/dist/agent/claude-client.js.map +1 -0
- package/dist/agent/claude-client.test.d.ts +2 -0
- package/dist/agent/claude-client.test.d.ts.map +1 -0
- package/dist/agent/claude-client.test.js +590 -0
- package/dist/agent/claude-client.test.js.map +1 -0
- package/dist/agent/dom-extractor.d.ts +50 -0
- package/dist/agent/dom-extractor.d.ts.map +1 -0
- package/dist/agent/dom-extractor.js +374 -0
- package/dist/agent/dom-extractor.js.map +1 -0
- package/dist/agent/dom-extractor.test.d.ts +7 -0
- package/dist/agent/dom-extractor.test.d.ts.map +1 -0
- package/dist/agent/dom-extractor.test.js +504 -0
- package/dist/agent/dom-extractor.test.js.map +1 -0
- package/dist/agent/extension-client.d.ts +75 -0
- package/dist/agent/extension-client.d.ts.map +1 -0
- package/dist/agent/extension-client.js +245 -0
- package/dist/agent/extension-client.js.map +1 -0
- package/dist/agent/index.d.ts +8 -0
- package/dist/agent/index.d.ts.map +1 -0
- package/dist/agent/index.js +16 -0
- package/dist/agent/index.js.map +1 -0
- package/dist/agent/page-agent.d.ts +76 -0
- package/dist/agent/page-agent.d.ts.map +1 -0
- package/dist/agent/page-agent.js +236 -0
- package/dist/agent/page-agent.js.map +1 -0
- package/dist/agent/types.d.ts +236 -0
- package/dist/agent/types.d.ts.map +1 -0
- package/dist/agent/types.js +37 -0
- package/dist/agent/types.js.map +1 -0
- package/dist/cli/agent-commands.d.ts +3 -0
- package/dist/cli/agent-commands.d.ts.map +1 -0
- package/dist/cli/agent-commands.js +250 -0
- package/dist/cli/agent-commands.js.map +1 -0
- package/dist/cli/auth.d.ts +3 -0
- package/dist/cli/auth.d.ts.map +1 -0
- package/dist/cli/auth.js +288 -0
- package/dist/cli/auth.js.map +1 -0
- package/dist/cli/company.d.ts +3 -0
- package/dist/cli/company.d.ts.map +1 -0
- package/dist/cli/company.js +55 -0
- package/dist/cli/company.js.map +1 -0
- package/dist/cli/connection.d.ts +3 -0
- package/dist/cli/connection.d.ts.map +1 -0
- package/dist/cli/connection.js +79 -0
- package/dist/cli/connection.js.map +1 -0
- package/dist/cli/index.d.ts +7 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +17 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/messages.d.ts +3 -0
- package/dist/cli/messages.d.ts.map +1 -0
- package/dist/cli/messages.js +268 -0
- package/dist/cli/messages.js.map +1 -0
- package/dist/cli/profile.d.ts +3 -0
- package/dist/cli/profile.d.ts.map +1 -0
- package/dist/cli/profile.js +81 -0
- package/dist/cli/profile.js.map +1 -0
- package/dist/cli/profile.test.d.ts +2 -0
- package/dist/cli/profile.test.d.ts.map +1 -0
- package/dist/cli/profile.test.js +15 -0
- package/dist/cli/profile.test.js.map +1 -0
- package/dist/cli/reply.d.ts +3 -0
- package/dist/cli/reply.d.ts.map +1 -0
- package/dist/cli/reply.js +129 -0
- package/dist/cli/reply.js.map +1 -0
- package/dist/core/audit.d.ts +17 -0
- package/dist/core/audit.d.ts.map +1 -0
- package/dist/core/audit.js +121 -0
- package/dist/core/audit.js.map +1 -0
- package/dist/core/audit.test.d.ts +2 -0
- package/dist/core/audit.test.d.ts.map +1 -0
- package/dist/core/audit.test.js +142 -0
- package/dist/core/audit.test.js.map +1 -0
- package/dist/core/browser-cookies.d.ts +19 -0
- package/dist/core/browser-cookies.d.ts.map +1 -0
- package/dist/core/browser-cookies.js +181 -0
- package/dist/core/browser-cookies.js.map +1 -0
- package/dist/core/browser.d.ts +50 -0
- package/dist/core/browser.d.ts.map +1 -0
- package/dist/core/browser.js +318 -0
- package/dist/core/browser.js.map +1 -0
- package/dist/core/config.d.ts +20 -0
- package/dist/core/config.d.ts.map +1 -0
- package/dist/core/config.js +103 -0
- package/dist/core/config.js.map +1 -0
- package/dist/core/config.test.d.ts +2 -0
- package/dist/core/config.test.d.ts.map +1 -0
- package/dist/core/config.test.js +111 -0
- package/dist/core/config.test.js.map +1 -0
- package/dist/core/storage.d.ts +19 -0
- package/dist/core/storage.d.ts.map +1 -0
- package/dist/core/storage.js +124 -0
- package/dist/core/storage.js.map +1 -0
- package/dist/core/storage.test.d.ts +2 -0
- package/dist/core/storage.test.d.ts.map +1 -0
- package/dist/core/storage.test.js +142 -0
- package/dist/core/storage.test.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +63 -0
- package/dist/index.js.map +1 -0
- package/dist/linkedin/auth.d.ts +22 -0
- package/dist/linkedin/auth.d.ts.map +1 -0
- package/dist/linkedin/auth.js +167 -0
- package/dist/linkedin/auth.js.map +1 -0
- package/dist/linkedin/company-extractor.d.ts +36 -0
- package/dist/linkedin/company-extractor.d.ts.map +1 -0
- package/dist/linkedin/company-extractor.js +211 -0
- package/dist/linkedin/company-extractor.js.map +1 -0
- package/dist/linkedin/company-extractor.test.d.ts +2 -0
- package/dist/linkedin/company-extractor.test.d.ts.map +1 -0
- package/dist/linkedin/company-extractor.test.js +52 -0
- package/dist/linkedin/company-extractor.test.js.map +1 -0
- package/dist/linkedin/connector.d.ts +45 -0
- package/dist/linkedin/connector.d.ts.map +1 -0
- package/dist/linkedin/connector.js +245 -0
- package/dist/linkedin/connector.js.map +1 -0
- package/dist/linkedin/message-sender.d.ts +32 -0
- package/dist/linkedin/message-sender.d.ts.map +1 -0
- package/dist/linkedin/message-sender.js +112 -0
- package/dist/linkedin/message-sender.js.map +1 -0
- package/dist/linkedin/messages.d.ts +78 -0
- package/dist/linkedin/messages.d.ts.map +1 -0
- package/dist/linkedin/messages.js +745 -0
- package/dist/linkedin/messages.js.map +1 -0
- package/dist/linkedin/profile.d.ts +37 -0
- package/dist/linkedin/profile.d.ts.map +1 -0
- package/dist/linkedin/profile.js +268 -0
- package/dist/linkedin/profile.js.map +1 -0
- package/dist/linkedin/profile.test.d.ts +2 -0
- package/dist/linkedin/profile.test.d.ts.map +1 -0
- package/dist/linkedin/profile.test.js +68 -0
- package/dist/linkedin/profile.test.js.map +1 -0
- package/dist/linkedin/reply.d.ts +21 -0
- package/dist/linkedin/reply.d.ts.map +1 -0
- package/dist/linkedin/reply.js +76 -0
- package/dist/linkedin/reply.js.map +1 -0
- package/dist/linkedin/selector-engine.d.ts +69 -0
- package/dist/linkedin/selector-engine.d.ts.map +1 -0
- package/dist/linkedin/selector-engine.js +339 -0
- package/dist/linkedin/selector-engine.js.map +1 -0
- package/dist/linkedin/selector-engine.test.d.ts +2 -0
- package/dist/linkedin/selector-engine.test.d.ts.map +1 -0
- package/dist/linkedin/selector-engine.test.js +135 -0
- package/dist/linkedin/selector-engine.test.js.map +1 -0
- package/dist/linkedin/selectors.d.ts +65 -0
- package/dist/linkedin/selectors.d.ts.map +1 -0
- package/dist/linkedin/selectors.js +261 -0
- package/dist/linkedin/selectors.js.map +1 -0
- package/dist/templates/engine.d.ts +37 -0
- package/dist/templates/engine.d.ts.map +1 -0
- package/dist/templates/engine.js +215 -0
- package/dist/templates/engine.js.map +1 -0
- package/dist/templates/engine.test.d.ts +2 -0
- package/dist/templates/engine.test.d.ts.map +1 -0
- package/dist/templates/engine.test.js +212 -0
- package/dist/templates/engine.test.js.map +1 -0
- package/dist/templates/index.d.ts +2 -0
- package/dist/templates/index.d.ts.map +1 -0
- package/dist/templates/index.js +7 -0
- package/dist/templates/index.js.map +1 -0
- package/dist/types/index.d.ts +113 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +3 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/index.test.d.ts +2 -0
- package/dist/types/index.test.d.ts.map +1 -0
- package/dist/types/index.test.js +90 -0
- package/dist/types/index.test.js.map +1 -0
- package/dist/utils/paths.d.ts +8 -0
- package/dist/utils/paths.d.ts.map +1 -0
- package/dist/utils/paths.js +68 -0
- package/dist/utils/paths.js.map +1 -0
- package/dist/utils/rate-limiter.d.ts +22 -0
- package/dist/utils/rate-limiter.d.ts.map +1 -0
- package/dist/utils/rate-limiter.js +57 -0
- package/dist/utils/rate-limiter.js.map +1 -0
- package/dist/utils/retry.d.ts +18 -0
- package/dist/utils/retry.d.ts.map +1 -0
- package/dist/utils/retry.js +49 -0
- package/dist/utils/retry.js.map +1 -0
- package/docs/connection-command.md +52 -0
- package/docs/plans/2025-03-03-linkedin-cli-design.md +280 -0
- package/docs/plans/2025-03-03-linkedin-cli-implementation-plan.md +2087 -0
- package/docs/plans/2025-03-03-linkedin-cli-implementation.md +2420 -0
- package/docs/plans/2026-02-19-linkedin-connection-feature.md +596 -0
- package/docs/plans/2026-02-28-messages-send-feature.md +480 -0
- package/docs/plans/2026-02-28-messages-show-design.md +243 -0
- package/docs/plans/2026-03-03-linkedin-cli-oss-publishing-design.md +394 -0
- package/docs/plans/2026-03-03-linkedin-cli-oss-publishing-plan.md +1592 -0
- package/docs/superpowers/plans/2026-03-13-linkedin-automation-resilience-migration.md +425 -0
- package/docs/superpowers/plans/2026-03-13-playwright-fara-migration.md +1112 -0
- package/docs/superpowers/plans/2026-03-14-page-agent-plan.md +1598 -0
- package/docs/superpowers/plans/2026-03-15-company-profile-extraction.md +591 -0
- package/docs/superpowers/plans/2026-03-15-profile-extraction-plan.md +943 -0
- package/docs/superpowers/specs/2026-03-14-company-profile-extraction-design.md +371 -0
- package/docs/superpowers/specs/2026-03-14-page-agent-design.md +385 -0
- package/docs/superpowers/specs/2026-03-15-profile-extraction-design.md +409 -0
- package/eslint.config.mjs +58 -0
- package/go.mod +9 -0
- package/go.sum +10 -0
- package/import-cookies.js +376 -0
- package/internal/cmd/actions.go +123 -0
- package/internal/cmd/auth.go +108 -0
- package/internal/cmd/connect.go +42 -0
- package/internal/cmd/message.go +44 -0
- package/internal/cmd/people.go +454 -0
- package/internal/cmd/profiles.go +121 -0
- package/internal/cmd/root.go +89 -0
- package/internal/cmd/sequence.go +192 -0
- package/internal/config/config.go +187 -0
- package/internal/config/config_test.go +121 -0
- package/internal/config/profile.go +65 -0
- package/internal/linkedin/navigator.go +195 -0
- package/internal/linkedin/selectors.go +39 -0
- package/internal/linkedin/validator.go +69 -0
- package/internal/pinchtab/client.go +183 -0
- package/internal/pinchtab/client_test.go +67 -0
- package/internal/pinchtab/types.go +50 -0
- package/internal/ratelimit/limiter.go +115 -0
- package/internal/ratelimit/limits.go +32 -0
- package/package.json +67 -0
- package/release.sh +66 -0
- package/scripts/debug-linkedin.js +156 -0
- package/scripts/debug-login.js +193 -0
- package/scripts/extract-from-edge.js +96 -0
- package/scripts/import-cookies.js +101 -0
- package/scripts/poc-show-data.js +205 -0
- package/scripts/proof-of-access.js +87 -0
- package/scripts/prove-connection.js +110 -0
- package/scripts/show-linkedin-data.js +173 -0
- package/src/agent/action-executor.test.ts +464 -0
- package/src/agent/action-executor.ts +203 -0
- package/src/agent/claude-client.test.ts +707 -0
- package/src/agent/claude-client.ts +422 -0
- package/src/agent/dom-extractor.test.ts +574 -0
- package/src/agent/dom-extractor.ts +437 -0
- package/src/agent/extension-client.ts +306 -0
- package/src/agent/index.ts +28 -0
- package/src/agent/page-agent.ts +292 -0
- package/src/agent/types.ts +288 -0
- package/src/cli/agent-commands.ts +274 -0
- package/src/cli/auth.ts +343 -0
- package/src/cli/company.ts +66 -0
- package/src/cli/connection.ts +89 -0
- package/src/cli/index.ts +7 -0
- package/src/cli/messages.ts +338 -0
- package/src/cli/profile.test.ts +14 -0
- package/src/cli/profile.ts +95 -0
- package/src/cli/reply.ts +110 -0
- package/src/core/audit.test.ts +134 -0
- package/src/core/audit.ts +98 -0
- package/src/core/browser-cookies.ts +203 -0
- package/src/core/browser.ts +304 -0
- package/src/core/config.test.ts +90 -0
- package/src/core/config.ts +81 -0
- package/src/core/storage.test.ts +129 -0
- package/src/core/storage.ts +100 -0
- package/src/index.ts +70 -0
- package/src/linkedin/auth.ts +218 -0
- package/src/linkedin/company-extractor.test.ts +58 -0
- package/src/linkedin/company-extractor.ts +222 -0
- package/src/linkedin/connector.ts +336 -0
- package/src/linkedin/message-sender.ts +141 -0
- package/src/linkedin/messages.ts +894 -0
- package/src/linkedin/profile.test.ts +79 -0
- package/src/linkedin/profile.ts +314 -0
- package/src/linkedin/reply.ts +96 -0
- package/src/linkedin/selector-engine.test.ts +167 -0
- package/src/linkedin/selector-engine.ts +393 -0
- package/src/linkedin/selectors.ts +268 -0
- package/src/templates/defaults/followup.txt +14 -0
- package/src/templates/defaults/meeting.txt +16 -0
- package/src/templates/defaults/welcome.txt +14 -0
- package/src/templates/engine.test.ts +228 -0
- package/src/templates/engine.ts +208 -0
- package/src/templates/index.ts +1 -0
- package/src/types/index.test.ts +94 -0
- package/src/types/index.ts +143 -0
- package/src/types/sql.js.d.ts +23 -0
- package/src/utils/paths.ts +33 -0
- package/src/utils/rate-limiter.ts +75 -0
- package/src/utils/retry.ts +78 -0
- package/test-cli.sh +85 -0
- package/test-real-data.sh +97 -0
- package/tsconfig.json +23 -0
- package/vitest.config.ts +35 -0
|
@@ -0,0 +1,1112 @@
|
|
|
1
|
+
# Playwright MCP + Fara-7B Migration Plan
|
|
2
|
+
|
|
3
|
+
> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking.
|
|
4
|
+
|
|
5
|
+
**Goal:** Replace PinchTab with Playwright MCP + Fara-7B vision model so the CLI can visually identify and click the correct Connect button on LinkedIn profiles — solving the 9-button disambiguation problem.
|
|
6
|
+
|
|
7
|
+
**Architecture:** Go CLI orchestrates two HTTP services: (1) Playwright MCP server (`npx @playwright/mcp@latest --port 3000 --caps vision`) for browser control (navigate, screenshot, click-at-coordinates), and (2) Fara-7B served via Ollama for visual target identification (screenshot → pixel coordinates). The CLI takes a screenshot, sends it to Fara asking "where is the Connect button?", gets back (x,y) coordinates, and tells Playwright to click there.
|
|
8
|
+
|
|
9
|
+
**Tech Stack:** Go 1.21+, Playwright MCP (Node.js, via npx), Ollama + Fara-7B GGUF (local vision model), Cobra CLI (existing)
|
|
10
|
+
|
|
11
|
+
**Hardware:** Apple Silicon M1/M2 16GB — Fara-7B Q4_K_M (~5GB RAM, ~5-8s per inference)
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Why This Migration
|
|
16
|
+
|
|
17
|
+
The current PinchTab-based approach has a **fundamental limitation**: its snapshot model provides only `{role, name, ref}` per DOM node — **no coordinates, no position, no bounding boxes**. When LinkedIn shows 9 "Connect" buttons (1 for the profile, 8 for recommended people), the code returns the first DOM match, which is often the wrong button.
|
|
18
|
+
|
|
19
|
+
This is unsolvable with DOM heuristics. A human solves it by **looking at the page**. Fara-7B does the same — it takes a screenshot and returns pixel coordinates for the correct button.
|
|
20
|
+
|
|
21
|
+
## File Structure
|
|
22
|
+
|
|
23
|
+
### New files
|
|
24
|
+
- `internal/playwright/client.go` — Playwright MCP HTTP client (navigate, screenshot, click, type)
|
|
25
|
+
- `internal/playwright/types.go` — MCP request/response types
|
|
26
|
+
- `internal/vision/client.go` — Fara-7B OpenAI-compatible API client
|
|
27
|
+
- `internal/vision/types.go` — Vision request/response types
|
|
28
|
+
- `internal/browser/browser.go` — Browser interface combining Playwright + Vision
|
|
29
|
+
- `internal/browser/actions.go` — High-level actions: ConnectToProfile, SendMessage, etc.
|
|
30
|
+
|
|
31
|
+
### Modified files
|
|
32
|
+
- `internal/cmd/root.go` — Add flags for Playwright/Fara endpoints
|
|
33
|
+
- `internal/cmd/connect.go` — Use new browser actions instead of PinchTab navigator
|
|
34
|
+
- `internal/cmd/message.go` — Same
|
|
35
|
+
- `internal/cmd/auth.go` — Use Playwright MCP for auth flow
|
|
36
|
+
- `internal/cmd/actions.go` — Update to use new browser actions
|
|
37
|
+
- `go.mod` — No new Go dependencies needed (only stdlib HTTP)
|
|
38
|
+
- `README.md` — New prerequisites (Ollama, Fara-7B, Playwright MCP)
|
|
39
|
+
|
|
40
|
+
### Preserved (unchanged)
|
|
41
|
+
- `internal/config/` — Profile management
|
|
42
|
+
- `internal/ratelimit/` — Rate limiting
|
|
43
|
+
- `internal/linkedin/validator.go` — URL validation
|
|
44
|
+
- `cmd/linkedin/main.go` — Entry point
|
|
45
|
+
|
|
46
|
+
### Deprecated (remove after migration)
|
|
47
|
+
- `internal/pinchtab/` — Entire package
|
|
48
|
+
- `internal/linkedin/navigator.go` — PinchTab-based navigator
|
|
49
|
+
- `internal/linkedin/selectors.go` — CSS selectors (no longer needed)
|
|
50
|
+
- `internal/linkedin/detector.go` — DOM-based detection (no longer needed; remove if present — this file does not appear in the current package listing)
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## Chunk 1: Playwright MCP Client
|
|
55
|
+
|
|
56
|
+
### Task 1.1: Playwright MCP Types
|
|
57
|
+
|
|
58
|
+
**Files:**
|
|
59
|
+
- Create: `internal/playwright/types.go`
|
|
60
|
+
|
|
61
|
+
- [ ] **Step 1: Define MCP JSON-RPC types for Playwright server communication**
|
|
62
|
+
|
|
63
|
+
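The Playwright MCP server in HTTP mode (--port) uses the MCP Streamable HTTP transport. Note: this transport requires the client to send an `Accept` header listing both `application/json` and `text/event-stream`, and to complete the `initialize` handshake (which returns the `Mcp-Session-Id`) before issuing any `tools/call` request.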
|
|
64
|
+
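We need types for: tool calls (navigate, screenshot, click) and their responses. Because `MCPResponse.Result` is a `json.RawMessage`, `types.go` must also `import "encoding/json"` (the snippet below omits the import block); without it the compilation check in Step 2 fails.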
|
|
65
|
+
|
|
66
|
+
```go
|
|
67
|
+
package playwright
|
|
68
|
+
|
|
69
|
+
// MCPRequest is a JSON-RPC 2.0 request for MCP tool calls
|
|
70
|
+
type MCPRequest struct {
|
|
71
|
+
JSONRPC string `json:"jsonrpc"`
|
|
72
|
+
ID int `json:"id"`
|
|
73
|
+
Method string `json:"method"`
|
|
74
|
+
Params interface{} `json:"params"`
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
// MCPResponse is a JSON-RPC 2.0 response
|
|
78
|
+
type MCPResponse struct {
|
|
79
|
+
JSONRPC string `json:"jsonrpc"`
|
|
80
|
+
ID int `json:"id"`
|
|
81
|
+
Result json.RawMessage `json:"result,omitempty"`
|
|
82
|
+
Error *MCPError `json:"error,omitempty"`
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
// MCPError represents a JSON-RPC error
|
|
86
|
+
type MCPError struct {
|
|
87
|
+
Code int `json:"code"`
|
|
88
|
+
Message string `json:"message"`
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// ToolCallParams wraps tool name and arguments for MCP tools/call
|
|
92
|
+
type ToolCallParams struct {
|
|
93
|
+
Name string `json:"name"`
|
|
94
|
+
Arguments map[string]interface{} `json:"arguments,omitempty"`
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// ToolResult contains the tool execution result
|
|
98
|
+
type ToolResult struct {
|
|
99
|
+
Content []ContentBlock `json:"content"`
|
|
100
|
+
IsError bool `json:"isError,omitempty"`
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// ContentBlock is a text or image block in tool results
|
|
104
|
+
type ContentBlock struct {
|
|
105
|
+
Type string `json:"type"`
|
|
106
|
+
Text string `json:"text,omitempty"`
|
|
107
|
+
Data string `json:"data,omitempty"`
|
|
108
|
+
MimeType string `json:"mimeType,omitempty"`
|
|
109
|
+
}
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
- [ ] **Step 2: Verify compilation**
|
|
113
|
+
|
|
114
|
+
Run: `go build ./internal/playwright/`
|
|
115
|
+
Expected: no errors
|
|
116
|
+
|
|
117
|
+
- [ ] **Step 3: Commit**
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
git add internal/playwright/types.go
|
|
121
|
+
git commit -m "feat: add Playwright MCP JSON-RPC types"
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Task 1.2: Playwright MCP Client
|
|
125
|
+
|
|
126
|
+
**Files:**
|
|
127
|
+
- Create: `internal/playwright/client.go`
|
|
128
|
+
- Test: `internal/playwright/client_test.go`
|
|
129
|
+
|
|
130
|
+
- [ ] **Step 1: Write test for client initialization and tool call construction**
|
|
131
|
+
|
|
132
|
+
```go
|
|
133
|
+
package playwright
|
|
134
|
+
|
|
135
|
+
import (
|
|
136
|
+
"testing"
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
func TestNewClient(t *testing.T) {
|
|
140
|
+
c := NewClient("http://localhost:3000")
|
|
141
|
+
if c.baseURL != "http://localhost:3000" {
|
|
142
|
+
t.Errorf("expected base URL http://localhost:3000, got %s", c.baseURL)
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
func TestBuildToolCallRequest(t *testing.T) {
|
|
147
|
+
c := NewClient("http://localhost:3000")
|
|
148
|
+
req := c.buildToolCall("browser_navigate", map[string]interface{}{
|
|
149
|
+
"url": "https://linkedin.com",
|
|
150
|
+
})
|
|
151
|
+
if req.Method != "tools/call" {
|
|
152
|
+
t.Errorf("expected method tools/call, got %s", req.Method)
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
- [ ] **Step 2: Run test to verify it fails**
|
|
158
|
+
|
|
159
|
+
Run: `go test -v -run TestNewClient ./internal/playwright/`
|
|
160
|
+
Expected: FAIL — NewClient not defined
|
|
161
|
+
|
|
162
|
+
- [ ] **Step 3: Implement Client struct with Navigate, Screenshot, Click, Snapshot, Type, and WaitForText methods**
|
|
163
|
+
|
|
164
|
+
```go
|
|
165
|
+
package playwright
|
|
166
|
+
|
|
167
|
+
import (
|
|
168
|
+
"bytes"
|
|
169
|
+
"encoding/json"
|
|
170
|
+
"fmt"
|
|
171
|
+
"io"
|
|
172
|
+
"net/http"
|
|
173
|
+
"sync"
|
|
174
|
+
"time"
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
// Client communicates with Playwright MCP server over HTTP
|
|
178
|
+
type Client struct {
|
|
179
|
+
baseURL string
|
|
180
|
+
sessionID string
|
|
181
|
+
client *http.Client
|
|
182
|
+
mu sync.Mutex
|
|
183
|
+
nextID int
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
// NewClient creates a Playwright MCP client
|
|
187
|
+
func NewClient(baseURL string) *Client {
|
|
188
|
+
return &Client{
|
|
189
|
+
baseURL: baseURL,
|
|
190
|
+
client: &http.Client{
|
|
191
|
+
Timeout: 60 * time.Second,
|
|
192
|
+
},
|
|
193
|
+
nextID: 1,
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
func (c *Client) buildToolCall(name string, args map[string]interface{}) *MCPRequest {
|
|
198
|
+
c.mu.Lock()
|
|
199
|
+
id := c.nextID
|
|
200
|
+
c.nextID++
|
|
201
|
+
c.mu.Unlock()
|
|
202
|
+
|
|
203
|
+
return &MCPRequest{
|
|
204
|
+
JSONRPC: "2.0",
|
|
205
|
+
ID: id,
|
|
206
|
+
Method: "tools/call",
|
|
207
|
+
Params: ToolCallParams{
|
|
208
|
+
Name: name,
|
|
209
|
+
Arguments: args,
|
|
210
|
+
},
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
func (c *Client) callTool(name string, args map[string]interface{}) (*ToolResult, error) {
|
|
215
|
+
req := c.buildToolCall(name, args)
|
|
216
|
+
body, err := json.Marshal(req)
|
|
217
|
+
if err != nil {
|
|
218
|
+
return nil, fmt.Errorf("failed to marshal request: %w", err)
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
httpReq, err := http.NewRequest("POST", c.baseURL+"/mcp", bytes.NewReader(body))
|
|
222
|
+
if err != nil {
|
|
223
|
+
return nil, fmt.Errorf("failed to create request: %w", err)
|
|
224
|
+
}
|
|
225
|
+
httpReq.Header.Set("Content-Type", "application/json")
|
|
226
|
+
httpReq.Header.Set("Accept", "application/json")
|
|
227
|
+
if c.sessionID != "" {
|
|
228
|
+
httpReq.Header.Set("Mcp-Session-Id", c.sessionID)
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
resp, err := c.client.Do(httpReq)
|
|
232
|
+
if err != nil {
|
|
233
|
+
return nil, fmt.Errorf("failed to call tool %s: %w", name, err)
|
|
234
|
+
}
|
|
235
|
+
defer resp.Body.Close()
|
|
236
|
+
|
|
237
|
+
// Capture session ID from response
|
|
238
|
+
if sid := resp.Header.Get("Mcp-Session-Id"); sid != "" {
|
|
239
|
+
c.sessionID = sid
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
respBody, err := io.ReadAll(resp.Body)
|
|
243
|
+
if err != nil {
|
|
244
|
+
return nil, fmt.Errorf("failed to read response: %w", err)
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
var mcpResp MCPResponse
|
|
248
|
+
if err := json.Unmarshal(respBody, &mcpResp); err != nil {
|
|
249
|
+
return nil, fmt.Errorf("failed to parse response: %w", err)
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
if mcpResp.Error != nil {
|
|
253
|
+
return nil, fmt.Errorf("MCP error %d: %s", mcpResp.Error.Code, mcpResp.Error.Message)
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
var result ToolResult
|
|
257
|
+
if err := json.Unmarshal(mcpResp.Result, &result); err != nil {
|
|
258
|
+
return nil, fmt.Errorf("failed to parse tool result: %w", err)
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
return &result, nil
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
// Navigate loads a URL in the browser
|
|
265
|
+
func (c *Client) Navigate(url string) error {
|
|
266
|
+
_, err := c.callTool("browser_navigate", map[string]interface{}{
|
|
267
|
+
"url": url,
|
|
268
|
+
})
|
|
269
|
+
return err
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
// Screenshot takes a screenshot and returns base64-encoded PNG
|
|
273
|
+
func (c *Client) Screenshot() (string, error) {
|
|
274
|
+
result, err := c.callTool("browser_take_screenshot", nil)
|
|
275
|
+
if err != nil {
|
|
276
|
+
return "", err
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
for _, block := range result.Content {
|
|
280
|
+
if block.Type == "image" && block.Data != "" {
|
|
281
|
+
return block.Data, nil
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
return "", fmt.Errorf("no image data in screenshot response")
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
// Click clicks at specific pixel coordinates (vision mode)
|
|
289
|
+
func (c *Client) Click(x, y int) error {
|
|
290
|
+
_, err := c.callTool("browser_click", map[string]interface{}{
|
|
291
|
+
"element": fmt.Sprintf("coordinate [%d, %d]", x, y),
|
|
292
|
+
"ref": fmt.Sprintf("coord_%d_%d", x, y),
|
|
293
|
+
})
|
|
294
|
+
return err
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
// Snapshot takes an accessibility tree snapshot (for fallback/verification)
|
|
298
|
+
func (c *Client) Snapshot() (string, error) {
|
|
299
|
+
result, err := c.callTool("browser_snapshot", nil)
|
|
300
|
+
if err != nil {
|
|
301
|
+
return "", err
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
for _, block := range result.Content {
|
|
305
|
+
if block.Type == "text" {
|
|
306
|
+
return block.Text, nil
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
return "", fmt.Errorf("no text in snapshot response")
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
// Type types text into the focused element
|
|
314
|
+
func (c *Client) Type(text string) error {
|
|
315
|
+
_, err := c.callTool("browser_type", map[string]interface{}{
|
|
316
|
+
"text": text,
|
|
317
|
+
})
|
|
318
|
+
return err
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
// WaitForText waits for specific text to appear on page
|
|
322
|
+
func (c *Client) WaitForText(text string, timeoutMs int) error {
|
|
323
|
+
_, err := c.callTool("browser_wait_for_text", map[string]interface{}{
|
|
324
|
+
"text": text,
|
|
325
|
+
"timeout": timeoutMs,
|
|
326
|
+
})
|
|
327
|
+
return err
|
|
328
|
+
}
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
- [ ] **Step 4: Run tests**
|
|
332
|
+
|
|
333
|
+
Run: `go test -v ./internal/playwright/`
|
|
334
|
+
Expected: PASS
|
|
335
|
+
|
|
336
|
+
- [ ] **Step 5: Commit**
|
|
337
|
+
|
|
338
|
+
```bash
|
|
339
|
+
git add internal/playwright/
|
|
340
|
+
git commit -m "feat: add Playwright MCP HTTP client"
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
---
|
|
344
|
+
|
|
345
|
+
## Chunk 2: Fara-7B Vision Client
|
|
346
|
+
|
|
347
|
+
### Task 2.1: Vision Types
|
|
348
|
+
|
|
349
|
+
**Files:**
|
|
350
|
+
- Create: `internal/vision/types.go`
|
|
351
|
+
|
|
352
|
+
- [ ] **Step 1: Define OpenAI-compatible types for Fara-7B communication**
|
|
353
|
+
|
|
354
|
+
```go
|
|
355
|
+
package vision
|
|
356
|
+
|
|
357
|
+
// ChatRequest is an OpenAI-compatible chat completion request
|
|
358
|
+
type ChatRequest struct {
|
|
359
|
+
Model string `json:"model"`
|
|
360
|
+
Messages []Message `json:"messages"`
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
// Message contains role and content for chat
|
|
364
|
+
type Message struct {
|
|
365
|
+
Role string `json:"role"`
|
|
366
|
+
Content []Content `json:"content"`
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
// Content is a text or image block
|
|
370
|
+
type Content struct {
|
|
371
|
+
Type string `json:"type"`
|
|
372
|
+
Text string `json:"text,omitempty"`
|
|
373
|
+
ImageURL *ImageURL `json:"image_url,omitempty"`
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
// ImageURL wraps a base64 data URL
|
|
377
|
+
type ImageURL struct {
|
|
378
|
+
URL string `json:"url"`
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
// ChatResponse is the Ollama/OpenAI chat completion response
|
|
382
|
+
type ChatResponse struct {
|
|
383
|
+
Choices []Choice `json:"choices"`
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
// Choice contains a message from the model
|
|
387
|
+
type Choice struct {
|
|
388
|
+
Message ResponseMessage `json:"message"`
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
// ResponseMessage is the model's response
|
|
392
|
+
type ResponseMessage struct {
|
|
393
|
+
Content string `json:"content"`
|
|
394
|
+
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
// ToolCall represents a function call from Fara
|
|
398
|
+
type ToolCall struct {
|
|
399
|
+
Function FunctionCall `json:"function"`
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
// FunctionCall contains the action details
|
|
403
|
+
type FunctionCall struct {
|
|
404
|
+
Name string `json:"name"`
|
|
405
|
+
Arguments string `json:"arguments"`
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
// FaraAction is the parsed action from Fara's response
|
|
409
|
+
type FaraAction struct {
|
|
410
|
+
Action string `json:"action"`
|
|
411
|
+
Coordinate [2]int `json:"coordinate"`
|
|
412
|
+
Text string `json:"text,omitempty"`
|
|
413
|
+
}
|
|
414
|
+
```
|
|
415
|
+
|
|
416
|
+
- [ ] **Step 2: Verify compilation**
|
|
417
|
+
|
|
418
|
+
Run: `go build ./internal/vision/`
|
|
419
|
+
Expected: no errors
|
|
420
|
+
|
|
421
|
+
- [ ] **Step 3: Commit**
|
|
422
|
+
|
|
423
|
+
```bash
|
|
424
|
+
git add internal/vision/types.go
|
|
425
|
+
git commit -m "feat: add Fara-7B vision model types"
|
|
426
|
+
```
|
|
427
|
+
|
|
428
|
+
### Task 2.2: Vision Client
|
|
429
|
+
|
|
430
|
+
**Files:**
|
|
431
|
+
- Create: `internal/vision/client.go`
|
|
432
|
+
- Test: `internal/vision/client_test.go`
|
|
433
|
+
|
|
434
|
+
- [ ] **Step 1: Write test for client initialization and coordinate parsing**
|
|
435
|
+
|
|
436
|
+
```go
|
|
437
|
+
package vision
|
|
438
|
+
|
|
439
|
+
import (
|
|
440
|
+
"testing"
|
|
441
|
+
)
|
|
442
|
+
|
|
443
|
+
func TestNewClient(t *testing.T) {
|
|
444
|
+
c := NewClient("http://localhost:11434", "fara-7b")
|
|
445
|
+
if c.baseURL != "http://localhost:11434" {
|
|
446
|
+
t.Errorf("unexpected base URL: %s", c.baseURL)
|
|
447
|
+
}
|
|
448
|
+
if c.model != "fara-7b" {
|
|
449
|
+
t.Errorf("unexpected model: %s", c.model)
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
|
|
453
|
+
func TestParseCoordinates(t *testing.T) {
|
|
454
|
+
tests := []struct {
|
|
455
|
+
name string
|
|
456
|
+
input string
|
|
457
|
+
wantX int
|
|
458
|
+
wantY int
|
|
459
|
+
wantErr bool
|
|
460
|
+
}{
|
|
461
|
+
{
|
|
462
|
+
name: "tool call format",
|
|
463
|
+
input: `{"action": "left_click", "coordinate": [850, 120]}`,
|
|
464
|
+
wantX: 850, wantY: 120,
|
|
465
|
+
},
|
|
466
|
+
{
|
|
467
|
+
name: "coordinate in text",
|
|
468
|
+
input: `I will click at coordinate [423, 256]`,
|
|
469
|
+
wantX: 423, wantY: 256,
|
|
470
|
+
},
|
|
471
|
+
{
|
|
472
|
+
name: "no coordinate",
|
|
473
|
+
input: "I don't see a Connect button",
|
|
474
|
+
wantErr: true,
|
|
475
|
+
},
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
for _, tt := range tests {
|
|
479
|
+
t.Run(tt.name, func(t *testing.T) {
|
|
480
|
+
x, y, err := parseCoordinates(tt.input)
|
|
481
|
+
if tt.wantErr {
|
|
482
|
+
if err == nil {
|
|
483
|
+
t.Error("expected error, got nil")
|
|
484
|
+
}
|
|
485
|
+
return
|
|
486
|
+
}
|
|
487
|
+
if err != nil {
|
|
488
|
+
t.Fatalf("unexpected error: %v", err)
|
|
489
|
+
}
|
|
490
|
+
if x != tt.wantX || y != tt.wantY {
|
|
491
|
+
t.Errorf("got (%d, %d), want (%d, %d)", x, y, tt.wantX, tt.wantY)
|
|
492
|
+
}
|
|
493
|
+
})
|
|
494
|
+
}
|
|
495
|
+
}
|
|
496
|
+
```
|
|
497
|
+
|
|
498
|
+
- [ ] **Step 2: Run test to verify it fails**
|
|
499
|
+
|
|
500
|
+
Run: `go test -v -run TestParseCoordinates ./internal/vision/`
|
|
501
|
+
Expected: FAIL
|
|
502
|
+
|
|
503
|
+
- [ ] **Step 3: Implement vision client**
|
|
504
|
+
|
|
505
|
+
```go
|
|
506
|
+
package vision
|
|
507
|
+
|
|
508
|
+
import (
|
|
509
|
+
"bytes"
|
|
510
|
+
"encoding/json"
|
|
511
|
+
"fmt"
|
|
512
|
+
"io"
|
|
513
|
+
"net/http"
|
|
514
|
+
"regexp"
|
|
515
|
+
"strconv"
|
|
516
|
+
"time"
|
|
517
|
+
)
|
|
518
|
+
|
|
519
|
+
// Client talks to Fara-7B through an OpenAI-compatible HTTP API.
type Client struct {
	// baseURL is the server root; endpoint paths such as
	// "/v1/chat/completions" are appended to it.
	baseURL string

	// model is sent as the model name in every chat request.
	model string

	// client is the HTTP client used for all requests.
	client *http.Client
}
|
|
525
|
+
|
|
526
|
+
// NewClient creates a vision client for Fara-7B
|
|
527
|
+
func NewClient(baseURL string, model string) *Client {
|
|
528
|
+
return &Client{
|
|
529
|
+
baseURL: baseURL,
|
|
530
|
+
model: model,
|
|
531
|
+
client: &http.Client{
|
|
532
|
+
Timeout: 120 * time.Second, // Vision inference can be slow
|
|
533
|
+
},
|
|
534
|
+
}
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
// IdentifyElement sends a screenshot to Fara and gets click coordinates
|
|
538
|
+
func (c *Client) IdentifyElement(screenshotBase64 string, task string) (int, int, error) {
|
|
539
|
+
req := ChatRequest{
|
|
540
|
+
Model: c.model,
|
|
541
|
+
Messages: []Message{
|
|
542
|
+
{
|
|
543
|
+
Role: "user",
|
|
544
|
+
Content: []Content{
|
|
545
|
+
{Type: "text", Text: task},
|
|
546
|
+
{
|
|
547
|
+
Type: "image_url",
|
|
548
|
+
ImageURL: &ImageURL{
|
|
549
|
+
URL: "data:image/png;base64," + screenshotBase64,
|
|
550
|
+
},
|
|
551
|
+
},
|
|
552
|
+
},
|
|
553
|
+
},
|
|
554
|
+
},
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
body, err := json.Marshal(req)
|
|
558
|
+
if err != nil {
|
|
559
|
+
return 0, 0, fmt.Errorf("failed to marshal vision request: %w", err)
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
resp, err := c.client.Post(
|
|
563
|
+
c.baseURL+"/v1/chat/completions",
|
|
564
|
+
"application/json",
|
|
565
|
+
bytes.NewReader(body),
|
|
566
|
+
)
|
|
567
|
+
if err != nil {
|
|
568
|
+
return 0, 0, fmt.Errorf("failed to call vision model: %w", err)
|
|
569
|
+
}
|
|
570
|
+
defer resp.Body.Close()
|
|
571
|
+
|
|
572
|
+
respBody, err := io.ReadAll(resp.Body)
|
|
573
|
+
if err != nil {
|
|
574
|
+
return 0, 0, fmt.Errorf("failed to read vision response: %w", err)
|
|
575
|
+
}
|
|
576
|
+
|
|
577
|
+
if resp.StatusCode != http.StatusOK {
|
|
578
|
+
return 0, 0, fmt.Errorf("vision model returned %d: %s", resp.StatusCode, string(respBody))
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
var chatResp ChatResponse
|
|
582
|
+
if err := json.Unmarshal(respBody, &chatResp); err != nil {
|
|
583
|
+
return 0, 0, fmt.Errorf("failed to parse vision response: %w", err)
|
|
584
|
+
}
|
|
585
|
+
|
|
586
|
+
if len(chatResp.Choices) == 0 {
|
|
587
|
+
return 0, 0, fmt.Errorf("vision model returned no choices")
|
|
588
|
+
}
|
|
589
|
+
|
|
590
|
+
content := chatResp.Choices[0].Message.Content
|
|
591
|
+
return parseCoordinates(content)
|
|
592
|
+
}
|
|
593
|
+
|
|
594
|
+
// VerifyState sends a screenshot and asks a yes/no question about page state
|
|
595
|
+
func (c *Client) VerifyState(screenshotBase64 string, question string) (string, error) {
|
|
596
|
+
req := ChatRequest{
|
|
597
|
+
Model: c.model,
|
|
598
|
+
Messages: []Message{
|
|
599
|
+
{
|
|
600
|
+
Role: "user",
|
|
601
|
+
Content: []Content{
|
|
602
|
+
{Type: "text", Text: question + " Answer concisely."},
|
|
603
|
+
{
|
|
604
|
+
Type: "image_url",
|
|
605
|
+
ImageURL: &ImageURL{
|
|
606
|
+
URL: "data:image/png;base64," + screenshotBase64,
|
|
607
|
+
},
|
|
608
|
+
},
|
|
609
|
+
},
|
|
610
|
+
},
|
|
611
|
+
},
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
body, err := json.Marshal(req)
|
|
615
|
+
if err != nil {
|
|
616
|
+
return "", fmt.Errorf("failed to marshal: %w", err)
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
resp, err := c.client.Post(
|
|
620
|
+
c.baseURL+"/v1/chat/completions",
|
|
621
|
+
"application/json",
|
|
622
|
+
bytes.NewReader(body),
|
|
623
|
+
)
|
|
624
|
+
if err != nil {
|
|
625
|
+
return "", fmt.Errorf("failed to call vision model: %w", err)
|
|
626
|
+
}
|
|
627
|
+
defer resp.Body.Close()
|
|
628
|
+
|
|
629
|
+
respBody, err := io.ReadAll(resp.Body)
|
|
630
|
+
if err != nil {
|
|
631
|
+
return "", err
|
|
632
|
+
}
|
|
633
|
+
|
|
634
|
+
var chatResp ChatResponse
|
|
635
|
+
if err := json.Unmarshal(respBody, &chatResp); err != nil {
|
|
636
|
+
return "", err
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
if len(chatResp.Choices) == 0 {
|
|
640
|
+
return "", fmt.Errorf("no choices in response")
|
|
641
|
+
}
|
|
642
|
+
|
|
643
|
+
return chatResp.Choices[0].Message.Content, nil
|
|
644
|
+
}
|
|
645
|
+
|
|
646
|
+
// Health checks if the vision model is available
|
|
647
|
+
func (c *Client) Health() error {
|
|
648
|
+
resp, err := c.client.Get(c.baseURL + "/v1/models")
|
|
649
|
+
if err != nil {
|
|
650
|
+
return fmt.Errorf("vision model not reachable: %w", err)
|
|
651
|
+
}
|
|
652
|
+
defer resp.Body.Close()
|
|
653
|
+
|
|
654
|
+
if resp.StatusCode != http.StatusOK {
|
|
655
|
+
return fmt.Errorf("vision model returned status %d", resp.StatusCode)
|
|
656
|
+
}
|
|
657
|
+
return nil
|
|
658
|
+
}
|
|
659
|
+
|
|
660
|
+
// coordRegex matches a bracketed pair of non-negative integers such as
// "[850, 120]". Whitespace, including newlines, is allowed around both
// numbers so that pretty-printed JSON arrays also match.
var coordRegex = regexp.MustCompile(`\[\s*(\d+)\s*,\s*(\d+)\s*\]`)

// parseCoordinates extracts the first "[x, y]" integer pair found in text.
//
// It returns the two numbers in order of appearance. An error is returned
// when text contains no such pair, or when a matched number cannot be
// converted to an int (for example because it is too large).
func parseCoordinates(text string) (int, int, error) {
	matches := coordRegex.FindStringSubmatch(text)
	if len(matches) < 3 {
		return 0, 0, fmt.Errorf("no coordinates found in response: %s", text)
	}

	x, err := strconv.Atoi(matches[1])
	if err != nil {
		return 0, 0, fmt.Errorf("invalid x coordinate: %w", err)
	}

	y, err := strconv.Atoi(matches[2])
	if err != nil {
		return 0, 0, fmt.Errorf("invalid y coordinate: %w", err)
	}

	return x, y, nil
}
|
|
680
|
+
```
|
|
681
|
+
|
|
682
|
+
- [ ] **Step 4: Run tests**
|
|
683
|
+
|
|
684
|
+
Run: `go test -v ./internal/vision/`
|
|
685
|
+
Expected: PASS
|
|
686
|
+
|
|
687
|
+
- [ ] **Step 5: Commit**
|
|
688
|
+
|
|
689
|
+
```bash
|
|
690
|
+
git add internal/vision/
|
|
691
|
+
git commit -m "feat: add Fara-7B vision client with coordinate parsing"
|
|
692
|
+
```
|
|
693
|
+
|
|
694
|
+
---
|
|
695
|
+
|
|
696
|
+
## Chunk 3: Browser Actions (Combining Playwright + Vision)
|
|
697
|
+
|
|
698
|
+
### Task 3.1: Browser Interface
|
|
699
|
+
|
|
700
|
+
**Files:**
|
|
701
|
+
- Create: `internal/browser/browser.go`
|
|
702
|
+
- Create: `internal/browser/actions.go`
|
|
703
|
+
- Test: `internal/browser/actions_test.go`
|
|
704
|
+
|
|
705
|
+
- [ ] **Step 1: Define Browser struct that combines Playwright + Vision clients**
|
|
706
|
+
|
|
707
|
+
```go
|
|
708
|
+
package browser
|
|
709
|
+
|
|
710
|
+
import (
|
|
711
|
+
"fmt"
|
|
712
|
+
"time"
|
|
713
|
+
|
|
714
|
+
"github.com/thaddeus-git/linkedin-cli/internal/playwright"
|
|
715
|
+
"github.com/thaddeus-git/linkedin-cli/internal/vision"
|
|
716
|
+
)
|
|
717
|
+
|
|
718
|
+
// Browser combines Playwright (hands) and Vision (eyes) for web automation
|
|
719
|
+
type Browser struct {
|
|
720
|
+
pw *playwright.Client
|
|
721
|
+
vis *vision.Client
|
|
722
|
+
dryRun bool
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
// NewBrowser creates a browser with both automation and vision capabilities
|
|
726
|
+
func NewBrowser(playwrightURL, visionURL, visionModel string, dryRun bool) *Browser {
|
|
727
|
+
return &Browser{
|
|
728
|
+
pw: playwright.NewClient(playwrightURL),
|
|
729
|
+
vis: vision.NewClient(visionURL, visionModel),
|
|
730
|
+
dryRun: dryRun,
|
|
731
|
+
}
|
|
732
|
+
}
|
|
733
|
+
|
|
734
|
+
// Health checks both Playwright MCP and Vision model are available
|
|
735
|
+
func (b *Browser) Health() error {
|
|
736
|
+
// TODO: Add Playwright health check (initialize/list tools)
|
|
737
|
+
if err := b.vis.Health(); err != nil {
|
|
738
|
+
return fmt.Errorf("vision model: %w", err)
|
|
739
|
+
}
|
|
740
|
+
return nil
|
|
741
|
+
}
|
|
742
|
+
```
|
|
743
|
+
|
|
744
|
+
- [ ] **Step 2: Implement ConnectToProfile action — the core flow**
|
|
745
|
+
|
|
746
|
+
```go
|
|
747
|
+
package browser

import (
	"fmt"
	"strings"
	"time"
)
|
|
748
|
+
|
|
749
|
+
// ConnectResult reports how a connection attempt ended.
type ConnectResult struct {
	// Success is true when the attempt is considered to have worked.
	Success bool

	// Status is a short machine-readable outcome. The connect flow sets
	// "dry_run", "sent", "already_connected", "not_found", "unclear" or "error".
	Status string

	// Message is a human-readable explanation of Status.
	Message string

	// Screenshots holds base64 screenshots captured along the way, for debugging.
	Screenshots []string
}
|
|
756
|
+
|
|
757
|
+
// ConnectToProfile navigates to a profile and clicks Connect
|
|
758
|
+
func (b *Browser) ConnectToProfile(profileURL string, note string) (*ConnectResult, error) {
|
|
759
|
+
result := &ConnectResult{}
|
|
760
|
+
|
|
761
|
+
if b.dryRun {
|
|
762
|
+
result.Success = true
|
|
763
|
+
result.Status = "dry_run"
|
|
764
|
+
result.Message = fmt.Sprintf("Would connect to %s", profileURL)
|
|
765
|
+
return result, nil
|
|
766
|
+
}
|
|
767
|
+
|
|
768
|
+
// Step 1: Navigate to profile
|
|
769
|
+
if err := b.pw.Navigate(profileURL); err != nil {
|
|
770
|
+
return nil, fmt.Errorf("failed to navigate to profile: %w", err)
|
|
771
|
+
}
|
|
772
|
+
time.Sleep(3 * time.Second) // Wait for page load
|
|
773
|
+
|
|
774
|
+
// Step 2: Take screenshot
|
|
775
|
+
screenshot, err := b.pw.Screenshot()
|
|
776
|
+
if err != nil {
|
|
777
|
+
return nil, fmt.Errorf("failed to take screenshot: %w", err)
|
|
778
|
+
}
|
|
779
|
+
result.Screenshots = append(result.Screenshots, screenshot)
|
|
780
|
+
|
|
781
|
+
// Step 3: Ask Fara to find the Connect button
|
|
782
|
+
x, y, err := b.vis.IdentifyElement(screenshot,
|
|
783
|
+
"Click the Connect button for this person's LinkedIn profile. "+
|
|
784
|
+
"The Connect button is in the main profile header area near the profile photo, "+
|
|
785
|
+
"NOT in the 'People also viewed' or 'People you may know' sidebar sections.")
|
|
786
|
+
if err != nil {
|
|
787
|
+
// Connect button might be hidden under "More" menu
|
|
788
|
+
return b.tryMoreMenuConnect(screenshot, note, result)
|
|
789
|
+
}
|
|
790
|
+
|
|
791
|
+
// Step 4: Click at the identified coordinates
|
|
792
|
+
if err := b.pw.Click(x, y); err != nil {
|
|
793
|
+
return nil, fmt.Errorf("failed to click Connect at (%d, %d): %w", x, y, err)
|
|
794
|
+
}
|
|
795
|
+
time.Sleep(2 * time.Second)
|
|
796
|
+
|
|
797
|
+
// Step 5: Handle optional note and Send confirmation
|
|
798
|
+
return b.handleConnectDialog(note, result)
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
// tryMoreMenuConnect handles the case where Connect is hidden under "... More"
|
|
802
|
+
func (b *Browser) tryMoreMenuConnect(screenshot string, note string, result *ConnectResult) (*ConnectResult, error) {
|
|
803
|
+
// Ask Fara to find the "More" button
|
|
804
|
+
x, y, err := b.vis.IdentifyElement(screenshot,
|
|
805
|
+
"Click the 'More' or '...' button in the profile header area. "+
|
|
806
|
+
"This is typically a three-dot menu button near the profile actions.")
|
|
807
|
+
if err != nil {
|
|
808
|
+
result.Status = "not_found"
|
|
809
|
+
result.Message = "Connect button not found on this profile"
|
|
810
|
+
return result, nil
|
|
811
|
+
}
|
|
812
|
+
|
|
813
|
+
// Click More menu
|
|
814
|
+
if err := b.pw.Click(x, y); err != nil {
|
|
815
|
+
return nil, fmt.Errorf("failed to click More menu: %w", err)
|
|
816
|
+
}
|
|
817
|
+
time.Sleep(1500 * time.Millisecond)
|
|
818
|
+
|
|
819
|
+
// Take new screenshot after menu opens
|
|
820
|
+
screenshot2, err := b.pw.Screenshot()
|
|
821
|
+
if err != nil {
|
|
822
|
+
return nil, fmt.Errorf("failed to screenshot after More menu: %w", err)
|
|
823
|
+
}
|
|
824
|
+
result.Screenshots = append(result.Screenshots, screenshot2)
|
|
825
|
+
|
|
826
|
+
// Now find Connect in the opened menu
|
|
827
|
+
x, y, err = b.vis.IdentifyElement(screenshot2,
|
|
828
|
+
"Click the Connect option in the dropdown menu that is currently open.")
|
|
829
|
+
if err != nil {
|
|
830
|
+
result.Status = "not_found"
|
|
831
|
+
result.Message = "Connect not found in More menu either"
|
|
832
|
+
return result, nil
|
|
833
|
+
}
|
|
834
|
+
|
|
835
|
+
if err := b.pw.Click(x, y); err != nil {
|
|
836
|
+
return nil, fmt.Errorf("failed to click Connect in menu: %w", err)
|
|
837
|
+
}
|
|
838
|
+
time.Sleep(2 * time.Second)
|
|
839
|
+
|
|
840
|
+
return b.handleConnectDialog(note, result)
|
|
841
|
+
}
|
|
842
|
+
|
|
843
|
+
// handleConnectDialog handles the note input and Send confirmation dialog
|
|
844
|
+
func (b *Browser) handleConnectDialog(note string, result *ConnectResult) (*ConnectResult, error) {
|
|
845
|
+
// Take screenshot to see if a dialog appeared
|
|
846
|
+
screenshot, err := b.pw.Screenshot()
|
|
847
|
+
if err != nil {
|
|
848
|
+
return nil, fmt.Errorf("failed to screenshot dialog: %w", err)
|
|
849
|
+
}
|
|
850
|
+
result.Screenshots = append(result.Screenshots, screenshot)
|
|
851
|
+
|
|
852
|
+
// Check if there's a note input and we want to add a note
|
|
853
|
+
if note != "" {
|
|
854
|
+
x, y, err := b.vis.IdentifyElement(screenshot,
|
|
855
|
+
"Click the 'Add a note' button if visible, or find the text input field for adding a personal note.")
|
|
856
|
+
if err == nil {
|
|
857
|
+
if err := b.pw.Click(x, y); err == nil {
|
|
858
|
+
time.Sleep(500 * time.Millisecond)
|
|
859
|
+
b.pw.Type(note)
|
|
860
|
+
time.Sleep(500 * time.Millisecond)
|
|
861
|
+
}
|
|
862
|
+
}
|
|
863
|
+
}
|
|
864
|
+
|
|
865
|
+
// Take screenshot and find Send button
|
|
866
|
+
screenshot, err = b.pw.Screenshot()
|
|
867
|
+
if err != nil {
|
|
868
|
+
return nil, err
|
|
869
|
+
}
|
|
870
|
+
|
|
871
|
+
x, y, err := b.vis.IdentifyElement(screenshot,
|
|
872
|
+
"Click the 'Send' button to send the connection request. "+
|
|
873
|
+
"This is the primary action button in the dialog/modal.")
|
|
874
|
+
if err == nil {
|
|
875
|
+
if err := b.pw.Click(x, y); err != nil {
|
|
876
|
+
return nil, fmt.Errorf("failed to click Send: %w", err)
|
|
877
|
+
}
|
|
878
|
+
time.Sleep(2 * time.Second)
|
|
879
|
+
}
|
|
880
|
+
|
|
881
|
+
// Step 6: Verify the connection was sent
|
|
882
|
+
return b.verifyConnectSent(result)
|
|
883
|
+
}
|
|
884
|
+
|
|
885
|
+
// verifyConnectSent checks if the connection request was actually sent
|
|
886
|
+
func (b *Browser) verifyConnectSent(result *ConnectResult) (*ConnectResult, error) {
|
|
887
|
+
screenshot, err := b.pw.Screenshot()
|
|
888
|
+
if err != nil {
|
|
889
|
+
return nil, err
|
|
890
|
+
}
|
|
891
|
+
result.Screenshots = append(result.Screenshots, screenshot)
|
|
892
|
+
|
|
893
|
+
answer, err := b.vis.VerifyState(screenshot,
|
|
894
|
+
"Look at this LinkedIn profile page. Is there a 'Pending' indicator showing that a connection request was sent? "+
|
|
895
|
+
"Or is the Connect button still visible? "+
|
|
896
|
+
"Answer with one of: PENDING, CONNECT_VISIBLE, ALREADY_CONNECTED, UNCLEAR")
|
|
897
|
+
if err != nil {
|
|
898
|
+
result.Status = "error"
|
|
899
|
+
result.Message = fmt.Sprintf("Failed to verify: %v", err)
|
|
900
|
+
return result, nil
|
|
901
|
+
}
|
|
902
|
+
|
|
903
|
+
switch {
|
|
904
|
+
case contains(answer, "PENDING"):
|
|
905
|
+
result.Success = true
|
|
906
|
+
result.Status = "sent"
|
|
907
|
+
result.Message = "Connection request sent successfully"
|
|
908
|
+
case contains(answer, "ALREADY_CONNECTED"):
|
|
909
|
+
result.Status = "already_connected"
|
|
910
|
+
result.Message = "Already connected with this person"
|
|
911
|
+
case contains(answer, "CONNECT_VISIBLE"):
|
|
912
|
+
result.Status = "error"
|
|
913
|
+
result.Message = "Connect button still visible — request may not have been sent"
|
|
914
|
+
default:
|
|
915
|
+
result.Status = "unclear"
|
|
916
|
+
result.Message = fmt.Sprintf("Verification unclear: %s", answer)
|
|
917
|
+
}
|
|
918
|
+
|
|
919
|
+
return result, nil
|
|
920
|
+
}
|
|
921
|
+
|
|
922
|
+
// contains reports whether substr occurs in s, ignoring letter case.
// It is a thin wrapper kept so existing callers do not change.
func contains(s, substr string) bool {
	return containsCI(s, substr)
}

// containsCI is the case-insensitive substring check behind contains: both
// strings are upper-cased before comparison. It needs the "strings" package
// to be imported at file level.
func containsCI(s, substr string) bool {
	return strings.Contains(strings.ToUpper(s), strings.ToUpper(substr))
}
|
|
931
|
+
```
|
|
932
|
+
|
|
933
|
+
NOTE: Go does not allow an `import` declaration inside a function body, so `strings` must be imported at the top of the file and `containsCI` must contain only its `return` statement. The valid form is:
|
|
934
|
+
|
|
935
|
+
```go
|
|
936
|
+
import "strings"
|
|
937
|
+
|
|
938
|
+
func containsCI(s, substr string) bool {
|
|
939
|
+
return strings.Contains(strings.ToUpper(s), strings.ToUpper(substr))
|
|
940
|
+
}
|
|
941
|
+
```
|
|
942
|
+
|
|
943
|
+
- [ ] **Step 3: Verify compilation**
|
|
944
|
+
|
|
945
|
+
Run: `go build ./internal/browser/`
|
|
946
|
+
Expected: no errors
|
|
947
|
+
|
|
948
|
+
- [ ] **Step 4: Commit**
|
|
949
|
+
|
|
950
|
+
```bash
|
|
951
|
+
git add internal/browser/
|
|
952
|
+
git commit -m "feat: add browser actions combining Playwright + Fara vision"
|
|
953
|
+
```
|
|
954
|
+
|
|
955
|
+
---
|
|
956
|
+
|
|
957
|
+
## Chunk 4: CLI Integration
|
|
958
|
+
|
|
959
|
+
### Task 4.1: Update Root Command with New Endpoints
|
|
960
|
+
|
|
961
|
+
**Files:**
|
|
962
|
+
- Modify: `internal/cmd/root.go`
|
|
963
|
+
|
|
964
|
+
- [ ] **Step 1: Add flags for Playwright and Vision endpoints**
|
|
965
|
+
|
|
966
|
+
Add to root.go:
|
|
967
|
+
- `--playwright-url` flag (default: `http://localhost:3000`, env: `PLAYWRIGHT_URL`)
|
|
968
|
+
- `--vision-url` flag (default: `http://localhost:11434`, env: `VISION_URL`)
|
|
969
|
+
- `--vision-model` flag (default: `fara-7b`, env: `VISION_MODEL`)
|
|
970
|
+
- Helper function `getBrowser()` that creates a `browser.Browser` instance
|
|
971
|
+
|
|
972
|
+
- [ ] **Step 2: Verify build**
|
|
973
|
+
|
|
974
|
+
Run: `go build ./cmd/linkedin`
|
|
975
|
+
|
|
976
|
+
- [ ] **Step 3: Commit**
|
|
977
|
+
|
|
978
|
+
### Task 4.2: Update Connect Command
|
|
979
|
+
|
|
980
|
+
**Files:**
|
|
981
|
+
- Modify: `internal/cmd/connect.go` (or `actions.go` depending on current structure)
|
|
982
|
+
|
|
983
|
+
- [ ] **Step 1: Replace PinchTab navigator calls with browser.ConnectToProfile**
|
|
984
|
+
|
|
985
|
+
The connect command should:
|
|
986
|
+
1. Create browser via `getBrowser()`
|
|
987
|
+
2. Call `browser.ConnectToProfile(url, note)`
|
|
988
|
+
3. Report result based on ConnectResult.Status
|
|
989
|
+
4. Save screenshot on failure for debugging
|
|
990
|
+
|
|
991
|
+
- [ ] **Step 2: Test with dry-run**
|
|
992
|
+
|
|
993
|
+
Run: `go run ./cmd/linkedin connect --profile thaddeus --url linkedin.com/in/test --dry-run`
|
|
994
|
+
Expected: "[dry-run] Would connect to..."
|
|
995
|
+
|
|
996
|
+
- [ ] **Step 3: Commit**
|
|
997
|
+
|
|
998
|
+
### Task 4.3: Update Auth Command
|
|
999
|
+
|
|
1000
|
+
**Files:**
|
|
1001
|
+
- Modify: `internal/cmd/auth.go`
|
|
1002
|
+
|
|
1003
|
+
- [ ] **Step 1: Replace PinchTab auth flow with Playwright-based auth**
|
|
1004
|
+
|
|
1005
|
+
Auth should:
|
|
1006
|
+
1. Start Playwright MCP (or verify it's running)
|
|
1007
|
+
2. Navigate to linkedin.com/login
|
|
1008
|
+
3. Wait for user to log in (press Enter)
|
|
1009
|
+
4. Use vision to verify logged-in state
|
|
1010
|
+
5. Save profile config
|
|
1011
|
+
|
|
1012
|
+
- [ ] **Step 2: Commit**
|
|
1013
|
+
|
|
1014
|
+
---
|
|
1015
|
+
|
|
1016
|
+
## Chunk 5: Setup & Documentation
|
|
1017
|
+
|
|
1018
|
+
### Task 5.1: Installation Guide
|
|
1019
|
+
|
|
1020
|
+
**Files:**
|
|
1021
|
+
- Modify: `README.md`
|
|
1022
|
+
|
|
1023
|
+
- [ ] **Step 1: Update prerequisites**
|
|
1024
|
+
|
|
1025
|
+
````markdown
## Prerequisites

1. **Go 1.21+** — [Install Go](https://go.dev/doc/install)
2. **Node.js 18+** — [Install Node.js](https://nodejs.org/)
3. **Ollama** — Local model server
   ```bash
   brew install ollama
   ollama serve  # Keep running in a terminal
   ollama pull bartowski/microsoft_Fara-7B-GGUF:Q4_K_M
   ```
4. **Start Playwright MCP** — Browser automation server
   ```bash
   npx @playwright/mcp@latest --port 3000 --caps vision \
     --user-data-dir ~/.linkedin-cli/browser-profile
   # Keep running in a terminal
   ```
````
|
|
1043
|
+
|
|
1044
|
+
- [ ] **Step 2: Commit**
|
|
1045
|
+
|
|
1046
|
+
### Task 5.2: Startup Script
|
|
1047
|
+
|
|
1048
|
+
**Files:**
|
|
1049
|
+
- Create: `scripts/start.sh`
|
|
1050
|
+
|
|
1051
|
+
- [ ] **Step 1: Create convenience startup script**
|
|
1052
|
+
|
|
1053
|
+
```bash
|
|
1054
|
+
#!/usr/bin/env bash
# Start the local services the LinkedIn CLI talks to — Ollama and the
# Playwright MCP server — and stop both when this script exits.
#
# Usage: scripts/start.sh   (press Ctrl+C to stop all services)

set -euo pipefail

readonly BROWSER_PROFILE_DIR="${HOME}/.linkedin-cli/browser-profile"

# PIDs of the background services; empty until the service has been started.
OLLAMA_PID=""
PW_PID=""

#######################################
# Stop whichever background services were started.
# Globals:   OLLAMA_PID (read), PW_PID (read)
# Arguments: none
# Returns:   0 always
#######################################
cleanup() {
  local pid
  for pid in "$OLLAMA_PID" "$PW_PID"; do
    if [[ -n "$pid" ]]; then
      # The service may already have exited; a failed kill is not an error.
      kill "$pid" 2>/dev/null || true
    fi
  done
}

# A function is used instead of a double-quoted command string so the PIDs are
# read when the trap fires, not when it is installed.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# Fail early with a clear message instead of a later "command not found".
for tool in ollama npx; do
  if ! command -v "$tool" >/dev/null; then
    printf 'error: %s is required but was not found in PATH\n' "$tool" >&2
    exit 1
  fi
done

echo "Starting Ollama..."
ollama serve &
OLLAMA_PID=$!

echo "Starting Playwright MCP..."
npx @playwright/mcp@latest --port 3000 --caps vision \
  --user-data-dir "$BROWSER_PROFILE_DIR" &
PW_PID=$!

echo ""
echo "Services running:"
echo "  Ollama:     http://localhost:11434 (PID: $OLLAMA_PID)"
echo "  Playwright: http://localhost:3000 (PID: $PW_PID)"
echo ""
echo "Press Ctrl+C to stop all services"

# Block until both services exit (or until a signal triggers the traps).
wait
|
|
1075
|
+
```
|
|
1076
|
+
|
|
1077
|
+
- [ ] **Step 2: Commit**
|
|
1078
|
+
|
|
1079
|
+
---
|
|
1080
|
+
|
|
1081
|
+
## Chunk 6: Cleanup
|
|
1082
|
+
|
|
1083
|
+
### Task 6.1: Remove PinchTab Dependency
|
|
1084
|
+
|
|
1085
|
+
- [ ] **Step 1: Remove PinchTab imports from all files**
|
|
1086
|
+
- [ ] **Step 2: Delete `internal/pinchtab/` directory**
|
|
1087
|
+
- [ ] **Step 3: Delete PinchTab-specific navigator code**
|
|
1088
|
+
- [ ] **Step 4: Update go.mod (remove unused deps)**
|
|
1089
|
+
- [ ] **Step 5: Run full test suite**
|
|
1090
|
+
|
|
1091
|
+
Run: `go test ./... && go build ./cmd/linkedin`
|
|
1092
|
+
Expected: all pass
|
|
1093
|
+
|
|
1094
|
+
- [ ] **Step 6: Commit**
|
|
1095
|
+
|
|
1096
|
+
```bash
|
|
1097
|
+
git commit -m "refactor: remove PinchTab dependency, complete migration to Playwright+Fara"
|
|
1098
|
+
```
|
|
1099
|
+
|
|
1100
|
+
---
|
|
1101
|
+
|
|
1102
|
+
## Summary
|
|
1103
|
+
|
|
1104
|
+
| Component | Technology | Purpose |
|
|
1105
|
+
|-----------|-----------|---------|
|
|
1106
|
+
| Browser control | Playwright MCP (`--port 3000 --caps vision`) | Navigate, screenshot, click(x,y) |
|
|
1107
|
+
| Visual intelligence | Fara-7B via Ollama | Identify correct UI elements from screenshots |
|
|
1108
|
+
| CLI orchestration | Go + Cobra | Profiles, rate limits, config, command structure |
|
|
1109
|
+
| Session persistence | Playwright `--user-data-dir` | LinkedIn login survives restarts |
|
|
1110
|
+
|
|
1111
|
+
**Total estimated effort:** 1-2 weeks
|
|
1112
|
+
**Risk:** Fara-7B accuracy on LinkedIn (~85-90% expected). Mitigated by retry with refined prompts.
|