lobster-cli 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +148 -268
  3. package/dist/agent/core.js +63 -0
  4. package/dist/agent/core.js.map +1 -1
  5. package/dist/agent/index.js +63 -0
  6. package/dist/agent/index.js.map +1 -1
  7. package/dist/browser/chrome-attach.js +102 -0
  8. package/dist/browser/chrome-attach.js.map +1 -0
  9. package/dist/browser/dom/compact-snapshot.js +162 -0
  10. package/dist/browser/dom/compact-snapshot.js.map +1 -0
  11. package/dist/browser/dom/index.js +160 -0
  12. package/dist/browser/dom/index.js.map +1 -1
  13. package/dist/browser/index.js +1201 -70
  14. package/dist/browser/index.js.map +1 -1
  15. package/dist/browser/manager.js +443 -11
  16. package/dist/browser/manager.js.map +1 -1
  17. package/dist/browser/page-adapter.js +370 -1
  18. package/dist/browser/page-adapter.js.map +1 -1
  19. package/dist/browser/profiles.js +238 -0
  20. package/dist/browser/profiles.js.map +1 -0
  21. package/dist/browser/semantic-find.js +152 -0
  22. package/dist/browser/semantic-find.js.map +1 -0
  23. package/dist/browser/stealth.js +187 -0
  24. package/dist/browser/stealth.js.map +1 -0
  25. package/dist/config/index.js +8 -1
  26. package/dist/config/index.js.map +1 -1
  27. package/dist/config/schema.js +8 -1
  28. package/dist/config/schema.js.map +1 -1
  29. package/dist/doc/index.js +31715 -0
  30. package/dist/doc/index.js.map +1 -0
  31. package/dist/domain-guard.js +103 -0
  32. package/dist/domain-guard.js.map +1 -0
  33. package/dist/index.js +32914 -262
  34. package/dist/index.js.map +1 -1
  35. package/dist/lib.js +1488 -241
  36. package/dist/lib.js.map +1 -1
  37. package/dist/llm/client.js +63 -0
  38. package/dist/llm/client.js.map +1 -1
  39. package/dist/llm/index.js +63 -0
  40. package/dist/llm/index.js.map +1 -1
  41. package/dist/llm/openai-client.js +63 -0
  42. package/dist/llm/openai-client.js.map +1 -1
  43. package/dist/router/index.js +925 -61
  44. package/dist/router/index.js.map +1 -1
  45. package/package.json +16 -2
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 iexcalibur
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md CHANGED
@@ -10,6 +10,7 @@
10
10
  </p>
11
11
 
12
12
  <p align="center">
13
+ <a href="https://www.npmjs.com/package/lobster-cli"><img src="https://img.shields.io/npm/v/lobster-cli?style=flat-square&color=c9a84c" alt="npm" /></a>
13
14
  <img src="https://img.shields.io/badge/node-%3E%3D20-black?style=flat-square" />
14
15
  <img src="https://img.shields.io/badge/license-MIT-black?style=flat-square" />
15
16
  <img src="https://img.shields.io/badge/AI-optional-c9a84c?style=flat-square" />
@@ -17,6 +18,25 @@
17
18
 
18
19
  ---
19
20
 
21
+ ## Why LobsterCLI
22
+
23
+ Every web automation tool today does one thing. Puppeteer gives you browser control — but no intelligence. LangChain gives you AI chains — but no browser. Scrapy gives you crawling — but no JavaScript. You end up wiring 3-4 tools together, writing glue code, and deciding which tool to use for each task.
24
+
25
+ LobsterCLI is a single engine that does all of it. It has a **Smart Router** that automatically picks the best approach for your task — from a simple HTTP fetch to a full AI agent — so you never have to choose.
26
+
27
+ **What makes it unique:**
28
+
29
+ - **One tool, not four.** Fetch, scrape, explore, and AI-navigate — all from `lobster`.
30
+ - **AI is optional.** 80% of features work without any API key. The free tier covers most use cases.
31
+ - **Smart Brain.** When you do use AI, the Brain classifies your intent and gathers only what's needed — no wasted tokens on screenshots when text is enough.
32
+ - **Three interfaces, one engine.** CLI for developers, Chrome extension for everyone, npm library for builders. Same core, same results.
33
+ - **Persistent sessions.** Login once with `--profile`, your cookies and auth survive forever. Attach to your running Chrome with `--attach` — use your real logged-in sessions.
34
+ - **Stealth built-in.** Anti-bot detection out of the box. No separate puppeteer-stealth plugin, no extra config.
35
+ - **Domain restrictions.** Build vertical products on top — a finance tool that only works on Bloomberg, a legal tool that only works on case databases. One config line.
36
+ - **Actually works on modern sites.** Full React/Vue event simulation, shadow DOM, iframe extraction, dynamic content detection. Not just static HTML scraping.
37
+
38
+ ---
39
+
20
40
  ## What is LobsterCLI
21
41
 
22
42
  LobsterCLI is a web automation engine that works in three ways:
@@ -24,366 +44,226 @@ LobsterCLI is a web automation engine that works in three ways:
24
44
  | Product | What it is | Install |
25
45
  |---------|-----------|---------|
26
46
  | **CLI** | Terminal tool — fetch, scrape, explore, automate | `npm install -g lobster-cli` |
27
- | **Chrome Extension** | Side panel chat UI — analyze any page you're browsing | Load `extension/` folder in Chrome |
28
- | **Library** | Import into your own Node.js project | `import { ... } from 'lobster-cli'` |
47
+ | **Chrome Extension** | Side panel chat UI — analyze any page you're browsing | Load `extension/` in Chrome |
48
+ | **Library** | Import into your own Node.js project | `npm install lobster-cli` |
29
49
 
30
- All three share the same core engine — same Brain, same DOM extraction, same LLM client, same agent loop.
50
+ All three share the same core engine. **80% of features work without any AI key.**
31
51
 
32
52
  ---
33
53
 
34
- ## When AI is needed vs when it's free
35
-
36
- This is the most important thing to understand:
37
-
38
- ### CLI — What's free, what needs AI
39
-
40
- | Command | AI needed? | What it does |
41
- |---------|-----------|-------------|
42
- | `lobster fetch <url>` | **No** | Fetch page, extract as markdown/text/snapshot/HTML. Uses in-house parser, no Chrome needed. |
43
- | `lobster fetch <url> -e chrome` | **No** | Same but with full Chrome for JS-heavy pages. |
44
- | `lobster explore <url>` | **No** | Discover APIs, intercept network calls, detect frameworks, generate adapters. |
45
- | `lobster run <url>` | **No** | Run pre-built site adapters and YAML pipelines. |
46
- | `lobster list` | **No** | List all registered adapters. |
47
- | `lobster config` | **No** | View/edit settings. |
48
- | `lobster doctor` | **No** | Diagnose setup, check Chrome, verify API key. |
49
- | `lobster setup` | **No** | Interactive setup wizard (AI provider selection is optional). |
50
- | `lobster plugin install` | **No** | Install community adapters from GitHub. |
51
- | `lobster agent "task"` | **Yes** | AI agent that reads pages, reasons, clicks, types, and navigates autonomously. |
52
-
53
- **80% of CLI features work without any API key.**
54
+ ## Install
54
55
 
55
- ### Chrome Extension — What's free, what needs AI
56
+ **npm:** [npmjs.com/package/lobster-cli](https://www.npmjs.com/package/lobster-cli)
56
57
 
57
- | Action | AI needed? | What it does |
58
- |--------|-----------|-------------|
59
- | Click **"Summarize this page"** | **No** | Extracts headings, word count, content preview, page type, framework detection. |
60
- | Click **"Extract as Markdown"** | **No** | Converts full page DOM to clean Markdown. Copy to clipboard. |
61
- | Click **"Detect all forms"** | **No** | Scans all form fields — labels, types, values, required/disabled state. |
62
- | Click **"Show key links"** | **No** | Extracts all meaningful links from the page. |
63
- | Click **"Monitor API calls"** | **No** | Intercepts fetch/XHR, shows live API calls with method/URL/status. |
64
- | Click **"DOM snapshot"** | **No** | 12-stage pruned DOM snapshot, LLM-optimized. |
65
- | Type any question | **Yes** | Brain classifies intent, gathers right data, sends to LLM for answer. |
66
- | Click **"What's on screen?"** | **Yes** | Captures screenshot + sends to vision model for visual analysis. |
58
+ ```bash
59
+ npm install -g lobster-cli
60
+ ```
67
61
 
68
- **The 6 built-in chips work without AI. Typed questions need an API key.**
62
+ Or clone and build:
69
63
 
70
- ### What the Brain does (smart intent classification)
64
+ ```bash
65
+ git clone https://github.com/iexcalibur/lobster-cli.git
66
+ cd lobster-cli
67
+ npm install && npm run build && npm link
68
+ ```
71
69
 
72
- When you type a question, the Brain decides what data to gather before answering:
70
+ Requires Node.js 20+. Chrome/Chromium needed only for browser features.
73
71
 
74
- ```
75
- "summarize this page" → Brain: { screenshot: false, markdown: true }
76
- Cost: ~$0.001 (text only)
72
+ ---
77
73
 
78
- "what is this email about" → Brain: { screenshot: false, markdown: true }
79
- Cost: ~$0.001 (text only)
74
+ ## Quick Start
80
75
 
81
- "what images are on this page" → Brain: { screenshot: true, markdown: true }
82
- Cost: ~$0.01 (screenshot + text)
76
+ ```bash
77
+ # Extract content from any page (no AI, no Chrome)
78
+ lobster fetch https://example.com
83
79
 
84
- "what does the layout look like" → Brain: { screenshot: true, markdown: false }
85
- Cost: ~$0.01 (screenshot only)
80
+ # Discover hidden APIs on a site
81
+ lobster explore https://reddit.com
86
82
 
87
- "what forms are on this page" → Brain: { screenshot: false, forms: true }
88
- Cost: ~$0.001 (form extraction + text)
89
- ```
83
+ # AI agent navigates autonomously (needs API key)
84
+ lobster agent "find pricing on example.com" --url https://example.com
90
85
 
91
- The Brain saves money by **not** taking screenshots when they aren't needed. Most questions only need text.
86
+ # Use stealth mode to avoid bot detection
87
+ lobster fetch https://linkedin.com --stealth
92
88
 
93
- ### Library — Same rules apply
89
+ # Use a persistent profile (cookies survive restarts)
90
+ lobster agent "check my notifications" --url https://github.com --profile work
94
91
 
95
- ```typescript
96
- import { classifyIntent } from 'lobster-cli/brain' // No AI needed for heuristic mode
97
- import { PuppeteerPage } from 'lobster-cli/page' // No AI needed
98
- import { MARKDOWN_SCRIPT } from 'lobster-cli/dom' // No AI needed
99
- import { exploreSite } from 'lobster-cli/discover' // No AI needed
100
- import { AgentCore } from 'lobster-cli/agent' // Needs AI
101
- import { LLM } from 'lobster-cli/llm' // Needs AI
92
+ # Attach to your running Chrome (use your logged-in sessions)
93
+ lobster agent "check inbox" --url https://gmail.com --attach
102
94
  ```
103
95
 
104
96
  ---
105
97
 
106
- ## Install
98
+ ## What's Free vs What Needs AI
107
99
 
108
100
  ### CLI
109
101
 
110
- ```bash
111
- # Install globally
112
- npm install -g lobster-cli
113
-
114
- # Or clone and build
115
- git clone https://github.com/iexcalibur/lobster-cli.git
116
- cd lobster-cli
117
- npm install
118
- npm run build
119
- npm link
120
- ```
121
-
122
- Requires Node.js 20+. Chrome/Chromium needed only for browser features.
102
+ | Command | AI? | What it does |
103
+ |---------|-----|-------------|
104
+ | `lobster fetch <url>` | No | Extract as markdown, text, snapshot, HTML, semantic tree |
105
+ | `lobster explore <url>` | No | Discover APIs, detect frameworks, generate adapters |
106
+ | `lobster run <url>` | No | Run site adapters and YAML pipelines |
107
+ | `lobster list` | No | List registered adapters |
108
+ | `lobster config` | No | View/edit settings |
109
+ | `lobster doctor` | No | Diagnose setup |
110
+ | `lobster setup` | No | Interactive setup wizard |
111
+ | `lobster plugin install` | No | Install community adapters |
112
+ | `lobster agent "task"` | **Yes** | AI agent that reasons, clicks, types, navigates |
123
113
 
124
114
  ### Chrome Extension
125
115
 
126
- 1. Clone this repo
127
- 2. Open `chrome://extensions/`
128
- 3. Enable **Developer mode** (top right)
129
- 4. Click **Load unpacked** → select the `extension/` folder
130
- 5. Click the LobsterCLI icon → opens as a side panel
131
-
132
- ### Library (in your own project)
133
-
134
- ```bash
135
- npm install lobster-cli
136
- ```
137
-
138
- ```typescript
139
- import { classifyIntent, heuristicClassify } from 'lobster-cli/brain'
140
- import { PuppeteerPage, BrowserManager } from 'lobster-cli/browser'
141
- import { SNAPSHOT_SCRIPT, MARKDOWN_SCRIPT } from 'lobster-cli/dom'
142
- import { AgentCore } from 'lobster-cli/agent'
143
- import { exploreSite } from 'lobster-cli/discover'
144
- import { executePipeline } from 'lobster-cli/pipeline'
145
- import { LLM } from 'lobster-cli/llm'
146
- ```
116
+ | Action | AI? | What it does |
117
+ |--------|-----|-------------|
118
+ | Summarize this page | No | Headings, word count, content preview, framework detection |
119
+ | Extract as Markdown | No | Full DOM-to-Markdown with copy button |
120
+ | Detect all forms | No | Labels, types, values, required/disabled state |
121
+ | Show key links | No | All meaningful links on the page |
122
+ | Monitor API calls | No | Live fetch/XHR interception |
123
+ | DOM snapshot | No | 12-stage pruned snapshot |
124
+ | Type any question | **Yes** | Brain classifies intent, gathers data, sends to LLM |
125
+ | What's on screen? | **Yes** | Captures screenshot + vision model analysis |
147
126
 
148
127
  ---
149
128
 
150
129
  ## AI Setup (optional)
151
130
 
152
- AI is only needed for `lobster agent` and typed chat questions in the extension.
153
-
154
131
  ```bash
155
132
  lobster setup
156
133
  ```
157
134
 
158
- | Provider | Default Model | Cost | Best for |
159
- |----------|--------------|------|----------|
160
- | **Google Gemini** | gemini-2.5-flash | **Free tier** | Most users — free, fast, vision support |
161
- | **OpenAI** | gpt-4o | Pay per token | Best reasoning |
162
- | **Anthropic** | claude-sonnet-4 | Pay per token | Best for code analysis |
163
- | **Ollama** | llama3.1 | **Free (local)** | Privacy, offline use |
164
-
165
- Or set manually:
166
-
167
- ```bash
168
- lobster config set llm.provider gemini
169
- lobster config set llm.apiKey AIza...
170
- lobster config set llm.model gemini-2.5-flash
171
- ```
135
+ | Provider | Model | Cost |
136
+ |----------|-------|------|
137
+ | **Google Gemini** | gemini-2.5-flash | Free tier available |
138
+ | **OpenAI** | gpt-4o | Pay per token |
139
+ | **Anthropic** | claude-sonnet-4 | Pay per token |
140
+ | **Ollama** | llama3.1 | Free (runs locally) |
172
141
 
173
142
  ---
174
143
 
175
- ## CLI Commands
144
+ ## Key Features
176
145
 
177
- ### `lobster fetch <url>` — Extract content (no AI)
146
+ ### Smart Brain
147
+ When you ask a question, the Brain classifies your intent and gathers only what's needed:
178
148
 
179
- ```bash
180
- lobster fetch https://example.com # markdown output
181
- lobster fetch https://example.com -d snapshot # LLM-optimized DOM
182
- lobster fetch https://example.com -d text # plain text
183
- lobster fetch https://example.com -d semantic # W3C accessible tree
184
- lobster fetch https://example.com -d html # raw HTML
185
- lobster fetch https://example.com -e chrome -w 5 # force Chrome, wait 5s
186
149
  ```
187
-
188
- Engines: `auto` (default), `fast` (in-house parser, no Chrome), `chrome` (full browser).
189
-
190
- ### `lobster run <url>` — Smart router (no AI)
191
-
192
- ```bash
193
- lobster run https://api.github.com/users/octocat # direct HTTP fetch
194
- lobster run https://news.ycombinator.com # adapter if registered
195
- lobster run https://example.com -f yaml # output as YAML
150
+ "summarize this page" → reads text only (~$0.001)
151
+ "what images are showing" → captures screenshot + text (~$0.01)
152
+ "what forms are on this page" → extracts form data (~$0.001)
196
153
  ```
197
154
 
198
- ### `lobster agent <task>` — AI agent (needs API key)
155
+ ### Persistent Profiles
156
+ Store Chrome sessions that survive restarts — cookies, auth, extensions.
199
157
 
200
158
  ```bash
201
- lobster agent "search for TypeScript on Hacker News" --url https://news.ycombinator.com
202
- lobster agent "find the cheapest flight to Tokyo" --url https://google.com/flights
203
- lobster agent "log in and check my notifications" --url https://github.com
159
+ lobster profile create work
160
+ lobster agent "check gmail" --profile work # login persists
204
161
  ```
205
162
 
206
- The agent observes the DOM → sends to LLM → decides what to click/type/scroll → repeats until done (max 40 steps).
207
-
208
- ### `lobster explore <url>` — Discover APIs (no AI)
163
+ ### Chrome Attach
164
+ Connect to your running Chrome with all your logins.
209
165
 
210
166
  ```bash
211
- lobster explore https://reddit.com
212
- lobster explore https://twitter.com -w 5
167
+ lobster agent "check notifications" --attach
213
168
  ```
214
169
 
215
- Intercepts network calls, clicks around to find hidden APIs, detects frameworks, scores endpoints, and generates adapter files.
216
-
217
- ### `lobster config` — Settings
170
+ ### Stealth Mode
171
+ Anti-bot detection — hides headless Chrome fingerprint.
218
172
 
219
173
  ```bash
220
- lobster config show
221
- lobster config set llm.provider anthropic
222
- lobster config set browser.headless false
174
+ lobster fetch https://protected-site.com --stealth
223
175
  ```
224
176
 
225
- ### `lobster plugin` — Community adapters
177
+ ### Compact Snapshot
178
+ Token-efficient DOM snapshots (~800 tokens vs ~3000+).
226
179
 
227
180
  ```bash
228
- lobster plugin install github-user/reddit-adapter
229
- lobster plugin list
181
+ lobster fetch https://example.com -d snapshot --compact
230
182
  ```
231
183
 
232
- ---
233
-
234
- ## Chrome Extension
184
+ ### Semantic Element Finding
185
+ AI agent finds elements by name instead of guessing index numbers.
235
186
 
236
- The extension is a chat-style side panel (like Gemini or Claude) docked to the right of your browser.
237
-
238
- ### Free features (no API key)
187
+ ```
188
+ "click the login button" → finds [1] button "Sign In" (score: 0.81)
189
+ ```
239
190
 
240
- Click any of the 6 built-in chips:
191
+ ### Domain Guard
192
+ Restrict which websites the engine can operate on.
241
193
 
242
- - **Summarize this page** — page type, headings, word count, framework, content preview
243
- - **Extract as Markdown** — full DOM-to-Markdown with copy button
244
- - **Detect all forms** — every form field with label, type, value, required state
245
- - **Show key links** — all meaningful links on the page
246
- - **Monitor API calls** — live fetch/XHR interception
247
- - **DOM snapshot** — 12-stage pruned snapshot
194
+ ```typescript
195
+ import { DomainGuard } from 'lobster-cli/domain-guard'
248
196
 
249
- ### AI features (needs API key)
197
+ const guard = new DomainGuard({
198
+ allowDomains: ['bloomberg.com', 'yahoo.com'],
199
+ blockMessage: 'This tool only works on finance sites.',
200
+ })
201
+ ```
250
202
 
251
- Type any question in the chat:
203
+ ### Site Explorer
204
+ Discover hidden APIs on any website — no AI needed.
252
205
 
253
- - "What is this email about?" — reads page text, gives natural language answer
254
- - "What images are on this page?" — captures screenshot, uses vision model
255
- - "Draft a reply to this email" — reads content, writes a response
256
- - "What color is the header?" — captures screenshot, analyzes visually
206
+ ```bash
207
+ lobster explore https://reddit.com
208
+ ```
257
209
 
258
- The **Brain** automatically decides whether to capture a screenshot or just read text, so you don't pay for vision when you don't need it.
210
+ Intercepts network calls, clicks buttons to trigger hidden APIs, detects frameworks, scores endpoints, and generates YAML adapters.
259
211
 
260
- ### Setup AI in extension
212
+ ### Pipeline Engine
213
+ Declarative YAML pipelines with 17 steps and template expressions.
261
214
 
262
- Click the gear icon → Settings → pick provider → enter key → Save.
215
+ ```yaml
216
+ steps:
217
+ - navigate: https://news.ycombinator.com
218
+ - evaluate: |
219
+ [...document.querySelectorAll('.titleline > a')]
220
+ .map(a => ({ title: a.textContent, url: a.href }))
221
+ - limit: 10
222
+ ```
263
223
 
264
224
  ---
265
225
 
266
- ## Use as a library
226
+ ## Use as a Library
267
227
 
268
- LobsterCLI exports every module for use in your own projects:
228
+ ```bash
229
+ npm install lobster-cli
230
+ ```
269
231
 
270
232
  ```typescript
271
- // Brain — intent classification
272
- import { classifyIntent, heuristicClassify } from 'lobster-cli/brain'
273
-
274
- // Browser — page control
233
+ import { classifyIntent } from 'lobster-cli/brain'
275
234
  import { BrowserManager, PuppeteerPage } from 'lobster-cli/browser'
276
-
277
- // DOM scripts — run in any browser context
278
- import { SNAPSHOT_SCRIPT, MARKDOWN_SCRIPT, FORM_STATE_SCRIPT } from 'lobster-cli/dom'
279
-
280
- // Agent — autonomous web navigation
235
+ import { SNAPSHOT_SCRIPT, MARKDOWN_SCRIPT } from 'lobster-cli/dom'
281
236
  import { AgentCore } from 'lobster-cli/agent'
282
-
283
- // LLM — multi-provider client
284
- import { LLM } from 'lobster-cli/llm'
285
-
286
- // Pipeline — declarative YAML execution
287
- import { executePipeline } from 'lobster-cli/pipeline'
288
-
289
- // Discovery — find site APIs
237
+ import { DomainGuard } from 'lobster-cli/domain-guard'
290
238
  import { exploreSite } from 'lobster-cli/discover'
291
-
292
- // Config — load/save settings
293
- import { loadConfig, saveConfig } from 'lobster-cli/config'
294
- ```
295
-
296
- ### Example: build a search agent
297
-
298
- ```typescript
299
- import { BrowserManager } from 'lobster-cli/browser'
300
- import { AgentCore } from 'lobster-cli/agent'
239
+ import { executePipeline } from 'lobster-cli/pipeline'
301
240
  import { LLM } from 'lobster-cli/llm'
302
- import { classifyIntent } from 'lobster-cli/brain'
303
-
304
- async function search(query) {
305
- // Brain decides what data is needed
306
- const intent = await classifyIntent(query, 'Google Search')
307
-
308
- // Launch browser
309
- const browser = new BrowserManager({ headless: true })
310
- const page = await browser.launch('https://google.com')
311
-
312
- // Run agent
313
- const agent = new AgentCore({ page, llm: new LLM(config), maxSteps: 20 })
314
- const result = await agent.execute(query)
315
-
316
- await page.close()
317
- return result
318
- }
319
241
  ```
320
242
 
321
243
  ---
322
244
 
323
- ## Architecture
324
-
325
- ```
326
- lobster-cli/
327
- ├── src/
328
- │ ├── brain/ → Intent classifier (LLM + heuristic fallback)
329
- │ ├── browser/ → IPage interface, Puppeteer adapter, DOM scripts
330
- │ │ └── dom/ → 6 extraction strategies (snapshot, markdown, semantic, etc.)
331
- │ ├── agent/ → Observe-think-act loop, 8 tools, auto-fixer
332
- │ ├── llm/ → Multi-provider client (OpenAI, Anthropic, Gemini, Ollama)
333
- │ ├── pipeline/ → YAML pipeline engine, 17 steps, template expressions
334
- │ ├── adapter/ → Site adapter registry, YAML/TS loaders
335
- │ ├── router/ → Smart routing (HTTP → Engine → Adapter → Agent)
336
- │ ├── discover/ → API discovery, endpoint scoring, adapter generation
337
- │ ├── cascade/ → Auth strategy detection (public → cookie → header → intercept)
338
- │ ├── config/ → Settings (~/.lobster/config.yaml)
339
- │ ├── output/ → Formatters (table, JSON, YAML, CSV, Markdown)
340
- │ ├── plugin/ → GitHub plugin install/uninstall
341
- │ ├── lib.ts → Library exports (for npm import)
342
- │ ├── cli.ts → CLI commands (commander)
343
- │ └── index.ts → CLI entry point
344
-
345
- ├── extension/ → Chrome extension (side panel)
346
- │ ├── sidepanel/ → Chat UI (HTML/CSS/JS)
347
- │ ├── background/ → Service worker (LLM calls, screenshot capture)
348
- │ ├── shared/ → DOM scripts (same code as CLI, ported to browser JS)
349
- │ ├── options/ → Settings page
350
- │ └── manifest.json → Chrome extension manifest (v3, side panel API)
351
-
352
- ├── logo.svg → Logo (SVG)
353
- ├── logo.png → Logo (512px PNG)
354
- └── package.json → Dual: CLI binary + library exports
355
- ```
245
+ ## Chrome Extension
356
246
 
357
- ---
247
+ Chat-style side panel docked to the right of your browser (like Gemini or Claude).
358
248
 
359
- ## Environment Variables
249
+ **Install:**
250
+ 1. Clone this repo
251
+ 2. Open `chrome://extensions/` → enable Developer mode
252
+ 3. Click Load unpacked → select `extension/` folder
253
+ 4. Click LobsterCLI icon → side panel opens
360
254
 
361
- | Variable | Purpose |
362
- |----------|---------|
363
- | `LOBSTER_API_KEY` | LLM API key (overrides config) |
364
- | `LOBSTER_MODEL` | LLM model name |
365
- | `LOBSTER_BASE_URL` | LLM API base URL |
366
- | `LOBSTER_CDP_ENDPOINT` | Chrome DevTools Protocol endpoint |
367
- | `LOBSTER_BROWSER_PATH` | Path to Chrome/Chromium binary |
255
+ **AI setup:** Click gear icon → Settings → pick provider → enter key → Save.
368
256
 
369
257
  ---
370
258
 
371
- ## Dependencies
372
-
373
- | Package | Purpose |
374
- |---------|---------|
375
- | `commander` | CLI framework |
376
- | `puppeteer-core` | Chrome control (no bundled Chrome) |
377
- | `zod` | Schema validation |
378
- | `chalk` | Terminal colors |
379
- | `cli-table3` | Table formatting |
380
- | `js-yaml` | YAML parsing |
381
- | `ws` | WebSocket |
259
+ ## Documentation
382
260
 
383
- No AI SDK. LLM calls use native `fetch()` with our own protocol adapters.
261
+ For detailed technical documentation, see **[DOCS.md](DOCS.md)**.
384
262
 
385
263
  ---
386
264
 
387
265
  ## License
388
266
 
389
- MIT
267
+ MIT License — Copyright (c) 2025 iexcalibur
268
+
269
+ See [LICENSE](LICENSE) for full text.
@@ -212,6 +212,69 @@ var OpenAIClient = class {
212
212
  } : void 0
213
213
  };
214
214
  }
215
+ /**
216
+ * Simple vision call — send a screenshot + text prompt, get text back.
217
+ * Used by PDF Doctor for targeted issue resolution.
218
+ */
219
+ async chatWithVision(prompt, screenshotBase64) {
220
+ const headers = this.buildHeaders();
221
+ if (this.config.provider === "anthropic") {
222
+ const body2 = {
223
+ model: this.config.model,
224
+ max_tokens: 1024,
225
+ temperature: 0.1,
226
+ messages: [{
227
+ role: "user",
228
+ content: [
229
+ {
230
+ type: "image",
231
+ source: {
232
+ type: "base64",
233
+ media_type: "image/jpeg",
234
+ data: screenshotBase64
235
+ }
236
+ },
237
+ { type: "text", text: prompt }
238
+ ]
239
+ }]
240
+ };
241
+ const resp2 = await fetch(`${this.config.baseURL}/messages`, {
242
+ method: "POST",
243
+ headers,
244
+ body: JSON.stringify(body2)
245
+ });
246
+ if (!resp2.ok) throw new Error(`Anthropic vision error: ${resp2.status}`);
247
+ const json2 = await resp2.json();
248
+ const content = json2.content;
249
+ return content?.[0]?.text || "";
250
+ }
251
+ const body = {
252
+ model: this.config.model,
253
+ max_tokens: 1024,
254
+ temperature: 0.1,
255
+ messages: [{
256
+ role: "user",
257
+ content: [
258
+ {
259
+ type: "image_url",
260
+ image_url: {
261
+ url: `data:image/jpeg;base64,${screenshotBase64}`
262
+ }
263
+ },
264
+ { type: "text", text: prompt }
265
+ ]
266
+ }]
267
+ };
268
+ const resp = await fetch(`${this.config.baseURL}/chat/completions`, {
269
+ method: "POST",
270
+ headers,
271
+ body: JSON.stringify(body)
272
+ });
273
+ if (!resp.ok) throw new Error(`Vision API error: ${resp.status}`);
274
+ const json = await resp.json();
275
+ const choice = json.choices?.[0];
276
+ return choice?.message?.content || "";
277
+ }
215
278
  };
216
279
 
217
280
  // src/llm/utils.ts