libretto 0.6.9 → 0.6.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/dist/cli/cli.js +2 -0
  2. package/dist/cli/commands/auth.js +535 -0
  3. package/dist/cli/commands/billing.js +74 -0
  4. package/dist/cli/commands/browser.js +8 -3
  5. package/dist/cli/commands/deploy.js +2 -7
  6. package/dist/cli/commands/execution.js +99 -136
  7. package/dist/cli/commands/snapshot.js +38 -126
  8. package/dist/cli/core/ai-model.js +0 -3
  9. package/dist/cli/core/auth-fetch.js +195 -0
  10. package/dist/cli/core/auth-storage.js +52 -0
  11. package/dist/cli/core/browser.js +128 -202
  12. package/dist/cli/core/daemon/config.js +6 -0
  13. package/dist/cli/core/daemon/daemon.js +298 -0
  14. package/dist/cli/core/daemon/exec.js +86 -0
  15. package/dist/cli/core/daemon/index.js +16 -0
  16. package/dist/cli/core/daemon/ipc.js +171 -0
  17. package/dist/cli/core/daemon/pages.js +15 -0
  18. package/dist/cli/core/daemon/snapshot.js +86 -0
  19. package/dist/cli/core/daemon/spawn.js +90 -0
  20. package/dist/cli/core/exec-compiler.js +111 -0
  21. package/dist/cli/core/prompt.js +72 -0
  22. package/dist/cli/core/providers/libretto-cloud.js +2 -6
  23. package/dist/cli/core/readonly-exec.js +1 -1
  24. package/dist/cli/router.js +4 -0
  25. package/dist/cli/workers/run-integration-runtime.js +0 -5
  26. package/dist/shared/state/session-state.d.ts +1 -0
  27. package/dist/shared/state/session-state.js +2 -1
  28. package/docs/browser-automation-approaches.md +435 -0
  29. package/docs/releasing.md +117 -0
  30. package/package.json +4 -3
  31. package/skills/libretto/SKILL.md +14 -1
  32. package/skills/libretto-readonly/SKILL.md +1 -1
  33. package/src/cli/cli.ts +2 -0
  34. package/src/cli/commands/auth.ts +787 -0
  35. package/src/cli/commands/billing.ts +133 -0
  36. package/src/cli/commands/browser.ts +8 -2
  37. package/src/cli/commands/deploy.ts +2 -7
  38. package/src/cli/commands/execution.ts +126 -186
  39. package/src/cli/commands/snapshot.ts +46 -143
  40. package/src/cli/core/ai-model.ts +4 -5
  41. package/src/cli/core/auth-fetch.ts +283 -0
  42. package/src/cli/core/auth-storage.ts +102 -0
  43. package/src/cli/core/browser.ts +159 -242
  44. package/src/cli/core/daemon/config.ts +46 -0
  45. package/src/cli/core/daemon/daemon.ts +429 -0
  46. package/src/cli/core/daemon/exec.ts +128 -0
  47. package/src/cli/core/daemon/index.ts +24 -0
  48. package/src/cli/core/daemon/ipc.ts +294 -0
  49. package/src/cli/core/daemon/pages.ts +21 -0
  50. package/src/cli/core/daemon/snapshot.ts +114 -0
  51. package/src/cli/core/daemon/spawn.ts +171 -0
  52. package/src/cli/core/exec-compiler.ts +169 -0
  53. package/src/cli/core/prompt.ts +94 -0
  54. package/src/cli/core/providers/libretto-cloud.ts +2 -6
  55. package/src/cli/core/readonly-exec.ts +2 -1
  56. package/src/cli/router.ts +4 -0
  57. package/src/cli/workers/run-integration-runtime.ts +0 -6
  58. package/src/shared/state/session-state.ts +1 -0
  59. package/dist/cli/core/browser-daemon.js +0 -122
  60. package/src/cli/core/browser-daemon.ts +0 -198
@@ -0,0 +1,435 @@
1
+ # Browser Automation Approaches: Bot Detection, Data Capture, and Integration Strategies
2
+
3
+ This guide covers the different approaches to capturing data during browser automation, how bot detection works, how to identify what detection a site uses, and the trade-offs of each approach.
4
+
5
+ ---
6
+
7
+ ## Table of Contents
8
+
9
+ 1. [How Bot Detection Works](#how-bot-detection-works)
10
+ 2. [Identifying Bot Detection on a Target Site](#identifying-bot-detection-on-a-target-site)
11
+ 3. [Integration Approaches](#integration-approaches)
12
+ - [Approach 1: Regular Playwright Automation](#approach-1-regular-playwright-automation)
13
+ - [Approach 2: Passive Network Interception (`page.onResponse`)](#approach-2-passive-network-interception-pageonresponse)
14
+ - [Approach 3: In-Browser Fetch (`page.evaluate(() => fetch(...))`)](#approach-3-in-browser-fetch-pageevaluate--fetch)
15
+ - [Approach 4: Direct HTTP from Node.js](#approach-4-direct-http-from-nodejs)
16
+ 4. [Comparison Matrix](#comparison-matrix)
17
+ 5. [Decision Guide](#decision-guide)
18
+ 6. [Infrastructure and Operational Considerations](#infrastructure-and-operational-considerations)
19
+
20
+ ---
21
+
22
+ ## How Bot Detection Works
23
+
24
+ Bot detection systems operate at multiple layers. Understanding each layer helps you choose the right automation approach.
25
+
26
+ ### Layer 1: Browser Fingerprinting
27
+
28
+ When a browser connects to a site, the site can inspect dozens of signals to determine if the browser is real or automated:
29
+
30
+ - **Navigator properties**: `navigator.webdriver` is set to `true` in automated browsers. Detection scripts check this immediately. Browsers launched by Playwright expose this flag by default.
31
+ - **Browser plugin/extension footprint**: Real browsers expose a consistent footprint of plugins (e.g., a built-in PDF viewer), installed fonts, and media codecs. Automated browsers often expose none.
32
+ - **WebGL and Canvas fingerprinting**: The site renders invisible graphics and hashes the output. Headless browsers produce distinct rendering artifacts.
33
+ - **Screen and window dimensions**: Headless browsers often report unusual viewport sizes or have `window.outerWidth === 0`.
34
+ - **User-Agent consistency**: The User-Agent string must match the actual browser behavior. Claiming to be Chrome 120 but having Firefox-like JS engine behavior is a red flag.
35
+ - **CDP (Chrome DevTools Protocol) detection**: Some sites detect whether a CDP session is attached, which is how Playwright controls the browser.
36
+ - **Headless-specific object detection**: Automated browsers are missing objects and properties that exist in real headed Chrome. Detection scripts check for missing `chrome.runtime`, `Notification.permission` values that disagree with what `navigator.permissions.query()` reports, `window.chrome` being undefined or incomplete, and `navigator.plugins` being empty. In headless mode, `navigator.plugins.length === 0` and `navigator.languages` may be empty or contain only `"en"`, which are strong signals.
37
+ - **Iframe and sandbox detection**: Some sites check if their code is running inside an iframe or a sandboxed context by comparing `window.self !== window.top`, inspecting `window.frameElement`, or probing for capabilities that a sandbox withholds unless explicitly re-granted via tokens such as `allow-scripts` or `allow-same-origin`. Bot protection scripts may also test whether `document.hasFocus()` returns `false` (common in headless or background contexts) and whether `document.visibilityState` is `"visible"`.
38
+
39
+ ### Layer 2: Behavioral Analysis
40
+
41
+ Beyond the browser itself, detection systems analyze how the user behaves:
42
+
43
+ - **Mouse movement patterns**: Real users have natural mouse trajectories with acceleration curves. Automated clicks happen without preceding mouse movement.
44
+ - **Typing cadence**: Real typing has variable delays between keystrokes. `page.fill()` inserts text instantly. `page.type()` with default settings uses uniform delays.
45
+ - **Scroll behavior**: Real users scroll with momentum and variable speed. Programmatic scrolling is instant or perfectly uniform.
46
+ - **Navigation timing**: Real users take time to read content before clicking. Bots navigate instantly between actions.
47
+ - **Interaction sequence**: Clicking a submit button without first clicking/focusing the input fields is suspicious.
48
+
49
+ ### Layer 3: Network-Level Detection
50
+
51
+ The network request itself carries signals:
52
+
53
+ - **TLS fingerprint (JA3/JA4)**: Every HTTP client has a unique TLS handshake fingerprint based on the cipher suites, extensions, and elliptic curves it offers. Node.js `fetch`/`axios` have a completely different TLS fingerprint than Chrome. This is one of the strongest detection signals and is very hard to fake from outside a browser.
54
+ - **HTTP/2 fingerprint**: The SETTINGS frame, WINDOW_UPDATE behavior, and header ordering in HTTP/2 differ between browsers and HTTP libraries.
55
+ - **Header ordering and values**: Browsers send headers in a specific order (e.g., Chrome always sends `sec-ch-ua` headers). Node.js HTTP clients send headers in a different order or omit browser-specific headers entirely.
56
+ - **Cookie state**: Requests from a real browser session carry the full cookie jar. External HTTP requests must manually replicate cookies and may miss HttpOnly cookies or cookies set by JavaScript.
57
+ - **Referer and Origin**: Browser requests automatically include the correct `Referer` and `Origin` headers based on navigation state. External requests must fabricate these.
58
+
59
+ ### Layer 4: API-Level Monitoring
60
+
61
+ Some sophisticated sites monitor the behavior of their own frontend code:
62
+
63
+ - **Fetch/XHR monkey-patching**: The site overrides `window.fetch` and/or `XMLHttpRequest.prototype.open` with wrapper functions that log every request, including its call stack. If a `fetch()` call originates from code that isn't part of the site's own bundle, it can be flagged.
64
+ ```js
65
+ // What the site does (runs very early, before your code):
66
+ const _fetch = window.fetch;
67
+ window.fetch = function(...args) {
68
+ const stack = new Error().stack;
69
+ if (!isExpectedCallSite(stack)) {
70
+ reportAnomaly({ url: args[0], stack });
71
+ }
72
+ return _fetch.apply(this, args);
73
+ };
74
+ ```
75
+ - **Proxy-based interception**: Instead of replacing `fetch`, some sites use `Proxy` objects to wrap it. This is harder to detect because `fetch.toString()` still returns `"function fetch() { [native code] }"`.
76
+ - **Timing correlation**: The site knows which API calls its own code makes and when. If an endpoint is called at a time when the UI flow wouldn't trigger it, that's anomalous.
77
+ - **Request frequency and patterns**: The site's own code calls APIs in predictable patterns (e.g., pagination calls come in sequence, search calls follow debounce timings). Automation that deviates from these patterns can be flagged.
78
+
79
+ ### Layer 5: Enterprise Bot Protection Services
80
+
81
+ Many sites don't build their own detection — they use third-party services:
82
+
83
+ | Service | Common Indicators |
84
+ |---|---|
85
+ | **Akamai Bot Manager** | Scripts from `*.akamaized.net`, `_abck` cookie, `sensor_data` payloads |
86
+ | **PerimeterX (HUMAN)** | Scripts loading from `*.perimeterx.net` or `*.px-cdn.net`, `_px` cookies |
87
+ | **DataDome** | Scripts from `*.datadome.co`, `datadome` cookie, interstitial challenge pages |
88
+ | **Cloudflare Bot Management** | `cf_clearance` cookie, challenge pages with "Checking your browser" message |
89
+ | **Shape Security (F5)** | Obfuscated inline scripts that collect telemetry, `_imp_apg_r_` style cookies |
90
+ | **Kasada** | Scripts from `*.kasada.io`, `x-kpsdk-*` headers |
91
+
92
+ These services combine many of the detection layers above into a single product. They are continuously updated to catch new automation techniques.
93
+
94
+ ---
95
+
96
+ ## Identifying Bot Detection on a Target Site
97
+
98
+ Before building your automation, audit the target site to understand what you're up against.
99
+
100
+ ### Step 1: Check for Enterprise Bot Protection
101
+
102
+ Open the site in a normal browser with DevTools open (Network tab):
103
+
104
+ 1. **Look at initial script loads**: Filter by JS in the Network tab. Look for domains associated with known bot protection services (listed in the table above).
105
+ 2. **Check cookies**: In DevTools Application > Cookies, look for telltale cookies like `_abck`, `_px`, `datadome`, `cf_clearance`, etc.
106
+ 3. **Watch for challenge pages**: Navigate around the site. If you ever see a "Checking your browser..." interstitial, the site uses active bot protection.
107
+ 4. **Inspect the page source**: View source and look at the first `<script>` tags. Enterprise bot protection scripts are typically injected before any application code.
108
+
109
+ ### Step 2: Check if Fetch/XHR is Patched
110
+
111
+ Open the browser console and run:
112
+
113
+ ```js
114
+ // Check if fetch has been wrapped
115
+ window.fetch.toString()
116
+ // Native (safe): "function fetch() { [native code] }"
117
+ // Patched (flagged): will show actual JavaScript source
118
+
119
+ // Check XMLHttpRequest
120
+ XMLHttpRequest.prototype.open.toString()
121
+ // Native: "function open() { [native code] }"
122
+
123
+ // Check for property descriptor tampering
124
+ Object.getOwnPropertyDescriptor(window, 'fetch')
125
+ // Native: { value: ƒ, writable: true, enumerable: true, configurable: true }
126
+ // Tampered: may have getters/setters or different configurability
127
+ ```
128
+
129
+ **Important caveat**: If the site uses `Proxy` to wrap `fetch`, the `toString()` check will still return `"[native code]"`. To catch this:
130
+
131
+ ```js
132
+ // Attempt to detect Proxy-based wrapping
133
+ try {
134
+ // Proxied functions sometimes behave differently with certain operations
135
+ const desc = Object.getOwnPropertyDescriptor(window, 'fetch');
136
+ console.log('configurable:', desc.configurable);
137
+ console.log('writable:', desc.writable);
138
+
139
+ // Compare prototype chain
140
+ console.log(window.fetch instanceof Function); // should be true
141
+ console.log(window.fetch.prototype); // native fetch has no prototype
142
+ } catch (e) {
143
+ console.log('fetch access is trapped');
144
+ }
145
+ ```
146
+
147
+ ### Step 3: Check for Behavioral Monitoring
148
+
149
+ Look for signs that the site collects behavioral telemetry:
150
+
151
+ ```js
152
+ // Check if common event listeners are heavily registered
153
+ getEventListeners(document)
154
+ // In Chrome DevTools, this shows all listeners. An unusually large number
155
+ // of mousemove, keydown, scroll, and touchstart listeners suggests telemetry.
156
+
157
+ // Check for known telemetry globals
158
+ // PerimeterX:
159
+ typeof window._pxAppId !== 'undefined'
160
+ // Akamai:
161
+ typeof window.bmak !== 'undefined'
162
+ // DataDome:
163
+ typeof window.ddjskey !== 'undefined'
164
+ ```
165
+
166
+ ### Step 4: Test with Plain Playwright
167
+
168
+ The simplest test: run a basic Playwright script against the site and see what happens.
169
+
170
+ ```typescript
171
+ import { chromium } from 'playwright';
172
+ const browser = await chromium.launch({ headless: false });
173
+ const page = await browser.newPage();
174
+ await page.goto('https://target-site.com');
175
+ // If you get a challenge page, CAPTCHA, or block — bot detection is active.
176
+ ```
177
+
178
+ If plain Playwright gets blocked, you know the site has browser-level detection. If it works fine, the site likely only has basic or no detection.
179
+
180
+ ---
181
+
182
+ ## Integration Approaches
183
+
184
+ ### Approach 1: Regular Playwright Automation
185
+
186
+ Standard Playwright usage — navigate pages, click elements, fill forms, read DOM content using selectors and `page.evaluate()`.
187
+
188
+ ```typescript
189
+ // Navigate and interact
190
+ await page.goto('https://example.com/search');
191
+ await page.fill('#query', 'search term');
192
+ await page.click('#submit');
193
+ await page.waitForSelector('.results');
194
+
195
+ // Extract data from the DOM
196
+ const results = await page.evaluate(() => {
197
+ return Array.from(document.querySelectorAll('.result-item')).map(el => ({
198
+ title: el.querySelector('h2')?.textContent,
199
+ price: el.querySelector('.price')?.textContent,
200
+ }));
201
+ });
202
+ ```
203
+
204
+ **Pros:**
205
+ - Simplest approach — uses Playwright as intended
206
+ - No need to understand the site's API structure
207
+ - Works with any site regardless of how data is rendered (server-side, client-side, or hybrid)
208
+ - Data extraction is visual/DOM-based, which maps naturally to what a user sees
209
+ - Easy to debug with `headless: false` and Playwright's trace viewer
210
+ - Integrates directly with Libretto's step-based workflow, recovery, and extraction features
211
+
212
+ **Cons:**
213
+ - **Moderate bot detection risk**: Playwright sets `navigator.webdriver = true` and has other detectable fingerprints out of the box
214
+ - Slower than API-based approaches — requires full page rendering
215
+ - Fragile against DOM changes — selectors break when the site updates its markup
216
+ - Harder to get structured data — you're scraping rendered HTML rather than clean API responses
217
+ - Cannot access data that isn't rendered in the DOM (e.g., API responses with fields the UI doesn't display)
218
+
219
+ **Bot detection risk: MODERATE**
220
+ Plain Playwright is detectable by browser fingerprinting (Layer 1). Sites with any enterprise bot protection will likely flag it. Sites without active detection won't notice.
221
+
222
+ **Mitigation:** Use `playwright-extra` with the stealth plugin to patch common fingerprint leaks, or use Playwright with a persistent browser context that looks more like a real browser profile.
223
+
224
+ ---
225
+
226
+ ### Approach 2: Passive Network Interception (`page.onResponse`)
227
+
228
+ Listen to network responses that the browser naturally makes as you navigate. You don't make any extra requests — you just capture the data flowing through.
229
+
230
+ ```typescript
231
+ const capturedData: any[] = [];
232
+
233
+ page.on('response', async (response) => {
234
+ const url = response.url();
235
+ if (url.includes('/api/search/results')) {
236
+ const json = await response.json();
237
+ capturedData.push(json);
238
+ }
239
+ });
240
+
241
+ // Trigger the data load by interacting with the UI normally
242
+ await page.goto('https://example.com/search?q=term');
243
+ await page.waitForSelector('.results');
244
+ // capturedData now has the raw API response
245
+ ```
246
+
247
+ **Pros:**
248
+ - **Zero additional bot detection risk from network requests** — you're not making any extra calls. The requests that happen are the ones the site's own code triggers.
249
+ - Gets clean, structured API data (JSON) rather than scraped DOM content
250
+ - API responses often contain more data than the UI displays (hidden fields, IDs, metadata)
251
+ - Not fragile against DOM changes — the API contract tends to be more stable than CSS selectors
252
+ - Works with Playwright's existing page context — no additional setup
253
+
254
+ **Cons:**
255
+ - **You only get data that the page naturally loads** — you must trigger the right UI flow to cause the requests you need. If the data requires clicking through 5 pages, you must automate all 5 page navigations.
256
+ - Still requires Playwright browser automation to drive the page, so you still have the browser fingerprinting risk from Approach 1 for the navigation itself
257
+ - Timing can be tricky — you must set up the listener before the navigation that triggers the request
258
+ - Responses may be paginated or partial — the site's UI might lazy-load data, requiring you to trigger scrolling or "load more" interactions
259
+ - If the site uses GraphQL or batched API calls, parsing the right data out of responses requires understanding the API structure
260
+ - Some responses may be encrypted or obfuscated by bot protection services
261
+
262
+ **Bot detection risk: LOW**
263
+ The network requests themselves carry zero additional risk since they originate from the site's own JavaScript. The only risk is from the browser automation layer needed to drive the UI (same as Approach 1). No extra fetch calls means no anomalous network patterns for API-level monitoring to flag.
264
+
265
+ ---
266
+
267
+ ### Approach 3: In-Browser Fetch (`page.evaluate(() => fetch(...))`)
268
+
269
+ Execute fetch calls from within the browser page's JavaScript context. The requests originate from the browser process itself with all the right credentials and fingerprints.
270
+
271
+ ```typescript
272
+ const data = await page.evaluate(async () => {
273
+ const res = await fetch('/api/search/results?q=term&page=2', {
274
+ headers: {
275
+ 'Content-Type': 'application/json',
276
+ 'X-Requested-With': 'XMLHttpRequest',
277
+ },
278
+ });
279
+ return res.json();
280
+ });
281
+ ```
282
+
283
+ **Pros:**
284
+ - **Requests come from the real browser** — same TLS fingerprint, same cookies, same origin, same HTTP/2 settings. From the server's perspective, it looks identical to a request the site's own JS would make.
285
+ - Full control over which endpoints you call and with what parameters — no need to trigger UI flows
286
+ - Can call endpoints the UI doesn't naturally hit (e.g., fetch page 50 of results without clicking "next" 49 times)
287
+ - Gets clean, structured API data (JSON)
288
+ - Faster than driving the UI — skip page rendering and go straight to the data
289
+ - No need to understand DOM structure or deal with selector fragility
290
+
291
+ **Cons:**
292
+ - **Requires understanding the site's API** — you need to know the endpoint URLs, required headers, authentication tokens, request body format, etc. This requires reverse-engineering the site's network traffic first.
293
+ - **Vulnerable to fetch/XHR monkey-patching** (Layer 4) — if the site wraps `window.fetch`, your calls will be intercepted and may be flagged because the call stack won't match the site's expected code paths
294
+ - Still requires a Playwright browser to be running (for the execution context), so you have the browser fingerprinting overhead from Approach 1
295
+ - API endpoints can change without notice (no public contract)
296
+ - Must handle authentication tokens/CSRF tokens that the site's own code normally manages
297
+ - If the site uses dynamic or signed request parameters, you may need to reverse-engineer the signing logic
298
+
299
+ **Bot detection risk: LOW to MODERATE**
300
+ The network-level risk is very low — the requests are genuine browser requests. The risk comes from:
301
+ 1. Browser fingerprinting (same as Approach 1)
302
+ 2. Fetch/XHR monkey-patching detecting unexpected call stacks (Layer 4)
303
+ 3. Timing and pattern analysis if your requests don't match normal UI flow patterns
304
+
305
+ Most sites do **not** implement Layer 4 monitoring. This approach is effectively undetectable on the vast majority of sites. Only sites with enterprise-grade bot protection from services like PerimeterX or Shape Security are likely to catch this.
306
+
307
+ ---
308
+
309
+ ### Approach 4: Direct HTTP from Node.js
310
+
311
+ Make HTTP requests directly from Node.js using `fetch`, `axios`, `got`, or similar libraries. No browser involved.
312
+
313
+ ```typescript
314
+ import axios from 'axios';
315
+
316
+ const response = await axios.get('https://example.com/api/search/results', {
317
+ params: { q: 'term', page: 1 },
318
+ headers: {
319
+ 'User-Agent': 'Mozilla/5.0 ...',
320
+ 'Cookie': 'session=abc123; ...',
321
+ },
322
+ });
323
+ const data = response.data;
324
+ ```
325
+
326
+ **Pros:**
327
+ - **Fastest approach** — no browser overhead, no page rendering, minimal memory usage
328
+ - Simple code — just HTTP requests, no browser lifecycle management
329
+ - Easy to parallelize — make many concurrent requests without launching multiple browser instances
330
+ - Lowest resource consumption — suitable for high-volume data collection
331
+
332
+ **Cons:**
333
+ - **Highest bot detection risk by far** — this is what bot detection systems are primarily designed to catch
334
+ - **TLS fingerprint is completely wrong** — Node.js has a fundamentally different TLS fingerprint than any browser. This is the #1 detection signal and is extremely difficult to fake. Even with libraries like `got` or custom TLS settings, matching Chrome's exact fingerprint is a cat-and-mouse game.
335
+ - **No cookies unless manually managed** — you must extract cookies from a browser session and replicate them, including HttpOnly cookies you can't access from JS
336
+ - **No browser-specific headers** — `sec-ch-ua`, `sec-fetch-*`, and other headers that browsers add automatically must be manually fabricated and kept up to date
337
+ - **No JavaScript execution** — if the site requires JS to set cookies, generate tokens, or solve challenges, you can't do it
338
+ - **CSRF/auth tokens** — must be manually extracted and refreshed
339
+ - **Breaks easily** — API changes, new security headers, or updated bot protection will break your requests with no fallback
340
+
341
+ **Bot detection risk: VERY HIGH**
342
+ Detectable at nearly every layer. TLS fingerprinting alone will catch this on any site with even basic bot protection. This approach only works reliably against sites with no bot detection whatsoever.
343
+
344
+ ---
345
+
346
+ ## Comparison Matrix
347
+
348
+ | Criteria | Regular Playwright | `page.onResponse` | `page.evaluate(fetch)` | Direct Node.js HTTP |
349
+ |---|---|---|---|---|
350
+ | **Bot detection risk** | Moderate | Low | Low-Moderate | Very High |
351
+ | **Browser fingerprint risk** | Yes | Yes | Yes | N/A (worse: wrong fingerprint) |
352
+ | **Network fingerprint risk** | None (browser requests) | None (browser requests) | None (browser requests) | Very High |
353
+ | **API monitoring risk** | None | None | Low (fetch patching) | N/A |
354
+ | **Data quality** | DOM-dependent | Structured JSON | Structured JSON | Structured JSON |
355
+ | **Setup complexity** | Low | Medium | Medium-High | Low-Medium |
356
+ | **API reverse-engineering needed** | No | Partial (identify endpoints) | Yes (full) | Yes (full) |
357
+ | **Control over data fetching** | Low | Low | High | High |
358
+ | **Speed** | Slow | Medium | Medium-Fast | Fast |
359
+ | **Resource usage** | High | High | High | Low |
360
+ | **Resilience to DOM changes** | Low | High | High | High |
361
+ | **Resilience to API changes** | Medium | Low | Low | Low |
362
+
363
+ ---
364
+
365
+ ## Decision Guide
366
+
367
+ **Use Regular Playwright (Approach 1) when:**
368
+ - The data you need is visible in the DOM and straightforward to extract with selectors
369
+ - The site doesn't have aggressive bot protection, or you're using stealth plugins
370
+ - You want the simplest implementation that integrates with Libretto's recovery and extraction features
371
+ - The data is rendered server-side and doesn't come from a separate API call
372
+
373
+ **Use `page.onResponse` (Approach 2) when:**
374
+ - The site loads data via API calls during normal navigation (most modern SPAs)
375
+ - You want structured JSON data without reverse-engineering the full API
376
+ - Minimizing detection risk is important
377
+ - You're already navigating through the UI and want to passively capture data along the way
378
+
379
+ **Use `page.evaluate(fetch)` (Approach 3) when:**
380
+ - You need data from API endpoints that the UI doesn't naturally trigger (e.g., deep pagination, bulk exports)
381
+ - You've verified the site doesn't monkey-patch `fetch` (or you can work around it)
382
+ - You want maximum control over which data you fetch and when
383
+ - You've already reverse-engineered the relevant API endpoints
384
+
385
+ **Use Direct Node.js HTTP (Approach 4) when:**
386
+ - The target site has zero bot detection
387
+ - Speed and resource efficiency are the primary concerns
388
+ - You're hitting a public/documented API (not scraping a website)
389
+ - You need to make thousands of concurrent requests
390
+
391
+ **Hybrid approach (recommended for most cases):**
392
+ Combine Approach 1 + Approach 2. Use regular Playwright to navigate and interact with the site (handling popups, login flows, etc. with Libretto's recovery features), and passively intercept API responses with `page.onResponse` to capture structured data. This gives you the reliability of browser-based navigation with the data quality of API responses, at minimal detection risk.
393
+
394
+ ---
395
+
396
+ ## Infrastructure and Operational Considerations
397
+
398
+ The sections above cover the front-end detection and integration strategies for automating within a browser. The following are infrastructure-level concerns that affect reliability and longevity of automations at scale. These are secondary to the core approach but become important in production.
399
+
400
+ ### IP Reputation and Rate Limiting
401
+
402
+ Bot detection doesn't stop at the browser — the IP address you connect from is one of the first things evaluated:
403
+
404
+ - **Datacenter vs. residential IPs**: Cloud provider IP ranges (AWS, GCP, Azure) are well-known and flagged by most bot protection services. Requests from these ranges face higher scrutiny or outright blocking regardless of how good the browser fingerprint is.
405
+ - **Rate limiting**: Even without bot detection, sites enforce per-IP request limits. Hitting the same site too frequently from one IP triggers throttling or temporary bans.
406
+ - **Geo-mismatch**: If your IP geolocates to Virginia but your browser reports the `America/Los_Angeles` timezone, that geographic inconsistency is a signal. Locale and language settings that don't match the IP's region raise the same flag.
407
+ - **Proxy rotation**: Residential proxy services provide IP addresses from real ISPs, making requests appear to originate from normal households. Rotating proxies distribute requests across many IPs to avoid rate limits. This is the standard production approach for high-volume automation.
408
+
409
+ ### CAPTCHA and Challenge Handling
410
+
411
+ When bot detection triggers, sites typically respond with a challenge rather than an outright block:
412
+
413
+ - **reCAPTCHA v2**: The checkbox or image-selection challenge. Can sometimes be bypassed in automated browsers if the risk score is low enough (it evaluates browser fingerprint and behavior before showing the challenge).
414
+ - **reCAPTCHA v3**: Invisible — returns a score (0.0 to 1.0) with no user interaction. The site decides what to do with the score. A well-fingerprinted browser with natural behavior scores higher.
415
+ - **hCaptcha**: Similar to reCAPTCHA v2 but used by sites that want an alternative to Google. Cloudflare uses it as a fallback.
416
+ - **Cloudflare Turnstile**: Non-interactive challenge that evaluates browser signals. Replaces traditional CAPTCHAs on many Cloudflare-protected sites.
417
+ - In practice, if a CAPTCHA is triggered during an automation, it usually means the browser fingerprint or behavior failed earlier checks. Fixing the root cause (better stealth, slower interaction patterns) is more effective than trying to solve CAPTCHAs programmatically.
418
+
419
+ ### Detection and Recovery Patterns
420
+
421
+ Understanding how blocks manifest helps you build resilient automations:
422
+
423
+ - **Soft blocks**: The site returns degraded results (fewer items, missing data, slower responses) without an explicit error. These are hard to detect — you may not realize you're getting incomplete data.
424
+ - **Hard blocks**: HTTP 403, CAPTCHA pages, "Access Denied" responses, or redirects to a challenge page. These are obvious but require recovery logic.
425
+ - **Cookie consent and GDPR banners**: Not bot detection per se, but a common obstacle. These overlays block interactions with the underlying page. Automations need to detect and dismiss them before proceeding.
426
+ - **Fingerprint testing**: Before deploying an automation, test your browser's fingerprint against public detection test sites (e.g., `bot.sannysoft.com`, `browserleaks.com`) to identify what signals you're leaking.
427
+
428
+ ### Anti-Detection Maintenance
429
+
430
+ Bot detection is adversarial — both sides are continuously updating:
431
+
432
+ - Enterprise bot protection services (Akamai, PerimeterX, etc.) push updates frequently. An automation that works today may break next week with no changes on your end.
433
+ - Browser updates change fingerprints. When Chrome updates, your automation's User-Agent, feature set, and expected behavior profile all change.
434
+ - Stealth patches need to keep pace with detection updates. Relying on community-maintained stealth plugins means you're dependent on their update cadence.
435
+ - Budget time for ongoing maintenance of any automation that targets a site with active bot protection.
@@ -0,0 +1,117 @@
1
+ # Releasing Libretto
2
+
3
+ ## For people
4
+
5
+ 1. From the repository root, run `pnpm prepare-release patch` (or `minor`/`major`). This pulls `main`, runs tests, bumps `packages/libretto/package.json`, and opens a release PR.
6
+ 2. Wait for CI and evals to finish on the PR. Review the eval summary comment.
7
+ 3. Merge the PR. GitHub Actions will automatically publish to npm and create the GitHub release.
8
+
9
+ Only admins with merge access to `main` can trigger a release. The release workflow runs on push to `main`, so branch protection is the access control — there is no separate approval step.
10
+
11
+ ---
12
+
13
+ ## For agents
14
+
15
+ Libretto uses a simple release flow:
16
+
17
+ 1. Create a release PR from `main`.
18
+ 2. Merge the PR into `main`.
19
+ 3. Let GitHub Actions publish the package and create the GitHub release.
20
+
21
+ This repo does not publish from local machines and does not push directly to `main`.
22
+
23
+ ## Requirements
24
+
25
+ GitHub Actions needs these repository secrets:
26
+
27
+ - `OPENAI_API_KEY`: used by the existing test suite during the release workflow.
28
+
29
+ The release workflow uses a GitHub Actions environment named `release`. Create that environment in the repository settings (no required reviewers — access is controlled by branch protection on `main` instead).
30
+
31
+ On npm, configure `libretto` to trust this repository and workflow for publishing. The trusted publisher fields should match:
32
+
33
+ - Organization or user: `saffron-health`
34
+ - Repository: `libretto`
35
+ - Workflow filename: `release.yml`
36
+ - Environment name: `release`
37
+
38
+ If you prefer the CLI, the setup command is:
39
+
40
+ ```bash
41
+ npm trust github libretto --repo saffron-health/libretto --file release.yml --env release
42
+ ```
43
+
44
+ Trusted publishing only works on supported cloud-hosted runners. This workflow uses `ubuntu-latest`, which satisfies that requirement. npm also requires a recent toolchain for trusted publishing, so the publish job runs on Node 24.
45
+
46
+ The workflow needs `contents: write` to create the GitHub release and tag, and `id-token: write` so npm trusted publishing can exchange the GitHub OIDC token for a short-lived publish credential.
47
+
48
+ After trusted publishing is working, remove any old npm publish token from the repo secrets. npm recommends restricting token-based publishing after the migration.
49
+
50
+ GitHub release notes are auto-generated from merged pull requests. The release note categories live in `.github/release.yml`, so PR labels control where entries show up in the changelog.
51
+
52
+ ## Prepare a release PR
53
+
54
+ Run one of these from a clean working tree:
55
+
56
+ ```bash
57
+ pnpm prepare-release patch
58
+ pnpm prepare-release minor
59
+ pnpm prepare-release major
60
+ ```
61
+
62
+ The root `scripts/prepare-release.sh` script does the following:
63
+
64
+ 1. Checks that the working tree is clean.
65
+ 2. Updates local `main` from `origin/main`.
66
+ 3. Runs `pnpm install --frozen-lockfile`, `pnpm --filter libretto type-check`, and `pnpm --filter libretto test`.
67
+ 4. Bumps the version in `packages/libretto/package.json`.
68
+ 5. Creates a release branch.
69
+ 6. Commits the version bump.
70
+ 7. Pushes the branch and opens a PR to `main` with the `release` label.
71
+
72
+ Release PRs also run the eval workflow. That workflow compares the current eval score against the latest successful `main` baseline and fails if the score drifts by more than 5 percentage points in either direction.
73
+
74
+ ## Merge behavior
75
+
76
+ After the release PR merges, `.github/workflows/release.yml` runs on `main`.
77
+
78
+ The workflow:
79
+
80
+ 1. Reads the version from `packages/libretto/package.json`.
81
+ 2. Checks whether that version already exists on npm and in GitHub Releases.
82
+ 3. Runs install, type-check, and tests for the `libretto` package in a verification job.
83
+ 4. Publishes `libretto@X.Y.Z` to npm from `packages/libretto` with trusted publishing if it is not already published.
84
+ 5. Creates GitHub release `vX.Y.Z` with generated release notes if it does not already exist.
85
+
86
+ This makes the workflow safe to re-run after partial failures. For example, if npm publish succeeds but GitHub release creation fails, a re-run will skip npm and only create the missing release.
87
+
88
+ ## Eval gating on release PRs
89
+
90
+ `.github/workflows/evals.yml` now runs automatically for release PRs and for qualifying pushes to `main`.
91
+
92
+ - On `main`, it records the current eval summary as the baseline artifact for future release PRs.
93
+ - On release PRs, it runs evals again and compares the overall score against the latest successful `main` baseline.
94
+ - If the score moves outside a +/-5 percentage point window, the eval job fails and flags the release PR.
95
+
96
+ If no successful baseline artifact exists yet, the release PR eval job reports that and skips the comparison for that run.
97
+
98
+ ## Changelog behavior
99
+
100
+ The GitHub Releases page is the changelog for this repo.
101
+
102
+ When the workflow runs `gh release create ... --generate-notes`, GitHub builds the release notes from the merged PRs since the previous release. `.github/release.yml` groups PRs into sections such as Features, Fixes, and Documentation.
103
+
104
+ Today the categories map directly to labels that already exist in the repo:
105
+
106
+ - `enhancement` -> Features
107
+ - `bug` -> Fixes
108
+ - `documentation` -> Documentation
109
+
110
+ To keep release notes readable, use clear PR titles and apply one of those labels before merging. If a PR should not appear in the changelog, add the `skip-changelog` label.
111
+
112
+ ## Notes
113
+
114
+ - Protect `main` in GitHub settings. Branch protection is the primary control that limits who can merge release-triggering commits into `main`. Restrict merge access to admins.
115
+ - Only merge a release PR when `main` is ready to ship.
116
+ - Do not create git tags in the PR branch. Tags are created by the release workflow after merge.
117
+ - Release notes are AI-generated from merged PRs by `scripts/generate-changelog.ts`.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "libretto",
3
- "version": "0.6.9",
3
+ "version": "0.6.11",
4
4
  "description": "AI-powered browser automation library and CLI built on Playwright",
5
5
  "license": "MIT",
6
6
  "homepage": "https://libretto.sh",
@@ -15,6 +15,7 @@
15
15
  },
16
16
  "files": [
17
17
  "dist",
18
+ "docs",
18
19
  "src",
19
20
  "scripts",
20
21
  "skills"
@@ -37,7 +38,7 @@
37
38
  "check:skills": "pnpm run check:mirrors",
38
39
  "build": "tsup --config tsup.config.ts",
39
40
  "type-check": "tsc --noEmit",
40
- "test": "pnpm run build && vitest run",
41
+ "test": "vitest run",
41
42
  "test:watch": "vitest",
42
43
  "cli": "node dist/index.js",
43
44
  "generate-changelog": "tsx scripts/generate-changelog.ts",
@@ -79,7 +80,7 @@
79
80
  "openai": "^6.29.0",
80
81
  "tsup": "^8.5.1",
81
82
  "typescript": "^5.9.3",
82
- "vitest": "^4.1.0"
83
+ "vitest": "^4.1.5"
83
84
  },
84
85
  "dependencies": {
85
86
  "ai": "^6.0.116",
@@ -4,7 +4,7 @@ description: "Browser automation CLI for building, maintaining, and running brow
4
4
  license: MIT
5
5
  metadata:
6
6
  author: saffron-health
7
- version: "0.6.9"
7
+ version: "0.6.11"
8
8
  ---
9
9
 
10
10
  ## How Libretto Works
@@ -13,6 +13,19 @@ metadata:
13
13
  - Use Libretto commands to inspect the site and open pages, observe state, inspect requests, and prototype interactions.
14
14
  - Libretto work must end in script changes. Create or edit the workflow file instead of stopping at interactive exploration.
15
15
 
16
+ ## Shipped Source & Documentation
17
+
18
+ The npm package includes `src/` (full TypeScript source) and `docs/` for deeper understanding of internals and design decisions. Read these when you need implementation context beyond what this skill file covers. Resolve paths from the package root (e.g. `node_modules/libretto/`).
19
+
20
+ Full documentation is published at [libretto.sh](https://libretto.sh). Available pages:
21
+
22
+ - Get started: [introduction](https://libretto.sh/get-started/introduction), [installation](https://libretto.sh/get-started/installation), [configuration](https://libretto.sh/get-started/configuration)
23
+ - Fundamentals: [core concepts](https://libretto.sh/fundamentals/core-concepts), [how workflow generation works](https://libretto.sh/fundamentals/how-workflow-generation-works), [automation and bot detection](https://libretto.sh/fundamentals/automation-and-bot-detection), [website authentication](https://libretto.sh/fundamentals/website-authentication)
24
+ - Workflow guides: [one-shot generation](https://libretto.sh/workflow-guides/one-shot-workflow-generation), [interactive building](https://libretto.sh/workflow-guides/interactive-workflow-building), [debugging workflows](https://libretto.sh/workflow-guides/debugging-workflows), [convert to network requests](https://libretto.sh/workflow-guides/convert-to-network-requests)
25
+ - CLI reference: [open and connect](https://libretto.sh/cli-reference/open-and-connect), [sessions](https://libretto.sh/cli-reference/sessions), [profiles](https://libretto.sh/cli-reference/profiles), [snapshot](https://libretto.sh/cli-reference/snapshot), [exec](https://libretto.sh/cli-reference/exec), [run and resume](https://libretto.sh/cli-reference/run-and-resume), [session logs](https://libretto.sh/cli-reference/session-logs), [pages](https://libretto.sh/cli-reference/pages)
26
+ - Library API: [workflow](https://libretto.sh/library-api/workflow), [AI extraction](https://libretto.sh/library-api/ai-extraction), [network requests](https://libretto.sh/library-api/network-requests), [file downloads](https://libretto.sh/library-api/file-downloads)
27
+ - Hosting: [introduction](https://libretto.sh/hosting/introduction), [GCP](https://libretto.sh/hosting/gcp), [AWS](https://libretto.sh/hosting/aws)
28
+
16
29
  ## Default Integration Approach
17
30
 
18
31
  - Prefer network requests first for new integrations. If the user explicitly asks for Playwright or UI automation, use that approach and do not use the site's internal API.
@@ -4,7 +4,7 @@ description: "Read-only Libretto workflow for diagnosing live browser state with
4
4
  license: MIT
5
5
  metadata:
6
6
  author: saffron-health
7
- version: "0.6.9"
7
+ version: "0.6.11"
8
8
  ---
9
9
 
10
10
  ## How Libretto Read-Only Works
package/src/cli/cli.ts CHANGED
@@ -2,6 +2,7 @@ import { resolveAiSetupStatus } from "./core/ai-model.js";
2
2
  import { ensureLibrettoSetup } from "./core/context.js";
3
3
  import { createCLIApp } from "./router.js";
4
4
  import { warnIfInstalledSkillOutOfDate } from "./core/skill-version.js";
5
+ import { loadEnv } from "../shared/env/load-env.js";
5
6
 
6
7
  function renderUsage(app: ReturnType<typeof createCLIApp>): string {
7
8
  return `${app.renderHelp()}
@@ -48,6 +49,7 @@ function isRootHelpRequest(rawArgs: readonly string[]): boolean {
48
49
  export async function runLibrettoCLI(): Promise<void> {
49
50
  const rawArgs = process.argv.slice(2);
50
51
  let exitCode = 0;
52
+ loadEnv();
51
53
  ensureLibrettoSetup();
52
54
  const app = createCLIApp();
53
55