alvin-bot 4.20.2 → 4.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,6 +17,7 @@
17
17
  import fs from "fs";
18
18
  import path from "path";
19
19
  import { IDENTITY_FILE, PREFERENCES_FILE, PROJECTS_MEMORY_DIR, MEMORY_FILE, } from "../paths.js";
20
+ import { getEffectiveInjectMode } from "./memory-inject-mode.js";
20
21
  const MAX_L0_L1_CHARS = 5000;
21
22
  const MAX_L2_PROJECT_CHARS = 1500;
22
23
  const MAX_L2_TOTAL_CHARS = 3000;
@@ -96,6 +97,9 @@ export function buildLayeredContext(query) {
96
97
  const layers = loadMemoryLayers();
97
98
  const parts = [];
98
99
  let l0l1Chars = 0;
100
+ // identity.md (L0) and preferences.md (L1) are ALWAYS plain-text injected,
101
+ // regardless of inject mode. They're tiny, manually curated, and contain
102
+ // always-on rules that semantic search may miss for short / generic queries.
99
103
  if (layers.identity) {
100
104
  const truncated = layers.identity.length > MAX_L0_L1_CHARS
101
105
  ? layers.identity.slice(0, MAX_L0_L1_CHARS) + "\n[...truncated]"
@@ -111,21 +115,26 @@ export function buildLayeredContext(query) {
111
115
  parts.push("## Preferences (L1)\n" + truncated);
112
116
  l0l1Chars += truncated.length;
113
117
  }
114
- // Backwards-compat: if no identity AND no preferences, use the monolithic
115
- // MEMORY.md as L1 fully (existing user setups). If split files exist,
116
- // include MEMORY.md as a secondary L1 with tighter truncation.
117
- if (!layers.identity && !layers.preferences && layers.longTerm) {
118
- const truncated = layers.longTerm.length > MAX_L0_L1_CHARS
119
- ? layers.longTerm.slice(0, MAX_L0_L1_CHARS) + "\n[...truncated]"
120
- : layers.longTerm;
121
- parts.push("## Long-term Memory (L1, monolithic)\n" + truncated);
122
- }
123
- else if (layers.longTerm) {
124
- const SECONDARY_CAP = 1500;
125
- const truncated = layers.longTerm.length > SECONDARY_CAP
126
- ? layers.longTerm.slice(0, SECONDARY_CAP) + "\n[...truncated]"
127
- : layers.longTerm;
128
- parts.push("## Long-term Memory (L1, legacy MEMORY.md)\n" + truncated);
118
+ // The monolithic MEMORY.md plain-text inject is gated by the effective
119
+ // inject mode (v4.22):
120
+ // legacy: inject as before (full or secondary, depending on split-file presence)
121
+ // sqlite: skip; the same content lives in the SQLite store and is
122
+ // surfaced on-demand via searchMemory() in personality.ts
123
+ const mode = getEffectiveInjectMode();
124
+ if (mode === "legacy" && layers.longTerm) {
125
+ if (!layers.identity && !layers.preferences) {
126
+ const truncated = layers.longTerm.length > MAX_L0_L1_CHARS
127
+ ? layers.longTerm.slice(0, MAX_L0_L1_CHARS) + "\n[...truncated]"
128
+ : layers.longTerm;
129
+ parts.push("## Long-term Memory (L1, monolithic)\n" + truncated);
130
+ }
131
+ else {
132
+ const SECONDARY_CAP = 1500;
133
+ const truncated = layers.longTerm.length > SECONDARY_CAP
134
+ ? layers.longTerm.slice(0, SECONDARY_CAP) + "\n[...truncated]"
135
+ : layers.longTerm;
136
+ parts.push("## Long-term Memory (L1, legacy MEMORY.md)\n" + truncated);
137
+ }
129
138
  }
130
139
  // L2: project-specific, only when a query is provided
131
140
  if (query && layers.projects.length > 0) {
@@ -12,6 +12,7 @@ import { resolve } from "path";
12
12
  import { MEMORY_DIR, MEMORY_FILE } from "../paths.js";
13
13
  import { reindexMemory } from "./embeddings.js";
14
14
  import { buildLayeredContext } from "./memory-layers.js";
15
+ import { getEffectiveInjectMode } from "./memory-inject-mode.js";
15
16
  // Ensure dirs exist
16
17
  if (!fs.existsSync(MEMORY_DIR))
17
18
  fs.mkdirSync(MEMORY_DIR, { recursive: true });
@@ -78,23 +79,28 @@ export function appendDailyLog(entry) {
78
79
  */
79
80
  export function buildMemoryContext(query) {
80
81
  const parts = [];
81
- // L0+L1 (+ matched L2 if query) via layered loader
82
+ const mode = getEffectiveInjectMode();
83
+ // L0+L1 (+ matched L2 if query) via layered loader. The loader itself
84
+ // respects MEMORY_INJECT_MODE for the monolithic MEMORY.md slice.
82
85
  const layered = buildLayeredContext(query);
83
86
  if (layered) {
84
87
  parts.push(layered);
85
88
  }
86
- // Today's log
87
- const todayLog = loadDailyLog();
88
- if (todayLog) {
89
- const truncated = todayLog.length > 1500 ? todayLog.slice(-1500) : todayLog;
90
- parts.push(`## Today's Log\n${truncated}`);
91
- }
92
- // Yesterday's log (for continuity)
93
- const yesterday = new Date(Date.now() - 86_400_000).toISOString().slice(0, 10);
94
- const yesterdayLog = loadDailyLog(yesterday);
95
- if (yesterdayLog) {
96
- const truncated = yesterdayLog.length > 500 ? yesterdayLog.slice(-500) : yesterdayLog;
97
- parts.push(`## Yesterday's Log (summary)\n${truncated}`);
89
+ // Daily logs are bulk-injected only in legacy mode. In sqlite mode they're
90
+ // discoverable via searchMemory() — every log file is indexed individually
91
+ // and surfaced when relevant to the user's query.
92
+ if (mode === "legacy") {
93
+ const todayLog = loadDailyLog();
94
+ if (todayLog) {
95
+ const truncated = todayLog.length > 1500 ? todayLog.slice(-1500) : todayLog;
96
+ parts.push(`## Today's Log\n${truncated}`);
97
+ }
98
+ const yesterday = new Date(Date.now() - 86_400_000).toISOString().slice(0, 10);
99
+ const yesterdayLog = loadDailyLog(yesterday);
100
+ if (yesterdayLog) {
101
+ const truncated = yesterdayLog.length > 500 ? yesterdayLog.slice(-500) : yesterdayLog;
102
+ parts.push(`## Yesterday's Log (summary)\n${truncated}`);
103
+ }
98
104
  }
99
105
  if (parts.length === 0)
100
106
  return "";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "alvin-bot",
3
- "version": "4.20.2",
3
+ "version": "4.22.0",
4
4
  "description": "Alvin Bot — Your personal AI agent on Telegram, WhatsApp, Discord, Signal, and Web.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -0,0 +1,183 @@
1
+ ---
2
+ name: Agent Browser (Snapshot+Ref)
3
+ description: Token-efficient browser automation via the `agent-browser` CLI (Vercel Labs). Uses accessibility-tree snapshots with @eN refs (~200–400 tokens per page) instead of raw HTML parsing — typically 90%+ cheaper than Playwright/Puppeteer. Use for click-fill-extract on public pages, single-page test flows, structured form submission, and screenshots-with-refs. Optional dependency — only active if `agent-browser` is on the PATH; otherwise the regular Browser Automation skill takes over.
4
+ triggers: snapshot the page, get refs, list interactive elements, click @e, fill @e, agent-browser, click button on, click the button, fill in the field, extract from page, find on page, scrape page interactively, visit and click, open page and click, navigate and fill, semantic locator, accessibility tree, snapshot+ref, schau auf der Seite nach, klicke auf den Button, fülle das Feld, formular ausfüllen
5
+ priority: 9
6
+ category: automation
7
+ ---
8
+
9
+ # Agent Browser — Token-Efficient Snapshot+Ref Workflow
10
+
11
+ Use this skill when interactive browser automation is needed (click, fill,
12
+ extract, screenshot) AND `agent-browser` is installed. The accessibility-tree
13
+ snapshot makes per-page interaction roughly an order of magnitude cheaper in
14
+ tokens than parsing rendered HTML with Playwright.
15
+
16
+ ## Pre-flight: is the CLI installed?
17
+
18
+ ```bash
19
+ command -v agent-browser >/dev/null 2>&1 \
20
+ && echo "agent-browser ok" \
21
+ || echo "fall back to the Browser Automation skill"
22
+ ```
23
+
24
+ If absent: **stop and use the regular Browser Automation skill** (Tier 1
25
+ Stealth / Tier 2 CDP). Don't suggest installing it unless the user asks —
26
+ it's an opt-in tool; see `alvin-bot doctor` for installation hints.
27
+
28
+ ## Core loop
29
+
30
+ ```bash
31
+ agent-browser open <url>
32
+ agent-browser snapshot -i # interactive elements, with @e1..@eN refs
33
+ agent-browser click @e3 # act on a ref
34
+ agent-browser snapshot -i # CRITICAL — re-snapshot after every page change
35
+ agent-browser close
36
+ ```
37
+
38
+ Refs (`@e1`, `@e2`, …) are **assigned fresh every snapshot**. They go stale
39
+ the moment the page changes (click that navigates, form submit, dynamic
40
+ re-render, modal open). Always re-snapshot before the next ref interaction.
41
+ Forgetting this single rule is the most common pitfall.
42
+
43
+ A snapshot looks like:
44
+
45
+ ```
46
+ Page: Example - Log in
47
+ URL: https://example.com/login
48
+
49
+ @e1 [heading] "Log in"
50
+ @e2 [form]
51
+ @e3 [input type="email"] placeholder="Email"
52
+ @e4 [input type="password"] placeholder="Password"
53
+ @e5 [button type="submit"] "Continue"
54
+ @e6 [link] "Forgot password?"
55
+ ```
56
+
57
+ ## Common patterns
58
+
59
+ ### Read a page
60
+
61
+ ```bash
62
+ agent-browser snapshot -i # interactive only (preferred)
63
+ agent-browser snapshot -i -u # include href URLs on links
64
+ agent-browser snapshot -i --json # machine-readable
65
+ agent-browser get text @e1 # visible text of an element
66
+ agent-browser get attr @e10 href # any attribute
67
+ agent-browser get url # current URL
68
+ ```
69
+
70
+ ### Interact
71
+
72
+ ```bash
73
+ agent-browser click @e1
74
+ agent-browser fill @e2 "user@example.com" # clear + type
75
+ agent-browser type @e2 " more text" # type without clearing
76
+ agent-browser press Enter
77
+ agent-browser select @e4 "option-value"
78
+ agent-browser upload @e5 file.pdf
79
+ agent-browser scroll down 500
80
+ agent-browser screenshot result.png
81
+ ```
82
+
83
+ ### Wait for the right thing (most failures come from bad waits)
84
+
85
+ ```bash
86
+ agent-browser wait @e1 # until an element appears
87
+ agent-browser wait --text "Success" # until specific text on the page
88
+ agent-browser wait --url "**/dashboard" # until URL matches glob
89
+ agent-browser wait --load networkidle # post-navigation catch-all
90
+ ```
91
+
92
+ Avoid bare `wait 2000` except in throwaway debugging. Default timeout: 25 s.
93
+
94
+ ### Find by semantics when refs aren't ergonomic
95
+
96
+ ```bash
97
+ agent-browser find role button click --name "Submit"
98
+ agent-browser find text "Sign In" click --exact
99
+ agent-browser find label "Email" fill "user@example.com"
100
+ agent-browser find placeholder "Search" type "query"
101
+ agent-browser find testid "submit-btn" click
102
+ ```
103
+
104
+ ### Multiple isolated browser sessions (parallel users)
105
+
106
+ ```bash
107
+ agent-browser --session a open https://app.example.com
108
+ agent-browser --session b open https://app.example.com
109
+ agent-browser --session a fill @e1 "alice@test.com"
110
+ agent-browser --session b fill @e1 "bob@test.com"
111
+ ```
112
+
113
+ ### Persist login across runs
114
+
115
+ ```bash
116
+ # Save once after a successful login:
117
+ agent-browser state save ./auth.json
118
+
119
+ # Resume already-logged-in:
120
+ agent-browser --state ./auth.json open https://app.example.com
121
+ ```
122
+
123
+ ### Auth vault (don't put passwords in shell history)
124
+
125
+ ```bash
126
+ agent-browser auth save my-app --url https://app.example.com/login \
127
+ --username user@example.com --password-stdin
128
+ # (paste password, Ctrl+D)
129
+
130
+ agent-browser auth login my-app
131
+ ```
132
+
133
+ ### Iframes
134
+
135
+ Iframes are inlined in the snapshot — refs work transparently. To scope a
136
+ snapshot to one iframe:
137
+
138
+ ```bash
139
+ agent-browser frame @e3
140
+ agent-browser snapshot -i
141
+ agent-browser frame main
142
+ ```
143
+
144
+ ### Mock network (testing)
145
+
146
+ ```bash
147
+ agent-browser network route "**/api/users" --body '{"users":[]}'
148
+ agent-browser network route "**/analytics" --abort
149
+ agent-browser network har start /tmp/trace.har
150
+ # ... do stuff ...
151
+ agent-browser network har stop
152
+ ```
153
+
154
+ ## When NOT to use this skill
155
+
156
+ | Situation | Skill |
157
+ |---|---|
158
+ | Bot-protected site (Cloudflare, DataDome) | regular **Browser Automation** skill, Tier 1 Stealth |
159
+ | Logged-in personal account on LinkedIn / Gmail | **Browser Automation**, Tier 2 CDP (`alvin-bot browser …`) |
160
+ | User wants to watch a complex flow live | **Browser Automation**, Tier 3 Extension |
161
+ | Static HTML / public JSON / RSS / API | `curl` / WebFetch — no browser engine needed |
162
+
163
+ agent-browser is great for **task automation on cooperative pages** (your
164
+ own apps, public data sites, form submissions). It is *not* a stealth tool.
165
+
166
+ ## Diagnostics
167
+
168
+ ```bash
169
+ agent-browser doctor # full env check
170
+ agent-browser doctor --quick # local-only
171
+ agent-browser dashboard start # observability UI on :4848
172
+ agent-browser skills get core # the upstream tool's own usage guide
173
+ ```
174
+
175
+ ## One-liner sanity test
176
+
177
+ ```bash
178
+ agent-browser open https://example.com \
179
+ && agent-browser snapshot -i \
180
+ && agent-browser close
181
+ ```
182
+
183
+ Expect two `@e` refs (heading + link). If that works, the tool is healthy.
@@ -15,12 +15,20 @@ Du hast drei Browser-Strategien plus WebFetch. **Wähle die billigste passende S
15
15
  | Task | Tool | Warum |
16
16
  |------|------|-------|
17
17
  | Einzelne öffentliche Seite, nur Text | `curl` oder WebFetch | Am schnellsten, keine Browser-Engine |
18
+ | Interaktiv (klicken/füllen/extrahieren) auf kooperativer Seite | **Tier 1.5 agent-browser** *(falls installiert)* | Snapshot+Ref-Workflow ist ~90 % token-günstiger als rohes Playwright. Siehe Skill „Agent Browser". |
18
19
  | Öffentliche Seite mit JS / Cloudflare | **Tier 1 Stealth** | Headless + Fingerprint-Masking |
19
20
  | Login-pflichtige Seite (LinkedIn, Gmail, …) | **Tier 2 CDP** | Echtes Chromium, persistente Cookies |
20
21
  | Komplexer Multi-Step-Flow, User soll zusehen | **Tier 3 Extension** | Nur in interaktiven CLI-Sessions |
21
22
 
22
23
  **NIEMALS** nacktes `node -e "const {chromium}…"` für externe Seiten — wird sofort geblockt.
23
24
 
25
+ **Vorab prüfen ob agent-browser verfügbar ist:**
26
+ ```bash
27
+ command -v agent-browser >/dev/null 2>&1 && echo "Tier 1.5 verfügbar"
28
+ ```
29
+ Falls ja und der Task „klick X, lies Y, fülle Z aus“ lautet → den `agent-browser`-Skill nehmen.
30
+ Falls nein → mit Tier 1/2/3 weitermachen wie unten. Installation auf Wunsch des Users: `npm i -g agent-browser && agent-browser install`.
31
+
24
32
  ---
25
33
 
26
34
  ## Tier 0 — curl / WebFetch (schnellster Pfad)