alvin-bot 4.15.2 → 4.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +51 -18
- package/README.md +13 -13
- package/bin/cli.js +124 -0
- package/dist/handlers/platform-message.js +2 -2
- package/dist/paths.js +12 -2
- package/dist/services/alvin-mcp-tools.js +1 -1
- package/dist/services/asset-index.js +5 -11
- package/dist/services/browser-manager.js +19 -6
- package/dist/services/cdp-bootstrap.js +351 -0
- package/dist/services/memory-layers.js +1 -1
- package/dist/services/personality.js +1 -1
- package/dist/services/session.js +1 -1
- package/dist/services/skills.js +4 -7
- package/dist/services/workspaces.js +4 -4
- package/docs/security.md +4 -4
- package/package.json +1 -1
- package/skills/browse/SKILL.md +77 -70
- package/skills/social-fetch/SKILL.md +3 -3
- package/skills/webcheck/SKILL.md +1 -1
- package/test/async-agent-chunk-flow.test.ts +1 -1
- package/test/claude-sdk-tool-use-id.test.ts +1 -1
- package/test/memory-extractor.test.ts +10 -10
- package/test/memory-layers.test.ts +15 -15
- package/test/memory-sdk-injection.test.ts +4 -4
- package/test/memory-stress-restart.test.ts +2 -2
- package/test/multi-session-stress.test.ts +21 -21
- package/test/platform-session-key.test.ts +2 -2
- package/test/slack-test-connection.test.ts +3 -3
- package/test/subagent-delivery-platform-routing.test.ts +2 -2
- package/test/telegram-workspace-command.test.ts +5 -5
- package/test/workspaces.test.ts +32 -32
package/skills/browse/SKILL.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: Browser Automation
|
|
3
|
-
description: 3-tier browser control — stealth scraping, CDP with persistent cookies
|
|
3
|
+
description: 3-tier browser control — WebFetch for plain pages, stealth scraping for JS/Cloudflare, CDP with persistent cookies for login-walled sites. Navigate, screenshot, extract text, interact with logged-in pages.
|
|
4
4
|
triggers: browse, browser, test webapp, test app, test website, screenshot page, interact with, click on, fill form, visual test, qa test, check page, open page, test my app, browse to, open url, puppeteer, playwright, browser automation, linkedin, stepstone, indeed, scrape, fetch page, crawl, teste die seite, teste die app, schau dir an, öffne die seite, teste mal, visual check, check the ui, check the page, webseite öffnen, seite abrufen
|
|
5
5
|
priority: 8
|
|
6
6
|
category: automation
|
|
@@ -14,97 +14,112 @@ Du hast drei Browser-Strategien plus WebFetch. **Wähle die billigste passende S
|
|
|
14
14
|
|
|
15
15
|
| Task | Tool | Warum |
|
|
16
16
|
|------|------|-------|
|
|
17
|
-
| Einzelne öffentliche Seite, nur Text |
|
|
17
|
+
| Einzelne öffentliche Seite, nur Text | `curl` oder WebFetch | Am schnellsten, keine Browser-Engine |
|
|
18
18
|
| Öffentliche Seite mit JS / Cloudflare | **Tier 1 Stealth** | Headless + Fingerprint-Masking |
|
|
19
|
-
| Login-pflichtige Seite (LinkedIn, Gmail, …) | **Tier 2 CDP** | Echtes
|
|
20
|
-
| Komplexer Multi-Step-Flow, User soll zusehen | **Tier 3 Extension** |
|
|
19
|
+
| Login-pflichtige Seite (LinkedIn, Gmail, …) | **Tier 2 CDP** | Echtes Chromium, persistente Cookies |
|
|
20
|
+
| Komplexer Multi-Step-Flow, User soll zusehen | **Tier 3 Extension** | Nur in interaktiven CLI-Sessions |
|
|
21
21
|
|
|
22
|
-
**NIEMALS**
|
|
22
|
+
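**NIEMALS** nacktes `node -e "const {chromium}…"` ohne User-Agent-Maskierung für externe Seiten — wird sofort geblockt. (Das Tier-1-Snippet weiter unten setzt deshalb einen echten Browser-User-Agent.)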
|
|
23
23
|
|
|
24
24
|
---
|
|
25
25
|
|
|
26
|
-
## Tier 0 —
|
|
26
|
+
## Tier 0 — curl / WebFetch (schnellster Pfad)
|
|
27
27
|
|
|
28
28
|
Für statische Seiten oder APIs, die kein JS-Rendering brauchen:
|
|
29
29
|
|
|
30
30
|
```bash
|
|
31
|
-
# Direkter curl
|
|
32
31
|
curl -sL -H "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" \
|
|
33
|
-
"https://
|
|
34
|
-
|
|
35
|
-
# Oder das WebFetch-Tool, wenn verfügbar (interpretiert Inhalt direkt)
|
|
32
|
+
"https://example.com/public-page"
|
|
36
33
|
```
|
|
37
34
|
|
|
38
|
-
Wenn das einen 403/Captcha
|
|
35
|
+
Wenn das einen 403/Captcha liefert → eskaliere auf Tier 1.
|
|
39
36
|
|
|
40
37
|
---
|
|
41
38
|
|
|
42
39
|
## Tier 1 — Playwright Stealth (headless, schnell, maskiert)
|
|
43
40
|
|
|
44
|
-
|
|
41
|
+
Für Seiten mit JS-Rendering oder Bot-Detection. Der Bot hat eine eingebaute Stealth-Pipeline; keine Hub-Scripts nötig.
|
|
45
42
|
|
|
46
|
-
|
|
47
|
-
# Seite laden, JSON-Metadata zurück (title, url, html_length)
|
|
48
|
-
~/.claude/hub/SCRIPTS/browser.sh stealth "https://www.stepstone.de/jobs/it-delivery"
|
|
43
|
+
**Empfohlener Weg — Bot-API:** Der interne `browser-manager` wählt automatisch die richtige Strategie. Für Scripts direkt nutzbar:
|
|
49
44
|
|
|
50
|
-
|
|
51
|
-
|
|
45
|
+
```bash
|
|
46
|
+
# Falls ein externes Dev-Hub-Script vorhanden ist, kann es genutzt werden:
|
|
47
|
+
# ~/.claude/hub/SCRIPTS/browser.sh stealth "<url>"
|
|
52
48
|
```
|
|
53
49
|
|
|
54
|
-
|
|
50
|
+
Ansonsten direkt über Playwright in einem kurzen Node-Script:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
node -e "
|
|
54
|
+
(async () => {
|
|
55
|
+
const { chromium } = require('playwright');
|
|
56
|
+
const b = await chromium.launch({ headless: true });
|
|
57
|
+
const p = await b.newPage({ userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' });
|
|
58
|
+
await p.goto(process.argv[1], { waitUntil: 'domcontentloaded', timeout: 30000 });
|
|
59
|
+
console.log(JSON.stringify({ url: p.url(), title: await p.title() }));
|
|
60
|
+
await b.close();
|
|
61
|
+
})();
|
|
62
|
+
" "https://example.com"
|
|
63
|
+
```
|
|
55
64
|
|
|
56
|
-
**Wann blockt das:** reCAPTCHA v3, aggressive Cloudflare, Login-Walls.
|
|
65
|
+
**Wann blockt das:** reCAPTCHA v3, aggressive Cloudflare, Login-Walls → eskaliere auf Tier 2.
|
|
57
66
|
|
|
58
|
-
**Konkrete funktionierende Targets (Stand 2026):**
|
|
59
|
-
- StepStone (alle Job-Suchen) ✅
|
|
60
|
-
- Michael Page ✅
|
|
61
|
-
- Hays ✅
|
|
62
|
-
- Öffentliche Blog-Posts, News-Sites ✅
|
|
63
|
-
- LinkedIn (ohne Login) ❌ → Tier 2
|
|
64
|
-
- Indeed / Glassdoor ❌ (403 Scraping-Block) → nur über E-Mail-Alerts
|
|
67
|
+
**Konkrete funktionierende Targets (Stand 2026):** StepStone, Michael Page, Hays, Blogs, News-Sites.
|
|
65
68
|
|
|
66
69
|
---
|
|
67
70
|
|
|
68
|
-
## Tier 2 —
|
|
71
|
+
## Tier 2 — Chromium CDP (Bot-managed, persistent Profile)
|
|
69
72
|
|
|
70
|
-
Echtes
|
|
73
|
+
Echtes Chromium mit Profil unter `~/.alvin-bot/browser/profile/`. Login-Cookies für LinkedIn/Gmail/etc. bleiben über Sessions erhalten.
|
|
74
|
+
|
|
75
|
+
**Bot-CLI (empfohlen — funktioniert auf jedem Alvin-Bot-Install):**
|
|
71
76
|
|
|
72
77
|
```bash
|
|
73
|
-
#
|
|
74
|
-
|
|
75
|
-
|
|
78
|
+
# Starten (headless als Default — perfekt für Cron/Daemon)
|
|
79
|
+
alvin-bot browser start
|
|
80
|
+
alvin-bot browser start headful # sichtbar, wenn User zusehen soll
|
|
76
81
|
|
|
77
82
|
# Navigieren
|
|
78
|
-
|
|
83
|
+
alvin-bot browser goto "https://www.linkedin.com/jobs/search/?keywords=IT+Director"
|
|
84
|
+
|
|
85
|
+
# Screenshot → speichert nach ~/.alvin-bot/browser/screenshots/
|
|
86
|
+
alvin-bot browser shot "https://www.linkedin.com/feed/" linkedin_feed.png
|
|
79
87
|
|
|
80
|
-
#
|
|
81
|
-
|
|
88
|
+
# JS in Seite ausführen
|
|
89
|
+
alvin-bot browser eval "https://example.com" "document.title"
|
|
82
90
|
|
|
83
91
|
# Tabs auflisten
|
|
84
|
-
|
|
92
|
+
alvin-bot browser tabs
|
|
93
|
+
|
|
94
|
+
# Beenden (meistens nicht nötig — Chromium läuft persistent bis Bot-Neustart)
|
|
95
|
+
alvin-bot browser stop
|
|
85
96
|
|
|
86
|
-
#
|
|
87
|
-
|
|
97
|
+
# Diagnose bei Problemen
|
|
98
|
+
alvin-bot browser doctor
|
|
88
99
|
```
|
|
89
100
|
|
|
90
|
-
**
|
|
91
|
-
> "Bitte einmal in Chrome (Hub-Profil) bei LinkedIn einloggen. Cookies bleiben dann dauerhaft erhalten."
|
|
101
|
+
**Architektur:** Der Bot nutzt Playwright's gebundeltes Chromium ("Google Chrome for Testing"), nicht das normale User-Chrome. Keine LaunchServices-Kollision mit parallel laufendem Chrome. Erste Einrichtung nach `npm install`:
|
|
92
102
|
|
|
93
|
-
|
|
103
|
+
```bash
|
|
104
|
+
# Playwright-Chromium einmal installieren
|
|
105
|
+
npx playwright install chromium
|
|
106
|
+
```
|
|
94
107
|
|
|
95
|
-
**
|
|
108
|
+
**Login-Setup (einmalig):** Falls die Seite ausgeloggt ist, den User fragen:
|
|
109
|
+
> "Bitte einmal in Chromium (Bot-Profil) bei <Seite> einloggen. Cookies bleiben dann dauerhaft erhalten."
|
|
110
|
+
|
|
111
|
+
Starten mit `alvin-bot browser start headful`, der User loggt sich ein → ab dann persistiert das Profil unter `~/.alvin-bot/browser/profile/`.
|
|
112
|
+
|
|
113
|
+
**Wie teste ich ob eingeloggt:** nach `goto` die URL prüfen — wenn `/authwall` oder `/login` im Pfad steht, bist du ausgeloggt.
|
|
96
114
|
|
|
97
115
|
---
|
|
98
116
|
|
|
99
117
|
## Tier 3 — Claude-in-Chrome Extension (visuelle Kontrolle)
|
|
100
118
|
|
|
101
|
-
Nur in interaktiven CLI-Sessions, nicht im
|
|
119
|
+
Nur in interaktiven Claude Code CLI-Sessions verfügbar, **nicht** im Bot-Daemon.
|
|
102
120
|
|
|
103
121
|
```bash
|
|
104
|
-
#
|
|
105
|
-
~/.claude/hub/SCRIPTS/browser.sh ext check
|
|
106
|
-
|
|
107
|
-
# Dann MCP-Tools über ToolSearch laden:
|
|
122
|
+
# MCP-Tools über ToolSearch laden:
|
|
108
123
|
# mcp__claude-in-chrome__tabs_context_mcp
|
|
109
124
|
# mcp__claude-in-chrome__navigate
|
|
110
125
|
# mcp__claude-in-chrome__computer
|
|
@@ -117,45 +132,37 @@ Nur in interaktiven CLI-Sessions, nicht im Cron/Daemon.
|
|
|
117
132
|
## Eskalations-Regel (PFLICHT)
|
|
118
133
|
|
|
119
134
|
```
|
|
120
|
-
Öffentliche Text-Seite → Tier 0 (WebFetch
|
|
135
|
+
Öffentliche Text-Seite → Tier 0 (curl/WebFetch)
|
|
121
136
|
↓ 403/Cloudflare/leerer HTML?
|
|
122
|
-
Tier 1 (stealth) →
|
|
137
|
+
Tier 1 (stealth) → Node+Playwright headless
|
|
123
138
|
↓ Captcha/Login-Wall?
|
|
124
|
-
Tier 2 (CDP) →
|
|
139
|
+
Tier 2 (CDP) → alvin-bot browser start + goto <url>
|
|
125
140
|
↓ Cookies fehlen?
|
|
126
|
-
|
|
141
|
+
Den User fragen: "Bitte einmal in Chromium bei <Seite> einloggen, dann kann ich weitermachen."
|
|
127
142
|
```
|
|
128
143
|
|
|
129
144
|
**NIEMALS aufgeben mit "Browser funktioniert nicht"** — es gibt immer einen nächsten Schritt. Lieber ehrlich melden "Tier 1 blockt mit Captcha, versuche Tier 2" als "Failed to load".
|
|
130
145
|
|
|
131
|
-
## Status
|
|
146
|
+
## Status & Diagnose
|
|
132
147
|
|
|
133
148
|
```bash
|
|
134
|
-
#
|
|
135
|
-
|
|
149
|
+
# Aktueller CDP-Zustand
|
|
150
|
+
alvin-bot browser status
|
|
136
151
|
|
|
137
|
-
#
|
|
152
|
+
# Vollständige Diagnose (Binary, Port, PID, Profile-Lock, Chrome-Konflikt)
|
|
153
|
+
alvin-bot browser doctor
|
|
154
|
+
|
|
155
|
+
# Raw check ob CDP-Endpoint antwortet
|
|
138
156
|
curl -s http://127.0.0.1:9222/json/version | head -c 200
|
|
139
157
|
```
|
|
140
158
|
|
|
141
159
|
## Screenshot-Ausgabe ansehen
|
|
142
160
|
|
|
143
|
-
Screenshots
|
|
144
|
-
|
|
145
|
-
## Interaktive Ops (Klicken, Formular füllen)
|
|
146
|
-
|
|
147
|
-
Für einfache Fälle: `cdp eval` mit JavaScript, das in der Seite ausgeführt wird:
|
|
148
|
-
|
|
149
|
-
```bash
|
|
150
|
-
~/.claude/hub/SCRIPTS/browser.sh cdp eval "https://example.com/login" \
|
|
151
|
-
"document.querySelector('#username').value='test'; document.querySelector('#password').value='pw'; document.querySelector('form').submit();"
|
|
152
|
-
```
|
|
153
|
-
|
|
154
|
-
Für komplexere Flows (sequentielles Klicken nach DOM-Updates) → Tier 3 (Extension) nutzen.
|
|
161
|
+
Screenshots landen in `~/.alvin-bot/browser/screenshots/` (wenn nur ein Dateiname angegeben ist) oder unter dem angegebenen absoluten Pfad. Das Read-Tool auf den Pfad zeigt dir das Bild direkt an.
|
|
155
162
|
|
|
156
163
|
## Wichtige Notes
|
|
157
164
|
|
|
158
|
-
- **
|
|
159
|
-
- **Headless vs Headful:** Im Cron/Daemon
|
|
160
|
-
- **
|
|
161
|
-
- **
|
|
165
|
+
- **Profile-Konflikt:** Chromium kann `~/.alvin-bot/browser/profile/` nicht doppelt öffnen. `alvin-bot browser doctor` zeigt stale Locks.
|
|
166
|
+
- **Headless vs Headful:** Im Cron/Daemon IMMER `headless` (Default) — headful scheitert an fehlendem Display.
|
|
167
|
+
- **Persistenz:** Cookies, LocalStorage, IndexedDB — alles in `~/.alvin-bot/browser/profile/`. Überlebt Bot-Restarts.
|
|
168
|
+
- **Kein User-Chrome-Konflikt:** Das Bot-Chromium ist ein separates Binary (Chrome-for-Testing) und läuft parallel zum normalen Chrome ohne LaunchServices-Kollision.
|
|
@@ -78,7 +78,7 @@ print(f'Has images: {has_images}')
|
|
|
78
78
|
```
|
|
79
79
|
|
|
80
80
|
**Wenn yt-dlp fehlschlaegt** (private IG Posts etc.):
|
|
81
|
-
1. Tier 2 CDP Browser:
|
|
81
|
+
1. Tier 2 CDP Browser: `alvin-bot browser goto <url>` + `alvin-bot browser shot <url>`
|
|
82
82
|
2. Cookies exportieren fuer yt-dlp: `--cookies /tmp/ig-cookies.txt`
|
|
83
83
|
3. Oder `instaloader` fuer Instagram: `python3 -m instaloader -- -<shortcode>`
|
|
84
84
|
|
|
@@ -264,7 +264,7 @@ Nach dem Security-Check IMMER eine klare Empfehlung:
|
|
|
264
264
|
|
|
265
265
|
## Phase 6: Nutzwert-Bewertung
|
|
266
266
|
|
|
267
|
-
### Fuer
|
|
267
|
+
### Nutzwert fuer den User bewerten
|
|
268
268
|
|
|
269
269
|
| Dimension | Fragen |
|
|
270
270
|
|-----------|--------|
|
|
@@ -382,4 +382,4 @@ Max 50 Zeichen, lowercase, Bindestriche, keine Sonderzeichen.
|
|
|
382
382
|
- **Engagement-Metriken:** Like/View Ratio als Qualitaetssignal
|
|
383
383
|
- **Creator-Profil:** Schneller Check wer der Creator ist (Expertise, Follower, Posting-Frequenz)
|
|
384
384
|
- **Vergleichbare Posts:** Gibt es bessere/ausfuehrlichere Quellen zum gleichen Thema?
|
|
385
|
-
- **Content-Inspiration:** Falls
|
|
385
|
+
- **Content-Inspiration:** Falls thematisch passend — als Inspiration vormerken?
|
package/skills/webcheck/SKILL.md
CHANGED
|
@@ -147,4 +147,4 @@ Nach dem Audit immer konkrete Empfehlungen mit Prioritaet:
|
|
|
147
147
|
- 🟡 MITTEL — SEO/Best-Practice, bei Gelegenheit
|
|
148
148
|
- 🟢 NIEDRIG — Nice-to-have
|
|
149
149
|
|
|
150
|
-
Falls
|
|
150
|
+
Falls DNS-Provider-Login noetig (CAA, DKIM, DMARC): CDP Chrome nutzen falls Session aktiv, sonst User fragen.
|
|
@@ -27,14 +27,14 @@ describe("memory-extractor (v4.11.0)", () => {
|
|
|
27
27
|
it("parseExtractedFacts handles a clean JSON response", async () => {
|
|
28
28
|
const { parseExtractedFacts } = await import("../src/services/memory-extractor.js");
|
|
29
29
|
const json = JSON.stringify({
|
|
30
|
-
user_facts: ["User
|
|
30
|
+
user_facts: ["User User lives in Berlin"],
|
|
31
31
|
preferences: ["Replies in German"],
|
|
32
|
-
decisions: ["Use
|
|
32
|
+
decisions: ["Use VPS VPS for production"],
|
|
33
33
|
});
|
|
34
34
|
const facts = parseExtractedFacts(json);
|
|
35
|
-
expect(facts.user_facts).toEqual(["User
|
|
35
|
+
expect(facts.user_facts).toEqual(["User User lives in Berlin"]);
|
|
36
36
|
expect(facts.preferences).toEqual(["Replies in German"]);
|
|
37
|
-
expect(facts.decisions).toEqual(["Use
|
|
37
|
+
expect(facts.decisions).toEqual(["Use VPS VPS for production"]);
|
|
38
38
|
});
|
|
39
39
|
|
|
40
40
|
it("parseExtractedFacts handles JSON wrapped in markdown code fences", async () => {
|
|
@@ -77,33 +77,33 @@ Hope this helps!`;
|
|
|
77
77
|
it("appendFactsToMemoryFile writes new facts under structured headers", async () => {
|
|
78
78
|
const { appendFactsToMemoryFile } = await import("../src/services/memory-extractor.js");
|
|
79
79
|
await appendFactsToMemoryFile({
|
|
80
|
-
user_facts: ["
|
|
80
|
+
user_facts: ["User uses launchd for the bot"],
|
|
81
81
|
preferences: [],
|
|
82
82
|
decisions: ["v4.11.0 ships memory persistence"],
|
|
83
83
|
});
|
|
84
84
|
const memFile = resolve(TEST_DATA_DIR, "memory", "MEMORY.md");
|
|
85
85
|
expect(fs.existsSync(memFile)).toBe(true);
|
|
86
86
|
const content = fs.readFileSync(memFile, "utf-8");
|
|
87
|
-
expect(content).toMatch(/
|
|
87
|
+
expect(content).toMatch(/User uses launchd for the bot/);
|
|
88
88
|
expect(content).toMatch(/v4\.11\.0 ships memory persistence/);
|
|
89
89
|
});
|
|
90
90
|
|
|
91
91
|
it("appendFactsToMemoryFile dedupes on exact-string match", async () => {
|
|
92
92
|
const { appendFactsToMemoryFile } = await import("../src/services/memory-extractor.js");
|
|
93
93
|
await appendFactsToMemoryFile({
|
|
94
|
-
user_facts: ["
|
|
94
|
+
user_facts: ["User uses launchd for the bot"],
|
|
95
95
|
preferences: [],
|
|
96
96
|
decisions: [],
|
|
97
97
|
});
|
|
98
98
|
await appendFactsToMemoryFile({
|
|
99
|
-
user_facts: ["
|
|
99
|
+
user_facts: ["User uses launchd for the bot", "User drinks coffee"],
|
|
100
100
|
preferences: [],
|
|
101
101
|
decisions: [],
|
|
102
102
|
});
|
|
103
103
|
const content = fs.readFileSync(resolve(TEST_DATA_DIR, "memory", "MEMORY.md"), "utf-8");
|
|
104
|
-
const matches = content.match(/
|
|
104
|
+
const matches = content.match(/User uses launchd for the bot/g);
|
|
105
105
|
expect(matches).toHaveLength(1); // not duplicated
|
|
106
|
-
expect(content).toMatch(/
|
|
106
|
+
expect(content).toMatch(/User drinks coffee/);
|
|
107
107
|
});
|
|
108
108
|
|
|
109
109
|
it("appendFactsToMemoryFile returns 0 when all facts are duplicates", async () => {
|
|
@@ -40,11 +40,11 @@ describe("memory-layers (v4.11.0)", () => {
|
|
|
40
40
|
it("loads identity.md as L0 always", async () => {
|
|
41
41
|
fs.writeFileSync(
|
|
42
42
|
resolve(TEST_DATA_DIR, "memory", "identity.md"),
|
|
43
|
-
"# Identity\n\nName:
|
|
43
|
+
"# Identity\n\nName: Test User\nLocation: Berlin",
|
|
44
44
|
);
|
|
45
45
|
const { loadMemoryLayers } = await import("../src/services/memory-layers.js");
|
|
46
46
|
const layered = loadMemoryLayers();
|
|
47
|
-
expect(layered.identity).toMatch(/
|
|
47
|
+
expect(layered.identity).toMatch(/Test User/);
|
|
48
48
|
});
|
|
49
49
|
|
|
50
50
|
it("loads preferences.md as L1 always", async () => {
|
|
@@ -70,8 +70,8 @@ describe("memory-layers (v4.11.0)", () => {
|
|
|
70
70
|
it("loads projects/*.md and exposes them with their filename as topic", async () => {
|
|
71
71
|
fs.mkdirSync(resolve(TEST_DATA_DIR, "memory", "projects"), { recursive: true });
|
|
72
72
|
fs.writeFileSync(
|
|
73
|
-
resolve(TEST_DATA_DIR, "memory", "projects", "
|
|
74
|
-
"#
|
|
73
|
+
resolve(TEST_DATA_DIR, "memory", "projects", "my-project.md"),
|
|
74
|
+
"# my-project\nVPS: 10.0.0.1, runs nginx + pm2",
|
|
75
75
|
);
|
|
76
76
|
fs.writeFileSync(
|
|
77
77
|
resolve(TEST_DATA_DIR, "memory", "projects", "homes.md"),
|
|
@@ -81,14 +81,14 @@ describe("memory-layers (v4.11.0)", () => {
|
|
|
81
81
|
const layered = loadMemoryLayers();
|
|
82
82
|
expect(layered.projects).toHaveLength(2);
|
|
83
83
|
const topics = layered.projects.map(p => p.topic).sort();
|
|
84
|
-
expect(topics).toEqual(["
|
|
84
|
+
expect(topics).toEqual(["homes", "my-project"]);
|
|
85
85
|
expect(layered.projects.find(p => p.topic === "homes")?.content).toMatch(/homes_production/);
|
|
86
86
|
});
|
|
87
87
|
|
|
88
88
|
it("buildLayeredContext returns all L0+L1 plus matching L2 by topic keyword", async () => {
|
|
89
89
|
fs.writeFileSync(
|
|
90
90
|
resolve(TEST_DATA_DIR, "memory", "identity.md"),
|
|
91
|
-
"Name:
|
|
91
|
+
"Name: User",
|
|
92
92
|
);
|
|
93
93
|
fs.writeFileSync(
|
|
94
94
|
resolve(TEST_DATA_DIR, "memory", "preferences.md"),
|
|
@@ -100,24 +100,24 @@ describe("memory-layers (v4.11.0)", () => {
|
|
|
100
100
|
"HOMES uses Postgres",
|
|
101
101
|
);
|
|
102
102
|
fs.writeFileSync(
|
|
103
|
-
resolve(TEST_DATA_DIR, "memory", "projects", "
|
|
104
|
-
"
|
|
103
|
+
resolve(TEST_DATA_DIR, "memory", "projects", "my-project.md"),
|
|
104
|
+
"my-project uses MySQL",
|
|
105
105
|
);
|
|
106
106
|
|
|
107
107
|
const { buildLayeredContext } = await import("../src/services/memory-layers.js");
|
|
108
108
|
|
|
109
109
|
// Query mentions HOMES → only the homes project should be loaded
|
|
110
110
|
const ctx = buildLayeredContext("Tell me about HOMES backups");
|
|
111
|
-
expect(ctx).toMatch(/Name:
|
|
111
|
+
expect(ctx).toMatch(/Name: User/); // L0
|
|
112
112
|
expect(ctx).toMatch(/Be terse/); // L1
|
|
113
113
|
expect(ctx).toMatch(/HOMES uses Postgres/); // L2 matched
|
|
114
|
-
expect(ctx).not.toMatch(/
|
|
114
|
+
expect(ctx).not.toMatch(/my-project uses MySQL/); // L2 not matched
|
|
115
115
|
});
|
|
116
116
|
|
|
117
117
|
it("buildLayeredContext without a query returns L0+L1 only (boot-up brief)", async () => {
|
|
118
118
|
fs.writeFileSync(
|
|
119
119
|
resolve(TEST_DATA_DIR, "memory", "identity.md"),
|
|
120
|
-
"Name:
|
|
120
|
+
"Name: User",
|
|
121
121
|
);
|
|
122
122
|
fs.writeFileSync(
|
|
123
123
|
resolve(TEST_DATA_DIR, "memory", "preferences.md"),
|
|
@@ -131,14 +131,14 @@ describe("memory-layers (v4.11.0)", () => {
|
|
|
131
131
|
|
|
132
132
|
const { buildLayeredContext } = await import("../src/services/memory-layers.js");
|
|
133
133
|
const ctx = buildLayeredContext();
|
|
134
|
-
expect(ctx).toMatch(/Name:
|
|
134
|
+
expect(ctx).toMatch(/Name: User/);
|
|
135
135
|
expect(ctx).not.toMatch(/Postgres/); // L2 only loaded with a query
|
|
136
136
|
});
|
|
137
137
|
|
|
138
138
|
it("token budget: layered context truncates long projects to fit budget", async () => {
|
|
139
139
|
fs.writeFileSync(
|
|
140
140
|
resolve(TEST_DATA_DIR, "memory", "identity.md"),
|
|
141
|
-
"Name:
|
|
141
|
+
"Name: User",
|
|
142
142
|
);
|
|
143
143
|
fs.mkdirSync(resolve(TEST_DATA_DIR, "memory", "projects"), { recursive: true });
|
|
144
144
|
const longContent = "homes ".repeat(2000); // ~12000 chars
|
|
@@ -155,7 +155,7 @@ describe("memory-layers (v4.11.0)", () => {
|
|
|
155
155
|
it("monolithic MEMORY.md and split files coexist (split takes priority, mono is secondary)", async () => {
|
|
156
156
|
fs.writeFileSync(
|
|
157
157
|
resolve(TEST_DATA_DIR, "memory", "identity.md"),
|
|
158
|
-
"Name:
|
|
158
|
+
"Name: User",
|
|
159
159
|
);
|
|
160
160
|
fs.writeFileSync(
|
|
161
161
|
resolve(TEST_DATA_DIR, "memory", "MEMORY.md"),
|
|
@@ -163,7 +163,7 @@ describe("memory-layers (v4.11.0)", () => {
|
|
|
163
163
|
);
|
|
164
164
|
const { buildLayeredContext } = await import("../src/services/memory-layers.js");
|
|
165
165
|
const ctx = buildLayeredContext("anything");
|
|
166
|
-
expect(ctx).toMatch(/Name:
|
|
166
|
+
expect(ctx).toMatch(/Name: User/); // L0
|
|
167
167
|
expect(ctx).toMatch(/Old fact still there/); // legacy still included
|
|
168
168
|
});
|
|
169
169
|
});
|
|
@@ -20,7 +20,7 @@ beforeEach(() => {
|
|
|
20
20
|
fs.mkdirSync(resolve(TEST_DATA_DIR, "memory"), { recursive: true });
|
|
21
21
|
fs.writeFileSync(
|
|
22
22
|
resolve(TEST_DATA_DIR, "memory", "MEMORY.md"),
|
|
23
|
-
"# Long-term Memory\n\n- User
|
|
23
|
+
"# Long-term Memory\n\n- User User prefers terse answers.\n- HOMES uses Postgres `homes_production`.\n",
|
|
24
24
|
);
|
|
25
25
|
process.env.ALVIN_DATA_DIR = TEST_DATA_DIR;
|
|
26
26
|
vi.resetModules();
|
|
@@ -30,14 +30,14 @@ describe("SDK memory injection (v4.11.0)", () => {
|
|
|
30
30
|
it("buildSystemPrompt(isSDK=true) now includes MEMORY.md content", async () => {
|
|
31
31
|
const { buildSystemPrompt } = await import("../src/services/personality.js");
|
|
32
32
|
const prompt = buildSystemPrompt(true, "en", "1234");
|
|
33
|
-
expect(prompt).toMatch(/User
|
|
33
|
+
expect(prompt).toMatch(/User User prefers terse answers/);
|
|
34
34
|
expect(prompt).toMatch(/HOMES uses Postgres/);
|
|
35
35
|
});
|
|
36
36
|
|
|
37
37
|
it("non-SDK still gets memory injection (regression check)", async () => {
|
|
38
38
|
const { buildSystemPrompt } = await import("../src/services/personality.js");
|
|
39
39
|
const prompt = buildSystemPrompt(false, "en", "1234");
|
|
40
|
-
expect(prompt).toMatch(/User
|
|
40
|
+
expect(prompt).toMatch(/User User prefers terse answers/);
|
|
41
41
|
});
|
|
42
42
|
|
|
43
43
|
it("no MEMORY.md → SDK prompt builds without crash and without memory section", async () => {
|
|
@@ -141,6 +141,6 @@ describe("SDK smart prompt (semantic recall) on first turn (v4.11.0)", () => {
|
|
|
141
141
|
|
|
142
142
|
const prompt = await buildSmartSystemPrompt(true, "en", "test query", "1234", true);
|
|
143
143
|
expect(prompt).toBeTruthy();
|
|
144
|
-
expect(prompt).toMatch(/User
|
|
144
|
+
expect(prompt).toMatch(/User User prefers terse answers/); // base still works
|
|
145
145
|
});
|
|
146
146
|
});
|
|
@@ -164,7 +164,7 @@ describe("memory persistence stress (v4.11.0)", () => {
|
|
|
164
164
|
it("memory-layers handles unicode in identity and projects", async () => {
|
|
165
165
|
fs.writeFileSync(
|
|
166
166
|
resolve(TEST_DATA_DIR, "memory", "identity.md"),
|
|
167
|
-
"Name:
|
|
167
|
+
"Name: Test User 🦊\nLocation: Berlin",
|
|
168
168
|
);
|
|
169
169
|
fs.mkdirSync(resolve(TEST_DATA_DIR, "memory", "projects"), { recursive: true });
|
|
170
170
|
fs.writeFileSync(
|
|
@@ -255,7 +255,7 @@ describe("memory persistence stress (v4.11.0)", () => {
|
|
|
255
255
|
});
|
|
256
256
|
|
|
257
257
|
it("layered context with very long identity stays under budget", async () => {
|
|
258
|
-
const longIdentity = "Name:
|
|
258
|
+
const longIdentity = "Name: User. ".repeat(2000); // 24000 chars
|
|
259
259
|
fs.writeFileSync(
|
|
260
260
|
resolve(TEST_DATA_DIR, "memory", "identity.md"),
|
|
261
261
|
longIdentity,
|
|
@@ -40,9 +40,9 @@ function writeWs(name: string, purpose: string, body: string, channels: string[]
|
|
|
40
40
|
|
|
41
41
|
describe("multi-session stress (v4.12.0)", () => {
|
|
42
42
|
it("5 parallel Slack channels each get isolated sessions", async () => {
|
|
43
|
-
writeWs("
|
|
43
|
+
writeWs("my-project", "my-project dev", "my-project persona", ["C_ALEV"]);
|
|
44
44
|
writeWs("homes", "HOMES SaaS", "HOMES persona", ["C_HOMES"]);
|
|
45
|
-
writeWs("
|
|
45
|
+
writeWs("my-landing", "my-landing app", "my-landing persona", ["C_JOBS"]);
|
|
46
46
|
writeWs("perseus", "Trading bot", "Perseus persona", ["C_PERSEUS"]);
|
|
47
47
|
writeWs("alvin", "Bot development", "Alvin persona", ["C_ALVIN"]);
|
|
48
48
|
|
|
@@ -51,9 +51,9 @@ describe("multi-session stress (v4.12.0)", () => {
|
|
|
51
51
|
initWorkspaces();
|
|
52
52
|
|
|
53
53
|
const channels = [
|
|
54
|
-
{ id: "C_ALEV", ws: "
|
|
54
|
+
{ id: "C_ALEV", ws: "my-project" },
|
|
55
55
|
{ id: "C_HOMES", ws: "homes" },
|
|
56
|
-
{ id: "C_JOBS", ws: "
|
|
56
|
+
{ id: "C_JOBS", ws: "my-landing" },
|
|
57
57
|
{ id: "C_PERSEUS", ws: "perseus" },
|
|
58
58
|
{ id: "C_ALVIN", ws: "alvin" },
|
|
59
59
|
];
|
|
@@ -81,9 +81,9 @@ describe("multi-session stress (v4.12.0)", () => {
|
|
|
81
81
|
});
|
|
82
82
|
|
|
83
83
|
it("survives full restart: 5 workspaces + 5 sessions persisted and rehydrated", async () => {
|
|
84
|
-
writeWs("
|
|
84
|
+
writeWs("my-project", "my-project", "persona", ["C_ALEV"]);
|
|
85
85
|
writeWs("homes", "HOMES", "persona", ["C_HOMES"]);
|
|
86
|
-
writeWs("
|
|
86
|
+
writeWs("my-landing", "my-landing", "persona", ["C_JOBS"]);
|
|
87
87
|
writeWs("perseus", "Perseus", "persona", ["C_PERSEUS"]);
|
|
88
88
|
writeWs("alvin", "Alvin", "persona", ["C_ALVIN"]);
|
|
89
89
|
|
|
@@ -126,13 +126,13 @@ describe("multi-session stress (v4.12.0)", () => {
|
|
|
126
126
|
const { getSession, getCostByWorkspace } = await import("../src/services/session.js");
|
|
127
127
|
|
|
128
128
|
const a = getSession("slack:C_A");
|
|
129
|
-
a.workspaceName = "
|
|
129
|
+
a.workspaceName = "my-project";
|
|
130
130
|
a.totalCost = 0.10;
|
|
131
131
|
a.messageCount = 3;
|
|
132
132
|
a.toolUseCount = 5;
|
|
133
133
|
|
|
134
134
|
const b = getSession("slack:C_B");
|
|
135
|
-
b.workspaceName = "
|
|
135
|
+
b.workspaceName = "my-project";
|
|
136
136
|
b.totalCost = 0.05;
|
|
137
137
|
b.messageCount = 2;
|
|
138
138
|
b.toolUseCount = 1;
|
|
@@ -144,10 +144,10 @@ describe("multi-session stress (v4.12.0)", () => {
|
|
|
144
144
|
c.toolUseCount = 8;
|
|
145
145
|
|
|
146
146
|
const breakdown = getCostByWorkspace();
|
|
147
|
-
expect(breakdown["
|
|
148
|
-
expect(breakdown["
|
|
149
|
-
expect(breakdown["
|
|
150
|
-
expect(breakdown["
|
|
147
|
+
expect(breakdown["my-project"].sessionCount).toBe(2);
|
|
148
|
+
expect(breakdown["my-project"].messageCount).toBe(5);
|
|
149
|
+
expect(breakdown["my-project"].toolUseCount).toBe(6);
|
|
150
|
+
expect(breakdown["my-project"].totalCost).toBeCloseTo(0.15, 10);
|
|
151
151
|
expect(breakdown["homes"].sessionCount).toBe(1);
|
|
152
152
|
expect(breakdown["homes"].messageCount).toBe(10);
|
|
153
153
|
expect(breakdown["homes"].toolUseCount).toBe(8);
|
|
@@ -155,7 +155,7 @@ describe("multi-session stress (v4.12.0)", () => {
|
|
|
155
155
|
});
|
|
156
156
|
|
|
157
157
|
it("workspaces hot-reload picks up a new channel ID", async () => {
|
|
158
|
-
writeWs("
|
|
158
|
+
writeWs("my-project", "my-project", "persona");
|
|
159
159
|
const { initWorkspaces, resolveWorkspaceOrDefault, reloadWorkspaces } =
|
|
160
160
|
await import("../src/services/workspaces.js");
|
|
161
161
|
initWorkspaces();
|
|
@@ -165,20 +165,20 @@ describe("multi-session stress (v4.12.0)", () => {
|
|
|
165
165
|
expect(ws.name).toBe("default");
|
|
166
166
|
|
|
167
167
|
// Add channel to config + reload
|
|
168
|
-
writeWs("
|
|
168
|
+
writeWs("my-project", "my-project", "persona", ["C_NEW"]);
|
|
169
169
|
reloadWorkspaces();
|
|
170
170
|
|
|
171
171
|
ws = resolveWorkspaceOrDefault("slack", "C_NEW", undefined);
|
|
172
|
-
expect(ws.name).toBe("
|
|
172
|
+
expect(ws.name).toBe("my-project");
|
|
173
173
|
});
|
|
174
174
|
|
|
175
175
|
it("channel-name fallback finds workspace when no explicit ID mapping", async () => {
|
|
176
|
-
writeWs("
|
|
176
|
+
writeWs("my-project", "my-project", "persona");
|
|
177
177
|
const { initWorkspaces, resolveWorkspaceOrDefault } = await import("../src/services/workspaces.js");
|
|
178
178
|
initWorkspaces();
|
|
179
179
|
|
|
180
|
-
const ws = resolveWorkspaceOrDefault("slack", "C_UNMAPPED", "#
|
|
181
|
-
expect(ws.name).toBe("
|
|
180
|
+
const ws = resolveWorkspaceOrDefault("slack", "C_UNMAPPED", "#my-project");
|
|
181
|
+
expect(ws.name).toBe("my-project");
|
|
182
182
|
});
|
|
183
183
|
|
|
184
184
|
it("malformed workspace doesn't break loading of other workspaces", async () => {
|
|
@@ -228,7 +228,7 @@ describe("multi-session stress (v4.12.0)", () => {
|
|
|
228
228
|
});
|
|
229
229
|
|
|
230
230
|
it("simulated restart + workspace switch: workspaceName persists across flush cycles", async () => {
|
|
231
|
-
writeWs("
|
|
231
|
+
writeWs("my-project", "my-project", "persona", ["C_ALEV"]);
|
|
232
232
|
const { initWorkspaces, resolveWorkspaceOrDefault } = await import("../src/services/workspaces.js");
|
|
233
233
|
const { buildSessionKey, getSession } = await import("../src/services/session.js");
|
|
234
234
|
const { flushSessions } = await import("../src/services/session-persistence.js");
|
|
@@ -249,7 +249,7 @@ describe("multi-session stress (v4.12.0)", () => {
|
|
|
249
249
|
|
|
250
250
|
const restored = s2.getSession("slack:C_ALEV");
|
|
251
251
|
expect(restored.sessionId).toBe("alev-resume");
|
|
252
|
-
expect(restored.workspaceName).toBe("
|
|
253
|
-
expect(restored.workingDir).toContain("
|
|
252
|
+
expect(restored.workspaceName).toBe("my-project");
|
|
253
|
+
expect(restored.workingDir).toContain("my-project");
|
|
254
254
|
});
|
|
255
255
|
});
|
|
@@ -63,7 +63,7 @@ describe("buildSessionKey with string userIds (v4.12.0)", () => {
|
|
|
63
63
|
process.env.SESSION_MODE = "per-user";
|
|
64
64
|
vi.resetModules();
|
|
65
65
|
const { buildSessionKey } = await import("../src/services/session.js");
|
|
66
|
-
const key = buildSessionKey("telegram", "123456",
|
|
67
|
-
expect(key).toBe("
|
|
66
|
+
const key = buildSessionKey("telegram", "123456", 1234567890);
|
|
67
|
+
expect(key).toBe("1234567890");
|
|
68
68
|
});
|
|
69
69
|
});
|