@canivel/ralph 0.2.0 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/.agents/ralph/PROMPT_build.md +126 -126
  2. package/.agents/ralph/agents.sh +17 -15
  3. package/.agents/ralph/config.sh +25 -25
  4. package/.agents/ralph/log-activity.sh +15 -15
  5. package/.agents/ralph/loop.sh +1027 -1001
  6. package/.agents/ralph/references/CONTEXT_ENGINEERING.md +126 -126
  7. package/.agents/ralph/references/GUARDRAILS.md +174 -174
  8. package/AGENTS.md +20 -20
  9. package/README.md +270 -266
  10. package/bin/ralph +766 -765
  11. package/diagram.svg +55 -55
  12. package/examples/commands.md +46 -46
  13. package/package.json +39 -39
  14. package/skills/commit/SKILL.md +219 -219
  15. package/skills/commit/references/commit_examples.md +292 -292
  16. package/skills/dev-browser/SKILL.md +211 -211
  17. package/skills/dev-browser/bun.lock +443 -443
  18. package/skills/dev-browser/package-lock.json +2988 -2988
  19. package/skills/dev-browser/package.json +31 -31
  20. package/skills/dev-browser/references/scraping.md +155 -155
  21. package/skills/dev-browser/scripts/start-relay.ts +32 -32
  22. package/skills/dev-browser/scripts/start-server.ts +117 -117
  23. package/skills/dev-browser/server.sh +24 -24
  24. package/skills/dev-browser/src/client.ts +474 -474
  25. package/skills/dev-browser/src/index.ts +287 -287
  26. package/skills/dev-browser/src/relay.ts +731 -731
  27. package/skills/dev-browser/src/snapshot/__tests__/snapshot.test.ts +223 -223
  28. package/skills/dev-browser/src/snapshot/browser-script.ts +877 -877
  29. package/skills/dev-browser/src/snapshot/index.ts +14 -14
  30. package/skills/dev-browser/src/snapshot/inject.ts +13 -13
  31. package/skills/dev-browser/src/types.ts +34 -34
  32. package/skills/dev-browser/tsconfig.json +36 -36
  33. package/skills/dev-browser/vitest.config.ts +12 -12
  34. package/skills/prd/SKILL.md +235 -235
  35. package/tests/agent-loops.mjs +79 -79
  36. package/tests/agent-ping.mjs +39 -39
  37. package/tests/audit.md +56 -56
  38. package/tests/cli-smoke.mjs +47 -47
  39. package/tests/real-agents.mjs +127 -127
@@ -1,31 +1,31 @@
1
- {
2
- "name": "dev-browser",
3
- "version": "0.0.1",
4
- "type": "module",
5
- "imports": {
6
- "@/*": "./src/*"
7
- },
8
- "scripts": {
9
- "start-server": "npx tsx scripts/start-server.ts",
10
- "start-extension": "npx tsx scripts/start-relay.ts",
11
- "dev": "npx tsx --watch src/index.ts",
12
- "test": "vitest run",
13
- "test:watch": "vitest"
14
- },
15
- "dependencies": {
16
- "@hono/node-server": "^1.19.7",
17
- "@hono/node-ws": "^1.2.0",
18
- "express": "^4.21.0",
19
- "hono": "^4.11.1",
20
- "playwright": "^1.49.0"
21
- },
22
- "devDependencies": {
23
- "@types/express": "^5.0.0",
24
- "tsx": "^4.21.0",
25
- "typescript": "^5.0.0",
26
- "vitest": "^2.1.0"
27
- },
28
- "optionalDependencies": {
29
- "@rollup/rollup-linux-x64-gnu": "^4.0.0"
30
- }
31
- }
1
+ {
2
+ "name": "dev-browser",
3
+ "version": "0.0.1",
4
+ "type": "module",
5
+ "imports": {
6
+ "@/*": "./src/*"
7
+ },
8
+ "scripts": {
9
+ "start-server": "npx tsx scripts/start-server.ts",
10
+ "start-extension": "npx tsx scripts/start-relay.ts",
11
+ "dev": "npx tsx --watch src/index.ts",
12
+ "test": "vitest run",
13
+ "test:watch": "vitest"
14
+ },
15
+ "dependencies": {
16
+ "@hono/node-server": "^1.19.7",
17
+ "@hono/node-ws": "^1.2.0",
18
+ "express": "^4.21.0",
19
+ "hono": "^4.11.1",
20
+ "playwright": "^1.49.0"
21
+ },
22
+ "devDependencies": {
23
+ "@types/express": "^5.0.0",
24
+ "tsx": "^4.21.0",
25
+ "typescript": "^5.0.0",
26
+ "vitest": "^2.1.0"
27
+ },
28
+ "optionalDependencies": {
29
+ "@rollup/rollup-linux-x64-gnu": "^4.0.0"
30
+ }
31
+ }
@@ -1,155 +1,155 @@
1
- # Data Scraping Guide
2
-
3
- For large datasets (followers, posts, search results), **intercept and replay network requests** rather than scrolling and parsing the DOM. This is faster, more reliable, and handles pagination automatically.
4
-
5
- ## Why Not Scroll?
6
-
7
- Scrolling is slow, unreliable, and wastes time. APIs return structured data with pagination built in. Always prefer API replay.
8
-
9
- ## Start Small, Then Scale
10
-
11
- **Don't try to automate everything at once.** Work incrementally:
12
-
13
- 1. **Capture one request** - verify you're intercepting the right endpoint
14
- 2. **Inspect one response** - understand the schema before writing extraction code
15
- 3. **Extract a few items** - make sure your parsing logic works
16
- 4. **Then scale up** - add pagination loop only after the basics work
17
-
18
- This prevents wasting time debugging a complex script when the issue is a simple path like `data.user.timeline` vs `data.user.result.timeline`.
19
-
20
- ## Step-by-Step Workflow
21
-
22
- ### 1. Capture Request Details
23
-
24
- First, intercept a request to understand URL structure and required headers:
25
-
26
- ```typescript
27
- import { connect, waitForPageLoad } from "@/client.js";
28
- import * as fs from "node:fs";
29
-
30
- const client = await connect();
31
- const page = await client.page("site");
32
-
33
- let capturedRequest = null;
34
- page.on("request", (request) => {
35
- const url = request.url();
36
- // Look for API endpoints (adjust pattern for your target site)
37
- if (url.includes("/api/") || url.includes("/graphql/")) {
38
- capturedRequest = {
39
- url: url,
40
- headers: request.headers(),
41
- method: request.method(),
42
- };
43
- fs.writeFileSync("tmp/request-details.json", JSON.stringify(capturedRequest, null, 2));
44
- console.log("Captured request:", url.substring(0, 80) + "...");
45
- }
46
- });
47
-
48
- await page.goto("https://example.com/profile");
49
- await waitForPageLoad(page);
50
- await page.waitForTimeout(3000);
51
-
52
- await client.disconnect();
53
- ```
54
-
55
- ### 2. Capture Response to Understand Schema
56
-
57
- Save a raw response to inspect the data structure:
58
-
59
- ```typescript
60
- page.on("response", async (response) => {
61
- const url = response.url();
62
- if (url.includes("UserTweets") || url.includes("/api/data")) {
63
- const json = await response.json();
64
- fs.writeFileSync("tmp/api-response.json", JSON.stringify(json, null, 2));
65
- console.log("Captured response");
66
- }
67
- });
68
- ```
69
-
70
- Then analyze the structure to find:
71
-
72
- - Where the data array lives (e.g., `data.user.result.timeline.instructions[].entries`)
73
- - Where pagination cursors are (e.g., `cursor-bottom` entries)
74
- - What fields you need to extract
75
-
76
- ### 3. Replay API with Pagination
77
-
78
- Once you understand the schema, replay requests directly:
79
-
80
- ```typescript
81
- import { connect } from "@/client.js";
82
- import * as fs from "node:fs";
83
-
84
- const client = await connect();
85
- const page = await client.page("site");
86
-
87
- const results = new Map(); // Use Map for deduplication
88
- const headers = JSON.parse(fs.readFileSync("tmp/request-details.json", "utf8")).headers;
89
- const baseUrl = "https://example.com/api/data";
90
-
91
- let cursor = null;
92
- let hasMore = true;
93
-
94
- while (hasMore) {
95
- // Build URL with pagination cursor
96
- const params = { count: 20 };
97
- if (cursor) params.cursor = cursor;
98
- const url = `${baseUrl}?params=${encodeURIComponent(JSON.stringify(params))}`;
99
-
100
- // Execute fetch in browser context (has auth cookies/headers)
101
- const response = await page.evaluate(
102
- async ({ url, headers }) => {
103
- const res = await fetch(url, { headers });
104
- return res.json();
105
- },
106
- { url, headers }
107
- );
108
-
109
- // Extract data and cursor (adjust paths for your API)
110
- const entries = response?.data?.entries || [];
111
- for (const entry of entries) {
112
- if (entry.type === "cursor-bottom") {
113
- cursor = entry.value;
114
- } else if (entry.id && !results.has(entry.id)) {
115
- results.set(entry.id, {
116
- id: entry.id,
117
- text: entry.content,
118
- timestamp: entry.created_at,
119
- });
120
- }
121
- }
122
-
123
- console.log(`Fetched page, total: ${results.size}`);
124
-
125
- // Check stop conditions
126
- if (!cursor || entries.length === 0) hasMore = false;
127
-
128
- // Rate limiting - be respectful
129
- await new Promise((r) => setTimeout(r, 500));
130
- }
131
-
132
- // Export results
133
- const data = Array.from(results.values());
134
- fs.writeFileSync("tmp/results.json", JSON.stringify(data, null, 2));
135
- console.log(`Saved ${data.length} items`);
136
-
137
- await client.disconnect();
138
- ```
139
-
140
- ## Key Patterns
141
-
142
- | Pattern | Description |
143
- | ----------------------- | ------------------------------------------------------ |
144
- | `page.on('request')` | Capture outgoing request URL + headers |
145
- | `page.on('response')` | Capture response data to understand schema |
146
- | `page.evaluate(fetch)` | Replay requests in browser context (inherits auth) |
147
- | `Map` for deduplication | APIs often return overlapping data across pages |
148
- | Cursor-based pagination | Look for `cursor`, `next_token`, `offset` in responses |
149
-
150
- ## Tips
151
-
152
- - **Extension mode**: `page.context().cookies()` doesn't work - capture auth headers from intercepted requests instead
153
- - **Rate limiting**: Add 500ms+ delays between requests to avoid blocks
154
- - **Stop conditions**: Check for empty results, missing cursor, or reaching a date/ID threshold
155
- - **GraphQL APIs**: URL params often include `variables` and `features` JSON objects - capture and reuse them
1
+ # Data Scraping Guide
2
+
3
+ For large datasets (followers, posts, search results), **intercept and replay network requests** rather than scrolling and parsing the DOM. This is faster, more reliable, and handles pagination automatically.
4
+
5
+ ## Why Not Scroll?
6
+
7
+ Scrolling is slow, unreliable, and wastes time. APIs return structured data with pagination built in. Always prefer API replay.
8
+
9
+ ## Start Small, Then Scale
10
+
11
+ **Don't try to automate everything at once.** Work incrementally:
12
+
13
+ 1. **Capture one request** - verify you're intercepting the right endpoint
14
+ 2. **Inspect one response** - understand the schema before writing extraction code
15
+ 3. **Extract a few items** - make sure your parsing logic works
16
+ 4. **Then scale up** - add pagination loop only after the basics work
17
+
18
+ This prevents wasting time debugging a complex script when the issue is a simple path like `data.user.timeline` vs `data.user.result.timeline`.
19
+
20
+ ## Step-by-Step Workflow
21
+
22
+ ### 1. Capture Request Details
23
+
24
+ First, intercept a request to understand URL structure and required headers:
25
+
26
+ ```typescript
27
+ import { connect, waitForPageLoad } from "@/client.js";
28
+ import * as fs from "node:fs";
29
+
30
+ const client = await connect();
31
+ const page = await client.page("site");
32
+
33
+ let capturedRequest = null;
34
+ page.on("request", (request) => {
35
+ const url = request.url();
36
+ // Look for API endpoints (adjust pattern for your target site)
37
+ if (url.includes("/api/") || url.includes("/graphql/")) {
38
+ capturedRequest = {
39
+ url: url,
40
+ headers: request.headers(),
41
+ method: request.method(),
42
+ };
43
+ fs.writeFileSync("tmp/request-details.json", JSON.stringify(capturedRequest, null, 2));
44
+ console.log("Captured request:", url.substring(0, 80) + "...");
45
+ }
46
+ });
47
+
48
+ await page.goto("https://example.com/profile");
49
+ await waitForPageLoad(page);
50
+ await page.waitForTimeout(3000);
51
+
52
+ await client.disconnect();
53
+ ```
54
+
55
+ ### 2. Capture Response to Understand Schema
56
+
57
+ Save a raw response to inspect the data structure:
58
+
59
+ ```typescript
60
+ page.on("response", async (response) => {
61
+ const url = response.url();
62
+ if (url.includes("UserTweets") || url.includes("/api/data")) {
63
+ const json = await response.json();
64
+ fs.writeFileSync("tmp/api-response.json", JSON.stringify(json, null, 2));
65
+ console.log("Captured response");
66
+ }
67
+ });
68
+ ```
69
+
70
+ Then analyze the structure to find:
71
+
72
+ - Where the data array lives (e.g., `data.user.result.timeline.instructions[].entries`)
73
+ - Where pagination cursors are (e.g., `cursor-bottom` entries)
74
+ - What fields you need to extract
75
+
76
+ ### 3. Replay API with Pagination
77
+
78
+ Once you understand the schema, replay requests directly:
79
+
80
+ ```typescript
81
+ import { connect } from "@/client.js";
82
+ import * as fs from "node:fs";
83
+
84
+ const client = await connect();
85
+ const page = await client.page("site");
86
+
87
+ const results = new Map(); // Use Map for deduplication
88
+ const headers = JSON.parse(fs.readFileSync("tmp/request-details.json", "utf8")).headers;
89
+ const baseUrl = "https://example.com/api/data";
90
+
91
+ let cursor = null;
92
+ let hasMore = true;
93
+
94
+ while (hasMore) {
95
+ // Build URL with pagination cursor
96
+ const params = { count: 20 };
97
+ if (cursor) params.cursor = cursor;
98
+ const url = `${baseUrl}?params=${encodeURIComponent(JSON.stringify(params))}`;
99
+
100
+ // Execute fetch in browser context (has auth cookies/headers)
101
+ const response = await page.evaluate(
102
+ async ({ url, headers }) => {
103
+ const res = await fetch(url, { headers });
104
+ return res.json();
105
+ },
106
+ { url, headers }
107
+ );
108
+
109
+ // Extract data and cursor (adjust paths for your API)
110
+ const entries = response?.data?.entries || [];
111
+ for (const entry of entries) {
112
+ if (entry.type === "cursor-bottom") {
113
+ cursor = entry.value;
114
+ } else if (entry.id && !results.has(entry.id)) {
115
+ results.set(entry.id, {
116
+ id: entry.id,
117
+ text: entry.content,
118
+ timestamp: entry.created_at,
119
+ });
120
+ }
121
+ }
122
+
123
+ console.log(`Fetched page, total: ${results.size}`);
124
+
125
+ // Check stop conditions
126
+ if (!cursor || entries.length === 0) hasMore = false;
127
+
128
+ // Rate limiting - be respectful
129
+ await new Promise((r) => setTimeout(r, 500));
130
+ }
131
+
132
+ // Export results
133
+ const data = Array.from(results.values());
134
+ fs.writeFileSync("tmp/results.json", JSON.stringify(data, null, 2));
135
+ console.log(`Saved ${data.length} items`);
136
+
137
+ await client.disconnect();
138
+ ```
139
+
140
+ ## Key Patterns
141
+
142
+ | Pattern | Description |
143
+ | ----------------------- | ------------------------------------------------------ |
144
+ | `page.on('request')` | Capture outgoing request URL + headers |
145
+ | `page.on('response')` | Capture response data to understand schema |
146
+ | `page.evaluate(fetch)` | Replay requests in browser context (inherits auth) |
147
+ | `Map` for deduplication | APIs often return overlapping data across pages |
148
+ | Cursor-based pagination | Look for `cursor`, `next_token`, `offset` in responses |
149
+
150
+ ## Tips
151
+
152
+ - **Extension mode**: `page.context().cookies()` doesn't work - capture auth headers from intercepted requests instead
153
+ - **Rate limiting**: Add 500ms+ delays between requests to avoid blocks
154
+ - **Stop conditions**: Check for empty results, missing cursor, or reaching a date/ID threshold
155
+ - **GraphQL APIs**: URL params often include `variables` and `features` JSON objects - capture and reuse them
@@ -1,32 +1,32 @@
1
- /**
2
- * Start the CDP relay server for Chrome extension mode
3
- *
4
- * Usage: npm run start-extension
5
- */
6
-
7
- import { serveRelay } from "@/relay.js";
8
-
9
- const PORT = parseInt(process.env.PORT || "9222", 10);
10
- const HOST = process.env.HOST || "127.0.0.1";
11
-
12
- async function main() {
13
- const server = await serveRelay({
14
- port: PORT,
15
- host: HOST,
16
- });
17
-
18
- // Handle shutdown
19
- const shutdown = async () => {
20
- console.log("\nShutting down relay server...");
21
- await server.stop();
22
- process.exit(0);
23
- };
24
-
25
- process.on("SIGINT", shutdown);
26
- process.on("SIGTERM", shutdown);
27
- }
28
-
29
- main().catch((err) => {
30
- console.error("Failed to start relay server:", err);
31
- process.exit(1);
32
- });
1
+ /**
2
+ * Start the CDP relay server for Chrome extension mode
3
+ *
4
+ * Usage: npm run start-extension
5
+ */
6
+
7
+ import { serveRelay } from "@/relay.js";
8
+
9
+ const PORT = parseInt(process.env.PORT || "9222", 10);
10
+ const HOST = process.env.HOST || "127.0.0.1";
11
+
12
+ async function main() {
13
+ const server = await serveRelay({
14
+ port: PORT,
15
+ host: HOST,
16
+ });
17
+
18
+ // Handle shutdown
19
+ const shutdown = async () => {
20
+ console.log("\nShutting down relay server...");
21
+ await server.stop();
22
+ process.exit(0);
23
+ };
24
+
25
+ process.on("SIGINT", shutdown);
26
+ process.on("SIGTERM", shutdown);
27
+ }
28
+
29
+ main().catch((err) => {
30
+ console.error("Failed to start relay server:", err);
31
+ process.exit(1);
32
+ });