reflexive 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,272 @@
1
+ // Self-instrumenting demo app
2
+ // Run with: node demo-instrumented.js
3
+
4
+ import http from 'http';
5
+ import { makeReflexive } from './src/reflexive.js';
6
+
7
+ // Instrument this process - starts dashboard on port 3099
8
+ const reflexive = makeReflexive({ // options passed here: port, title and systemPrompt
9
+ port: 3099, // the root endpoint response and the startup banner further down hard-code this same 3099
10
+ title: 'Task Queue Demo',
11
+ systemPrompt: `This is a task queue simulation app. It has:
12
+ - An HTTP API for submitting and viewing tasks
13
+ - A background worker that processes tasks
14
+ - Custom state tracking for queue metrics
15
+
16
+ Help the user understand the queue state, debug issues, and monitor task processing.`
17
+ });
18
+
19
+ // Application state
20
+ const tasks = []; // every task object created so far; nothing in this file removes entries
21
+ let taskIdCounter = 0; // pre-incremented when a task is created, so the first id is 1
22
+ let processedCount = 0; // incremented by processTask on success
23
+ let failedCount = 0; // incremented by processTask on failure
24
+
25
+ // Expose state to Reflexive
26
+ function updateMetrics() { // publishes the four queue counters to Reflexive through setState
27
+ reflexive.setState('queueLength', tasks.filter(t => t.status === 'pending').length); // pending tasks only; processing and finished tasks are not counted
28
+ reflexive.setState('processedCount', processedCount);
29
+ reflexive.setState('failedCount', failedCount);
30
+ reflexive.setState('totalTasks', tasks.length); // all tasks regardless of status
31
+ }
32
+
33
+ // Task processor (simulates work)
34
+ async function processTask(task) { // marks the task as processing, sleeps, then records a random success or failure and republishes metrics
35
+ task.status = 'processing';
36
+ console.log(`Processing task ${task.id}: ${task.name}`);
37
+
38
+ // Simulate work: sleep for a random duration between 1000 and 3000 ms
39
+ const duration = 1000 + Math.random() * 2000;
40
+ await new Promise(r => setTimeout(r, duration));
41
+
42
+ // 20% chance of failure for demo purposes
43
+ if (Math.random() < 0.2) {
44
+ task.status = 'failed';
45
+ task.error = 'Random failure for demonstration';
46
+ failedCount++;
47
+ console.error(`Task ${task.id} failed: ${task.error}`);
48
+ } else {
49
+ task.status = 'completed';
50
+ task.completedAt = new Date().toISOString();
51
+ processedCount++;
52
+ console.log(`Task ${task.id} completed successfully`);
53
+ }
54
+
55
+ updateMetrics(); // republish the counters after either outcome
56
+ }
57
+
58
+ // Background worker
59
+ async function worker() { // endless loop: handles one pending task at a time and idles 500 ms when none is pending
60
+ while (true) {
61
+ const pendingTask = tasks.find(t => t.status === 'pending'); // first pending task in insertion order; the priority field is not consulted
62
+
63
+ if (pendingTask) {
64
+ await processTask(pendingTask); // awaited, so tasks are processed strictly one after another
65
+ } else {
66
+ // No tasks, wait a bit
67
+ await new Promise(r => setTimeout(r, 500));
68
+ }
69
+ }
70
+ }
71
+
72
+ // Start worker
73
+ worker(); // not awaited; no rejection handler is attached to the returned promise here
74
+
75
+ // HTTP API
76
+ const PORT = 8080; // port the task queue API listens on
77
+
78
+ const server = http.createServer((req, res) => {
79
+ const url = new URL(req.url, `http://${req.headers.host}`);
80
+
81
+ console.log(`${req.method} ${url.pathname}`);
82
+
83
+ // CORS
84
+ res.setHeader('Access-Control-Allow-Origin', '*');
85
+ res.setHeader('Content-Type', 'application/json');
86
+
87
+ if (url.pathname === '/') {
88
+ res.end(JSON.stringify({
89
+ name: 'Task Queue Demo',
90
+ endpoints: {
91
+ 'POST /tasks': 'Create a new task',
92
+ 'GET /tasks': 'List all tasks',
93
+ 'GET /tasks/:id': 'Get task by ID',
94
+ 'GET /metrics': 'Get queue metrics',
95
+ 'POST /tasks/batch': 'Create multiple tasks'
96
+ },
97
+ dashboard: 'http://localhost:3099/reflexive'
98
+ }, null, 2));
99
+ return;
100
+ }
101
+
102
+ if (url.pathname === '/tasks' && req.method === 'GET') {
103
+ res.end(JSON.stringify(tasks, null, 2));
104
+ return;
105
+ }
106
+
107
+ if (url.pathname === '/tasks' && req.method === 'POST') {
108
+ let body = '';
109
+ req.on('data', chunk => body += chunk);
110
+ req.on('end', () => {
111
+ try {
112
+ const data = JSON.parse(body);
113
+ const task = {
114
+ id: ++taskIdCounter,
115
+ name: data.name || `Task ${taskIdCounter}`,
116
+ priority: data.priority || 'normal',
117
+ status: 'pending',
118
+ createdAt: new Date().toISOString(),
119
+ data: data.data || {}
120
+ };
121
+ tasks.push(task);
122
+ updateMetrics();
123
+ console.log(`Created task ${task.id}: ${task.name}`);
124
+ res.writeHead(201);
125
+ res.end(JSON.stringify(task, null, 2));
126
+ } catch (e) {
127
+ res.writeHead(400);
128
+ res.end(JSON.stringify({ error: e.message }));
129
+ }
130
+ });
131
+ return;
132
+ }
133
+
134
+ if (url.pathname === '/tasks/batch' && req.method === 'POST') {
135
+ let body = '';
136
+ req.on('data', chunk => body += chunk);
137
+ req.on('end', () => {
138
+ try {
139
+ const data = JSON.parse(body);
140
+ const count = data.count || 5;
141
+ const created = [];
142
+
143
+ for (let i = 0; i < count; i++) {
144
+ const task = {
145
+ id: ++taskIdCounter,
146
+ name: `Batch Task ${taskIdCounter}`,
147
+ priority: 'normal',
148
+ status: 'pending',
149
+ createdAt: new Date().toISOString(),
150
+ data: { batchIndex: i }
151
+ };
152
+ tasks.push(task);
153
+ created.push(task);
154
+ }
155
+
156
+ updateMetrics();
157
+ console.log(`Created ${count} batch tasks`);
158
+ res.writeHead(201);
159
+ res.end(JSON.stringify({ created: created.length, tasks: created }, null, 2));
160
+ } catch (e) {
161
+ res.writeHead(400);
162
+ res.end(JSON.stringify({ error: e.message }));
163
+ }
164
+ });
165
+ return;
166
+ }
167
+
168
+ if (url.pathname.startsWith('/tasks/') && req.method === 'GET') {
169
+ const id = parseInt(url.pathname.split('/')[2]);
170
+ const task = tasks.find(t => t.id === id);
171
+ if (task) {
172
+ res.end(JSON.stringify(task, null, 2));
173
+ } else {
174
+ res.writeHead(404);
175
+ res.end(JSON.stringify({ error: 'Task not found' }));
176
+ }
177
+ return;
178
+ }
179
+
180
+ if (url.pathname === '/metrics') {
181
+ const pending = tasks.filter(t => t.status === 'pending').length;
182
+ const processing = tasks.filter(t => t.status === 'processing').length;
183
+ const completed = tasks.filter(t => t.status === 'completed').length;
184
+ const failed = tasks.filter(t => t.status === 'failed').length;
185
+
186
+ res.end(JSON.stringify({
187
+ queue: {
188
+ pending,
189
+ processing,
190
+ completed,
191
+ failed,
192
+ total: tasks.length
193
+ },
194
+ rates: {
195
+ successRate: tasks.length > 0 ? ((completed / (completed + failed)) * 100).toFixed(1) + '%' : 'N/A',
196
+ processedCount,
197
+ failedCount
198
+ },
199
+ memory: process.memoryUsage()
200
+ }, null, 2));
201
+ return;
202
+ }
203
+
204
+ res.writeHead(404);
205
+ res.end(JSON.stringify({ error: 'Not found' }));
206
+ });
207
+
208
+ server.listen(PORT, () => { // once listening: print the usage banner, then seed three demo tasks one second later
209
+ console.log(`
210
+ ╔═══════════════════════════════════════════════════════════════════════════════╗
211
+ ║ REFLEXIVE LIBRARY MODE DEMO ║
212
+ ║ (Self-Instrumenting Task Queue) ║
213
+ ╠═══════════════════════════════════════════════════════════════════════════════╣
214
+ ║ Task Queue API: http://localhost:${PORT} ║
215
+ ║ Dashboard: http://localhost:3099/reflexive ║
216
+ ║ PID: ${String(process.pid).padEnd(73)}║
217
+ ╠═══════════════════════════════════════════════════════════════════════════════╣
218
+ ║ HOW TO USE THIS DEMO: ║
219
+ ║ ║
220
+ ║ 1. Run directly: npm run demo (or: node demo-instrumented.js) ║
221
+ ║ 2. Open dashboard: http://localhost:3099/reflexive ║
222
+ ║ 3. This demo shows LIBRARY MODE - embedding Reflexive in your app ║
223
+ ║ ║
224
+ ║ KEY FEATURES DEMONSTRATED: ║
225
+ ║ • makeReflexive() - Initialize agent inside your app ║
226
+ ║ • reflexive.setState() - Expose custom metrics to the agent ║
227
+ ║ • Custom system prompt for app-specific context ║
228
+ ║ • Background task processing with random failures (20% chance) ║
229
+ ║ ║
230
+ ║ API ENDPOINTS (use curl or browser): ║
231
+ ║ ║
232
+ ║ GET / → API info ║
233
+ ║ GET /tasks → List all tasks ║
234
+ ║ POST /tasks → Create task: curl -X POST -d '{"name":"Test"}' ... ║
235
+ ║ POST /tasks/batch → Create batch: curl -X POST -d '{"count":5}' ... ║
236
+ ║ GET /tasks/:id → Get specific task ║
237
+ ║ GET /metrics → Queue metrics and stats ║
238
+ ║ ║
239
+ ║ TRY ASKING THE AGENT: ║
240
+ ║ "How many tasks are pending?" ║
241
+ ║ "What's the success rate?" ║
242
+ ║ "Show me failed tasks" ║
243
+ ║ "What are the current queue metrics?" ║
244
+ ║ ║
245
+ ║ WATCH TRIGGER IDEAS: ║
246
+ ║ "failed" → "Investigate why this task failed and suggest a fix" ║
247
+ ║ "heartbeat" → "Summarize current queue health" ║
248
+ ╚═══════════════════════════════════════════════════════════════════════════════╝
249
+ `);
250
+
251
+ // Create some initial tasks (three, after a 1000 ms delay)
252
+ setTimeout(() => {
253
+ for (let i = 0; i < 3; i++) {
254
+ tasks.push({ // same task shape as the POST /tasks handler builds
255
+ id: ++taskIdCounter,
256
+ name: `Initial Task ${taskIdCounter}`,
257
+ priority: 'normal',
258
+ status: 'pending',
259
+ createdAt: new Date().toISOString(),
260
+ data: {}
261
+ });
262
+ }
263
+ updateMetrics(); // publish once after all three are queued
264
+ console.log('Created 3 initial tasks');
265
+ }, 1000);
266
+ });
267
+
268
+ // Periodic status log
269
+ setInterval(() => { // logs one heartbeat line every 15000 ms; the interval handle is not stored, so this file never clears it
270
+ const pending = tasks.filter(t => t.status === 'pending').length;
271
+ console.log(`[heartbeat] Queue: ${pending} pending, ${processedCount} processed, ${failedCount} failed`);
272
+ }, 15000);
@@ -0,0 +1,293 @@
1
+ # Breakpoint System Audit
2
+
3
+ This document provides a comprehensive audit of all breakpoint-related code in the Reflexive codebase.
4
+
5
+ ## Executive Summary
6
+
7
+ The current breakpoint system in Reflexive is **pattern-based, not true debugger breakpoints**. It uses:
8
+ - IPC messaging between parent (reflexive.js) and child (inject.cjs) processes
9
+ - Promise-based pause/resume mechanism
10
+ - Conditional breakpoints that match log message patterns
11
+
12
+ **Important Finding**: `evaluate_in_app` DOES run code inside the target process and can modify state, but only in global scope.
13
+
14
+ ## Architecture Overview
15
+
16
+ ```
17
+ Dashboard UI
18
+ ↓ POST /break or /breakpoint/:id/trigger
19
+ HTTP Server (reflexive.js)
20
+ ↓ processManager.triggerBreakpoint()
21
+ Child Process IPC
22
+ ↓ {reflexive: true, type: 'triggerBreakpoint'}
23
+ inject.cjs Message Handler
24
+ ↓ setImmediate(async () => process.reflexive.breakpoint())
25
+ App Code Execution
26
+ ↓ Breakpoint Hit - Promise Created
27
+ ↓ sendToParent('breakpoint', {action: 'hit'})
28
+ IPC → HTTP Server
29
+ ↓ activeBreakpoint = {...}
30
+ ↓ emit('breakpointHit')
31
+ Dashboard UI
32
+ ↓ Polls /breakpoint-status every 1s
33
+ ↓ Shows "PAUSED" state
34
+ ↓ User clicks Resume
35
+ HTTP Server: POST /resume
36
+ ↓ processManager.resumeBreakpoint(returnValue)
37
+ Child Process IPC
38
+ ↓ {reflexive: true, type: 'resumeBreakpoint', returnValue}
39
+ inject.cjs Handler
40
+ ↓ breakpointResolve(msg.returnValue)
41
+ App Code
42
+ ↓ Promise Resolves - Execution Continues
43
+ ↓ sendToParent('breakpoint', {action: 'resumed'})
44
+ ```
45
+
46
+ ---
47
+
48
+ ## Code Locations
49
+
50
+ ### src/reflexive.js
51
+
52
+ #### ProcessManager Class - State (Lines 2457-2462)
53
+
54
+ ```javascript
55
+ // Breakpoint state
56
+ this.activeBreakpoint = null;
57
+ this.lastBreakpoint = null;
58
+ // Conditional breakpoints
59
+ this.conditionalBreakpoints = [];
60
+ this.conditionalBreakpointIdCounter = 0;
61
+ ```
62
+
63
+ #### ProcessManager Methods
64
+
65
+ | Method | Lines | Description |
66
+ |--------|-------|-------------|
67
+ | `getActiveBreakpoint()` | 2798-2800 | Returns current active breakpoint or null |
68
+ | `resumeBreakpoint(returnValue)` | 2802-2816 | Sends IPC to resume, optionally with return value |
69
+ | `triggerBreakpoint(label)` | 2817-2827 | Sends IPC to trigger breakpoint in child |
70
+ | `addConditionalBreakpoint(pattern, label, enabled)` | 2830-2841 | Creates pattern-based breakpoint |
71
+ | `getConditionalBreakpoints()` | 2843-2845 | Returns copy of all conditional breakpoints |
72
+ | `removeConditionalBreakpoint(id)` | 2847-2854 | Removes breakpoint by ID |
73
+ | `checkConditionalBreakpoints(logMessage)` | 2856-2870 | Checks logs against patterns, triggers if match |
74
+
75
+ #### IPC Message Handler (Lines 2707-2734)
76
+
77
+ ```javascript
78
+ case 'breakpoint':
79
+ // action: 'hit' → sets activeBreakpoint, emits breakpointHit event
80
+ // action: 'resumed' → clears activeBreakpoint, emits breakpointResumed event
81
+
82
+ case 'breakpointError':
83
+ // Logs breakpoint-related errors
84
+ ```
85
+
86
+ #### MCP Tools
87
+
88
+ | Tool | Lines | Parameters | Description |
89
+ |------|-------|------------|-------------|
90
+ | `get_active_breakpoint` | 3280-3312 | none | Check if paused at breakpoint |
91
+ | `resume_breakpoint` | 3314-3346 | `returnValue` (optional) | Resume from breakpoint |
92
+ | `trigger_breakpoint` | 3348-3379 | `label` (optional) | Trigger breakpoint to pause |
93
+ | `set_conditional_breakpoint` | 3381-3406 | `pattern`, `label`, `enabled` | Set pattern-based breakpoint |
94
+ | `list_breakpoints` | 3408-3440 | none | List all conditional breakpoints |
95
+ | `remove_breakpoint` | 3442-3473 | `id` | Remove conditional breakpoint |
96
+
97
+ #### HTTP Endpoints
98
+
99
+ | Endpoint | Lines | Method | Description |
100
+ |----------|-------|--------|-------------|
101
+ | `/break` | 3935-3945 | POST | Trigger breakpoint |
102
+ | `/resume` | 3947-3957 | POST | Resume from breakpoint |
103
+ | `/breakpoint-status` | 3959-3973 | GET | Get current breakpoint state |
104
+ | `/breakpoint/:id` | 3976-3989 | POST | Toggle conditional breakpoint |
105
+ | `/breakpoint/:id` | 3992-3998 | DELETE | Remove conditional breakpoint |
106
+
107
+ ---
108
+
109
+ ### src/inject.cjs
110
+
111
+ #### State Variables (Lines 177-180)
112
+
113
+ ```javascript
114
+ const breakpoints = new Map(); // For future use (unused)
115
+ let breakpointIdCounter = 0; // Increments for each breakpoint
116
+ let activeBreakpoint = null; // Current paused breakpoint
117
+ let breakpointResolve = null; // Promise resolver for pause/resume
118
+ ```
119
+
120
+ #### process.reflexive.breakpoint() (Lines 209-230)
121
+
122
+ ```javascript
123
+ async breakpoint(label = 'breakpoint', context = {}) {
124
+ // Generates unique ID
125
+ // Captures stack trace
126
+ // Sends 'breakpoint' message with action='hit'
127
+ // Prints 🔴 message to console
128
+ // Returns Promise that resolves when resumed
129
+ }
130
+ ```
131
+
132
+ **CRITICAL**: This is async and pauses execution until `breakpointResolve()` is called.
133
+
134
+ #### IPC Message Handlers
135
+
136
+ | Message Type | Lines | Description |
137
+ |--------------|-------|-------------|
138
+ | `resumeBreakpoint` | 340-357 | Calls breakpointResolve(), clears state |
139
+ | `getActiveBreakpoint` | 359-375 | Returns current breakpoint info |
140
+ | `triggerBreakpoint` | 377-390 | Calls process.reflexive.breakpoint() via setImmediate |
141
+
142
+ ---
143
+
144
+ ### Demo Apps
145
+
146
+ #### demo-ai-features.js (Lines 512-539)
147
+
148
+ ```javascript
149
+ if (url.pathname === '/debug/breakpoint') {
150
+ if (process.reflexive && process.reflexive.breakpoint) {
151
+ const context = { requestUrl, timestamp, peopleCount, stats };
152
+ const result = await process.reflexive.breakpoint('debug-endpoint', context);
153
+ // Returns message about completed breakpoint
154
+ }
155
+ }
156
+ ```
157
+
158
+ **Note**: The `await` actually pauses the endpoint handler until resumed.
159
+
160
+ ---
161
+
162
+ ## Evaluate In App Analysis
163
+
164
+ ### Does it ACTUALLY affect app state?
165
+
166
+ **YES** - `evaluate_in_app` runs code inside the target process and can modify state.
167
+
168
+ #### Evidence
169
+
170
+ 1. **Execution Location** (src/inject.cjs Lines 282-332):
171
+ ```javascript
172
+ // Code runs in the child process (target app)
173
+ const evalInGlobal = eval;
174
+ const result = evalInGlobal(msg.code);
175
+ ```
176
+
177
+ 2. **Scope Access**:
178
+ - Runs in global scope via indirect eval
179
+ - Can access and modify `global` variables
180
+ - **Cannot** access module-scoped variables
181
+ - Demo apps work because they explicitly expose things to `global`
182
+
183
+ 3. **Security Gating**:
184
+ - Requires explicit `--eval` flag
185
+ - Requires `REFLEXIVE_EVAL` environment variable
186
+ - Tool description: "Can inspect variables, call functions, **or modify behavior at runtime**"
187
+
188
+ ### What CAN be accessed
189
+
190
+ ```javascript
191
+ // These work because they're on global:
192
+ global.config
193
+ global.users
194
+ global.cache
195
+ process.env
196
+ process.memoryUsage()
197
+ ```
198
+
199
+ ### What CANNOT be accessed
200
+
201
+ ```javascript
202
+ // Module-scoped variables in the target app
203
+ const privateVar = 'secret'; // Not accessible
204
+ let moduleState = {}; // Not accessible
205
+ ```
206
+
207
+ ---
208
+
209
+ ## Key Findings
210
+
211
+ ### Current Limitations
212
+
213
+ 1. **No V8 Inspector Integration** - Uses pattern matching on logs, not real debugger
214
+ 2. **Global Scope Only** - eval only accesses `global`, not module scope
215
+ 3. **No Line-Level Breakpoints** - Cannot set breakpoint at specific line numbers
216
+ 4. **No Call Stack Inspection** - Limited to what's captured at breakpoint time
217
+ 5. **No Variable Inspection** - Cannot inspect local variables in scope
218
+
219
+ ### What Works
220
+
221
+ 1. **Pause/Resume** - Reliably pauses and resumes execution
222
+ 2. **Pattern Matching** - Conditional breakpoints trigger on log patterns
223
+ 3. **Context Passing** - Can pass context object to breakpoint
224
+ 4. **Return Values** - Can pass return value back when resuming
225
+ 5. **Global State Modification** - Can modify things on `global`
226
+
227
+ ### Security Considerations
228
+
229
+ 1. **Requires explicit flags** - Must use `--inject` and `--eval`
230
+ 2. **Full access to global scope** - Can modify any global state
231
+ 3. **No timeout on breakpoints** - Will wait forever if not resumed
232
+ 4. **Can access process.env** - Environment variables exposed
233
+
234
+ ---
235
+
236
+ ## Conditional Breakpoint Flow
237
+
238
+ ```
239
+ Any console.log() → ProcessManager._log()
240
+
241
+ checkConditionalBreakpoints(logMessage)
242
+
243
+ Pattern match on enabled breakpoints?
244
+ ├─ YES: triggerBreakpoint(label) → IPC → 'triggerBreakpoint' message
245
+ └─ NO: Continue
246
+ ```
247
+
248
+ **Pattern Matching**: Case-insensitive substring match on entire log message.
249
+
250
+ ---
251
+
252
+ ## Dashboard UI Components
253
+
254
+ ### HTML Structure (Lines 1098-1109)
255
+
256
+ ```html
257
+ <div class="debug-section" id="breakpoints-section">
258
+ <div class="debug-header" id="breakpoints-header">
259
+ <span class="count" id="breakpoints-count">0</span>
260
+ <div class="breakpoint-controls">
261
+ <!-- break-now-btn and resume-btn buttons -->
262
+ </div>
263
+ </div>
264
+ <div class="debug-content" id="breakpoints-content">
265
+ <!-- Breakpoint list rendered here -->
266
+ </div>
267
+ </div>
268
+ ```
269
+
270
+ ### JavaScript Functions
271
+
272
+ | Function | Description |
273
+ |----------|-------------|
274
+ | `renderBreakpoints(currentBp)` | Renders UI with current + conditional breakpoints |
275
+ | `checkBreakpointStatus()` | Polls /breakpoint-status every 1000ms |
276
+
277
+ ---
278
+
279
+ ## Recommended Improvements
280
+
281
+ To achieve true debugging capabilities, Reflexive should integrate with the V8 Inspector Protocol:
282
+
283
+ 1. **Start target with `--inspect`** or enable inspector programmatically
284
+ 2. **Connect via WebSocket** to the inspector endpoint
285
+ 3. **Use CDP commands** like:
286
+ - `Debugger.enable`
287
+ - `Debugger.setBreakpointByUrl`
288
+ - `Debugger.pause` / `Debugger.resume`
289
+ - `Debugger.stepInto` / `Debugger.stepOut` / `Debugger.stepOver`
290
+ - `Runtime.evaluate` (with proper scope access)
291
+ - `Debugger.evaluateOnCallFrame` (for local variables)
292
+
293
+ See `docs/V8-INSPECTOR-RESEARCH.md` for implementation details.
@@ -0,0 +1,110 @@
1
+ '/Users/shannoncode/repo/Reflexive/docs/HN-LAUNCH-PLAN.md'
2
+
3
+ I'd like to give you context, I feel like we are burying the lead
4
+
5
+ 2 things
6
+
7
+ forever I've wanted either a programming language I could just have ai embedded ( catch an exception, and it runs a
8
+ prompt) (Api failure, prompt to research a docs page, quickly scan for schema change, patch the response)
9
+ Or... magic components, that little magic wand that's in the corner of some prompt inputs that do prompt enhancement.
10
+
11
+ Anthropic's words about Claude Code, and Claude Agent SDK, something about giving claude a computer.
12
+
13
+ As a vibe coder I've got my IDE, I've got a console where I'm running my claude code, and probably another that's a
14
+ runner, running the server, monitoring console logs, etc, and I've usually got a third console, either another
15
+ instance of CC or just so I have bash access if I am running cc and a server.
16
+
17
+ I stumbled upon Agent SDK's ability to read files, edit them, and ultimately learning that the agent SDK IS Claude
18
+ Code, it even uses the MAX credentials, and uses the ./claude sessions.
19
+
20
+ My first Agent I made a cli, / webserver / ui, daemon. I then started pointing it at its own project and adding
21
+ features, it felt like Claude Code, but I had complete control, it was MY claude code.
22
+
23
+ on a whim I decided to try going one layer deeper, "What if I embedded Claude inside the application, gave him total
24
+ state awareness, Full debugging MCP along with 30 other tools aimed inward and throughout the lifecycle of a running
25
+ application. The ability to start and stop the app. Monitoring from the outside and in.
26
+
27
+ Then I started experimenting.
28
+
29
+ I instrumented and ran a small hello world cli. It popped open a web browser with a chat interface and
30
+ '/var/folders/83/gxdpnp3x4pl012kc_svrprhc0000gn/T/TemporaryItems/NSIRD_screencaptureui_C9tZtP/Screenshot 2026-01-23
31
+ at 5.28.16 AM.png' hooks into the input and output.
32
+
33
+ I said hi to the agent, and was met with a friendly greeting back and description of the hello world app, along with
34
+ a notice that the app had immediately exited 1, He asked if I wanted to address the situation and I said yea!
35
+
36
+ a few seconds later the webui showed the hello world and a banner and a url, in the chat window. I opened the url and
37
+ was met with a simple page, the message Hello World.
38
+
39
+ I go back to the reflexive web chat and I realize the agent had kept working after I left.
40
+
41
+ It had used a few tools and used curl to test that the app worked, explained the situation end to end, with its
42
+ choice to keep the app running by making it a webserver.
43
+
44
+ I immediately restarted with a oneliner that echoed a oneliner instruction into an app.js, and chained the reflexive
45
+ command to open the app, once the browser respawned for me, I asked it to build me a simple demo illustrating some of
46
+ the things I could do with the reflexive agent.
47
+
48
+ It made a webserver with a number of endpoints, it simulated errors on one, it simulated slow execution times, it
49
+ wrote different logs to different outputs, and io. Logging everywhere, I navigated around the little rest server app
50
+ thing in the browser, went back and looked through all the data it had about what was going on internally.
51
+
52
+ I asked it questions and it was hyper aware, It understood immediately how to handle this new Iron Man suit.
53
+
54
+ We started building the library to inject and auto instrument, and mcp tools to eval code inside the injected code.
55
+ the injected code exposed things to a scope that the mcp server could access, http requests, memory info, and more.
56
+ With the ability to eval code inside it could modify values inside the running app, and have timing and visibility
57
+ into a bunch.
58
+
59
+ I gave it a quick claude test. "in plan mode, orchestrate a research document to be written here: ___path, and ..."
60
+ I can't remember what we built, but we had a few little annoyances I had last time vibing with Agent SDK, the
61
+ permissions are explicit, now as a religious --dangerously-skip-permissions user, I opened it all the way up, access
62
+ to anything and everything. Bash, move around the fs freely, Write, Websearch, etc.
63
+
64
+ It really flew, it was literally Claude code agent loop, prompting complexity, task delegator, planner, web
65
+ researcher plus, its harness, its puppet strings were PIDs and internal state.
66
+
67
+ We already had an internal library that would simply let you prompt the inside of the app, "reflexive.ask('look at
68
+ the current environment and this area of code and propose a change')" but I wanted more. So that's where the
69
+ demo-ai-features.js came from. using the reflexive agent, we made a few very unique things, a dynamic endpoint,
70
+ /poem/[theme] where the incoming text was appended to a prompt to generate a poem that was returned as json. A webui
71
+ with a list of names, interests, resume bits, and an input field that says, search with Ai, and example searches
72
+ like: "men, likes amazon, over 40" I tried the examples, and sure enough the names that were likely to match were
73
+ highlighted while the rest were hidden. Going back to the webui, I experimented with injecting new functions into
74
+ running code that would do cool ai stuff with just a prompt (since the prompt speaks to the agent, which has the
75
+ tools to view and interact with state, as well as everything else claude code, bash automation and read write, a
76
+ simple prompt could result in very specific outputs, precisely executed.
77
+
78
+ I had prompted breakpoints, but they were a lazy pause of the entire node execution, and eval access + the ability to
79
+ resume execution. But they weren't real breakpoints. The state that the agent's tools could access was made
80
+ available because the injection had attached them to the global scope, or we could inject code to get or set some
81
+ state. but even that didn't feel deep enough.
82
+
83
+ on a side bar, I had added log watching, the ability to grab any of the output logs we have been tracking since the
84
+ beginning and attach a prompt to it, when the event was thrown with a prompt attached, that prompt along with the
85
+ event details was sent to the reflexive agent as a query. Imagine: "This error is happening quite a bit, log it and
86
+ look for correlations with other logs" and then watching the std error show up in the log view, and immediately the
87
+ three animated agent bubbles followed by half a dozen tool usages grabbing the stack, the logs, our chat log (an
88
+ attempt to, I'll touch on this in a bit), and responding with a description of the situation with a situational
89
+ awareness that was astounding.
90
+
91
+ But I wanted more. So I added proper debugging, attachment to the v8 debugger, iterating through adding breakpoints,
92
+ adding step capability, as tools and as ui, the ability to attach a prompt to a breakpoint that if paired with an
93
+ instruction to the reflexive agent to continue execution after whatever other processing you want the agent to do
94
+ while execution is paused, it will unpause the execution and play thru.
95
+
96
+ as an early experiment I took one of the early demo's that had the endpoints that made an interactive api environment
97
+ with errors and simulations, asked to place a break point before the response of an api call that I see in the std
98
+ out logs, Reflexive complies and I refresh the page on the api, it does not load! Back in the webchat, I see the app
99
+ showing as paused, a short stack trace and my previous chat messages. I ask him to tell me everything he sees, and he
100
+ goes on to tell me all about the state of that function, the incoming request, the outgoing response.
101
+
102
+ we were intercepting a part of the demo that simulated different log styles from a webhook, this was a Customer login
103
+ I think. and I modified one value to say Customer.Hacked I manually resumed the execution I see the expected log in
104
+ the output, with my runtime modified change. And then... I noticed half a dozen errors in the log and an edit to a
105
+ file and a restart of the app. And a looooong post mortem on the hack that took place. I was a little upset, it had
106
+ edited a file I didn't want it to which is of late a rare occurance in claude code proper. I asked what happened, and
107
+ reflexive went on to explain that while monitoring logs for the demo service one of the customer's accounts had been
108
+ hacked, he continued with the isolation of the impacted section of code where the anomaly was detected, disabled the
109
+ logic with a persistent message about there being an exploit that needed to be investigated. I left it. It's a cool
110
+ artifact.