@anma-labs/mcpgaze 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ [package]
2
+ name = "mcpgaze-proxy"
3
+ version = "1.0.0"
4
+ edition = "2021"
5
+ description = "Zero-dependency byte-exact stdio proxy hot-path for mcpgaze."
6
+
7
+ [[bin]]
8
+ name = "mcpgaze-proxy"
9
+ path = "src/main.rs"
10
+
11
+ [profile.release]
12
+ opt-level = 3
13
+ lto = true
14
+ codegen-units = 1
15
+ panic = "abort"
@@ -0,0 +1,445 @@
1
+ //! mcpgaze-proxy — the native hot-path for `mcpgaze wrap`.
2
+ //!
3
+ //! A zero-dependency (std-only) transparent stdio proxy. It spawns the real MCP
4
+ //! server and sits between it and the client, forwarding bytes UNTOUCHED on each
5
+ //! direction and emitting an observation log on a side channel — the same
6
+ //! invariant as the TypeScript proxy, in a single static binary with no Node
7
+ //! runtime required.
8
+ //!
9
+ //! Scope note: this hot-path does forward + capture with best-effort,
10
+ //! allocation-light JSON-RPC classification. Rich id-correlation/latency and the full
11
+ //! command surface remain in the Node CLI, which can post-process this JSONL
12
+ //! (the event schema is identical, so `mcpgaze triage` consumes it directly).
13
+
14
+ use std::env;
15
+ use std::fs::OpenOptions;
16
+ use std::io::{self, BufWriter, Read, Write};
17
+ use std::process::{Command, Stdio};
18
+ use std::sync::{Arc, Mutex};
19
+ use std::thread;
20
+ use std::time::{SystemTime, UNIX_EPOCH};
21
+
22
+ type Log = Arc<Mutex<BufWriter<std::fs::File>>>;
23
+
24
/// Milliseconds since the Unix epoch, read from the system clock.
///
/// If the clock reports a time before the epoch the result is 0, so callers
/// never have to deal with an error.
fn now_ms() -> u128 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis(),
        Err(_) => 0,
    }
}
30
+
31
/// Escape `s` so it can sit between double quotes in a JSON document.
///
/// `"` and `\` get a backslash prefix, `\n` / `\r` / `\t` use their short
/// escapes, any other character below U+0020 becomes `\u00xx` (lower-case hex),
/// and everything else is copied through unchanged.
fn json_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len() + 2);
    for ch in s.chars() {
        // Characters that have a dedicated two-character escape.
        let short = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        if let Some(seq) = short {
            escaped.push_str(seq);
        } else if u32::from(ch) < 0x20 {
            // Remaining control characters: generic \uXXXX form.
            escaped.push_str(&format!("\\u{:04x}", u32::from(ch)));
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
46
+
47
/// What a single pass over one wire line learned about its TOP-LEVEL JSON object.
///
/// The scan models the view the Node proxy's JSON parser has: a key counts only
/// when it sits directly inside the outermost object.
///
/// Background: classification used to be three whole-line substring tests
/// (`line.contains("\"method\"")` and friends). Those fired on key tokens inside
/// string VALUES, NESTED objects and batch ARRAY elements, treated `id:null` as an
/// id, and accepted a response lacking its `result`/`error` key — a differential
/// audit against the Node observer found 88 such kind disagreements on well-formed
/// lines (see scripts/diff-proxies.mjs --corpus and KNOWN-ISSUES.md).
///
/// The scanner stays parse-free, allocation-light, std-only, char-safe, O(n) and
/// panic-free on arbitrary `&str`. It does NOT JSON-parse, does NOT decode
/// `\uXXXX` escapes in keys or values, and never validates JSON grammar; the
/// residual escaped-key / escaped-value / malformed-input drift is documented and
/// accepted.
struct TopLevel {
    /// First non-whitespace character is `{`.
    is_object: bool,
    /// First non-whitespace character is `[` (a batch -> "unknown", as Node sees
    /// an Array).
    is_array: bool,
    /// A top-level `"method"` key was seen.
    has_method: bool,
    /// A top-level `"id"` key was seen (whatever its value).
    has_id: bool,
    /// The value of the LAST top-level `"id"` key is the literal `null`.
    id_is_null: bool,
    /// A top-level `"result"` key was seen.
    has_result: bool,
    /// A top-level `"error"` key was seen.
    has_error: bool,
    /// Raw (un-decoded) string value of the LAST top-level `"method"` key, or
    /// `None` if the key is absent or its value is not a string. Escapes are kept
    /// verbatim — decoding them needs the JSON string parser this hot-path
    /// deliberately omits.
    method_value: Option<String>,
}
74
+
75
/// Map a raw key to its canonical `'static` name when it is one of the four keys
/// the classifier tracks (`method`, `id`, `result`, `error`). The comparison is
/// exact; any other key yields `None`.
fn recognize(k: &str) -> Option<&'static str> {
    const TRACKED: [&str; 4] = ["method", "id", "result", "error"];
    TRACKED.iter().copied().find(|&name| name == k)
}
84
+
85
/// Extract the JSON string that starts at value position `v` (the text just
/// after a `:`).
///
/// Leading whitespace is skipped. If the next character is not `"` the value is
/// not a string and the result is `None`; an unterminated string is `None` as
/// well. Otherwise the raw content up to the first unescaped `"` is returned
/// with every escape sequence KEPT verbatim. Mirrors the old `string_field`
/// value semantics exactly.
fn read_json_string(v: &str) -> Option<String> {
    let body = v.trim_start().strip_prefix('"')?;
    let mut escaped = false;
    for (idx, ch) in body.char_indices() {
        if escaped {
            // The character after a backslash is part of the string, whatever it is.
            escaped = false;
            continue;
        }
        match ch {
            '\\' => escaped = true,
            // First unescaped quote closes the string; everything before it is the value.
            '"' => return Some(body[..idx].to_string()),
            _ => {}
        }
    }
    None
}
106
+
107
+ fn scan_top_level(line: &str) -> TopLevel {
108
+ let mut t = TopLevel {
109
+ is_object: false,
110
+ is_array: false,
111
+ has_method: false,
112
+ has_id: false,
113
+ id_is_null: false,
114
+ has_result: false,
115
+ has_error: false,
116
+ method_value: None,
117
+ };
118
+
119
+ // Phase 1: the first non-whitespace char decides the container type.
120
+ match line.chars().find(|c| !c.is_whitespace()) {
121
+ Some('{') => t.is_object = true,
122
+ Some('[') => {
123
+ t.is_array = true;
124
+ return t;
125
+ }
126
+ _ => return t, // scalar / empty -> neither object nor array -> "unknown"
127
+ }
128
+
129
+ // Phase 2: walk the body. depth 1 == directly inside the top-level object.
130
+ // A quoted token is a KEY only at depth 1 while we `expect_key`; after a
131
+ // recognized key + ':' we peek the value (id's null-ness, method's string).
132
+ let mut depth: i32 = 0;
133
+ let mut in_str = false;
134
+ let mut esc = false;
135
+ let mut expect_key = false;
136
+ let mut reading_key = false;
137
+ let mut cur_key = String::new(); // single reused buffer
138
+ let mut pending_key: Option<&'static str> = None;
139
+
140
+ for (i, c) in line.char_indices() {
141
+ if in_str {
142
+ if esc {
143
+ esc = false;
144
+ if reading_key {
145
+ cur_key.push(c);
146
+ }
147
+ continue;
148
+ }
149
+ match c {
150
+ '\\' => {
151
+ esc = true;
152
+ if reading_key {
153
+ cur_key.push(c);
154
+ }
155
+ }
156
+ '"' => {
157
+ in_str = false;
158
+ if reading_key {
159
+ reading_key = false;
160
+ if let Some(name) = recognize(&cur_key) {
161
+ match name {
162
+ "method" => t.has_method = true,
163
+ "id" => t.has_id = true,
164
+ "result" => t.has_result = true,
165
+ "error" => t.has_error = true,
166
+ _ => {}
167
+ }
168
+ pending_key = Some(name);
169
+ } else {
170
+ pending_key = None;
171
+ }
172
+ expect_key = false; // now expect ':' then a value
173
+ }
174
+ }
175
+ _ => {
176
+ if reading_key {
177
+ cur_key.push(c);
178
+ }
179
+ }
180
+ }
181
+ continue;
182
+ }
183
+ match c {
184
+ '"' => {
185
+ in_str = true;
186
+ if depth == 1 && expect_key {
187
+ reading_key = true;
188
+ cur_key.clear();
189
+ }
190
+ }
191
+ '{' => {
192
+ depth += 1;
193
+ if depth == 1 {
194
+ expect_key = true;
195
+ }
196
+ }
197
+ '}' => {
198
+ depth -= 1;
199
+ if depth == 0 {
200
+ break;
201
+ }
202
+ }
203
+ '[' => depth += 1,
204
+ ']' => depth -= 1,
205
+ ':' => {
206
+ if depth == 1 {
207
+ if let Some(name) = pending_key.take() {
208
+ let val = &line[i + 1..]; // ':' is 1 byte -> i+1 is a char boundary
209
+ if name == "id" {
210
+ // Assign (not OR): the LAST top-level id wins, like JSON.parse.
211
+ t.id_is_null = val.trim_start().starts_with("null");
212
+ } else if name == "method" {
213
+ t.method_value = read_json_string(val);
214
+ }
215
+ }
216
+ }
217
+ }
218
+ ',' if depth == 1 => {
219
+ expect_key = true;
220
+ pending_key = None;
221
+ }
222
+ _ => {}
223
+ }
224
+ }
225
+
226
+ t
227
+ }
228
+
229
+ /// Classify a wire line exactly as the Node proxy's `classify()` would (modulo the
230
+ /// documented parse-free residuals): only top-level fields count, `id:null` is NOT
231
+ /// an id (but `id:0` is), and response/error require their key to be PRESENT.
232
+ fn classify(t: &TopLevel) -> &'static str {
233
+ if t.is_array || !t.is_object {
234
+ return "unknown"; // top-level array (batch) or non-object -> unknown, as Node sees it
235
+ }
236
+ let id = t.has_id && !t.id_is_null;
237
+ if t.has_method && id {
238
+ "request"
239
+ } else if t.has_method && !id {
240
+ "notification"
241
+ } else if !t.has_method && id && t.has_error {
242
+ "error"
243
+ } else if !t.has_method && id && t.has_result {
244
+ "response"
245
+ } else {
246
+ "unknown"
247
+ }
248
+ }
249
+
250
+ fn write_line(log: &Log, s: &str) {
251
+ if let Ok(mut g) = log.lock() {
252
+ let _ = g.write_all(s.as_bytes());
253
+ let _ = g.flush();
254
+ }
255
+ }
256
+
257
+ fn log_message(log: &Log, dir: &str, line: &str) {
258
+ let t = scan_top_level(line);
259
+ let kind = classify(&t);
260
+ let method = match &t.method_value {
261
+ Some(m) => format!("\"{}\"", json_escape(m)),
262
+ None => "null".to_string(),
263
+ };
264
+ let ev = format!(
265
+ "{{\"t\":{},\"type\":\"message\",\"dir\":\"{}\",\"kind\":\"{}\",\"method\":{},\"raw\":\"{}\"}}\n",
266
+ now_ms(),
267
+ dir,
268
+ kind,
269
+ method,
270
+ json_escape(line),
271
+ );
272
+ write_line(log, &ev);
273
+ }
274
+
275
/// Cap, in bytes, on the observation accumulator.
///
/// Without it, a peer that streams bytes containing no '\n' would make `acc`
/// grow until allocation fails; with `panic = "abort"` that aborts (SIGABRT) the
/// whole proxy — the observer crashing the wire. A real MCP message is tiny, so
/// once the cap is exceeded the over-long line is discarded and observation
/// resyncs at the next newline. The bytes have already been forwarded by then.
const MAX_LINE: usize = 1 << 20; // 1 MiB
281
+
282
+ /// Forward bytes from `src` to `dst` UNTOUCHED, then observe complete lines.
283
+ /// The forward write happens before any observation work.
284
+ fn pump<R: Read + Send + 'static, W: Write + Send + 'static>(
285
+ mut src: R,
286
+ mut dst: W,
287
+ dir: &'static str,
288
+ log: Log,
289
+ ) {
290
+ let mut buf = [0u8; 65536];
291
+ let mut acc: Vec<u8> = Vec::new();
292
+ let mut overflow = false; // discarding an over-long, newline-free line
293
+ loop {
294
+ match src.read(&mut buf) {
295
+ Ok(0) => break,
296
+ Ok(n) => {
297
+ if dst.write_all(&buf[..n]).is_err() {
298
+ break;
299
+ }
300
+ let _ = dst.flush();
301
+ if overflow {
302
+ // Forwarding already happened; find the resync point and drop
303
+ // everything up to (and including) the next newline.
304
+ match buf[..n].iter().position(|&b| b == b'\n') {
305
+ Some(pos) => {
306
+ overflow = false;
307
+ acc.extend_from_slice(&buf[pos + 1..n]);
308
+ }
309
+ None => continue,
310
+ }
311
+ } else {
312
+ acc.extend_from_slice(&buf[..n]);
313
+ }
314
+ while let Some(pos) = acc.iter().position(|&b| b == b'\n') {
315
+ let line: Vec<u8> = acc.drain(..=pos).collect();
316
+ let text = String::from_utf8_lossy(&line[..line.len() - 1]);
317
+ // Trim like JS String.prototype.trim(), which also strips the
318
+ // U+FEFF BOM (Rust's str::trim does not) — keeps the recorded
319
+ // `raw` identical across the Node and Rust proxies.
320
+ let trimmed = text.trim_matches(|c: char| c.is_whitespace() || c == '\u{feff}');
321
+ if !trimmed.is_empty() {
322
+ log_message(&log, dir, trimmed);
323
+ }
324
+ }
325
+ if !overflow && acc.len() > MAX_LINE {
326
+ overflow = true;
327
+ acc = Vec::new(); // free the buffer; resync at the next newline
328
+ }
329
+ }
330
+ Err(_) => break,
331
+ }
332
+ }
333
+ }
334
+
335
+ fn pump_stderr<R: Read + Send + 'static>(mut src: R, log: Log) {
336
+ let mut buf = [0u8; 65536];
337
+ loop {
338
+ match src.read(&mut buf) {
339
+ Ok(0) => break,
340
+ Ok(n) => {
341
+ let _ = io::stderr().write_all(&buf[..n]);
342
+ let text = String::from_utf8_lossy(&buf[..n]);
343
+ let ev = format!(
344
+ "{{\"t\":{},\"type\":\"server_stderr\",\"text\":\"{}\"}}\n",
345
+ now_ms(),
346
+ json_escape(&text),
347
+ );
348
+ write_line(&log, &ev);
349
+ }
350
+ Err(_) => break,
351
+ }
352
+ }
353
+ }
354
+
355
+ fn main() {
356
+ let args: Vec<String> = env::args().collect();
357
+ let sep = args.iter().position(|a| a == "--");
358
+ let (opts, cmd) = match sep {
359
+ Some(i) => (&args[1..i], &args[i + 1..]),
360
+ None => {
361
+ eprintln!("usage: mcpgaze-proxy [--log <path>] -- <server command...>");
362
+ std::process::exit(2);
363
+ }
364
+ };
365
+ if cmd.is_empty() {
366
+ eprintln!("usage: mcpgaze-proxy [--log <path>] -- <server command...>");
367
+ std::process::exit(2);
368
+ }
369
+ let mut log_path = String::from(".mcpgaze/session.jsonl");
370
+ let mut it = opts.iter();
371
+ while let Some(o) = it.next() {
372
+ if o == "--log" {
373
+ if let Some(p) = it.next() {
374
+ log_path = p.clone();
375
+ }
376
+ }
377
+ }
378
+
379
+ if let Some(parent) = std::path::Path::new(&log_path).parent() {
380
+ let _ = std::fs::create_dir_all(parent);
381
+ }
382
+ let file = OpenOptions::new()
383
+ .create(true)
384
+ .append(true)
385
+ .open(&log_path)
386
+ .unwrap_or_else(|e| {
387
+ eprintln!("mcpgaze-proxy: cannot open log {log_path}: {e}");
388
+ std::process::exit(1);
389
+ });
390
+ let log: Log = Arc::new(Mutex::new(BufWriter::new(file)));
391
+
392
+ let mut child = match Command::new(&cmd[0])
393
+ .args(&cmd[1..])
394
+ .stdin(Stdio::piped())
395
+ .stdout(Stdio::piped())
396
+ .stderr(Stdio::piped())
397
+ .spawn()
398
+ {
399
+ Ok(c) => c,
400
+ Err(e) => {
401
+ write_line(
402
+ &log,
403
+ &format!(
404
+ "{{\"t\":{},\"type\":\"note\",\"code\":\"spawn-error\",\"detail\":\"{}\"}}\n",
405
+ now_ms(),
406
+ json_escape(&e.to_string())
407
+ ),
408
+ );
409
+ eprintln!("mcpgaze-proxy: spawn failed: {e}");
410
+ std::process::exit(127);
411
+ }
412
+ };
413
+
414
+ let child_stdin = child.stdin.take().expect("stdin");
415
+ let child_stdout = child.stdout.take().expect("stdout");
416
+ let child_stderr = child.stderr.take().expect("stderr");
417
+
418
+ // client -> server (detached: blocks on our stdin; process exit reaps it)
419
+ {
420
+ let log = Arc::clone(&log);
421
+ thread::spawn(move || pump(io::stdin(), child_stdin, "c2s", log));
422
+ }
423
+ // server -> client (joined so we flush all wire bytes before exit)
424
+ let out_handle = {
425
+ let log = Arc::clone(&log);
426
+ thread::spawn(move || pump(child_stdout, io::stdout(), "s2c", log))
427
+ };
428
+ let err_handle = {
429
+ let log = Arc::clone(&log);
430
+ thread::spawn(move || pump_stderr(child_stderr, log))
431
+ };
432
+
433
+ let status = child.wait().map(|s| s.code().unwrap_or(0)).unwrap_or(1);
434
+ let _ = out_handle.join();
435
+ let _ = err_handle.join();
436
+ write_line(
437
+ &log,
438
+ &format!(
439
+ "{{\"t\":{},\"type\":\"note\",\"code\":\"server-exit\",\"detail\":\"code={}\"}}\n",
440
+ now_ms(),
441
+ status
442
+ ),
443
+ );
444
+ std::process::exit(status);
445
+ }
package/package.json ADDED
@@ -0,0 +1,59 @@
1
+ {
2
+ "name": "@anma-labs/mcpgaze",
3
+ "version": "1.0.1",
4
+ "description": "A transparent wiretap for MCP servers. See exactly what your AI client sends your server — without breaking the protocol — and catch tool-schema drift in CI.",
5
+ "repository": {
6
+ "type": "git",
7
+ "url": "git+https://github.com/anma-labs/mcpgaze.git"
8
+ },
9
+ "homepage": "https://github.com/anma-labs/mcpgaze#readme",
10
+ "bugs": {
11
+ "url": "https://github.com/anma-labs/mcpgaze/issues"
12
+ },
13
+ "type": "module",
14
+ "bin": {
15
+ "mcpgaze": "dist/index.js"
16
+ },
17
+ "files": [
18
+ "dist",
19
+ "native/mcpgaze-proxy/Cargo.toml",
20
+ "native/mcpgaze-proxy/src",
21
+ "!native/**/target"
22
+ ],
23
+ "engines": {
24
+ "node": ">=18"
25
+ },
26
+ "scripts": {
27
+ "build": "tsup src/index.ts --format esm --target node18 --clean",
28
+ "typecheck": "tsc --noEmit",
29
+ "test": "node --import tsx --test src/test/*.test.ts",
30
+ "test:fuzz": "node --import tsx --test src/test/fuzz.test.ts",
31
+ "harden": "npm run build && node scripts/diff-proxies.mjs --corpus scripts/corpus --repeat 10 && node scripts/wire-integrity.mjs && node scripts/dogfood.mjs",
32
+ "dev": "tsx src/index.ts",
33
+ "prepack": "npm run build",
34
+ "pretest": "npm run build"
35
+ },
36
+ "keywords": [
37
+ "mcp",
38
+ "model-context-protocol",
39
+ "debugging",
40
+ "proxy",
41
+ "observability",
42
+ "schema-drift",
43
+ "cli",
44
+ "ai-agents",
45
+ "llm-tools"
46
+ ],
47
+ "license": "Apache-2.0",
48
+ "devDependencies": {
49
+ "@modelcontextprotocol/sdk": "^1.29.0",
50
+ "@types/node": "^20.14.0",
51
+ "tsup": "^8.3.0",
52
+ "tsx": "^4.19.0",
53
+ "typescript": "^5.5.0",
54
+ "zod": "^3.23.0"
55
+ },
56
+ "publishConfig": {
57
+ "access": "public"
58
+ }
59
+ }