note-connector 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/paths.js +4 -0
- package/dist/setup-dependencies.js +56 -13
- package/package.json +3 -2
- package/py/pyproject.toml +86 -0
- package/py/src/note_mcp/__init__.py +7 -0
- package/py/src/note_mcp/__main__.py +65 -0
- package/py/src/note_mcp/api/__init__.py +31 -0
- package/py/src/note_mcp/api/articles.py +1395 -0
- package/py/src/note_mcp/api/client.py +318 -0
- package/py/src/note_mcp/api/embeds.py +482 -0
- package/py/src/note_mcp/api/images.py +456 -0
- package/py/src/note_mcp/api/preview.py +142 -0
- package/py/src/note_mcp/api/public_notes.py +150 -0
- package/py/src/note_mcp/auth/__init__.py +9 -0
- package/py/src/note_mcp/auth/browser.py +574 -0
- package/py/src/note_mcp/auth/file_session.py +145 -0
- package/py/src/note_mcp/auth/session.py +240 -0
- package/py/src/note_mcp/browser/__init__.py +10 -0
- package/py/src/note_mcp/browser/config.py +21 -0
- package/py/src/note_mcp/browser/manager.py +182 -0
- package/py/src/note_mcp/browser/preview.py +68 -0
- package/py/src/note_mcp/browser/url_helpers.py +18 -0
- package/py/src/note_mcp/chatgpt/__init__.py +1 -0
- package/py/src/note_mcp/chatgpt/__main__.py +63 -0
- package/py/src/note_mcp/chatgpt/access_log.py +25 -0
- package/py/src/note_mcp/chatgpt/auth.py +52 -0
- package/py/src/note_mcp/chatgpt/images.py +92 -0
- package/py/src/note_mcp/chatgpt/login_once.py +26 -0
- package/py/src/note_mcp/chatgpt/middleware.py +31 -0
- package/py/src/note_mcp/chatgpt/tools.py +255 -0
- package/py/src/note_mcp/chatgpt/widgets.py +121 -0
- package/py/src/note_mcp/decorators.py +113 -0
- package/py/src/note_mcp/investigator/__init__.py +33 -0
- package/py/src/note_mcp/investigator/__main__.py +11 -0
- package/py/src/note_mcp/investigator/cli.py +313 -0
- package/py/src/note_mcp/investigator/core.py +653 -0
- package/py/src/note_mcp/investigator/mcp_tools.py +225 -0
- package/py/src/note_mcp/models.py +557 -0
- package/py/src/note_mcp/py.typed +0 -0
- package/py/src/note_mcp/server.py +905 -0
- package/py/src/note_mcp/utils/__init__.py +7 -0
- package/py/src/note_mcp/utils/file_parser.py +314 -0
- package/py/src/note_mcp/utils/html_to_markdown.py +477 -0
- package/py/src/note_mcp/utils/logging.py +119 -0
- package/py/src/note_mcp/utils/markdown.py +12 -0
- package/py/src/note_mcp/utils/markdown_to_html.py +826 -0
|
@@ -0,0 +1,653 @@
|
|
|
1
|
+
"""HTTP traffic investigation core module.
|
|
2
|
+
|
|
3
|
+
Provides proxy management and capture session handling for API investigation.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import base64
|
|
10
|
+
import contextlib
|
|
11
|
+
import json
|
|
12
|
+
import logging
|
|
13
|
+
import os
|
|
14
|
+
import re
|
|
15
|
+
import subprocess
|
|
16
|
+
import time
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import TYPE_CHECKING, Any, ClassVar, TextIO
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from playwright.async_api import Browser, BrowserContext, Page, Playwright
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
# Timeout constants (Article 6: named constants for all timeouts)
|
|
27
|
+
PROXY_STOP_TIMEOUT_SEC: int = 5 # Timeout for stopping proxy process
|
|
28
|
+
PAGE_CLOSE_WAIT_TIMEOUT_MS: int = 0  # Passed to page.wait_for_event("close"); Playwright treats 0 as "no timeout", so the wait is unbounded
|
|
29
|
+
MITMDUMP_READ_TIMEOUT_SEC: int = 30 # Timeout for reading mitmdump output
|
|
30
|
+
PAGE_NAVIGATION_TIMEOUT_MS: int = 30000 # Timeout for page navigation
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class CapturedRequest:
    """A single HTTP request together with the response it received.

    Only ``method`` and ``url`` are required; every other field has a
    default so partially captured exchanges can still be represented.

    Attributes:
        method: HTTP method of the request.
        url: URL the request was sent to.
        headers: Request headers (a fresh empty dict per instance by default).
        body: Request body text, or None.
        response_status: Response status code; defaults to 0.
        response_headers: Response headers (a fresh empty dict per instance
            by default).
        response_body: Response body text, or None.
    """

    # Request side
    method: str
    url: str
    headers: dict[str, str] = field(default_factory=dict)
    body: str | None = None

    # Response side
    response_status: int = 0
    response_headers: dict[str, str] = field(default_factory=dict)
    response_body: str | None = None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class ProxyManager:
    """Manages mitmproxy (mitmdump) process lifecycle.

    The proxy is launched as a child process via ``uv run mitmdump``. Its
    stderr is redirected to a debug log file whose handle is owned by this
    object and released again in :meth:`stop`.

    Attributes:
        port: Proxy server port
        process: Subprocess running mitmdump, or None when not started
        output_file: Path to traffic capture file, or None before start()
    """

    def __init__(self, port: int = 8080) -> None:
        """Initialize proxy manager.

        Args:
            port: Port number for the proxy server
        """
        self.port = port
        self.process: subprocess.Popen[bytes] | None = None
        self.output_file: Path | None = None
        self._log_handle: TextIO | None = None

    def start(self, output: Path, domain_filter: str | None = None) -> None:
        """Start mitmproxy in dump mode.

        If a proxy started by this object is still around, it is stopped
        first so that neither its process nor its log handle is orphaned.
        This call blocks for a few seconds while mitmproxy initializes.

        Args:
            output: Path to save captured traffic
            domain_filter: Optional domain to filter (e.g., "note.com")

        Raises:
            RuntimeError: If the mitmdump process exits during startup
            OSError: If the log file cannot be opened or the command cannot
                be executed
        """
        # A second start() would otherwise overwrite self.process and
        # self._log_handle, leaking the previous child process and file.
        if self.process is not None or self._log_handle is not None:
            self.stop()

        self.output_file = output

        # Use uv run to execute mitmdump within the project's virtual environment
        cmd = [
            "uv",
            "run",
            "mitmdump",
            "--mode",
            f"regular@{self.port}",
            "--set",
            "flow_detail=3",
            "-w",
            str(output),
        ]

        # Add domain filter if specified
        if domain_filter:
            cmd.extend(["--set", f"filter=~d {domain_filter}"])

        # Don't capture stdout/stderr with PIPE - it causes blocking issues
        # mitmproxy needs to write output freely without buffer pressure
        # Log to file for debugging, stdout to DEVNULL
        log_file = Path("/tmp/mitmproxy_debug.log")
        try:
            self._log_handle = open(log_file, "w")  # noqa: SIM115
            self.process = subprocess.Popen(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=self._log_handle,
            )

            # Wait for proxy to start - mitmproxy needs time to initialize
            time.sleep(3.0)

            if self.process.poll() is not None:
                msg = f"Failed to start mitmproxy on port {self.port}. Check if the port is already in use."
                raise RuntimeError(msg)
        except Exception:
            # Leave the object in a clean "not started" state: reap the
            # child (if any), drop the stale Popen reference and close the log.
            self.stop()
            raise

    def stop(self) -> None:
        """Stop mitmproxy process and close the debug log handle.

        Safe to call when nothing is running. The process is asked to
        terminate first and is killed if it does not exit within
        PROXY_STOP_TIMEOUT_SEC seconds.
        """
        process, self.process = self.process, None
        try:
            if process:
                process.terminate()
                try:
                    process.wait(timeout=PROXY_STOP_TIMEOUT_SEC)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        finally:
            # Always release the log file, even if terminating the child failed
            if self._log_handle:
                self._log_handle.close()
                self._log_handle = None

    def is_running(self) -> bool:
        """Check if mitmproxy is running.

        Returns:
            True if a process was started and has not exited yet
        """
        return self.process is not None and self.process.poll() is None
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class CaptureSession:
    """Manages a traffic capture session with browser.

    Combines ProxyManager with Playwright browser for interactive
    traffic investigation: the browser is launched with the local proxy
    configured, and the proxy writes the traffic to a flow file that the
    analysis methods read back through ``mitmdump -r``.
    """

    # HTTP methods recognised in the request lines printed by mitmdump
    _REQUEST_METHODS = ("GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS")

    def __init__(self, proxy_port: int = 8080) -> None:
        """Initialize capture session.

        Args:
            proxy_port: Port for the proxy server
        """
        self.proxy = ProxyManager(proxy_port)
        self.proxy_port = proxy_port
        self._playwright: Playwright | None = None
        self._browser: Browser | None = None
        self._context: BrowserContext | None = None
        self._page: Page | None = None

    def _verify_proxy_ready(self, timeout: float = 5.0) -> bool:
        """Verify that the proxy is accepting connections.

        Repeatedly tries a TCP connect to 127.0.0.1 on the proxy port until
        it succeeds or the timeout elapses.

        Args:
            timeout: Maximum time to wait for proxy, in seconds

        Returns:
            True if proxy is ready, False otherwise
        """
        import socket

        # Monotonic clock: the deadline must not move if the wall clock is adjusted
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            try:
                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                    sock.settimeout(1.0)
                    if sock.connect_ex(("127.0.0.1", self.proxy_port)) == 0:
                        logger.info(f"Proxy is ready on port {self.proxy_port}")
                        return True
            except OSError:
                # Treated the same as "not ready yet": retry until the deadline
                pass
            time.sleep(0.5)

        logger.error(f"Proxy did not become ready on port {self.proxy_port}")
        return False

    async def start(
        self,
        output: Path,
        domain_filter: str | None = None,
        restore_session: bool = True,
    ) -> Page:
        """Start capture session with browser.

        If any step fails, everything started so far (proxy, Playwright,
        browser) is torn down again before the error is re-raised.

        Args:
            output: Path to save captured traffic
            domain_filter: Optional domain to filter
            restore_session: Whether to restore saved session cookies

        Returns:
            Playwright Page instance for interaction

        Raises:
            RuntimeError: If the proxy fails to start or never accepts
                connections
        """
        try:
            # Start proxy first
            self.proxy.start(output, domain_filter)

            # Verify proxy is accepting connections before launching browser
            if not self._verify_proxy_ready():
                raise RuntimeError(f"Proxy is not accepting connections on port {self.proxy_port}")

            # Start browser with proxy settings
            from playwright.async_api import async_playwright

            self._playwright = await async_playwright().start()
            # Use Playwright's bundled Chromium with certificate bypass flags
            # This avoids certificate trust issues with mitmproxy
            self._browser = await self._playwright.chromium.launch(
                headless=False,
                proxy={"server": f"http://127.0.0.1:{self.proxy_port}"},
                args=[
                    "--no-sandbox",  # Required for some WSL2 configurations
                    "--ignore-certificate-errors",  # Bypass all certificate validation
                    "--allow-insecure-localhost",  # Allow insecure localhost connections
                ],
            )
            self._context = await self._browser.new_context(
                ignore_https_errors=True,
            )
            self._page = await self._context.new_page()

            # Restore session if available and requested
            if restore_session:
                await self._restore_session()
        except BaseException:
            # Do not leave a running proxy or browser behind on a failed
            # (or cancelled) start; cleanup errors must not mask the cause.
            with contextlib.suppress(Exception):
                await self.close()
            raise

        return self._page

    async def _restore_session(self) -> bool:
        """Restore saved session cookies to browser context.

        Loads the saved session through ``SessionManager`` and, when it is
        present, unexpired and has cookies, adds each cookie to the browser
        context for domain ``.note.com``. Failures are logged, never raised.

        Returns:
            True if session was restored, False otherwise
        """
        try:
            from note_mcp.auth.session import SessionManager

            saved_session = SessionManager().load()

            if not (saved_session and not saved_session.is_expired() and saved_session.cookies):
                logger.info("No valid saved session found - manual login required")
                return False

            logger.info("Restoring saved session cookies...")

            # Convert saved cookies to Playwright format
            playwright_cookies: list[dict[str, Any]] = [
                {
                    "name": name,
                    "value": value,
                    "domain": ".note.com",
                    "path": "/",
                }
                for name, value in saved_session.cookies.items()
            ]

            if not self._context:
                logger.warning("No browser context available for session restore")
                return False

            await self._context.add_cookies(playwright_cookies)  # type: ignore[arg-type]
            logger.info(f"Restored {len(playwright_cookies)} cookies for user: {saved_session.username}")
            return True

        except ImportError as e:
            logger.error(f"Session module import failed: {e}")
            return False
        except Exception as e:
            logger.warning(f"Failed to restore session (will require manual login): {e}")
            return False

    async def wait_for_close(self) -> None:
        """Wait for browser to be closed by user.

        Returns immediately when no page is open.
        """
        if self._page:
            # Wait indefinitely for page close (suppress if page already closed)
            with contextlib.suppress(Exception):
                await self._page.wait_for_event("close", timeout=PAGE_CLOSE_WAIT_TIMEOUT_MS)

    async def close(self) -> None:
        """Close browser and stop proxy.

        Every teardown step runs even if an earlier one raises, so a failing
        browser shutdown cannot leave the proxy process running. Safe to
        call more than once.
        """
        try:
            if self._browser:
                await self._browser.close()
        finally:
            self._browser = None
            try:
                if self._playwright:
                    await self._playwright.stop()
            finally:
                self._playwright = None
                self._context = None
                self._page = None
                self.proxy.stop()

    # =========================================================================
    # Browser operation methods for MCP tools
    # =========================================================================

    def _require_page(self) -> Page:
        """Return the active page or raise RuntimeError if none is open."""
        if not self._page:
            raise RuntimeError("Session not started")
        return self._page

    async def navigate(self, url: str) -> str:
        """Navigate to specified URL.

        Args:
            url: Target URL to navigate to

        Returns:
            Navigation result with page title

        Raises:
            RuntimeError: If session not started
        """
        page = self._require_page()
        await page.goto(url, wait_until="domcontentloaded")
        title = await page.title()
        return f"Navigated to {url}, title: {title}"

    async def click(self, selector: str) -> str:
        """Click element by CSS selector.

        Args:
            selector: CSS selector for target element

        Returns:
            Click result message

        Raises:
            RuntimeError: If session not started
        """
        page = self._require_page()
        await page.click(selector)
        return f"Clicked {selector}"

    async def type_text(self, selector: str, text: str) -> str:
        """Type text into specified element.

        Args:
            selector: CSS selector for input element
            text: Text to type

        Returns:
            Type result message

        Raises:
            RuntimeError: If session not started
        """
        page = self._require_page()
        await page.fill(selector, text)
        return f"Typed text into {selector}"

    async def screenshot(self) -> str:
        """Take screenshot of current page.

        Returns:
            Base64-encoded screenshot bytes as returned by Playwright

        Raises:
            RuntimeError: If session not started
        """
        page = self._require_page()
        screenshot_bytes = await page.screenshot()
        return base64.b64encode(screenshot_bytes).decode()

    async def get_page_content(self) -> str:
        """Get current page HTML content.

        Returns:
            Full HTML content of current page

        Raises:
            RuntimeError: If session not started
        """
        page = self._require_page()
        return await page.content()

    # =========================================================================
    # Traffic analysis methods
    # =========================================================================

    @classmethod
    def _parse_flow_output(
        cls,
        stdout: str,
        url_filter: re.Pattern[str] | None = None,
    ) -> list[dict[str, Any]]:
        """Parse ``mitmdump -r ... flow_detail=1`` output into traffic entries.

        Expected line shapes:
            "127.0.0.1:49898: GET https://note.com/ HTTP/2.0"
            "    << HTTP/2.0 200 OK 38.5k"   (or "<< 200 OK 105b")

        An entry is emitted when the response line following a request line
        is seen; a request line with no response line produces no entry.

        Args:
            stdout: Text printed by mitmdump
            url_filter: Optional compiled regex; requests whose URL does not
                match are dropped

        Returns:
            List of dicts with "method", "url" and "status" keys
        """
        markers = tuple(f": {name} " for name in cls._REQUEST_METHODS)
        traffic: list[dict[str, Any]] = []
        current_entry: dict[str, Any] | None = None

        for line in stdout.strip().split("\n"):
            if not line:
                continue

            # Request line: contains method and URL
            if any(marker in line for marker in markers):
                # Drop the "client-address:" prefix, keep "METHOD URL PROTO"
                req_parts = line.split(": ", 1)[1].split()
                if len(req_parts) < 2:
                    continue
                method, url = req_parts[0], req_parts[1]

                # Apply pattern filter
                if url_filter and not url_filter.search(url):
                    current_entry = None
                    continue

                current_entry = {"method": method, "url": url, "status": 0}

            # Response line: contains status code
            elif line.strip().startswith("<<") and current_entry:
                # First purely numeric token is the status code
                for part in line.split():
                    if part.isdecimal():
                        current_entry["status"] = int(part)
                        break

                traffic.append(current_entry)
                current_entry = None

        return traffic

    def get_traffic(self, pattern: str | None = None) -> list[dict[str, Any]]:
        """Get captured traffic as list of request/response pairs.

        Runs ``uv run mitmdump -r <flow file>`` and parses its output. All
        failures are logged and result in an empty (or partial) list rather
        than an exception.

        Args:
            pattern: Optional regex pattern to filter URLs

        Returns:
            List of traffic entries with method, url and status
        """
        flow_file = self.proxy.output_file
        if not flow_file or not flow_file.exists():
            return []

        # Compile once up front: cheaper than re-resolving per line, and an
        # invalid pattern is reported before spawning mitmdump.
        url_filter: re.Pattern[str] | None = None
        if pattern:
            try:
                url_filter = re.compile(pattern)
            except re.error as e:
                logger.error(f"Invalid URL pattern {pattern!r}: {e}")
                return []

        try:
            # Read mitmproxy flow file using mitmdump
            # Note: flow_detail=1 produces concise output, flow_detail=0 produces nothing
            result = subprocess.run(
                [
                    "uv",
                    "run",
                    "mitmdump",
                    "-r",
                    str(flow_file),
                    "-n",  # No upstream connection
                    "--set",
                    "flow_detail=1",
                ],
                capture_output=True,
                text=True,
                errors="replace",  # Undecodable bytes must not abort the whole read
                timeout=MITMDUMP_READ_TIMEOUT_SEC,
            )
        except subprocess.TimeoutExpired:
            logger.warning(f"Traffic read timed out after {MITMDUMP_READ_TIMEOUT_SEC} seconds")
            return []
        except FileNotFoundError:
            # The executable actually launched is "uv", which then runs mitmdump
            logger.error("uv not found - ensure uv and mitmproxy are installed")
            return []
        except Exception as e:
            logger.error(f"Failed to read traffic: {type(e).__name__}: {e}")
            return []

        # subprocess.run() is called without check=True, so a failing
        # mitmdump has to be detected through the return code.
        if result.returncode != 0:
            logger.error(f"mitmdump failed with exit code {result.returncode}: {result.stderr}")

        return self._parse_flow_output(result.stdout, url_filter)

    def analyze_traffic(self, pattern: str, method: str | None = None) -> str:
        """Analyze traffic matching pattern.

        Args:
            pattern: Regex pattern to match URLs
            method: Optional HTTP method filter (case-insensitive)

        Returns:
            Analysis result as formatted string: a header followed by the
            matched URLs with their request counts, most frequent first
        """
        from collections import Counter

        traffic = self.get_traffic(pattern)

        if method:
            wanted = method.upper()
            traffic = [t for t in traffic if t["method"].upper() == wanted]

        if not traffic:
            return f"No traffic matching pattern: {pattern}"

        # Build analysis report
        lines = [f"Traffic Analysis for pattern: {pattern}"]
        if method:
            lines.append(f"  Method filter: {method}")
        lines.append(f"  Total requests: {len(traffic)}")
        lines.append("")

        # Group by URL; most_common() orders by descending count and keeps
        # first-seen order among URLs with equal counts.
        url_counts = Counter(t["url"] for t in traffic)

        lines.append("Requests by URL:")
        lines.extend(f"  [{count}x] {url}" for url, count in url_counts.most_common())

        return "\n".join(lines)

    def export_traffic(self, output_path: str) -> str:
        """Export captured traffic to JSON file.

        Parent directories of the output file are created when missing.

        Args:
            output_path: Path to output JSON file

        Returns:
            Export result message
        """
        traffic = self.get_traffic()
        output = Path(output_path)
        output.parent.mkdir(parents=True, exist_ok=True)

        with open(output, "w", encoding="utf-8") as f:
            json.dump(traffic, f, ensure_ascii=False, indent=2)

        return f"Exported {len(traffic)} requests to {output_path}"
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
class CaptureSessionManager:
    """Singleton manager for sharing CaptureSession across MCP tools.

    Ensures only one capture session is active at a time. Access to the
    shared instance is serialised with an asyncio.Lock, which protects
    against concurrent coroutines on one event loop (it is not a
    thread-level lock).
    """

    _instance: ClassVar[CaptureSession | None] = None
    _lock: ClassVar[asyncio.Lock] = asyncio.Lock()
    _domain: ClassVar[str | None] = None
    _output_file: ClassVar[Path | None] = None

    @classmethod
    async def get_or_create(
        cls,
        domain: str,
        port: int = 8080,
    ) -> CaptureSession:
        """Get existing session or create new one.

        When a session already exists it is returned as-is; ``domain`` and
        ``port`` are only used when a new session has to be created.

        Args:
            domain: Domain filter for traffic capture
            port: Proxy port number

        Returns:
            Active CaptureSession instance

        Raises:
            Exception: Whatever CaptureSession.start() raises; in that case
                no session is registered and the next call starts afresh
        """
        async with cls._lock:
            if cls._instance is None:
                # Use APP_DATA_DIR environment variable with fallback to /app/data
                data_dir = Path(os.environ.get("APP_DATA_DIR", "/app/data"))
                data_dir.mkdir(parents=True, exist_ok=True)
                output_file = data_dir / f"capture_{int(time.time())}.flow"

                session = CaptureSession(port)
                try:
                    await session.start(
                        output=output_file,
                        domain_filter=domain,
                        restore_session=True,
                    )
                except BaseException:
                    # Never publish a half-started session: release whatever
                    # was started and keep the singleton empty so that a
                    # later call can retry. Cleanup errors must not mask
                    # the original failure.
                    with contextlib.suppress(Exception):
                        await session.close()
                    raise

                # Publish only after a successful start
                cls._instance = session
                cls._domain = domain
                cls._output_file = output_file
            return cls._instance

    @classmethod
    async def get_active_session(cls) -> CaptureSession | None:
        """Get active session if exists.

        Waits for the manager lock, so it does not observe a session that
        is in the middle of being created or closed.

        Returns:
            Active CaptureSession instance or None if no session is active
        """
        async with cls._lock:
            return cls._instance

    @classmethod
    async def close(cls) -> None:
        """Close active session if exists.

        The singleton state is cleared even if closing the session raises.
        """
        async with cls._lock:
            session = cls._instance
            if session:
                # Reset first so a failing close() cannot leave a dead
                # session registered as the active one.
                cls._instance = None
                cls._domain = None
                cls._output_file = None
                await session.close()

    @classmethod
    def get_status(cls) -> dict[str, Any]:
        """Get current session status.

        Reads the class state without taking the lock.

        Returns:
            ``{"active": False}`` when no session exists, otherwise a dict
            with active flag, domain, output file and proxy_running
        """
        if cls._instance is None:
            return {"active": False}

        return {
            "active": True,
            "domain": cls._domain,
            "output_file": str(cls._output_file) if cls._output_file else None,
            "proxy_running": cls._instance.proxy.is_running(),
        }
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
async def run_capture_session(
    output: Path,
    initial_url: str = "https://note.com",
    proxy_port: int = 8080,
    domain_filter: str | None = None,
    restore_session: bool = True,
) -> None:
    """Run an interactive capture session.

    Starts proxy and browser, navigates to initial URL,
    then waits for user to close browser. The session is always closed
    afterwards, including when startup or waiting fails.

    Args:
        output: Path to save captured traffic
        initial_url: URL to navigate to initially
        proxy_port: Port for proxy server
        domain_filter: Optional domain to filter traffic
        restore_session: Whether to restore saved session cookies
    """
    session = CaptureSession(proxy_port)

    try:
        page = await session.start(output, domain_filter, restore_session)

        # Try to navigate, but don't fail if it times out
        # User can manually navigate if automatic navigation fails
        try:
            await page.goto(initial_url, timeout=PAGE_NAVIGATION_TIMEOUT_MS, wait_until="domcontentloaded")
        except Exception as nav_error:
            # Use the module logger (not the root logger) so these records
            # follow the same logging configuration as the rest of the module.
            logger.warning(f"Auto-navigation failed: {nav_error}")
            logger.info("Please navigate manually in the browser")

        # Wait for user to close browser
        await session.wait_for_close()

    finally:
        await session.close()
|