note-connector 0.2.5 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/dist/paths.js +4 -0
  2. package/dist/setup-dependencies.js +56 -13
  3. package/package.json +3 -2
  4. package/py/pyproject.toml +86 -0
  5. package/py/src/note_mcp/__init__.py +7 -0
  6. package/py/src/note_mcp/__main__.py +65 -0
  7. package/py/src/note_mcp/api/__init__.py +31 -0
  8. package/py/src/note_mcp/api/articles.py +1395 -0
  9. package/py/src/note_mcp/api/client.py +318 -0
  10. package/py/src/note_mcp/api/embeds.py +482 -0
  11. package/py/src/note_mcp/api/images.py +660 -0
  12. package/py/src/note_mcp/api/preview.py +142 -0
  13. package/py/src/note_mcp/api/public_notes.py +150 -0
  14. package/py/src/note_mcp/auth/__init__.py +9 -0
  15. package/py/src/note_mcp/auth/browser.py +574 -0
  16. package/py/src/note_mcp/auth/file_session.py +145 -0
  17. package/py/src/note_mcp/auth/session.py +240 -0
  18. package/py/src/note_mcp/browser/__init__.py +10 -0
  19. package/py/src/note_mcp/browser/config.py +21 -0
  20. package/py/src/note_mcp/browser/manager.py +182 -0
  21. package/py/src/note_mcp/browser/preview.py +68 -0
  22. package/py/src/note_mcp/browser/url_helpers.py +18 -0
  23. package/py/src/note_mcp/chatgpt/__init__.py +1 -0
  24. package/py/src/note_mcp/chatgpt/__main__.py +63 -0
  25. package/py/src/note_mcp/chatgpt/access_log.py +25 -0
  26. package/py/src/note_mcp/chatgpt/auth.py +52 -0
  27. package/py/src/note_mcp/chatgpt/images.py +92 -0
  28. package/py/src/note_mcp/chatgpt/login_once.py +26 -0
  29. package/py/src/note_mcp/chatgpt/middleware.py +31 -0
  30. package/py/src/note_mcp/chatgpt/tools.py +255 -0
  31. package/py/src/note_mcp/chatgpt/widgets.py +121 -0
  32. package/py/src/note_mcp/decorators.py +113 -0
  33. package/py/src/note_mcp/investigator/__init__.py +33 -0
  34. package/py/src/note_mcp/investigator/__main__.py +11 -0
  35. package/py/src/note_mcp/investigator/cli.py +313 -0
  36. package/py/src/note_mcp/investigator/core.py +653 -0
  37. package/py/src/note_mcp/investigator/mcp_tools.py +225 -0
  38. package/py/src/note_mcp/models.py +562 -0
  39. package/py/src/note_mcp/py.typed +0 -0
  40. package/py/src/note_mcp/server.py +944 -0
  41. package/py/src/note_mcp/utils/__init__.py +7 -0
  42. package/py/src/note_mcp/utils/file_parser.py +314 -0
  43. package/py/src/note_mcp/utils/html_to_markdown.py +477 -0
  44. package/py/src/note_mcp/utils/logging.py +119 -0
  45. package/py/src/note_mcp/utils/markdown.py +12 -0
  46. package/py/src/note_mcp/utils/markdown_to_html.py +826 -0
@@ -0,0 +1,142 @@
1
+ """Preview API functions for note.com.
2
+
3
+ Provides functionality to get preview access tokens
4
+ and fetch preview page HTML.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import asyncio
10
+ import logging
11
+ from typing import TYPE_CHECKING
12
+
13
+ import httpx
14
+
15
+ from note_mcp.api.articles import build_preview_url, get_preview_access_token
16
+ from note_mcp.models import ErrorCode, NoteAPIError
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ if TYPE_CHECKING:
21
+ from note_mcp.models import Session
22
+
23
+
24
+ # Re-export for convenience
25
+ __all__ = ["get_preview_access_token", "build_preview_url", "get_preview_html"]
26
+
27
+ # Browser-like User-Agent header value sent with the preview page request
28
+ USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"
29
+
30
+ # Retry configuration for transient server errors (502/503/504) in get_preview_html
31
+ MAX_TRANSIENT_RETRIES = 3 # Upper bound on backoff retries after a 502/503/504 response
32
+ BASE_DELAY = 0.5 # First backoff delay in seconds; doubled for each further transient retry
33
+ MAX_DELAY = 4.0 # Cap applied to the computed backoff delay, in seconds
34
+
35
+
36
async def get_preview_html(
    session: Session,
    article_key: str,
) -> str:
    """Download the preview page HTML for an article.

    Every attempt requests a preview access token, builds the preview URL
    from it, and issues a GET carrying the session cookies.

    Retry policy:
        - 401/403: one extra attempt, which obtains a new access token
        - 502/503/504: up to MAX_TRANSIENT_RETRIES extra attempts, each
          preceded by a sleep of BASE_DELAY * 2**n seconds capped at MAX_DELAY

    Args:
        session: Session whose cookies are sent with the request
        article_key: Article key (e.g., "n1234567890ab")

    Returns:
        Body text of the first successful response

    Raises:
        NoteAPIError: When a response is unsuccessful and no retry applies.
            The code is NOT_AUTHENTICATED for a 401 and API_ERROR otherwise.
            Errors raised by get_preview_access_token propagate unchanged.
    """
    # The same Cookie / User-Agent headers are reused for every attempt.
    request_headers = {
        "Cookie": "; ".join(f"{name}={value}" for name, value in session.cookies.items()),
        "User-Agent": USER_AGENT,
    }

    token_refresh_statuses = {401, 403}
    backoff_statuses = {502, 503, 504}

    auth_retry_used = False
    transient_retry_count = 0

    while True:
        # A token is requested on every pass, so a retry never reuses the old one.
        token = await get_preview_access_token(session, article_key)
        target_url = build_preview_url(article_key, token)

        async with httpx.AsyncClient() as http:
            reply = await http.get(
                target_url,
                headers=request_headers,
                follow_redirects=True,
            )

        if reply.is_success:
            return reply.text

        status = reply.status_code

        # Auth failure: allow exactly one more attempt.
        if not auth_retry_used and status in token_refresh_statuses:
            logger.warning(
                "Preview HTML fetch got auth error %d, retrying with fresh token",
                status,
            )
            auth_retry_used = True
            continue

        # Transient server failure: wait with exponential backoff, then retry.
        if transient_retry_count < MAX_TRANSIENT_RETRIES and status in backoff_statuses:
            pause = min(BASE_DELAY * (2**transient_retry_count), MAX_DELAY)
            logger.warning(
                "Preview HTML fetch got transient error %d, retrying in %.1fs (%d/%d)",
                status,
                pause,
                transient_retry_count + 1,
                MAX_TRANSIENT_RETRIES,
            )
            await asyncio.sleep(pause)
            transient_retry_count += 1
            continue

        # Nothing left to retry: only a 401 is reported as an authentication failure.
        if status == 401:
            failure_code = ErrorCode.NOT_AUTHENTICATED
        else:
            failure_code = ErrorCode.API_ERROR

        raise NoteAPIError(
            code=failure_code,
            message=f"Failed to fetch preview HTML. Status: {status}",
            details={
                "article_key": article_key,
                "status_code": status,
                "response_text": reply.text[:500] if reply.text else "(empty)",
                "auth_retry_used": auth_retry_used,
                "transient_retry_count": transient_retry_count,
            },
        )
@@ -0,0 +1,150 @@
1
+ """Fetch and search public note.com articles (no login required)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from typing import Any
7
+ from urllib.parse import quote
8
+
9
+ import httpx
10
+
11
+ from note_mcp.models import ErrorCode, NoteAPIError, PublicArticle, PublicArticleSummary, PublicSearchResult
12
+ from note_mcp.utils.html_to_markdown import html_to_markdown
13
+
14
+ NOTE_API_BASE = "https://note.com/api" # Prefix for the v3 notes and searches endpoints requested below
15
+ USER_AGENT = "Mozilla/5.0 (compatible; note-connector/1.0)" # User-Agent header sent with the public API requests
16
+ NOTE_URL_PATTERN = re.compile(r"^https?://(?:www\.)?note\.com/(?P<user>[a-zA-Z0-9_-]+)/n/(?P<key>n[a-z0-9]+)/?$") # http(s) note.com article URL; captures the `user` and `key` groups
17
+ NOTE_KEY_PATTERN = re.compile(r"^n[a-z0-9]+$") # Bare article key: "n" followed by lowercase letters/digits
18
+
19
+
20
def extract_note_key_from_url(url: str) -> str:
    """Return the article key contained in a public note.com article URL.

    Leading and trailing whitespace is ignored when matching.

    Args:
        url: Candidate article URL

    Returns:
        The value of the ``key`` group captured by NOTE_URL_PATTERN

    Raises:
        NoteAPIError: With INVALID_INPUT when the URL does not match
            NOTE_URL_PATTERN. The error reports the URL as it was passed in.
    """
    matched = NOTE_URL_PATTERN.match(url.strip())
    if matched is not None:
        return matched.group("key")

    raise NoteAPIError(
        code=ErrorCode.INVALID_INPUT,
        message=f"Invalid note.com article URL: {url}",
        details={"url": url},
    )
30
+
31
+
32
def _normalize_key(note_key_or_url: str) -> str:
    """Turn a bare article key or an article URL into an article key.

    Input starting with "http" is handed to extract_note_key_from_url.
    Anything else must match NOTE_KEY_PATTERN and is returned stripped.

    Raises:
        NoteAPIError: With INVALID_INPUT when the input is neither form.
    """
    candidate = note_key_or_url.strip()

    if candidate.startswith("http"):
        resolved = extract_note_key_from_url(candidate)
    elif NOTE_KEY_PATTERN.match(candidate) is not None:
        resolved = candidate
    else:
        raise NoteAPIError(
            code=ErrorCode.INVALID_INPUT,
            message="Provide note key (n...) or https://note.com/user/n/n... URL",
            details={"input": note_key_or_url},
        )

    return resolved
43
+
44
+
45
def _public_article_url(username: str, key: str) -> str:
    """Build the public note.com article URL for an author urlname and article key."""
    article_url = f"https://note.com/{username}/n/{key}"
    return article_url
47
+
48
+
49
async def fetch_public_article(note_key_or_url: str) -> PublicArticle:
    """Fetch a published article by key or public URL.

    The input is normalized to an article key, the v3 notes endpoint is
    requested, and the HTML body from the response is converted to Markdown.

    Args:
        note_key_or_url: Article key (``n...``) or public article URL

    Returns:
        PublicArticle built from the ``data`` object of the API response

    Raises:
        NoteAPIError: INVALID_INPUT when the input is not a key/URL or the
            API answers 404. API_ERROR for any other non-200 status, for a
            body that is not a JSON object with a non-empty ``data`` object,
            or when the author ``urlname`` is missing.
    """
    key = _normalize_key(note_key_or_url)
    url = f"{NOTE_API_BASE}/v3/notes/{key}"
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.get(url, headers={"Accept": "application/json", "User-Agent": USER_AGENT})

    if response.status_code == 404:
        raise NoteAPIError(
            code=ErrorCode.INVALID_INPUT,
            message=f"Article not found or not public: {key}",
            details={"key": key},
        )
    if response.status_code != 200:
        raise NoteAPIError(
            code=ErrorCode.API_ERROR,
            message=f"Failed to fetch public article: HTTP {response.status_code}",
            details={"key": key, "status": response.status_code},
        )

    # A 200 response is not guaranteed to carry valid JSON; report a decode
    # failure as the same API error used for a malformed payload instead of
    # leaking a raw ValueError to the caller.
    try:
        payload: Any = response.json()
    except ValueError as exc:
        raise NoteAPIError(
            code=ErrorCode.API_ERROR,
            message="Invalid API response for public article",
            details={"key": key},
        ) from exc

    # The top-level JSON value may be a list or scalar; only an object has "data".
    data_raw = payload.get("data") if isinstance(payload, dict) else None
    data: dict[str, Any] = data_raw if isinstance(data_raw, dict) else {}
    if not data:
        raise NoteAPIError(
            code=ErrorCode.API_ERROR,
            message="Invalid API response for public article",
            details={"key": key},
        )

    user_raw = data.get("user")
    user: dict[str, Any] = user_raw if isinstance(user_raw, dict) else {}
    username = str(user.get("urlname") or "")
    if not username:
        raise NoteAPIError(
            code=ErrorCode.API_ERROR,
            message="Public article response missing author urlname",
            details={"key": key},
        )

    # Prefer the key reported by the API; fall back to the requested key.
    article_key = str(data.get("key") or key)
    nickname = user.get("nickname")
    body_html = str(data.get("body") or "")
    return PublicArticle(
        key=article_key,
        title=str(data.get("name") or ""),
        body_markdown=html_to_markdown(body_html),
        author_username=username,
        author_nickname=str(nickname) if nickname else None,
        url=_public_article_url(username, article_key),
        status=str(data.get("status") or "published"),
    )
98
+
99
+
100
async def search_public_notes(query: str, *, size: int = 10) -> PublicSearchResult:
    """Search published notes on note.com.

    Args:
        query: Search text; must contain at least one non-whitespace character
        size: Requested number of results, clamped to the range 1..20

    Returns:
        PublicSearchResult with one summary per result entry that has both an
        author ``urlname`` and a ``key``. ``is_last_page`` is None when the
        response does not include that field.

    Raises:
        NoteAPIError: INVALID_INPUT for a blank query. API_ERROR for a
            non-200 status or a response body that is not valid JSON.
    """
    if not query.strip():
        raise NoteAPIError(
            code=ErrorCode.INVALID_INPUT,
            message="Search query must not be empty",
            details={},
        )

    size_clamped = max(1, min(size, 20))
    url = f"{NOTE_API_BASE}/v3/searches?context=note&q={quote(query)}&size={size_clamped}"
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.get(url, headers={"Accept": "application/json", "User-Agent": USER_AGENT})

    if response.status_code != 200:
        raise NoteAPIError(
            code=ErrorCode.API_ERROR,
            message=f"Search failed: HTTP {response.status_code}",
            details={"query": query, "status": response.status_code},
        )

    # A 200 response is not guaranteed to carry valid JSON; surface a decode
    # failure as an API error instead of leaking a raw ValueError.
    try:
        payload_search: Any = response.json()
    except ValueError as exc:
        raise NoteAPIError(
            code=ErrorCode.API_ERROR,
            message="Invalid API response for search",
            details={"query": query},
        ) from exc

    # Each level is type-checked before use, so an unexpected shape
    # (including a non-object top-level value) yields an empty result.
    data_search = payload_search.get("data") if isinstance(payload_search, dict) else None
    search_data: dict[str, Any] = data_search if isinstance(data_search, dict) else {}
    notes_block_raw = search_data.get("notes")
    notes_block: dict[str, Any] = notes_block_raw if isinstance(notes_block_raw, dict) else {}
    raw = notes_block.get("contents")
    contents: list[Any] = raw if isinstance(raw, list) else []

    items: list[PublicArticleSummary] = []
    for item in contents:
        if not isinstance(item, dict):
            continue
        user_item_raw = item.get("user")
        user_item: dict[str, Any] = user_item_raw if isinstance(user_item_raw, dict) else {}
        username = str(user_item.get("urlname") or "")
        key = str(item.get("key") or "")
        # Both parts are needed to build the article URL; skip incomplete entries.
        if not username or not key:
            continue
        nickname = user_item.get("nickname")
        published = item.get("publish_at")
        items.append(
            PublicArticleSummary(
                key=key,
                title=str(item.get("name") or ""),
                author_username=username,
                author_nickname=str(nickname) if nickname else None,
                url=_public_article_url(username, key),
                published_at=str(published) if published else None,
            )
        )

    # None means the response did not say whether this is the last page.
    is_last = bool(notes_block["is_last_page"]) if "is_last_page" in notes_block else None
    return PublicSearchResult(items=items, query=query, is_last_page=is_last)
@@ -0,0 +1,9 @@
1
+ """Authentication module for note-mcp.
2
+
3
+ Provides session management and browser-based login functionality.
4
+ """
5
+
6
+ from note_mcp.auth.browser import login_with_browser
7
+ from note_mcp.auth.session import KeyringError, SessionManager
8
+
9
+ __all__ = ["SessionManager", "KeyringError", "login_with_browser"]