note-connector 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/paths.js +4 -0
- package/dist/setup-dependencies.js +56 -13
- package/package.json +3 -2
- package/py/pyproject.toml +86 -0
- package/py/src/note_mcp/__init__.py +7 -0
- package/py/src/note_mcp/__main__.py +65 -0
- package/py/src/note_mcp/api/__init__.py +31 -0
- package/py/src/note_mcp/api/articles.py +1395 -0
- package/py/src/note_mcp/api/client.py +318 -0
- package/py/src/note_mcp/api/embeds.py +482 -0
- package/py/src/note_mcp/api/images.py +660 -0
- package/py/src/note_mcp/api/preview.py +142 -0
- package/py/src/note_mcp/api/public_notes.py +150 -0
- package/py/src/note_mcp/auth/__init__.py +9 -0
- package/py/src/note_mcp/auth/browser.py +574 -0
- package/py/src/note_mcp/auth/file_session.py +145 -0
- package/py/src/note_mcp/auth/session.py +240 -0
- package/py/src/note_mcp/browser/__init__.py +10 -0
- package/py/src/note_mcp/browser/config.py +21 -0
- package/py/src/note_mcp/browser/manager.py +182 -0
- package/py/src/note_mcp/browser/preview.py +68 -0
- package/py/src/note_mcp/browser/url_helpers.py +18 -0
- package/py/src/note_mcp/chatgpt/__init__.py +1 -0
- package/py/src/note_mcp/chatgpt/__main__.py +63 -0
- package/py/src/note_mcp/chatgpt/access_log.py +25 -0
- package/py/src/note_mcp/chatgpt/auth.py +52 -0
- package/py/src/note_mcp/chatgpt/images.py +92 -0
- package/py/src/note_mcp/chatgpt/login_once.py +26 -0
- package/py/src/note_mcp/chatgpt/middleware.py +31 -0
- package/py/src/note_mcp/chatgpt/tools.py +255 -0
- package/py/src/note_mcp/chatgpt/widgets.py +121 -0
- package/py/src/note_mcp/decorators.py +113 -0
- package/py/src/note_mcp/investigator/__init__.py +33 -0
- package/py/src/note_mcp/investigator/__main__.py +11 -0
- package/py/src/note_mcp/investigator/cli.py +313 -0
- package/py/src/note_mcp/investigator/core.py +653 -0
- package/py/src/note_mcp/investigator/mcp_tools.py +225 -0
- package/py/src/note_mcp/models.py +562 -0
- package/py/src/note_mcp/py.typed +0 -0
- package/py/src/note_mcp/server.py +944 -0
- package/py/src/note_mcp/utils/__init__.py +7 -0
- package/py/src/note_mcp/utils/file_parser.py +314 -0
- package/py/src/note_mcp/utils/html_to_markdown.py +477 -0
- package/py/src/note_mcp/utils/logging.py +119 -0
- package/py/src/note_mcp/utils/markdown.py +12 -0
- package/py/src/note_mcp/utils/markdown_to_html.py +826 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""Preview API functions for note.com.
|
|
2
|
+
|
|
3
|
+
Provides functionality to get preview access tokens
|
|
4
|
+
and fetch preview page HTML.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import logging
|
|
11
|
+
from typing import TYPE_CHECKING
|
|
12
|
+
|
|
13
|
+
import httpx
|
|
14
|
+
|
|
15
|
+
from note_mcp.api.articles import build_preview_url, get_preview_access_token
|
|
16
|
+
from note_mcp.models import ErrorCode, NoteAPIError
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from note_mcp.models import Session
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Re-export for convenience
|
|
25
|
+
__all__ = ["get_preview_access_token", "build_preview_url", "get_preview_html"]
|
|
26
|
+
|
|
27
|
+
# Common User-Agent string for API requests
|
|
28
|
+
USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"
|
|
29
|
+
|
|
30
|
+
# Retry configuration for transient errors
|
|
31
|
+
MAX_TRANSIENT_RETRIES = 3 # Maximum retries for transient errors (502/503/504)
|
|
32
|
+
BASE_DELAY = 0.5 # Initial backoff delay in seconds
|
|
33
|
+
MAX_DELAY = 4.0 # Maximum backoff delay in seconds
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
async def get_preview_html(
    session: Session,
    article_key: str,
) -> str:
    """Download the rendered preview page of an article as HTML.

    Each attempt first requests a preview access token, builds the preview
    URL from it and then issues a GET for that URL with the session cookies.

    Retry policy:
        - 401/403 responses: one additional attempt (a new token is requested
          at the start of every attempt).
        - 502/503/504 responses: up to ``MAX_TRANSIENT_RETRIES`` additional
          attempts, sleeping ``BASE_DELAY * 2**n`` seconds (capped at
          ``MAX_DELAY``) before each one.

    Args:
        session: Authenticated session whose cookies are sent with the request
        article_key: Article key (e.g., "n1234567890ab")

    Returns:
        Body of the first successful response as text

    Raises:
        NoteAPIError: If the last response is still unsuccessful once no
            retry applies (NOT_AUTHENTICATED for 401, API_ERROR otherwise)
    """
    # Statuses that earn a single extra attempt with a newly issued token.
    refreshable_statuses = {401, 403}
    # Statuses treated as temporary upstream failures.
    backoff_statuses = {502, 503, 504}

    request_headers = {
        "Cookie": "; ".join(f"{name}={value}" for name, value in session.cookies.items()),
        "User-Agent": USER_AGENT,
    }

    auth_retry_used = False
    transient_retry_count = 0

    while True:
        # A token is requested on every pass, so each retry uses a fresh one.
        token = await get_preview_access_token(session, article_key)
        target_url = build_preview_url(article_key, token)

        async with httpx.AsyncClient() as http:
            reply = await http.get(
                target_url,
                headers=request_headers,
                follow_redirects=True,
            )

        if reply.is_success:
            return reply.text

        failed_status = reply.status_code

        if failed_status in refreshable_statuses and not auth_retry_used:
            logger.warning(
                "Preview HTML fetch got auth error %d, retrying with fresh token",
                failed_status,
            )
            auth_retry_used = True
            continue

        may_back_off = failed_status in backoff_statuses and transient_retry_count < MAX_TRANSIENT_RETRIES
        if not may_back_off:
            # Neither retry path applies; report the failure below.
            break

        delay = min(BASE_DELAY * (2**transient_retry_count), MAX_DELAY)
        logger.warning(
            "Preview HTML fetch got transient error %d, retrying in %.1fs (%d/%d)",
            failed_status,
            delay,
            transient_retry_count + 1,
            MAX_TRANSIENT_RETRIES,
        )
        await asyncio.sleep(delay)
        transient_retry_count += 1

    # Only a 401 is reported as an authentication problem; every other status
    # (including 403) is surfaced as a generic API error.
    if reply.status_code == 401:
        error_code = ErrorCode.NOT_AUTHENTICATED
    else:
        error_code = ErrorCode.API_ERROR

    raise NoteAPIError(
        code=error_code,
        message=f"Failed to fetch preview HTML. Status: {reply.status_code}",
        details={
            "article_key": article_key,
            "status_code": reply.status_code,
            "response_text": reply.text[:500] or "(empty)",
            "auth_retry_used": auth_retry_used,
            "transient_retry_count": transient_retry_count,
        },
    )
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""Fetch and search public note.com articles (no login required)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from typing import Any
|
|
7
|
+
from urllib.parse import quote
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
|
|
11
|
+
from note_mcp.models import ErrorCode, NoteAPIError, PublicArticle, PublicArticleSummary, PublicSearchResult
|
|
12
|
+
from note_mcp.utils.html_to_markdown import html_to_markdown
|
|
13
|
+
|
|
14
|
+
# Base URL that the v3 endpoint paths used in this module are appended to.
NOTE_API_BASE = "https://note.com/api"
|
|
15
|
+
# User-Agent header value sent with the requests issued by this module.
USER_AGENT = "Mozilla/5.0 (compatible; note-connector/1.0)"
|
|
16
|
+
# Matches a full public article URL: http or https, optional "www.", an
# author path segment captured as "user", "/n/", the article key captured as
# "key", and an optional trailing slash. The pattern itself does not accept a
# query string or fragment after the key.
NOTE_URL_PATTERN = re.compile(r"^https?://(?:www\.)?note\.com/(?P<user>[a-zA-Z0-9_-]+)/n/(?P<key>n[a-z0-9]+)/?$")
|
|
17
|
+
# Matches a bare article key: the letter "n" followed by one or more
# lowercase letters or digits.
NOTE_KEY_PATTERN = re.compile(r"^n[a-z0-9]+$")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def extract_note_key_from_url(url: str) -> str:
    """Extract the article key from a public note.com article URL.

    Surrounding whitespace is ignored. A fragment (``#...``) and a query
    string (``?...``) are dropped before matching, so article links that
    carry extra parameters still resolve to their key.

    Args:
        url: Public article URL of the form ``https://note.com/<user>/n/<key>``

    Returns:
        The article key captured from the URL path

    Raises:
        NoteAPIError: INVALID_INPUT if the URL does not match the expected
            article URL shape
    """
    candidate = url.strip()
    # The fragment follows the query in a URL, so cut at "#" first and "?"
    # second; neither part identifies the article.
    for separator in ("#", "?"):
        candidate = candidate.partition(separator)[0]

    match = NOTE_URL_PATTERN.match(candidate)
    if not match:
        raise NoteAPIError(
            code=ErrorCode.INVALID_INPUT,
            message=f"Invalid note.com article URL: {url}",
            details={"url": url},
        )
    return match.group("key")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _normalize_key(note_key_or_url: str) -> str:
    """Resolve user input (bare key or article URL) to an article key.

    Input starting with "http" is handed to ``extract_note_key_from_url``;
    anything else must match ``NOTE_KEY_PATTERN`` and is returned stripped.

    Raises:
        NoteAPIError: INVALID_INPUT if the input is neither form
    """
    candidate = note_key_or_url.strip()

    if candidate.startswith("http"):
        return extract_note_key_from_url(candidate)

    if NOTE_KEY_PATTERN.match(candidate) is None:
        raise NoteAPIError(
            code=ErrorCode.INVALID_INPUT,
            message="Provide note key (n...) or https://note.com/user/n/n... URL",
            details={"input": note_key_or_url},
        )

    return candidate
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _public_article_url(username: str, key: str) -> str:
|
|
46
|
+
return f"https://note.com/{username}/n/{key}"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
async def fetch_public_article(note_key_or_url: str) -> PublicArticle:
    """Fetch a published article by key or public URL.

    Args:
        note_key_or_url: Article key (``n...``) or a public article URL

    Returns:
        PublicArticle whose body is the API's HTML body converted to Markdown

    Raises:
        NoteAPIError: INVALID_INPUT if the input is not a key/URL or the API
            answers 404; API_ERROR for any other non-200 status, for a body
            that is not a JSON object with a non-empty "data" object, or when
            the author urlname is missing
    """
    key = _normalize_key(note_key_or_url)
    url = f"{NOTE_API_BASE}/v3/notes/{key}"
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.get(url, headers={"Accept": "application/json", "User-Agent": USER_AGENT})

    if response.status_code == 404:
        raise NoteAPIError(
            code=ErrorCode.INVALID_INPUT,
            message=f"Article not found or not public: {key}",
            details={"key": key},
        )
    if response.status_code != 200:
        raise NoteAPIError(
            code=ErrorCode.API_ERROR,
            message=f"Failed to fetch public article: HTTP {response.status_code}",
            details={"key": key, "status": response.status_code},
        )

    # A 200 response may still carry a non-JSON body (e.g. an HTML error
    # page); report that as an API error instead of leaking a decode error.
    try:
        payload: Any = response.json()
    except ValueError as exc:
        raise NoteAPIError(
            code=ErrorCode.API_ERROR,
            message="Invalid API response for public article",
            details={"key": key},
        ) from exc

    # Valid JSON is not necessarily an object; only call .get() on a dict.
    data = payload.get("data") if isinstance(payload, dict) else None
    if not isinstance(data, dict) or not data:
        raise NoteAPIError(
            code=ErrorCode.API_ERROR,
            message="Invalid API response for public article",
            details={"key": key},
        )

    user_raw = data.get("user")
    user: dict[str, Any] = user_raw if isinstance(user_raw, dict) else {}
    username = str(user.get("urlname") or "")
    if not username:
        raise NoteAPIError(
            code=ErrorCode.API_ERROR,
            message="Public article response missing author urlname",
            details={"key": key},
        )

    # Prefer the key reported by the API; fall back to the requested one.
    article_key = str(data.get("key") or key)
    nickname = user.get("nickname")
    return PublicArticle(
        key=article_key,
        title=str(data.get("name") or ""),
        body_markdown=html_to_markdown(str(data.get("body") or "")),
        author_username=username,
        author_nickname=str(nickname) if nickname else None,
        url=_public_article_url(username, article_key),
        status=str(data.get("status") or "published"),
    )
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
async def search_public_notes(query: str, *, size: int = 10) -> PublicSearchResult:
    """Search published notes on note.com.

    Args:
        query: Search text; must contain at least one non-whitespace character
        size: Requested number of results, clamped to the range 1..20

    Returns:
        PublicSearchResult with one summary per result that has both an author
        urlname and a key. ``is_last_page`` is None when the response does not
        include that field. Missing or malformed result sections yield an
        empty item list rather than an error.

    Raises:
        NoteAPIError: INVALID_INPUT for a blank query; API_ERROR for a non-200
            status or a response body that is not valid JSON
    """
    if not query.strip():
        raise NoteAPIError(
            code=ErrorCode.INVALID_INPUT,
            message="Search query must not be empty",
            details={},
        )

    size_clamped = max(1, min(size, 20))
    url = f"{NOTE_API_BASE}/v3/searches?context=note&q={quote(query)}&size={size_clamped}"
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.get(url, headers={"Accept": "application/json", "User-Agent": USER_AGENT})

    if response.status_code != 200:
        raise NoteAPIError(
            code=ErrorCode.API_ERROR,
            message=f"Search failed: HTTP {response.status_code}",
            details={"query": query, "status": response.status_code},
        )

    # A 200 response may still carry a non-JSON body; surface that as an API
    # error instead of leaking a decode error to the caller.
    try:
        payload: Any = response.json()
    except ValueError as exc:
        raise NoteAPIError(
            code=ErrorCode.API_ERROR,
            message="Search failed: response body is not valid JSON",
            details={"query": query, "status": response.status_code},
        ) from exc

    # Walk payload -> data -> notes defensively: any level that is not an
    # object is treated as absent, which results in an empty item list.
    search_data = payload.get("data") if isinstance(payload, dict) else None
    notes_raw = search_data.get("notes") if isinstance(search_data, dict) else None
    notes_block: dict[str, Any] = notes_raw if isinstance(notes_raw, dict) else {}
    contents_raw = notes_block.get("contents")
    contents: list[Any] = contents_raw if isinstance(contents_raw, list) else []

    items: list[PublicArticleSummary] = []
    for item in contents:
        if not isinstance(item, dict):
            continue
        user_raw = item.get("user")
        user_item: dict[str, Any] = user_raw if isinstance(user_raw, dict) else {}
        username = str(user_item.get("urlname") or "")
        key = str(item.get("key") or "")
        # Both parts are needed to build the article URL; skip partial rows.
        if not username or not key:
            continue
        nickname = user_item.get("nickname")
        published = item.get("publish_at")
        items.append(
            PublicArticleSummary(
                key=key,
                title=str(item.get("name") or ""),
                author_username=username,
                author_nickname=str(nickname) if nickname else None,
                url=_public_article_url(username, key),
                published_at=str(published) if published else None,
            )
        )

    # Distinguish "field absent" (None) from an explicit true/false value.
    is_last = bool(notes_block["is_last_page"]) if "is_last_page" in notes_block else None
    return PublicSearchResult(items=items, query=query, is_last_page=is_last)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""Authentication module for note-mcp.
|
|
2
|
+
|
|
3
|
+
Provides session management and browser-based login functionality.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from note_mcp.auth.browser import login_with_browser
|
|
7
|
+
from note_mcp.auth.session import KeyringError, SessionManager
|
|
8
|
+
|
|
9
|
+
__all__ = ["SessionManager", "KeyringError", "login_with_browser"]
|