note-connector 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/paths.js +4 -0
- package/dist/setup-dependencies.js +56 -13
- package/package.json +3 -2
- package/py/pyproject.toml +86 -0
- package/py/src/note_mcp/__init__.py +7 -0
- package/py/src/note_mcp/__main__.py +65 -0
- package/py/src/note_mcp/api/__init__.py +31 -0
- package/py/src/note_mcp/api/articles.py +1395 -0
- package/py/src/note_mcp/api/client.py +318 -0
- package/py/src/note_mcp/api/embeds.py +482 -0
- package/py/src/note_mcp/api/images.py +660 -0
- package/py/src/note_mcp/api/preview.py +142 -0
- package/py/src/note_mcp/api/public_notes.py +150 -0
- package/py/src/note_mcp/auth/__init__.py +9 -0
- package/py/src/note_mcp/auth/browser.py +574 -0
- package/py/src/note_mcp/auth/file_session.py +145 -0
- package/py/src/note_mcp/auth/session.py +240 -0
- package/py/src/note_mcp/browser/__init__.py +10 -0
- package/py/src/note_mcp/browser/config.py +21 -0
- package/py/src/note_mcp/browser/manager.py +182 -0
- package/py/src/note_mcp/browser/preview.py +68 -0
- package/py/src/note_mcp/browser/url_helpers.py +18 -0
- package/py/src/note_mcp/chatgpt/__init__.py +1 -0
- package/py/src/note_mcp/chatgpt/__main__.py +63 -0
- package/py/src/note_mcp/chatgpt/access_log.py +25 -0
- package/py/src/note_mcp/chatgpt/auth.py +52 -0
- package/py/src/note_mcp/chatgpt/images.py +92 -0
- package/py/src/note_mcp/chatgpt/login_once.py +26 -0
- package/py/src/note_mcp/chatgpt/middleware.py +31 -0
- package/py/src/note_mcp/chatgpt/tools.py +255 -0
- package/py/src/note_mcp/chatgpt/widgets.py +121 -0
- package/py/src/note_mcp/decorators.py +113 -0
- package/py/src/note_mcp/investigator/__init__.py +33 -0
- package/py/src/note_mcp/investigator/__main__.py +11 -0
- package/py/src/note_mcp/investigator/cli.py +313 -0
- package/py/src/note_mcp/investigator/core.py +653 -0
- package/py/src/note_mcp/investigator/mcp_tools.py +225 -0
- package/py/src/note_mcp/models.py +562 -0
- package/py/src/note_mcp/py.typed +0 -0
- package/py/src/note_mcp/server.py +944 -0
- package/py/src/note_mcp/utils/__init__.py +7 -0
- package/py/src/note_mcp/utils/file_parser.py +314 -0
- package/py/src/note_mcp/utils/html_to_markdown.py +477 -0
- package/py/src/note_mcp/utils/logging.py +119 -0
- package/py/src/note_mcp/utils/markdown.py +12 -0
- package/py/src/note_mcp/utils/markdown_to_html.py +826 -0
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"""Utility modules for note-mcp."""
|
|
2
|
+
|
|
3
|
+
from note_mcp.utils.html_to_markdown import html_to_markdown
|
|
4
|
+
from note_mcp.utils.logging import get_logger, setup_logging
|
|
5
|
+
from note_mcp.utils.markdown_to_html import markdown_to_html
|
|
6
|
+
|
|
7
|
+
__all__ = ["html_to_markdown", "markdown_to_html", "setup_logging", "get_logger"]
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
"""Markdown file parser for note.com article creation.
|
|
2
|
+
|
|
3
|
+
Parses Markdown files with YAML frontmatter support,
|
|
4
|
+
extracts titles from headings, and detects local images.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
import yaml
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class LocalImage:
    """A reference to an image file on disk found in Markdown content.

    Attributes:
        markdown_path: The image target exactly as written in the Markdown
            source (e.g., ./images/test.png).
        absolute_path: The resolved absolute filesystem path of the image.
        alt_text: The image's alt text; an empty string when none was given.
    """

    markdown_path: str
    absolute_path: Path
    alt_text: str = field(default="")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class ParsedArticle:
    """The result of parsing one Markdown article.

    Attributes:
        title: Article title, taken from the frontmatter or from a heading.
        body: Article body as Markdown.
        tags: Tags attached to the article (empty list when none).
        local_images: Local image references detected in the body.
        source_path: Path of the Markdown file the article was read from.
        eyecatch: Path of the eyecatch (header) image, when one is specified.
    """

    title: str
    body: str
    # Mutable defaults go through default_factory so instances never share a list.
    tags: list[str] = field(default_factory=list)
    local_images: list[LocalImage] = field(default_factory=list)
    source_path: Path | None = field(default=None)
    eyecatch: Path | None = field(default=None)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Pattern to match YAML frontmatter (must start at beginning of file)
|
|
54
|
+
FRONTMATTER_PATTERN = re.compile(r"^---\n(.*?)\n---\n?", re.DOTALL)
|
|
55
|
+
|
|
56
|
+
# Pattern to match Markdown images: 
|
|
57
|
+
IMAGE_PATTERN = re.compile(r"!\[([^\]]*)\]\(([^)]+)\)")
|
|
58
|
+
|
|
59
|
+
# Patterns that indicate a URL (not a local file)
|
|
60
|
+
URL_PREFIXES = ("http://", "https://", "data:")
|
|
61
|
+
|
|
62
|
+
# Patterns to match H1 and H2 title headings
|
|
63
|
+
_H1_TITLE_PATTERN = re.compile(r"^#\s+(.+)$", re.MULTILINE)
|
|
64
|
+
_H2_TITLE_PATTERN = re.compile(r"^##\s+(.+)$", re.MULTILINE)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def parse_markdown_file(file_path: Path | str) -> ParsedArticle:
    """Parse a Markdown file and extract article components.

    The function extracts:
    - Title: From YAML frontmatter, or first H1, or first H2 as fallback
    - Tags: From YAML frontmatter (optional)
    - Body: The Markdown content (frontmatter stripped, title heading removed)
    - Local images: Paths to local image files (not URLs)
    - Eyecatch: Path to eyecatch image from YAML frontmatter (optional)

    Args:
        file_path: Path to the Markdown file (str or Path)

    Returns:
        ParsedArticle with extracted components

    Raises:
        FileNotFoundError: If the file does not exist
        ValueError: If no title can be extracted
    """
    path = file_path if isinstance(file_path, Path) else Path(file_path)

    if not path.exists():
        raise FileNotFoundError(f"ファイルが見つかりません: {path}")

    # "utf-8-sig" drops a leading byte-order mark if present and otherwise
    # decodes exactly like "utf-8". With a BOM left in place the frontmatter
    # pattern (anchored at the first character) would not match.
    content = path.read_text(encoding="utf-8-sig")

    frontmatter_data = _extract_frontmatter(content)
    body = _strip_frontmatter(content)

    title = _get_title(frontmatter_data, body)
    if not title:
        raise ValueError(f"タイトルが見つかりません: {path}")

    # A title taken from a heading would otherwise appear twice (once as the
    # title, once in the body), so remove that heading from the body.
    if not _has_frontmatter_title(frontmatter_data):
        body = _remove_title_heading(body, title)

    tags = _get_tags(frontmatter_data)

    # Relative eyecatch/image paths are resolved against the file's directory.
    eyecatch = _get_eyecatch(frontmatter_data, path.parent)
    local_images = _detect_local_images(body, path.parent)

    return ParsedArticle(
        title=title,
        body=body.strip(),
        tags=tags,
        local_images=local_images,
        source_path=path,
        eyecatch=eyecatch,
    )
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _extract_frontmatter(content: str) -> dict[str, Any]:
    """Parse the YAML frontmatter block at the start of ``content``.

    Args:
        content: The full file content

    Returns:
        The frontmatter as a dict. An empty dict is returned when there is no
        frontmatter block, when the YAML cannot be parsed, or when the parsed
        YAML is not a mapping.
    """
    found = FRONTMATTER_PATTERN.match(content)
    if found is None:
        return {}

    try:
        parsed = yaml.safe_load(found.group(1))
    except yaml.YAMLError:
        # Malformed frontmatter is treated the same as missing frontmatter.
        return {}

    if isinstance(parsed, dict):
        return parsed
    return {}
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _strip_frontmatter(content: str) -> str:
    """Return ``content`` without its leading YAML frontmatter block.

    Args:
        content: The full file content

    Returns:
        Everything after the frontmatter block, or ``content`` unchanged when
        it does not start with one
    """
    found = FRONTMATTER_PATTERN.match(content)
    return content if found is None else content[found.end() :]
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _has_frontmatter_title(frontmatter: dict[str, Any]) -> bool:
|
|
166
|
+
"""Check if frontmatter has a non-empty title.
|
|
167
|
+
|
|
168
|
+
Args:
|
|
169
|
+
frontmatter: Parsed YAML frontmatter
|
|
170
|
+
|
|
171
|
+
Returns:
|
|
172
|
+
True if frontmatter has a non-empty title
|
|
173
|
+
"""
|
|
174
|
+
title = frontmatter.get("title", "")
|
|
175
|
+
return bool(title and str(title).strip())
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _get_title(frontmatter: dict[str, Any], body: str) -> str | None:
    """Pick the article title from the frontmatter or from a heading.

    Sources are tried in this order:
    1. YAML frontmatter 'title' field (if non-empty)
    2. First H1 heading
    3. First H2 heading

    Args:
        frontmatter: Parsed YAML frontmatter
        body: The body content (frontmatter stripped)

    Returns:
        The title with surrounding whitespace removed, or None when no
        source yields one
    """
    if _has_frontmatter_title(frontmatter):
        return str(frontmatter["title"]).strip()

    # H1 is preferred; H2 is consulted only when the body has no H1 at all.
    for heading_pattern in (_H1_TITLE_PATTERN, _H2_TITLE_PATTERN):
        found = heading_pattern.search(body)
        if found:
            return found.group(1).strip()

    return None
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _remove_title_heading(body: str, title: str) -> str:
|
|
211
|
+
"""Remove the title heading from body content.
|
|
212
|
+
|
|
213
|
+
Removes the first H1 or H2 heading that matches the title.
|
|
214
|
+
|
|
215
|
+
Args:
|
|
216
|
+
body: The body content
|
|
217
|
+
title: The title that was extracted
|
|
218
|
+
|
|
219
|
+
Returns:
|
|
220
|
+
Body with the title heading removed
|
|
221
|
+
"""
|
|
222
|
+
# Escape special regex characters in title
|
|
223
|
+
escaped_title = re.escape(title)
|
|
224
|
+
|
|
225
|
+
# Try to remove H1 first
|
|
226
|
+
h1_pattern = re.compile(rf"^#\s+{escaped_title}\s*\n?", re.MULTILINE)
|
|
227
|
+
new_body, count = h1_pattern.subn("", body, count=1)
|
|
228
|
+
if count > 0:
|
|
229
|
+
return new_body
|
|
230
|
+
|
|
231
|
+
# Try to remove H2
|
|
232
|
+
h2_pattern = re.compile(rf"^##\s+{escaped_title}\s*\n?", re.MULTILINE)
|
|
233
|
+
new_body, count = h2_pattern.subn("", body, count=1)
|
|
234
|
+
if count > 0:
|
|
235
|
+
return new_body
|
|
236
|
+
|
|
237
|
+
return body
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _get_tags(frontmatter: dict[str, Any]) -> list[str]:
|
|
241
|
+
"""Extract tags from frontmatter.
|
|
242
|
+
|
|
243
|
+
Handles both list and single string formats.
|
|
244
|
+
|
|
245
|
+
Args:
|
|
246
|
+
frontmatter: Parsed YAML frontmatter
|
|
247
|
+
|
|
248
|
+
Returns:
|
|
249
|
+
List of tags (empty list if none)
|
|
250
|
+
"""
|
|
251
|
+
tags = frontmatter.get("tags", [])
|
|
252
|
+
|
|
253
|
+
if isinstance(tags, str):
|
|
254
|
+
return [tags] if tags.strip() else []
|
|
255
|
+
|
|
256
|
+
if isinstance(tags, list):
|
|
257
|
+
return [str(tag).strip() for tag in tags if tag]
|
|
258
|
+
|
|
259
|
+
return []
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _get_eyecatch(frontmatter: dict[str, Any], base_dir: Path) -> Path | None:
|
|
263
|
+
"""Extract eyecatch image path from frontmatter.
|
|
264
|
+
|
|
265
|
+
Args:
|
|
266
|
+
frontmatter: Parsed YAML frontmatter
|
|
267
|
+
base_dir: Base directory for resolving relative paths
|
|
268
|
+
|
|
269
|
+
Returns:
|
|
270
|
+
Resolved absolute path to eyecatch image, or None if not specified
|
|
271
|
+
"""
|
|
272
|
+
eyecatch = frontmatter.get("eyecatch", "")
|
|
273
|
+
|
|
274
|
+
if not eyecatch or not str(eyecatch).strip():
|
|
275
|
+
return None
|
|
276
|
+
|
|
277
|
+
eyecatch_str = str(eyecatch).strip()
|
|
278
|
+
return (base_dir / eyecatch_str).resolve()
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def _detect_local_images(body: str, base_dir: Path) -> list[LocalImage]:
    """Detect local image references in Markdown content.

    Finds all image references that are local files (not URLs). Whitespace
    around the image target and an optional quoted title
    (``![alt](path "title")``) are not treated as part of the path.

    Args:
        body: The Markdown body content
        base_dir: Base directory for resolving relative paths

    Returns:
        List of LocalImage objects, in order of appearance
    """
    images: list[LocalImage] = []

    for match in IMAGE_PATTERN.finditer(body):
        alt_text = match.group(1)
        image_path = match.group(2).strip()

        # IMAGE_PATTERN captures everything between the parentheses, which
        # includes a trailing quoted title if one is present. Only a
        # whitespace-free path followed by a quoted string is split, so
        # untitled paths that contain spaces are left as they are.
        # NOTE(review): markdown_path no longer includes the title text;
        # confirm any caller that substitutes markdown_path back into the
        # body copes with titled images.
        titled = re.fullmatch(r"""(\S+)\s+(?:"[^"]*"|'[^']*')""", image_path)
        if titled:
            image_path = titled.group(1)

        if not image_path:
            continue

        # Skip URLs and data URIs (schemes are compared case-insensitively)
        if image_path.lower().startswith(URL_PREFIXES):
            continue

        images.append(
            LocalImage(
                markdown_path=image_path,
                absolute_path=(base_dir / image_path).resolve(),
                alt_text=alt_text,
            )
        )

    return images
|