note-connector 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/dist/paths.js +4 -0
  2. package/dist/setup-dependencies.js +61 -7
  3. package/package.json +3 -2
  4. package/py/pyproject.toml +86 -0
  5. package/py/src/note_mcp/__init__.py +7 -0
  6. package/py/src/note_mcp/__main__.py +65 -0
  7. package/py/src/note_mcp/api/__init__.py +31 -0
  8. package/py/src/note_mcp/api/articles.py +1395 -0
  9. package/py/src/note_mcp/api/client.py +318 -0
  10. package/py/src/note_mcp/api/embeds.py +482 -0
  11. package/py/src/note_mcp/api/images.py +456 -0
  12. package/py/src/note_mcp/api/preview.py +142 -0
  13. package/py/src/note_mcp/api/public_notes.py +150 -0
  14. package/py/src/note_mcp/auth/__init__.py +9 -0
  15. package/py/src/note_mcp/auth/browser.py +574 -0
  16. package/py/src/note_mcp/auth/file_session.py +145 -0
  17. package/py/src/note_mcp/auth/session.py +240 -0
  18. package/py/src/note_mcp/browser/__init__.py +10 -0
  19. package/py/src/note_mcp/browser/config.py +21 -0
  20. package/py/src/note_mcp/browser/manager.py +182 -0
  21. package/py/src/note_mcp/browser/preview.py +68 -0
  22. package/py/src/note_mcp/browser/url_helpers.py +18 -0
  23. package/py/src/note_mcp/chatgpt/__init__.py +1 -0
  24. package/py/src/note_mcp/chatgpt/__main__.py +63 -0
  25. package/py/src/note_mcp/chatgpt/access_log.py +25 -0
  26. package/py/src/note_mcp/chatgpt/auth.py +52 -0
  27. package/py/src/note_mcp/chatgpt/images.py +92 -0
  28. package/py/src/note_mcp/chatgpt/login_once.py +26 -0
  29. package/py/src/note_mcp/chatgpt/middleware.py +31 -0
  30. package/py/src/note_mcp/chatgpt/tools.py +255 -0
  31. package/py/src/note_mcp/chatgpt/widgets.py +121 -0
  32. package/py/src/note_mcp/decorators.py +113 -0
  33. package/py/src/note_mcp/investigator/__init__.py +33 -0
  34. package/py/src/note_mcp/investigator/__main__.py +11 -0
  35. package/py/src/note_mcp/investigator/cli.py +313 -0
  36. package/py/src/note_mcp/investigator/core.py +653 -0
  37. package/py/src/note_mcp/investigator/mcp_tools.py +225 -0
  38. package/py/src/note_mcp/models.py +557 -0
  39. package/py/src/note_mcp/py.typed +0 -0
  40. package/py/src/note_mcp/server.py +905 -0
  41. package/py/src/note_mcp/utils/__init__.py +7 -0
  42. package/py/src/note_mcp/utils/file_parser.py +314 -0
  43. package/py/src/note_mcp/utils/html_to_markdown.py +477 -0
  44. package/py/src/note_mcp/utils/logging.py +119 -0
  45. package/py/src/note_mcp/utils/markdown.py +12 -0
  46. package/py/src/note_mcp/utils/markdown_to_html.py +826 -0
@@ -0,0 +1,7 @@
1
+ """Utility modules for note-mcp."""
2
+
3
+ from note_mcp.utils.html_to_markdown import html_to_markdown
4
+ from note_mcp.utils.logging import get_logger, setup_logging
5
+ from note_mcp.utils.markdown_to_html import markdown_to_html
6
+
7
+ __all__ = ["html_to_markdown", "markdown_to_html", "setup_logging", "get_logger"]
@@ -0,0 +1,314 @@
1
+ """Markdown file parser for note.com article creation.
2
+
3
+ Parses Markdown files with YAML frontmatter support,
4
+ extracts titles from headings, and detects local images.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import re
10
+ from dataclasses import dataclass, field
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ import yaml
15
+
16
+
17
@dataclass
class LocalImage:
    """An image reference in Markdown that points at a file rather than a URL.

    Attributes:
        markdown_path: The image target exactly as it appears in the
            Markdown source (for example ``./images/test.png``).
        absolute_path: The same target resolved to an absolute path.
        alt_text: The image's alt text; an empty string when none was given.
    """

    markdown_path: str
    absolute_path: Path
    alt_text: str = field(default="")
30
+
31
+
32
@dataclass
class ParsedArticle:
    """The result of parsing one Markdown article file.

    Attributes:
        title: Article title, taken from the frontmatter or from a heading.
        body: Article body as Markdown text.
        tags: Tags attached to the article; a fresh empty list by default.
        local_images: Local image references found in the body; a fresh
            empty list by default.
        source_path: Path of the Markdown file the article was read from,
            or None when not set.
        eyecatch: Path of the eyecatch (header) image, or None when not set.
    """

    title: str
    body: str
    # Mutable defaults go through default_factory so instances never share a list.
    tags: list[str] = field(default_factory=list)
    local_images: list[LocalImage] = field(default_factory=list)
    source_path: Path | None = None
    eyecatch: Path | None = None
51
+
52
+
53
+ # Pattern to match YAML frontmatter (must start at beginning of file)
54
+ FRONTMATTER_PATTERN = re.compile(r"^---\n(.*?)\n---\n?", re.DOTALL)
55
+
56
+ # Pattern to match Markdown images: ![alt](path)
57
+ IMAGE_PATTERN = re.compile(r"!\[([^\]]*)\]\(([^)]+)\)")
58
+
59
+ # Patterns that indicate a URL (not a local file)
60
+ URL_PREFIXES = ("http://", "https://", "data:")
61
+
62
+ # Patterns to match H1 and H2 title headings
63
+ _H1_TITLE_PATTERN = re.compile(r"^#\s+(.+)$", re.MULTILINE)
64
+ _H2_TITLE_PATTERN = re.compile(r"^##\s+(.+)$", re.MULTILINE)
65
+
66
+
67
def parse_markdown_file(file_path: Path | str) -> ParsedArticle:
    """Parse a Markdown file and extract article components.

    The function extracts:
    - Title: From YAML frontmatter, or first H1, or first H2 as fallback
    - Tags: From YAML frontmatter (optional)
    - Body: The Markdown content (frontmatter stripped, title heading removed)
    - Local images: Paths to local image files (not URLs)
    - Eyecatch: Path to eyecatch image from YAML frontmatter (optional)

    Args:
        file_path: Path to the Markdown file (str or Path)

    Returns:
        ParsedArticle with extracted components

    Raises:
        FileNotFoundError: If the path does not exist or is not a regular file
        ValueError: If no title can be extracted
    """
    path = Path(file_path)

    # is_file() rejects directories as well as missing paths, so a directory
    # produces the documented FileNotFoundError instead of failing later
    # inside read_text() with a different exception type.
    if not path.is_file():
        raise FileNotFoundError(f"ファイルが見つかりません: {path}")

    # "utf-8-sig" reads plain UTF-8 and additionally drops a leading BOM.
    # With a BOM left in place the text would not start with "---" (or "#"),
    # so frontmatter and a first-line heading would go unrecognised.
    content = path.read_text(encoding="utf-8-sig")

    # Try to extract YAML frontmatter
    frontmatter_data = _extract_frontmatter(content)
    body = _strip_frontmatter(content)

    # Get title from frontmatter or headings
    title = _get_title(frontmatter_data, body)
    if not title:
        raise ValueError(f"タイトルが見つかりません: {path}")

    # If title came from heading (not frontmatter), remove it from body
    # so the title is not repeated at the top of the article.
    if not _has_frontmatter_title(frontmatter_data):
        body = _remove_title_heading(body, title)

    # Get tags from frontmatter
    tags = _get_tags(frontmatter_data)

    # Relative eyecatch/image paths are resolved against the file's directory
    eyecatch = _get_eyecatch(frontmatter_data, path.parent)
    local_images = _detect_local_images(body, path.parent)

    # Normalize body whitespace
    body = body.strip()

    return ParsedArticle(
        title=title,
        body=body,
        tags=tags,
        local_images=local_images,
        source_path=path,
        eyecatch=eyecatch,
    )
127
+
128
+
129
def _extract_frontmatter(content: str) -> dict[str, Any]:
    """Parse the YAML frontmatter block at the start of *content*.

    Args:
        content: The full file content

    Returns:
        The frontmatter as a dict. An empty dict is returned when there is
        no frontmatter block, when the YAML cannot be parsed, or when the
        parsed YAML is not a mapping.
    """
    found = FRONTMATTER_PATTERN.match(content)
    if found is None:
        return {}

    try:
        parsed = yaml.safe_load(found.group(1))
    except yaml.YAMLError:
        # Malformed YAML is treated the same as having no frontmatter.
        return {}

    if isinstance(parsed, dict):
        return parsed
    return {}
148
+
149
+
150
def _strip_frontmatter(content: str) -> str:
    """Return *content* without its leading YAML frontmatter block.

    Args:
        content: The full file content

    Returns:
        Everything after the frontmatter match, or *content* unchanged when
        it does not start with a frontmatter block
    """
    found = FRONTMATTER_PATTERN.match(content)
    return content if found is None else content[found.end() :]
163
+
164
+
165
+ def _has_frontmatter_title(frontmatter: dict[str, Any]) -> bool:
166
+ """Check if frontmatter has a non-empty title.
167
+
168
+ Args:
169
+ frontmatter: Parsed YAML frontmatter
170
+
171
+ Returns:
172
+ True if frontmatter has a non-empty title
173
+ """
174
+ title = frontmatter.get("title", "")
175
+ return bool(title and str(title).strip())
176
+
177
+
178
def _get_title(frontmatter: dict[str, Any], body: str) -> str | None:
    """Pick the article title from the frontmatter or the body headings.

    Sources are consulted in this order and the first hit wins:
    1. YAML frontmatter 'title' field (if non-empty)
    2. First H1 heading in the body
    3. First H2 heading in the body

    Args:
        frontmatter: Parsed YAML frontmatter
        body: The body content (frontmatter stripped)

    Returns:
        The title with surrounding whitespace removed, or None when no
        source provides one
    """
    if _has_frontmatter_title(frontmatter):
        return str(frontmatter["title"]).strip()

    # H1 takes precedence over H2; within a level the first heading is used.
    for heading_pattern in (_H1_TITLE_PATTERN, _H2_TITLE_PATTERN):
        found = heading_pattern.search(body)
        if found is not None:
            return found.group(1).strip()

    return None
208
+
209
+
210
+ def _remove_title_heading(body: str, title: str) -> str:
211
+ """Remove the title heading from body content.
212
+
213
+ Removes the first H1 or H2 heading that matches the title.
214
+
215
+ Args:
216
+ body: The body content
217
+ title: The title that was extracted
218
+
219
+ Returns:
220
+ Body with the title heading removed
221
+ """
222
+ # Escape special regex characters in title
223
+ escaped_title = re.escape(title)
224
+
225
+ # Try to remove H1 first
226
+ h1_pattern = re.compile(rf"^#\s+{escaped_title}\s*\n?", re.MULTILINE)
227
+ new_body, count = h1_pattern.subn("", body, count=1)
228
+ if count > 0:
229
+ return new_body
230
+
231
+ # Try to remove H2
232
+ h2_pattern = re.compile(rf"^##\s+{escaped_title}\s*\n?", re.MULTILINE)
233
+ new_body, count = h2_pattern.subn("", body, count=1)
234
+ if count > 0:
235
+ return new_body
236
+
237
+ return body
238
+
239
+
240
+ def _get_tags(frontmatter: dict[str, Any]) -> list[str]:
241
+ """Extract tags from frontmatter.
242
+
243
+ Handles both list and single string formats.
244
+
245
+ Args:
246
+ frontmatter: Parsed YAML frontmatter
247
+
248
+ Returns:
249
+ List of tags (empty list if none)
250
+ """
251
+ tags = frontmatter.get("tags", [])
252
+
253
+ if isinstance(tags, str):
254
+ return [tags] if tags.strip() else []
255
+
256
+ if isinstance(tags, list):
257
+ return [str(tag).strip() for tag in tags if tag]
258
+
259
+ return []
260
+
261
+
262
+ def _get_eyecatch(frontmatter: dict[str, Any], base_dir: Path) -> Path | None:
263
+ """Extract eyecatch image path from frontmatter.
264
+
265
+ Args:
266
+ frontmatter: Parsed YAML frontmatter
267
+ base_dir: Base directory for resolving relative paths
268
+
269
+ Returns:
270
+ Resolved absolute path to eyecatch image, or None if not specified
271
+ """
272
+ eyecatch = frontmatter.get("eyecatch", "")
273
+
274
+ if not eyecatch or not str(eyecatch).strip():
275
+ return None
276
+
277
+ eyecatch_str = str(eyecatch).strip()
278
+ return (base_dir / eyecatch_str).resolve()
279
+
280
+
281
def _image_destination(raw_target: str) -> str:
    """Return the link destination part of a Markdown image target.

    Strips surrounding whitespace, an optional trailing quoted title
    (``path "Title"`` / ``path 'Title'``) and an optional ``<...>`` wrapper.
    Falls back to the raw text when nothing would be left.
    """
    destination = raw_target.strip()
    # The title is only removed when it is separated from the destination by
    # whitespace, so a path that itself contains quotes is left alone.
    destination = re.sub(r"""\s+(?:"[^"]*"|'[^']*')$""", "", destination)
    if len(destination) >= 2 and destination[0] == "<" and destination[-1] == ">":
        destination = destination[1:-1]
    return destination or raw_target


def _detect_local_images(body: str, base_dir: Path) -> list[LocalImage]:
    """Detect local image references in Markdown content.

    Finds all image references that are local files (not URLs). The files
    are not checked for existence; paths are only resolved.

    Args:
        body: The Markdown body content
        base_dir: Base directory for resolving relative paths

    Returns:
        List of LocalImage objects in order of appearance. ``markdown_path``
        is the text between the parentheses exactly as written, while
        ``absolute_path`` is resolved from the link destination only, i.e.
        without an image title or ``<...>`` wrapper.
    """
    images: list[LocalImage] = []

    for match in IMAGE_PATTERN.finditer(body):
        alt_text, written_target = match.groups()
        destination = _image_destination(written_target)

        # Skip URLs and data URIs. The comparison is lower-cased so that a
        # scheme written in upper or mixed case is still treated as a URL.
        if destination.lower().startswith(URL_PREFIXES):
            continue

        images.append(
            LocalImage(
                markdown_path=written_target,
                absolute_path=(base_dir / destination).resolve(),
                alt_text=alt_text,
            )
        )

    return images