note-connector 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/dist/paths.js +4 -0
  2. package/dist/setup-dependencies.js +56 -13
  3. package/package.json +3 -2
  4. package/py/pyproject.toml +86 -0
  5. package/py/src/note_mcp/__init__.py +7 -0
  6. package/py/src/note_mcp/__main__.py +65 -0
  7. package/py/src/note_mcp/api/__init__.py +31 -0
  8. package/py/src/note_mcp/api/articles.py +1395 -0
  9. package/py/src/note_mcp/api/client.py +318 -0
  10. package/py/src/note_mcp/api/embeds.py +482 -0
  11. package/py/src/note_mcp/api/images.py +456 -0
  12. package/py/src/note_mcp/api/preview.py +142 -0
  13. package/py/src/note_mcp/api/public_notes.py +150 -0
  14. package/py/src/note_mcp/auth/__init__.py +9 -0
  15. package/py/src/note_mcp/auth/browser.py +574 -0
  16. package/py/src/note_mcp/auth/file_session.py +145 -0
  17. package/py/src/note_mcp/auth/session.py +240 -0
  18. package/py/src/note_mcp/browser/__init__.py +10 -0
  19. package/py/src/note_mcp/browser/config.py +21 -0
  20. package/py/src/note_mcp/browser/manager.py +182 -0
  21. package/py/src/note_mcp/browser/preview.py +68 -0
  22. package/py/src/note_mcp/browser/url_helpers.py +18 -0
  23. package/py/src/note_mcp/chatgpt/__init__.py +1 -0
  24. package/py/src/note_mcp/chatgpt/__main__.py +63 -0
  25. package/py/src/note_mcp/chatgpt/access_log.py +25 -0
  26. package/py/src/note_mcp/chatgpt/auth.py +52 -0
  27. package/py/src/note_mcp/chatgpt/images.py +92 -0
  28. package/py/src/note_mcp/chatgpt/login_once.py +26 -0
  29. package/py/src/note_mcp/chatgpt/middleware.py +31 -0
  30. package/py/src/note_mcp/chatgpt/tools.py +255 -0
  31. package/py/src/note_mcp/chatgpt/widgets.py +121 -0
  32. package/py/src/note_mcp/decorators.py +113 -0
  33. package/py/src/note_mcp/investigator/__init__.py +33 -0
  34. package/py/src/note_mcp/investigator/__main__.py +11 -0
  35. package/py/src/note_mcp/investigator/cli.py +313 -0
  36. package/py/src/note_mcp/investigator/core.py +653 -0
  37. package/py/src/note_mcp/investigator/mcp_tools.py +225 -0
  38. package/py/src/note_mcp/models.py +557 -0
  39. package/py/src/note_mcp/py.typed +0 -0
  40. package/py/src/note_mcp/server.py +905 -0
  41. package/py/src/note_mcp/utils/__init__.py +7 -0
  42. package/py/src/note_mcp/utils/file_parser.py +314 -0
  43. package/py/src/note_mcp/utils/html_to_markdown.py +477 -0
  44. package/py/src/note_mcp/utils/logging.py +119 -0
  45. package/py/src/note_mcp/utils/markdown.py +12 -0
  46. package/py/src/note_mcp/utils/markdown_to_html.py +826 -0
@@ -0,0 +1,477 @@
1
+ """HTML to Markdown conversion utility.
2
+
3
+ Converts note.com HTML format (ProseMirror) back to Markdown.
4
+ This is the reverse operation of markdown_to_html.
5
+ """
6
+
7
+ import html
8
+ import re
9
+ from collections.abc import Callable
10
+
11
# Pre-compiled regex patterns for basic block elements.
#
# Fenced code: <pre ...><code ...>...</code></pre>. Attributes are accepted on
# both tags (e.g. <pre class="codeBlock"> or <code class="language-python">) so
# that an attribute-bearing <code> is captured without its wrapper tag.
_CODE_BLOCK_PATTERN = re.compile(
    r"<pre[^>]*><code(?:\s[^>]*)?>(.*?)</code></pre>",
    re.DOTALL | re.IGNORECASE,
)
# Fallback: <pre>...</pre> that is NOT immediately followed by a <code ...> tag
# (some note.com formats omit <code>). The lookahead mirrors the pattern above
# so the two patterns never claim the same block.
_PRE_ONLY_PATTERN = re.compile(
    r"<pre[^>]*>(?!<code[\s>])(.*?)</pre>",
    re.DOTALL | re.IGNORECASE,
)
# <h1>..<h6>; group 1 is the tag name, group 2 the heading body.
_HEADING_PATTERN = re.compile(
    r"<(h[1-6])[^>]*>(.*?)</\1>",
    re.IGNORECASE | re.DOTALL,
)
# <p ...>body</p>; group 1 is the body.
_PARAGRAPH_PATTERN = re.compile(
    r"<p[^>]*>(.*?)</p>",
    re.IGNORECASE | re.DOTALL,
)
# <hr>, <hr/>, <hr ... />
_HR_PATTERN = re.compile(r"<hr[^>]*/?>", re.IGNORECASE)

# Patterns for complex elements.
#
# <figure><blockquote>quote</blockquote><figcaption>source</figcaption></figure>
# group 1 = quote HTML, group 2 = caption HTML.
_BLOCKQUOTE_FIGURE_PATTERN = re.compile(
    r"<figure[^>]*>\s*<blockquote[^>]*>(.*?)</blockquote>\s*"
    r"<figcaption>(.*?)</figcaption>\s*</figure>",
    re.DOTALL | re.IGNORECASE,
)
_BR_PATTERN = re.compile(r"<br\s*/?>", re.IGNORECASE)
# A link inside a figcaption: group 1 = href, group 2 = plain link text.
_FIGCAPTION_LINK_PATTERN = re.compile(
    r'<a\s+href="([^"]+)"[^>]*>([^<]+)</a>',
    re.IGNORECASE,
)
# Image figure with src before alt: groups are (src, alt, caption).
_IMAGE_FIGURE_PATTERN = re.compile(
    r'<figure[^>]*>\s*<img[^>]*src="([^"]+)"[^>]*alt="([^"]*)"[^>]*>\s*'
    r"<figcaption>(.*?)</figcaption>\s*</figure>",
    re.DOTALL | re.IGNORECASE,
)
# Alternative pattern for img with alt before src: groups are (alt, src, caption).
_IMAGE_FIGURE_PATTERN_ALT = re.compile(
    r'<figure[^>]*>\s*<img[^>]*alt="([^"]*)"[^>]*src="([^"]+)"[^>]*>\s*'
    r"<figcaption>(.*?)</figcaption>\s*</figure>",
    re.DOTALL | re.IGNORECASE,
)
# Non-greedy list matchers: they stop at the FIRST closing tag, so they are
# only reliable for lists that do not contain a list of the same kind.
_UL_PATTERN = re.compile(r"<ul[^>]*>(.*?)</ul>", re.DOTALL | re.IGNORECASE)
_OL_PATTERN = re.compile(r"<ol[^>]*>(.*?)</ol>", re.DOTALL | re.IGNORECASE)

# Patterns for inline elements.
# Link: href must be the first attribute; group 1 = href, group 2 = body.
_LINK_PATTERN = re.compile(
    r'<a\s+href="([^"]+)"[^>]*>(.*?)</a>',
    re.DOTALL | re.IGNORECASE,
)
_STRONG_PATTERN = re.compile(r"<strong>(.*?)</strong>", re.DOTALL | re.IGNORECASE)
_EM_PATTERN = re.compile(r"<em>(.*?)</em>", re.DOTALL | re.IGNORECASE)
_INLINE_CODE_PATTERN = re.compile(r"<code>(.*?)</code>", re.DOTALL | re.IGNORECASE)
_STRIKETHROUGH_PATTERN = re.compile(r"<s>(.*?)</s>", re.DOTALL | re.IGNORECASE)

# TOC element pattern (note.com uses a class containing "TableOfContents").
# Matches an element carrying that class through the first closing
# </div>, </section> or </nav>.
_TOC_ELEMENT_PATTERN = re.compile(
    r'<[^>]*class="[^"]*TableOfContents[^"]*"[^>]*>.*?</(?:div|section|nav)>',
    re.DOTALL | re.IGNORECASE,
)
# Also match a lone (self-closing or unclosed) TOC tag.
_TOC_ELEMENT_SIMPLE_PATTERN = re.compile(
    r'<[^>]*class="[^"]*TableOfContents[^"]*"[^>]*/?>',
    re.IGNORECASE,
)

# Text alignment pattern (Issue #40).
# Matches <p ... style="... text-align: center|right|left ..." ...>body</p>;
# groups are (attributes, alignment, body).
_TEXT_ALIGN_P_PATTERN = re.compile(
    r'<p([^>]*style="[^"]*text-align:\s*(center|right|left)[^"]*"[^>]*)>(.*?)</p>',
    re.DOTALL | re.IGNORECASE,
)

# Cleanup patterns.
# name="<36 hex/dash chars>" or id="..." attributes (UUID-shaped values).
_UUID_ATTR_PATTERN = re.compile(
    r'\s(?:name|id)="[a-f0-9-]{36}"',
    re.IGNORECASE,
)
91
+
92
+
93
def _strip_fence_markers(code: str) -> str:
    """Remove Markdown fence markers that wrap a code block's text.

    A leading ``` line (optionally carrying a language identifier) and a
    trailing ``` are dropped, then surrounding whitespace is trimmed.

    Args:
        code: Code block text, possibly wrapped in fence markers.

    Returns:
        The code with fence markers and outer whitespace removed.
    """
    body = code

    if body.startswith("```"):
        _opening_line, newline, remainder = body.partition("\n")
        if newline:
            # The whole first line is the fence (plus optional language).
            body = remainder
        else:
            # Single-line input: skip the backticks and any alphanumeric
            # language identifier glued to them.
            cut = 3
            for char in body[3:]:
                if not char.isalnum():
                    break
                cut += 1
            body = body[cut:]

    # Trailing whitespace must go first so a closing fence is found at the end.
    body = body.rstrip().removesuffix("```")

    return body.strip()
120
+
121
+
122
def _create_code_block_extractor(code_blocks: list[str]) -> Callable[[re.Match[str]], str]:
    """Build a regex substitution callback that parks code blocks in a list.

    Args:
        code_blocks: List that receives each rendered fenced block; it is
            appended to in place every time the callback runs.

    Returns:
        A callback suitable for ``Pattern.sub`` that stores the fenced
        Markdown for the match and returns a ``__CODE_BLOCK_<n>__``
        placeholder, where ``<n>`` is the block's index in ``code_blocks``.
    """

    def extract_code_block(match: re.Match[str]) -> str:
        """Store the matched code as a fenced block and return its placeholder."""
        # Decode entities, then drop any fence markers embedded in the text.
        source = _strip_fence_markers(html.unescape(match.group(1)))
        index = len(code_blocks)
        # Trailing blank line keeps the block separated from what follows.
        code_blocks.append(f"```\n{source}\n```\n\n")
        return f"__CODE_BLOCK_{index}__"

    return extract_code_block
144
+
145
+
146
def _convert_text_align_paragraph(match: re.Match[str]) -> str:
    """Render an aligned paragraph using Markdown alignment markers.

    Args:
        match: Regex match whose groups are (attrs, alignment, content).

    Returns:
        The stripped content wrapped according to the alignment and followed
        by a blank line:
        - center: ->text<-
        - right: ->text
        - left: <-text
        Any other alignment value yields the content unchanged.
    """
    direction = match.group(2).lower()
    body = match.group(3).strip()

    if direction == "center":
        marked = f"->{body}<-"
    elif direction == "right":
        marked = f"->{body}"
    elif direction == "left":
        marked = f"<-{body}"
    else:
        marked = body

    return marked + "\n\n"
167
+
168
+
169
def _convert_heading(match: re.Match[str]) -> str:
    """Render a heading match as an ATX Markdown heading.

    Args:
        match: Regex match whose group 1 is the tag name (``h1``..``h6``)
            and group 2 is the heading body.

    Returns:
        ``#`` repeated once per heading level, a space, the stripped body,
        and a trailing blank line.
    """
    tag_name, body = match.group(1), match.group(2)
    # The digit after "h" is the heading level.
    hashes = "#" * int(tag_name[1])
    return f"{hashes} {body.strip()}\n\n"
174
+
175
+
176
def _convert_paragraph(match: re.Match[str]) -> str:
    """Render a paragraph match as its stripped body plus a blank line.

    Args:
        match: Regex match whose group 1 is the paragraph body.

    Returns:
        The body without surrounding whitespace, followed by two newlines.
    """
    return match.group(1).strip() + "\n\n"
180
+
181
+
182
def _convert_blockquote_figure(match: re.Match[str]) -> str:
    """Render a blockquote figure as a Markdown quote with optional citation.

    Args:
        match: Regex match whose group 1 is the blockquote HTML and group 2
            is the figcaption HTML.

    Returns:
        One ``> `` line per non-empty quote line, then (when the caption is
        non-empty) a ``> — `` citation line, followed by a blank line.
    """
    quote_html, caption = match.group(1), match.group(2).strip()

    # Unwrap <p> elements, keeping only their bodies.
    quote_html = re.sub(r"<p[^>]*>(.*?)</p>", r"\1", quote_html, flags=re.DOTALL | re.IGNORECASE)
    # Line breaks become real newlines so each becomes its own quote line.
    quote_html = _BR_PATTERN.sub("\n", quote_html)

    quoted: list[str] = []
    for raw_line in quote_html.strip().split("\n"):
        stripped = raw_line.strip()
        if stripped:
            quoted.append(f"> {stripped}")

    if caption:
        # A linked caption is cited as "text (url)"; otherwise cite it verbatim.
        link = _FIGCAPTION_LINK_PATTERN.search(caption)
        citation = f"{link.group(2)} ({link.group(1)})" if link else caption
        quoted.append(f"> — {citation}")

    return "\n".join(quoted) + "\n\n"
209
+
210
+
211
def _convert_image_figure(match: re.Match[str], alt_first: bool = False) -> str:
    """Render an image figure match as a Markdown image.

    Args:
        match: Regex match with three groups; the third is the caption.
        alt_first: When True the first two groups are (alt, src); otherwise
            they are (src, alt).

    Returns:
        ``![alt](src "caption")`` when the stripped caption is non-empty,
        else ``![alt](src)``; either form is followed by a blank line.
    """
    first, second = match.group(1), match.group(2)
    alt, src = (first, second) if alt_first else (second, first)

    caption = match.group(3).strip()
    title = f' "{caption}"' if caption else ""

    return f"![{alt}]({src}{title})\n\n"
225
+
226
+
227
+ def _find_matching_tags(
228
+ html_content: str,
229
+ tag_name: str,
230
+ ) -> list[tuple[str, int, int]]:
231
+ """Find all top-level matching tag pairs with proper nesting support.
232
+
233
+ Args:
234
+ html_content: HTML string to search
235
+ tag_name: Tag name to find (e.g., "li", "ul")
236
+
237
+ Returns:
238
+ List of (content, start_pos, end_pos) tuples for each match
239
+ """
240
+ results: list[tuple[str, int, int]] = []
241
+ open_tag = f"<{tag_name}"
242
+ close_tag = f"</{tag_name}>"
243
+ pos = 0
244
+
245
+ while pos < len(html_content):
246
+ # Find opening tag
247
+ tag_start = html_content.find(open_tag, pos)
248
+ if tag_start == -1:
249
+ break
250
+
251
+ # Find the > that closes the opening tag
252
+ tag_end = html_content.find(">", tag_start)
253
+ if tag_end == -1:
254
+ break
255
+
256
+ # Track depth to find matching close tag
257
+ depth = 1
258
+ search_pos = tag_end + 1
259
+
260
+ while depth > 0 and search_pos < len(html_content):
261
+ next_open = html_content.find(open_tag, search_pos)
262
+ next_close = html_content.find(close_tag, search_pos)
263
+
264
+ if next_close == -1:
265
+ break
266
+
267
+ if next_open != -1 and next_open < next_close:
268
+ depth += 1
269
+ search_pos = next_open + len(open_tag)
270
+ else:
271
+ depth -= 1
272
+ if depth == 0:
273
+ content = html_content[tag_end + 1 : next_close]
274
+ results.append((content, tag_start, next_close + len(close_tag)))
275
+ search_pos = next_close + len(close_tag)
276
+
277
+ pos = search_pos
278
+
279
+ return results
280
+
281
+
282
+ def _find_matching_li_tags(html_content: str) -> list[str]:
283
+ """Find all top-level <li> elements, properly handling nested lists."""
284
+ return [content for content, _, _ in _find_matching_tags(html_content, "li")]
285
+
286
+
287
+ def _convert_list(html_content: str, ordered: bool = False, indent_level: int = 0) -> str:
288
+ """Convert list to Markdown format with nested list support.
289
+
290
+ Args:
291
+ html_content: HTML content of list
292
+ ordered: True for ordered list, False for unordered
293
+ indent_level: Current indentation level (0 = top level)
294
+
295
+ Returns:
296
+ Markdown formatted list
297
+ """
298
+ indent = " " * indent_level # 2 spaces per level
299
+ lines: list[str] = []
300
+ counter = 1
301
+
302
+ # Use proper tag matching instead of regex
303
+ li_contents = _find_matching_li_tags(html_content)
304
+
305
+ for li_content in li_contents:
306
+ # Extract text from first <p> tag if present (before any nested lists)
307
+ p_match = re.search(r"<p[^>]*>(.*?)</p>", li_content, re.DOTALL | re.IGNORECASE)
308
+ if p_match:
309
+ text = p_match.group(1).strip()
310
+ else:
311
+ # Remove any nested lists before extracting text
312
+ text = _UL_PATTERN.sub("", li_content)
313
+ text = _OL_PATTERN.sub("", text)
314
+ text = text.strip()
315
+
316
+ # Clean up any remaining HTML tags from text
317
+ text = re.sub(r"<[^>]+>", "", text).strip()
318
+
319
+ # Add list item
320
+ if text: # Only add if there's text content
321
+ if ordered:
322
+ lines.append(f"{indent}{counter}. {text}")
323
+ counter += 1
324
+ else:
325
+ lines.append(f"{indent}- {text}")
326
+
327
+ # Process nested lists
328
+ nested_ul = _UL_PATTERN.search(li_content)
329
+ nested_ol = _OL_PATTERN.search(li_content)
330
+ if nested_ul:
331
+ nested_md = _convert_list(nested_ul.group(1), ordered=False, indent_level=indent_level + 1)
332
+ lines.append(nested_md.rstrip())
333
+ if nested_ol:
334
+ nested_md = _convert_list(nested_ol.group(1), ordered=True, indent_level=indent_level + 1)
335
+ lines.append(nested_md.rstrip())
336
+
337
+ return "\n".join(lines) + "\n"
338
+
339
+
340
+ def _find_matching_tag_content(html_content: str, tag_name: str) -> tuple[str, int, int] | None:
341
+ """Find the content of a tag, properly handling nested same-name tags.
342
+
343
+ Returns (content, start_pos, end_pos) or None if not found.
344
+ """
345
+ results = _find_matching_tags(html_content, tag_name)
346
+ return results[0] if results else None
347
+
348
+
349
+ def _convert_all_lists(html_content: str) -> str:
350
+ """Convert all lists in the HTML content, properly handling nesting."""
351
+ result = html_content
352
+
353
+ # Process lists repeatedly until no more are found
354
+ # We process from innermost to outermost by repeatedly finding and replacing
355
+ max_iterations = 100 # Prevent infinite loops
356
+ for _ in range(max_iterations):
357
+ # Try to find a ul or ol
358
+ ul_match = _find_matching_tag_content(result, "ul")
359
+ ol_match = _find_matching_tag_content(result, "ol")
360
+
361
+ # Find which one comes first
362
+ if ul_match is None and ol_match is None:
363
+ break
364
+
365
+ if ul_match is not None and (ol_match is None or ul_match[1] < ol_match[1]):
366
+ # Process ul
367
+ content, start, end = ul_match
368
+ md = _convert_list(content, ordered=False)
369
+ result = result[:start] + md + result[end:]
370
+ elif ol_match is not None:
371
+ # Process ol
372
+ content, start, end = ol_match
373
+ md = _convert_list(content, ordered=True)
374
+ result = result[:start] + md + result[end:]
375
+
376
+ return result
377
+
378
+
379
def _convert_link(match: re.Match[str]) -> str:
    """Render a link match as an inline Markdown link.

    Args:
        match: Regex match whose group 1 is the URL and group 2 the link body.

    Returns:
        ``[text](url)`` with the link body stripped of outer whitespace.
    """
    target, label = match.group(1), match.group(2).strip()
    return f"[{label}]({target})"
384
+
385
+
386
def _convert_inline_elements(text: str) -> str:
    """Rewrite inline HTML elements as their Markdown equivalents.

    Links are converted first, then bold, italic, strikethrough and inline
    code, in that order.

    Args:
        text: HTML fragment to convert.

    Returns:
        The fragment with the recognised inline elements replaced.
    """
    converted = _LINK_PATTERN.sub(_convert_link, text)

    # Inline code stays last: fenced code blocks are expected to have been
    # extracted already, so any <code> left here is inline.
    wrappers = (
        (_STRONG_PATTERN, r"**\1**"),
        (_EM_PATTERN, r"*\1*"),
        (_STRIKETHROUGH_PATTERN, r"~~\1~~"),
        (_INLINE_CODE_PATTERN, r"`\1`"),
    )
    for pattern, template in wrappers:
        converted = pattern.sub(template, converted)

    return converted
406
+
407
+
408
def _normalize_code_block_markup(match: re.Match[str]) -> str:
    """Rebuild a matched <pre> block as a bare <pre><code>...</code></pre>.

    <br> becomes a newline and any other real markup inside the block
    (wrapper <code ...> tags, spans, ...) is removed. Escaped text such as
    ``&lt;div&gt;`` is left untouched because it is still entity-encoded.
    """
    inner = _BR_PATTERN.sub("\n", match.group(1))
    inner = re.sub(r"<[^>]+>", "", inner)
    return f"<pre><code>{inner}</code></pre>"


def html_to_markdown(html_content: str) -> str:
    """Convert note.com HTML to Markdown.

    Code blocks are swapped for placeholders before any other processing
    and are restored only after the final cleanup (tag removal, entity
    decoding, blank-line normalisation), so the cleanup never alters code:
    escaped markup, entities and blank lines inside a code block survive
    exactly as written.

    Args:
        html_content: HTML string from note.com editor (ProseMirror format)

    Returns:
        Markdown formatted text; an empty string for empty or
        whitespace-only input
    """
    if not html_content or not html_content.strip():
        return ""

    # Use local storage for code blocks (no shared module state).
    code_blocks: list[str] = []
    extract_code_block = _create_code_block_extractor(code_blocks)

    result = html_content

    # 1. Code blocks (protected with placeholders).
    # First reduce every <pre> variant to a bare <pre><code> block with real
    # markup removed, then extract them all in document order.
    result = _CODE_BLOCK_PATTERN.sub(_normalize_code_block_markup, result)
    # Also handle <pre> without <code> tag (some note.com formats)
    result = _PRE_ONLY_PATTERN.sub(_normalize_code_block_markup, result)
    result = _CODE_BLOCK_PATTERN.sub(extract_code_block, result)

    # 2. Convert TOC elements to the [TOC] marker.
    result = _TOC_ELEMENT_PATTERN.sub("[TOC]\n\n", result)
    result = _TOC_ELEMENT_SIMPLE_PATTERN.sub("[TOC]\n\n", result)

    # 3. Figure elements (blockquote and image are handled first).
    result = _BLOCKQUOTE_FIGURE_PATTERN.sub(_convert_blockquote_figure, result)
    result = _IMAGE_FIGURE_PATTERN.sub(lambda m: _convert_image_figure(m, alt_first=False), result)
    result = _IMAGE_FIGURE_PATTERN_ALT.sub(lambda m: _convert_image_figure(m, alt_first=True), result)

    # 4. Headings.
    result = _HEADING_PATTERN.sub(_convert_heading, result)

    # 5. Lists (nesting-aware, using proper tag matching).
    result = _convert_all_lists(result)

    # 6. Horizontal rules.
    result = _HR_PATTERN.sub("\n---\n\n", result)

    # 7. Inline elements (links, bold, italic, strikethrough, inline code).
    result = _convert_inline_elements(result)

    # 8. Paragraphs with text alignment (before plain paragraph conversion).
    result = _TEXT_ALIGN_P_PATTERN.sub(_convert_text_align_paragraph, result)

    # 9. Paragraphs (applied after the other elements).
    result = _PARAGRAPH_PATTERN.sub(_convert_paragraph, result)

    # === Final cleanup (code is still hidden behind placeholders) ===

    # Remove leftover UUID attributes.
    result = _UUID_ATTR_PATTERN.sub("", result)

    # Remove remaining HTML tags BEFORE decoding entities, so that a literal
    # &lt;tag&gt; in user content is preserved as text.
    result = re.sub(r"<[^>]+>", "", result)

    # Decode HTML entities.
    result = html.unescape(result)

    placeholder_pattern = re.compile(r"__CODE_BLOCK_(\d+)__")

    def is_known(match: re.Match[str]) -> bool:
        """Tell a real placeholder from look-alike text in the document."""
        return int(match.group(1)) < len(code_blocks)

    # Put a blank line after each placeholder, then normalise runs of blank
    # lines while the code itself is still out of reach.
    result = placeholder_pattern.sub(
        lambda m: m.group(0) + "\n\n" if is_known(m) else m.group(0),
        result,
    )
    result = re.sub(r"\n{3,}", "\n\n", result)

    # Restore code blocks last, in a single pass. The stored block carries
    # its own trailing blank line, which was already added above.
    result = placeholder_pattern.sub(
        lambda m: code_blocks[int(m.group(1))].rstrip("\n") if is_known(m) else m.group(0),
        result,
    )

    return result.strip()
@@ -0,0 +1,119 @@
1
+ """Secure logging configuration for note-mcp.
2
+
3
+ Provides logging setup with cookie value masking for security.
4
+ Cookie values are completely masked in all log output.
5
+ """
6
+
7
+ import logging
8
+ import re
9
+
10
+
11
class CookieMaskingFilter(logging.Filter):
    """Logging filter that masks cookie values for security.

    Values of the ``note_gql_auth_token`` and ``_note_session_v5`` cookies
    are replaced with [MASKED] in the formatted log message. Masking is
    applied to the fully rendered message, so values carried by non-string
    arguments (for example a cookie dict passed to ``%s``) are covered too.
    Exception text attached to a record is not inspected.
    """

    # Patterns to match cookie values in various formats.
    # Group 1 is the text kept before the value, group 2 the value to hide,
    # and an optional group 3 the text kept after it.
    COOKIE_PATTERNS = [
        # Match note_gql_auth_token=VALUE or _note_session_v5=VALUE
        # (the separator is part of group 1 so it survives masking)
        re.compile(r"((?:note_gql_auth_token|_note_session_v5)[=:]\s*)([^\s;,}\"']+)"),
        # Match cookie dict format {"name": "value"}
        re.compile(r'(["\']?(?:note_gql_auth_token|_note_session_v5)["\']?\s*[=:]\s*["\'])([^"\']+)(["\'])'),
        # Match Cookie header format
        re.compile(r"(Cookie:\s*[^;]*?(?:note_gql_auth_token|_note_session_v5)=)([^;\s]+)"),
    ]

    def filter(self, record: logging.LogRecord) -> bool:
        """Filter and mask cookie values in log records.

        The record's message is rendered with its arguments, masked, and
        stored back as ``record.msg`` with ``record.args`` cleared.

        Args:
            record: Log record to process (modified in place)

        Returns:
            Always True (record is always passed through, just modified)
        """
        try:
            rendered = record.getMessage()
        except Exception:
            # The format string and args do not fit together. A filter must
            # not raise, so mask what can be masked and leave the formatting
            # problem to be reported by the logging machinery.
            record.msg = self._mask_cookies(str(record.msg))
            record.args = self._mask_args(record.args)
            return True

        record.msg = self._mask_cookies(rendered)
        # The message is already final; clearing args prevents a second
        # %-formatting pass over it.
        record.args = None
        return True

    def _mask_args(self, args: object) -> object:
        """Mask string arguments while keeping the container's shape.

        Args:
            args: ``record.args`` - a tuple, a mapping, or an empty value

        Returns:
            A tuple or dict of the same layout with string items masked;
            anything else is returned unchanged.
        """
        if isinstance(args, dict):
            return {
                key: self._mask_cookies(value) if isinstance(value, str) else value
                for key, value in args.items()
            }
        if isinstance(args, tuple):
            return tuple(
                self._mask_cookies(arg) if isinstance(arg, str) else arg
                for arg in args
            )
        return args

    @staticmethod
    def _replace_value(match: re.Match[str]) -> str:
        """Rebuild a match with its value group replaced by [MASKED]."""
        suffix = match.group(3) if len(match.groups()) > 2 else ""
        return match.group(1) + "[MASKED]" + suffix

    def _mask_cookies(self, text: str) -> str:
        """Mask all cookie values in text.

        Args:
            text: Text potentially containing cookie values

        Returns:
            Text with cookie values replaced by [MASKED]
        """
        result = text
        for pattern in self.COOKIE_PATTERNS:
            result = pattern.sub(self._replace_value, result)
        return result
68
+
69
+
70
def setup_logging(level: int = logging.INFO, name: str | None = None) -> logging.Logger:
    """Configure a logger whose console output has cookie values masked.

    Any handlers already attached to the logger are detached, then a single
    stream handler carrying a CookieMaskingFilter is installed.

    Args:
        level: Logging level applied to both the logger and its handler
            (default: INFO)
        name: Logger name (default: "note_mcp")

    Returns:
        The configured logger instance
    """
    logger = logging.getLogger(name if name else "note_mcp")
    logger.setLevel(level)

    # Start from a clean slate so repeated calls do not stack handlers.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)

    console = logging.StreamHandler()
    console.setLevel(level)
    console.setFormatter(
        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    )
    # The filter lives on the handler, so it sees every record the handler emits.
    console.addFilter(CookieMaskingFilter())

    logger.addHandler(console)
    return logger
103
+
104
+
105
+ def get_logger(name: str | None = None) -> logging.Logger:
106
+ """Get a logger instance with cookie masking.
107
+
108
+ Gets or creates a child logger under the note_mcp namespace.
109
+ All loggers created this way inherit the cookie masking filter.
110
+
111
+ Args:
112
+ name: Logger name suffix (e.g., "api" for "note_mcp.api")
113
+
114
+ Returns:
115
+ Logger instance
116
+ """
117
+ if name:
118
+ return logging.getLogger(f"note_mcp.{name}")
119
+ return logging.getLogger("note_mcp")
@@ -0,0 +1,12 @@
1
+ """Backward compatibility alias for markdown modules.
2
+
3
+ This module re-exports functions for backward compatibility.
4
+ New code should import directly from:
5
+ - note_mcp.utils.markdown_to_html
6
+ - note_mcp.utils.html_to_markdown
7
+ """
8
+
9
+ from note_mcp.utils.html_to_markdown import html_to_markdown
10
+ from note_mcp.utils.markdown_to_html import markdown_to_html
11
+
12
+ __all__ = ["html_to_markdown", "markdown_to_html"]