@yeongjaeyou/claude-code-config 0.4.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/ask-codex.md +131 -345
- package/.claude/commands/ask-deepwiki.md +15 -15
- package/.claude/commands/ask-gemini.md +134 -352
- package/.claude/commands/code-review.md +41 -40
- package/.claude/commands/commit-and-push.md +35 -36
- package/.claude/commands/council.md +318 -0
- package/.claude/commands/edit-notebook.md +34 -33
- package/.claude/commands/gh/create-issue-label.md +19 -17
- package/.claude/commands/gh/decompose-issue.md +66 -65
- package/.claude/commands/gh/init-project.md +46 -52
- package/.claude/commands/gh/post-merge.md +74 -79
- package/.claude/commands/gh/resolve-issue.md +38 -46
- package/.claude/commands/plan.md +15 -14
- package/.claude/commands/tm/convert-prd.md +53 -53
- package/.claude/commands/tm/post-merge.md +92 -112
- package/.claude/commands/tm/resolve-issue.md +148 -154
- package/.claude/commands/tm/review-prd-with-codex.md +272 -279
- package/.claude/commands/tm/sync-to-github.md +189 -212
- package/.claude/guidelines/cv-guidelines.md +30 -0
- package/.claude/guidelines/id-reference.md +34 -0
- package/.claude/guidelines/work-guidelines.md +17 -0
- package/.claude/skills/notion-md-uploader/SKILL.md +252 -0
- package/.claude/skills/notion-md-uploader/references/notion_block_types.md +323 -0
- package/.claude/skills/notion-md-uploader/references/setup_guide.md +156 -0
- package/.claude/skills/notion-md-uploader/scripts/__pycache__/markdown_parser.cpython-311.pyc +0 -0
- package/.claude/skills/notion-md-uploader/scripts/__pycache__/notion_client.cpython-311.pyc +0 -0
- package/.claude/skills/notion-md-uploader/scripts/__pycache__/notion_converter.cpython-311.pyc +0 -0
- package/.claude/skills/notion-md-uploader/scripts/markdown_parser.py +607 -0
- package/.claude/skills/notion-md-uploader/scripts/notion_client.py +337 -0
- package/.claude/skills/notion-md-uploader/scripts/notion_converter.py +477 -0
- package/.claude/skills/notion-md-uploader/scripts/upload_md.py +298 -0
- package/.claude/skills/skill-creator/LICENSE.txt +202 -0
- package/.claude/skills/skill-creator/SKILL.md +209 -0
- package/.claude/skills/skill-creator/scripts/init_skill.py +303 -0
- package/.claude/skills/skill-creator/scripts/package_skill.py +110 -0
- package/.claude/skills/skill-creator/scripts/quick_validate.py +65 -0
- package/README.md +159 -129
- package/package.json +1 -1
|
Binary file
|
|
Binary file
|
package/.claude/skills/notion-md-uploader/scripts/__pycache__/notion_converter.cpython-311.pyc
ADDED
|
Binary file
|
|
@@ -0,0 +1,607 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Markdown Parser for Notion conversion.
|
|
4
|
+
|
|
5
|
+
Parses Markdown text into an AST-like structure that can be
|
|
6
|
+
converted to Notion blocks. Uses regex-based parsing to avoid
|
|
7
|
+
external dependencies.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from enum import Enum, auto
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BlockType(Enum):
    """Kinds of block-level Markdown elements the parser can emit.

    Values are consecutive integers starting at 1, in declaration order.
    """

    # Headings (parser maps levels deeper than 3 onto HEADING3).
    HEADING1 = 1
    HEADING2 = 2
    HEADING3 = 3

    # Flowing text and lists.
    PARAGRAPH = 4
    BULLETED_LIST = 5
    NUMBERED_LIST = 6

    # Fenced code, block quotes and horizontal rules.
    CODE_BLOCK = 7
    QUOTE = 8
    DIVIDER = 9

    # Media and structured content.
    IMAGE = 10
    TABLE = 11
    TODO = 12
    CALLOUT = 13
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class InlineStyle:
    """Represents one run of inline text and the styling applied to it."""

    # Text of the run; for styled runs this is the text between the markers.
    text: str
    # Set for runs written as **text** or __text__.
    bold: bool = False
    # Set for runs written as *text* or _text_.
    italic: bool = False
    # Set for runs written as ~~text~~.
    strikethrough: bool = False
    # Set for runs written as `text`.
    code: bool = False
    # Link target for runs written as [text](target); None when not a link.
    link: str | None = None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class MarkdownBlock:
    """Represents a parsed block-level Markdown element."""

    # Which kind of block this is.
    block_type: BlockType
    # Either a list of styled inline runs, or a plain string (the parser
    # stores raw code text and image alt text as str; dividers and tables
    # keep the empty-string default).
    content: list[InlineStyle] | str = ""
    # Nested blocks; the parser in this module never populates this list.
    children: list["MarkdownBlock"] = field(default_factory=list)
    # Block-specific extras set by the parser, e.g. "language", "checked",
    # "url", "type"/"icon", or "rows"/"has_header"/"column_count".
    metadata: dict[str, Any] = field(default_factory=dict)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class MarkdownParser:
    """Line-oriented, regex-based parser for Markdown text.

    ``parse`` walks the input one line at a time and produces a flat list of
    ``MarkdownBlock`` objects. Block detection is done with the compiled
    patterns below; inline formatting is handled by ``_tokenize_inline``.
    List nesting is not tracked: every list item becomes its own top-level
    block regardless of indentation.
    """

    # Regex patterns for block-level elements
    HEADING_PATTERN = re.compile(r"^(#{1,6})\s+(.+)$")
    # Opening fence. Group 1 is the first word of the info string (the
    # language), so names such as "c++" or "objective-c" and info strings
    # with extra attributes are accepted. Backticks are not allowed after
    # the fence, which keeps "```inline``` text" from opening a block.
    CODE_BLOCK_START = re.compile(r"^```\s*([^\s`]*)[^`]*$")
    # Closing fence; trailing whitespace is tolerated.
    CODE_BLOCK_END = re.compile(r"^```\s*$")
    BULLETED_LIST_PATTERN = re.compile(r"^(\s*)[-*+]\s+(.+)$")
    NUMBERED_LIST_PATTERN = re.compile(r"^(\s*)\d+\.\s+(.+)$")
    QUOTE_PATTERN = re.compile(r"^>\s*(.*)$")
    DIVIDER_PATTERN = re.compile(r"^-{3,}$|^\*{3,}$|^_{3,}$")
    IMAGE_PATTERN = re.compile(r"!\[([^\]]*)\]\(([^)]+)\)")
    TODO_PATTERN = re.compile(r"^(\s*)[-*]\s+\[([ xX])\]\s+(.+)$")
    CALLOUT_PATTERN = re.compile(
        r"^>\s*\[!(NOTE|WARNING|TIP|IMPORTANT|CAUTION)\]\s*$", re.IGNORECASE
    )
    TABLE_ROW_PATTERN = re.compile(r"^\|(.+)\|$")
    TABLE_SEPARATOR_PATTERN = re.compile(r"^\|[\s\-:|]+\|$")

    # Regex patterns for inline elements
    BOLD_PATTERN = re.compile(r"\*\*(.+?)\*\*|__(.+?)__")
    ITALIC_PATTERN = re.compile(
        r"(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)|(?<!_)_(?!_)(.+?)(?<!_)_(?!_)"
    )
    STRIKETHROUGH_PATTERN = re.compile(r"~~(.+?)~~")
    INLINE_CODE_PATTERN = re.compile(r"`([^`]+)`")
    LINK_PATTERN = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")

    # Any character that can start an inline construct; used to find the end
    # of a plain-text run with a single search.
    _INLINE_SPECIAL_PATTERN = re.compile(r"[*_`\[~]")

    # Heading depth -> block type. Deeper levels fall back to HEADING3.
    _HEADING_TYPES = {
        1: BlockType.HEADING1,
        2: BlockType.HEADING2,
        3: BlockType.HEADING3,
    }

    # Lower-cased callout keyword -> icon name stored in block metadata.
    _CALLOUT_ICONS = {
        "note": "info",
        "warning": "warning",
        "tip": "lightbulb",
        "important": "star",
        "caution": "warning",
    }

    def __init__(self, base_path: str = ""):
        """Initialize the parser.

        Args:
            base_path: Base path for resolving relative image paths. It is
                only stored here; the parsing methods of this class do not
                read it.
        """
        self.base_path = base_path

    def parse(self, markdown_text: str) -> list[MarkdownBlock]:
        """Parse Markdown text into a list of blocks.

        Each non-blank line is tested against the block patterns in a fixed
        order (code fence, divider, heading, callout, quote, TODO, bulleted
        list, numbered list, table, standalone image); the first match wins
        and anything else becomes a paragraph.

        Args:
            markdown_text: The Markdown text to parse

        Returns:
            List of MarkdownBlock objects
        """
        # Normalise CRLF / CR endings first: a stray "\r" at the end of a
        # line would otherwise stop "$"-anchored patterns (notably the
        # closing code fence) from matching.
        lines = markdown_text.replace("\r\n", "\n").replace("\r", "\n").split("\n")
        blocks: list[MarkdownBlock] = []
        i = 0

        while i < len(lines):
            line = lines[i]
            stripped = line.strip()

            # Skip empty lines
            if not stripped:
                i += 1
                continue

            # Fenced code block
            if self.CODE_BLOCK_START.match(line):
                block, i = self._parse_code_block(lines, i)
                blocks.append(block)
                continue

            # Divider (checked before lists so "***" is not taken as a bullet)
            if self.DIVIDER_PATTERN.match(stripped):
                blocks.append(MarkdownBlock(block_type=BlockType.DIVIDER))
                i += 1
                continue

            # Heading
            heading_match = self.HEADING_PATTERN.match(line)
            if heading_match:
                level = len(heading_match.group(1))
                blocks.append(MarkdownBlock(
                    block_type=self._HEADING_TYPES.get(level, BlockType.HEADING3),
                    content=self._parse_inline(heading_match.group(2)),
                ))
                i += 1
                continue

            # GitHub-style callout (must precede the plain quote check,
            # because a callout header also matches QUOTE_PATTERN)
            callout_match = self.CALLOUT_PATTERN.match(line)
            if callout_match:
                block, i = self._parse_callout(lines, i, callout_match.group(1))
                blocks.append(block)
                continue

            # Block quote
            if self.QUOTE_PATTERN.match(line):
                block, i = self._parse_quote(lines, i)
                blocks.append(block)
                continue

            # TODO item (must precede the bullet check, which it also matches)
            todo_match = self.TODO_PATTERN.match(line)
            if todo_match:
                blocks.append(MarkdownBlock(
                    block_type=BlockType.TODO,
                    content=self._parse_inline(todo_match.group(3)),
                    metadata={"checked": todo_match.group(2).lower() == "x"},
                ))
                i += 1
                continue

            # Bulleted list
            if self.BULLETED_LIST_PATTERN.match(line):
                items, i = self._parse_list(lines, i, is_numbered=False)
                blocks.extend(items)
                continue

            # Numbered list
            if self.NUMBERED_LIST_PATTERN.match(line):
                items, i = self._parse_list(lines, i, is_numbered=True)
                blocks.extend(items)
                continue

            # Table: a row line immediately followed by a separator line
            if (
                self.TABLE_ROW_PATTERN.match(line)
                and i + 1 < len(lines)
                and self.TABLE_SEPARATOR_PATTERN.match(lines[i + 1])
            ):
                block, i = self._parse_table(lines, i)
                blocks.append(block)
                continue

            # Standalone image
            if stripped.startswith("!["):
                image_match = self.IMAGE_PATTERN.search(line)
                if image_match:
                    blocks.append(MarkdownBlock(
                        block_type=BlockType.IMAGE,
                        content=image_match.group(1),  # alt text
                        metadata={"url": image_match.group(2)},
                    ))
                    i += 1
                    continue

            # Default: paragraph
            block, i = self._parse_paragraph(lines, i)
            blocks.append(block)

        return blocks

    def _parse_inline(self, text: str) -> list[InlineStyle]:
        """Parse inline formatting in text.

        Args:
            text: Text to parse

        Returns:
            List of InlineStyle objects; empty when ``text`` is empty
        """
        if not text:
            return []
        return self._tokenize_inline(text)

    def _tokenize_inline(self, text: str) -> list[InlineStyle]:
        """Tokenize inline text into styled segments.

        At each position the constructs are tried in priority order: inline
        code, link, bold, strikethrough, italic. Styles do not nest; the
        text inside a matched construct is taken verbatim. Anything else is
        consumed as plain text up to the next character that could start a
        construct.

        Args:
            text: Text to tokenize

        Returns:
            List of InlineStyle objects, with adjacent plain runs merged
        """
        if not text:
            return []

        result: list[InlineStyle] = []
        pos = 0
        end = len(text)

        while pos < end:
            # Inline code (highest priority, doesn't nest)
            code_match = self.INLINE_CODE_PATTERN.match(text, pos)
            if code_match:
                result.append(InlineStyle(text=code_match.group(1), code=True))
                pos = code_match.end()
                continue

            # Link
            link_match = self.LINK_PATTERN.match(text, pos)
            if link_match:
                result.append(
                    InlineStyle(text=link_match.group(1), link=link_match.group(2))
                )
                pos = link_match.end()
                continue

            # Bold
            bold_match = self.BOLD_PATTERN.match(text, pos)
            if bold_match:
                content = bold_match.group(1) or bold_match.group(2)
                result.append(InlineStyle(text=content, bold=True))
                pos = bold_match.end()
                continue

            # Strikethrough
            strike_match = self.STRIKETHROUGH_PATTERN.match(text, pos)
            if strike_match:
                result.append(
                    InlineStyle(text=strike_match.group(1), strikethrough=True)
                )
                pos = strike_match.end()
                continue

            # Italic (must be after the bold check)
            italic_match = self.ITALIC_PATTERN.match(text, pos)
            if italic_match:
                content = italic_match.group(1) or italic_match.group(2)
                result.append(InlineStyle(text=content, italic=True))
                pos = italic_match.end()
                continue

            # Plain text: always consume the current character (it did not
            # start any construct) and run up to the next special character.
            special = self._INLINE_SPECIAL_PATTERN.search(text, pos + 1)
            next_special = special.start() if special else end
            result.append(InlineStyle(text=text[pos:next_special]))
            pos = next_special

        # Merge adjacent plain text segments
        merged: list[InlineStyle] = []
        for segment in result:
            if merged and self._is_plain(segment) and self._is_plain(merged[-1]):
                merged[-1] = InlineStyle(text=merged[-1].text + segment.text)
            else:
                merged.append(segment)

        return merged if merged else [InlineStyle(text=text)]

    @staticmethod
    def _is_plain(segment: InlineStyle) -> bool:
        """Return True when the segment carries no styling and no link."""
        return not (
            segment.bold
            or segment.italic
            or segment.strikethrough
            or segment.code
            or segment.link
        )

    def _parse_code_block(
        self, lines: list[str], start: int
    ) -> tuple[MarkdownBlock, int]:
        """Parse a fenced code block.

        The block runs from the opening fence at ``start`` to the next
        closing fence; if no closing fence exists it runs to the end of the
        input.

        Args:
            lines: All lines
            start: Index of the opening fence line

        Returns:
            Tuple of (MarkdownBlock, next line index)
        """
        match = self.CODE_BLOCK_START.match(lines[start])
        language = match.group(1) if match else ""

        code_lines: list[str] = []
        i = start + 1
        while i < len(lines):
            if self.CODE_BLOCK_END.match(lines[i]):
                i += 1  # step past the closing fence
                break
            code_lines.append(lines[i])
            i += 1

        return (
            MarkdownBlock(
                block_type=BlockType.CODE_BLOCK,
                content="\n".join(code_lines),
                metadata={"language": language or "plain text"},
            ),
            i,
        )

    def _collect_quote_lines(
        self, lines: list[str], start: int
    ) -> tuple[list[str], int]:
        """Collect the text of consecutive ``>`` lines beginning at ``start``.

        Returns:
            Tuple of (texts with the quote marker removed, next line index)
        """
        collected: list[str] = []
        i = start
        while i < len(lines):
            match = self.QUOTE_PATTERN.match(lines[i])
            if not match:
                break
            collected.append(match.group(1))
            i += 1
        return collected, i

    def _parse_quote(
        self, lines: list[str], start: int
    ) -> tuple[MarkdownBlock, int]:
        """Parse a blockquote.

        Consecutive quote lines are joined with single spaces into one block.

        Args:
            lines: All lines
            start: Starting line index

        Returns:
            Tuple of (MarkdownBlock, next line index)
        """
        quote_lines, i = self._collect_quote_lines(lines, start)
        return (
            MarkdownBlock(
                block_type=BlockType.QUOTE,
                content=self._parse_inline(" ".join(quote_lines)),
            ),
            i,
        )

    def _parse_callout(
        self, lines: list[str], start: int, callout_type: str
    ) -> tuple[MarkdownBlock, int]:
        """Parse a GitHub-style callout.

        The line at ``start`` is the ``> [!TYPE]`` header; the quote lines
        that follow it form the callout body.

        Args:
            lines: All lines
            start: Index of the callout header line
            callout_type: Type of callout (NOTE, WARNING, etc.)

        Returns:
            Tuple of (MarkdownBlock, next line index)
        """
        callout_lines, i = self._collect_quote_lines(lines, start + 1)
        kind = callout_type.lower()
        return (
            MarkdownBlock(
                block_type=BlockType.CALLOUT,
                content=self._parse_inline(" ".join(callout_lines)),
                metadata={
                    "type": kind,
                    "icon": self._CALLOUT_ICONS.get(kind, "info"),
                },
            ),
            i,
        )

    def _parse_list(
        self, lines: list[str], start: int, is_numbered: bool
    ) -> tuple[list[MarkdownBlock], int]:
        """Parse a list (bulleted or numbered).

        Items are collected until a blank line (which is consumed) or a line
        that is not an item of the requested kind. Indentation is ignored,
        so nested items are returned at the same level as their parents.

        Args:
            lines: All lines
            start: Starting line index
            is_numbered: True for numbered lists

        Returns:
            Tuple of (list of MarkdownBlocks, next line index)
        """
        blocks: list[MarkdownBlock] = []
        pattern = self.NUMBERED_LIST_PATTERN if is_numbered else self.BULLETED_LIST_PATTERN
        block_type = BlockType.NUMBERED_LIST if is_numbered else BlockType.BULLETED_LIST

        i = start
        while i < len(lines):
            line = lines[i]
            if not line.strip():
                i += 1
                break

            # A "- [ ]" / "- [x]" line also matches the bullet pattern; hand
            # it back to parse() so it becomes a TODO block, mirroring the
            # TODO-before-bullet precedence used there.
            if not is_numbered and self.TODO_PATTERN.match(line):
                break

            match = pattern.match(line)
            if not match:
                break
            blocks.append(MarkdownBlock(
                block_type=block_type,
                content=self._parse_inline(match.group(2)),
            ))
            i += 1

        return blocks, i

    @staticmethod
    def _split_table_row(inner: str) -> list[str]:
        """Split the text between a row's outer pipes into trimmed cells."""
        return [cell.strip() for cell in inner.split("|")]

    def _parse_table(
        self, lines: list[str], start: int
    ) -> tuple[MarkdownBlock, int]:
        """Parse a Markdown table.

        The line at ``start`` is the header row and the next line the
        separator. Data rows are padded with empty cells or truncated so
        that every row has as many cells as the header, as GFM specifies.

        Args:
            lines: All lines
            start: Index of the header row

        Returns:
            Tuple of (MarkdownBlock, next line index)
        """
        rows: list[list[str]] = []
        i = start

        # Parse header row
        header_match = self.TABLE_ROW_PATTERN.match(lines[i])
        if header_match:
            rows.append(self._split_table_row(header_match.group(1)))
            i += 1

        # Skip separator row
        if i < len(lines) and self.TABLE_SEPARATOR_PATTERN.match(lines[i]):
            i += 1

        # Width to normalise data rows to; 0 means "no header, leave as is".
        width = len(rows[0]) if rows else 0

        # Parse data rows
        while i < len(lines):
            row_match = self.TABLE_ROW_PATTERN.match(lines[i])
            if not row_match:
                break
            cells = self._split_table_row(row_match.group(1))
            if width:
                cells = (cells + [""] * width)[:width]
            rows.append(cells)
            i += 1

        return (
            MarkdownBlock(
                block_type=BlockType.TABLE,
                metadata={
                    "rows": rows,
                    "has_header": True,
                    "column_count": len(rows[0]) if rows else 0,
                },
            ),
            i,
        )

    def _interrupts_paragraph(self, line: str) -> bool:
        """Return True when ``line`` ends a paragraph (blank or new block)."""
        stripped = line.strip()
        if not stripped:
            return True
        if self.DIVIDER_PATTERN.match(stripped):
            return True
        # Image lines are left for parse() so they become IMAGE blocks.
        if stripped.startswith("![") and self.IMAGE_PATTERN.search(line):
            return True
        block_starts = (
            self.HEADING_PATTERN,
            self.CODE_BLOCK_START,
            self.BULLETED_LIST_PATTERN,
            self.NUMBERED_LIST_PATTERN,
            self.QUOTE_PATTERN,
            self.TODO_PATTERN,
        )
        return any(pattern.match(line) for pattern in block_starts)

    def _parse_paragraph(
        self, lines: list[str], start: int
    ) -> tuple[MarkdownBlock, int]:
        """Parse a paragraph.

        Lines are gathered until a blank line or the start of another block.
        A line ending in two or more spaces produces a hard line break
        ("\\n"); all other line ends are joined with a single space.

        Args:
            lines: All lines
            start: Starting line index

        Returns:
            Tuple of (MarkdownBlock, next line index)
        """
        i = start
        while i < len(lines) and not self._interrupts_paragraph(lines[i]):
            i += 1
        para_lines = lines[start:i]

        # Handle trailing double spaces as line breaks
        pieces = [
            line.rstrip() + ("\n" if line.endswith("  ") else " ")
            for line in para_lines
        ]
        content = "".join(pieces).strip()

        return (
            MarkdownBlock(
                block_type=BlockType.PARAGRAPH,
                content=self._parse_inline(content),
            ),
            i,
        )
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
def main():
    """Parse a small sample document and print one summary line per block.

    For blocks whose content is a string the first 50 characters are shown;
    for blocks whose content is a list of inline runs the number of runs is
    shown.
    """
    # Sample covering each block type the parser recognises, including a
    # standalone image line so the IMAGE path is exercised too.
    test_md = """# Heading 1

This is a paragraph with **bold** and *italic* text.

## Heading 2

- Bullet item 1
- Bullet item 2

1. Numbered item 1
2. Numbered item 2

```python
def hello():
    print("Hello, World!")
```

> This is a quote

> [!NOTE]
> This is a callout

| Col1 | Col2 |
|------|------|
| A | B |

- [x] Completed task
- [ ] Pending task

![Sample image](https://example.com/image.png)

---
"""
    parser = MarkdownParser()
    blocks = parser.parse(test_md)

    for block in blocks:
        # Only list-valued content is a count of items; string content is
        # printed as-is without the misleading "items" suffix.
        if isinstance(block.content, str):
            summary = block.content[:50]
        else:
            summary = f"{len(block.content)} items"
        print(f"{block.block_type.name}: {summary}")
|
|
606
|
+
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|