markdown-analysis 0.1.1__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/PKG-INFO +1 -1
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/PKG-INFO +1 -1
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/mrkdwn_analysis/markdown_analyzer.py +82 -4
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/setup.py +1 -1
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/LICENSE +0 -0
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/README.md +0 -0
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/SOURCES.txt +0 -0
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/dependency_links.txt +0 -0
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/requires.txt +0 -0
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/top_level.txt +0 -0
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/mrkdwn_analysis/__init__.py +0 -0
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/setup.cfg +0 -0
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/test/__init__.py +0 -0
@@ -284,19 +284,24 @@ class MarkdownParser:
|
|
284
284
|
self.pos = self.length
|
285
285
|
|
286
286
|
def parse_fenced_code_block(self, lang):
    """Consume a fenced code block starting at the current line.

    The current line (``self.lines[self.pos]``) is taken to be the opening
    fence. Lines are scanned until one whose stripped text equals the first
    three characters of the stripped opening line. On success a ``'code'``
    token holding the enclosed lines is appended to ``self.tokens`` and
    ``self.pos`` is left just past the closing fence.

    Args:
        lang: Language label stored unchanged under ``meta["language"]``.

    Raises:
        ValueError: If no closing fence is found before the end of input.
            ``self.pos`` is reset to the opening fence line before raising.
    """
    initial_line = self.pos
    fence_marker = self.lines[self.pos].strip()[:3]  # Get ``` or ~~~
    self.pos += 1
    start = self.pos

    while self.pos < self.length:
        line = self.lines[self.pos]
        if line.strip() == fence_marker:
            content = "\n".join(self.lines[start:self.pos])
            # The token's line is the 1-based number of the first content line.
            self.tokens.append(BlockToken('code', content=content, meta={"language": lang}, line=start+1))
            self.pos += 1
            return
        self.pos += 1

    # If we reach here, we didn't find the closing fence
    self.pos = initial_line  # Reset position if fence not found
    raise ValueError(f"Unclosed code fence starting at line {initial_line + 1}")
|
300
305
|
|
301
306
|
def parse_blockquote(self):
|
302
307
|
start = self.pos
|
@@ -553,3 +558,76 @@ class MarkdownAnalyzer:
|
|
553
558
|
'characters': self.count_characters()
|
554
559
|
}
|
555
560
|
return analysis
|
561
|
+
|
562
|
+
# =================== SUPPORT MDX ===================
|
563
|
+
|
564
|
+
class MDXBlockToken(BlockToken):
    """Block token type for MDX parsing.

    Adds no state or behavior of its own; construction is delegated
    entirely to ``BlockToken``.
    """

    def __init__(self, type_, content="", level=None, meta=None, line=None):
        # Forward every argument positionally and unchanged to the base class.
        super(MDXBlockToken, self).__init__(type_, content, level, meta, line)
|
567
|
+
|
568
|
+
class MDXMarkdownParser(MarkdownParser):
    """Markdown parser variant used for MDX documents.

    As written, ``parse`` extracts only fenced code blocks; every other line
    is skipped. The JSX patterns and JSX state attributes below are defined
    but are not consulted by any method of this class.
    """

    JSX_IMPORT_RE = re.compile(r'^import\s+.*?\s+from\s+["\'](.*?)["\'];?\s*$')
    JSX_COMPONENT_START_RE = re.compile(r'^<([A-Z][A-Za-z0-9]*|[a-z]+\.[A-Z][A-Za-z0-9]*).*?(?:>|\/>)$')
    JSX_COMPONENT_END_RE = re.compile(r'^</([A-Z][A-Za-z0-9]*|[a-z]+\.[A-Z][A-Za-z0-9]*)>$')

    # Fence markers recognised when looking for the closing line of a block.
    _FENCE_MARKERS = ('```', '~~~')

    def __init__(self, text):
        """Initialise the base parser with ``text`` and reset JSX state."""
        super().__init__(text)
        self.in_jsx_block = False
        self.current_jsx_content = []
        self.jsx_start_line = None

    def handle_potential_hanging(self):
        """Skip the current line if it contains ``</TabItem>`` or ``</Tabs>``.

        Returns:
            True if such a line was found (``self.pos`` is advanced past it),
            False otherwise, including when already at the end of input.
        """
        if self.pos >= self.length:
            return False
        line = self.lines[self.pos].strip()
        if '</TabItem>' in line or '</Tabs>' in line:
            self.pos += 1
            return True
        return False

    @staticmethod
    def _dedent_code(content):
        """Return the non-blank lines of ``content`` dedented and joined.

        The smallest leading-whitespace width among non-blank lines is
        removed from each of them, and every kept line is prefixed with a
        single space. Blank lines are dropped, as in the original
        implementation.
        NOTE(review): dropping blank lines alters the code text -- confirm
        this is intended.
        """
        kept = [row for row in content if row.strip()]
        # default=0 avoids a ValueError when the fence holds only blank lines.
        base_indent = min((len(row) - len(row.lstrip()) for row in kept), default=0)
        return '\n'.join(' ' + row[base_indent:] for row in kept)

    def parse_fenced_code_block(self, lang):
        """Consume a fenced code block starting at the current line.

        The closing fence must use the same marker (three backticks or three
        tildes) as the opening line; any other opening text falls back to
        three backticks. When the block has at least one line, a ``'code'``
        token is appended to ``self.tokens``. A fence with no lines between
        its markers yields no token.

        If no closing fence is found, the rest of the input is consumed and
        no token is emitted; no exception is raised.

        Args:
            lang: Language label from the opening fence. It is stripped
                before being stored; ``None`` is stored as ``None``.
        """
        initial_line = self.pos
        fence_marker = self.lines[self.pos].strip()[:3]
        if fence_marker not in self._FENCE_MARKERS:
            fence_marker = '```'
        self.pos += 1
        content = []

        while self.pos < self.length:
            line = self.lines[self.pos]
            if line.strip() == fence_marker:
                if content:
                    language = lang.strip() if lang is not None else None
                    self.tokens.append(BlockToken('code',
                                                  content=self._dedent_code(content),
                                                  meta={"language": language, "code_type": "fenced"},
                                                  line=initial_line + 1))
                self.pos += 1
                return
            content.append(line)
            self.pos += 1

    def parse(self):
        """Scan the input from the current position and collect code tokens.

        Resets ``self.tokens``, then hands each line matching
        ``self.FENCE_RE`` (not defined in this class) to
        ``parse_fenced_code_block``. All other lines are skipped.

        Returns:
            The list of tokens collected in ``self.tokens``.
        """
        self.tokens = []
        while self.pos < self.length:
            fence = self.FENCE_RE.match(self.lines[self.pos].strip())
            if fence:
                # parse_fenced_code_block always advances self.pos.
                self.parse_fenced_code_block(fence.group(1))
                continue
            self.pos += 1
        return self.tokens
|
623
|
+
|
624
|
+
class MDXMarkdownAnalyzer(MarkdownAnalyzer):
    """Analyzer that tokenizes a file with ``MDXMarkdownParser``.

    The constructor does not call the base-class initializer; it sets up
    ``text``, ``tokens``, ``references``, ``footnotes`` and
    ``inline_parser`` itself.
    """

    def __init__(self, file_path, encoding='utf-8'):
        """Read ``file_path`` using ``encoding`` and parse its contents."""
        with open(file_path, 'r', encoding=encoding) as source:
            self.text = source.read()

        mdx_parser = MDXMarkdownParser(self.text)
        self.tokens = mdx_parser.parse()

        # Reference and footnote tables gathered by the parser are shared
        # with the inline parser.
        self.references = mdx_parser.references
        self.footnotes = mdx_parser.footnotes
        self.inline_parser = InlineParser(
            references=self.references,
            footnotes=self.footnotes,
        )
        self._parse_inline_tokens()
|
File without changes
|
File without changes
|
File without changes
|
{markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
{markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/top_level.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|