markdown-analysis 0.1.2__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {markdown_analysis-0.1.2 → markdown_analysis-0.1.3}/PKG-INFO +1 -1
- {markdown_analysis-0.1.2 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/PKG-INFO +1 -1
- {markdown_analysis-0.1.2 → markdown_analysis-0.1.3}/mrkdwn_analysis/markdown_analyzer.py +73 -0
- {markdown_analysis-0.1.2 → markdown_analysis-0.1.3}/setup.py +1 -1
- {markdown_analysis-0.1.2 → markdown_analysis-0.1.3}/LICENSE +0 -0
- {markdown_analysis-0.1.2 → markdown_analysis-0.1.3}/README.md +0 -0
- {markdown_analysis-0.1.2 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/SOURCES.txt +0 -0
- {markdown_analysis-0.1.2 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/dependency_links.txt +0 -0
- {markdown_analysis-0.1.2 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/requires.txt +0 -0
- {markdown_analysis-0.1.2 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/top_level.txt +0 -0
- {markdown_analysis-0.1.2 → markdown_analysis-0.1.3}/mrkdwn_analysis/__init__.py +0 -0
- {markdown_analysis-0.1.2 → markdown_analysis-0.1.3}/setup.cfg +0 -0
- {markdown_analysis-0.1.2 → markdown_analysis-0.1.3}/test/__init__.py +0 -0
@@ -558,3 +558,76 @@ class MarkdownAnalyzer:
|
|
558
558
|
'characters': self.count_characters()
|
559
559
|
}
|
560
560
|
return analysis
|
561
|
+
|
562
|
+
# =================== SUPPORT MDX ===================
|
563
|
+
|
564
|
+
class MDXBlockToken(BlockToken):
    """Block token type for MDX documents.

    Adds no state or behaviour of its own: construction is delegated
    entirely to ``BlockToken``.
    """

    def __init__(self, type_, content="", level=None, meta=None, line=None):
        """Forward every argument, in the same order, to ``BlockToken``."""
        super(MDXBlockToken, self).__init__(type_, content, level, meta, line)
|
567
|
+
|
568
|
+
class MDXMarkdownParser(MarkdownParser):
    """Markdown parser variant intended for MDX (Markdown + JSX) input.

    As written, ``parse`` only emits tokens for fenced code blocks; every
    other line is skipped. The JSX patterns below and the ``in_jsx_block``,
    ``current_jsx_content`` and ``jsx_start_line`` attributes are defined
    but not used by any method of this class.
    """

    # ``import ... from "module"`` line; group 1 is the quoted module string.
    JSX_IMPORT_RE = re.compile(r'^import\s+.*?\s+from\s+["\'](.*?)["\'];?\s*$')
    # Opening or self-closing tag of a ``Name`` / ``ns.Name`` component.
    JSX_COMPONENT_START_RE = re.compile(r'^<([A-Z][A-Za-z0-9]*|[a-z]+\.[A-Z][A-Za-z0-9]*).*?(?:>|\/>)$')
    # Closing tag of a ``Name`` / ``ns.Name`` component.
    JSX_COMPONENT_END_RE = re.compile(r'^</([A-Z][A-Za-z0-9]*|[a-z]+\.[A-Z][A-Za-z0-9]*)>$')

    def __init__(self, text):
        """Initialise the base parser with *text* and reset the JSX state."""
        super().__init__(text)
        self.in_jsx_block = False
        self.current_jsx_content = []
        self.jsx_start_line = None

    def handle_potential_hanging(self):
        """Skip the current line if it contains ``</TabItem>`` or ``</Tabs>``.

        Returns:
            True if such a line was found and ``self.pos`` was advanced past
            it; False otherwise, including when the input is exhausted.
        """
        if self.pos >= self.length:
            return False
        line = self.lines[self.pos].strip()
        if '</TabItem>' in line or '</Tabs>' in line:
            self.pos += 1
            return True
        return False

    def parse_fenced_code_block(self, lang):
        """Consume a fenced code block whose opening fence is at ``self.pos``.

        Lines are collected until one that is exactly three backticks once
        stripped. If at least one line was collected, a ``'code'`` token is
        appended to ``self.tokens``: blank lines are dropped, the smallest
        leading indentation among the remaining lines is removed, and each
        line is prefixed. A block that holds only blank lines yields a token
        with empty content. If no closing fence is found the collected lines
        are discarded and no token is appended.

        Args:
            lang: Language captured from the opening fence. A falsy value
                (``None`` or ``""``) is stored in the token metadata as-is.
        """
        initial_line = self.pos
        self.pos += 1
        content = []

        while self.pos < self.length:
            line = self.lines[self.pos]
            if line.strip() == '```':
                if content:
                    non_blank = [row for row in content if row.strip()]
                    # ``default=0`` avoids ValueError from min() on an empty
                    # sequence when every collected line is blank.
                    base_indent = min(
                        (len(row) - len(row.lstrip()) for row in non_blank),
                        default=0,
                    )
                    # NOTE(review): whitespace is collapsed in the rendering
                    # this was reviewed from; confirm the width of this
                    # prefix literal against the package source.
                    clean_content = [' ' + row[base_indent:] for row in non_blank]
                    self.tokens.append(BlockToken(
                        'code',
                        content='\n'.join(clean_content),
                        # Guard: group(1) of the fence match may be None if
                        # the language group did not participate.
                        meta={"language": lang.strip() if lang else lang,
                              "code_type": "fenced"},
                        line=initial_line + 1,
                    ))
                # NOTE(review): placed at the closing-fence level (not under
                # ``if content``) so an empty block still terminates here;
                # confirm against the package source.
                self.pos += 1
                return
            content.append(line)
            self.pos += 1

    def parse(self):
        """Scan the input and return the list of fenced-code tokens.

        ``self.tokens`` is reset first. Each line that matches ``FENCE_RE``
        after stripping is handed to ``parse_fenced_code_block``; all other
        lines are skipped.
        """
        self.tokens = []
        while self.pos < self.length:
            line = self.lines[self.pos].strip()
            fence = self.FENCE_RE.match(line)  # match once, reuse the result
            if fence:
                self.parse_fenced_code_block(fence.group(1))
                continue
            self.pos += 1
        return self.tokens
|
623
|
+
|
624
|
+
class MDXMarkdownAnalyzer(MarkdownAnalyzer):
    """Analyzer that tokenises its input file with ``MDXMarkdownParser``."""

    def __init__(self, file_path, encoding='utf-8'):
        """Read *file_path*, parse it and run the inline-token pass.

        ``MarkdownAnalyzer.__init__`` is not called; the attributes are set
        here directly from an ``MDXMarkdownParser`` instance.

        Args:
            file_path: Path of the file to read.
            encoding: Text encoding used to open the file.
        """
        with open(file_path, 'r', encoding=encoding) as source:
            self.text = source.read()

        mdx_parser = MDXMarkdownParser(self.text)
        self.tokens = mdx_parser.parse()
        self.references = mdx_parser.references
        self.footnotes = mdx_parser.footnotes
        self.inline_parser = InlineParser(
            references=self.references,
            footnotes=self.footnotes,
        )
        self._parse_inline_tokens()
|
File without changes
|
File without changes
|
File without changes
|
{markdown_analysis-0.1.2 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
{markdown_analysis-0.1.2 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/top_level.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|