markdown-analysis 0.1.1__tar.gz → 0.1.3__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/PKG-INFO +1 -1
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/PKG-INFO +1 -1
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/mrkdwn_analysis/markdown_analyzer.py +82 -4
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/setup.py +1 -1
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/LICENSE +0 -0
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/README.md +0 -0
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/SOURCES.txt +0 -0
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/dependency_links.txt +0 -0
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/requires.txt +0 -0
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/top_level.txt +0 -0
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/mrkdwn_analysis/__init__.py +0 -0
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/setup.cfg +0 -0
- {markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/test/__init__.py +0 -0
@@ -284,19 +284,24 @@ class MarkdownParser:
|
|
284
284
|
self.pos = self.length
|
285
285
|
|
286
286
|
def parse_fenced_code_block(self, lang):
    """Consume one fenced code block and append a single 'code' token.

    On entry ``self.pos`` must index the opening fence line.  The first
    three characters of that line (after stripping whitespace) are taken
    as the fence marker, and the block ends at the first later line that,
    once stripped, is exactly equal to that marker.

    Args:
        lang: Language label taken from the opening fence; stored
            unchanged under ``meta["language"]`` of the emitted token.

    Raises:
        ValueError: If no closing fence is found before the end of input.
            ``self.pos`` is restored to the opening fence line before the
            exception is raised, and no token is appended.
    """
    initial_line = self.pos
    # Either ``` or ~~~ -- whichever opened the block must also close it.
    fence_marker = self.lines[initial_line].strip()[:3]
    self.pos += 1
    start = self.pos

    while self.pos < self.length:
        if self.lines[self.pos].strip() == fence_marker:
            content = "\n".join(self.lines[start:self.pos])
            # ``line`` is start + 1: the 1-based number of the first line
            # after the opening fence.
            self.tokens.append(BlockToken('code', content=content, meta={"language": lang}, line=start + 1))
            self.pos += 1  # step past the closing fence
            return
        self.pos += 1

    # Reached end of input without seeing the closing fence: rewind so the
    # caller still sees the parser positioned on the opening fence.
    self.pos = initial_line
    raise ValueError(f"Unclosed code fence starting at line {initial_line + 1}")
|
300
305
|
|
301
306
|
def parse_blockquote(self):
|
302
307
|
start = self.pos
|
@@ -553,3 +558,76 @@ class MarkdownAnalyzer:
|
|
553
558
|
'characters': self.count_characters()
|
554
559
|
}
|
555
560
|
return analysis
|
561
|
+
|
562
|
+
# =================== SUPPORT MDX ===================
|
563
|
+
|
564
|
+
class MDXBlockToken(BlockToken):
    """Block token type for MDX documents.

    Adds no state or behavior of its own: construction is forwarded
    positionally to ``BlockToken``.
    """

    def __init__(self, type_, content="", level=None, meta=None, line=None):
        """Forward every argument, in order, to ``BlockToken.__init__``."""
        super().__init__(
            type_,
            content,
            level,
            meta,
            line,
        )
|
567
|
+
|
568
|
+
class MDXMarkdownParser(MarkdownParser):
    """Markdown parser variant for MDX sources.

    As written here, ``parse`` only emits tokens for fenced code blocks;
    every other line is skipped.  The JSX regular expressions and the
    ``in_jsx_block`` / ``current_jsx_content`` / ``jsx_start_line``
    attributes are defined but not used by any method in this class.
    """

    JSX_IMPORT_RE = re.compile(r'^import\s+.*?\s+from\s+["\'](.*?)["\'];?\s*$')
    JSX_COMPONENT_START_RE = re.compile(r'^<([A-Z][A-Za-z0-9]*|[a-z]+\.[A-Z][A-Za-z0-9]*).*?(?:>|\/>)$')
    JSX_COMPONENT_END_RE = re.compile(r'^</([A-Z][A-Za-z0-9]*|[a-z]+\.[A-Z][A-Za-z0-9]*)>$')

    def __init__(self, text):
        """Initialise the base parser with ``text`` and reset JSX state."""
        super().__init__(text)
        self.in_jsx_block = False
        self.current_jsx_content = []
        self.jsx_start_line = None

    def handle_potential_hanging(self):
        """Skip the current line if it contains a closing Tabs/TabItem tag.

        Returns:
            True if the current line contained ``</TabItem>`` or
            ``</Tabs>`` and ``self.pos`` was advanced past it; False
            otherwise, including when the parser is already at end of
            input.
        """
        if self.pos >= self.length:
            return False
        line = self.lines[self.pos].strip()
        if '</TabItem>' in line or '</Tabs>' in line:
            self.pos += 1
            return True
        return False

    def parse_fenced_code_block(self, lang):
        """Consume a fenced code block and emit a re-indented 'code' token.

        On entry ``self.pos`` is expected to index the opening fence line.
        Lines up to the matching closing fence are collected; blank lines
        are dropped, the common leading indentation of the remaining
        lines is removed, and each line is given a fixed prefix.  No
        token is emitted when the block contains no non-blank line.  If
        the closing fence is never found, the rest of the input is
        consumed and no token is emitted.

        Args:
            lang: Language label from the opening fence, or None.  It is
                stripped of surrounding whitespace and stored under
                ``meta["language"]``; None is stored as an empty string.
        """
        initial_line = self.pos
        # Close on the same marker that opened the block (``` or ~~~).
        # Fall back to ``` when the current line is not a recognisable
        # fence, or when the parser is already at end of input.
        opening = self.lines[initial_line].strip() if initial_line < self.length else ''
        fence_marker = opening[:3]
        if fence_marker not in ('```', '~~~'):
            fence_marker = '```'
        self.pos += 1
        content = []

        while self.pos < self.length:
            line = self.lines[self.pos]
            if line.strip() == fence_marker:
                # Only non-blank lines take part in indentation handling;
                # filtering first keeps min() from seeing an empty
                # sequence when the block holds nothing but blank lines.
                code_lines = [entry for entry in content if entry.strip()]
                if code_lines:
                    base_indent = min(len(entry) - len(entry.lstrip())
                                      for entry in code_lines)
                    # NOTE(review): the prefix literal is reproduced as it
                    # appears in the reviewed text, where whitespace may
                    # have been collapsed -- confirm the intended width
                    # against the original file.
                    clean_content = [' ' + entry[base_indent:] for entry in code_lines]
                    self.tokens.append(BlockToken('code',
                                                  content='\n'.join(clean_content),
                                                  meta={"language": (lang or '').strip(), "code_type": "fenced"},
                                                  line=initial_line + 1))
                self.pos += 1  # step past the closing fence
                return
            content.append(line)
            self.pos += 1

    def parse(self):
        """Scan the input and return the list of emitted tokens.

        ``self.tokens`` is reset first.  Scanning continues from the
        current ``self.pos``; each line matching ``self.FENCE_RE`` is
        handed to ``parse_fenced_code_block`` and every other line is
        skipped.

        Returns:
            The ``self.tokens`` list.
        """
        self.tokens = []
        while self.pos < self.length:
            fence = self.FENCE_RE.match(self.lines[self.pos].strip())
            if fence:
                # parse_fenced_code_block advances self.pos itself.
                self.parse_fenced_code_block(fence.group(1))
                continue
            self.pos += 1
        return self.tokens
|
623
|
+
|
624
|
+
class MDXMarkdownAnalyzer(MarkdownAnalyzer):
    """Analyzer that tokenizes a file with ``MDXMarkdownParser``.

    The constructor does not call ``MarkdownAnalyzer.__init__``; it sets
    up ``text``, ``tokens``, ``references``, ``footnotes`` and
    ``inline_parser`` itself.
    """

    def __init__(self, file_path, encoding='utf-8'):
        """Read ``file_path``, parse it, then run inline-token parsing.

        Args:
            file_path: Path of the file to read.
            encoding: Text encoding used to open the file.
        """
        with open(file_path, 'r', encoding=encoding) as source:
            self.text = source.read()

        mdx_parser = MDXMarkdownParser(self.text)
        self.tokens = mdx_parser.parse()

        # Reference and footnote tables are taken from the parser and
        # shared with the inline parser.
        self.references = mdx_parser.references
        self.footnotes = mdx_parser.footnotes
        self.inline_parser = InlineParser(
            references=self.references,
            footnotes=self.footnotes,
        )

        # _parse_inline_tokens is defined outside this class.
        self._parse_inline_tokens()
|
File without changes
|
File without changes
|
File without changes
|
{markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
{markdown_analysis-0.1.1 → markdown_analysis-0.1.3}/markdown_analysis.egg-info/top_level.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|