markdown-analysis 0.1.1__tar.gz → 0.1.3__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: markdown_analysis
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: UNKNOWN
5
5
  Home-page: https://github.com/yannbanas/mrkdwn_analysis
6
6
  Author: yannbanas
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: markdown-analysis
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: UNKNOWN
5
5
  Home-page: https://github.com/yannbanas/mrkdwn_analysis
6
6
  Author: yannbanas
@@ -284,19 +284,24 @@ class MarkdownParser:
284
284
  self.pos = self.length
285
285
 
286
286
  def parse_fenced_code_block(self, lang):
287
+ initial_line = self.pos
288
+ initial_indent = len(self.lines[self.pos]) - len(self.lines[self.pos].lstrip())
289
+ fence_marker = self.lines[self.pos].strip()[:3] # Get ``` or ~~~
287
290
  self.pos += 1
288
291
  start = self.pos
292
+
289
293
  while self.pos < self.length:
290
294
  line = self.lines[self.pos]
291
- if line.strip().startswith('```'):
295
+ if line.strip() == fence_marker:
292
296
  content = "\n".join(self.lines[start:self.pos])
293
297
  self.tokens.append(BlockToken('code', content=content, meta={"language": lang}, line=start+1))
294
298
  self.pos += 1
295
299
  return
296
300
  self.pos += 1
297
- content = "\n".join(self.lines[start:])
298
- self.tokens.append(BlockToken('code', content=content, meta={"language": lang}, line=start+1))
299
- self.pos = self.length
301
+
302
+ # If we reach here, we didn't find the closing fence
303
+ self.pos = initial_line # Reset position if fence not found
304
+ raise ValueError(f"Unclosed code fence starting at line {initial_line + 1}")
300
305
 
301
306
  def parse_blockquote(self):
302
307
  start = self.pos
@@ -553,3 +558,76 @@ class MarkdownAnalyzer:
553
558
  'characters': self.count_characters()
554
559
  }
555
560
  return analysis
561
+
562
+ # =================== SUPPORT MDX ===================
563
+
564
+ class MDXBlockToken(BlockToken):
565
+ def __init__(self, type_, content="", level=None, meta=None, line=None):
566
+ super().__init__(type_, content, level, meta, line)
567
+
568
+ class MDXMarkdownParser(MarkdownParser):
569
+ JSX_IMPORT_RE = re.compile(r'^import\s+.*?\s+from\s+["\'](.*?)["\'];?\s*$')
570
+ JSX_COMPONENT_START_RE = re.compile(r'^<([A-Z][A-Za-z0-9]*|[a-z]+\.[A-Z][A-Za-z0-9]*).*?(?:>|\/>)$')
571
+ JSX_COMPONENT_END_RE = re.compile(r'^</([A-Z][A-Za-z0-9]*|[a-z]+\.[A-Z][A-Za-z0-9]*)>$')
572
+
573
+ def __init__(self, text):
574
+ super().__init__(text)
575
+ self.in_jsx_block = False
576
+ self.current_jsx_content = []
577
+ self.jsx_start_line = None
578
+
579
+ def handle_potential_hanging(self):
580
+ if self.pos >= self.length:
581
+ return False
582
+ line = self.lines[self.pos].strip()
583
+ if '</TabItem>' in line or '</Tabs>' in line:
584
+ self.pos += 1
585
+ return True
586
+ return False
587
+
588
+ def parse_fenced_code_block(self, lang):
589
+ initial_line = self.pos
590
+ self.pos += 1
591
+ content = []
592
+
593
+ while self.pos < self.length:
594
+ line = self.lines[self.pos]
595
+ if line.strip() == '```':
596
+ if content:
597
+ # Preserve proper indentation
598
+ base_indent = min(len(line) - len(line.lstrip())
599
+ for line in content if line.strip())
600
+ clean_content = []
601
+ for line in content:
602
+ if line.strip():
603
+ clean_content.append(' ' + line[base_indent:])
604
+ self.tokens.append(BlockToken('code',
605
+ content='\n'.join(clean_content),
606
+ meta={"language": lang.strip(), "code_type": "fenced"},
607
+ line=initial_line + 1))
608
+ self.pos += 1
609
+ return
610
+ content.append(line)
611
+ self.pos += 1
612
+
613
+ def parse(self):
614
+ self.tokens = []
615
+ while self.pos < self.length:
616
+ line = self.lines[self.pos].strip()
617
+ if self.FENCE_RE.match(line):
618
+ lang = self.FENCE_RE.match(line).group(1)
619
+ self.parse_fenced_code_block(lang)
620
+ continue
621
+ self.pos += 1
622
+ return self.tokens
623
+
624
+ class MDXMarkdownAnalyzer(MarkdownAnalyzer):
625
+ def __init__(self, file_path, encoding='utf-8'):
626
+ with open(file_path, 'r', encoding=encoding) as f:
627
+ self.text = f.read()
628
+ parser = MDXMarkdownParser(self.text)
629
+ self.tokens = parser.parse()
630
+ self.references = parser.references
631
+ self.footnotes = parser.footnotes
632
+ self.inline_parser = InlineParser(references=self.references, footnotes=self.footnotes)
633
+ self._parse_inline_tokens()
@@ -6,7 +6,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
6
6
 
7
7
  setup(
8
8
  name='markdown_analysis',
9
- version='0.1.1',
9
+ version='0.1.3',
10
10
  long_description=long_description,
11
11
  long_description_content_type="text/markdown",
12
12
  author='yannbanas',