markdown-analysis 0.1.2__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: markdown_analysis
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: UNKNOWN
5
5
  Home-page: https://github.com/yannbanas/mrkdwn_analysis
6
6
  Author: yannbanas
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: markdown-analysis
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: UNKNOWN
5
5
  Home-page: https://github.com/yannbanas/mrkdwn_analysis
6
6
  Author: yannbanas
@@ -558,3 +558,76 @@ class MarkdownAnalyzer:
558
558
  'characters': self.count_characters()
559
559
  }
560
560
  return analysis
561
+
562
+ # =================== SUPPORT MDX ===================
563
+
564
+ class MDXBlockToken(BlockToken):
565
+ def __init__(self, type_, content="", level=None, meta=None, line=None):
566
+ super().__init__(type_, content, level, meta, line)
567
+
568
+ class MDXMarkdownParser(MarkdownParser):
569
+ JSX_IMPORT_RE = re.compile(r'^import\s+.*?\s+from\s+["\'](.*?)["\'];?\s*$')
570
+ JSX_COMPONENT_START_RE = re.compile(r'^<([A-Z][A-Za-z0-9]*|[a-z]+\.[A-Z][A-Za-z0-9]*).*?(?:>|\/>)$')
571
+ JSX_COMPONENT_END_RE = re.compile(r'^</([A-Z][A-Za-z0-9]*|[a-z]+\.[A-Z][A-Za-z0-9]*)>$')
572
+
573
+ def __init__(self, text):
574
+ super().__init__(text)
575
+ self.in_jsx_block = False
576
+ self.current_jsx_content = []
577
+ self.jsx_start_line = None
578
+
579
+ def handle_potential_hanging(self):
580
+ if self.pos >= self.length:
581
+ return False
582
+ line = self.lines[self.pos].strip()
583
+ if '</TabItem>' in line or '</Tabs>' in line:
584
+ self.pos += 1
585
+ return True
586
+ return False
587
+
588
+ def parse_fenced_code_block(self, lang):
589
+ initial_line = self.pos
590
+ self.pos += 1
591
+ content = []
592
+
593
+ while self.pos < self.length:
594
+ line = self.lines[self.pos]
595
+ if line.strip() == '```':
596
+ if content:
597
+ # Preserve proper indentation
598
+ base_indent = min(len(line) - len(line.lstrip())
599
+ for line in content if line.strip())
600
+ clean_content = []
601
+ for line in content:
602
+ if line.strip():
603
+ clean_content.append(' ' + line[base_indent:])
604
+ self.tokens.append(BlockToken('code',
605
+ content='\n'.join(clean_content),
606
+ meta={"language": lang.strip(), "code_type": "fenced"},
607
+ line=initial_line + 1))
608
+ self.pos += 1
609
+ return
610
+ content.append(line)
611
+ self.pos += 1
612
+
613
+ def parse(self):
614
+ self.tokens = []
615
+ while self.pos < self.length:
616
+ line = self.lines[self.pos].strip()
617
+ if self.FENCE_RE.match(line):
618
+ lang = self.FENCE_RE.match(line).group(1)
619
+ self.parse_fenced_code_block(lang)
620
+ continue
621
+ self.pos += 1
622
+ return self.tokens
623
+
624
+ class MDXMarkdownAnalyzer(MarkdownAnalyzer):
625
+ def __init__(self, file_path, encoding='utf-8'):
626
+ with open(file_path, 'r', encoding=encoding) as f:
627
+ self.text = f.read()
628
+ parser = MDXMarkdownParser(self.text)
629
+ self.tokens = parser.parse()
630
+ self.references = parser.references
631
+ self.footnotes = parser.footnotes
632
+ self.inline_parser = InlineParser(references=self.references, footnotes=self.footnotes)
633
+ self._parse_inline_tokens()
@@ -6,7 +6,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
6
6
 
7
7
  setup(
8
8
  name='markdown_analysis',
9
- version='0.1.2',
9
+ version='0.1.3',
10
10
  long_description=long_description,
11
11
  long_description_content_type="text/markdown",
12
12
  author='yannbanas',