markdown-analysis 0.1.1__tar.gz → 0.1.3__tar.gz

This diff shows the differences between two publicly available versions of a package, each of which has been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: markdown_analysis
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: UNKNOWN
5
5
  Home-page: https://github.com/yannbanas/mrkdwn_analysis
6
6
  Author: yannbanas
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: markdown-analysis
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: UNKNOWN
5
5
  Home-page: https://github.com/yannbanas/mrkdwn_analysis
6
6
  Author: yannbanas
@@ -284,19 +284,24 @@ class MarkdownParser:
284
284
  self.pos = self.length
285
285
 
286
286
  def parse_fenced_code_block(self, lang):
287
+ initial_line = self.pos
288
+ initial_indent = len(self.lines[self.pos]) - len(self.lines[self.pos].lstrip())
289
+ fence_marker = self.lines[self.pos].strip()[:3] # Get ``` or ~~~
287
290
  self.pos += 1
288
291
  start = self.pos
292
+
289
293
  while self.pos < self.length:
290
294
  line = self.lines[self.pos]
291
- if line.strip().startswith('```'):
295
+ if line.strip() == fence_marker:
292
296
  content = "\n".join(self.lines[start:self.pos])
293
297
  self.tokens.append(BlockToken('code', content=content, meta={"language": lang}, line=start+1))
294
298
  self.pos += 1
295
299
  return
296
300
  self.pos += 1
297
- content = "\n".join(self.lines[start:])
298
- self.tokens.append(BlockToken('code', content=content, meta={"language": lang}, line=start+1))
299
- self.pos = self.length
301
+
302
+ # If we reach here, we didn't find the closing fence
303
+ self.pos = initial_line # Reset position if fence not found
304
+ raise ValueError(f"Unclosed code fence starting at line {initial_line + 1}")
300
305
 
301
306
  def parse_blockquote(self):
302
307
  start = self.pos
@@ -553,3 +558,76 @@ class MarkdownAnalyzer:
553
558
  'characters': self.count_characters()
554
559
  }
555
560
  return analysis
561
+
562
+ # =================== SUPPORT MDX ===================
563
+
564
+ class MDXBlockToken(BlockToken):
565
+ def __init__(self, type_, content="", level=None, meta=None, line=None):
566
+ super().__init__(type_, content, level, meta, line)
567
+
568
+ class MDXMarkdownParser(MarkdownParser):
569
+ JSX_IMPORT_RE = re.compile(r'^import\s+.*?\s+from\s+["\'](.*?)["\'];?\s*$')
570
+ JSX_COMPONENT_START_RE = re.compile(r'^<([A-Z][A-Za-z0-9]*|[a-z]+\.[A-Z][A-Za-z0-9]*).*?(?:>|\/>)$')
571
+ JSX_COMPONENT_END_RE = re.compile(r'^</([A-Z][A-Za-z0-9]*|[a-z]+\.[A-Z][A-Za-z0-9]*)>$')
572
+
573
+ def __init__(self, text):
574
+ super().__init__(text)
575
+ self.in_jsx_block = False
576
+ self.current_jsx_content = []
577
+ self.jsx_start_line = None
578
+
579
+ def handle_potential_hanging(self):
580
+ if self.pos >= self.length:
581
+ return False
582
+ line = self.lines[self.pos].strip()
583
+ if '</TabItem>' in line or '</Tabs>' in line:
584
+ self.pos += 1
585
+ return True
586
+ return False
587
+
588
+ def parse_fenced_code_block(self, lang):
589
+ initial_line = self.pos
590
+ self.pos += 1
591
+ content = []
592
+
593
+ while self.pos < self.length:
594
+ line = self.lines[self.pos]
595
+ if line.strip() == '```':
596
+ if content:
597
+ # Preserve proper indentation
598
+ base_indent = min(len(line) - len(line.lstrip())
599
+ for line in content if line.strip())
600
+ clean_content = []
601
+ for line in content:
602
+ if line.strip():
603
+ clean_content.append(' ' + line[base_indent:])
604
+ self.tokens.append(BlockToken('code',
605
+ content='\n'.join(clean_content),
606
+ meta={"language": lang.strip(), "code_type": "fenced"},
607
+ line=initial_line + 1))
608
+ self.pos += 1
609
+ return
610
+ content.append(line)
611
+ self.pos += 1
612
+
613
+ def parse(self):
614
+ self.tokens = []
615
+ while self.pos < self.length:
616
+ line = self.lines[self.pos].strip()
617
+ if self.FENCE_RE.match(line):
618
+ lang = self.FENCE_RE.match(line).group(1)
619
+ self.parse_fenced_code_block(lang)
620
+ continue
621
+ self.pos += 1
622
+ return self.tokens
623
+
624
+ class MDXMarkdownAnalyzer(MarkdownAnalyzer):
625
+ def __init__(self, file_path, encoding='utf-8'):
626
+ with open(file_path, 'r', encoding=encoding) as f:
627
+ self.text = f.read()
628
+ parser = MDXMarkdownParser(self.text)
629
+ self.tokens = parser.parse()
630
+ self.references = parser.references
631
+ self.footnotes = parser.footnotes
632
+ self.inline_parser = InlineParser(references=self.references, footnotes=self.footnotes)
633
+ self._parse_inline_tokens()
@@ -6,7 +6,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
6
6
 
7
7
  setup(
8
8
  name='markdown_analysis',
9
- version='0.1.1',
9
+ version='0.1.3',
10
10
  long_description=long_description,
11
11
  long_description_content_type="text/markdown",
12
12
  author='yannbanas',