markdowndata 0.0.5__tar.gz → 0.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {markdowndata-0.0.5 → markdowndata-0.0.6}/PKG-INFO +1 -1
- {markdowndata-0.0.5 → markdowndata-0.0.6}/markdowndata/__init__.py +1 -1
- {markdowndata-0.0.5 → markdowndata-0.0.6}/markdowndata/process_markdown.py +4 -1
- {markdowndata-0.0.5 → markdowndata-0.0.6}/markdowndata/section_tree.py +18 -1
- {markdowndata-0.0.5 → markdowndata-0.0.6}/pyproject.toml +1 -1
- {markdowndata-0.0.5 → markdowndata-0.0.6}/LICENSE +0 -0
- {markdowndata-0.0.5 → markdowndata-0.0.6}/markdowndata/content_parser.py +0 -0
- {markdowndata-0.0.5 → markdowndata-0.0.6}/markdowndata/utils.py +0 -0
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from typing import List
|
|
2
|
+
|
|
2
3
|
from .content_parser import parse_content_block
|
|
3
4
|
from .section_tree import split_sections, build_section_tree
|
|
4
5
|
from .utils import Node
|
|
@@ -9,6 +10,7 @@ class MarkDataParser:
|
|
|
9
10
|
Parses a Markdown document into a JSON-like dictionary structure.
|
|
10
11
|
Builds a hierarchy of sections and converts each section's content into a structured form.
|
|
11
12
|
"""
|
|
13
|
+
|
|
12
14
|
def __init__(self):
|
|
13
15
|
self.data = {}
|
|
14
16
|
|
|
@@ -29,7 +31,7 @@ class MarkDataParser:
|
|
|
29
31
|
|
|
30
32
|
# Convert the section tree into a JSON-like dictionary structure
|
|
31
33
|
self.data = self.build_dict(section_tree)
|
|
32
|
-
return self.data
|
|
34
|
+
return self.data['Root']
|
|
33
35
|
|
|
34
36
|
def build_dict(self, sections: List[Node]) -> dict:
|
|
35
37
|
"""
|
|
@@ -55,4 +57,5 @@ class MarkDataParser:
|
|
|
55
57
|
|
|
56
58
|
# Use the node's title as the key in the dictionary
|
|
57
59
|
result[node.title] = merged
|
|
60
|
+
|
|
58
61
|
return result
|
|
@@ -12,6 +12,17 @@ def split_sections(text: str):
|
|
|
12
12
|
matches = list(pattern.finditer(text))
|
|
13
13
|
|
|
14
14
|
sections = []
|
|
15
|
+
|
|
16
|
+
# First grab anything before the initial header
|
|
17
|
+
end = matches[0].start() if matches else len(text)
|
|
18
|
+
sections.append(Section(
|
|
19
|
+
title="", # The section's title text
|
|
20
|
+
level=0, # Level 0 marks pre-header content (no # symbols); it is merged into the root
|
|
21
|
+
start=0, # The pre-header content starts at the beginning of the text
|
|
22
|
+
end=end, # Position where this section's content ends
|
|
23
|
+
content=text[:end].strip() # The actual text content of this section (excluding header)
|
|
24
|
+
))
|
|
25
|
+
|
|
15
26
|
for i, match in enumerate(matches):
|
|
16
27
|
# Calculate the 'end' of the current section:
|
|
17
28
|
# It's the start of the next header or the end of the document.
|
|
@@ -34,6 +45,12 @@ def build_section_tree(sections):
|
|
|
34
45
|
Uses a stack to track the current section hierarchy.
|
|
35
46
|
"""
|
|
36
47
|
root = Node(title='Root', level=0, parsed={}, subsections=[])
|
|
48
|
+
|
|
49
|
+
# If the initial section is level 0, it becomes part of the root
|
|
50
|
+
if sections[0].level == 0:
|
|
51
|
+
root.parsed = parse_content_block(sections[0].content)
|
|
52
|
+
sections.pop(0)
|
|
53
|
+
|
|
37
54
|
stack = [root]
|
|
38
55
|
|
|
39
56
|
for section in sections:
|
|
@@ -55,4 +72,4 @@ def build_section_tree(sections):
|
|
|
55
72
|
# Push this node to the stack (might have its own children)
|
|
56
73
|
stack.append(node)
|
|
57
74
|
|
|
58
|
-
return root
|
|
75
|
+
return [root]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|