markdowndata 0.0.5__tar.gz → 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {markdowndata-0.0.5 → markdowndata-0.1.0}/PKG-INFO +4 -2
- {markdowndata-0.0.5 → markdowndata-0.1.0}/markdowndata/__init__.py +1 -1
- {markdowndata-0.0.5 → markdowndata-0.1.0}/markdowndata/content_parser.py +4 -9
- {markdowndata-0.0.5 → markdowndata-0.1.0}/markdowndata/process_markdown.py +6 -7
- {markdowndata-0.0.5 → markdowndata-0.1.0}/markdowndata/section_tree.py +18 -1
- {markdowndata-0.0.5 → markdowndata-0.1.0}/pyproject.toml +1 -1
- {markdowndata-0.0.5 → markdowndata-0.1.0}/LICENSE +0 -0
- {markdowndata-0.0.5 → markdowndata-0.1.0}/markdowndata/utils.py +0 -0
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: markdowndata
|
|
3
|
-
Version: 0.0.5
|
|
3
|
+
Version: 0.1.0
|
|
4
4
|
Summary: Tool to convert markdown tables into json objects
|
|
5
5
|
License: MIT
|
|
6
|
+
License-File: LICENSE
|
|
6
7
|
Author: Gordon Bean
|
|
7
8
|
Author-email: gbean@cs.byu.edu
|
|
8
9
|
Requires-Python: >=3.10,<4.0
|
|
@@ -12,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
12
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
14
15
|
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
15
17
|
Requires-Dist: bs4 (>=0.0.2,<0.0.3)
|
|
16
18
|
Requires-Dist: markdown-it-py (>=3.0.0,<4.0.0)
|
|
17
19
|
Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
|
|
@@ -32,17 +32,12 @@ def detect_value_type(text: str) -> str | None:
|
|
|
32
32
|
return 'md_text'
|
|
33
33
|
|
|
34
34
|
|
|
35
|
-
def
|
|
35
|
+
def yaml_parser(text: str) -> dict:
|
|
36
36
|
"""
|
|
37
|
-
Parse YAML from a string (surrounded by ===)
|
|
38
|
-
Assumes YAML is a block at the beginning of the text.
|
|
37
|
+
Parse YAML from a string (surrounded by ===)
|
|
39
38
|
"""
|
|
40
39
|
match = re.search(r'===\s*\n(.*?)\n===', text, re.DOTALL)
|
|
41
|
-
|
|
42
|
-
yaml_data = yaml.safe_load(match.group(1))
|
|
43
|
-
if yaml_data:
|
|
44
|
-
return {k: convert_value(v) for k, v in yaml_data.items()}
|
|
45
|
-
return {}
|
|
40
|
+
return yaml.safe_load(match.group(1))
|
|
46
41
|
|
|
47
42
|
|
|
48
43
|
def md_table_parser(text: str) -> list[dict]:
|
|
@@ -167,7 +162,7 @@ def parse_content_block(text: str):
|
|
|
167
162
|
raise ValueError(f'No parser found for content: {text}')
|
|
168
163
|
|
|
169
164
|
parser_functions = {
|
|
170
|
-
'yaml_dict':
|
|
165
|
+
'yaml_dict': yaml_parser,
|
|
171
166
|
'md_table': md_table_parser,
|
|
172
167
|
'md_list': md_list_parser,
|
|
173
168
|
'md_text': md_text_parser
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from typing import List
|
|
2
|
+
|
|
2
3
|
from .content_parser import parse_content_block
|
|
3
4
|
from .section_tree import split_sections, build_section_tree
|
|
4
5
|
from .utils import Node
|
|
@@ -9,6 +10,7 @@ class MarkDataParser:
|
|
|
9
10
|
Parses a Markdown document into a JSON-like dictionary structure.
|
|
10
11
|
Builds a hierarchy of sections and converts each section's content into a structured form.
|
|
11
12
|
"""
|
|
13
|
+
|
|
12
14
|
def __init__(self):
|
|
13
15
|
self.data = {}
|
|
14
16
|
|
|
@@ -29,7 +31,7 @@ class MarkDataParser:
|
|
|
29
31
|
|
|
30
32
|
# Convert the section tree into a JSON-like dictionary structure
|
|
31
33
|
self.data = self.build_dict(section_tree)
|
|
32
|
-
return self.data
|
|
34
|
+
return self.data['Root']
|
|
33
35
|
|
|
34
36
|
def build_dict(self, sections: List[Node]) -> dict:
|
|
35
37
|
"""
|
|
@@ -39,12 +41,8 @@ class MarkDataParser:
|
|
|
39
41
|
for node in sections:
|
|
40
42
|
sub_dict = self.build_dict(node.subsections)
|
|
41
43
|
|
|
42
|
-
if
|
|
43
|
-
# If
|
|
44
|
-
merged = {**node.parsed, **sub_dict}
|
|
45
|
-
elif node.subsections:
|
|
46
|
-
# If subsections exist but parsed content is not a dict,
|
|
47
|
-
# wrap both into a new dictionary
|
|
44
|
+
if node.subsections:
|
|
45
|
+
# If subsections exist and there is parsed content, wrap both into a new dictionary
|
|
48
46
|
merged = {
|
|
49
47
|
'content': node.parsed,
|
|
50
48
|
**sub_dict
|
|
@@ -55,4 +53,5 @@ class MarkDataParser:
|
|
|
55
53
|
|
|
56
54
|
# Use the node's title as the key in the dictionary
|
|
57
55
|
result[node.title] = merged
|
|
56
|
+
|
|
58
57
|
return result
|
|
@@ -12,6 +12,17 @@ def split_sections(text: str):
|
|
|
12
12
|
matches = list(pattern.finditer(text))
|
|
13
13
|
|
|
14
14
|
sections = []
|
|
15
|
+
|
|
16
|
+
# First grab anything before the initial header
|
|
17
|
+
end = matches[0].start() if matches else len(text)
|
|
18
|
+
sections.append(Section(
|
|
19
|
+
title="", # The section's title text
|
|
20
|
+
level=0, # The number of # symbols indicates nesting level
|
|
21
|
+
start=0, # Position where this header starts in the text
|
|
22
|
+
end=end, # Position where this section's content ends
|
|
23
|
+
content=text[:end].strip() # The actual text content of this section (excluding header)
|
|
24
|
+
))
|
|
25
|
+
|
|
15
26
|
for i, match in enumerate(matches):
|
|
16
27
|
# Calculate the 'end' of the current section:
|
|
17
28
|
# It's the start of the next header or the end of the document.
|
|
@@ -34,6 +45,12 @@ def build_section_tree(sections):
|
|
|
34
45
|
Uses a stack to track the current section hierarchy.
|
|
35
46
|
"""
|
|
36
47
|
root = Node(title='Root', level=0, parsed={}, subsections=[])
|
|
48
|
+
|
|
49
|
+
# If the initial section is level 0, it becomes part of the root
|
|
50
|
+
if sections[0].level == 0:
|
|
51
|
+
root.parsed = parse_content_block(sections[0].content)
|
|
52
|
+
sections.pop(0)
|
|
53
|
+
|
|
37
54
|
stack = [root]
|
|
38
55
|
|
|
39
56
|
for section in sections:
|
|
@@ -55,4 +72,4 @@ def build_section_tree(sections):
|
|
|
55
72
|
# Push this node to the stack (might have its own children)
|
|
56
73
|
stack.append(node)
|
|
57
74
|
|
|
58
|
-
return root
|
|
75
|
+
return [root]
|
|
File without changes
|
|
File without changes
|