markdowndata 0.0.6__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- markdowndata/content_parser.py +4 -9
- markdowndata/process_markdown.py +2 -6
- markdowndata/section_tree.py +9 -1
- {markdowndata-0.0.6.dist-info → markdowndata-0.1.1.dist-info}/METADATA +4 -2
- markdowndata-0.1.1.dist-info/RECORD +9 -0
- {markdowndata-0.0.6.dist-info → markdowndata-0.1.1.dist-info}/WHEEL +1 -1
- markdowndata-0.0.6.dist-info/RECORD +0 -9
- {markdowndata-0.0.6.dist-info → markdowndata-0.1.1.dist-info/licenses}/LICENSE +0 -0
markdowndata/content_parser.py
CHANGED
|
@@ -32,17 +32,12 @@ def detect_value_type(text: str) -> str | None:
|
|
|
32
32
|
return 'md_text'
|
|
33
33
|
|
|
34
34
|
|
|
35
|
-
def
|
|
35
|
+
def yaml_parser(text: str) -> dict:
|
|
36
36
|
"""
|
|
37
|
-
Parse YAML from a string (surrounded by ===)
|
|
38
|
-
Assumes YAML is a block at the beginning of the text.
|
|
37
|
+
Parse YAML from a string (surrounded by ===)
|
|
39
38
|
"""
|
|
40
39
|
match = re.search(r'===\s*\n(.*?)\n===', text, re.DOTALL)
|
|
41
|
-
|
|
42
|
-
yaml_data = yaml.safe_load(match.group(1))
|
|
43
|
-
if yaml_data:
|
|
44
|
-
return {k: convert_value(v) for k, v in yaml_data.items()}
|
|
45
|
-
return {}
|
|
40
|
+
return yaml.safe_load(match.group(1))
|
|
46
41
|
|
|
47
42
|
|
|
48
43
|
def md_table_parser(text: str) -> list[dict]:
|
|
@@ -167,7 +162,7 @@ def parse_content_block(text: str):
|
|
|
167
162
|
raise ValueError(f'No parser found for content: {text}')
|
|
168
163
|
|
|
169
164
|
parser_functions = {
|
|
170
|
-
'yaml_dict':
|
|
165
|
+
'yaml_dict': yaml_parser,
|
|
171
166
|
'md_table': md_table_parser,
|
|
172
167
|
'md_list': md_list_parser,
|
|
173
168
|
'md_text': md_text_parser
|
markdowndata/process_markdown.py
CHANGED
|
@@ -41,12 +41,8 @@ class MarkDataParser:
|
|
|
41
41
|
for node in sections:
|
|
42
42
|
sub_dict = self.build_dict(node.subsections)
|
|
43
43
|
|
|
44
|
-
if
|
|
45
|
-
# If
|
|
46
|
-
merged = {**node.parsed, **sub_dict}
|
|
47
|
-
elif node.subsections:
|
|
48
|
-
# If subsections exist but parsed content is not a dict,
|
|
49
|
-
# wrap both into a new dictionary
|
|
44
|
+
if node.subsections:
|
|
45
|
+
# If subsections exist and there is parsed content wrap both into a new dictionary
|
|
50
46
|
merged = {
|
|
51
47
|
'content': node.parsed,
|
|
52
48
|
**sub_dict
|
markdowndata/section_tree.py
CHANGED
|
@@ -9,7 +9,15 @@ def split_sections(text: str):
|
|
|
9
9
|
Each section is identified by a header (e.g., #, ##, ###).
|
|
10
10
|
"""
|
|
11
11
|
pattern = re.compile(r'^(?P<header>#+) (?P<title>[^\n]+)', re.MULTILINE)
|
|
12
|
-
|
|
12
|
+
code_spans = [match.span() for match in re.finditer(r'```.*?```', text, re.DOTALL)]
|
|
13
|
+
|
|
14
|
+
def is_in_code_block(pos: int) -> bool:
|
|
15
|
+
for start, end in code_spans:
|
|
16
|
+
if start <= pos < end:
|
|
17
|
+
return True
|
|
18
|
+
return False
|
|
19
|
+
|
|
20
|
+
matches = [match for match in pattern.finditer(text) if not is_in_code_block(match.start())]
|
|
13
21
|
|
|
14
22
|
sections = []
|
|
15
23
|
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: markdowndata
|
|
3
|
-
Version: 0.0.6
|
|
3
|
+
Version: 0.1.1
|
|
4
4
|
Summary: Tool to convert markdown tables into json objects
|
|
5
5
|
License: MIT
|
|
6
|
+
License-File: LICENSE
|
|
6
7
|
Author: Gordon Bean
|
|
7
8
|
Author-email: gbean@cs.byu.edu
|
|
8
9
|
Requires-Python: >=3.10,<4.0
|
|
@@ -12,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
12
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
14
15
|
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
15
17
|
Requires-Dist: bs4 (>=0.0.2,<0.0.3)
|
|
16
18
|
Requires-Dist: markdown-it-py (>=3.0.0,<4.0.0)
|
|
17
19
|
Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
markdowndata/__init__.py,sha256=jB8leosqIVXuO4LmU0KWn3OyJwgAtGKQBab9JUsbSQ0,172
|
|
2
|
+
markdowndata/content_parser.py,sha256=x0ellHrZn6RZynUzwFGnxdzhS_Y2tL33zzvKrEwBSQc,5157
|
|
3
|
+
markdowndata/process_markdown.py,sha256=jswWDnkApAMoT1gEXe2dfSB2pHt6U3vFuWrh6PgQjBY,1981
|
|
4
|
+
markdowndata/section_tree.py,sha256=qCKZHO4mUJqdVD81tqeIlETRK4Esk6z9XZ90IM6LNOA,3032
|
|
5
|
+
markdowndata/utils.py,sha256=jVTKEGZiLCKdv1cS-73iMf_GaXT6cBc3cl9tp_fjKOw,2040
|
|
6
|
+
markdowndata-0.1.1.dist-info/METADATA,sha256=hfZuT7qF8jNhqMIaycyfVzxxLqC9DPUKzfl0yX7LtM4,706
|
|
7
|
+
markdowndata-0.1.1.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
|
|
8
|
+
markdowndata-0.1.1.dist-info/licenses/LICENSE,sha256=K-k1T7XcwAVVmLsHhfWMye6r7p45xz3xwv5S5FBSyZE,1074
|
|
9
|
+
markdowndata-0.1.1.dist-info/RECORD,,
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
markdowndata/__init__.py,sha256=jB8leosqIVXuO4LmU0KWn3OyJwgAtGKQBab9JUsbSQ0,172
|
|
2
|
-
markdowndata/content_parser.py,sha256=FdapNfkvCj71K5lUSGQh0H1jS5vF36-l1EImsX28J2o,5387
|
|
3
|
-
markdowndata/process_markdown.py,sha256=hcG7X-zdVTl3zHqPjSBYkmwo2Hg0YQYIDhEKeRhpQOo,2193
|
|
4
|
-
markdowndata/section_tree.py,sha256=gn6PtzXcg4iN7F8Tldy3p3jrWe9IYqrcDEAjNm940ME,2722
|
|
5
|
-
markdowndata/utils.py,sha256=jVTKEGZiLCKdv1cS-73iMf_GaXT6cBc3cl9tp_fjKOw,2040
|
|
6
|
-
markdowndata-0.0.6.dist-info/LICENSE,sha256=K-k1T7XcwAVVmLsHhfWMye6r7p45xz3xwv5S5FBSyZE,1074
|
|
7
|
-
markdowndata-0.0.6.dist-info/METADATA,sha256=CeBPGYKdAft1cxA5DGZ1v7Ebgx1DTKNxrIKTwYRgJsg,633
|
|
8
|
-
markdowndata-0.0.6.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
9
|
-
markdowndata-0.0.6.dist-info/RECORD,,
|
|
File without changes
|