markdowndata 0.0.5__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
markdowndata/__init__.py CHANGED
@@ -7,4 +7,4 @@ def loads(text):
7
7
 
8
8
 
9
9
  def load(file):
10
- return loads(file.read())
10
+ return loads(file.read())
@@ -32,17 +32,12 @@ def detect_value_type(text: str) -> str | None:
32
32
  return 'md_text'
33
33
 
34
34
 
35
- def yaml_dict_parser(text: str) -> dict:
35
+ def yaml_parser(text: str) -> dict:
36
36
  """
37
- Parse YAML from a string (surrounded by ===) and returns it as a dictionary.
38
- Assumes YAML is a block at the beginning of the text.
37
+ Parse YAML from a string (surrounded by ===)
39
38
  """
40
39
  match = re.search(r'===\s*\n(.*?)\n===', text, re.DOTALL)
41
- if match:
42
- yaml_data = yaml.safe_load(match.group(1))
43
- if yaml_data:
44
- return {k: convert_value(v) for k, v in yaml_data.items()}
45
- return {}
40
+ return yaml.safe_load(match.group(1))
46
41
 
47
42
 
48
43
  def md_table_parser(text: str) -> list[dict]:
@@ -167,7 +162,7 @@ def parse_content_block(text: str):
167
162
  raise ValueError(f'No parser found for content: {text}')
168
163
 
169
164
  parser_functions = {
170
- 'yaml_dict': yaml_dict_parser,
165
+ 'yaml_dict': yaml_parser,
171
166
  'md_table': md_table_parser,
172
167
  'md_list': md_list_parser,
173
168
  'md_text': md_text_parser
@@ -1,4 +1,5 @@
1
1
  from typing import List
2
+
2
3
  from .content_parser import parse_content_block
3
4
  from .section_tree import split_sections, build_section_tree
4
5
  from .utils import Node
@@ -9,6 +10,7 @@ class MarkDataParser:
9
10
  Parses a Markdown document into a JSON-like dictionary structure.
10
11
  Builds a hierarchy of sections and converts each section's content into a structured form.
11
12
  """
13
+
12
14
  def __init__(self):
13
15
  self.data = {}
14
16
 
@@ -29,7 +31,7 @@ class MarkDataParser:
29
31
 
30
32
  # Convert the section tree into a JSON-like dictionary structure
31
33
  self.data = self.build_dict(section_tree)
32
- return self.data
34
+ return self.data['Root']
33
35
 
34
36
  def build_dict(self, sections: List[Node]) -> dict:
35
37
  """
@@ -39,12 +41,8 @@ class MarkDataParser:
39
41
  for node in sections:
40
42
  sub_dict = self.build_dict(node.subsections)
41
43
 
42
- if isinstance(node.parsed, dict):
43
- # If the parsed content is a dictionary, merge it with its subsections
44
- merged = {**node.parsed, **sub_dict}
45
- elif node.subsections:
46
- # If subsections exist but parsed content is not a dict,
47
- # wrap both into a new dictionary
44
+ if node.subsections:
45
+ # If subsections exist and there is parsed content wrap both into a new dictionary
48
46
  merged = {
49
47
  'content': node.parsed,
50
48
  **sub_dict
@@ -55,4 +53,5 @@ class MarkDataParser:
55
53
 
56
54
  # Use the node's title as the key in the dictionary
57
55
  result[node.title] = merged
56
+
58
57
  return result
@@ -12,6 +12,17 @@ def split_sections(text: str):
12
12
  matches = list(pattern.finditer(text))
13
13
 
14
14
  sections = []
15
+
16
+ # First grab anything before the initial header
17
+ end = matches[0].start() if matches else len(text)
18
+ sections.append(Section(
19
+ title="", # The section's title text
20
+ level=0, # The number of # symbols indicates nesting level
21
+ start=0, # Position where this header starts in the text
22
+ end=end, # Position where this section's content ends
23
+ content=text[:end].strip() # The actual text content of this section (excluding header)
24
+ ))
25
+
15
26
  for i, match in enumerate(matches):
16
27
  # Calculate the 'end' of the current section:
17
28
  # It's the start of the next header or the end of the document.
@@ -34,6 +45,12 @@ def build_section_tree(sections):
34
45
  Uses a stack to track the current section hierarchy.
35
46
  """
36
47
  root = Node(title='Root', level=0, parsed={}, subsections=[])
48
+
49
+ # If the initial section is level 0, it makes part of the root
50
+ if sections[0].level == 0:
51
+ root.parsed = parse_content_block(sections[0].content)
52
+ sections.pop(0)
53
+
37
54
  stack = [root]
38
55
 
39
56
  for section in sections:
@@ -55,4 +72,4 @@ def build_section_tree(sections):
55
72
  # Push this node to the stack (might have its own children)
56
73
  stack.append(node)
57
74
 
58
- return root.subsections
75
+ return [root]
@@ -1,8 +1,9 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: markdowndata
3
- Version: 0.0.5
3
+ Version: 0.1.0
4
4
  Summary: Tool to convert markdown tables into json objects
5
5
  License: MIT
6
+ License-File: LICENSE
6
7
  Author: Gordon Bean
7
8
  Author-email: gbean@cs.byu.edu
8
9
  Requires-Python: >=3.10,<4.0
@@ -12,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.10
12
13
  Classifier: Programming Language :: Python :: 3.11
13
14
  Classifier: Programming Language :: Python :: 3.12
14
15
  Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Programming Language :: Python :: 3.14
15
17
  Requires-Dist: bs4 (>=0.0.2,<0.0.3)
16
18
  Requires-Dist: markdown-it-py (>=3.0.0,<4.0.0)
17
19
  Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
@@ -0,0 +1,9 @@
1
+ markdowndata/__init__.py,sha256=jB8leosqIVXuO4LmU0KWn3OyJwgAtGKQBab9JUsbSQ0,172
2
+ markdowndata/content_parser.py,sha256=x0ellHrZn6RZynUzwFGnxdzhS_Y2tL33zzvKrEwBSQc,5157
3
+ markdowndata/process_markdown.py,sha256=jswWDnkApAMoT1gEXe2dfSB2pHt6U3vFuWrh6PgQjBY,1981
4
+ markdowndata/section_tree.py,sha256=gn6PtzXcg4iN7F8Tldy3p3jrWe9IYqrcDEAjNm940ME,2722
5
+ markdowndata/utils.py,sha256=jVTKEGZiLCKdv1cS-73iMf_GaXT6cBc3cl9tp_fjKOw,2040
6
+ markdowndata-0.1.0.dist-info/METADATA,sha256=DIZoJgyceiwDGWp40bNI04xmk8uZExE1tWr8aYCeYEs,706
7
+ markdowndata-0.1.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
8
+ markdowndata-0.1.0.dist-info/licenses/LICENSE,sha256=K-k1T7XcwAVVmLsHhfWMye6r7p45xz3xwv5S5FBSyZE,1074
9
+ markdowndata-0.1.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.1.3
2
+ Generator: poetry-core 2.2.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,9 +0,0 @@
1
- markdowndata/__init__.py,sha256=YWWuimxElvXp_jFOYzSAQ7zhtaGU2-jFmKYJq9Mtqp4,171
2
- markdowndata/content_parser.py,sha256=FdapNfkvCj71K5lUSGQh0H1jS5vF36-l1EImsX28J2o,5387
3
- markdowndata/process_markdown.py,sha256=ajJBMp2J5ipPUf4r3nxbl2JFdwKWV-82YL79RmWVrDA,2182
4
- markdowndata/section_tree.py,sha256=X4kPqHCa_2hkiq6roOHUc22G3pNIQjpUv_ni_J14XXQ,2060
5
- markdowndata/utils.py,sha256=jVTKEGZiLCKdv1cS-73iMf_GaXT6cBc3cl9tp_fjKOw,2040
6
- markdowndata-0.0.5.dist-info/LICENSE,sha256=K-k1T7XcwAVVmLsHhfWMye6r7p45xz3xwv5S5FBSyZE,1074
7
- markdowndata-0.0.5.dist-info/METADATA,sha256=4-ewjm9G3G8jmK0eU6DzWlnLtR7XGzxtICghQGj1cDw,633
8
- markdowndata-0.0.5.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
9
- markdowndata-0.0.5.dist-info/RECORD,,