markdowndata 0.0.6__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,9 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: markdowndata
3
- Version: 0.0.6
3
+ Version: 0.1.1
4
4
  Summary: Tool to convert markdown tables into json objects
5
5
  License: MIT
6
+ License-File: LICENSE
6
7
  Author: Gordon Bean
7
8
  Author-email: gbean@cs.byu.edu
8
9
  Requires-Python: >=3.10,<4.0
@@ -12,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.10
12
13
  Classifier: Programming Language :: Python :: 3.11
13
14
  Classifier: Programming Language :: Python :: 3.12
14
15
  Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Programming Language :: Python :: 3.14
15
17
  Requires-Dist: bs4 (>=0.0.2,<0.0.3)
16
18
  Requires-Dist: markdown-it-py (>=3.0.0,<4.0.0)
17
19
  Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
@@ -32,17 +32,12 @@ def detect_value_type(text: str) -> str | None:
32
32
  return 'md_text'
33
33
 
34
34
 
35
- def yaml_dict_parser(text: str) -> dict:
35
+ def yaml_parser(text: str) -> dict:
36
36
  """
37
- Parse YAML from a string (surrounded by ===) and returns it as a dictionary.
38
- Assumes YAML is a block at the beginning of the text.
37
+ Parse YAML from a string (surrounded by ===)
39
38
  """
40
39
  match = re.search(r'===\s*\n(.*?)\n===', text, re.DOTALL)
41
- if match:
42
- yaml_data = yaml.safe_load(match.group(1))
43
- if yaml_data:
44
- return {k: convert_value(v) for k, v in yaml_data.items()}
45
- return {}
40
+ return yaml.safe_load(match.group(1))
46
41
 
47
42
 
48
43
  def md_table_parser(text: str) -> list[dict]:
@@ -167,7 +162,7 @@ def parse_content_block(text: str):
167
162
  raise ValueError(f'No parser found for content: {text}')
168
163
 
169
164
  parser_functions = {
170
- 'yaml_dict': yaml_dict_parser,
165
+ 'yaml_dict': yaml_parser,
171
166
  'md_table': md_table_parser,
172
167
  'md_list': md_list_parser,
173
168
  'md_text': md_text_parser
@@ -41,12 +41,8 @@ class MarkDataParser:
41
41
  for node in sections:
42
42
  sub_dict = self.build_dict(node.subsections)
43
43
 
44
- if isinstance(node.parsed, dict):
45
- # If the parsed content is a dictionary, merge it with its subsections
46
- merged = {**node.parsed, **sub_dict}
47
- elif node.subsections:
48
- # If subsections exist but parsed content is not a dict,
49
- # wrap both into a new dictionary
44
+ if node.subsections:
45
+ # If subsections exist and there is parsed content wrap both into a new dictionary
50
46
  merged = {
51
47
  'content': node.parsed,
52
48
  **sub_dict
@@ -9,7 +9,15 @@ def split_sections(text: str):
9
9
  Each section is identified by a header (e.g., #, ##, ###).
10
10
  """
11
11
  pattern = re.compile(r'^(?P<header>#+) (?P<title>[^\n]+)', re.MULTILINE)
12
- matches = list(pattern.finditer(text))
12
+ code_spans = [match.span() for match in re.finditer(r'```.*?```', text, re.DOTALL)]
13
+
14
+ def is_in_code_block(pos: int) -> bool:
15
+ for start, end in code_spans:
16
+ if start <= pos < end:
17
+ return True
18
+ return False
19
+
20
+ matches = [match for match in pattern.finditer(text) if not is_in_code_block(match.start())]
13
21
 
14
22
  sections = []
15
23
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "markdowndata"
3
- version = "0.0.6"
3
+ version = "0.1.1"
4
4
  description = "Tool to convert markdown tables into json objects"
5
5
  authors = ["Gordon Bean <gbean@cs.byu.edu>", "Robert Greathouse <robbykap@byu.edu>"]
6
6
  license = "MIT"
File without changes