PyPI - markdowndata - Versions diffs - 0.0.2__tar.gz → 0.0.3__tar.gz - Mend

markdowndata 0.0.2__tar.gz → 0.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

{markdowndata-0.0.2 → markdowndata-0.0.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: markdowndata
-Version: 0.0.2
+Version: 0.0.3
 Summary: Tool to convert markdown tables into json objects
 License: MIT
 Author: Gordon Bean

{markdowndata-0.0.2 → markdowndata-0.0.3}/markdowndata/content_parser.py RENAMED Viewed

@@ -1,13 +1,13 @@
 import re
 import yaml
-from .utils import convert_value, get_md_soup
+from .utils import convert_value, get_md_soup, is_single_tag_block, is_structural_line
 def detect_value_type(text: str) -> str | None:
     """
     Detect the type of content block (YAML, table, list, or text).
-    Returns the content type string or None if no match is found.
+    Only classifies as md_table or md_list if the content is *only* that structure.
     """
     text = text.strip()
     if not text:
@@ -17,16 +17,19 @@ def detect_value_type(text: str) -> str | None:
     if re.search(r'===\s*\n(.*?)\n===', text, re.DOTALL):
         return 'yaml_dict'
-    # Convert markdown to HTML and analyze for tables, lists, or text
+    # Convert markdown to HTML
     soup = get_md_soup(text)
-    if soup.find('table'):
+    # Check for exactly one <table> and no other tags or text
+    if soup.find('table') and is_single_tag_block(soup, 'table'):
         return 'md_table'
-    elif soup.find('ul'):
+    # Check for exactly one <ul> and no other tags or text
+    if soup.find('ul') and is_single_tag_block(soup, 'ul'):
         return 'md_list'
-    elif soup.get_text(strip=True):
-        return 'md_text'
-    return None
+    # Fallback: process everything as Markdown text
+    return 'md_text'
 def yaml_dict_parser(text: str) -> dict:
@@ -77,12 +80,75 @@ def md_list_parser(text: str) -> list:
 def md_text_parser(text: str) -> str:
     """
-    Parse a Markdown text block and return its text content as a string.
-    Ensures lines flow together as a paragraph, not split across lines.
+    Parse Markdown text by:
+    - Joining lines separated by a single newline (soft breaks)
+    - Preserving formatting (bold, italic, headers, code, code block, etc.)
+    - Preserving paragraph breaks (double or more newlines)
+    - Preserving fenced code blocks exactly
     """
-    soup = get_md_soup(text)
-    raw_text = soup.get_text(strip=True)
-    return convert_value(' '.join(raw_text.splitlines()))
+    process_text = process_code_blocks(text, process_soft_breaks)
+    return convert_value(process_text)
+def process_code_blocks(text: str, non_code_callback) -> str:
+    """
+    Process Markdown text, preserving fenced code blocks and applying a transformation
+    only to the non-code sections using `non_code_callback`.
+    """
+    code_pattern = re.compile(r'```.*?```', re.DOTALL)
+    result = []
+    last_end = 0
+    for match in code_pattern.finditer(text):
+        start, end = match.span()
+        non_code_part = text[last_end:start]
+        code_block = match.group()
+        if non_code_part.strip():
+            result.append(non_code_callback(non_code_part))
+        result.append(code_block)
+        last_end = end
+    remaining = text[last_end:]
+    if remaining.strip():
+        result.append(non_code_callback(remaining))
+    return '\n\n'.join(result)
+def process_soft_breaks(text: str) -> str:
+    """
+    Joins lines separated by a single newline, except for Markdown block elements:
+    - Lists
+    - Tables
+    - Blockquotes
+    Preserves paragraph breaks (2+ newlines).
+    """
+    lines = text.split('\n')
+    processed = []
+    paragraph_lines = []
+    for line in lines:
+        if line.strip() == '':
+            if paragraph_lines:
+                processed.append(' '.join(paragraph_lines))
+                paragraph_lines = []
+            processed.append('')
+            continue
+        if is_structural_line(line):
+            if paragraph_lines:
+                processed.append(' '.join(paragraph_lines))
+                paragraph_lines = []
+            processed.append(line)
+        else:
+            paragraph_lines.append(line.strip())
+    if paragraph_lines:
+        processed.append(' '.join(paragraph_lines))
+    return '\n'.join(processed)
 def parse_content_block(text: str):

{markdowndata-0.0.2 → markdowndata-0.0.3}/markdowndata/utils.py RENAMED Viewed

@@ -1,3 +1,4 @@
+import re
 from typing import Union
 from dataclasses import dataclass
@@ -5,6 +6,9 @@ from bs4 import BeautifulSoup
 from markdown_it import MarkdownIt
+STRUCTURAL_LINE_RE = re.compile(r'^(\s*[-*+]\s+|\s*\d+\.\s+|\|.+\||>\s*)')
 @dataclass
 class Section:
     """
@@ -41,13 +45,33 @@ def get_md_soup(text: str) -> BeautifulSoup:
     return BeautifulSoup(html, 'html.parser')
+def is_single_tag_block(soup, tag_name: str) -> bool:
+    """
+    Check if the block consists of a single top-level tag (e.g. <table>, <ul>)
+    with no other sibling tags.
+    """
+    tags = soup.find_all(recursive=False)
+    return len(tags) == 1 and tags[0].name == tag_name
+def is_structural_line(line: str) -> bool:
+    """
+    Returns True if the line is a Markdown block element (list, table row, blockquote).
+    """
+    return bool(STRUCTURAL_LINE_RE.match(line))
 def convert_value(value: str) -> Union[int, float, str]:
     """
     Convert a string to an int, float, or datetime object is possible, or return the original string.
     """
     try:
         value = value.strip()
-        num = float(value)
-        return int(num) if num.is_integer() else num
+        if value.isdigit():
+            return int(value)
+        elif '.' in value:
+            return float(value)
+        else:
+            return value
     except (ValueError, AttributeError):
         return value

{markdowndata-0.0.2 → markdowndata-0.0.3}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "markdowndata"
-version = "0.0.2"
+version = "0.0.3"
 description = "Tool to convert markdown tables into json objects"
 authors = ["Gordon Bean <gbean@cs.byu.edu>", "Robert Greathouse <robbykap@byu.edu>"]
 license = "MIT"

{markdowndata-0.0.2 → markdowndata-0.0.3}/LICENSE RENAMED Viewed

File without changes

{markdowndata-0.0.2 → markdowndata-0.0.3}/markdowndata/__init__.py RENAMED Viewed

File without changes

{markdowndata-0.0.2 → markdowndata-0.0.3}/markdowndata/process_markdown.py RENAMED Viewed

File without changes

{markdowndata-0.0.2 → markdowndata-0.0.3}/markdowndata/section_tree.py RENAMED Viewed

File without changes

markdowndata 0.0.2__tar.gz → 0.0.3__tar.gz

markdowndata 0.0.2__tar.gz → 0.0.3__tar.gz