PyPI - raw-docx - Versions diffs - 0.5.0__tar.gz → 0.7.0__tar.gz - Mend

raw-docx 0.5.0.tar.gz → 0.7.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

{raw_docx-0.5.0 → raw_docx-0.7.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
 Name: raw_docx
-Version: 0.5.0
+Version: 0.7.0
 Summary: A package for processing and analyzing raw document formats
 Home-page: https://github.com/daveih/raw_docx
 Author: Dave Iberson-Hurst
@@ -18,12 +18,13 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: python-docx
-Requires-Dist: python-json-logger
+Requires-Dist: simple_error_log
 Dynamic: author
 Dynamic: classifier
 Dynamic: description
 Dynamic: description-content-type
 Dynamic: home-page
+Dynamic: license-file
 Dynamic: requires-dist
 Dynamic: requires-python
 Dynamic: summary

{raw_docx-0.5.0 → raw_docx-0.7.0}/setup.py RENAMED Viewed

@@ -4,7 +4,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
     long_description = fh.read()
 package_info = {}
-with open("src/raw_docx/__version__.py") as fp:
+with open("src/raw_docx/__info__.py") as fp:
     exec(fp.read(), package_info)
 setup(
@@ -19,7 +19,7 @@ setup(
     packages=find_packages(where="src"),
     package_dir={"": "src"},
     package_data={},
-    install_requires=["python-docx", "python-json-logger"],
+    install_requires=["python-docx", "simple_error_log"],
     tests_require=["pytest", "pytest-cov", "pytest-mock", "python-dotenv"],
     classifiers=[
         "Development Status :: 3 - Alpha",

raw_docx-0.7.0/src/raw_docx/__info__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __package_version__ = "0.7.0"

raw_docx-0.7.0/src/raw_docx/__init__.py ADDED Viewed

@@ -0,0 +1,25 @@
+from .raw_docx import RawDocx
+from .raw_document import RawDocument
+from .raw_image import RawImage
+from .raw_list_item import RawListItem
+from .raw_list import RawList
+from .raw_paragraph import RawParagraph
+from .raw_run import RawRun
+from .raw_section import RawSection
+from .raw_table_cell import RawTableCell
+from .raw_table_row import RawTableRow
+from .raw_table import RawTable
+__all__ = [
+    "RawDocx",
+    "RawDocument",
+    "RawImage",
+    "RawList",
+    "RawListItem",
+    "RawParagraph",
+    "RawRun",
+    "RawSection",
+    "RawTableCell",
+    "RawTableRow",
+    "RawTable",
+]

{raw_docx-0.5.0/src/raw_docx → raw_docx-0.7.0/src/raw_docx/docx}/docx_paragraph.py RENAMED Viewed

@@ -1,31 +1,35 @@
 from docx.text.paragraph import Paragraph
 from docx.styles.style import ParagraphStyle
 from docx.text.run import Run
-from .raw_logger import logger
-from .raw_run import RawRun
+from simple_error_log import Errors
+from raw_docx.raw_run import RawRun
-def extract_runs(paragraph: Paragraph) -> list[dict]:
+def install():
+    setattr(Paragraph, "extract_runs", extract_runs)
+def extract_runs(paragraph: Paragraph, errors: Errors) -> list[RawRun]:
     if paragraph.text.startswith(
         "This template is intended for interventional clinical trials.  The template is suitable"
     ):
-        logger.info(f"Paragraph style {paragraph.style.name}")
+        errors.info(f"Paragraph style {paragraph.style.name}")
     data = [
         {
             "text": run.text,
-            "color": _get_run_color(paragraph.style, run),
-            "highlight": _get_highlight_color(run),
+            "color": _get_run_color(paragraph.style, run, errors),
+            "highlight": _get_highlight_color(run, errors),
             "keep": True,
             # "style": run.style.name if run.style else paragraph.style.name
             "style": paragraph.style.name,
         }
         for run in paragraph.runs
     ]
-    data = _tidy_runs_color(data)
+    data = _tidy_runs_color(data, errors)
     return [RawRun(x["text"], x["color"], x["highlight"], x["style"]) for x in data]
-def _tidy_runs_color(data: list[dict]) -> list[dict]:
+def _tidy_runs_color(data: list[dict], errors: Errors) -> list[dict]:
     more = False
     for index, run in enumerate(data):
         if (
@@ -38,14 +42,14 @@ def _tidy_runs_color(data: list[dict]) -> list[dict]:
             more = True
     new_data = [x for x in data if x["keep"]]
     if more:
-        new_data = _tidy_runs_color(new_data)
+        new_data = _tidy_runs_color(new_data, errors)
     return new_data
-def _get_run_color(paragraph: Paragraph, run: Run) -> str | None:
-    paragraph_color = _get_font_colour(paragraph)
-    font_color = _get_font_colour(run)
-    style_color = _run_style_color(run)
+def _get_run_color(paragraph: Paragraph, run: Run, errors: Errors) -> str | None:
+    paragraph_color = _get_font_colour(paragraph, errors)
+    font_color = _get_font_colour(run, errors)
+    style_color = _run_style_color(run, errors)
     if font_color:
         result = str(font_color)
     elif style_color:
@@ -55,15 +59,15 @@ def _get_run_color(paragraph: Paragraph, run: Run) -> str | None:
     return result
-def _get_highlight_color(run: Run) -> str | None:
+def _get_highlight_color(run: Run, errors: Errors) -> str | None:
     try:
         return str(run.font.highlight_color)
     except Exception as e:
-        logger.exception("Failed to get run highlight color", e)
+        errors.exception("Failed to get run highlight color", e)
         return None
-def _run_style_color(run: Run) -> str | None:
+def _run_style_color(run: Run, errors: Errors) -> str | None:
     try:
         run_color = None
         run_style = run.style
@@ -74,16 +78,13 @@ def _run_style_color(run: Run) -> str | None:
                 run_style = run_style.base_style
         return run_color
     except Exception as e:
-        logger.exception("Failed to get run style color", e)
+        errors.exception("Failed to get run style color", e)
         return None
-def _get_font_colour(item: Run | ParagraphStyle) -> str | None:
+def _get_font_colour(item: Run | ParagraphStyle, errors: Errors) -> str | None:
     try:
         return item.font.color.rgb
     except Exception as e:
-        logger.exception("Failed to get font color", e)
+        errors.exception("Failed to get font color", e)
         return None
-setattr(Paragraph, "extract_runs", extract_runs)

{raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx/raw_docx.py RENAMED Viewed

@@ -3,15 +3,16 @@ import re
 import docx
 import zipfile
 from pathlib import Path
-from .raw_document import RawDocument
-from .raw_section import RawSection
-from .raw_paragraph import RawParagraph
-from .raw_image import RawImage
-from .raw_table import RawTable
-from .raw_table_row import RawTableRow
-from .raw_table_cell import RawTableCell
-from .raw_list import RawList
-from .raw_list_item import RawListItem
+from raw_docx.raw_document import RawDocument
+from raw_docx.raw_section import RawSection
+from raw_docx.raw_paragraph import RawParagraph
+from raw_docx.raw_image import RawImage
+from raw_docx.raw_table import RawTable
+from raw_docx.raw_table_row import RawTableRow
+from raw_docx.raw_table_cell import RawTableCell
+from raw_docx.raw_list import RawList
+from raw_docx.raw_list_item import RawListItem
+from raw_docx.docx.docx_paragraph import install
 from docx import Document as DocXProcessor
 from docx.document import Document
 from docx.oxml.table import CT_Tbl, CT_TcPr
@@ -19,8 +20,7 @@ from docx.oxml.text.paragraph import CT_P
 from docx.table import Table, _Cell
 from docx.text.paragraph import Paragraph
 from lxml import etree
-from .raw_logger import logger
-from .docx_paragraph import extract_runs  # Needed such that method inserted into class
+from simple_error_log import Errors
 class RawDocx:
@@ -28,12 +28,17 @@ class RawDocx:
         pass
     def __init__(self, full_path: str):
+        install()
+        self.errors = Errors()
         path = Path(full_path)
         # path.stem, path.suffix[1:]
         self.full_path = full_path
         self.dir = path.parent
         self.filename = path.name
         self.image_path = os.path.join(self.dir, "images")
+        self.errors.debug(
+            f"RawDocx initialisation: full_path='{self.full_path}', dir='{self.dir}', image_path0'{self.image_path}', filename='{self.filename}"
+        )
         self.image_rels = {}
         self._organise_dir()
         self.source_document = DocXProcessor(self.full_path)
@@ -46,28 +51,26 @@ class RawDocx:
         except FileExistsError:
             pass
         except Exception as e:
-            logger.exception("Failed to create image directory", e)
+            self.errors.exception("Failed to create image directory", e)
     def _process(self):
         try:
-            self._extract_images()
+            self._process_images()
             for block_item in self._iter_block_items(self.source_document):
                 target_section = self.target_document.current_section()
                 if isinstance(block_item, Paragraph):
-                    # print(f"PARA BLOCK: {block_item.text}")
                     self._process_paragraph(block_item, target_section, self.image_rels)
                 elif isinstance(block_item, Table):
                     self._process_table(block_item, target_section)
                 else:
-                    logger.warning("Ignoring element")
+                    self.errors.warning("Ignoring element")
                     raise ValueError
         except Exception as e:
-            logger.exception("Exception raised processing document", e)
+            self.errors.exception("Exception raised processing document", e)
-    def _extract_images(self):
+    def _process_images(self):
         # Extract images to image dir
         self._extract_images()
-        # Save all 'rId:filenames' as references
         for r in self.source_document.part.rels.values():
             if isinstance(r._target, docx.parts.image.ImagePart):
                 self.image_rels[r.rId] = os.path.join(
@@ -91,9 +94,8 @@ class RawDocx:
         for child in parent_elm.iterchildren():
             if isinstance(child, str):
-                logger.warning(f"Ignoring eTree element {child}")
+                self.errors.warning(f"Ignoring eTree element {child}")
             elif isinstance(child, CT_P):
-                # print(f"PARA: {child.text}")
                 yield Paragraph(child, parent)
             elif isinstance(child, CT_Tbl):
                 yield Table(child, parent)
@@ -109,13 +111,12 @@ class RawDocx:
                 ):
                     pass
                 else:
-                    logger.warning(f"Ignoring eTree element {self._tree(child)}")
+                    self.errors.warning(f"Ignoring eTree element {self._tree(child)}")
             else:
                 raise ValueError(f"something's not right with a child {type(child)}")
     def _tree(self, node, tab=1):
-        # print(f"{'  ' * tab}{node.tag} {node.text}")
         for child in node:
             self._tree(child, tab + 1)
@@ -155,7 +156,9 @@ class RawDocx:
                         if block_item.tag == CT_TcPr:
                             pass
                         else:
-                            logger.warning(f"Ignoring eTree element {block_item.tag}")
+                            self.errors.warning(
+                                f"Ignoring eTree element {block_item.tag}"
+                            )
                     else:
                         raise self.LogicError(
                             f"something's not right with a child {type(block_item)}"
@@ -164,15 +167,15 @@ class RawDocx:
     def _process_cell(self, paragraph, target_cell: RawTableCell):
         if self._is_list(paragraph):
             list_level = self.get_list_level(paragraph)
-            item = RawListItem(paragraph.extract_runs(), list_level)
+            item = RawListItem(paragraph.extract_runs(self.errors), list_level)
             if target_cell.is_in_list():
                 list = target_cell.current_list()
             else:
-                list = RawList()
+                list = RawList(self.errors)
                 target_cell.add(list)
             list.add(item)
         else:
-            target_paragraph = RawParagraph(paragraph.extract_runs())
+            target_paragraph = RawParagraph(paragraph.extract_runs(self.errors))
             target_cell.add(target_paragraph)
     def _process_paragraph(
@@ -183,23 +186,21 @@ class RawDocx:
             target_section = RawSection(paragraph.text, paragraph.text, level)
             self.target_document.add(target_section)
         elif self._is_list(paragraph):
-            # print(f"START LIST: {paragraph.text}")
             list_level = self.get_list_level(paragraph)
-            item = RawListItem(paragraph.extract_runs(), list_level)
+            item = RawListItem(paragraph.extract_runs(self.errors), list_level)
             if target_section.is_in_list():
                 list = target_section.current_list()
             else:
-                list = RawList()
+                list = RawList(self.errors)
                 target_section.add(list)
             list.add(item)
         elif "Graphic" in paragraph._p.xml:
             for rId in image_rels:
                 if rId in paragraph._p.xml:
-                    target_image = RawImage(image_rels[rId])
+                    target_image = RawImage(image_rels[rId], self.errors)
                     target_section.add(target_image)
         else:
-            # print(f"START RUNS: {paragraph.text}")
-            target_paragraph = RawParagraph(paragraph.extract_runs())
+            target_paragraph = RawParagraph(paragraph.extract_runs(self.errors))
             target_section.add(target_paragraph)
     def get_list_level(self, paragraph):

{raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx/raw_image.py RENAMED Viewed

@@ -1,12 +1,13 @@
 import os
 import base64
-from .raw_logger import logger
+from simple_error_log import Errors
 class RawImage:
     FILE_TYPE_MAP = {".png": "png", ".jpg": "jpg", ".jpeg": "jpg"}
-    def __init__(self, filepath: str):
+    def __init__(self, filepath: str, errors: Errors):
+        self.errors = errors
         self.filepath = filepath
     def to_html(self):
@@ -21,7 +22,7 @@ class RawImage:
             else:
                 return f"""<p style="color:red">Note: Unable to process embedded image of type '{file_extension}', image ignored.</p>"""
         except Exception as e:
-            logger.exception("Exception converting image", e)
+            self.errors.exception("Exception converting image", e)
             return (
                 """<p style="color:red">Note: Error encountered processing image.</p>"""
             )

{raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx/raw_list.py RENAMED Viewed

@@ -1,9 +1,10 @@
 from .raw_list_item import RawListItem
-from .raw_logger import logger
+from simple_error_log import Errors
 class RawList:
-    def __init__(self, level=0):
+    def __init__(self, errors: Errors, level=0):
+        self.errors = errors
         self.items = []  # List to store RawListItems and nested RawLists
         self.level = level
@@ -13,15 +14,15 @@ class RawList:
         elif item.level > self.level:
             list = self.items[-1] if self.items else None
             if not isinstance(list, RawList):
-                list = RawList(item.level)
+                list = RawList(self.errors, item.level)
                 self.items.append(list)
             list.add(item)
             if item.level > self.level + 1:
-                logger.warning(
+                self.errors.warning(
                     f"Adding list item '{item}' to item but level jump greater than 1"
                 )
         else:
-            logger.error(
+            self.errors.error(
                 f"Failed to add list item '{item}' to list '{self}', levels are in error"
             )

{raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx/raw_list_item.py RENAMED Viewed

@@ -12,7 +12,8 @@ class RawListItem(RawParagraph):
         return f"{'  ' * self.level}{self.text}"
     def to_html(self) -> str:
-        return f"{escape(self.text)}"
+        return f"{self.text}"
+        # return f"{escape(self.text)}"
     def to_dict(self) -> dict:
         return {"type": "list_item", "text": self.text, "level": self.level}

{raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx/raw_paragraph.py RENAMED Viewed

@@ -11,7 +11,7 @@ class RawParagraph:
     def to_html(self) -> str:
         klass_list = " ".join(self.klasses)
         open_tag = f'<p class="{klass_list}">' if self.klasses else "<p>"
-        return f"{open_tag}{escape(self.text)}</p>"
+        return f"{open_tag}{self.text}</p>"
     def find(self, text: str) -> bool:
         return True if text in self.text else False
@@ -31,5 +31,9 @@ class RawParagraph:
             "classes": self.klasses,
         }
+    def add_span(self, text: str, klass: str) -> None:
+        new_str = f'<span class="{klass}">{text}</span>'
+        self.text = new_str + self.text[len(text) :]
     def _run_text(self) -> str:
         return "".join([run.text for run in self.runs])

{raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
 Name: raw_docx
-Version: 0.5.0
+Version: 0.7.0
 Summary: A package for processing and analyzing raw document formats
 Home-page: https://github.com/daveih/raw_docx
 Author: Dave Iberson-Hurst
@@ -18,12 +18,13 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: python-docx
-Requires-Dist: python-json-logger
+Requires-Dist: simple_error_log
 Dynamic: author
 Dynamic: classifier
 Dynamic: description
 Dynamic: description-content-type
 Dynamic: home-page
+Dynamic: license-file
 Dynamic: requires-dist
 Dynamic: requires-python
 Dynamic: summary

{raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx.egg-info/SOURCES.txt RENAMED Viewed

@@ -1,15 +1,13 @@
 LICENSE
 README.md
 setup.py
+src/raw_docx/__info__.py
 src/raw_docx/__init__.py
-src/raw_docx/__version__.py
-src/raw_docx/docx_paragraph.py
 src/raw_docx/raw_document.py
 src/raw_docx/raw_docx.py
 src/raw_docx/raw_image.py
 src/raw_docx/raw_list.py
 src/raw_docx/raw_list_item.py
-src/raw_docx/raw_logger.py
 src/raw_docx/raw_paragraph.py
 src/raw_docx/raw_run.py
 src/raw_docx/raw_section.py
@@ -21,6 +19,8 @@ src/raw_docx.egg-info/SOURCES.txt
 src/raw_docx.egg-info/dependency_links.txt
 src/raw_docx.egg-info/requires.txt
 src/raw_docx.egg-info/top_level.txt
+src/raw_docx/docx/__init__.py
+src/raw_docx/docx/docx_paragraph.py
 tests/test_docx_paragraph.py
 tests/test_integration.py
 tests/test_raw_document.py
@@ -28,7 +28,6 @@ tests/test_raw_docx.py
 tests/test_raw_image.py
 tests/test_raw_list.py
 tests/test_raw_list_item.py
-tests/test_raw_logger.py
 tests/test_raw_paragraph.py
 tests/test_raw_run.py
 tests/test_raw_section.py

raw_docx-0.7.0/src/raw_docx.egg-info/requires.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ python-docx
2	+ simple_error_log

{raw_docx-0.5.0 → raw_docx-0.7.0}/tests/test_docx_paragraph.py RENAMED Viewed

@@ -1,13 +1,14 @@
 from unittest.mock import Mock, PropertyMock
 from docx.text.paragraph import Paragraph
 from docx.text.run import Run
-from src.raw_docx.docx_paragraph import (
+from src.raw_docx.docx.docx_paragraph import (
     extract_runs,
     _tidy_runs_color,
     _get_highlight_color,
     _run_style_color,
     _get_font_colour,
 )
+from simple_error_log import Errors
 def create_mock_run(text="", color=None, highlight=None, style=None):
@@ -56,45 +57,48 @@ def create_mock_paragraph(text="", style_name="Normal"):
 def test_get_font_colour():
+    errors = Errors()
     """Test getting font color from a run"""
     # Test with no color
     run = create_mock_run()
-    assert _get_font_colour(run) is None
+    assert _get_font_colour(run, errors) is None
     # Test with color
     run = create_mock_run(color="FF0000")
-    assert _get_font_colour(run) == "FF0000"
+    assert _get_font_colour(run, errors) == "FF0000"
     # Test with exception
     run = Mock(spec=Run)
     run.font = (
         None  # This should cause an AttributeError when code tries to access color
     )
-    assert _get_font_colour(run) is None
+    assert _get_font_colour(run, errors) is None
 def test_get_highlight_color():
+    errors = Errors()
     """Test getting highlight color from a run"""
     # Test with no highlight
     run = Mock(spec=Run)
     run.font = None
-    assert _get_highlight_color(run) is None
+    assert _get_highlight_color(run, errors) is None
     # Test with highlight
     run = create_mock_run(highlight="yellow")
-    assert _get_highlight_color(run) == "yellow"
+    assert _get_highlight_color(run, errors) == "yellow"
 def test_run_style_color():
+    errors = Errors()
     """Test getting color from run style"""
     # Test with no style
     run = create_mock_run()
-    assert _run_style_color(run) is None
+    assert _run_style_color(run, errors) is None
     # Test with direct style color
     run = create_mock_run(style="Normal")
     type(run.style.font.color).rgb = PropertyMock(return_value="FF0000")
-    assert _run_style_color(run) == "FF0000"
+    assert _run_style_color(run, errors) == "FF0000"
     # Test with base style color
     run = create_mock_run(style="Normal")
@@ -104,10 +108,11 @@ def test_run_style_color():
     type(base_style.font.color).rgb = PropertyMock(return_value="0000FF")
     base_style.base_style = None
     run.style.base_style = base_style
-    assert _run_style_color(run) == "0000FF"
+    assert _run_style_color(run, errors) == "0000FF"
 def test_tidy_runs_color():
+    errors = Errors()
     """Test tidying up runs with colors"""
     # Test with different colors - should not merge
     data = [
@@ -126,7 +131,7 @@ def test_tidy_runs_color():
             "keep": True,
         },
     ]
-    result = _tidy_runs_color(data)
+    result = _tidy_runs_color(data, errors)
     assert len(result) == 2
     assert all(item["keep"] for item in result)
@@ -154,12 +159,13 @@ def test_tidy_runs_color():
             "keep": True,
         },
     ]
-    result = _tidy_runs_color(data)
+    result = _tidy_runs_color(data, errors)
     assert len(result) == 1
     assert result[0]["text"] == "Test More"
 def test_extract_runs_mixed_styles():
+    errors = Errors()
     """Test extracting runs with different styles"""
     paragraph = create_mock_paragraph()
     runs = [
@@ -169,12 +175,13 @@ def test_extract_runs_mixed_styles():
     ]
     paragraph.runs = runs
-    result = extract_runs(paragraph)
+    result = extract_runs(paragraph, errors)
     assert len(result) == 3
     assert [r.style for r in result] == ["Normal", "Normal", "Normal"]
 def test_extract_runs_with_mixed_colors():
+    errors = Errors()
     """Test extracting runs with different colors and highlights"""
     paragraph = create_mock_paragraph()
     runs = [
@@ -184,6 +191,6 @@ def test_extract_runs_with_mixed_colors():
     ]
     paragraph.runs = runs
-    result = extract_runs(paragraph)
+    result = extract_runs(paragraph, errors)
     assert len(result) == 3
     assert [r.color for r in result] == ["FF0000", "0000FF", "FF0000"]

{raw_docx-0.5.0 → raw_docx-0.7.0}/tests/test_raw_image.py RENAMED Viewed

@@ -1,5 +1,6 @@
 import pytest
 from src.raw_docx.raw_image import RawImage
+from simple_error_log import Errors
 @pytest.fixture
@@ -27,45 +28,51 @@ def temp_image_unsupported(tmp_path):
 def test_image_initialization_jpg(temp_image_jpg):
+    errors = Errors()
     """Test image initialization with JPG"""
-    image = RawImage(temp_image_jpg)
+    image = RawImage(temp_image_jpg, errors)
     assert image.filepath == temp_image_jpg
 def test_image_initialization_png(temp_image_png):
+    errors = Errors()
     """Test image initialization with PNG"""
-    image = RawImage(temp_image_png)
+    image = RawImage(temp_image_png, errors)
     assert image.filepath == temp_image_png
 def test_to_html_jpg(temp_image_jpg):
+    errors = Errors()
     """Test getting HTML for JPG image"""
-    image = RawImage(temp_image_jpg)
+    image = RawImage(temp_image_jpg, errors)
     html = image.to_html()
     assert "data:image/jpg;base64," in html
     assert '<img alt="alt text" src=' in html
 def test_to_html_png(temp_image_png):
+    errors = Errors()
     """Test getting HTML for PNG image"""
-    image = RawImage(temp_image_png)
+    image = RawImage(temp_image_png, errors)
     html = image.to_html()
     assert "data:image/png;base64," in html
     assert '<img alt="alt text" src=' in html
 def test_to_html_unsupported_format(temp_image_unsupported):
+    errors = Errors()
     """Test getting HTML for unsupported image format"""
-    image = RawImage(temp_image_unsupported)
+    image = RawImage(temp_image_unsupported, errors)
     html = image.to_html()
     assert "Unable to process embedded image" in html
     assert "color:red" in html
 def test_to_html_missing_file(tmp_path):
+    errors = Errors()
     """Test getting HTML for missing image file"""
     missing_file = str(tmp_path / "missing.jpg")
-    image = RawImage(missing_file)
+    image = RawImage(missing_file, errors)
     html = image.to_html()
     assert "Error encountered processing image" in html
     assert "color:red" in html
@@ -79,8 +86,9 @@ def test_supported_file_types():
 def test_to_dict_jpg(temp_image_jpg):
+    errors = Errors()
     """Test converting JPG image to dictionary"""
-    image = RawImage(temp_image_jpg)
+    image = RawImage(temp_image_jpg, errors)
     result = image.to_dict()
     assert result["type"] == "image"
     assert result["filepath"] == temp_image_jpg
@@ -89,8 +97,9 @@ def test_to_dict_jpg(temp_image_jpg):
 def test_to_dict_png(temp_image_png):
+    errors = Errors()
     """Test converting PNG image to dictionary"""
-    image = RawImage(temp_image_png)
+    image = RawImage(temp_image_png, errors)
     result = image.to_dict()
     assert result["type"] == "image"
     assert result["filepath"] == temp_image_png
@@ -99,8 +108,9 @@ def test_to_dict_png(temp_image_png):
 def test_to_dict_unsupported(temp_image_unsupported):
+    errors = Errors()
     """Test converting unsupported image to dictionary"""
-    image = RawImage(temp_image_unsupported)
+    image = RawImage(temp_image_unsupported, errors)
     result = image.to_dict()
     assert result["type"] == "image"
     assert result["filepath"] == temp_image_unsupported

{raw_docx-0.5.0 → raw_docx-0.7.0}/tests/test_raw_list.py RENAMED Viewed

@@ -1,13 +1,14 @@
 import pytest
-from unittest.mock import patch
 from src.raw_docx.raw_list import RawList
 from src.raw_docx.raw_list_item import RawListItem
 from src.raw_docx.raw_run import RawRun
+from simple_error_log import Errors
 @pytest.fixture
 def raw_list():
-    return RawList()
+    errors = Errors()
+    return RawList(errors)
 @pytest.fixture
@@ -22,16 +23,18 @@ def test_list_initialization(raw_list):
 def test_add_item(list_item):
+    errors = Errors()
     """Test adding an item to the list"""
-    list = RawList(1)
+    list = RawList(errors, 1)
     list.add(list_item)
     assert len(list.items) == 1
     assert list.items[0] == list_item
 def test_to_text():
+    errors = Errors()
     """Test to text"""
-    list = RawList(1)
+    list = RawList(errors, 1)
     items = [
         RawListItem([RawRun("Item 1", "", None, "Normal")], 1),
         RawListItem([RawRun("Item 1.1", "", None, "Normal")], 2),
@@ -43,8 +46,9 @@ def test_to_text():
 def test_add_multiple_items():
+    errors = Errors()
     """Test adding multiple items with different levels"""
-    list = RawList(1)
+    list = RawList(errors, 1)
     items = [
         RawListItem([RawRun("Item 1", "", None, "Normal")], 1),
         RawListItem([RawRun("Item 1.1", "", None, "Normal")], 2),
@@ -57,22 +61,21 @@ def test_add_multiple_items():
 def test_add_multiple_items_level_error():
+    errors = Errors()
     """Test adding multiple items with different levels with level error"""
-    with patch("src.raw_docx.raw_list.logger") as mock_logger:
-        list = RawList(1)
-        items = [
-            RawListItem([RawRun("Item 1", "", None, "Normal")], 1),
-            RawListItem([RawRun("Item 1.1.1", "", None, "Normal")], 3),
-            RawListItem([RawRun("Item 2", "", None, "Normal")], 1),
-        ]
-        for item in items:
-            list.add(item)
-        assert len(list.items) == 3
-        assert [item.level for item in list.items] == [1, 3, 1]
-        mock_logger.warning.assert_called_once()
-        error_msg = mock_logger.warning.call_args[0][0]
-        assert "Adding list item" in error_msg
-        assert "to item but level jump greater than 1" in error_msg
+    list = RawList(errors, 1)
+    items = [
+        RawListItem([RawRun("Item 1", "", None, "Normal")], 1),
+        RawListItem([RawRun("Item 1.1.1", "", None, "Normal")], 3),
+        RawListItem([RawRun("Item 2", "", None, "Normal")], 1),
+    ]
+    for item in items:
+        list.add(item)
+    assert len(list.items) == 3
+    assert [item.level for item in list.items] == [1, 3, 1]
+    assert errors.count() == 1
+    assert "Adding list item" in errors._items[0].message
+    assert "to item but level jump greater than 1" in errors._items[0].message
 def test_to_html(raw_list):
@@ -89,8 +92,9 @@ def test_to_html(raw_list):
 def test_nested_list_to_html():
+    errors = Errors()
     """Test converting nested list to HTML format"""
-    root_list = RawList(0)
+    root_list = RawList(errors, 0)
     items = [
         RawListItem([RawRun("Item 1", "", None, "Normal")], 1),
         RawListItem([RawRun("Subitem 1.1", "", None, "Normal")], 2),
@@ -104,26 +108,25 @@ def test_nested_list_to_html():
 def test_add_item_lower_level_logs_error():
+    errors = Errors()
     """Test that adding an item with lower level than list level logs an error"""
-    with patch("src.raw_docx.raw_list.logger") as mock_logger:
-        list_obj = RawList(2)  # List with level 2
-        item = RawListItem(
-            [RawRun("Test Item", "", None, "Normal")], 1
-        )  # Item with lower level 1
-        list_obj.add(item)
-        mock_logger.error.assert_called_once()
-        error_msg = mock_logger.error.call_args[0][0]
-        assert "Failed to add list item" in error_msg
-        assert "levels are in error" in error_msg
+    list_obj = RawList(errors, 2)  # List with level 2
+    item = RawListItem(
+        [RawRun("Test Item", "", None, "Normal")], 1
+    )  # Item with lower level 1
+    list_obj.add(item)
+    assert errors.count() == 1
+    assert "Failed to add list item" in errors._items[0].message
+    assert "levels are in error" in errors._items[0].message
 def test_to_dict():
+    errors = Errors()
     """Test converting list to dictionary"""
-    list_obj = RawList(1)
+    list_obj = RawList(errors, 1)
     item1 = RawListItem([RawRun("Item 1", "", None, "Normal")], 1)
     item2 = RawListItem([RawRun("Item 2", "", None, "Normal")], 1)
-    sublist = RawList(2)
+    sublist = RawList(errors, 2)
     sublist.add(RawListItem([RawRun("Subitem 1", "", None, "Normal")], 2))
     list_obj.add(item1)
@@ -151,10 +154,11 @@ def test_to_dict():
 def test_all_items():
-    list_obj = RawList(1)
+    errors = Errors()
+    list_obj = RawList(errors, 1)
     item1 = RawListItem([RawRun("Item 1", "", None, "Normal")], 1)
     item2 = RawListItem([RawRun("Item 2", "", None, "Normal")], 1)
-    sublist = RawList(2)
+    sublist = RawList(errors, 2)
     item3 = RawListItem([RawRun("Subitem 1", "", None, "Normal")], 2)
     sublist.add(item3)
     list_obj.add(item1)

{raw_docx-0.5.0 → raw_docx-0.7.0}/tests/test_raw_section.py RENAMED Viewed

@@ -8,6 +8,7 @@ from src.raw_docx.raw_table_row import RawTableRow
 from src.raw_docx.raw_table_cell import RawTableCell
 from src.raw_docx.raw_list_item import RawListItem
 from src.raw_docx.raw_run import RawRun
+from simple_error_log import Errors
 @pytest.fixture
@@ -23,9 +24,10 @@ def paragraph():
 @pytest.fixture
 def image(tmp_path):
+    errors = Errors()
     image_path = tmp_path / "test.jpg"
     image_path.write_bytes(b"dummy image content")
-    return RawImage(str(image_path))
+    return RawImage(str(image_path), errors)
 def test_section_initialization(section):
@@ -37,6 +39,7 @@ def test_section_initialization(section):
 def test_section_initialization_strip(section):
+    # errors = Errors()
     """Test section initialization"""
     section = RawSection(" Test Section    ", "     Test Content ", 1)
     assert section.title == "Test Section"
@@ -46,6 +49,7 @@ def test_section_initialization_strip(section):
 def test_add_paragraph(section, paragraph):
+    # errors = Errors()
     """Test adding a paragraph to section"""
     section.add(paragraph)
     assert len(section.items) == 1
@@ -53,6 +57,7 @@ def test_add_paragraph(section, paragraph):
 def test_add_image(section, image):
+    # errors = Errors()
     """Test adding an image to section"""
     section.add(image)
     assert len(section.items) == 1
@@ -60,13 +65,15 @@ def test_add_image(section, image):
 def test_is_in_list_empty_section(section):
+    # errors = Errors()
     """Test is_in_list with empty section"""
     assert not section.is_in_list()
 def test_is_in_list_with_list(section):
+    errors = Errors()
     """Test is_in_list with a list"""
-    section.add(RawList())
+    section.add(RawList(errors))
     assert section.is_in_list()
@@ -76,8 +83,9 @@ def test_current_list_no_list(section):
 def test_current_list_with_list(section):
+    errors = Errors()
     """Test current_list with existing list"""
-    test_list = RawList()
+    test_list = RawList(errors)
     section.add(test_list)
     assert section.current_list() == test_list
@@ -105,6 +113,7 @@ def test_to_html_between(section):
 def test_paragraphs(section):
+    errors = Errors()
     """Test getting all paragraphs"""
     run1 = RawRun("First", "", "", "Normal")
     run2 = RawRun("Second", "", "", "Normal")
@@ -113,7 +122,7 @@ def test_paragraphs(section):
     p2 = RawParagraph([run2])
     section.add(p1)
-    section.add(RawList())  # Add non-paragraph item
+    section.add(RawList(errors))  # Add non-paragraph item
     section.add(p2)
     paragraphs = section.paragraphs()
@@ -133,9 +142,10 @@ def test_tables(section):
 def test_lists(section):
+    errors = Errors()
     """Test getting all lists"""
     run = RawRun("Test", "", "", "Normal")
-    list1 = RawList()
+    list1 = RawList(errors)
     section.add(RawParagraph([run]))
     section.add(list1)
     lists = section.lists()
@@ -209,9 +219,10 @@ def test_find_first_at_start_not_found(section):
 def test_has_lists(section):
+    errors = Errors()
     """Test checking if section has lists"""
     assert not section.has_lists()
-    section.add(RawList())
+    section.add(RawList(errors))
     assert section.has_lists()
@@ -245,6 +256,7 @@ def test_next(section):
 def test_next_paragraph(section):
+    errors = Errors()
     """Test getting next paragraph"""
     run1 = RawRun("First", "", "", "Normal")
     run2 = RawRun("Second", "", "", "Normal")
@@ -253,7 +265,7 @@ def test_next_paragraph(section):
     p2 = RawParagraph([run2])
     section.add(p1)
-    section.add(RawList())  # Add non-paragraph item
+    section.add(RawList(errors))  # Add non-paragraph item
     section.add(p2)
     assert section.next_paragraph(0) == p1
@@ -293,12 +305,13 @@ def test_format_heading(section):
 def test_to_dict(section):
+    errors = Errors()
     """Test converting section to dictionary"""
     # Add various types of content
     run = RawRun("Test paragraph", "", "", "Normal")
     section.add(RawParagraph([run]))
-    list_obj = RawList(1)
+    list_obj = RawList(errors, 1)
     list_obj.add(RawListItem([RawRun("Test item", "", None, "Normal")], 1))
     section.add(list_obj)
@@ -407,6 +420,7 @@ def test_section_search(section):
 def test_section_list_operations(section):
+    errors = Errors()
     """Test list-related operations"""
     # Test empty section
     assert not section.is_in_list()
@@ -414,7 +428,7 @@ def test_section_list_operations(section):
     assert not section.has_lists()
     # Add a list
-    list1 = RawList(1)
+    list1 = RawList(errors, 1)
     section.add(list1)
     # Test with list

{raw_docx-0.5.0 → raw_docx-0.7.0}/tests/test_raw_table_cell.py RENAMED Viewed

@@ -4,11 +4,13 @@ from src.raw_docx.raw_paragraph import RawParagraph
 from src.raw_docx.raw_list import RawList
 from src.raw_docx.raw_table import RawTable
 from src.raw_docx.raw_run import RawRun
+from simple_error_log import Errors
 @pytest.fixture
 def list():
-    return RawList()
+    errors = Errors()
+    return RawList(errors)
 @pytest.fixture

raw_docx-0.5.0/src/raw_docx/__version__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- __package_version__ = "0.5.0"

raw_docx-0.5.0/src/raw_docx/raw_logger.py DELETED Viewed

@@ -1,67 +0,0 @@
-import sys
-import logging
-from pathlib import Path
-from typing import Optional
-from pythonjsonlogger import jsonlogger
-class RawLogger:
-    _instance = None
-    _initialized = False
-    def __new__(cls):
-        if cls._instance is None:
-            cls._instance = super().__new__(cls)
-        return cls._instance
-    def __init__(self):
-        if not RawLogger._initialized:
-            self.logger = logging.getLogger("raw_docx")
-            self.logger.setLevel(logging.INFO)
-            # Create JSON formatter
-            formatter = jsonlogger.JsonFormatter(
-                fmt="%(asctime)s %(name)s %(levelname)s %(message)s",
-                datefmt="%Y-%m-%d %H:%M:%S",
-            )
-            # Console handler
-            console_handler = logging.StreamHandler(sys.stdout)
-            console_handler.setFormatter(formatter)
-            self.logger.addHandler(console_handler)
-            RawLogger._initialized = True
-    def setup_file_logging(self, log_dir: Optional[str] = None):
-        """Setup file logging in addition to console logging"""
-        if log_dir:
-            log_path = Path(log_dir)
-            log_path.mkdir(parents=True, exist_ok=True)
-            file_handler = logging.FileHandler(log_path / "raw_docx.log")
-            file_handler.setFormatter(
-                jsonlogger.JsonFormatter(
-                    fmt="%(asctime)s %(name)s %(levelname)s %(message)s",
-                    datefmt="%Y-%m-%d %H:%M:%S",
-                )
-            )
-            self.logger.addHandler(file_handler)
-    def info(self, message: str):
-        """Log info message"""
-        self.logger.info(message)
-    def warning(self, message: str):
-        """Log warning message"""
-        self.logger.warning(message)
-    def error(self, message: str):
-        """Log error message"""
-        self.logger.error(message)
-    def exception(self, message: str, exc: Exception):
-        """Log exception with message"""
-        self.logger.exception(message, exc_info=exc)
-# Create singleton instance
-logger = RawLogger()

raw_docx-0.5.0/src/raw_docx.egg-info/requires.txt DELETED Viewed

	@@ -1,2 +0,0 @@
1	- python-docx
2	- python-json-logger

raw_docx-0.5.0/tests/test_raw_logger.py DELETED Viewed

@@ -1,112 +0,0 @@
-import json
-import logging
-import pytest
-from pathlib import Path
-from src.raw_docx.raw_logger import RawLogger
-@pytest.fixture
-def logger_instance():
-    """Fixture to provide a fresh logger instance for each test"""
-    # Reset the singleton state
-    RawLogger._instance = None
-    RawLogger._initialized = False
-    # Clear any existing handlers
-    logger = logging.getLogger("raw_docx")
-    logger.handlers.clear()
-    return RawLogger()
-@pytest.fixture
-def temp_log_dir(tmp_path):
-    """Fixture to provide a temporary directory for log files"""
-    log_dir = tmp_path / "logs"
-    log_dir.mkdir()
-    return str(log_dir)
-def test_singleton_pattern():
-    """Test that RawLogger implements singleton pattern correctly"""
-    logger1 = RawLogger()
-    logger2 = RawLogger()
-    assert logger1 is logger2
-def test_default_initialization(logger_instance):
-    """Test default logger initialization"""
-    assert logger_instance.logger.level == logging.INFO
-    assert len(logger_instance.logger.handlers) == 1
-    assert isinstance(logger_instance.logger.handlers[0], logging.StreamHandler)
-def test_file_logging_setup(logger_instance, temp_log_dir):
-    """Test setting up file logging"""
-    logger_instance.setup_file_logging(temp_log_dir)
-    # Check that a file handler was added
-    assert len(logger_instance.logger.handlers) == 2
-    assert any(
-        isinstance(h, logging.FileHandler) for h in logger_instance.logger.handlers
-    )
-    # Check that log file was created
-    log_file = Path(temp_log_dir) / "raw_docx.log"
-    assert log_file.exists()
-def test_log_message_format(logger_instance, temp_log_dir, caplog):
-    """Test that log messages are properly formatted as JSON"""
-    logger_instance.setup_file_logging(temp_log_dir)
-    test_message = "Test log message"
-    logger_instance.info(test_message)
-    # Read the log file
-    log_file = Path(temp_log_dir) / "raw_docx.log"
-    with open(log_file) as f:
-        log_entry = json.loads(f.readline())
-    # Check JSON structure
-    assert "asctime" in log_entry
-    assert "name" in log_entry
-    assert "levelname" in log_entry
-    assert "message" in log_entry
-    assert log_entry["message"] == test_message
-    assert log_entry["levelname"] == "INFO"
-def test_log_levels(logger_instance, caplog):
-    """Test different log levels"""
-    test_message = "Test message"
-    logger_instance.info(test_message)
-    assert "INFO" in caplog.text
-    logger_instance.warning(test_message)
-    assert "WARNING" in caplog.text
-    logger_instance.error(test_message)
-    assert "ERROR" in caplog.text
-def test_exception_logging(logger_instance, caplog):
-    """Test exception logging"""
-    try:
-        raise ValueError("Test exception")
-    except ValueError as e:
-        logger_instance.exception("Error occurred", e)
-    assert "ERROR" in caplog.text
-    assert "Test exception" in caplog.text
-def test_invalid_log_directory(logger_instance, tmp_path):
-    """Test handling of invalid log directory"""
-    invalid_dir = tmp_path / "nonexistent" / "logs"
-    logger_instance.setup_file_logging(str(invalid_dir))
-    # Check that the directory was created
-    assert invalid_dir.exists()
-    assert invalid_dir.is_dir()