raw-docx 0.5.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {raw_docx-0.5.0 → raw_docx-0.7.0}/PKG-INFO +4 -3
  2. {raw_docx-0.5.0 → raw_docx-0.7.0}/setup.py +2 -2
  3. raw_docx-0.7.0/src/raw_docx/__info__.py +1 -0
  4. raw_docx-0.7.0/src/raw_docx/__init__.py +25 -0
  5. {raw_docx-0.5.0/src/raw_docx → raw_docx-0.7.0/src/raw_docx/docx}/docx_paragraph.py +23 -22
  6. {raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx/raw_docx.py +33 -32
  7. {raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx/raw_image.py +4 -3
  8. {raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx/raw_list.py +6 -5
  9. {raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx/raw_list_item.py +2 -1
  10. {raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx/raw_paragraph.py +5 -1
  11. {raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx.egg-info/PKG-INFO +4 -3
  12. {raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx.egg-info/SOURCES.txt +3 -4
  13. raw_docx-0.7.0/src/raw_docx.egg-info/requires.txt +2 -0
  14. {raw_docx-0.5.0 → raw_docx-0.7.0}/tests/test_docx_paragraph.py +20 -13
  15. {raw_docx-0.5.0 → raw_docx-0.7.0}/tests/test_raw_image.py +19 -9
  16. {raw_docx-0.5.0 → raw_docx-0.7.0}/tests/test_raw_list.py +40 -36
  17. {raw_docx-0.5.0 → raw_docx-0.7.0}/tests/test_raw_section.py +23 -9
  18. {raw_docx-0.5.0 → raw_docx-0.7.0}/tests/test_raw_table_cell.py +3 -1
  19. raw_docx-0.5.0/src/raw_docx/__version__.py +0 -1
  20. raw_docx-0.5.0/src/raw_docx/raw_logger.py +0 -67
  21. raw_docx-0.5.0/src/raw_docx.egg-info/requires.txt +0 -2
  22. raw_docx-0.5.0/tests/test_raw_logger.py +0 -112
  23. {raw_docx-0.5.0 → raw_docx-0.7.0}/LICENSE +0 -0
  24. {raw_docx-0.5.0 → raw_docx-0.7.0}/README.md +0 -0
  25. {raw_docx-0.5.0 → raw_docx-0.7.0}/setup.cfg +0 -0
  26. {raw_docx-0.5.0/src/raw_docx → raw_docx-0.7.0/src/raw_docx/docx}/__init__.py +0 -0
  27. {raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx/raw_document.py +0 -0
  28. {raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx/raw_run.py +0 -0
  29. {raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx/raw_section.py +0 -0
  30. {raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx/raw_table.py +0 -0
  31. {raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx/raw_table_cell.py +0 -0
  32. {raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx/raw_table_row.py +0 -0
  33. {raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx.egg-info/dependency_links.txt +0 -0
  34. {raw_docx-0.5.0 → raw_docx-0.7.0}/src/raw_docx.egg-info/top_level.txt +0 -0
  35. {raw_docx-0.5.0 → raw_docx-0.7.0}/tests/test_integration.py +0 -0
  36. {raw_docx-0.5.0 → raw_docx-0.7.0}/tests/test_raw_document.py +0 -0
  37. {raw_docx-0.5.0 → raw_docx-0.7.0}/tests/test_raw_docx.py +0 -0
  38. {raw_docx-0.5.0 → raw_docx-0.7.0}/tests/test_raw_list_item.py +0 -0
  39. {raw_docx-0.5.0 → raw_docx-0.7.0}/tests/test_raw_paragraph.py +0 -0
  40. {raw_docx-0.5.0 → raw_docx-0.7.0}/tests/test_raw_run.py +0 -0
  41. {raw_docx-0.5.0 → raw_docx-0.7.0}/tests/test_raw_table.py +0 -0
  42. {raw_docx-0.5.0 → raw_docx-0.7.0}/tests/test_raw_table_row.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: raw_docx
3
- Version: 0.5.0
3
+ Version: 0.7.0
4
4
  Summary: A package for processing and analyzing raw document formats
5
5
  Home-page: https://github.com/daveih/raw_docx
6
6
  Author: Dave Iberson-Hurst
@@ -18,12 +18,13 @@ Requires-Python: >=3.8
18
18
  Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
20
  Requires-Dist: python-docx
21
- Requires-Dist: python-json-logger
21
+ Requires-Dist: simple_error_log
22
22
  Dynamic: author
23
23
  Dynamic: classifier
24
24
  Dynamic: description
25
25
  Dynamic: description-content-type
26
26
  Dynamic: home-page
27
+ Dynamic: license-file
27
28
  Dynamic: requires-dist
28
29
  Dynamic: requires-python
29
30
  Dynamic: summary
@@ -4,7 +4,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
4
4
  long_description = fh.read()
5
5
 
6
6
  package_info = {}
7
- with open("src/raw_docx/__version__.py") as fp:
7
+ with open("src/raw_docx/__info__.py") as fp:
8
8
  exec(fp.read(), package_info)
9
9
 
10
10
  setup(
@@ -19,7 +19,7 @@ setup(
19
19
  packages=find_packages(where="src"),
20
20
  package_dir={"": "src"},
21
21
  package_data={},
22
- install_requires=["python-docx", "python-json-logger"],
22
+ install_requires=["python-docx", "simple_error_log"],
23
23
  tests_require=["pytest", "pytest-cov", "pytest-mock", "python-dotenv"],
24
24
  classifiers=[
25
25
  "Development Status :: 3 - Alpha",
@@ -0,0 +1 @@
1
+ __package_version__ = "0.7.0"
@@ -0,0 +1,25 @@
1
+ from .raw_docx import RawDocx
2
+ from .raw_document import RawDocument
3
+ from .raw_image import RawImage
4
+ from .raw_list_item import RawListItem
5
+ from .raw_list import RawList
6
+ from .raw_paragraph import RawParagraph
7
+ from .raw_run import RawRun
8
+ from .raw_section import RawSection
9
+ from .raw_table_cell import RawTableCell
10
+ from .raw_table_row import RawTableRow
11
+ from .raw_table import RawTable
12
+
13
+ __all__ = [
14
+ "RawDocx",
15
+ "RawDocument",
16
+ "RawImage",
17
+ "RawList",
18
+ "RawListItem",
19
+ "RawParagraph",
20
+ "RawRun",
21
+ "RawSection",
22
+ "RawTableCell",
23
+ "RawTableRow",
24
+ "RawTable",
25
+ ]
@@ -1,31 +1,35 @@
1
1
  from docx.text.paragraph import Paragraph
2
2
  from docx.styles.style import ParagraphStyle
3
3
  from docx.text.run import Run
4
- from .raw_logger import logger
5
- from .raw_run import RawRun
4
+ from simple_error_log import Errors
5
+ from raw_docx.raw_run import RawRun
6
6
 
7
7
 
8
- def extract_runs(paragraph: Paragraph) -> list[dict]:
8
+ def install():
9
+ setattr(Paragraph, "extract_runs", extract_runs)
10
+
11
+
12
+ def extract_runs(paragraph: Paragraph, errors: Errors) -> list[RawRun]:
9
13
  if paragraph.text.startswith(
10
14
  "This template is intended for interventional clinical trials. The template is suitable"
11
15
  ):
12
- logger.info(f"Paragraph style {paragraph.style.name}")
16
+ errors.info(f"Paragraph style {paragraph.style.name}")
13
17
  data = [
14
18
  {
15
19
  "text": run.text,
16
- "color": _get_run_color(paragraph.style, run),
17
- "highlight": _get_highlight_color(run),
20
+ "color": _get_run_color(paragraph.style, run, errors),
21
+ "highlight": _get_highlight_color(run, errors),
18
22
  "keep": True,
19
23
  # "style": run.style.name if run.style else paragraph.style.name
20
24
  "style": paragraph.style.name,
21
25
  }
22
26
  for run in paragraph.runs
23
27
  ]
24
- data = _tidy_runs_color(data)
28
+ data = _tidy_runs_color(data, errors)
25
29
  return [RawRun(x["text"], x["color"], x["highlight"], x["style"]) for x in data]
26
30
 
27
31
 
28
- def _tidy_runs_color(data: list[dict]) -> list[dict]:
32
+ def _tidy_runs_color(data: list[dict], errors: Errors) -> list[dict]:
29
33
  more = False
30
34
  for index, run in enumerate(data):
31
35
  if (
@@ -38,14 +42,14 @@ def _tidy_runs_color(data: list[dict]) -> list[dict]:
38
42
  more = True
39
43
  new_data = [x for x in data if x["keep"]]
40
44
  if more:
41
- new_data = _tidy_runs_color(new_data)
45
+ new_data = _tidy_runs_color(new_data, errors)
42
46
  return new_data
43
47
 
44
48
 
45
- def _get_run_color(paragraph: Paragraph, run: Run) -> str | None:
46
- paragraph_color = _get_font_colour(paragraph)
47
- font_color = _get_font_colour(run)
48
- style_color = _run_style_color(run)
49
+ def _get_run_color(paragraph: Paragraph, run: Run, errors: Errors) -> str | None:
50
+ paragraph_color = _get_font_colour(paragraph, errors)
51
+ font_color = _get_font_colour(run, errors)
52
+ style_color = _run_style_color(run, errors)
49
53
  if font_color:
50
54
  result = str(font_color)
51
55
  elif style_color:
@@ -55,15 +59,15 @@ def _get_run_color(paragraph: Paragraph, run: Run) -> str | None:
55
59
  return result
56
60
 
57
61
 
58
- def _get_highlight_color(run: Run) -> str | None:
62
+ def _get_highlight_color(run: Run, errors: Errors) -> str | None:
59
63
  try:
60
64
  return str(run.font.highlight_color)
61
65
  except Exception as e:
62
- logger.exception("Failed to get run highlight color", e)
66
+ errors.exception("Failed to get run highlight color", e)
63
67
  return None
64
68
 
65
69
 
66
- def _run_style_color(run: Run) -> str | None:
70
+ def _run_style_color(run: Run, errors: Errors) -> str | None:
67
71
  try:
68
72
  run_color = None
69
73
  run_style = run.style
@@ -74,16 +78,13 @@ def _run_style_color(run: Run) -> str | None:
74
78
  run_style = run_style.base_style
75
79
  return run_color
76
80
  except Exception as e:
77
- logger.exception("Failed to get run style color", e)
81
+ errors.exception("Failed to get run style color", e)
78
82
  return None
79
83
 
80
84
 
81
- def _get_font_colour(item: Run | ParagraphStyle) -> str | None:
85
+ def _get_font_colour(item: Run | ParagraphStyle, errors: Errors) -> str | None:
82
86
  try:
83
87
  return item.font.color.rgb
84
88
  except Exception as e:
85
- logger.exception("Failed to get font color", e)
89
+ errors.exception("Failed to get font color", e)
86
90
  return None
87
-
88
-
89
- setattr(Paragraph, "extract_runs", extract_runs)
@@ -3,15 +3,16 @@ import re
3
3
  import docx
4
4
  import zipfile
5
5
  from pathlib import Path
6
- from .raw_document import RawDocument
7
- from .raw_section import RawSection
8
- from .raw_paragraph import RawParagraph
9
- from .raw_image import RawImage
10
- from .raw_table import RawTable
11
- from .raw_table_row import RawTableRow
12
- from .raw_table_cell import RawTableCell
13
- from .raw_list import RawList
14
- from .raw_list_item import RawListItem
6
+ from raw_docx.raw_document import RawDocument
7
+ from raw_docx.raw_section import RawSection
8
+ from raw_docx.raw_paragraph import RawParagraph
9
+ from raw_docx.raw_image import RawImage
10
+ from raw_docx.raw_table import RawTable
11
+ from raw_docx.raw_table_row import RawTableRow
12
+ from raw_docx.raw_table_cell import RawTableCell
13
+ from raw_docx.raw_list import RawList
14
+ from raw_docx.raw_list_item import RawListItem
15
+ from raw_docx.docx.docx_paragraph import install
15
16
  from docx import Document as DocXProcessor
16
17
  from docx.document import Document
17
18
  from docx.oxml.table import CT_Tbl, CT_TcPr
@@ -19,8 +20,7 @@ from docx.oxml.text.paragraph import CT_P
19
20
  from docx.table import Table, _Cell
20
21
  from docx.text.paragraph import Paragraph
21
22
  from lxml import etree
22
- from .raw_logger import logger
23
- from .docx_paragraph import extract_runs # Needed such that method inserted into class
23
+ from simple_error_log import Errors
24
24
 
25
25
 
26
26
  class RawDocx:
@@ -28,12 +28,17 @@ class RawDocx:
28
28
  pass
29
29
 
30
30
  def __init__(self, full_path: str):
31
+ install()
32
+ self.errors = Errors()
31
33
  path = Path(full_path)
32
34
  # path.stem, path.suffix[1:]
33
35
  self.full_path = full_path
34
36
  self.dir = path.parent
35
37
  self.filename = path.name
36
38
  self.image_path = os.path.join(self.dir, "images")
39
+ self.errors.debug(
40
+ f"RawDocx initialisation: full_path='{self.full_path}', dir='{self.dir}', image_path0'{self.image_path}', filename='{self.filename}"
41
+ )
37
42
  self.image_rels = {}
38
43
  self._organise_dir()
39
44
  self.source_document = DocXProcessor(self.full_path)
@@ -46,28 +51,26 @@ class RawDocx:
46
51
  except FileExistsError:
47
52
  pass
48
53
  except Exception as e:
49
- logger.exception("Failed to create image directory", e)
54
+ self.errors.exception("Failed to create image directory", e)
50
55
 
51
56
  def _process(self):
52
57
  try:
53
- self._extract_images()
58
+ self._process_images()
54
59
  for block_item in self._iter_block_items(self.source_document):
55
60
  target_section = self.target_document.current_section()
56
61
  if isinstance(block_item, Paragraph):
57
- # print(f"PARA BLOCK: {block_item.text}")
58
62
  self._process_paragraph(block_item, target_section, self.image_rels)
59
63
  elif isinstance(block_item, Table):
60
64
  self._process_table(block_item, target_section)
61
65
  else:
62
- logger.warning("Ignoring element")
66
+ self.errors.warning("Ignoring element")
63
67
  raise ValueError
64
68
  except Exception as e:
65
- logger.exception("Exception raised processing document", e)
69
+ self.errors.exception("Exception raised processing document", e)
66
70
 
67
- def _extract_images(self):
71
+ def _process_images(self):
68
72
  # Extract images to image dir
69
73
  self._extract_images()
70
- # Save all 'rId:filenames' as references
71
74
  for r in self.source_document.part.rels.values():
72
75
  if isinstance(r._target, docx.parts.image.ImagePart):
73
76
  self.image_rels[r.rId] = os.path.join(
@@ -91,9 +94,8 @@ class RawDocx:
91
94
 
92
95
  for child in parent_elm.iterchildren():
93
96
  if isinstance(child, str):
94
- logger.warning(f"Ignoring eTree element {child}")
97
+ self.errors.warning(f"Ignoring eTree element {child}")
95
98
  elif isinstance(child, CT_P):
96
- # print(f"PARA: {child.text}")
97
99
  yield Paragraph(child, parent)
98
100
  elif isinstance(child, CT_Tbl):
99
101
  yield Table(child, parent)
@@ -109,13 +111,12 @@ class RawDocx:
109
111
  ):
110
112
  pass
111
113
  else:
112
- logger.warning(f"Ignoring eTree element {self._tree(child)}")
114
+ self.errors.warning(f"Ignoring eTree element {self._tree(child)}")
113
115
 
114
116
  else:
115
117
  raise ValueError(f"something's not right with a child {type(child)}")
116
118
 
117
119
  def _tree(self, node, tab=1):
118
- # print(f"{' ' * tab}{node.tag} {node.text}")
119
120
  for child in node:
120
121
  self._tree(child, tab + 1)
121
122
 
@@ -155,7 +156,9 @@ class RawDocx:
155
156
  if block_item.tag == CT_TcPr:
156
157
  pass
157
158
  else:
158
- logger.warning(f"Ignoring eTree element {block_item.tag}")
159
+ self.errors.warning(
160
+ f"Ignoring eTree element {block_item.tag}"
161
+ )
159
162
  else:
160
163
  raise self.LogicError(
161
164
  f"something's not right with a child {type(block_item)}"
@@ -164,15 +167,15 @@ class RawDocx:
164
167
  def _process_cell(self, paragraph, target_cell: RawTableCell):
165
168
  if self._is_list(paragraph):
166
169
  list_level = self.get_list_level(paragraph)
167
- item = RawListItem(paragraph.extract_runs(), list_level)
170
+ item = RawListItem(paragraph.extract_runs(self.errors), list_level)
168
171
  if target_cell.is_in_list():
169
172
  list = target_cell.current_list()
170
173
  else:
171
- list = RawList()
174
+ list = RawList(self.errors)
172
175
  target_cell.add(list)
173
176
  list.add(item)
174
177
  else:
175
- target_paragraph = RawParagraph(paragraph.extract_runs())
178
+ target_paragraph = RawParagraph(paragraph.extract_runs(self.errors))
176
179
  target_cell.add(target_paragraph)
177
180
 
178
181
  def _process_paragraph(
@@ -183,23 +186,21 @@ class RawDocx:
183
186
  target_section = RawSection(paragraph.text, paragraph.text, level)
184
187
  self.target_document.add(target_section)
185
188
  elif self._is_list(paragraph):
186
- # print(f"START LIST: {paragraph.text}")
187
189
  list_level = self.get_list_level(paragraph)
188
- item = RawListItem(paragraph.extract_runs(), list_level)
190
+ item = RawListItem(paragraph.extract_runs(self.errors), list_level)
189
191
  if target_section.is_in_list():
190
192
  list = target_section.current_list()
191
193
  else:
192
- list = RawList()
194
+ list = RawList(self.errors)
193
195
  target_section.add(list)
194
196
  list.add(item)
195
197
  elif "Graphic" in paragraph._p.xml:
196
198
  for rId in image_rels:
197
199
  if rId in paragraph._p.xml:
198
- target_image = RawImage(image_rels[rId])
200
+ target_image = RawImage(image_rels[rId], self.errors)
199
201
  target_section.add(target_image)
200
202
  else:
201
- # print(f"START RUNS: {paragraph.text}")
202
- target_paragraph = RawParagraph(paragraph.extract_runs())
203
+ target_paragraph = RawParagraph(paragraph.extract_runs(self.errors))
203
204
  target_section.add(target_paragraph)
204
205
 
205
206
  def get_list_level(self, paragraph):
@@ -1,12 +1,13 @@
1
1
  import os
2
2
  import base64
3
- from .raw_logger import logger
3
+ from simple_error_log import Errors
4
4
 
5
5
 
6
6
  class RawImage:
7
7
  FILE_TYPE_MAP = {".png": "png", ".jpg": "jpg", ".jpeg": "jpg"}
8
8
 
9
- def __init__(self, filepath: str):
9
+ def __init__(self, filepath: str, errors: Errors):
10
+ self.errors = errors
10
11
  self.filepath = filepath
11
12
 
12
13
  def to_html(self):
@@ -21,7 +22,7 @@ class RawImage:
21
22
  else:
22
23
  return f"""<p style="color:red">Note: Unable to process embedded image of type '{file_extension}', image ignored.</p>"""
23
24
  except Exception as e:
24
- logger.exception("Exception converting image", e)
25
+ self.errors.exception("Exception converting image", e)
25
26
  return (
26
27
  """<p style="color:red">Note: Error encountered processing image.</p>"""
27
28
  )
@@ -1,9 +1,10 @@
1
1
  from .raw_list_item import RawListItem
2
- from .raw_logger import logger
2
+ from simple_error_log import Errors
3
3
 
4
4
 
5
5
  class RawList:
6
- def __init__(self, level=0):
6
+ def __init__(self, errors: Errors, level=0):
7
+ self.errors = errors
7
8
  self.items = [] # List to store RawListItems and nested RawLists
8
9
  self.level = level
9
10
 
@@ -13,15 +14,15 @@ class RawList:
13
14
  elif item.level > self.level:
14
15
  list = self.items[-1] if self.items else None
15
16
  if not isinstance(list, RawList):
16
- list = RawList(item.level)
17
+ list = RawList(self.errors, item.level)
17
18
  self.items.append(list)
18
19
  list.add(item)
19
20
  if item.level > self.level + 1:
20
- logger.warning(
21
+ self.errors.warning(
21
22
  f"Adding list item '{item}' to item but level jump greater than 1"
22
23
  )
23
24
  else:
24
- logger.error(
25
+ self.errors.error(
25
26
  f"Failed to add list item '{item}' to list '{self}', levels are in error"
26
27
  )
27
28
 
@@ -12,7 +12,8 @@ class RawListItem(RawParagraph):
12
12
  return f"{' ' * self.level}{self.text}"
13
13
 
14
14
  def to_html(self) -> str:
15
- return f"{escape(self.text)}"
15
+ return f"{self.text}"
16
+ # return f"{escape(self.text)}"
16
17
 
17
18
  def to_dict(self) -> dict:
18
19
  return {"type": "list_item", "text": self.text, "level": self.level}
@@ -11,7 +11,7 @@ class RawParagraph:
11
11
  def to_html(self) -> str:
12
12
  klass_list = " ".join(self.klasses)
13
13
  open_tag = f'<p class="{klass_list}">' if self.klasses else "<p>"
14
- return f"{open_tag}{escape(self.text)}</p>"
14
+ return f"{open_tag}{self.text}</p>"
15
15
 
16
16
  def find(self, text: str) -> bool:
17
17
  return True if text in self.text else False
@@ -31,5 +31,9 @@ class RawParagraph:
31
31
  "classes": self.klasses,
32
32
  }
33
33
 
34
+ def add_span(self, text: str, klass: str) -> None:
35
+ new_str = f'<span class="{klass}">{text}</span>'
36
+ self.text = new_str + self.text[len(text) :]
37
+
34
38
  def _run_text(self) -> str:
35
39
  return "".join([run.text for run in self.runs])
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: raw_docx
3
- Version: 0.5.0
3
+ Version: 0.7.0
4
4
  Summary: A package for processing and analyzing raw document formats
5
5
  Home-page: https://github.com/daveih/raw_docx
6
6
  Author: Dave Iberson-Hurst
@@ -18,12 +18,13 @@ Requires-Python: >=3.8
18
18
  Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
20
  Requires-Dist: python-docx
21
- Requires-Dist: python-json-logger
21
+ Requires-Dist: simple_error_log
22
22
  Dynamic: author
23
23
  Dynamic: classifier
24
24
  Dynamic: description
25
25
  Dynamic: description-content-type
26
26
  Dynamic: home-page
27
+ Dynamic: license-file
27
28
  Dynamic: requires-dist
28
29
  Dynamic: requires-python
29
30
  Dynamic: summary
@@ -1,15 +1,13 @@
1
1
  LICENSE
2
2
  README.md
3
3
  setup.py
4
+ src/raw_docx/__info__.py
4
5
  src/raw_docx/__init__.py
5
- src/raw_docx/__version__.py
6
- src/raw_docx/docx_paragraph.py
7
6
  src/raw_docx/raw_document.py
8
7
  src/raw_docx/raw_docx.py
9
8
  src/raw_docx/raw_image.py
10
9
  src/raw_docx/raw_list.py
11
10
  src/raw_docx/raw_list_item.py
12
- src/raw_docx/raw_logger.py
13
11
  src/raw_docx/raw_paragraph.py
14
12
  src/raw_docx/raw_run.py
15
13
  src/raw_docx/raw_section.py
@@ -21,6 +19,8 @@ src/raw_docx.egg-info/SOURCES.txt
21
19
  src/raw_docx.egg-info/dependency_links.txt
22
20
  src/raw_docx.egg-info/requires.txt
23
21
  src/raw_docx.egg-info/top_level.txt
22
+ src/raw_docx/docx/__init__.py
23
+ src/raw_docx/docx/docx_paragraph.py
24
24
  tests/test_docx_paragraph.py
25
25
  tests/test_integration.py
26
26
  tests/test_raw_document.py
@@ -28,7 +28,6 @@ tests/test_raw_docx.py
28
28
  tests/test_raw_image.py
29
29
  tests/test_raw_list.py
30
30
  tests/test_raw_list_item.py
31
- tests/test_raw_logger.py
32
31
  tests/test_raw_paragraph.py
33
32
  tests/test_raw_run.py
34
33
  tests/test_raw_section.py
@@ -0,0 +1,2 @@
1
+ python-docx
2
+ simple_error_log
@@ -1,13 +1,14 @@
1
1
  from unittest.mock import Mock, PropertyMock
2
2
  from docx.text.paragraph import Paragraph
3
3
  from docx.text.run import Run
4
- from src.raw_docx.docx_paragraph import (
4
+ from src.raw_docx.docx.docx_paragraph import (
5
5
  extract_runs,
6
6
  _tidy_runs_color,
7
7
  _get_highlight_color,
8
8
  _run_style_color,
9
9
  _get_font_colour,
10
10
  )
11
+ from simple_error_log import Errors
11
12
 
12
13
 
13
14
  def create_mock_run(text="", color=None, highlight=None, style=None):
@@ -56,45 +57,48 @@ def create_mock_paragraph(text="", style_name="Normal"):
56
57
 
57
58
 
58
59
  def test_get_font_colour():
60
+ errors = Errors()
59
61
  """Test getting font color from a run"""
60
62
  # Test with no color
61
63
  run = create_mock_run()
62
- assert _get_font_colour(run) is None
64
+ assert _get_font_colour(run, errors) is None
63
65
 
64
66
  # Test with color
65
67
  run = create_mock_run(color="FF0000")
66
- assert _get_font_colour(run) == "FF0000"
68
+ assert _get_font_colour(run, errors) == "FF0000"
67
69
 
68
70
  # Test with exception
69
71
  run = Mock(spec=Run)
70
72
  run.font = (
71
73
  None # This should cause an AttributeError when code tries to access color
72
74
  )
73
- assert _get_font_colour(run) is None
75
+ assert _get_font_colour(run, errors) is None
74
76
 
75
77
 
76
78
  def test_get_highlight_color():
79
+ errors = Errors()
77
80
  """Test getting highlight color from a run"""
78
81
  # Test with no highlight
79
82
  run = Mock(spec=Run)
80
83
  run.font = None
81
- assert _get_highlight_color(run) is None
84
+ assert _get_highlight_color(run, errors) is None
82
85
 
83
86
  # Test with highlight
84
87
  run = create_mock_run(highlight="yellow")
85
- assert _get_highlight_color(run) == "yellow"
88
+ assert _get_highlight_color(run, errors) == "yellow"
86
89
 
87
90
 
88
91
  def test_run_style_color():
92
+ errors = Errors()
89
93
  """Test getting color from run style"""
90
94
  # Test with no style
91
95
  run = create_mock_run()
92
- assert _run_style_color(run) is None
96
+ assert _run_style_color(run, errors) is None
93
97
 
94
98
  # Test with direct style color
95
99
  run = create_mock_run(style="Normal")
96
100
  type(run.style.font.color).rgb = PropertyMock(return_value="FF0000")
97
- assert _run_style_color(run) == "FF0000"
101
+ assert _run_style_color(run, errors) == "FF0000"
98
102
 
99
103
  # Test with base style color
100
104
  run = create_mock_run(style="Normal")
@@ -104,10 +108,11 @@ def test_run_style_color():
104
108
  type(base_style.font.color).rgb = PropertyMock(return_value="0000FF")
105
109
  base_style.base_style = None
106
110
  run.style.base_style = base_style
107
- assert _run_style_color(run) == "0000FF"
111
+ assert _run_style_color(run, errors) == "0000FF"
108
112
 
109
113
 
110
114
  def test_tidy_runs_color():
115
+ errors = Errors()
111
116
  """Test tidying up runs with colors"""
112
117
  # Test with different colors - should not merge
113
118
  data = [
@@ -126,7 +131,7 @@ def test_tidy_runs_color():
126
131
  "keep": True,
127
132
  },
128
133
  ]
129
- result = _tidy_runs_color(data)
134
+ result = _tidy_runs_color(data, errors)
130
135
  assert len(result) == 2
131
136
  assert all(item["keep"] for item in result)
132
137
 
@@ -154,12 +159,13 @@ def test_tidy_runs_color():
154
159
  "keep": True,
155
160
  },
156
161
  ]
157
- result = _tidy_runs_color(data)
162
+ result = _tidy_runs_color(data, errors)
158
163
  assert len(result) == 1
159
164
  assert result[0]["text"] == "Test More"
160
165
 
161
166
 
162
167
  def test_extract_runs_mixed_styles():
168
+ errors = Errors()
163
169
  """Test extracting runs with different styles"""
164
170
  paragraph = create_mock_paragraph()
165
171
  runs = [
@@ -169,12 +175,13 @@ def test_extract_runs_mixed_styles():
169
175
  ]
170
176
  paragraph.runs = runs
171
177
 
172
- result = extract_runs(paragraph)
178
+ result = extract_runs(paragraph, errors)
173
179
  assert len(result) == 3
174
180
  assert [r.style for r in result] == ["Normal", "Normal", "Normal"]
175
181
 
176
182
 
177
183
  def test_extract_runs_with_mixed_colors():
184
+ errors = Errors()
178
185
  """Test extracting runs with different colors and highlights"""
179
186
  paragraph = create_mock_paragraph()
180
187
  runs = [
@@ -184,6 +191,6 @@ def test_extract_runs_with_mixed_colors():
184
191
  ]
185
192
  paragraph.runs = runs
186
193
 
187
- result = extract_runs(paragraph)
194
+ result = extract_runs(paragraph, errors)
188
195
  assert len(result) == 3
189
196
  assert [r.color for r in result] == ["FF0000", "0000FF", "FF0000"]
@@ -1,5 +1,6 @@
1
1
  import pytest
2
2
  from src.raw_docx.raw_image import RawImage
3
+ from simple_error_log import Errors
3
4
 
4
5
 
5
6
  @pytest.fixture
@@ -27,45 +28,51 @@ def temp_image_unsupported(tmp_path):
27
28
 
28
29
 
29
30
  def test_image_initialization_jpg(temp_image_jpg):
31
+ errors = Errors()
30
32
  """Test image initialization with JPG"""
31
- image = RawImage(temp_image_jpg)
33
+ image = RawImage(temp_image_jpg, errors)
32
34
  assert image.filepath == temp_image_jpg
33
35
 
34
36
 
35
37
  def test_image_initialization_png(temp_image_png):
38
+ errors = Errors()
36
39
  """Test image initialization with PNG"""
37
- image = RawImage(temp_image_png)
40
+ image = RawImage(temp_image_png, errors)
38
41
  assert image.filepath == temp_image_png
39
42
 
40
43
 
41
44
  def test_to_html_jpg(temp_image_jpg):
45
+ errors = Errors()
42
46
  """Test getting HTML for JPG image"""
43
- image = RawImage(temp_image_jpg)
47
+ image = RawImage(temp_image_jpg, errors)
44
48
  html = image.to_html()
45
49
  assert "data:image/jpg;base64," in html
46
50
  assert '<img alt="alt text" src=' in html
47
51
 
48
52
 
49
53
  def test_to_html_png(temp_image_png):
54
+ errors = Errors()
50
55
  """Test getting HTML for PNG image"""
51
- image = RawImage(temp_image_png)
56
+ image = RawImage(temp_image_png, errors)
52
57
  html = image.to_html()
53
58
  assert "data:image/png;base64," in html
54
59
  assert '<img alt="alt text" src=' in html
55
60
 
56
61
 
57
62
  def test_to_html_unsupported_format(temp_image_unsupported):
63
+ errors = Errors()
58
64
  """Test getting HTML for unsupported image format"""
59
- image = RawImage(temp_image_unsupported)
65
+ image = RawImage(temp_image_unsupported, errors)
60
66
  html = image.to_html()
61
67
  assert "Unable to process embedded image" in html
62
68
  assert "color:red" in html
63
69
 
64
70
 
65
71
  def test_to_html_missing_file(tmp_path):
72
+ errors = Errors()
66
73
  """Test getting HTML for missing image file"""
67
74
  missing_file = str(tmp_path / "missing.jpg")
68
- image = RawImage(missing_file)
75
+ image = RawImage(missing_file, errors)
69
76
  html = image.to_html()
70
77
  assert "Error encountered processing image" in html
71
78
  assert "color:red" in html
@@ -79,8 +86,9 @@ def test_supported_file_types():
79
86
 
80
87
 
81
88
  def test_to_dict_jpg(temp_image_jpg):
89
+ errors = Errors()
82
90
  """Test converting JPG image to dictionary"""
83
- image = RawImage(temp_image_jpg)
91
+ image = RawImage(temp_image_jpg, errors)
84
92
  result = image.to_dict()
85
93
  assert result["type"] == "image"
86
94
  assert result["filepath"] == temp_image_jpg
@@ -89,8 +97,9 @@ def test_to_dict_jpg(temp_image_jpg):
89
97
 
90
98
 
91
99
  def test_to_dict_png(temp_image_png):
100
+ errors = Errors()
92
101
  """Test converting PNG image to dictionary"""
93
- image = RawImage(temp_image_png)
102
+ image = RawImage(temp_image_png, errors)
94
103
  result = image.to_dict()
95
104
  assert result["type"] == "image"
96
105
  assert result["filepath"] == temp_image_png
@@ -99,8 +108,9 @@ def test_to_dict_png(temp_image_png):
99
108
 
100
109
 
101
110
  def test_to_dict_unsupported(temp_image_unsupported):
111
+ errors = Errors()
102
112
  """Test converting unsupported image to dictionary"""
103
- image = RawImage(temp_image_unsupported)
113
+ image = RawImage(temp_image_unsupported, errors)
104
114
  result = image.to_dict()
105
115
  assert result["type"] == "image"
106
116
  assert result["filepath"] == temp_image_unsupported
@@ -1,13 +1,14 @@
1
1
  import pytest
2
- from unittest.mock import patch
3
2
  from src.raw_docx.raw_list import RawList
4
3
  from src.raw_docx.raw_list_item import RawListItem
5
4
  from src.raw_docx.raw_run import RawRun
5
+ from simple_error_log import Errors
6
6
 
7
7
 
8
8
  @pytest.fixture
9
9
  def raw_list():
10
- return RawList()
10
+ errors = Errors()
11
+ return RawList(errors)
11
12
 
12
13
 
13
14
  @pytest.fixture
@@ -22,16 +23,18 @@ def test_list_initialization(raw_list):
22
23
 
23
24
 
24
25
  def test_add_item(list_item):
26
+ errors = Errors()
25
27
  """Test adding an item to the list"""
26
- list = RawList(1)
28
+ list = RawList(errors, 1)
27
29
  list.add(list_item)
28
30
  assert len(list.items) == 1
29
31
  assert list.items[0] == list_item
30
32
 
31
33
 
32
34
  def test_to_text():
35
+ errors = Errors()
33
36
  """Test to text"""
34
- list = RawList(1)
37
+ list = RawList(errors, 1)
35
38
  items = [
36
39
  RawListItem([RawRun("Item 1", "", None, "Normal")], 1),
37
40
  RawListItem([RawRun("Item 1.1", "", None, "Normal")], 2),
@@ -43,8 +46,9 @@ def test_to_text():
43
46
 
44
47
 
45
48
  def test_add_multiple_items():
49
+ errors = Errors()
46
50
  """Test adding multiple items with different levels"""
47
- list = RawList(1)
51
+ list = RawList(errors, 1)
48
52
  items = [
49
53
  RawListItem([RawRun("Item 1", "", None, "Normal")], 1),
50
54
  RawListItem([RawRun("Item 1.1", "", None, "Normal")], 2),
@@ -57,22 +61,21 @@ def test_add_multiple_items():
57
61
 
58
62
 
59
63
  def test_add_multiple_items_level_error():
64
+ errors = Errors()
60
65
  """Test adding multiple items with different levels with level error"""
61
- with patch("src.raw_docx.raw_list.logger") as mock_logger:
62
- list = RawList(1)
63
- items = [
64
- RawListItem([RawRun("Item 1", "", None, "Normal")], 1),
65
- RawListItem([RawRun("Item 1.1.1", "", None, "Normal")], 3),
66
- RawListItem([RawRun("Item 2", "", None, "Normal")], 1),
67
- ]
68
- for item in items:
69
- list.add(item)
70
- assert len(list.items) == 3
71
- assert [item.level for item in list.items] == [1, 3, 1]
72
- mock_logger.warning.assert_called_once()
73
- error_msg = mock_logger.warning.call_args[0][0]
74
- assert "Adding list item" in error_msg
75
- assert "to item but level jump greater than 1" in error_msg
66
+ list = RawList(errors, 1)
67
+ items = [
68
+ RawListItem([RawRun("Item 1", "", None, "Normal")], 1),
69
+ RawListItem([RawRun("Item 1.1.1", "", None, "Normal")], 3),
70
+ RawListItem([RawRun("Item 2", "", None, "Normal")], 1),
71
+ ]
72
+ for item in items:
73
+ list.add(item)
74
+ assert len(list.items) == 3
75
+ assert [item.level for item in list.items] == [1, 3, 1]
76
+ assert errors.count() == 1
77
+ assert "Adding list item" in errors._items[0].message
78
+ assert "to item but level jump greater than 1" in errors._items[0].message
76
79
 
77
80
 
78
81
  def test_to_html(raw_list):
@@ -89,8 +92,9 @@ def test_to_html(raw_list):
89
92
 
90
93
 
91
94
  def test_nested_list_to_html():
95
+ errors = Errors()
92
96
  """Test converting nested list to HTML format"""
93
- root_list = RawList(0)
97
+ root_list = RawList(errors, 0)
94
98
  items = [
95
99
  RawListItem([RawRun("Item 1", "", None, "Normal")], 1),
96
100
  RawListItem([RawRun("Subitem 1.1", "", None, "Normal")], 2),
@@ -104,26 +108,25 @@ def test_nested_list_to_html():
104
108
 
105
109
 
106
110
  def test_add_item_lower_level_logs_error():
111
+ errors = Errors()
107
112
  """Test that adding an item with lower level than list level logs an error"""
108
- with patch("src.raw_docx.raw_list.logger") as mock_logger:
109
- list_obj = RawList(2) # List with level 2
110
- item = RawListItem(
111
- [RawRun("Test Item", "", None, "Normal")], 1
112
- ) # Item with lower level 1
113
- list_obj.add(item)
114
-
115
- mock_logger.error.assert_called_once()
116
- error_msg = mock_logger.error.call_args[0][0]
117
- assert "Failed to add list item" in error_msg
118
- assert "levels are in error" in error_msg
113
+ list_obj = RawList(errors, 2) # List with level 2
114
+ item = RawListItem(
115
+ [RawRun("Test Item", "", None, "Normal")], 1
116
+ ) # Item with lower level 1
117
+ list_obj.add(item)
118
+ assert errors.count() == 1
119
+ assert "Failed to add list item" in errors._items[0].message
120
+ assert "levels are in error" in errors._items[0].message
119
121
 
120
122
 
121
123
  def test_to_dict():
124
+ errors = Errors()
122
125
  """Test converting list to dictionary"""
123
- list_obj = RawList(1)
126
+ list_obj = RawList(errors, 1)
124
127
  item1 = RawListItem([RawRun("Item 1", "", None, "Normal")], 1)
125
128
  item2 = RawListItem([RawRun("Item 2", "", None, "Normal")], 1)
126
- sublist = RawList(2)
129
+ sublist = RawList(errors, 2)
127
130
  sublist.add(RawListItem([RawRun("Subitem 1", "", None, "Normal")], 2))
128
131
 
129
132
  list_obj.add(item1)
@@ -151,10 +154,11 @@ def test_to_dict():
151
154
 
152
155
 
153
156
  def test_all_items():
154
- list_obj = RawList(1)
157
+ errors = Errors()
158
+ list_obj = RawList(errors, 1)
155
159
  item1 = RawListItem([RawRun("Item 1", "", None, "Normal")], 1)
156
160
  item2 = RawListItem([RawRun("Item 2", "", None, "Normal")], 1)
157
- sublist = RawList(2)
161
+ sublist = RawList(errors, 2)
158
162
  item3 = RawListItem([RawRun("Subitem 1", "", None, "Normal")], 2)
159
163
  sublist.add(item3)
160
164
  list_obj.add(item1)
@@ -8,6 +8,7 @@ from src.raw_docx.raw_table_row import RawTableRow
8
8
  from src.raw_docx.raw_table_cell import RawTableCell
9
9
  from src.raw_docx.raw_list_item import RawListItem
10
10
  from src.raw_docx.raw_run import RawRun
11
+ from simple_error_log import Errors
11
12
 
12
13
 
13
14
  @pytest.fixture
@@ -23,9 +24,10 @@ def paragraph():
23
24
 
24
25
  @pytest.fixture
25
26
  def image(tmp_path):
27
+ errors = Errors()
26
28
  image_path = tmp_path / "test.jpg"
27
29
  image_path.write_bytes(b"dummy image content")
28
- return RawImage(str(image_path))
30
+ return RawImage(str(image_path), errors)
29
31
 
30
32
 
31
33
  def test_section_initialization(section):
@@ -37,6 +39,7 @@ def test_section_initialization(section):
37
39
 
38
40
 
39
41
  def test_section_initialization_strip(section):
42
+ # errors = Errors()
40
43
  """Test section initialization"""
41
44
  section = RawSection(" Test Section ", " Test Content ", 1)
42
45
  assert section.title == "Test Section"
@@ -46,6 +49,7 @@ def test_section_initialization_strip(section):
46
49
 
47
50
 
48
51
  def test_add_paragraph(section, paragraph):
52
+ # errors = Errors()
49
53
  """Test adding a paragraph to section"""
50
54
  section.add(paragraph)
51
55
  assert len(section.items) == 1
@@ -53,6 +57,7 @@ def test_add_paragraph(section, paragraph):
53
57
 
54
58
 
55
59
  def test_add_image(section, image):
60
+ # errors = Errors()
56
61
  """Test adding an image to section"""
57
62
  section.add(image)
58
63
  assert len(section.items) == 1
@@ -60,13 +65,15 @@ def test_add_image(section, image):
60
65
 
61
66
 
62
67
  def test_is_in_list_empty_section(section):
68
+ # errors = Errors()
63
69
  """Test is_in_list with empty section"""
64
70
  assert not section.is_in_list()
65
71
 
66
72
 
67
73
  def test_is_in_list_with_list(section):
74
+ errors = Errors()
68
75
  """Test is_in_list with a list"""
69
- section.add(RawList())
76
+ section.add(RawList(errors))
70
77
  assert section.is_in_list()
71
78
 
72
79
 
@@ -76,8 +83,9 @@ def test_current_list_no_list(section):
76
83
 
77
84
 
78
85
  def test_current_list_with_list(section):
86
+ errors = Errors()
79
87
  """Test current_list with existing list"""
80
- test_list = RawList()
88
+ test_list = RawList(errors)
81
89
  section.add(test_list)
82
90
  assert section.current_list() == test_list
83
91
 
@@ -105,6 +113,7 @@ def test_to_html_between(section):
105
113
 
106
114
 
107
115
  def test_paragraphs(section):
116
+ errors = Errors()
108
117
  """Test getting all paragraphs"""
109
118
  run1 = RawRun("First", "", "", "Normal")
110
119
  run2 = RawRun("Second", "", "", "Normal")
@@ -113,7 +122,7 @@ def test_paragraphs(section):
113
122
  p2 = RawParagraph([run2])
114
123
 
115
124
  section.add(p1)
116
- section.add(RawList()) # Add non-paragraph item
125
+ section.add(RawList(errors)) # Add non-paragraph item
117
126
  section.add(p2)
118
127
 
119
128
  paragraphs = section.paragraphs()
@@ -133,9 +142,10 @@ def test_tables(section):
133
142
 
134
143
 
135
144
  def test_lists(section):
145
+ errors = Errors()
136
146
  """Test getting all lists"""
137
147
  run = RawRun("Test", "", "", "Normal")
138
- list1 = RawList()
148
+ list1 = RawList(errors)
139
149
  section.add(RawParagraph([run]))
140
150
  section.add(list1)
141
151
  lists = section.lists()
@@ -209,9 +219,10 @@ def test_find_first_at_start_not_found(section):
209
219
 
210
220
 
211
221
  def test_has_lists(section):
222
+ errors = Errors()
212
223
  """Test checking if section has lists"""
213
224
  assert not section.has_lists()
214
- section.add(RawList())
225
+ section.add(RawList(errors))
215
226
  assert section.has_lists()
216
227
 
217
228
 
@@ -245,6 +256,7 @@ def test_next(section):
245
256
 
246
257
 
247
258
  def test_next_paragraph(section):
259
+ errors = Errors()
248
260
  """Test getting next paragraph"""
249
261
  run1 = RawRun("First", "", "", "Normal")
250
262
  run2 = RawRun("Second", "", "", "Normal")
@@ -253,7 +265,7 @@ def test_next_paragraph(section):
253
265
  p2 = RawParagraph([run2])
254
266
 
255
267
  section.add(p1)
256
- section.add(RawList()) # Add non-paragraph item
268
+ section.add(RawList(errors)) # Add non-paragraph item
257
269
  section.add(p2)
258
270
 
259
271
  assert section.next_paragraph(0) == p1
@@ -293,12 +305,13 @@ def test_format_heading(section):
293
305
 
294
306
 
295
307
  def test_to_dict(section):
308
+ errors = Errors()
296
309
  """Test converting section to dictionary"""
297
310
  # Add various types of content
298
311
  run = RawRun("Test paragraph", "", "", "Normal")
299
312
  section.add(RawParagraph([run]))
300
313
 
301
- list_obj = RawList(1)
314
+ list_obj = RawList(errors, 1)
302
315
  list_obj.add(RawListItem([RawRun("Test item", "", None, "Normal")], 1))
303
316
  section.add(list_obj)
304
317
 
@@ -407,6 +420,7 @@ def test_section_search(section):
407
420
 
408
421
 
409
422
  def test_section_list_operations(section):
423
+ errors = Errors()
410
424
  """Test list-related operations"""
411
425
  # Test empty section
412
426
  assert not section.is_in_list()
@@ -414,7 +428,7 @@ def test_section_list_operations(section):
414
428
  assert not section.has_lists()
415
429
 
416
430
  # Add a list
417
- list1 = RawList(1)
431
+ list1 = RawList(errors, 1)
418
432
  section.add(list1)
419
433
 
420
434
  # Test with list
@@ -4,11 +4,13 @@ from src.raw_docx.raw_paragraph import RawParagraph
4
4
  from src.raw_docx.raw_list import RawList
5
5
  from src.raw_docx.raw_table import RawTable
6
6
  from src.raw_docx.raw_run import RawRun
7
+ from simple_error_log import Errors
7
8
 
8
9
 
9
10
  @pytest.fixture
10
11
  def list():
11
- return RawList()
12
+ errors = Errors()
13
+ return RawList(errors)
12
14
 
13
15
 
14
16
  @pytest.fixture
@@ -1 +0,0 @@
1
- __package_version__ = "0.5.0"
@@ -1,67 +0,0 @@
1
- import sys
2
- import logging
3
- from pathlib import Path
4
- from typing import Optional
5
- from pythonjsonlogger import jsonlogger
6
-
7
-
8
- class RawLogger:
9
- _instance = None
10
- _initialized = False
11
-
12
- def __new__(cls):
13
- if cls._instance is None:
14
- cls._instance = super().__new__(cls)
15
- return cls._instance
16
-
17
- def __init__(self):
18
- if not RawLogger._initialized:
19
- self.logger = logging.getLogger("raw_docx")
20
- self.logger.setLevel(logging.INFO)
21
-
22
- # Create JSON formatter
23
- formatter = jsonlogger.JsonFormatter(
24
- fmt="%(asctime)s %(name)s %(levelname)s %(message)s",
25
- datefmt="%Y-%m-%d %H:%M:%S",
26
- )
27
-
28
- # Console handler
29
- console_handler = logging.StreamHandler(sys.stdout)
30
- console_handler.setFormatter(formatter)
31
- self.logger.addHandler(console_handler)
32
-
33
- RawLogger._initialized = True
34
-
35
- def setup_file_logging(self, log_dir: Optional[str] = None):
36
- """Setup file logging in addition to console logging"""
37
- if log_dir:
38
- log_path = Path(log_dir)
39
- log_path.mkdir(parents=True, exist_ok=True)
40
- file_handler = logging.FileHandler(log_path / "raw_docx.log")
41
- file_handler.setFormatter(
42
- jsonlogger.JsonFormatter(
43
- fmt="%(asctime)s %(name)s %(levelname)s %(message)s",
44
- datefmt="%Y-%m-%d %H:%M:%S",
45
- )
46
- )
47
- self.logger.addHandler(file_handler)
48
-
49
- def info(self, message: str):
50
- """Log info message"""
51
- self.logger.info(message)
52
-
53
- def warning(self, message: str):
54
- """Log warning message"""
55
- self.logger.warning(message)
56
-
57
- def error(self, message: str):
58
- """Log error message"""
59
- self.logger.error(message)
60
-
61
- def exception(self, message: str, exc: Exception):
62
- """Log exception with message"""
63
- self.logger.exception(message, exc_info=exc)
64
-
65
-
66
- # Create singleton instance
67
- logger = RawLogger()
@@ -1,2 +0,0 @@
1
- python-docx
2
- python-json-logger
@@ -1,112 +0,0 @@
1
- import json
2
- import logging
3
- import pytest
4
- from pathlib import Path
5
- from src.raw_docx.raw_logger import RawLogger
6
-
7
-
8
- @pytest.fixture
9
- def logger_instance():
10
- """Fixture to provide a fresh logger instance for each test"""
11
- # Reset the singleton state
12
- RawLogger._instance = None
13
- RawLogger._initialized = False
14
-
15
- # Clear any existing handlers
16
- logger = logging.getLogger("raw_docx")
17
- logger.handlers.clear()
18
-
19
- return RawLogger()
20
-
21
-
22
- @pytest.fixture
23
- def temp_log_dir(tmp_path):
24
- """Fixture to provide a temporary directory for log files"""
25
- log_dir = tmp_path / "logs"
26
- log_dir.mkdir()
27
- return str(log_dir)
28
-
29
-
30
- def test_singleton_pattern():
31
- """Test that RawLogger implements singleton pattern correctly"""
32
- logger1 = RawLogger()
33
- logger2 = RawLogger()
34
- assert logger1 is logger2
35
-
36
-
37
- def test_default_initialization(logger_instance):
38
- """Test default logger initialization"""
39
- assert logger_instance.logger.level == logging.INFO
40
- assert len(logger_instance.logger.handlers) == 1
41
- assert isinstance(logger_instance.logger.handlers[0], logging.StreamHandler)
42
-
43
-
44
- def test_file_logging_setup(logger_instance, temp_log_dir):
45
- """Test setting up file logging"""
46
- logger_instance.setup_file_logging(temp_log_dir)
47
-
48
- # Check that a file handler was added
49
- assert len(logger_instance.logger.handlers) == 2
50
- assert any(
51
- isinstance(h, logging.FileHandler) for h in logger_instance.logger.handlers
52
- )
53
-
54
- # Check that log file was created
55
- log_file = Path(temp_log_dir) / "raw_docx.log"
56
- assert log_file.exists()
57
-
58
-
59
- def test_log_message_format(logger_instance, temp_log_dir, caplog):
60
- """Test that log messages are properly formatted as JSON"""
61
- logger_instance.setup_file_logging(temp_log_dir)
62
-
63
- test_message = "Test log message"
64
- logger_instance.info(test_message)
65
-
66
- # Read the log file
67
- log_file = Path(temp_log_dir) / "raw_docx.log"
68
- with open(log_file) as f:
69
- log_entry = json.loads(f.readline())
70
-
71
- # Check JSON structure
72
- assert "asctime" in log_entry
73
- assert "name" in log_entry
74
- assert "levelname" in log_entry
75
- assert "message" in log_entry
76
- assert log_entry["message"] == test_message
77
- assert log_entry["levelname"] == "INFO"
78
-
79
-
80
- def test_log_levels(logger_instance, caplog):
81
- """Test different log levels"""
82
- test_message = "Test message"
83
-
84
- logger_instance.info(test_message)
85
- assert "INFO" in caplog.text
86
-
87
- logger_instance.warning(test_message)
88
- assert "WARNING" in caplog.text
89
-
90
- logger_instance.error(test_message)
91
- assert "ERROR" in caplog.text
92
-
93
-
94
- def test_exception_logging(logger_instance, caplog):
95
- """Test exception logging"""
96
- try:
97
- raise ValueError("Test exception")
98
- except ValueError as e:
99
- logger_instance.exception("Error occurred", e)
100
-
101
- assert "ERROR" in caplog.text
102
- assert "Test exception" in caplog.text
103
-
104
-
105
- def test_invalid_log_directory(logger_instance, tmp_path):
106
- """Test handling of invalid log directory"""
107
- invalid_dir = tmp_path / "nonexistent" / "logs"
108
- logger_instance.setup_file_logging(str(invalid_dir))
109
-
110
- # Check that the directory was created
111
- assert invalid_dir.exists()
112
- assert invalid_dir.is_dir()
File without changes
File without changes
File without changes
File without changes