raw-docx 0.3.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. {raw_docx-0.3.0 → raw_docx-0.5.0}/PKG-INFO +4 -7
  2. {raw_docx-0.3.0 → raw_docx-0.5.0}/setup.py +8 -11
  3. raw_docx-0.5.0/src/raw_docx/__init__.py +0 -0
  4. raw_docx-0.5.0/src/raw_docx/__version__.py +1 -0
  5. raw_docx-0.5.0/src/raw_docx/docx_paragraph.py +89 -0
  6. raw_docx-0.5.0/src/raw_docx/raw_document.py +64 -0
  7. raw_docx-0.5.0/src/raw_docx/raw_docx.py +256 -0
  8. raw_docx-0.5.0/src/raw_docx/raw_image.py +37 -0
  9. raw_docx-0.5.0/src/raw_docx/raw_list.py +69 -0
  10. raw_docx-0.5.0/src/raw_docx/raw_list_item.py +21 -0
  11. raw_docx-0.5.0/src/raw_docx/raw_logger.py +67 -0
  12. raw_docx-0.5.0/src/raw_docx/raw_paragraph.py +35 -0
  13. raw_docx-0.5.0/src/raw_docx/raw_run.py +15 -0
  14. raw_docx-0.5.0/src/raw_docx/raw_section.py +119 -0
  15. raw_docx-0.5.0/src/raw_docx/raw_table.py +48 -0
  16. raw_docx-0.5.0/src/raw_docx/raw_table_cell.py +62 -0
  17. raw_docx-0.5.0/src/raw_docx/raw_table_row.py +41 -0
  18. {raw_docx-0.3.0 → raw_docx-0.5.0}/src/raw_docx.egg-info/PKG-INFO +4 -7
  19. {raw_docx-0.3.0 → raw_docx-0.5.0}/src/raw_docx.egg-info/SOURCES.txt +15 -0
  20. raw_docx-0.5.0/src/raw_docx.egg-info/requires.txt +2 -0
  21. raw_docx-0.5.0/src/raw_docx.egg-info/top_level.txt +1 -0
  22. {raw_docx-0.3.0 → raw_docx-0.5.0}/tests/test_docx_paragraph.py +1 -1
  23. {raw_docx-0.3.0 → raw_docx-0.5.0}/tests/test_integration.py +1 -1
  24. {raw_docx-0.3.0 → raw_docx-0.5.0}/tests/test_raw_document.py +4 -4
  25. {raw_docx-0.3.0 → raw_docx-0.5.0}/tests/test_raw_docx.py +2 -2
  26. {raw_docx-0.3.0 → raw_docx-0.5.0}/tests/test_raw_image.py +1 -1
  27. {raw_docx-0.3.0 → raw_docx-0.5.0}/tests/test_raw_list.py +5 -5
  28. {raw_docx-0.3.0 → raw_docx-0.5.0}/tests/test_raw_list_item.py +2 -2
  29. {raw_docx-0.3.0 → raw_docx-0.5.0}/tests/test_raw_logger.py +1 -1
  30. {raw_docx-0.3.0 → raw_docx-0.5.0}/tests/test_raw_paragraph.py +2 -2
  31. {raw_docx-0.3.0 → raw_docx-0.5.0}/tests/test_raw_run.py +1 -1
  32. {raw_docx-0.3.0 → raw_docx-0.5.0}/tests/test_raw_section.py +9 -9
  33. {raw_docx-0.3.0 → raw_docx-0.5.0}/tests/test_raw_table.py +5 -5
  34. {raw_docx-0.3.0 → raw_docx-0.5.0}/tests/test_raw_table_cell.py +5 -5
  35. {raw_docx-0.3.0 → raw_docx-0.5.0}/tests/test_raw_table_row.py +4 -4
  36. raw_docx-0.3.0/src/raw_docx.egg-info/requires.txt +0 -5
  37. raw_docx-0.3.0/src/raw_docx.egg-info/top_level.txt +0 -1
  38. {raw_docx-0.3.0 → raw_docx-0.5.0}/LICENSE +0 -0
  39. {raw_docx-0.3.0 → raw_docx-0.5.0}/README.md +0 -0
  40. {raw_docx-0.3.0 → raw_docx-0.5.0}/setup.cfg +0 -0
  41. {raw_docx-0.3.0 → raw_docx-0.5.0}/src/raw_docx.egg-info/dependency_links.txt +0 -0
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: raw_docx
3
- Version: 0.3.0
3
+ Version: 0.5.0
4
4
  Summary: A package for processing and analyzing raw document formats
5
5
  Home-page: https://github.com/daveih/raw_docx
6
- Author: Dave Berson-Hurst
6
+ Author: Dave Iberson-Hurst
7
7
  Author-email:
8
8
  Classifier: Development Status :: 3 - Alpha
9
9
  Classifier: Intended Audience :: Developers
@@ -17,11 +17,8 @@ Classifier: Programming Language :: Python :: 3.11
17
17
  Requires-Python: >=3.8
18
18
  Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
- Requires-Dist: python-docx==1.1.2
21
- Requires-Dist: ruff==0.8.6
22
- Requires-Dist: python-json-logger==3.2.1
23
- Requires-Dist: pytest==7.4.4
24
- Requires-Dist: pytest-cov==4.1.0
20
+ Requires-Dist: python-docx
21
+ Requires-Dist: python-json-logger
25
22
  Dynamic: author
26
23
  Dynamic: classifier
27
24
  Dynamic: description
@@ -3,19 +3,14 @@ from setuptools import setup, find_packages
3
3
  with open("README.md", "r", encoding="utf-8") as fh:
4
4
  long_description = fh.read()
5
5
 
6
- with open("requirements.txt", "r", encoding="utf-8") as fh:
7
- requirements = [
8
- line.strip() for line in fh if line.strip() and not line.startswith("#")
9
- ]
10
-
11
- version = {}
12
- with open("src/__init__.py") as fp:
13
- exec(fp.read(), version)
6
+ package_info = {}
7
+ with open("src/raw_docx/__version__.py") as fp:
8
+ exec(fp.read(), package_info)
14
9
 
15
10
  setup(
16
11
  name="raw_docx",
17
- version=version["__package_version__"],
18
- author="Dave Berson-Hurst",
12
+ version=package_info["__package_version__"],
13
+ author="Dave Iberson-Hurst",
19
14
  author_email="",
20
15
  description="A package for processing and analyzing raw document formats",
21
16
  long_description=long_description,
@@ -23,6 +18,9 @@ setup(
23
18
  url="https://github.com/daveih/raw_docx",
24
19
  packages=find_packages(where="src"),
25
20
  package_dir={"": "src"},
21
+ package_data={},
22
+ install_requires=["python-docx", "python-json-logger"],
23
+ tests_require=["pytest", "pytest-cov", "pytest-mock", "python-dotenv"],
26
24
  classifiers=[
27
25
  "Development Status :: 3 - Alpha",
28
26
  "Intended Audience :: Developers",
@@ -35,5 +33,4 @@ setup(
35
33
  "Programming Language :: Python :: 3.11",
36
34
  ],
37
35
  python_requires=">=3.8",
38
- install_requires=requirements,
39
36
  )
File without changes
@@ -0,0 +1 @@
1
+ __package_version__ = "0.5.0"
@@ -0,0 +1,89 @@
1
+ from docx.text.paragraph import Paragraph
2
+ from docx.styles.style import ParagraphStyle
3
+ from docx.text.run import Run
4
+ from .raw_logger import logger
5
+ from .raw_run import RawRun
6
+
7
+
8
def extract_runs(paragraph: Paragraph) -> list[RawRun]:
    """Return the paragraph's runs as RawRun objects, merging like neighbours.

    Each python-docx run is described by its text, resolved colour and
    highlight; adjacent runs that share colour and highlight are then merged
    by ``_tidy_runs_color`` before being wrapped in ``RawRun``.

    Args:
        paragraph: The python-docx paragraph whose runs are extracted.

    Returns:
        One ``RawRun`` per group of adjacent runs with the same colour and
        highlight. Every run is tagged with the paragraph's style name.
    """
    # Diagnostic: log the style used by one specific template paragraph.
    if paragraph.text.startswith(
        "This template is intended for interventional clinical trials. The template is suitable"
    ):
        logger.info(f"Paragraph style {paragraph.style.name}")
    data = [
        {
            "text": run.text,
            "color": _get_run_color(paragraph.style, run),
            "highlight": _get_highlight_color(run),
            "keep": True,
            # The paragraph style is recorded, not the run's own style.
            "style": paragraph.style.name,
        }
        for run in paragraph.runs
    ]
    data = _tidy_runs_color(data)
    return [RawRun(x["text"], x["color"], x["highlight"], x["style"]) for x in data]
26
+
27
+
28
+ def _tidy_runs_color(data: list[dict]) -> list[dict]:
29
+ more = False
30
+ for index, run in enumerate(data):
31
+ if (
32
+ index > 0
33
+ and run["color"] == data[index - 1]["color"]
34
+ and run["highlight"] == data[index - 1]["highlight"]
35
+ ):
36
+ run["text"] = data[index - 1]["text"] + run["text"]
37
+ data[index - 1]["keep"] = False
38
+ more = True
39
+ new_data = [x for x in data if x["keep"]]
40
+ if more:
41
+ new_data = _tidy_runs_color(new_data)
42
+ return new_data
43
+
44
+
45
def _get_run_color(paragraph: Paragraph, run: Run) -> str | None:
    """Resolve the colour of a run as a string.

    Preference order: the run's own font colour, then a colour from the
    run's style chain, then the font colour of ``paragraph``.

    NOTE(review): the visible caller passes ``paragraph.style`` for the
    ``paragraph`` argument, and the result is always passed through
    ``str()``, so a missing colour comes back as the text "None".
    """
    # All three lookups are performed up front, as each may log on failure.
    fallback_color = _get_font_colour(paragraph)
    direct_color = _get_font_colour(run)
    inherited_color = _run_style_color(run)
    return str(direct_color or inherited_color or fallback_color)
56
+
57
+
58
+ def _get_highlight_color(run: Run) -> str | None:
59
+ try:
60
+ return str(run.font.highlight_color)
61
+ except Exception as e:
62
+ logger.exception("Failed to get run highlight color", e)
63
+ return None
64
+
65
+
66
+ def _run_style_color(run: Run) -> str | None:
67
+ try:
68
+ run_color = None
69
+ run_style = run.style
70
+ while run_style and not run_color:
71
+ if run_style.font.color.rgb:
72
+ run_color = run_style.font.color.rgb
73
+ else:
74
+ run_style = run_style.base_style
75
+ return run_color
76
+ except Exception as e:
77
+ logger.exception("Failed to get run style color", e)
78
+ return None
79
+
80
+
81
+ def _get_font_colour(item: Run | ParagraphStyle) -> str | None:
82
+ try:
83
+ return item.font.color.rgb
84
+ except Exception as e:
85
+ logger.exception("Failed to get font color", e)
86
+ return None
87
+
88
+
89
+ setattr(Paragraph, "extract_runs", extract_runs)
@@ -0,0 +1,64 @@
1
+ from .raw_section import RawSection
2
+
3
+
4
class RawDocument:
    """An ordered collection of sections with hierarchical numbering.

    A new document starts with one untitled level-1 section so that content
    appearing before the first heading has somewhere to go. Sections are also
    indexed by their generated number and by their title.
    """

    def __init__(self):
        self.sections = []
        # One counter per heading level; index 0 is never part of a number.
        self._levels = [0, 0, 0, 0, 0, 0]
        self._section_number_mapping = {}
        self._section_title_mapping = {}
        section = RawSection(None, None, 1)
        self.add(section, False)  # No section number increment

    def add(self, section: RawSection, increment=True):
        """Append a section, assigning its number from the level counters.

        Args:
            section: The section to add; its ``number`` is overwritten.
            increment: When True, advance the counter for the section's level
                (and reset deeper levels) before the number is generated.
        """
        if increment:
            self._inc_section_number(section.level)
        section.number = self._get_section_number(section.level)
        self._section_number_mapping[section.number] = section
        self._section_title_mapping[section.title] = section
        self.sections.append(section)

    def current_section(self) -> RawSection:
        """Return the most recently added section."""
        return self.sections[-1]

    def section_by_ordinal(self, ordinal: int) -> RawSection | None:
        """Return the section at a 1-based position, or None if out of range."""
        if 1 <= ordinal <= len(self.sections):
            return self.sections[ordinal - 1]
        return None

    def section_by_number(self, section_number: str) -> RawSection | None:
        """Return the section with the given number (e.g. "1.2"), or None."""
        return self._section_number_mapping.get(section_number)

    def section_by_title(self, section_title: str) -> RawSection | None:
        """Return the section with the given title, or None."""
        return self._section_title_mapping.get(section_title)

    def _inc_section_number(self, level: int) -> None:
        """Advance the counter for ``level`` and zero every deeper level."""
        self._levels[level] += 1
        for index in range(level + 1, len(self._levels)):
            self._levels[index] = 0

    def _get_section_number(self, level: int) -> str:
        """Build the dotted number from the counters for levels 1..level."""
        return ".".join(str(x) for x in self._levels[1 : level + 1])

    def to_dict(self) -> dict:
        """Convert the document to a dictionary representation"""
        return {
            "type": "document",
            "sections": [section.to_dict() for section in self.sections],
            "levels": self._levels,
            "section_number_mapping": {
                num: section.to_dict()
                for num, section in self._section_number_mapping.items()
            },
            "section_title_mapping": {
                title: section.to_dict()
                for title, section in self._section_title_mapping.items()
            },
        }
@@ -0,0 +1,256 @@
1
+ import os
2
+ import re
3
+ import docx
4
+ import zipfile
5
+ from pathlib import Path
6
+ from .raw_document import RawDocument
7
+ from .raw_section import RawSection
8
+ from .raw_paragraph import RawParagraph
9
+ from .raw_image import RawImage
10
+ from .raw_table import RawTable
11
+ from .raw_table_row import RawTableRow
12
+ from .raw_table_cell import RawTableCell
13
+ from .raw_list import RawList
14
+ from .raw_list_item import RawListItem
15
+ from docx import Document as DocXProcessor
16
+ from docx.document import Document
17
+ from docx.oxml.table import CT_Tbl, CT_TcPr
18
+ from docx.oxml.text.paragraph import CT_P
19
+ from docx.table import Table, _Cell
20
+ from docx.text.paragraph import Paragraph
21
+ from lxml import etree
22
+ from .raw_logger import logger
23
+ from .docx_paragraph import extract_runs # Needed such that method inserted into class
24
+
25
+
26
class RawDocx:
    """Read a .docx file and build a RawDocument tree from its content.

    Constructing an instance does all the work: an ``images`` directory is
    created next to the source file, embedded media are extracted into it,
    and every body paragraph and table is converted into sections,
    paragraphs, lists, tables and images on ``target_document``.
    """

    class LogicError(Exception):
        """Raised for document structures this reader does not handle."""

    def __init__(self, full_path: str):
        """Load and process the document at ``full_path``."""
        path = Path(full_path)
        self.full_path = full_path
        self.dir = path.parent
        self.filename = path.name
        self.image_path = os.path.join(self.dir, "images")
        # Maps relationship id -> path of the extracted image file.
        self.image_rels = {}
        self._organise_dir()
        self.source_document = DocXProcessor(self.full_path)
        self.target_document = RawDocument()
        self._process()

    def _organise_dir(self):
        """Create the image output directory; an existing one is fine."""
        try:
            os.mkdir(self.image_path)
        except FileExistsError:
            pass
        except Exception as e:
            logger.exception("Failed to create image directory", e)

    def _process(self):
        """Convert every block of the source document; errors are logged."""
        try:
            self._process_images()
            for block_item in self._iter_block_items(self.source_document):
                target_section = self.target_document.current_section()
                if isinstance(block_item, Paragraph):
                    self._process_paragraph(block_item, target_section, self.image_rels)
                elif isinstance(block_item, Table):
                    self._process_table(block_item, target_section)
                else:
                    logger.warning("Ignoring element")
                    raise ValueError
        except Exception as e:
            logger.exception("Exception raised processing document", e)

    def _process_images(self):
        """Extract embedded images and record where each relationship points.

        This used to be a second method named ``_extract_images``; the later
        definition of that name replaced it, so ``image_rels`` was never
        filled and no image could be matched to a paragraph.
        """
        # Extract images to image dir
        self._extract_images()
        # Save all 'rId:filenames' as references
        for r in self.source_document.part.rels.values():
            if isinstance(r._target, docx.parts.image.ImagePart):
                self.image_rels[r.rId] = os.path.join(
                    self.image_path, os.path.basename(r._target.partname)
                )

    def _iter_block_items(self, parent):
        """
        Yield each paragraph and table child within *parent*, in document
        order. Each returned value is an instance of either Table or
        Paragraph. *parent* would most commonly be a reference to a main
        Document object, but also works for a _Cell object, which itself can
        contain paragraphs and tables.
        """
        if isinstance(parent, Document):
            parent_elm = parent.element.body
        elif isinstance(parent, _Cell):
            parent_elm = parent._tc
        else:
            raise ValueError("something's not right with the parent")

        for child in parent_elm.iterchildren():
            if isinstance(child, str):
                logger.warning(f"Ignoring eTree element {child}")
            elif isinstance(child, CT_P):
                yield Paragraph(child, parent)
            elif isinstance(child, CT_Tbl):
                yield Table(child, parent)
            elif isinstance(child, etree._Element):
                # Cell properties and structured document tags are skipped
                # silently; anything else is reported.
                if (
                    child.tag
                    == "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}tcPr"
                ):
                    pass
                elif (
                    child.tag
                    == "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}sdt"
                ):
                    pass
                else:
                    logger.warning(f"Ignoring eTree element {self._tree(child)}")

            else:
                raise ValueError(f"something's not right with a child {type(child)}")

    def _tree(self, node, tab=1):
        """Walk every descendant of ``node``; returns None."""
        for child in node:
            self._tree(child, tab + 1)

    def _process_table(self, table, target: RawSection | RawTableCell):
        """Convert a python-docx table into a RawTable added to ``target``.

        Raises:
            RawDocx.LogicError: If a cell contains a nested table or an
                unexpected block type.
        """
        target_table = RawTable()
        target.add(target_table)
        for r_index, row in enumerate(table.rows):
            target_row = RawTableRow()
            target_table.add(target_row)
            cells = row.cells
            for c_index, cell in enumerate(cells):
                if cell._tc is not None:
                    x = cell._tc
                    right = x.right
                    left = x.left
                    top = x.top
                    try:
                        # Bottom method seems to have a bug.
                        # See https://github.com/python-openxml/python-docx/issues/1433
                        bottom = x.bottom
                    except Exception:
                        bottom = top + 1
                    h_span = right - left
                    v_span = bottom - top
                else:
                    h_span = 1
                    v_span = 1
                # A merged cell is reported once per grid position it covers;
                # only its top-left position counts as the first occurrence.
                first = r_index == cell._tc.top and c_index == cell._tc.left
                target_cell = RawTableCell(h_span, v_span, first)
                target_row.add(target_cell)
                for block_item in self._iter_block_items(cell):
                    if isinstance(block_item, Paragraph):
                        self._process_cell(block_item, target_cell)
                    elif isinstance(block_item, Table):
                        raise self.LogicError("Table within table detected")
                    elif isinstance(block_item, etree._Element):
                        if block_item.tag == CT_TcPr:
                            pass
                        else:
                            logger.warning(f"Ignoring eTree element {block_item.tag}")
                    else:
                        raise self.LogicError(
                            f"something's not right with a child {type(block_item)}"
                        )

    def _process_cell(self, paragraph, target_cell: RawTableCell):
        """Add a cell paragraph to ``target_cell`` as a list item or paragraph."""
        if self._is_list(paragraph):
            list_level = self.get_list_level(paragraph)
            item = RawListItem(paragraph.extract_runs(), list_level)
            # Continue the list currently being built, or start a new one.
            if target_cell.is_in_list():
                target_list = target_cell.current_list()
            else:
                target_list = RawList()
                target_cell.add(target_list)
            target_list.add(item)
        else:
            target_paragraph = RawParagraph(paragraph.extract_runs())
            target_cell.add(target_paragraph)

    def _process_paragraph(
        self, paragraph, target_section: RawSection, image_rels: dict
    ):
        """Convert a body paragraph into a section, list item, image or paragraph.

        Args:
            paragraph: The python-docx paragraph to convert.
            target_section: The section receiving non-heading content.
            image_rels: Mapping of relationship id to extracted image path.
        """
        is_heading, level = self._is_heading(paragraph.style.name)
        if is_heading:
            target_section = RawSection(paragraph.text, paragraph.text, level)
            self.target_document.add(target_section)
        elif self._is_list(paragraph):
            list_level = self.get_list_level(paragraph)
            item = RawListItem(paragraph.extract_runs(), list_level)
            # Continue the list currently being built, or start a new one.
            if target_section.is_in_list():
                target_list = target_section.current_list()
            else:
                target_list = RawList()
                target_section.add(target_list)
            target_list.add(item)
        elif "Graphic" in paragraph._p.xml:
            # Add every known image whose relationship id occurs in the XML.
            for rId in image_rels:
                if rId in paragraph._p.xml:
                    target_image = RawImage(image_rels[rId])
                    target_section.add(target_image)
        else:
            target_paragraph = RawParagraph(paragraph.extract_runs())
            target_section.add(target_paragraph)

    def get_list_level(self, paragraph):
        """Return the paragraph's numbering indent level, or 0 if it has none."""
        list_level = paragraph._p.xpath("./w:pPr/w:numPr/w:ilvl/@w:val")
        return int(str(list_level[0])) if list_level else 0

    def _is_heading(self, text):
        """Classify a style name as a heading and extract its level.

        Returns:
            ``(True, level)`` for names starting "NNHeading N" (level taken
            from the two leading digits) or "Heading N" (level taken from the
            digit after the word); ``(False, 0)`` otherwise.
        """
        if re.match(r"^\d\dHeading \d", text):
            try:
                level = int(text[0:2])
                return True, level
            except Exception:
                return True, 0
        if re.match(r"^Heading \d", text):
            try:
                level = int(text[8])
                return True, level
            except Exception:
                return True, 0
        return False, 0

    def _is_list(self, paragraph):
        """Return True if the paragraph looks like a list item.

        Checks, in order: numbering properties in the XML, a known bullet
        style name, and text that starts with a bullet character (U+2022).
        """
        level = paragraph._p.xpath("./w:pPr/w:numPr/w:ilvl/@w:val")
        if level:
            return True
        if paragraph.style.name in ["CPT_List Bullet", "List Bullet"]:
            return True
        if paragraph.text:
            if paragraph.text[0] == "\u2022":
                return True
        return False

    def _extract_images(self):
        """Copy every ``word/media/`` entry of the .docx into the image directory."""
        # The context manager closes the archive even if a copy fails.
        with zipfile.ZipFile(self.full_path) as archive:
            for file in archive.filelist:
                if file.filename.startswith("word/media/"):
                    # Extract the image file name from the path
                    image_name = Path(file.filename).name
                    # Create the target path for the image
                    target_path = os.path.join(self.image_path, image_name)
                    # Extract the file to the target path
                    with archive.open(file) as source, open(target_path, "wb") as target:
                        target.write(source.read())

    def to_dict(self) -> dict:
        """Convert the RawDocx instance to a dictionary representation"""
        if hasattr(self, "target_document"):
            return {
                "type": "raw_docx",
                "document": self.target_document.to_dict()
                if hasattr(self.target_document, "to_dict")
                else None,
            }
        return {"type": "raw_docx", "document": None}
@@ -0,0 +1,37 @@
1
+ import os
2
+ import base64
3
+ from .raw_logger import logger
4
+
5
+
6
class RawImage:
    """An image file extracted from a document, referenced by file path."""

    # Supported file extensions (lower case) mapped to the image type name.
    FILE_TYPE_MAP = {".png": "png", ".jpg": "jpg", ".jpeg": "jpg"}

    def __init__(self, filepath: str):
        self.filepath = filepath

    def to_html(self):
        """Return an ``<img>`` tag with the file embedded as base64 data.

        Returns a red note paragraph instead when the extension is not in
        ``FILE_TYPE_MAP`` or when reading the file fails (the error is
        logged). Extensions are matched case-insensitively.
        """
        try:
            file_root, file_extension = os.path.splitext(self.filepath)
            # Match ".PNG" / ".JPG" as well as the lower-case forms.
            file_type = self.FILE_TYPE_MAP.get(file_extension.lower())
            if file_type is None:
                return f"""<p style="color:red">Note: Unable to process embedded image of type '{file_extension}', image ignored.</p>"""
            with open(self.filepath, "rb") as image_file:
                data = base64.b64encode(image_file.read())
            decoded = data.decode("ascii")
            return f'<img alt="alt text" src="data:image/{file_type};base64,{decoded}"/>'
        except Exception as e:
            logger.exception("Exception converting image", e)
            return (
                """<p style="color:red">Note: Error encountered processing image.</p>"""
            )

    def to_dict(self) -> dict:
        """Convert the image to a dictionary representation"""
        file_root, file_extension = os.path.splitext(self.filepath)
        return {
            "type": "image",
            "filepath": self.filepath,
            # The extension is reported exactly as it appears in the path.
            "extension": file_extension,
            "file_type": self.FILE_TYPE_MAP.get(file_extension.lower(), "unknown"),
        }
@@ -0,0 +1,69 @@
1
+ from .raw_list_item import RawListItem
2
+ from .raw_logger import logger
3
+
4
+
5
class RawList:
    """A list at one indent level, holding items and deeper nested lists."""

    def __init__(self, level=0):
        self.items = []  # RawListItems and nested RawLists, in order
        self.level = level

    def add(self, item: RawListItem) -> None:
        """Place ``item`` in this list or in a nested list for deeper levels.

        An item at this list's level is appended directly. A deeper item is
        passed to the trailing nested list, which is created first if the
        last entry is not already a list. A shallower item is rejected with
        an error log entry.
        """
        if item.level == self.level:
            self.items.append(item)
            return
        if not item.level > self.level:
            logger.error(
                f"Failed to add list item '{item}' to list '{self}', levels are in error"
            )
            return
        nested = self.items[-1] if self.items else None
        if not isinstance(nested, RawList):
            nested = RawList(item.level)
            self.items.append(nested)
        nested.add(item)
        if item.level > self.level + 1:
            logger.warning(
                f"Adding list item '{item}' to item but level jump greater than 1"
            )

    def to_text(self) -> str:
        """Return the text of every entry, one per line."""
        return "\n".join([f"{entry.to_text()}" for entry in self.items])

    def all_items(self) -> list[RawListItem]:
        """Return every list item, flattening nested lists depth-first."""
        flattened = []
        for entry in self.items:
            if isinstance(entry, RawListItem):
                flattened.append(entry)
            elif isinstance(entry, RawList):
                flattened.extend(entry.all_items())
        return flattened

    def to_html(self) -> str:
        """Render the list as a ``<ul>`` with one ``<li>`` per entry."""
        body = [f"<li>{entry.to_html()}</li>" for entry in self.items]
        return "\n".join(["<ul>", *body, "</ul>"])

    def to_dict(self) -> dict:
        """Convert the list to a dictionary representation"""
        entries = []
        for entry in self.items:
            if hasattr(entry, "to_dict"):
                entries.append(entry.to_dict())
            else:
                entries.append(str(entry))
        return {"type": "list", "level": self.level, "items": entries}

    def __str__(self) -> str:
        """Return a string representation of the list showing its level and item count.

        Returns:
            str: String representation of the list
        """
        return f"[level='{self.level}', item_count='{len(self.items)}']"
@@ -0,0 +1,21 @@
1
+ from html import escape
2
+ from .raw_paragraph import RawParagraph
3
+ from .raw_run import RawRun
4
+
5
+
6
class RawListItem(RawParagraph):
    """A paragraph that is an entry in a list at a given indent level."""

    def __init__(self, runs: list[RawRun], level: int):
        # The level is set before the base class builds the text from runs.
        self.level = level
        super().__init__(runs)

    def to_text(self) -> str:
        """Return the text prefixed by one space per indent level."""
        indent = ' ' * self.level
        return f"{indent}{self.text}"

    def to_html(self) -> str:
        """Return the HTML-escaped text without any surrounding tag."""
        escaped = escape(self.text)
        return f"{escaped}"

    def to_dict(self) -> dict:
        """Convert the list item to a dictionary representation"""
        return dict(type="list_item", text=self.text, level=self.level)

    def __str__(self) -> str:
        """Return the text and level in bracketed form."""
        return f"[text='{self.text}', level='{self.level}']"
@@ -0,0 +1,67 @@
1
+ import sys
2
+ import logging
3
+ from pathlib import Path
4
+ from typing import Optional
5
+ from pythonjsonlogger import jsonlogger
6
+
7
+
8
class RawLogger:
    """Process-wide wrapper around the "raw_docx" logger with JSON output.

    ``__new__`` always returns the same instance, and ``__init__`` configures
    the underlying logger only on the first construction.
    """

    _instance = None
    _initialized = False

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        if RawLogger._initialized:
            return
        self.logger = logging.getLogger("raw_docx")
        self.logger.setLevel(logging.INFO)

        # Console handler writing JSON records to stdout
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setFormatter(self._json_formatter())
        self.logger.addHandler(console_handler)

        RawLogger._initialized = True

    @staticmethod
    def _json_formatter():
        """Build the JSON formatter shared by the console and file handlers."""
        return jsonlogger.JsonFormatter(
            fmt="%(asctime)s %(name)s %(levelname)s %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )

    def setup_file_logging(self, log_dir: Optional[str] = None):
        """Setup file logging in addition to console logging"""
        if not log_dir:
            return
        log_path = Path(log_dir)
        log_path.mkdir(parents=True, exist_ok=True)
        file_handler = logging.FileHandler(log_path / "raw_docx.log")
        file_handler.setFormatter(self._json_formatter())
        self.logger.addHandler(file_handler)

    def info(self, message: str):
        """Log info message"""
        self.logger.info(message)

    def warning(self, message: str):
        """Log warning message"""
        self.logger.warning(message)

    def error(self, message: str):
        """Log error message"""
        self.logger.error(message)

    def exception(self, message: str, exc: Exception):
        """Log exception with message"""
        self.logger.exception(message, exc_info=exc)
64
+
65
+
66
+ # Create singleton instance
67
+ logger = RawLogger()
@@ -0,0 +1,35 @@
1
+ from .raw_run import RawRun
2
+ from html import escape
3
+
4
+
5
class RawParagraph:
    """A paragraph made of runs, with optional CSS class names."""

    def __init__(self, runs: list[RawRun]):
        self.runs = runs
        self.klasses = []
        # The text is computed once from the runs at construction time.
        self.text = self._run_text()

    def to_html(self) -> str:
        """Render as a ``<p>`` element with escaped text and any classes."""
        if self.klasses:
            names = " ".join(self.klasses)
            opening = f'<p class="{names}">'
        else:
            opening = "<p>"
        return f"{opening}{escape(self.text)}</p>"

    def find(self, text: str) -> bool:
        """Return True if ``text`` occurs anywhere in the paragraph (case-sensitive)."""
        return text in self.text

    def find_at_start(self, text: str) -> bool:
        """Return True if the paragraph starts with ``text``, ignoring case."""
        return self.text.upper().startswith(text.upper())

    def add_class(self, klass) -> None:
        """Append a class name used when rendering HTML."""
        self.klasses.append(klass)

    def to_dict(self) -> dict:
        """Convert the paragraph to a dictionary representation"""
        run_dicts = [run.to_dict() for run in self.runs]
        return {
            "type": "paragraph",
            "text": self.text,
            "runs": run_dicts,
            "classes": self.klasses,
        }

    def _run_text(self) -> str:
        """Concatenate the text of every run."""
        return "".join(run.text for run in self.runs)
@@ -0,0 +1,15 @@
1
class RawRun:
    """A span of text with its colour, highlight and style name."""

    def __init__(self, text: str, color: str | None, highlight: str | None, style: str):
        self.text, self.color = text, color
        self.highlight, self.style = highlight, style

    def to_dict(self) -> dict:
        """Convert the instance to a dictionary representation"""
        return dict(
            text=self.text,
            color=self.color,
            highlight=self.highlight,
            style=self.style,
        )
@@ -0,0 +1,119 @@
1
+ from .raw_paragraph import RawParagraph
2
+ from .raw_list import RawList
3
+ from .raw_table import RawTable
4
+ from .raw_image import RawImage
5
+
6
+
7
class RawSection:
    """A document section: title, number, heading level and ordered content."""

    def __init__(self, title: str | None, number: str | None, level: int):
        # Surrounding whitespace is stripped; empty or None values are kept.
        self.title = title.strip() if title else title
        self.number = number.strip() if number else number
        self.level = level
        self.items = []

    def add(self, item: RawParagraph | RawList | RawTable | RawImage) -> None:
        """Append a content item to the section."""
        self.items.append(item)

    def is_in_list(self) -> bool:
        """Return True if the last item added is a list."""
        return bool(self.items) and isinstance(self.items[-1], RawList)

    def current_list(self) -> RawList:
        """Return the last item if it is a list, otherwise None."""
        if not self.items:
            return None
        last = self.items[-1]
        return last if isinstance(last, RawList) else None

    def to_dict(self) -> dict:
        """Convert the section to a dictionary representation"""
        content = []
        for item in self.items:
            if hasattr(item, "to_dict"):
                content.append(item.to_dict())
            else:
                content.append(str(item))
        return {
            "type": "section",
            "title": self.title,
            "number": self.number,
            "level": self.level,
            "items": content,
        }

    def to_html(self):
        """Render every item as HTML, one per line."""
        return "\n".join([item.to_html() for item in self.items])

    def to_html_between(self, start, end):
        """Render the items at positions ``start`` up to, not including, ``end``."""
        rendered = [
            item.to_html()
            for position, item in enumerate(self.items)
            if start <= position < end
        ]
        return "\n".join(rendered)

    def paragraphs(self) -> list[RawParagraph]:
        """Return the items that are paragraphs."""
        return [item for item in self.items if isinstance(item, RawParagraph)]

    def tables(self) -> list[RawTable]:
        """Return the items that are tables."""
        return [item for item in self.items if isinstance(item, RawTable)]

    def lists(self) -> list[RawList]:
        """Return the items that are lists."""
        return [item for item in self.items if isinstance(item, RawList)]

    def items_between(self, start_index, end_index):
        """Return the slice of items from ``start_index`` to ``end_index``."""
        return self.items[start_index:end_index]

    def find(self, text) -> list[RawParagraph]:
        """Return the paragraphs whose text contains ``text``."""
        matches = []
        for item in self.items:
            if isinstance(item, RawParagraph) and item.find(text):
                matches.append(item)
        return matches

    def find_at_start(self, text) -> list[RawParagraph]:
        """Return the paragraphs whose text starts with ``text``."""
        matches = []
        for item in self.items:
            if isinstance(item, RawParagraph) and item.find_at_start(text):
                matches.append(item)
        return matches

    def find_first_at_start(self, text) -> tuple[RawParagraph, int]:
        """Return the first paragraph starting with ``text`` and its position.

        Returns ``(None, -1)`` when no paragraph matches.
        """
        for position, item in enumerate(self.items):
            if isinstance(item, RawParagraph) and item.find_at_start(text):
                return item, position
        return None, -1

    def has_lists(self) -> bool:
        """Return True if the section contains at least one list."""
        return len(self.lists()) > 0

    def has_content(self) -> bool:
        """Return True if the section has any items."""
        return not self.is_empty()

    def is_empty(self) -> bool:
        """Return True if the section has no items."""
        return len(self.items) == 0

    def next(self, index: int):
        """Return the item after position ``index``, or None at the end."""
        following = index + 1
        if following < len(self.items):
            return self.items[following]
        return None

    def next_paragraph(self, start_index: int) -> RawParagraph:
        """Return the first paragraph at or after ``start_index``, or None."""
        candidates = (
            item
            for position, item in enumerate(self.items)
            if position >= start_index and isinstance(item, RawParagraph)
        )
        return next(candidates, None)

    def next_table(self, start_index: int) -> RawTable:
        """Return the first table at or after ``start_index``, or None."""
        candidates = (
            item
            for position, item in enumerate(self.items)
            if position >= start_index and isinstance(item, RawTable)
        )
        return next(candidates, None)

    def _format_heading(self):
        """Return an ``<hN>`` element from number and title, or "" if both are empty."""
        if self.number and self.title:
            heading = f"{self.number} {self.title}"
        elif self.number:
            heading = f"{self.number}"
        elif self.title:
            heading = f"{self.title}"
        else:
            return ""
        return f"<h{self.level}>{heading}</h{self.level}>"
@@ -0,0 +1,48 @@
1
class RawTable:
    """A table held as an ordered list of rows plus CSS class names."""

    def __init__(self):
        # Imported here rather than at module level: see the circular import
        # note on add().
        from .raw_table_row import RawTableRow

        self.rows: list[RawTableRow] = []
        self.klasses = ["ich-m11-table"]

    # @ToDo Would like RawTableRow here but gets a circular import
    def add(self, item):
        """Append a row to the table."""
        self.rows.append(item)

    def row(self, index: int):
        """Return the row at ``index``, or None if ``index`` is past the end."""
        if index < len(self.rows):
            return self.rows[index]
        return None

    def next(self, index: int) -> tuple[object, int]:
        """Return the row after ``index`` with its position, or ``(None, -1)``."""
        following = index + 1
        if following < len(self.rows):
            return self.rows[following], following
        return None, -1

    def find_row(self, text: str) -> tuple[object, int]:
        """Find the first row whose first cell contains ``text``, ignoring case.

        Only rows whose first cell reports ``is_text()`` are considered.
        Returns the row and its position, or ``(None, -1)`` if none matches.
        """
        for position, candidate in enumerate(self.rows):
            first_cell = candidate.cells[0]
            if not first_cell.is_text():
                continue
            if text.upper() in first_cell.text().upper():
                return candidate, position
        return None, -1

    def to_html(self):
        """Render the table as a ``<table>`` element, one row per line."""
        if self.klasses:
            names = " ".join(self.klasses)
            opening = f'<table class="{names}">'
        else:
            opening = "<table>"
        rendered_rows = [item.to_html() for item in self.rows]
        return "\n".join([opening, *rendered_rows, "</table>"])

    def add_class(self, klass):
        """Append a class name used when rendering HTML."""
        self.klasses.append(klass)

    def replace_class(self, old_klass, new_klass):
        """Remove ``old_klass`` and append ``new_klass``."""
        self.klasses.remove(old_klass)
        self.klasses.append(new_klass)

    def to_dict(self) -> dict:
        """Convert the table to a dictionary representation"""
        return dict(type="table", rows=[row.to_dict() for row in self.rows])
@@ -0,0 +1,62 @@
1
+ from .raw_paragraph import RawParagraph
2
+ from .raw_list import RawList
3
+ from .raw_table import RawTable
4
+
5
+
6
class RawTableCell:
    """A table cell: a list of content items plus horizontal/vertical span information."""

    def __init__(self, h_span: int = 1, v_span: int = 1, first: bool = True):
        """Create an empty cell.

        Args:
            h_span: number of columns the cell spans.
            v_span: number of rows the cell spans.
            first: when False, to_html() renders nothing for this cell
                (presumably a continuation of a merged cell -- confirm
                against the code that builds the table).
        """
        self.h_span = h_span
        self.v_span = v_span
        self.h_merged = h_span > 1
        self.v_merged = v_span > 1
        self.merged = self.h_merged or self.v_merged
        self.first = first
        self.items = []

    # Annotations that use ``|`` are quoted so they are not evaluated when the
    # class is defined; evaluating them needs Python 3.10 or later.
    def add(self, item: "RawParagraph | RawList | RawTable") -> None:
        """Append a content item to the cell."""
        self.items.append(item)

    def is_text(self) -> bool:
        """Return True when every item is a RawParagraph (also True for an empty cell)."""
        return all(isinstance(item, RawParagraph) for item in self.items)

    def text(self) -> str:
        """Return the ``text`` of every item, joined by newlines.

        Reads the ``text`` attribute of each item, so call it only on cells
        for which is_text() is true.
        """
        return "\n".join(x.text for x in self.items)

    def is_in_list(self) -> bool:
        """Return True when the most recently added item is a RawList."""
        return bool(self.items) and isinstance(self.items[-1], RawList)

    def current_list(self) -> "RawList | None":
        """Return the most recently added item if it is a RawList, otherwise None."""
        if self.items and isinstance(self.items[-1], RawList):
            return self.items[-1]
        return None

    def to_html(self) -> str:
        """Render the cell as a ``<td>`` element, one line per part.

        Cells with ``first`` False render as an empty string. Horizontal
        merges are emitted as ``colspan`` and vertical merges as ``rowspan``,
        so a merged cell covers the positions whose cells render as nothing.
        """
        if not self.first:
            return ""
        colspan = f' colspan="{self.h_span}"' if self.h_merged else ""
        # Without rowspan, a vertically merged cell would render one row tall.
        rowspan = f' rowspan="{self.v_span}"' if self.v_merged else ""
        lines = [f"<td{colspan}{rowspan}>"]
        lines.extend(item.to_html() for item in self.items)
        lines.append("</td>")
        return "\n".join(lines)

    def to_dict(self) -> dict:
        """Convert the table cell to a dictionary representation"""
        return {
            "type": "table_cell",
            "row_span": self.v_span,
            "col_span": self.h_span,
            "first": self.first,
            "content": [
                # Items without to_dict() fall back to their string form.
                item.to_dict() if hasattr(item, "to_dict") else str(item)
                for item in self.items
            ],
        }
@@ -0,0 +1,41 @@
1
+ from .raw_table_cell import RawTableCell
2
+
3
+
4
class RawTableRow:
    """A single table row: an ordered list of cells."""

    def __init__(self):
        self.cells: list[RawTableCell] = []

    def add(self, cell: RawTableCell):
        """Append a cell to the end of the row."""
        self.cells.append(cell)

    def find_cell(self, text: str) -> RawTableCell:
        """Return the first text cell containing ``text`` (case-insensitive), or None."""
        for candidate in self.cells:
            if candidate.is_text() and text.upper() in candidate.text().upper():
                return candidate
        return None

    def find_cell_next_to(self, text: str) -> RawTableCell:
        """Return the cell after the first text cell containing ``text``.

        The match is case-insensitive. The returned cell is whatever
        next_cell() finds after the matching position; None is returned when
        nothing matches or no such cell follows.
        """
        for position, candidate in enumerate(self.cells):
            if not candidate.is_text():
                continue
            if text.upper() in candidate.text().upper():
                return self.next_cell(position)
        return None

    def to_html(self):
        """Render the row as a ``<tr>`` element, one line per cell."""
        rendered = [cell.to_html() for cell in self.cells]
        return "\n".join(["<tr>", *rendered, "</tr>"])

    def next_cell(self, start_index: int) -> RawTableCell:
        """Return the first cell after ``start_index`` whose ``first`` flag is set, or None."""
        return next(
            (
                candidate
                for position, candidate in enumerate(self.cells)
                if position > start_index and candidate.first
            ),
            None,
        )

    def to_dict(self) -> dict:
        """Convert the table row to a dictionary representation"""
        cells = [cell.to_dict() for cell in self.cells]
        return {"type": "table_row", "cells": cells}
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: raw_docx
3
- Version: 0.3.0
3
+ Version: 0.5.0
4
4
  Summary: A package for processing and analyzing raw document formats
5
5
  Home-page: https://github.com/daveih/raw_docx
6
- Author: Dave Berson-Hurst
6
+ Author: Dave Iberson-Hurst
7
7
  Author-email:
8
8
  Classifier: Development Status :: 3 - Alpha
9
9
  Classifier: Intended Audience :: Developers
@@ -17,11 +17,8 @@ Classifier: Programming Language :: Python :: 3.11
17
17
  Requires-Python: >=3.8
18
18
  Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
- Requires-Dist: python-docx==1.1.2
21
- Requires-Dist: ruff==0.8.6
22
- Requires-Dist: python-json-logger==3.2.1
23
- Requires-Dist: pytest==7.4.4
24
- Requires-Dist: pytest-cov==4.1.0
20
+ Requires-Dist: python-docx
21
+ Requires-Dist: python-json-logger
25
22
  Dynamic: author
26
23
  Dynamic: classifier
27
24
  Dynamic: description
@@ -1,6 +1,21 @@
1
1
  LICENSE
2
2
  README.md
3
3
  setup.py
4
+ src/raw_docx/__init__.py
5
+ src/raw_docx/__version__.py
6
+ src/raw_docx/docx_paragraph.py
7
+ src/raw_docx/raw_document.py
8
+ src/raw_docx/raw_docx.py
9
+ src/raw_docx/raw_image.py
10
+ src/raw_docx/raw_list.py
11
+ src/raw_docx/raw_list_item.py
12
+ src/raw_docx/raw_logger.py
13
+ src/raw_docx/raw_paragraph.py
14
+ src/raw_docx/raw_run.py
15
+ src/raw_docx/raw_section.py
16
+ src/raw_docx/raw_table.py
17
+ src/raw_docx/raw_table_cell.py
18
+ src/raw_docx/raw_table_row.py
4
19
  src/raw_docx.egg-info/PKG-INFO
5
20
  src/raw_docx.egg-info/SOURCES.txt
6
21
  src/raw_docx.egg-info/dependency_links.txt
@@ -0,0 +1,2 @@
1
+ python-docx
2
+ python-json-logger
@@ -0,0 +1 @@
1
+ raw_docx
@@ -1,7 +1,7 @@
1
1
  from unittest.mock import Mock, PropertyMock
2
2
  from docx.text.paragraph import Paragraph
3
3
  from docx.text.run import Run
4
- from docx_paragraph import (
4
+ from src.raw_docx.docx_paragraph import (
5
5
  extract_runs,
6
6
  _tidy_runs_color,
7
7
  _get_highlight_color,
@@ -1,5 +1,5 @@
1
1
  import json
2
- from raw_docx import RawDocx
2
+ from src.raw_docx.raw_docx import RawDocx
3
3
 
4
4
  WRITE_FILE = True
5
5
 
@@ -1,8 +1,8 @@
1
1
  import pytest
2
- from raw_document import RawDocument
3
- from raw_section import RawSection
4
- from raw_paragraph import RawParagraph
5
- from raw_run import RawRun
2
+ from src.raw_docx.raw_document import RawDocument
3
+ from src.raw_docx.raw_section import RawSection
4
+ from src.raw_docx.raw_paragraph import RawParagraph
5
+ from src.raw_docx.raw_run import RawRun
6
6
 
7
7
 
8
8
  @pytest.fixture
@@ -2,8 +2,8 @@ import pytest
2
2
  import os
3
3
  from docx import Document as DocxDocument
4
4
  from docx.shared import Inches
5
- from raw_docx import RawDocx
6
- from raw_document import RawDocument
5
+ from src.raw_docx.raw_docx import RawDocx
6
+ from src.raw_docx.raw_document import RawDocument
7
7
 
8
8
 
9
9
  @pytest.fixture
@@ -1,5 +1,5 @@
1
1
  import pytest
2
- from raw_image import RawImage
2
+ from src.raw_docx.raw_image import RawImage
3
3
 
4
4
 
5
5
  @pytest.fixture
@@ -1,8 +1,8 @@
1
1
  import pytest
2
2
  from unittest.mock import patch
3
- from raw_list import RawList
4
- from raw_list_item import RawListItem
5
- from raw_run import RawRun
3
+ from src.raw_docx.raw_list import RawList
4
+ from src.raw_docx.raw_list_item import RawListItem
5
+ from src.raw_docx.raw_run import RawRun
6
6
 
7
7
 
8
8
  @pytest.fixture
@@ -58,7 +58,7 @@ def test_add_multiple_items():
58
58
 
59
59
  def test_add_multiple_items_level_error():
60
60
  """Test adding multiple items with different levels with level error"""
61
- with patch("raw_list.logger") as mock_logger:
61
+ with patch("src.raw_docx.raw_list.logger") as mock_logger:
62
62
  list = RawList(1)
63
63
  items = [
64
64
  RawListItem([RawRun("Item 1", "", None, "Normal")], 1),
@@ -105,7 +105,7 @@ def test_nested_list_to_html():
105
105
 
106
106
  def test_add_item_lower_level_logs_error():
107
107
  """Test that adding an item with lower level than list level logs an error"""
108
- with patch("raw_list.logger") as mock_logger:
108
+ with patch("src.raw_docx.raw_list.logger") as mock_logger:
109
109
  list_obj = RawList(2) # List with level 2
110
110
  item = RawListItem(
111
111
  [RawRun("Test Item", "", None, "Normal")], 1
@@ -1,6 +1,6 @@
1
1
  import pytest
2
- from raw_list_item import RawListItem
3
- from raw_run import RawRun
2
+ from src.raw_docx.raw_list_item import RawListItem
3
+ from src.raw_docx.raw_run import RawRun
4
4
 
5
5
 
6
6
  @pytest.fixture
@@ -2,7 +2,7 @@ import json
2
2
  import logging
3
3
  import pytest
4
4
  from pathlib import Path
5
- from raw_logger import RawLogger
5
+ from src.raw_docx.raw_logger import RawLogger
6
6
 
7
7
 
8
8
  @pytest.fixture
@@ -1,6 +1,6 @@
1
1
  import pytest
2
- from raw_paragraph import RawParagraph
3
- from raw_run import RawRun
2
+ from src.raw_docx.raw_paragraph import RawParagraph
3
+ from src.raw_docx.raw_run import RawRun
4
4
 
5
5
 
6
6
  @pytest.fixture
@@ -2,7 +2,7 @@ import pytest
2
2
  from docx import Document
3
3
  from docx.shared import RGBColor
4
4
  from docx.enum.text import WD_COLOR_INDEX
5
- from raw_run import RawRun
5
+ from src.raw_docx.raw_run import RawRun
6
6
 
7
7
 
8
8
  @pytest.fixture
@@ -1,13 +1,13 @@
1
1
  import pytest
2
- from raw_section import RawSection
3
- from raw_paragraph import RawParagraph
4
- from raw_list import RawList
5
- from raw_image import RawImage
6
- from raw_table import RawTable
7
- from raw_table_row import RawTableRow
8
- from raw_table_cell import RawTableCell
9
- from raw_list_item import RawListItem
10
- from raw_run import RawRun
2
+ from src.raw_docx.raw_section import RawSection
3
+ from src.raw_docx.raw_paragraph import RawParagraph
4
+ from src.raw_docx.raw_list import RawList
5
+ from src.raw_docx.raw_image import RawImage
6
+ from src.raw_docx.raw_table import RawTable
7
+ from src.raw_docx.raw_table_row import RawTableRow
8
+ from src.raw_docx.raw_table_cell import RawTableCell
9
+ from src.raw_docx.raw_list_item import RawListItem
10
+ from src.raw_docx.raw_run import RawRun
11
11
 
12
12
 
13
13
  @pytest.fixture
@@ -1,9 +1,9 @@
1
1
  import pytest
2
- from raw_table import RawTable
3
- from raw_table_row import RawTableRow
4
- from raw_table_cell import RawTableCell
5
- from raw_paragraph import RawParagraph
6
- from raw_run import RawRun
2
+ from src.raw_docx.raw_table import RawTable
3
+ from src.raw_docx.raw_table_row import RawTableRow
4
+ from src.raw_docx.raw_table_cell import RawTableCell
5
+ from src.raw_docx.raw_paragraph import RawParagraph
6
+ from src.raw_docx.raw_run import RawRun
7
7
 
8
8
 
9
9
  @pytest.fixture
@@ -1,9 +1,9 @@
1
1
  import pytest
2
- from raw_table_cell import RawTableCell
3
- from raw_paragraph import RawParagraph
4
- from raw_list import RawList
5
- from raw_table import RawTable
6
- from raw_run import RawRun
2
+ from src.raw_docx.raw_table_cell import RawTableCell
3
+ from src.raw_docx.raw_paragraph import RawParagraph
4
+ from src.raw_docx.raw_list import RawList
5
+ from src.raw_docx.raw_table import RawTable
6
+ from src.raw_docx.raw_run import RawRun
7
7
 
8
8
 
9
9
  @pytest.fixture
@@ -1,8 +1,8 @@
1
1
  import pytest
2
- from raw_table_row import RawTableRow
3
- from raw_table_cell import RawTableCell
4
- from raw_paragraph import RawParagraph
5
- from raw_run import RawRun
2
+ from src.raw_docx.raw_table_row import RawTableRow
3
+ from src.raw_docx.raw_table_cell import RawTableCell
4
+ from src.raw_docx.raw_paragraph import RawParagraph
5
+ from src.raw_docx.raw_run import RawRun
6
6
 
7
7
 
8
8
  @pytest.fixture
@@ -1,5 +0,0 @@
1
- python-docx==1.1.2
2
- ruff==0.8.6
3
- python-json-logger==3.2.1
4
- pytest==7.4.4
5
- pytest-cov==4.1.0
File without changes
File without changes
File without changes