raw-docx 0.7.0__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {raw_docx-0.7.0 → raw_docx-0.8.0}/PKG-INFO +4 -4
- {raw_docx-0.7.0 → raw_docx-0.8.0}/README.md +1 -1
- {raw_docx-0.7.0 → raw_docx-0.8.0}/setup.py +1 -1
- raw_docx-0.8.0/src/raw_docx/__info__.py +1 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/src/raw_docx/raw_document.py +1 -1
- {raw_docx-0.7.0 → raw_docx-0.8.0}/src/raw_docx/raw_docx.py +23 -11
- {raw_docx-0.7.0 → raw_docx-0.8.0}/src/raw_docx.egg-info/PKG-INFO +4 -4
- raw_docx-0.8.0/src/raw_docx.egg-info/requires.txt +2 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/tests/test_docx_paragraph.py +48 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/tests/test_raw_document.py +56 -0
- raw_docx-0.8.0/tests/test_raw_docx.py +628 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/tests/test_raw_list.py +26 -1
- {raw_docx-0.7.0 → raw_docx-0.8.0}/tests/test_raw_paragraph.py +37 -0
- raw_docx-0.7.0/src/raw_docx/__info__.py +0 -1
- raw_docx-0.7.0/src/raw_docx.egg-info/requires.txt +0 -2
- raw_docx-0.7.0/tests/test_raw_docx.py +0 -107
- {raw_docx-0.7.0 → raw_docx-0.8.0}/LICENSE +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/setup.cfg +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/src/raw_docx/__init__.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/src/raw_docx/docx/__init__.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/src/raw_docx/docx/docx_paragraph.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/src/raw_docx/raw_image.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/src/raw_docx/raw_list.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/src/raw_docx/raw_list_item.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/src/raw_docx/raw_paragraph.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/src/raw_docx/raw_run.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/src/raw_docx/raw_section.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/src/raw_docx/raw_table.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/src/raw_docx/raw_table_cell.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/src/raw_docx/raw_table_row.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/src/raw_docx.egg-info/SOURCES.txt +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/src/raw_docx.egg-info/dependency_links.txt +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/src/raw_docx.egg-info/top_level.txt +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/tests/test_integration.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/tests/test_raw_image.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/tests/test_raw_list_item.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/tests/test_raw_run.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/tests/test_raw_section.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/tests/test_raw_table.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/tests/test_raw_table_cell.py +0 -0
- {raw_docx-0.7.0 → raw_docx-0.8.0}/tests/test_raw_table_row.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: raw_docx
|
3
|
-
Version: 0.7.0
|
3
|
+
Version: 0.8.0
|
4
4
|
Summary: A package for processing and analyzing raw document formats
|
5
5
|
Home-page: https://github.com/daveih/raw_docx
|
6
6
|
Author: Dave Iberson-Hurst
|
@@ -17,8 +17,8 @@ Classifier: Programming Language :: Python :: 3.11
|
|
17
17
|
Requires-Python: >=3.8
|
18
18
|
Description-Content-Type: text/markdown
|
19
19
|
License-File: LICENSE
|
20
|
-
Requires-Dist: python-docx
|
21
|
-
Requires-Dist: simple_error_log
|
20
|
+
Requires-Dist: python-docx==1.1.2
|
21
|
+
Requires-Dist: simple_error_log==0.6.0
|
22
22
|
Dynamic: author
|
23
23
|
Dynamic: classifier
|
24
24
|
Dynamic: description
|
@@ -38,4 +38,4 @@ Simple package to build on top of python-docx to assist in the handling of word
|
|
38
38
|
Build as a normal package
|
39
39
|
|
40
40
|
- Build with `python3 -m build --sdist --wheel`
|
41
|
-
- Upload to pypi.org using `twine upload dist
|
41
|
+
- Upload to pypi.org using `twine upload dist/*`
|
@@ -19,7 +19,7 @@ setup(
|
|
19
19
|
packages=find_packages(where="src"),
|
20
20
|
package_dir={"": "src"},
|
21
21
|
package_data={},
|
22
|
-
install_requires=["python-docx", "simple_error_log"],
|
22
|
+
install_requires=["python-docx==1.1.2", "simple_error_log==0.6.0"],
|
23
23
|
tests_require=["pytest", "pytest-cov", "pytest-mock", "python-dotenv"],
|
24
24
|
classifiers=[
|
25
25
|
"Development Status :: 3 - Alpha",
|
@@ -0,0 +1 @@
|
|
1
|
+
__package_version__ = "0.8.0"
|
@@ -144,7 +144,10 @@ class RawDocx:
|
|
144
144
|
else:
|
145
145
|
h_span = 1
|
146
146
|
v_span = 1
|
147
|
-
|
147
|
+
if cell._tc is not None:
|
148
|
+
first = r_index == cell._tc.top and c_index == cell._tc.left
|
149
|
+
else:
|
150
|
+
first = r_index == 0 and c_index == 0
|
148
151
|
target_cell = RawTableCell(h_span, v_span, first)
|
149
152
|
target_row.add(target_cell)
|
150
153
|
for block_item in self._iter_block_items(cell):
|
@@ -207,18 +210,27 @@ class RawDocx:
|
|
207
210
|
list_level = paragraph._p.xpath("./w:pPr/w:numPr/w:ilvl/@w:val")
|
208
211
|
return int(str(list_level[0])) if list_level else 0
|
209
212
|
|
210
|
-
def _is_heading(self, text):
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
213
|
+
def _is_heading(self, text) -> tuple[bool, int]:
|
214
|
+
"""
|
215
|
+
Extract heading level from text containing "Heading <N>" pattern.
|
216
|
+
|
217
|
+
Args:
|
218
|
+
text: Text to analyze for heading pattern
|
219
|
+
|
220
|
+
Returns:
|
221
|
+
tuple[bool, int]: (success, level) where success indicates if heading
|
222
|
+
pattern was found and level is the extracted integer value
|
223
|
+
"""
|
224
|
+
if not text:
|
225
|
+
return False, 0
|
226
|
+
|
227
|
+
# Look for "Heading <N>" pattern where <N> is one or more digits
|
228
|
+
match = re.search(r"Heading\s+(\d+)", text, re.IGNORECASE)
|
229
|
+
if match:
|
218
230
|
try:
|
219
|
-
level = int(
|
231
|
+
level = int(match.group(1))
|
220
232
|
return True, level
|
221
|
-
except
|
233
|
+
except (ValueError, IndexError):
|
222
234
|
return True, 0
|
223
235
|
return False, 0
|
224
236
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: raw_docx
|
3
|
-
Version: 0.7.0
|
3
|
+
Version: 0.8.0
|
4
4
|
Summary: A package for processing and analyzing raw document formats
|
5
5
|
Home-page: https://github.com/daveih/raw_docx
|
6
6
|
Author: Dave Iberson-Hurst
|
@@ -17,8 +17,8 @@ Classifier: Programming Language :: Python :: 3.11
|
|
17
17
|
Requires-Python: >=3.8
|
18
18
|
Description-Content-Type: text/markdown
|
19
19
|
License-File: LICENSE
|
20
|
-
Requires-Dist: python-docx
|
21
|
-
Requires-Dist: simple_error_log
|
20
|
+
Requires-Dist: python-docx==1.1.2
|
21
|
+
Requires-Dist: simple_error_log==0.6.0
|
22
22
|
Dynamic: author
|
23
23
|
Dynamic: classifier
|
24
24
|
Dynamic: description
|
@@ -38,4 +38,4 @@ Simple package to build on top of python-docx to assist in the handling of word
|
|
38
38
|
Build as a normal package
|
39
39
|
|
40
40
|
- Build with `python3 -m build --sdist --wheel`
|
41
|
-
- Upload to pypi.org using `twine upload dist
|
41
|
+
- Upload to pypi.org using `twine upload dist/*`
|
@@ -194,3 +194,51 @@ def test_extract_runs_with_mixed_colors():
|
|
194
194
|
result = extract_runs(paragraph, errors)
|
195
195
|
assert len(result) == 3
|
196
196
|
assert [r.color for r in result] == ["FF0000", "0000FF", "FF0000"]
|
197
|
+
|
198
|
+
|
199
|
+
def test_run_style_color_exception_handling():
|
200
|
+
"""Test _run_style_color exception handling (lines 80-82)"""
|
201
|
+
errors = Errors()
|
202
|
+
|
203
|
+
# Create a run that will cause an exception when accessing style properties
|
204
|
+
run = Mock(spec=Run)
|
205
|
+
|
206
|
+
# Create a style that raises an exception when trying to access font.color.rgb
|
207
|
+
def side_effect_func(*args, **kwargs):
|
208
|
+
raise AttributeError("Font color access error")
|
209
|
+
|
210
|
+
style_mock = Mock()
|
211
|
+
style_mock.font.color.rgb = Mock(side_effect=side_effect_func)
|
212
|
+
# Make the rgb property falsy in boolean context but raise when called
|
213
|
+
style_mock.font.color.rgb.__bool__ = Mock(side_effect=side_effect_func)
|
214
|
+
run.style = style_mock
|
215
|
+
|
216
|
+
result = _run_style_color(run, errors)
|
217
|
+
assert result is None
|
218
|
+
# Should have logged the exception
|
219
|
+
|
220
|
+
|
221
|
+
def test_get_font_colour_exception_handling():
|
222
|
+
"""Test _get_font_colour exception handling"""
|
223
|
+
errors = Errors()
|
224
|
+
|
225
|
+
# Create an item that will cause an exception when accessing font properties
|
226
|
+
item = Mock()
|
227
|
+
item.font = None # This will cause AttributeError when trying to access font.color
|
228
|
+
|
229
|
+
result = _get_font_colour(item, errors)
|
230
|
+
assert result is None
|
231
|
+
# Should have logged the exception
|
232
|
+
|
233
|
+
|
234
|
+
def test_get_highlight_color_exception_handling():
|
235
|
+
"""Test _get_highlight_color exception handling"""
|
236
|
+
errors = Errors()
|
237
|
+
|
238
|
+
# Create a run that will cause an exception when accessing highlight color
|
239
|
+
run = Mock(spec=Run)
|
240
|
+
run.font = None # This will cause AttributeError when trying to access font.highlight_color
|
241
|
+
|
242
|
+
result = _get_highlight_color(run, errors)
|
243
|
+
assert result is None
|
244
|
+
# Should have logged the exception
|
@@ -42,6 +42,62 @@ def test_current_section_with_sections(document, sample_section):
|
|
42
42
|
assert current == sample_section
|
43
43
|
|
44
44
|
|
45
|
+
def test_section_by_ordinal(document):
|
46
|
+
"""Test getting section by ordinal position"""
|
47
|
+
# Add some sections
|
48
|
+
section1 = RawSection("Section 1", "1", 1)
|
49
|
+
document.add(section1)
|
50
|
+
|
51
|
+
section2 = RawSection("Section 2", "2", 1)
|
52
|
+
document.add(section2)
|
53
|
+
|
54
|
+
# Test valid ordinals
|
55
|
+
assert document.section_by_ordinal(1) == document.sections[0] # Initial section
|
56
|
+
assert document.section_by_ordinal(2) == section1
|
57
|
+
assert document.section_by_ordinal(3) == section2
|
58
|
+
|
59
|
+
# Test invalid ordinals
|
60
|
+
assert document.section_by_ordinal(0) is None # Below range
|
61
|
+
assert document.section_by_ordinal(4) is None # Above range
|
62
|
+
assert document.section_by_ordinal(-1) is None # Negative
|
63
|
+
|
64
|
+
|
65
|
+
def test_section_by_number(document):
|
66
|
+
"""Test getting section by section number"""
|
67
|
+
# Add sections with specific numbers
|
68
|
+
section1 = RawSection("Section 1", "1", 1)
|
69
|
+
document.add(section1)
|
70
|
+
|
71
|
+
section2 = RawSection("Section 2", "2", 1)
|
72
|
+
document.add(section2)
|
73
|
+
|
74
|
+
# Test valid section numbers
|
75
|
+
assert document.section_by_number("1") == section1
|
76
|
+
assert document.section_by_number("2") == section2
|
77
|
+
|
78
|
+
# Test invalid section number
|
79
|
+
assert document.section_by_number("999") is None
|
80
|
+
assert document.section_by_number("nonexistent") is None
|
81
|
+
|
82
|
+
|
83
|
+
def test_section_by_title(document):
|
84
|
+
"""Test getting section by section title"""
|
85
|
+
# Add sections with specific titles
|
86
|
+
section1 = RawSection("First Section", "1", 1)
|
87
|
+
document.add(section1)
|
88
|
+
|
89
|
+
section2 = RawSection("Second Section", "2", 1)
|
90
|
+
document.add(section2)
|
91
|
+
|
92
|
+
# Test valid section titles
|
93
|
+
assert document.section_by_title("First Section") == section1
|
94
|
+
assert document.section_by_title("Second Section") == section2
|
95
|
+
|
96
|
+
# Test invalid section title
|
97
|
+
assert document.section_by_title("Nonexistent Section") is None
|
98
|
+
assert document.section_by_title("") is None
|
99
|
+
|
100
|
+
|
45
101
|
def test_to_dict(document):
|
46
102
|
"""Test converting document to dictionary"""
|
47
103
|
# Add sections with content
|
@@ -0,0 +1,628 @@
|
|
1
|
+
import pytest
|
2
|
+
import os
|
3
|
+
import tempfile
|
4
|
+
import zipfile
|
5
|
+
from unittest.mock import Mock, patch, PropertyMock
|
6
|
+
from pathlib import Path
|
7
|
+
from docx import Document as DocxDocument
|
8
|
+
from docx.shared import Inches
|
9
|
+
from docx.text.paragraph import Paragraph
|
10
|
+
from docx.table import Table, _Cell
|
11
|
+
from docx.oxml.text.paragraph import CT_P
|
12
|
+
from docx.oxml.table import CT_Tbl, CT_TcPr
|
13
|
+
from lxml import etree
|
14
|
+
import docx.parts.image
|
15
|
+
from src.raw_docx.raw_docx import RawDocx
|
16
|
+
from src.raw_docx.raw_document import RawDocument
|
17
|
+
|
18
|
+
|
19
|
+
@pytest.fixture
|
20
|
+
def raw_docx():
|
21
|
+
return RawDocx("tests/test_files/example_1.docx")
|
22
|
+
|
23
|
+
|
24
|
+
@pytest.fixture
|
25
|
+
def temp_docx(tmp_path):
|
26
|
+
"""Create a test docx file with various content"""
|
27
|
+
doc_path = tmp_path / "test.docx"
|
28
|
+
doc = DocxDocument()
|
29
|
+
|
30
|
+
# Add regular paragraph
|
31
|
+
doc.add_paragraph("Regular paragraph")
|
32
|
+
|
33
|
+
# Add list items using standard list style
|
34
|
+
doc.add_paragraph("First bullet point", style="List Bullet")
|
35
|
+
doc.add_paragraph("Second bullet point", style="List Bullet")
|
36
|
+
|
37
|
+
# Add table with merged cells
|
38
|
+
table = doc.add_table(rows=2, cols=2)
|
39
|
+
table.cell(0, 0).merge(table.cell(0, 1)) # Merge first row
|
40
|
+
table.cell(0, 0).text = "Merged cell"
|
41
|
+
table.cell(1, 0).text = "Cell 1"
|
42
|
+
table.cell(1, 1).text = "Cell 2"
|
43
|
+
|
44
|
+
# Add image if test file exists
|
45
|
+
image_path = os.path.join(os.path.dirname(__file__), "test_files", "test_image.png")
|
46
|
+
if os.path.exists(image_path):
|
47
|
+
doc.add_picture(image_path, width=Inches(1.0))
|
48
|
+
|
49
|
+
# Save the document
|
50
|
+
doc.save(doc_path)
|
51
|
+
return str(doc_path)
|
52
|
+
|
53
|
+
|
54
|
+
@pytest.fixture
|
55
|
+
def simple_docx(tmp_path):
|
56
|
+
"""Create a simple test docx file"""
|
57
|
+
doc_path = tmp_path / "simple_test.docx"
|
58
|
+
doc = DocxDocument()
|
59
|
+
doc.add_paragraph("Simple paragraph")
|
60
|
+
doc.save(doc_path)
|
61
|
+
return str(doc_path)
|
62
|
+
|
63
|
+
|
64
|
+
# Integration Tests
|
65
|
+
def test_to_dict_with_document(raw_docx):
|
66
|
+
"""Test converting RawDocx to dictionary with loaded document"""
|
67
|
+
result = raw_docx.to_dict()
|
68
|
+
assert result["type"] == "raw_docx"
|
69
|
+
assert result["document"] is not None
|
70
|
+
assert result["document"]["type"] == "document"
|
71
|
+
assert isinstance(result["document"]["sections"], list)
|
72
|
+
|
73
|
+
|
74
|
+
def test_initialization_and_processing(temp_docx):
|
75
|
+
"""Test document initialization and processing"""
|
76
|
+
from raw_docx.raw_document import RawDocument
|
77
|
+
docx = RawDocx(temp_docx)
|
78
|
+
assert os.path.exists(docx.image_path)
|
79
|
+
assert isinstance(docx.target_document, RawDocument)
|
80
|
+
assert len(docx.target_document.sections) > 0
|
81
|
+
|
82
|
+
|
83
|
+
def test_table_processing(temp_docx):
|
84
|
+
"""Test table processing with merged cells"""
|
85
|
+
docx = RawDocx(temp_docx)
|
86
|
+
tables = docx.target_document.sections[0].tables()
|
87
|
+
assert len(tables) > 0
|
88
|
+
|
89
|
+
# Check merged cells
|
90
|
+
first_table = tables[0]
|
91
|
+
first_row = first_table.rows[0]
|
92
|
+
assert len(first_row.cells) == 2
|
93
|
+
assert first_row.cells[0].h_span == 2 # Horizontally merged
|
94
|
+
assert first_row.cells[0].first is True
|
95
|
+
assert first_row.cells[1].first is False
|
96
|
+
|
97
|
+
|
98
|
+
def test_image_processing(temp_docx):
|
99
|
+
"""Test image extraction and processing"""
|
100
|
+
docx = RawDocx(temp_docx)
|
101
|
+
|
102
|
+
# Check if image directory was created
|
103
|
+
assert os.path.exists(docx.image_path)
|
104
|
+
|
105
|
+
# Check if image is referenced in document
|
106
|
+
image_path = os.path.join(os.path.dirname(__file__), "test_files", "test_image.png")
|
107
|
+
if os.path.exists(image_path):
|
108
|
+
found_image = False
|
109
|
+
for section in docx.target_document.sections:
|
110
|
+
for item in section.items:
|
111
|
+
if hasattr(item, "filepath") and item.filepath.endswith(
|
112
|
+
(".png", ".jpg", ".jpeg")
|
113
|
+
):
|
114
|
+
found_image = True
|
115
|
+
break
|
116
|
+
assert found_image, "Image not found in document"
|
117
|
+
|
118
|
+
|
119
|
+
def test_error_handling(tmp_path):
|
120
|
+
"""Test error handling for invalid files and directories"""
|
121
|
+
# Test with non-existent file
|
122
|
+
with pytest.raises(Exception):
|
123
|
+
RawDocx(str(tmp_path / "nonexistent.docx"))
|
124
|
+
|
125
|
+
# Test with invalid file format
|
126
|
+
invalid_file = tmp_path / "invalid.txt"
|
127
|
+
invalid_file.write_text("Not a docx file")
|
128
|
+
with pytest.raises(Exception):
|
129
|
+
RawDocx(str(invalid_file))
|
130
|
+
|
131
|
+
|
132
|
+
# Coverage Tests - Error Handling and Edge Cases
|
133
|
+
def test_organise_dir_permission_error(simple_docx):
|
134
|
+
"""Test _organise_dir with permission error"""
|
135
|
+
with patch('os.mkdir', side_effect=PermissionError("Permission denied")):
|
136
|
+
with patch.object(RawDocx, '_process'):
|
137
|
+
docx = RawDocx.__new__(RawDocx)
|
138
|
+
docx.errors = Mock()
|
139
|
+
docx.image_path = "/some/path"
|
140
|
+
docx._organise_dir()
|
141
|
+
docx.errors.exception.assert_called_once()
|
142
|
+
|
143
|
+
|
144
|
+
def test_process_exception_handling(simple_docx):
|
145
|
+
"""Test _process with exception in processing"""
|
146
|
+
with patch.object(RawDocx, '_organise_dir'):
|
147
|
+
with patch.object(RawDocx, '_process_images'):
|
148
|
+
docx = RawDocx.__new__(RawDocx)
|
149
|
+
docx.errors = Mock()
|
150
|
+
docx.target_document = Mock()
|
151
|
+
docx.target_document.current_section.return_value = Mock()
|
152
|
+
|
153
|
+
with patch.object(docx, '_iter_block_items', side_effect=Exception("Test error")):
|
154
|
+
docx._process()
|
155
|
+
docx.errors.exception.assert_called()
|
156
|
+
|
157
|
+
|
158
|
+
# Coverage Tests - _is_heading Method
|
159
|
+
def test_is_heading_comprehensive(simple_docx):
|
160
|
+
"""Test _is_heading method comprehensively"""
|
161
|
+
docx = RawDocx(simple_docx)
|
162
|
+
|
163
|
+
# Test valid patterns
|
164
|
+
assert docx._is_heading("Heading 1") == (True, 1)
|
165
|
+
assert docx._is_heading("Heading 2") == (True, 2)
|
166
|
+
assert docx._is_heading("heading 3") == (True, 3) # Case insensitive
|
167
|
+
assert docx._is_heading("HEADING 10") == (True, 10)
|
168
|
+
assert docx._is_heading("Some text with Heading 5 in it") == (True, 5)
|
169
|
+
assert docx._is_heading("Heading 7") == (True, 7) # Multiple spaces
|
170
|
+
|
171
|
+
# Test invalid patterns
|
172
|
+
assert docx._is_heading("Not a heading") == (False, 0)
|
173
|
+
assert docx._is_heading("Header 1") == (False, 0)
|
174
|
+
assert docx._is_heading("Heading") == (False, 0)
|
175
|
+
assert docx._is_heading("Heading abc") == (False, 0)
|
176
|
+
assert docx._is_heading("1 Heading") == (False, 0)
|
177
|
+
|
178
|
+
# Test edge cases
|
179
|
+
assert docx._is_heading("") == (False, 0)
|
180
|
+
assert docx._is_heading(None) == (False, 0)
|
181
|
+
|
182
|
+
|
183
|
+
# Coverage Tests - List Processing
|
184
|
+
def test_get_list_level_with_mock(simple_docx):
|
185
|
+
"""Test get_list_level with properly mocked paragraph"""
|
186
|
+
docx = RawDocx(simple_docx)
|
187
|
+
|
188
|
+
# Create mock paragraph with _p attribute
|
189
|
+
mock_paragraph = Mock()
|
190
|
+
mock_p = Mock()
|
191
|
+
mock_p.xpath.return_value = ['2']
|
192
|
+
mock_paragraph._p = mock_p
|
193
|
+
|
194
|
+
level = docx.get_list_level(mock_paragraph)
|
195
|
+
assert level == 2
|
196
|
+
|
197
|
+
# Test with no level
|
198
|
+
mock_p.xpath.return_value = []
|
199
|
+
level = docx.get_list_level(mock_paragraph)
|
200
|
+
assert level == 0
|
201
|
+
|
202
|
+
|
203
|
+
def test_is_list_comprehensive(simple_docx):
|
204
|
+
"""Test _is_list method comprehensively"""
|
205
|
+
docx = RawDocx(simple_docx)
|
206
|
+
|
207
|
+
# Test with xpath level
|
208
|
+
mock_paragraph = Mock()
|
209
|
+
mock_p = Mock()
|
210
|
+
mock_p.xpath.return_value = ['1']
|
211
|
+
mock_paragraph._p = mock_p
|
212
|
+
mock_paragraph.style = Mock()
|
213
|
+
mock_paragraph.style.name = "Normal"
|
214
|
+
mock_paragraph.text = "Regular text"
|
215
|
+
|
216
|
+
result = docx._is_list(mock_paragraph)
|
217
|
+
assert result is True
|
218
|
+
|
219
|
+
# Test with bullet styles
|
220
|
+
mock_p.xpath.return_value = []
|
221
|
+
mock_paragraph.style.name = "CPT_List Bullet"
|
222
|
+
result = docx._is_list(mock_paragraph)
|
223
|
+
assert result is True
|
224
|
+
|
225
|
+
mock_paragraph.style.name = "List Bullet"
|
226
|
+
result = docx._is_list(mock_paragraph)
|
227
|
+
assert result is True
|
228
|
+
|
229
|
+
# Test with bullet character
|
230
|
+
mock_paragraph.style.name = "Normal"
|
231
|
+
mock_paragraph.text = "• Bullet point"
|
232
|
+
result = docx._is_list(mock_paragraph)
|
233
|
+
assert result is True
|
234
|
+
|
235
|
+
# Test false cases
|
236
|
+
mock_paragraph.text = "Regular text"
|
237
|
+
result = docx._is_list(mock_paragraph)
|
238
|
+
assert result is False
|
239
|
+
|
240
|
+
mock_paragraph.text = ""
|
241
|
+
result = docx._is_list(mock_paragraph)
|
242
|
+
assert result is False
|
243
|
+
|
244
|
+
mock_paragraph.text = None
|
245
|
+
result = docx._is_list(mock_paragraph)
|
246
|
+
assert result is False
|
247
|
+
|
248
|
+
|
249
|
+
# Coverage Tests - Image Processing
|
250
|
+
def test_extract_images_with_media(simple_docx):
|
251
|
+
"""Test _extract_images when zipfile contains media files"""
|
252
|
+
docx = RawDocx.__new__(RawDocx)
|
253
|
+
docx.full_path = simple_docx
|
254
|
+
docx.image_path = "/tmp/test_images"
|
255
|
+
|
256
|
+
# Create mock zipfile with media files
|
257
|
+
mock_file = Mock()
|
258
|
+
mock_file.filename = "word/media/image1.png"
|
259
|
+
|
260
|
+
mock_archive = Mock()
|
261
|
+
mock_archive.filelist = [mock_file]
|
262
|
+
|
263
|
+
# Mock the context manager for archive.open
|
264
|
+
mock_source = Mock()
|
265
|
+
mock_source.read.return_value = b"fake image data"
|
266
|
+
mock_archive.open.return_value.__enter__ = Mock(return_value=mock_source)
|
267
|
+
mock_archive.open.return_value.__exit__ = Mock(return_value=None)
|
268
|
+
|
269
|
+
# Mock the file writing
|
270
|
+
mock_target = Mock()
|
271
|
+
mock_target.__enter__ = Mock(return_value=mock_target)
|
272
|
+
mock_target.__exit__ = Mock(return_value=None)
|
273
|
+
mock_target.write = Mock()
|
274
|
+
|
275
|
+
with patch('zipfile.ZipFile', return_value=mock_archive):
|
276
|
+
with patch('builtins.open', return_value=mock_target):
|
277
|
+
docx._extract_images()
|
278
|
+
# Should have tried to extract the image
|
279
|
+
mock_archive.open.assert_called()
|
280
|
+
|
281
|
+
|
282
|
+
# Coverage Tests - Utility Methods
|
283
|
+
def test_tree_method(simple_docx):
|
284
|
+
"""Test _tree method with nested children"""
|
285
|
+
docx = RawDocx(simple_docx)
|
286
|
+
|
287
|
+
# Create nested mock structure
|
288
|
+
grandchild = Mock()
|
289
|
+
grandchild.__iter__ = Mock(return_value=iter([]))
|
290
|
+
|
291
|
+
child = Mock()
|
292
|
+
child.__iter__ = Mock(return_value=iter([grandchild]))
|
293
|
+
|
294
|
+
root = Mock()
|
295
|
+
root.__iter__ = Mock(return_value=iter([child]))
|
296
|
+
|
297
|
+
# Should traverse the tree without errors
|
298
|
+
docx._tree(root)
|
299
|
+
|
300
|
+
|
301
|
+
def test_to_dict_edge_cases(simple_docx):
|
302
|
+
"""Test to_dict method edge cases"""
|
303
|
+
# Test without target_document attribute
|
304
|
+
docx = RawDocx.__new__(RawDocx)
|
305
|
+
result = docx.to_dict()
|
306
|
+
assert result == {"type": "raw_docx", "document": None}
|
307
|
+
|
308
|
+
# Test with target_document that doesn't have to_dict method
|
309
|
+
docx.target_document = "not a proper document"
|
310
|
+
result = docx.to_dict()
|
311
|
+
assert result == {"type": "raw_docx", "document": None}
|
312
|
+
|
313
|
+
# Test with proper target_document
|
314
|
+
docx = RawDocx(simple_docx)
|
315
|
+
result = docx.to_dict()
|
316
|
+
assert result["type"] == "raw_docx"
|
317
|
+
assert result["document"] is not None
|
318
|
+
assert isinstance(result["document"], dict)
|
319
|
+
|
320
|
+
|
321
|
+
def test_iter_block_items_string_handling(simple_docx):
|
322
|
+
"""Test _iter_block_items handles string children gracefully"""
|
323
|
+
docx = RawDocx(simple_docx)
|
324
|
+
|
325
|
+
# Mock iterchildren to return a string (should be handled gracefully)
|
326
|
+
with patch.object(docx.source_document.element.body, 'iterchildren', return_value=["string_child"]):
|
327
|
+
items = list(docx._iter_block_items(docx.source_document))
|
328
|
+
# Should handle gracefully and continue without error
|
329
|
+
|
330
|
+
|
331
|
+
def test_process_table_bottom_exception(simple_docx):
|
332
|
+
"""Test _process_table when accessing bottom raises exception"""
|
333
|
+
docx = RawDocx(simple_docx)
|
334
|
+
|
335
|
+
# Create a mock table structure
|
336
|
+
mock_table = Mock()
|
337
|
+
mock_row = Mock()
|
338
|
+
mock_cell = Mock()
|
339
|
+
mock_tc = Mock()
|
340
|
+
mock_tc.right = 2
|
341
|
+
mock_tc.left = 1
|
342
|
+
mock_tc.top = 1
|
343
|
+
|
344
|
+
# Make bottom property raise an exception
|
345
|
+
type(mock_tc).bottom = PropertyMock(side_effect=Exception("Bottom error"))
|
346
|
+
mock_cell._tc = mock_tc
|
347
|
+
mock_row.cells = [mock_cell]
|
348
|
+
mock_table.rows = [mock_row]
|
349
|
+
|
350
|
+
with patch.object(docx, '_iter_block_items', return_value=[]):
|
351
|
+
target_section = docx.target_document.current_section()
|
352
|
+
# Should handle the exception and set bottom = top + 1
|
353
|
+
docx._process_table(mock_table, target_section)
|
354
|
+
|
355
|
+
|
356
|
+
def test_logic_error_exception():
|
357
|
+
"""Test LogicError exception can be raised"""
|
358
|
+
with pytest.raises(RawDocx.LogicError):
|
359
|
+
raise RawDocx.LogicError("Test error")
|
360
|
+
|
361
|
+
|
362
|
+
# Additional Coverage Tests for Missing Lines
|
363
|
+
def test_process_with_unknown_block_item(simple_docx):
|
364
|
+
"""Test _process with unknown block item type (lines 66-67)"""
|
365
|
+
with patch.object(RawDocx, '_organise_dir'):
|
366
|
+
with patch.object(RawDocx, '_process_images'):
|
367
|
+
docx = RawDocx.__new__(RawDocx)
|
368
|
+
docx.errors = Mock()
|
369
|
+
docx.target_document = Mock()
|
370
|
+
docx.target_document.current_section.return_value = Mock()
|
371
|
+
|
372
|
+
# Mock _iter_block_items to return unknown type that will trigger warning and ValueError
|
373
|
+
unknown_item = 123 # Not a Paragraph or Table
|
374
|
+
with patch.object(docx, '_iter_block_items', return_value=[unknown_item]):
|
375
|
+
docx._process()
|
376
|
+
# Should catch the exception and log it
|
377
|
+
docx.errors.exception.assert_called()
|
378
|
+
|
379
|
+
|
380
|
+
|
381
|
+
|
382
|
+
def test_process_table_with_invalid_child_type(simple_docx):
|
383
|
+
"""Test _process_table with invalid child type (lines 153-163)"""
|
384
|
+
docx = RawDocx(simple_docx)
|
385
|
+
|
386
|
+
mock_table = Mock()
|
387
|
+
mock_row = Mock()
|
388
|
+
mock_cell = Mock()
|
389
|
+
mock_tc = Mock()
|
390
|
+
mock_tc.right = 2
|
391
|
+
mock_tc.left = 1
|
392
|
+
mock_tc.top = 1
|
393
|
+
mock_tc.bottom = 2
|
394
|
+
mock_cell._tc = mock_tc
|
395
|
+
mock_row.cells = [mock_cell]
|
396
|
+
mock_table.rows = [mock_row]
|
397
|
+
|
398
|
+
# Return invalid child type
|
399
|
+
invalid_child = "invalid_type"
|
400
|
+
|
401
|
+
with patch.object(docx, '_iter_block_items', return_value=[invalid_child]):
|
402
|
+
target_section = docx.target_document.current_section()
|
403
|
+
with pytest.raises(RawDocx.LogicError, match="something's not right with a child"):
|
404
|
+
docx._process_table(mock_table, target_section)
|
405
|
+
|
406
|
+
|
407
|
+
def test_process_paragraph_with_graphic_and_matching_rid(simple_docx):
|
408
|
+
"""Test _process_paragraph with graphic containing matching rId (lines 198-201)"""
|
409
|
+
docx = RawDocx(simple_docx)
|
410
|
+
|
411
|
+
# Create mock paragraph with graphic
|
412
|
+
mock_paragraph = Mock(spec=Paragraph)
|
413
|
+
mock_paragraph.style = Mock()
|
414
|
+
mock_paragraph.style.name = "Normal"
|
415
|
+
mock_paragraph._p = Mock()
|
416
|
+
mock_paragraph._p.xml = "Some xml content with rId1 reference and Graphic"
|
417
|
+
mock_paragraph.extract_runs.return_value = []
|
418
|
+
|
419
|
+
target_section = Mock()
|
420
|
+
image_rels = {"rId1": "/path/to/image.png"}
|
421
|
+
|
422
|
+
with patch.object(docx, '_is_heading', return_value=(False, 0)):
|
423
|
+
with patch.object(docx, '_is_list', return_value=False):
|
424
|
+
with patch('src.raw_docx.raw_docx.RawImage') as mock_image_class:
|
425
|
+
mock_image = Mock()
|
426
|
+
mock_image_class.return_value = mock_image
|
427
|
+
|
428
|
+
docx._process_paragraph(mock_paragraph, target_section, image_rels)
|
429
|
+
# Should have created and added image
|
430
|
+
mock_image_class.assert_called_with("/path/to/image.png", docx.errors)
|
431
|
+
target_section.add.assert_called_with(mock_image)
|
432
|
+
|
433
|
+
|
434
|
+
def test_is_heading_with_exception_handling(simple_docx):
|
435
|
+
"""Test _is_heading exception handling (line 230-231)"""
|
436
|
+
docx = RawDocx(simple_docx)
|
437
|
+
|
438
|
+
# Mock re.search to return a match but int() raises ValueError
|
439
|
+
with patch('src.raw_docx.raw_docx.re.search') as mock_search:
|
440
|
+
mock_match = Mock()
|
441
|
+
mock_match.group.side_effect = ValueError("Invalid conversion")
|
442
|
+
mock_search.return_value = mock_match
|
443
|
+
|
444
|
+
result = docx._is_heading("Heading 1")
|
445
|
+
# Should return (True, 0) when exception occurs during conversion
|
446
|
+
assert result == (True, 0)
|
447
|
+
|
448
|
+
|
449
|
+
def test_is_heading_with_index_error(simple_docx):
|
450
|
+
"""Test _is_heading with IndexError (line 230-231)"""
|
451
|
+
docx = RawDocx(simple_docx)
|
452
|
+
|
453
|
+
with patch('src.raw_docx.raw_docx.re.search') as mock_search:
|
454
|
+
mock_match = Mock()
|
455
|
+
mock_match.group.side_effect = IndexError("Index error")
|
456
|
+
mock_search.return_value = mock_match
|
457
|
+
|
458
|
+
result = docx._is_heading("Heading 1")
|
459
|
+
# Should return (True, 0) when IndexError occurs
|
460
|
+
assert result == (True, 0)
|
461
|
+
|
462
|
+
|
463
|
+
def test_process_images_with_image_part(simple_docx):
|
464
|
+
"""Test _process_images when document has image parts (line 76)"""
|
465
|
+
raw_docx_instance = RawDocx.__new__(RawDocx)
|
466
|
+
raw_docx_instance.errors = Mock()
|
467
|
+
raw_docx_instance.image_path = "/tmp/test"
|
468
|
+
raw_docx_instance.image_rels = {}
|
469
|
+
|
470
|
+
# Mock source document with image parts
|
471
|
+
mock_source_doc = Mock()
|
472
|
+
mock_part = Mock()
|
473
|
+
mock_rels = Mock()
|
474
|
+
|
475
|
+
# Create mock image part
|
476
|
+
import docx.parts.image
|
477
|
+
mock_image_part = Mock(spec=docx.parts.image.ImagePart)
|
478
|
+
mock_image_part.partname = "/word/media/image1.png"
|
479
|
+
|
480
|
+
mock_rel = Mock()
|
481
|
+
mock_rel._target = mock_image_part
|
482
|
+
mock_rel.rId = "rId1"
|
483
|
+
|
484
|
+
mock_rels.values.return_value = [mock_rel]
|
485
|
+
mock_part.rels = mock_rels
|
486
|
+
mock_source_doc.part = mock_part
|
487
|
+
raw_docx_instance.source_document = mock_source_doc
|
488
|
+
|
489
|
+
with patch.object(raw_docx_instance, '_extract_images'):
|
490
|
+
raw_docx_instance._process_images()
|
491
|
+
# Should have added image to image_rels
|
492
|
+
assert "rId1" in raw_docx_instance.image_rels
|
493
|
+
assert raw_docx_instance.image_rels["rId1"].endswith("image1.png")
|
494
|
+
|
495
|
+
|
496
|
+
def test_iter_block_items_invalid_parent(simple_docx):
|
497
|
+
"""Test _iter_block_items with invalid parent type (line 93)"""
|
498
|
+
docx = RawDocx(simple_docx)
|
499
|
+
|
500
|
+
# Test with invalid parent type
|
501
|
+
invalid_parent = "not_a_document_or_cell"
|
502
|
+
|
503
|
+
with pytest.raises(ValueError, match="something's not right with the parent"):
|
504
|
+
list(docx._iter_block_items(invalid_parent))
|
505
|
+
|
506
|
+
|
507
|
+
def test_iter_block_items_unknown_child_type(simple_docx):
|
508
|
+
"""Test _iter_block_items with unknown child type (line 117)"""
|
509
|
+
from docx.document import Document
|
510
|
+
docx = RawDocx(simple_docx)
|
511
|
+
|
512
|
+
# Mock document with unknown child type
|
513
|
+
mock_doc = Mock(spec=Document)
|
514
|
+
mock_body = Mock()
|
515
|
+
|
516
|
+
# Create unknown child type
|
517
|
+
unknown_child = Mock()
|
518
|
+
unknown_child.__class__ = type('UnknownType', (), {})
|
519
|
+
|
520
|
+
mock_body.iterchildren.return_value = [unknown_child]
|
521
|
+
mock_doc.element = Mock()
|
522
|
+
mock_doc.element.body = mock_body
|
523
|
+
|
524
|
+
with pytest.raises(ValueError, match="something's not right with a child"):
|
525
|
+
list(docx._iter_block_items(mock_doc))
|
526
|
+
|
527
|
+
|
528
|
+
def test_process_table_cell_none(simple_docx):
|
529
|
+
"""Test _process_table when cell._tc is None (lines 145-146)"""
|
530
|
+
docx = RawDocx(simple_docx)
|
531
|
+
|
532
|
+
# Create mock table with cell._tc = None
|
533
|
+
mock_table = Mock()
|
534
|
+
mock_row = Mock()
|
535
|
+
mock_cell = Mock()
|
536
|
+
mock_cell._tc = None # This triggers lines 145-146
|
537
|
+
# Need to mock the attributes that are accessed when _tc is None
|
538
|
+
mock_cell.top = 0
|
539
|
+
mock_cell.left = 0
|
540
|
+
mock_row.cells = [mock_cell]
|
541
|
+
mock_table.rows = [mock_row]
|
542
|
+
|
543
|
+
with patch.object(docx, '_iter_block_items', return_value=[]):
|
544
|
+
target_section = docx.target_document.current_section()
|
545
|
+
docx._process_table(mock_table, target_section)
|
546
|
+
# Should handle None _tc and set default spans
|
547
|
+
|
548
|
+
|
549
|
+
def test_process_table_nested_table(simple_docx):
|
550
|
+
"""Test _process_table with nested table (line 154)"""
|
551
|
+
docx = RawDocx(simple_docx)
|
552
|
+
|
553
|
+
# Create mock table structure
|
554
|
+
mock_table = Mock()
|
555
|
+
mock_row = Mock()
|
556
|
+
mock_cell = Mock()
|
557
|
+
mock_tc = Mock()
|
558
|
+
mock_tc.right = 2
|
559
|
+
mock_tc.left = 1
|
560
|
+
mock_tc.top = 1
|
561
|
+
mock_tc.bottom = 2
|
562
|
+
mock_cell._tc = mock_tc
|
563
|
+
mock_row.cells = [mock_cell]
|
564
|
+
mock_table.rows = [mock_row]
|
565
|
+
|
566
|
+
# Create nested table
|
567
|
+
nested_table = Mock(spec=Table)
|
568
|
+
|
569
|
+
with patch.object(docx, '_iter_block_items', return_value=[nested_table]):
|
570
|
+
target_section = docx.target_document.current_section()
|
571
|
+
with pytest.raises(RawDocx.LogicError, match="Table within table detected"):
|
572
|
+
docx._process_table(mock_table, target_section)
|
573
|
+
|
574
|
+
|
575
|
+
def test_process_table_with_etree_element_ct_tcpr(simple_docx):
|
576
|
+
"""Test _process_table with CT_TcPr etree element (lines 156-157)"""
|
577
|
+
docx = RawDocx(simple_docx)
|
578
|
+
|
579
|
+
# Create mock table structure
|
580
|
+
mock_table = Mock()
|
581
|
+
mock_row = Mock()
|
582
|
+
mock_cell = Mock()
|
583
|
+
mock_tc = Mock()
|
584
|
+
mock_tc.right = 2
|
585
|
+
mock_tc.left = 1
|
586
|
+
mock_tc.top = 1
|
587
|
+
mock_tc.bottom = 2
|
588
|
+
mock_cell._tc = mock_tc
|
589
|
+
mock_row.cells = [mock_cell]
|
590
|
+
mock_table.rows = [mock_row]
|
591
|
+
|
592
|
+
# Create etree element with CT_TcPr tag
|
593
|
+
etree_element = Mock(spec=etree._Element)
|
594
|
+
etree_element.tag = CT_TcPr
|
595
|
+
|
596
|
+
with patch.object(docx, '_iter_block_items', return_value=[etree_element]):
|
597
|
+
target_section = docx.target_document.current_section()
|
598
|
+
# Should pass through CT_TcPr elements without error
|
599
|
+
docx._process_table(mock_table, target_section)
|
600
|
+
|
601
|
+
|
602
|
+
def test_process_table_with_unknown_etree_element(simple_docx):
|
603
|
+
"""Test _process_table with unknown etree element (lines 158-159)"""
|
604
|
+
docx = RawDocx(simple_docx)
|
605
|
+
|
606
|
+
# Create mock table structure
|
607
|
+
mock_table = Mock()
|
608
|
+
mock_row = Mock()
|
609
|
+
mock_cell = Mock()
|
610
|
+
mock_tc = Mock()
|
611
|
+
mock_tc.right = 2
|
612
|
+
mock_tc.left = 1
|
613
|
+
mock_tc.top = 1
|
614
|
+
mock_tc.bottom = 2
|
615
|
+
mock_cell._tc = mock_tc
|
616
|
+
mock_row.cells = [mock_cell]
|
617
|
+
mock_table.rows = [mock_row]
|
618
|
+
|
619
|
+
# Create etree element with unknown tag
|
620
|
+
etree_element = Mock(spec=etree._Element)
|
621
|
+
etree_element.tag = "unknown_tag"
|
622
|
+
|
623
|
+
with patch.object(docx, '_iter_block_items', return_value=[etree_element]):
|
624
|
+
target_section = docx.target_document.current_section()
|
625
|
+
with patch.object(docx.errors, 'warning') as mock_warning:
|
626
|
+
docx._process_table(mock_table, target_section)
|
627
|
+
# Should log warning for unknown etree element
|
628
|
+
mock_warning.assert_called()
|
@@ -42,7 +42,32 @@ def test_to_text():
|
|
42
42
|
]
|
43
43
|
for item in items:
|
44
44
|
list.add(item)
|
45
|
-
|
45
|
+
# Test the RawList.to_text() method to cover lines 30-33
|
46
|
+
result = list.to_text()
|
47
|
+
expected_lines = [
|
48
|
+
"  Item 1", # Level 1 item (2 spaces for level 1)
|
49
|
+
"    Item 1.1", # Level 2 item (4 spaces for level 2)
|
50
|
+
"  Item 2" # Level 1 item (2 spaces for level 1)
|
51
|
+
]
|
52
|
+
assert result == "\n".join(expected_lines)
|
53
|
+
|
54
|
+
|
55
|
+
def test_to_text_empty_list():
|
56
|
+
"""Test to_text method with empty list"""
|
57
|
+
errors = Errors()
|
58
|
+
list = RawList(errors, 0)
|
59
|
+
result = list.to_text()
|
60
|
+
assert result == ""
|
61
|
+
|
62
|
+
|
63
|
+
def test_to_text_single_item():
|
64
|
+
"""Test to_text method with single item"""
|
65
|
+
errors = Errors()
|
66
|
+
list = RawList(errors, 0)
|
67
|
+
item = RawListItem([RawRun("Single item", "", None, "Normal")], 0)
|
68
|
+
list.add(item)
|
69
|
+
result = list.to_text()
|
70
|
+
assert result == "Single item"
|
46
71
|
|
47
72
|
|
48
73
|
def test_add_multiple_items():
|
@@ -95,3 +95,40 @@ def test_empty_paragraph():
|
|
95
95
|
paragraph = RawParagraph([])
|
96
96
|
assert paragraph.text == ""
|
97
97
|
assert len(paragraph.runs) == 0
|
98
|
+
|
99
|
+
|
100
|
+
def test_add_span():
|
101
|
+
"""Test adding span to paragraph text"""
|
102
|
+
run = RawRun("Hello World", "", None, "Normal")
|
103
|
+
paragraph = RawParagraph([run])
|
104
|
+
|
105
|
+
# Add span to the beginning of "Hello"
|
106
|
+
paragraph.add_span("Hello", "highlight")
|
107
|
+
|
108
|
+
# The text should now have the span wrapped around "Hello"
|
109
|
+
expected_text = '<span class="highlight">Hello</span> World'
|
110
|
+
assert paragraph.text == expected_text
|
111
|
+
|
112
|
+
|
113
|
+
def test_add_span_partial_match():
|
114
|
+
"""Test adding span with partial text match"""
|
115
|
+
run = RawRun("Testing paragraph", "", None, "Normal")
|
116
|
+
paragraph = RawParagraph([run])
|
117
|
+
|
118
|
+
# Add span to "Test" at the beginning
|
119
|
+
paragraph.add_span("Test", "emphasis")
|
120
|
+
|
121
|
+
expected_text = '<span class="emphasis">Test</span>ing paragraph'
|
122
|
+
assert paragraph.text == expected_text
|
123
|
+
|
124
|
+
|
125
|
+
def test_add_span_exact_match():
|
126
|
+
"""Test adding span when text matches exactly"""
|
127
|
+
run = RawRun("Test", "", None, "Normal")
|
128
|
+
paragraph = RawParagraph([run])
|
129
|
+
|
130
|
+
# Add span to entire text
|
131
|
+
paragraph.add_span("Test", "bold")
|
132
|
+
|
133
|
+
expected_text = '<span class="bold">Test</span>'
|
134
|
+
assert paragraph.text == expected_text
|
@@ -1 +0,0 @@
|
|
1
|
-
__package_version__ = "0.7.0"
|
@@ -1,107 +0,0 @@
|
|
1
|
-
import pytest
|
2
|
-
import os
|
3
|
-
from docx import Document as DocxDocument
|
4
|
-
from docx.shared import Inches
|
5
|
-
from src.raw_docx.raw_docx import RawDocx
|
6
|
-
from src.raw_docx.raw_document import RawDocument
|
7
|
-
|
8
|
-
|
9
|
-
@pytest.fixture
|
10
|
-
def raw_docx():
|
11
|
-
return RawDocx("tests/test_files/example_1.docx")
|
12
|
-
|
13
|
-
|
14
|
-
@pytest.fixture
|
15
|
-
def temp_docx(tmp_path):
|
16
|
-
"""Create a test docx file with various content"""
|
17
|
-
doc_path = tmp_path / "test.docx"
|
18
|
-
doc = DocxDocument()
|
19
|
-
|
20
|
-
# Add regular paragraph
|
21
|
-
doc.add_paragraph("Regular paragraph")
|
22
|
-
|
23
|
-
# Add list items using standard list style
|
24
|
-
doc.add_paragraph("First bullet point", style="List Bullet")
|
25
|
-
doc.add_paragraph("Second bullet point", style="List Bullet")
|
26
|
-
|
27
|
-
# Add table with merged cells
|
28
|
-
table = doc.add_table(rows=2, cols=2)
|
29
|
-
table.cell(0, 0).merge(table.cell(0, 1)) # Merge first row
|
30
|
-
table.cell(0, 0).text = "Merged cell"
|
31
|
-
table.cell(1, 0).text = "Cell 1"
|
32
|
-
table.cell(1, 1).text = "Cell 2"
|
33
|
-
|
34
|
-
# Add image if test file exists
|
35
|
-
image_path = os.path.join(os.path.dirname(__file__), "test_files", "test_image.png")
|
36
|
-
if os.path.exists(image_path):
|
37
|
-
doc.add_picture(image_path, width=Inches(1.0))
|
38
|
-
|
39
|
-
# Save the document
|
40
|
-
doc.save(doc_path)
|
41
|
-
return str(doc_path)
|
42
|
-
|
43
|
-
|
44
|
-
def test_to_dict_with_document(raw_docx):
|
45
|
-
"""Test converting RawDocx to dictionary with loaded document"""
|
46
|
-
result = raw_docx.to_dict()
|
47
|
-
assert result["type"] == "raw_docx"
|
48
|
-
assert result["document"] is not None
|
49
|
-
assert result["document"]["type"] == "document"
|
50
|
-
assert isinstance(result["document"]["sections"], list)
|
51
|
-
|
52
|
-
|
53
|
-
def test_initialization_and_processing(temp_docx):
|
54
|
-
"""Test document initialization and processing"""
|
55
|
-
docx = RawDocx(temp_docx)
|
56
|
-
assert os.path.exists(docx.image_path)
|
57
|
-
assert isinstance(docx.target_document, RawDocument)
|
58
|
-
assert len(docx.target_document.sections) > 0
|
59
|
-
|
60
|
-
|
61
|
-
def test_table_processing(temp_docx):
|
62
|
-
"""Test table processing with merged cells"""
|
63
|
-
docx = RawDocx(temp_docx)
|
64
|
-
tables = docx.target_document.sections[0].tables()
|
65
|
-
assert len(tables) > 0
|
66
|
-
|
67
|
-
# Check merged cells
|
68
|
-
first_table = tables[0]
|
69
|
-
first_row = first_table.rows[0]
|
70
|
-
assert len(first_row.cells) == 2
|
71
|
-
assert first_row.cells[0].h_span == 2 # Horizontally merged
|
72
|
-
assert first_row.cells[0].first is True
|
73
|
-
assert first_row.cells[1].first is False
|
74
|
-
|
75
|
-
|
76
|
-
def test_image_processing(temp_docx):
|
77
|
-
"""Test image extraction and processing"""
|
78
|
-
docx = RawDocx(temp_docx)
|
79
|
-
|
80
|
-
# Check if image directory was created
|
81
|
-
assert os.path.exists(docx.image_path)
|
82
|
-
|
83
|
-
# Check if image is referenced in document
|
84
|
-
image_path = os.path.join(os.path.dirname(__file__), "test_files", "test_image.png")
|
85
|
-
if os.path.exists(image_path):
|
86
|
-
found_image = False
|
87
|
-
for section in docx.target_document.sections:
|
88
|
-
for item in section.items:
|
89
|
-
if hasattr(item, "filepath") and item.filepath.endswith(
|
90
|
-
(".png", ".jpg", ".jpeg")
|
91
|
-
):
|
92
|
-
found_image = True
|
93
|
-
break
|
94
|
-
assert found_image, "Image not found in document"
|
95
|
-
|
96
|
-
|
97
|
-
def test_error_handling(tmp_path):
|
98
|
-
"""Test error handling for invalid files and directories"""
|
99
|
-
# Test with non-existent file
|
100
|
-
with pytest.raises(Exception):
|
101
|
-
RawDocx(str(tmp_path / "nonexistent.docx"))
|
102
|
-
|
103
|
-
# Test with invalid file format
|
104
|
-
invalid_file = tmp_path / "invalid.txt"
|
105
|
-
invalid_file.write_text("Not a docx file")
|
106
|
-
with pytest.raises(Exception):
|
107
|
-
RawDocx(str(invalid_file))
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|