raw-docx 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- raw_docx/__info__.py +1 -1
- raw_docx/raw_document.py +1 -1
- raw_docx/raw_docx.py +23 -11
- {raw_docx-0.7.0.dist-info → raw_docx-0.8.0.dist-info}/METADATA +4 -4
- {raw_docx-0.7.0.dist-info → raw_docx-0.8.0.dist-info}/RECORD +8 -8
- {raw_docx-0.7.0.dist-info → raw_docx-0.8.0.dist-info}/WHEEL +0 -0
- {raw_docx-0.7.0.dist-info → raw_docx-0.8.0.dist-info}/licenses/LICENSE +0 -0
- {raw_docx-0.7.0.dist-info → raw_docx-0.8.0.dist-info}/top_level.txt +0 -0
raw_docx/__info__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__package_version__ = "0.7.0"
|
1
|
+
__package_version__ = "0.8.0"
|
raw_docx/raw_document.py
CHANGED
raw_docx/raw_docx.py
CHANGED
@@ -144,7 +144,10 @@ class RawDocx:
|
|
144
144
|
else:
|
145
145
|
h_span = 1
|
146
146
|
v_span = 1
|
147
|
-
|
147
|
+
if cell._tc is not None:
|
148
|
+
first = r_index == cell._tc.top and c_index == cell._tc.left
|
149
|
+
else:
|
150
|
+
first = r_index == 0 and c_index == 0
|
148
151
|
target_cell = RawTableCell(h_span, v_span, first)
|
149
152
|
target_row.add(target_cell)
|
150
153
|
for block_item in self._iter_block_items(cell):
|
@@ -207,18 +210,27 @@ class RawDocx:
|
|
207
210
|
list_level = paragraph._p.xpath("./w:pPr/w:numPr/w:ilvl/@w:val")
|
208
211
|
return int(str(list_level[0])) if list_level else 0
|
209
212
|
|
210
|
-
def _is_heading(self, text):
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
213
|
+
def _is_heading(self, text) -> tuple[bool, int]:
|
214
|
+
"""
|
215
|
+
Extract heading level from text containing "Heading <N>" pattern.
|
216
|
+
|
217
|
+
Args:
|
218
|
+
text: Text to analyze for heading pattern
|
219
|
+
|
220
|
+
Returns:
|
221
|
+
tuple[bool, int]: (success, level) where success indicates if heading
|
222
|
+
pattern was found and level is the extracted integer value
|
223
|
+
"""
|
224
|
+
if not text:
|
225
|
+
return False, 0
|
226
|
+
|
227
|
+
# Look for "Heading <N>" pattern where <N> is one or more digits
|
228
|
+
match = re.search(r"Heading\s+(\d+)", text, re.IGNORECASE)
|
229
|
+
if match:
|
218
230
|
try:
|
219
|
-
level = int(
|
231
|
+
level = int(match.group(1))
|
220
232
|
return True, level
|
221
|
-
except
|
233
|
+
except (ValueError, IndexError):
|
222
234
|
return True, 0
|
223
235
|
return False, 0
|
224
236
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: raw_docx
|
3
|
-
Version: 0.7.0
|
3
|
+
Version: 0.8.0
|
4
4
|
Summary: A package for processing and analyzing raw document formats
|
5
5
|
Home-page: https://github.com/daveih/raw_docx
|
6
6
|
Author: Dave Iberson-Hurst
|
@@ -17,8 +17,8 @@ Classifier: Programming Language :: Python :: 3.11
|
|
17
17
|
Requires-Python: >=3.8
|
18
18
|
Description-Content-Type: text/markdown
|
19
19
|
License-File: LICENSE
|
20
|
-
Requires-Dist: python-docx
|
21
|
-
Requires-Dist: simple_error_log
|
20
|
+
Requires-Dist: python-docx==1.1.2
|
21
|
+
Requires-Dist: simple_error_log==0.6.0
|
22
22
|
Dynamic: author
|
23
23
|
Dynamic: classifier
|
24
24
|
Dynamic: description
|
@@ -38,4 +38,4 @@ Simple package to build on top of python-docx to assist in the handling of word
|
|
38
38
|
Build as a normal package
|
39
39
|
|
40
40
|
- Build with `python3 -m build --sdist --wheel`
|
41
|
-
- Upload to pypi.org using `twine upload dist
|
41
|
+
- Upload to pypi.org using `twine upload dist/*`
|
@@ -1,7 +1,7 @@
|
|
1
|
-
raw_docx/__info__.py,sha256=
|
1
|
+
raw_docx/__info__.py,sha256=onU36pd8pPYPNlWn6QKkq5qJkOLC2_M4_UeggrLuh-A,30
|
2
2
|
raw_docx/__init__.py,sha256=FE5cpoCK1EVhpz3LiOOs43l027PcuJN5RljdW0UWON0,591
|
3
|
-
raw_docx/raw_document.py,sha256=
|
4
|
-
raw_docx/raw_docx.py,sha256=
|
3
|
+
raw_docx/raw_document.py,sha256=hUrnf6QZs9-yysnz1UmYZCYvhqdyPi3v2i-t5mu5KsI,2340
|
4
|
+
raw_docx/raw_docx.py,sha256=huZzOyfzkhAILa6MurNO6qpye4gy39FljT9m40rrAX4,10850
|
5
5
|
raw_docx/raw_image.py,sha256=IUUETwW73-guaa_v-cHpfw0_z69u9wfvEk7adm9hHJQ,1506
|
6
6
|
raw_docx/raw_list.py,sha256=bhssQX_oVf8uBmUbcrCIzIJ8pCvdEtdHOAQBNH0EEQQ,2282
|
7
7
|
raw_docx/raw_list_item.py,sha256=4Mn3rmnpXppJGAxk-9StLD60wszk5igg-TIbBz8sKW4,623
|
@@ -13,8 +13,8 @@ raw_docx/raw_table_cell.py,sha256=pXe7FCfEmbqdktBGfkDrvMsbEie8FaGNQbRl_ooms0Q,18
|
|
13
13
|
raw_docx/raw_table_row.py,sha256=m8SoLyVlKLjd_Vqa_U79A2wi8Wout8spgyusqJm79Kc,1297
|
14
14
|
raw_docx/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
15
|
raw_docx/docx/docx_paragraph.py,sha256=DPFzCG26y-6teL3KDnC_Ihmbs48OsHfD4fCD5Tj1O4A,2938
|
16
|
-
raw_docx-0.
|
17
|
-
raw_docx-0.
|
18
|
-
raw_docx-0.
|
19
|
-
raw_docx-0.
|
20
|
-
raw_docx-0.
|
16
|
+
raw_docx-0.8.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
17
|
+
raw_docx-0.8.0.dist-info/METADATA,sha256=jf9pYmWbkI5JKYuucP9mps0Pp9CXgZsywPLHTEAv-L0,1237
|
18
|
+
raw_docx-0.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
19
|
+
raw_docx-0.8.0.dist-info/top_level.txt,sha256=Xl3dspPM9DBVj8clfdkHG7N4nNjNXeUmB4HcXAwOe60,9
|
20
|
+
raw_docx-0.8.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|