raw-docx 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
raw_docx/__info__.py CHANGED
@@ -1 +1 @@
1
- __package_version__ = "0.7.0"
1
+ __package_version__ = "0.8.0"
raw_docx/raw_document.py CHANGED
@@ -22,7 +22,7 @@ class RawDocument:
22
22
  return self.sections[-1]
23
23
 
24
24
  def section_by_ordinal(self, ordinal: int) -> RawSection:
25
- if 1 >= ordinal <= len(self.sections):
25
+ if 1 <= ordinal <= len(self.sections):
26
26
  return self.sections[ordinal - 1]
27
27
  else:
28
28
  return None
raw_docx/raw_docx.py CHANGED
@@ -144,7 +144,10 @@ class RawDocx:
144
144
  else:
145
145
  h_span = 1
146
146
  v_span = 1
147
- first = r_index == cell._tc.top and c_index == cell._tc.left
147
+ if cell._tc is not None:
148
+ first = r_index == cell._tc.top and c_index == cell._tc.left
149
+ else:
150
+ first = r_index == 0 and c_index == 0
148
151
  target_cell = RawTableCell(h_span, v_span, first)
149
152
  target_row.add(target_cell)
150
153
  for block_item in self._iter_block_items(cell):
@@ -207,18 +210,27 @@ class RawDocx:
207
210
  list_level = paragraph._p.xpath("./w:pPr/w:numPr/w:ilvl/@w:val")
208
211
  return int(str(list_level[0])) if list_level else 0
209
212
 
210
- def _is_heading(self, text):
211
- if re.match(r"^\d\dHeading \d", text):
212
- try:
213
- level = int(text[0:2])
214
- return True, level
215
- except Exception:
216
- return True, 0
217
- if re.match(r"^Heading \d", text):
213
+ def _is_heading(self, text) -> tuple[bool, int]:
214
+ """
215
+ Extract heading level from text containing "Heading <N>" pattern.
216
+
217
+ Args:
218
+ text: Text to analyze for heading pattern
219
+
220
+ Returns:
221
+ tuple[bool, int]: (success, level) where success indicates if heading
222
+ pattern was found and level is the extracted integer value
223
+ """
224
+ if not text:
225
+ return False, 0
226
+
227
+ # Look for "Heading <N>" pattern where <N> is one or more digits
228
+ match = re.search(r"Heading\s+(\d+)", text, re.IGNORECASE)
229
+ if match:
218
230
  try:
219
- level = int(text[8])
231
+ level = int(match.group(1))
220
232
  return True, level
221
- except Exception:
233
+ except (ValueError, IndexError):
222
234
  return True, 0
223
235
  return False, 0
224
236
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: raw_docx
3
- Version: 0.7.0
3
+ Version: 0.8.0
4
4
  Summary: A package for processing and analyzing raw document formats
5
5
  Home-page: https://github.com/daveih/raw_docx
6
6
  Author: Dave Iberson-Hurst
@@ -17,8 +17,8 @@ Classifier: Programming Language :: Python :: 3.11
17
17
  Requires-Python: >=3.8
18
18
  Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
- Requires-Dist: python-docx
21
- Requires-Dist: simple_error_log
20
+ Requires-Dist: python-docx==1.1.2
21
+ Requires-Dist: simple_error_log==0.6.0
22
22
  Dynamic: author
23
23
  Dynamic: classifier
24
24
  Dynamic: description
@@ -38,4 +38,4 @@ Simple package to build on top of python-docx to assist in the handling of word
38
38
  Build as a normal package
39
39
 
40
40
  - Build with `python3 -m build --sdist --wheel`
41
- - Upload to pypi.org using `twine upload dist/* `
41
+ - Upload to pypi.org using `twine upload dist/*`
@@ -1,7 +1,7 @@
1
- raw_docx/__info__.py,sha256=JyImHLdD9tOVOKODJRy37_fBqVJ9Nup6yLR4gOOCtz8,30
1
+ raw_docx/__info__.py,sha256=onU36pd8pPYPNlWn6QKkq5qJkOLC2_M4_UeggrLuh-A,30
2
2
  raw_docx/__init__.py,sha256=FE5cpoCK1EVhpz3LiOOs43l027PcuJN5RljdW0UWON0,591
3
- raw_docx/raw_document.py,sha256=VLx0-Z9jGwdYHMU227AKaT8UDRY_OHD7b2BRuw71x6M,2340
4
- raw_docx/raw_docx.py,sha256=VhmwkP1kO5Bjr1WnfCyJE6JL728ZEHvStlQ78VuFFxQ,10379
3
+ raw_docx/raw_document.py,sha256=hUrnf6QZs9-yysnz1UmYZCYvhqdyPi3v2i-t5mu5KsI,2340
4
+ raw_docx/raw_docx.py,sha256=huZzOyfzkhAILa6MurNO6qpye4gy39FljT9m40rrAX4,10850
5
5
  raw_docx/raw_image.py,sha256=IUUETwW73-guaa_v-cHpfw0_z69u9wfvEk7adm9hHJQ,1506
6
6
  raw_docx/raw_list.py,sha256=bhssQX_oVf8uBmUbcrCIzIJ8pCvdEtdHOAQBNH0EEQQ,2282
7
7
  raw_docx/raw_list_item.py,sha256=4Mn3rmnpXppJGAxk-9StLD60wszk5igg-TIbBz8sKW4,623
@@ -13,8 +13,8 @@ raw_docx/raw_table_cell.py,sha256=pXe7FCfEmbqdktBGfkDrvMsbEie8FaGNQbRl_ooms0Q,18
13
13
  raw_docx/raw_table_row.py,sha256=m8SoLyVlKLjd_Vqa_U79A2wi8Wout8spgyusqJm79Kc,1297
14
14
  raw_docx/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  raw_docx/docx/docx_paragraph.py,sha256=DPFzCG26y-6teL3KDnC_Ihmbs48OsHfD4fCD5Tj1O4A,2938
16
- raw_docx-0.7.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
17
- raw_docx-0.7.0.dist-info/METADATA,sha256=aNG1bN6ZnwCWqRv0y2x0-vNhmolaJb8qm04N-gtVvQI,1224
18
- raw_docx-0.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
19
- raw_docx-0.7.0.dist-info/top_level.txt,sha256=Xl3dspPM9DBVj8clfdkHG7N4nNjNXeUmB4HcXAwOe60,9
20
- raw_docx-0.7.0.dist-info/RECORD,,
16
+ raw_docx-0.8.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
17
+ raw_docx-0.8.0.dist-info/METADATA,sha256=jf9pYmWbkI5JKYuucP9mps0Pp9CXgZsywPLHTEAv-L0,1237
18
+ raw_docx-0.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
19
+ raw_docx-0.8.0.dist-info/top_level.txt,sha256=Xl3dspPM9DBVj8clfdkHG7N4nNjNXeUmB4HcXAwOe60,9
20
+ raw_docx-0.8.0.dist-info/RECORD,,