raw-docx 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
raw_docx/__info__.py CHANGED
@@ -1 +1 @@
1
- __package_version__ = "0.9.1"
1
+ __package_version__ = "0.10.0"
@@ -1,51 +1,193 @@
1
+ import re
1
2
  from docx.text.paragraph import Paragraph
2
3
  from docx.styles.style import ParagraphStyle
3
4
  from docx.text.run import Run
4
5
  from simple_error_log import Errors
6
+ from simple_error_log.error_location import KlassMethodLocation
5
7
  from raw_docx.raw_run import RawRun
8
+ from raw_docx.raw_simple_field import RawSimpleField
9
+ from raw_docx.raw_bookmark_start import RawBookmarkStart
10
+ from raw_docx.raw_bookmark_end import RawBookmarkEnd
11
+ from docx.oxml.text.run import CT_R
12
+ from lxml import etree
13
+
14
+
15
+ BOOKMARK_START = (
16
+ "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}bookmarkStart"
17
+ )
18
+ BOOKMARK_END = (
19
+ "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}bookmarkEnd"
20
+ )
21
+ FIELD_SIMPLE = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}fldSimple"
22
+ FIELD_CHAR = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}fldChar"
23
+ INSTRUCTION_TEXT = (
24
+ "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}instrText"
25
+ )
26
+
27
+ PARA_PROPERTIES = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}pPr"
28
+ HYPERLINK = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}hyperlink"
29
+ PROOF_ERROR = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}proofErr"
30
+
31
+ ELEMENT_IGNORE_TAGS = [PARA_PROPERTIES, HYPERLINK, PROOF_ERROR]
32
+ SIMPLE_FIELD_IGNORE_TAGS = [BOOKMARK_START, BOOKMARK_END]
33
+
34
+ ID_ATTRIBUTE = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}id"
35
+ NAME_ATTRIBUTE = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}name"
36
+ FIELD_CHAR_TYPE_ATTRIBUTE = (
37
+ "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}fldCharType"
38
+ )
39
+ INSTR_ATTRIBUTE = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}instr"
40
+
41
+ MODULE = "raw_docx.docx.docx_paragraph"
6
42
 
7
43
 
8
44
  def install():
9
- setattr(Paragraph, "extract_runs", extract_runs)
10
-
11
-
12
- def extract_runs(paragraph: Paragraph, errors: Errors) -> list[RawRun]:
13
- if paragraph.text.startswith(
14
- "This template is intended for interventional clinical trials. The template is suitable"
15
- ):
16
- errors.info(f"Paragraph style {paragraph.style.name}")
17
- data = [
18
- {
19
- "text": run.text,
20
- "color": _get_run_color(paragraph.style, run, errors),
21
- "highlight": _get_highlight_color(run, errors),
22
- "keep": True,
23
- # "style": run.style.name if run.style else paragraph.style.name
24
- "style": paragraph.style.name,
25
- }
26
- for run in paragraph.runs
27
- ]
28
- data = _tidy_runs_color(data, errors)
29
- return [RawRun(x["text"], x["color"], x["highlight"], x["style"]) for x in data]
45
+ setattr(Paragraph, "extract_content", extract_content)
46
+
47
+
48
+ def extract_content(
49
+ paragraph: Paragraph, errors: Errors
50
+ ) -> list[RawRun | RawBookmarkStart | RawBookmarkEnd | RawSimpleField]:
51
+ return process_element(paragraph._element, paragraph, errors)
52
+
53
+
54
+ def process_element(
55
+ element: etree._Element | CT_R, parent: Paragraph, errors: Errors
56
+ ) -> list[RawRun | RawBookmarkStart | RawBookmarkEnd | RawSimpleField]:
57
+ data = []
58
+ bookmark = None
59
+ bookmark_id = None
60
+ for child in element:
61
+ # print(
62
+ # f"XML: {child.xml if isinstance(child, CT_R) else etree.tostring(child, encoding='unicode', pretty_print=True)}, type{type(child)}"
63
+ # )
64
+ if isinstance(child, CT_R):
65
+ run = build_run(child, parent, errors)
66
+ data.append(run)
67
+ elif child.tag == BOOKMARK_START:
68
+ if bookmark:
69
+ del data[bookmark]
70
+ id = child.get(ID_ATTRIBUTE)
71
+ name = child.get(NAME_ATTRIBUTE)
72
+ data.append(RawBookmarkStart(id, name))
73
+ bookmark_id = id
74
+ bookmark = len(data) - 1
75
+ elif child.tag == BOOKMARK_END:
76
+ id = child.get(ID_ATTRIBUTE)
77
+ if id == bookmark_id:
78
+ name = child.get(NAME_ATTRIBUTE)
79
+ data.append(RawBookmarkEnd(id, name))
80
+ bookmark = None
81
+ bookmark_id = None
82
+ elif child.tag == FIELD_SIMPLE:
83
+ bookmark_id = _extract_instruction_id(child.get(INSTR_ATTRIBUTE))
84
+ items = process_simple_field(child, parent, errors)
85
+ data.append(RawSimpleField(bookmark_id, create_runs(items, errors)))
86
+ elif child.tag in ELEMENT_IGNORE_TAGS:
87
+ pass
88
+ else:
89
+ errors.warning(
90
+ f"Element other instance/tag detected:: '{child.tag}'",
91
+ KlassMethodLocation(MODULE, "process_element"),
92
+ )
93
+ return create_runs(data, errors)
30
94
 
31
95
 
32
- def _tidy_runs_color(data: list[dict], errors: Errors) -> list[dict]:
96
+ def process_simple_field(element, parent: Paragraph, errors: Errors) -> list[RawRun]:
97
+ data = []
98
+ for child in element:
99
+ # print(F"CHILD: {child}, type{type(child)}")
100
+ if isinstance(child, CT_R):
101
+ run = build_run(child, parent, errors)
102
+ data.append(run)
103
+ elif child.tag in SIMPLE_FIELD_IGNORE_TAGS:
104
+ pass
105
+ else:
106
+ errors.warning(
107
+ f"Simple field other instance/tag detected: '{child.tag}'",
108
+ KlassMethodLocation(MODULE, "process_element"),
109
+ )
110
+ return data
111
+
112
+
113
+ def _extract_instruction_id(text: str) -> str:
114
+ pattern = r"_TN[A-F0-9]+"
115
+ match = re.search(pattern, text, re.IGNORECASE)
116
+ return match.group(0) if match else ""
117
+
118
+
119
+ def create_runs(data: list[dict], errors: Errors) -> list[RawRun]:
120
+ data = _tidy_runs(data, errors)
121
+ results = []
122
+ for x in data:
123
+ if isinstance(x, dict):
124
+ results.append(
125
+ RawRun(
126
+ x["text"],
127
+ x["color"],
128
+ x["highlight"],
129
+ x["style"],
130
+ x["superscript"],
131
+ x["subscript"],
132
+ x["field_char_type"],
133
+ x["instruction"],
134
+ )
135
+ )
136
+ else:
137
+ results.append(x)
138
+ return results
139
+
140
+
141
+ def build_run(element, paragraph: Paragraph, errors: Errors) -> RawRun:
142
+ run = Run(element, paragraph)
143
+ field_char_type = None
144
+ instruction = None
145
+ for child in element:
146
+ if child.tag == FIELD_CHAR:
147
+ field_char_type = child.get(FIELD_CHAR_TYPE_ATTRIBUTE)
148
+ elif child.tag == INSTRUCTION_TEXT:
149
+ instruction = _extract_instruction_id(child.text)
150
+ return {
151
+ "text": run.text,
152
+ "color": _get_run_color(paragraph.style, run, errors),
153
+ "highlight": _get_highlight_color(run, errors),
154
+ "keep": True,
155
+ "style": paragraph.style.name,
156
+ "subscript": run.font.subscript,
157
+ "superscript": run.font.superscript,
158
+ "field_char_type": field_char_type,
159
+ "instruction": instruction,
160
+ }
161
+
162
+
163
+ def _tidy_runs(data: list, errors: Errors) -> list:
33
164
  more = False
165
+ # print(f"TIDY IN: {data}")
34
166
  for index, run in enumerate(data):
35
- if (
36
- index > 0
37
- and run["color"] == data[index - 1]["color"]
38
- and run["highlight"] == data[index - 1]["highlight"]
39
- ):
40
- run["text"] = data[index - 1]["text"] + run["text"]
41
- data[index - 1]["keep"] = False
42
- more = True
43
- new_data = [x for x in data if x["keep"]]
167
+ if index > 0 and isinstance(run, dict) and isinstance(data[index - 1], dict):
168
+ # print(f"A={run}, B={data[index - 1]}")
169
+ if _equal_with_ignore(run, data[index - 1], ["text", "keep"]):
170
+ run["text"] = data[index - 1]["text"] + run["text"]
171
+ data[index - 1]["keep"] = False
172
+ more = True
173
+ new_data = [
174
+ x
175
+ for x in data
176
+ if (isinstance(x, dict) and x["keep"]) or (not isinstance(x, dict))
177
+ ]
44
178
  if more:
45
- new_data = _tidy_runs_color(new_data, errors)
179
+ new_data = _tidy_runs(new_data, errors)
180
+ # print(f"TIDY OUT: {new_data}")
46
181
  return new_data
47
182
 
48
183
 
184
+ def _equal_with_ignore(a: dict, b: dict, ignore_keys: list) -> bool:
185
+ # print(f"A={a}, B={b}")
186
+ return {k: v for k, v in a.items() if k not in ignore_keys} == {
187
+ k: v for k, v in b.items() if k not in ignore_keys
188
+ }
189
+
190
+
49
191
  def _get_run_color(paragraph: Paragraph, run: Run, errors: Errors) -> str | None:
50
192
  paragraph_color = _get_font_colour(paragraph, errors)
51
193
  font_color = _get_font_colour(run, errors)
@@ -0,0 +1,19 @@
1
+ class RawBookmarkEnd:
2
+ def __init__(self, id: str, name: str):
3
+ self.id = id
4
+ self.name = name
5
+
6
+ @property
7
+ def text(self) -> str:
8
+ return ""
9
+
10
+ def to_html(self) -> str:
11
+ return ""
12
+
13
+ def to_dict(self) -> dict:
14
+ """Convert the bookmark end to a dictionary representation"""
15
+ return {
16
+ "type": "bookmark_end",
17
+ "id": self.id,
18
+ "name": self.name,
19
+ }
@@ -0,0 +1,20 @@
1
+ # Note: Bookmark is a target, the destination of any link
2
+ class RawBookmarkStart:
3
+ def __init__(self, id: str, name: str):
4
+ self.id = id
5
+ self.name = name
6
+
7
+ @property
8
+ def text(self) -> str:
9
+ return ""
10
+
11
+ def to_html(self) -> str:
12
+ return f'<span id="{self.name}"></span>'
13
+
14
+ def to_dict(self) -> dict:
15
+ """Convert the bookmark start to a dictionary representation"""
16
+ return {
17
+ "type": "bookmark_start",
18
+ "id": self.id,
19
+ "name": self.name,
20
+ }
raw_docx/raw_document.py CHANGED
@@ -62,3 +62,7 @@ class RawDocument:
62
62
  for title, section in self._section_title_mapping.items()
63
63
  },
64
64
  }
65
+
66
+ def to_html(self) -> str:
67
+ sections = [section.to_html() for section in self.sections]
68
+ return ("").join(sections)
raw_docx/raw_docx.py CHANGED
@@ -76,7 +76,8 @@ class RawDocx:
76
76
  self._process_table(block_item, target_section)
77
77
  else:
78
78
  self._errors.warning(
79
- "Ignoring element", KlassMethodLocation(self.MODULE, "_process")
79
+ f"Ignoring element {block_item}",
80
+ KlassMethodLocation(self.MODULE, "_process"),
80
81
  )
81
82
  raise ValueError
82
83
  except Exception as e:
@@ -133,17 +134,13 @@ class RawDocx:
133
134
  pass
134
135
  else:
135
136
  self._errors.warning(
136
- f"Ignoring eTree element {self._tree(child)}",
137
+ f"Ignoring eTree element '{child.tag}'",
137
138
  KlassMethodLocation(self.MODULE, "_iter_block_items"),
138
139
  )
139
140
 
140
141
  else:
141
142
  raise ValueError(f"something's not right with a child {type(child)}")
142
143
 
143
- def _tree(self, node, tab=1):
144
- for child in node:
145
- self._tree(child, tab + 1)
146
-
147
144
  def _process_table(self, table, target: RawSection | RawTableCell):
148
145
  target_table = RawTable()
149
146
  target.add(target_table)
@@ -168,7 +165,7 @@ class RawDocx:
168
165
  pass
169
166
  else:
170
167
  self._errors.warning(
171
- f"Ignoring eTree element {block_item.tag}",
168
+ f"Ignoring eTree element '{block_item.tag}'",
172
169
  KlassMethodLocation(self.MODULE, "_process_table"),
173
170
  )
174
171
  else:
@@ -176,63 +173,10 @@ class RawDocx:
176
173
  f"Something's not right with a child {type(block_item)}"
177
174
  )
178
175
 
179
- # def _process_table(self, table, target: RawSection | RawTableCell):
180
- # target_table = RawTable()
181
- # target.add(target_table)
182
- # for r_index, row in enumerate(table.rows):
183
- # target_row = RawTableRow()
184
- # target_table.add(target_row)
185
- # cells = row.cells
186
- # for c_index, cell in enumerate(cells):
187
- # if cell._tc is not None:
188
- # x = cell._tc
189
- # right = x.right
190
- # left = x.left
191
- # top = x.top
192
- # try:
193
- # # Bottom method seems to have a bug.
194
- # # See https://github.com/python-openxml/python-docx/issues/1433
195
- # bottom = x.bottom
196
- # except Exception as e:
197
- # self._errors.exception(
198
- # f"Row span exception! {x.xml}",
199
- # e,
200
- # KlassMethodLocation(self.MODULE, "_process_table"),
201
- # )
202
- # bottom = top + 1
203
- # h_span = right - left
204
- # v_span = bottom - top
205
- # else:
206
- # h_span = 1
207
- # v_span = 1
208
- # if cell._tc is not None:
209
- # first = r_index == cell._tc.top and c_index == cell._tc.left
210
- # else:
211
- # first = r_index == 0 and c_index == 0
212
- # target_cell = RawTableCell(h_span, v_span, first)
213
- # target_row.add(target_cell)
214
- # for block_item in self._iter_block_items(cell):
215
- # if isinstance(block_item, Paragraph):
216
- # self._process_cell(block_item, target_cell)
217
- # elif isinstance(block_item, Table):
218
- # raise self.LogicError("Table within table detected")
219
- # elif isinstance(block_item, etree._Element):
220
- # if block_item.tag == CT_TcPr:
221
- # pass
222
- # else:
223
- # self._errors.warning(
224
- # f"Ignoring eTree element {block_item.tag}",
225
- # KlassMethodLocation(self.MODULE, "_process_table"),
226
- # )
227
- # else:
228
- # raise self.LogicError(
229
- # f"Something's not right with a child {type(block_item)}"
230
- # )
231
-
232
176
  def _process_cell(self, paragraph, target_cell: RawTableCell):
233
177
  if self._is_list(paragraph):
234
178
  list_level = self.get_list_level(paragraph)
235
- item = RawListItem(paragraph.extract_runs(self._errors), list_level)
179
+ item = RawListItem(paragraph.extract_content(self._errors), list_level)
236
180
  if target_cell.is_in_list():
237
181
  list = target_cell.current_list()
238
182
  else:
@@ -240,7 +184,7 @@ class RawDocx:
240
184
  target_cell.add(list)
241
185
  list.add(item)
242
186
  else:
243
- target_paragraph = RawParagraph(paragraph.extract_runs(self._errors))
187
+ target_paragraph = RawParagraph(paragraph.extract_content(self._errors))
244
188
  target_cell.add(target_paragraph)
245
189
 
246
190
  def _process_paragraph(
@@ -252,7 +196,7 @@ class RawDocx:
252
196
  self.target_document.add(target_section)
253
197
  elif self._is_list(paragraph):
254
198
  list_level = self.get_list_level(paragraph)
255
- item = RawListItem(paragraph.extract_runs(self._errors), list_level)
199
+ item = RawListItem(paragraph.extract_content(self._errors), list_level)
256
200
  if target_section.is_in_list():
257
201
  list = target_section.current_list()
258
202
  else:
@@ -265,7 +209,8 @@ class RawDocx:
265
209
  target_image = RawImage(image_rels[rId], self._errors)
266
210
  target_section.add(target_image)
267
211
  else:
268
- target_paragraph = RawParagraph(paragraph.extract_runs(self._errors))
212
+ # print("===== Raw Para =====")
213
+ target_paragraph = RawParagraph(paragraph.extract_content(self._errors))
269
214
  target_section.add(target_paragraph)
270
215
 
271
216
  def get_list_level(self, paragraph):
raw_docx/raw_paragraph.py CHANGED
@@ -1,16 +1,13 @@
1
1
  from .raw_run import RawRun
2
+ from .raw_bookmark_start import RawBookmarkStart
3
+ from .raw_bookmark_end import RawBookmarkEnd
2
4
 
3
5
 
4
6
  class RawParagraph:
5
- def __init__(self, runs: list[RawRun]):
6
- self.runs = runs
7
+ def __init__(self, items: list[RawRun | RawBookmarkStart | RawBookmarkEnd]):
8
+ self.items = items
9
+ self.text = self._item_text()
7
10
  self.klasses = []
8
- self.text = self._run_text()
9
-
10
- def to_html(self) -> str:
11
- klass_list = " ".join(self.klasses)
12
- open_tag = f'<p class="{klass_list}">' if self.klasses else "<p>"
13
- return f"{open_tag}{self.text}</p>"
14
11
 
15
12
  def find(self, text: str) -> bool:
16
13
  return True if text in self.text else False
@@ -18,6 +15,49 @@ class RawParagraph:
18
15
  def find_at_start(self, text: str) -> bool:
19
16
  return True if self.text.upper().startswith(text.upper()) else False
20
17
 
18
+ def to_html(self) -> str:
19
+ klass_list = " ".join(self.klasses)
20
+ open_tag = f'<p class="{klass_list}">' if self.klasses else "<p>"
21
+ close_tag = "</p>"
22
+ body = ""
23
+ in_anchor = False
24
+ in_bookmark = False
25
+ bookmark_id = None
26
+ for index, item in enumerate(self.items):
27
+ if isinstance(item, RawRun):
28
+ if item.field_char_type == "begin":
29
+ if next_item := self._next_run_item(index):
30
+ if next_item.instruction:
31
+ body += f'<a class="raw-docx-cross-ref" href="#{next_item.instruction}">'
32
+ in_anchor = True
33
+ elif item.field_char_type == "separate":
34
+ if next_item := self._next_run_item(index):
35
+ if next_item.instruction:
36
+ body += f'<a class="raw-docx-cross-ref" href="#{next_item.instruction}">'
37
+ in_anchor = True
38
+ elif in_bookmark:
39
+ body += (
40
+ f'<span class="raw-docx-bookmark" id="{bookmark_id}">'
41
+ )
42
+ elif in_anchor and item.field_char_type == "end":
43
+ body += "</a>"
44
+ in_anchor = False
45
+ elif in_bookmark and item.field_char_type == "end":
46
+ body += "</span>"
47
+ in_bookmark = False # Will also be caught by BookmarkEnd
48
+ bookmark_id = None
49
+ else:
50
+ body += item.to_html()
51
+ elif isinstance(item, RawBookmarkStart):
52
+ in_bookmark = True
53
+ bookmark_id = item.name
54
+ elif isinstance(item, RawBookmarkEnd):
55
+ in_bookmark = False # Will also be caught by field_char_type = "end"
56
+ bookmark_id = None
57
+ else:
58
+ body += item.to_html()
59
+ return f"{open_tag}{body}{close_tag}"
60
+
21
61
  def add_class(self, klass) -> None:
22
62
  self.klasses.append(klass)
23
63
 
@@ -26,7 +66,7 @@ class RawParagraph:
26
66
  return {
27
67
  "type": "paragraph",
28
68
  "text": self.text,
29
- "runs": [run.to_dict() for run in self.runs],
69
+ "items": [item.to_dict() for item in self.items],
30
70
  "classes": self.klasses,
31
71
  }
32
72
 
@@ -34,5 +74,23 @@ class RawParagraph:
34
74
  new_str = f'<span class="{klass}">{text}</span>'
35
75
  self.text = new_str + self.text[len(text) :]
36
76
 
37
- def _run_text(self) -> str:
38
- return "".join([run.text for run in self.runs])
77
+ def _next_run_item(self, start: int) -> RawRun | None:
78
+ for index in range(start + 1, len(self.items)):
79
+ if isinstance(self.items[index], RawRun):
80
+ return self.items[index]
81
+ return None
82
+
83
+ def _item_text(self) -> str:
84
+ text = ""
85
+ in_separate = False
86
+ for index, item in enumerate(self.items):
87
+ if isinstance(item, RawRun):
88
+ if item.field_char_type == "separate":
89
+ in_separate = True
90
+ elif in_separate and item.field_char_type == "end":
91
+ in_separate = False
92
+ elif not in_separate:
93
+ text += item.text
94
+ else:
95
+ text += item.text
96
+ return text
raw_docx/raw_run.py CHANGED
@@ -1,15 +1,49 @@
1
1
  class RawRun:
2
- def __init__(self, text: str, color: str | None, highlight: str | None, style: str):
3
- self.text = text
2
+ def __init__(
3
+ self,
4
+ text: str,
5
+ color: str | None,
6
+ highlight: str | None,
7
+ style: str,
8
+ superscript: bool,
9
+ subscript: bool,
10
+ field_char_type: str,
11
+ instruction: str,
12
+ ):
13
+ self._text = text
4
14
  self.color = color
5
15
  self.highlight = highlight
6
16
  self.style = style
17
+ self.subscript = subscript
18
+ self.superscript = superscript
19
+ self.field_char_type = field_char_type
20
+ self.instruction = instruction
21
+
22
+ @property
23
+ def text(self) -> str:
24
+ return "" if self.subscript or self.superscript else self._text
25
+
26
+ def to_html(self) -> str:
27
+ # Note: no support for colours as yet
28
+ if self.field_char_type:
29
+ return ""
30
+ elif self.subscript:
31
+ return f"<sub>{self.text}</sub>" if self.text else ""
32
+ elif self.superscript:
33
+ return f"<sup>{self.text}</sup>" if self.text else ""
34
+ else:
35
+ return f"{self.text}"
7
36
 
8
37
  def to_dict(self) -> dict:
9
38
  """Convert the instance to a dictionary representation"""
10
39
  return {
40
+ "type": "run",
11
41
  "text": self.text,
12
42
  "color": self.color,
13
43
  "highlight": self.highlight,
14
44
  "style": self.style,
45
+ "superscript": self.superscript,
46
+ "subscript": self.subscript,
47
+ "field_char_type": self.field_char_type,
48
+ "instruction": self.instruction,
15
49
  }
raw_docx/raw_section.py CHANGED
@@ -94,6 +94,9 @@ class RawSection:
94
94
  def next(self, index: int):
95
95
  return self.items[index + 1] if (index + 1) < len(self.items) else None
96
96
 
97
+ def index(self, item: RawParagraph | RawList | RawTable | RawImage) -> int | None:
98
+ return next((i for i, x in enumerate(self.items) if x is item), None)
99
+
97
100
  def next_paragraph(self, start_index: int) -> RawParagraph:
98
101
  for index, item in enumerate(self.items):
99
102
  if index >= start_index:
@@ -0,0 +1,24 @@
1
+ from .raw_run import RawRun
2
+
3
+
4
+ class RawSimpleField:
5
+ def __init__(self, id: str, items: list[RawRun]):
6
+ self.id = id
7
+ self.items = items
8
+ self.text = self._item_text()
9
+
10
+ def to_html(self) -> str:
11
+ start_tag = f'<a class="raw-docx-cross-ref" href="#{self.id}">'
12
+ end_tag = "</a>"
13
+ return f"{start_tag}{''.join([item.to_html() for item in self.items])}{end_tag}"
14
+
15
+ def to_dict(self) -> dict:
16
+ return {
17
+ "type": "simple_field",
18
+ "id": self.id,
19
+ "text": self.text,
20
+ "items": [item.to_dict() for item in self.items],
21
+ }
22
+
23
+ def _item_text(self) -> str:
24
+ return "".join([x.text for x in self.items])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: raw_docx
3
- Version: 0.9.1
3
+ Version: 0.10.0
4
4
  Summary: A package for processing and analyzing raw document formats
5
5
  Home-page: https://github.com/daveih/raw_docx
6
6
  Author: Dave Iberson-Hurst
@@ -33,9 +33,12 @@ Dynamic: summary
33
33
 
34
34
  A simple package built on top of python-docx to assist in the handling of Word documents
35
35
 
36
- # Build
36
+ # Build Package
37
37
 
38
- Build as a normal package
38
+ Build steps for deployment to pypi.org
39
39
 
40
+ - Run `pytest`, ensure coverage and all tests pass
41
+ - Run `ruff format`
42
+ - Run `ruff check`, ensure no errors
40
43
  - Build with `python3 -m build --sdist --wheel`
41
44
  - Upload to pypi.org using `twine upload dist/*`
@@ -0,0 +1,24 @@
1
+ raw_docx/__info__.py,sha256=h9PyxJrMiLzLjjHyl_X5nf8x8m1g2dhpuTyPcSoFF4Y,31
2
+ raw_docx/__init__.py,sha256=FE5cpoCK1EVhpz3LiOOs43l027PcuJN5RljdW0UWON0,591
3
+ raw_docx/raw_bookmark_end.py,sha256=D9SpnM3HKOvICUn7HJEEkph6v782udUQD5e6KceOutM,441
4
+ raw_docx/raw_bookmark_start.py,sha256=_2H3DVaPQtn2Bu8upFd0pfqMtMNOE6LylIkq-6H9EpA,533
5
+ raw_docx/raw_document.py,sha256=Lq28CE3f-S_gfVxMC6FpykTKLr1Tviqyx3WzfHOAOPo,2474
6
+ raw_docx/raw_docx.py,sha256=mXK2YB0JPTkf3GsYMxN4r3CLVEQop399pjSvPD6YGTc,11247
7
+ raw_docx/raw_image.py,sha256=IUUETwW73-guaa_v-cHpfw0_z69u9wfvEk7adm9hHJQ,1506
8
+ raw_docx/raw_list.py,sha256=bhssQX_oVf8uBmUbcrCIzIJ8pCvdEtdHOAQBNH0EEQQ,2282
9
+ raw_docx/raw_list_item.py,sha256=I65FDqU4YE6TJ615qH1GYDcXTPKnlk173xXPr3MCMcQ,599
10
+ raw_docx/raw_paragraph.py,sha256=rXBTeiBg-eIEK0NEFhUbGO2I_N-NsyRX1dGPZoaGYFg,3841
11
+ raw_docx/raw_run.py,sha256=Kl1HmZ5R_z0S9R3u5RC_e6HcfcPYMidzSb9Q6FN1TXI,1492
12
+ raw_docx/raw_section.py,sha256=4WNKveU4wDCCwLrB797woPLBzj2PCnqU3gV0pNRQeG0,4108
13
+ raw_docx/raw_simple_field.py,sha256=jPcZhqzN6F3uCb6eXEWBkKOBBjoxw4so6EX9a00wjo4,711
14
+ raw_docx/raw_table.py,sha256=Sc0vV7g-Gonmo_EbdWyYmQy_rvgf4A-PYLuIrX_FTfc,1622
15
+ raw_docx/raw_table_cell.py,sha256=3gKm3m5JlkFVuEy9EHVhC2viNDUKfggny_bblY1TC48,1967
16
+ raw_docx/raw_table_row.py,sha256=m8SoLyVlKLjd_Vqa_U79A2wi8Wout8spgyusqJm79Kc,1297
17
+ raw_docx/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ raw_docx/docx/docx_paragraph.py,sha256=A1bBXWt2qVWAihX1dvYuWXV3QmDSuijWv2Of_qw6IMk,8185
19
+ raw_docx/docx/docx_table.py,sha256=dG8jjkOnxGwN-6KtqdoYq8znO2XLYuIYhjbr4ZjIs2Y,5404
20
+ raw_docx-0.10.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
21
+ raw_docx-0.10.0.dist-info/METADATA,sha256=sBCucC0z6oMmrfj7Mxppul2KNbu6Q6tyvFhb8WBIwl4,1367
22
+ raw_docx-0.10.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
23
+ raw_docx-0.10.0.dist-info/top_level.txt,sha256=Xl3dspPM9DBVj8clfdkHG7N4nNjNXeUmB4HcXAwOe60,9
24
+ raw_docx-0.10.0.dist-info/RECORD,,
@@ -1,21 +0,0 @@
1
- raw_docx/__info__.py,sha256=0zPMqZ5bMSGqKHkuj4WY2o5mQl7U-b-FYZPn4LEUths,30
2
- raw_docx/__init__.py,sha256=FE5cpoCK1EVhpz3LiOOs43l027PcuJN5RljdW0UWON0,591
3
- raw_docx/raw_document.py,sha256=hUrnf6QZs9-yysnz1UmYZCYvhqdyPi3v2i-t5mu5KsI,2340
4
- raw_docx/raw_docx.py,sha256=bp4No6XbnwPoWkNsnutbxARgV5RzgNa7jalr8IMjUbQ,13843
5
- raw_docx/raw_image.py,sha256=IUUETwW73-guaa_v-cHpfw0_z69u9wfvEk7adm9hHJQ,1506
6
- raw_docx/raw_list.py,sha256=bhssQX_oVf8uBmUbcrCIzIJ8pCvdEtdHOAQBNH0EEQQ,2282
7
- raw_docx/raw_list_item.py,sha256=I65FDqU4YE6TJ615qH1GYDcXTPKnlk173xXPr3MCMcQ,599
8
- raw_docx/raw_paragraph.py,sha256=GDEU89MFVgzD52t0e7Tau8tm6V0uTtaHMawfpU2RbT0,1209
9
- raw_docx/raw_run.py,sha256=0PJHiZIm1QclZfjdsrPPLSL7_GYoX8jSa6JvcfcOcWc,479
10
- raw_docx/raw_section.py,sha256=_ONvR5Fyuif4vZs1LnE7Y67pX29JKWM13YB8Wy8di9o,3942
11
- raw_docx/raw_table.py,sha256=Sc0vV7g-Gonmo_EbdWyYmQy_rvgf4A-PYLuIrX_FTfc,1622
12
- raw_docx/raw_table_cell.py,sha256=3gKm3m5JlkFVuEy9EHVhC2viNDUKfggny_bblY1TC48,1967
13
- raw_docx/raw_table_row.py,sha256=m8SoLyVlKLjd_Vqa_U79A2wi8Wout8spgyusqJm79Kc,1297
14
- raw_docx/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- raw_docx/docx/docx_paragraph.py,sha256=DPFzCG26y-6teL3KDnC_Ihmbs48OsHfD4fCD5Tj1O4A,2938
16
- raw_docx/docx/docx_table.py,sha256=dG8jjkOnxGwN-6KtqdoYq8znO2XLYuIYhjbr4ZjIs2Y,5404
17
- raw_docx-0.9.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
18
- raw_docx-0.9.1.dist-info/METADATA,sha256=Bkl3JymHFUutu7-mx3WQjckO9axotoTqTBg4ATOT5Cw,1237
19
- raw_docx-0.9.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
20
- raw_docx-0.9.1.dist-info/top_level.txt,sha256=Xl3dspPM9DBVj8clfdkHG7N4nNjNXeUmB4HcXAwOe60,9
21
- raw_docx-0.9.1.dist-info/RECORD,,