raw-docx 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- raw_docx/__info__.py +1 -0
- raw_docx/__init__.py +1 -4
- raw_docx/docx/__init__.py +0 -0
- raw_docx/{docx_paragraph.py → docx/docx_paragraph.py} +23 -22
- raw_docx/raw_document.py +1 -1
- raw_docx/raw_docx.py +56 -43
- raw_docx/raw_image.py +4 -3
- raw_docx/raw_list.py +6 -5
- raw_docx/raw_list_item.py +2 -1
- raw_docx/raw_paragraph.py +5 -1
- {raw_docx-0.6.0.dist-info → raw_docx-0.8.0.dist-info}/METADATA +6 -5
- raw_docx-0.8.0.dist-info/RECORD +20 -0
- {raw_docx-0.6.0.dist-info → raw_docx-0.8.0.dist-info}/WHEEL +1 -1
- raw_docx/__version__.py +0 -1
- raw_docx/raw_logger.py +0 -67
- raw_docx-0.6.0.dist-info/RECORD +0 -20
- {raw_docx-0.6.0.dist-info → raw_docx-0.8.0.dist-info/licenses}/LICENSE +0 -0
- {raw_docx-0.6.0.dist-info → raw_docx-0.8.0.dist-info}/top_level.txt +0 -0
raw_docx/__info__.py
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
__package_version__ = "0.8.0"
|
raw_docx/__init__.py
CHANGED
@@ -3,7 +3,6 @@ from .raw_document import RawDocument
|
|
3
3
|
from .raw_image import RawImage
|
4
4
|
from .raw_list_item import RawListItem
|
5
5
|
from .raw_list import RawList
|
6
|
-
from .raw_logger import RawLogger
|
7
6
|
from .raw_paragraph import RawParagraph
|
8
7
|
from .raw_run import RawRun
|
9
8
|
from .raw_section import RawSection
|
@@ -17,12 +16,10 @@ __all__ = [
|
|
17
16
|
"RawImage",
|
18
17
|
"RawList",
|
19
18
|
"RawListItem",
|
20
|
-
"RawLogger",
|
21
19
|
"RawParagraph",
|
22
20
|
"RawRun",
|
23
21
|
"RawSection",
|
24
22
|
"RawTableCell",
|
25
23
|
"RawTableRow",
|
26
|
-
"RawTable"
|
24
|
+
"RawTable",
|
27
25
|
]
|
28
|
-
|
File without changes
|
@@ -1,31 +1,35 @@
|
|
1
1
|
from docx.text.paragraph import Paragraph
|
2
2
|
from docx.styles.style import ParagraphStyle
|
3
3
|
from docx.text.run import Run
|
4
|
-
from
|
5
|
-
from .raw_run import RawRun
|
4
|
+
from simple_error_log import Errors
|
5
|
+
from raw_docx.raw_run import RawRun
|
6
6
|
|
7
7
|
|
8
|
-
def
|
8
|
+
def install():
|
9
|
+
setattr(Paragraph, "extract_runs", extract_runs)
|
10
|
+
|
11
|
+
|
12
|
+
def extract_runs(paragraph: Paragraph, errors: Errors) -> list[RawRun]:
|
9
13
|
if paragraph.text.startswith(
|
10
14
|
"This template is intended for interventional clinical trials. The template is suitable"
|
11
15
|
):
|
12
|
-
|
16
|
+
errors.info(f"Paragraph style {paragraph.style.name}")
|
13
17
|
data = [
|
14
18
|
{
|
15
19
|
"text": run.text,
|
16
|
-
"color": _get_run_color(paragraph.style, run),
|
17
|
-
"highlight": _get_highlight_color(run),
|
20
|
+
"color": _get_run_color(paragraph.style, run, errors),
|
21
|
+
"highlight": _get_highlight_color(run, errors),
|
18
22
|
"keep": True,
|
19
23
|
# "style": run.style.name if run.style else paragraph.style.name
|
20
24
|
"style": paragraph.style.name,
|
21
25
|
}
|
22
26
|
for run in paragraph.runs
|
23
27
|
]
|
24
|
-
data = _tidy_runs_color(data)
|
28
|
+
data = _tidy_runs_color(data, errors)
|
25
29
|
return [RawRun(x["text"], x["color"], x["highlight"], x["style"]) for x in data]
|
26
30
|
|
27
31
|
|
28
|
-
def _tidy_runs_color(data: list[dict]) -> list[dict]:
|
32
|
+
def _tidy_runs_color(data: list[dict], errors: Errors) -> list[dict]:
|
29
33
|
more = False
|
30
34
|
for index, run in enumerate(data):
|
31
35
|
if (
|
@@ -38,14 +42,14 @@ def _tidy_runs_color(data: list[dict]) -> list[dict]:
|
|
38
42
|
more = True
|
39
43
|
new_data = [x for x in data if x["keep"]]
|
40
44
|
if more:
|
41
|
-
new_data = _tidy_runs_color(new_data)
|
45
|
+
new_data = _tidy_runs_color(new_data, errors)
|
42
46
|
return new_data
|
43
47
|
|
44
48
|
|
45
|
-
def _get_run_color(paragraph: Paragraph, run: Run) -> str | None:
|
46
|
-
paragraph_color = _get_font_colour(paragraph)
|
47
|
-
font_color = _get_font_colour(run)
|
48
|
-
style_color = _run_style_color(run)
|
49
|
+
def _get_run_color(paragraph: Paragraph, run: Run, errors: Errors) -> str | None:
|
50
|
+
paragraph_color = _get_font_colour(paragraph, errors)
|
51
|
+
font_color = _get_font_colour(run, errors)
|
52
|
+
style_color = _run_style_color(run, errors)
|
49
53
|
if font_color:
|
50
54
|
result = str(font_color)
|
51
55
|
elif style_color:
|
@@ -55,15 +59,15 @@ def _get_run_color(paragraph: Paragraph, run: Run) -> str | None:
|
|
55
59
|
return result
|
56
60
|
|
57
61
|
|
58
|
-
def _get_highlight_color(run: Run) -> str | None:
|
62
|
+
def _get_highlight_color(run: Run, errors: Errors) -> str | None:
|
59
63
|
try:
|
60
64
|
return str(run.font.highlight_color)
|
61
65
|
except Exception as e:
|
62
|
-
|
66
|
+
errors.exception("Failed to get run highlight color", e)
|
63
67
|
return None
|
64
68
|
|
65
69
|
|
66
|
-
def _run_style_color(run: Run) -> str | None:
|
70
|
+
def _run_style_color(run: Run, errors: Errors) -> str | None:
|
67
71
|
try:
|
68
72
|
run_color = None
|
69
73
|
run_style = run.style
|
@@ -74,16 +78,13 @@ def _run_style_color(run: Run) -> str | None:
|
|
74
78
|
run_style = run_style.base_style
|
75
79
|
return run_color
|
76
80
|
except Exception as e:
|
77
|
-
|
81
|
+
errors.exception("Failed to get run style color", e)
|
78
82
|
return None
|
79
83
|
|
80
84
|
|
81
|
-
def _get_font_colour(item: Run | ParagraphStyle) -> str | None:
|
85
|
+
def _get_font_colour(item: Run | ParagraphStyle, errors: Errors) -> str | None:
|
82
86
|
try:
|
83
87
|
return item.font.color.rgb
|
84
88
|
except Exception as e:
|
85
|
-
|
89
|
+
errors.exception("Failed to get font color", e)
|
86
90
|
return None
|
87
|
-
|
88
|
-
|
89
|
-
setattr(Paragraph, "extract_runs", extract_runs)
|
raw_docx/raw_document.py
CHANGED
raw_docx/raw_docx.py
CHANGED
@@ -3,15 +3,16 @@ import re
|
|
3
3
|
import docx
|
4
4
|
import zipfile
|
5
5
|
from pathlib import Path
|
6
|
-
from .raw_document import RawDocument
|
7
|
-
from .raw_section import RawSection
|
8
|
-
from .raw_paragraph import RawParagraph
|
9
|
-
from .raw_image import RawImage
|
10
|
-
from .raw_table import RawTable
|
11
|
-
from .raw_table_row import RawTableRow
|
12
|
-
from .raw_table_cell import RawTableCell
|
13
|
-
from .raw_list import RawList
|
14
|
-
from .raw_list_item import RawListItem
|
6
|
+
from raw_docx.raw_document import RawDocument
|
7
|
+
from raw_docx.raw_section import RawSection
|
8
|
+
from raw_docx.raw_paragraph import RawParagraph
|
9
|
+
from raw_docx.raw_image import RawImage
|
10
|
+
from raw_docx.raw_table import RawTable
|
11
|
+
from raw_docx.raw_table_row import RawTableRow
|
12
|
+
from raw_docx.raw_table_cell import RawTableCell
|
13
|
+
from raw_docx.raw_list import RawList
|
14
|
+
from raw_docx.raw_list_item import RawListItem
|
15
|
+
from raw_docx.docx.docx_paragraph import install
|
15
16
|
from docx import Document as DocXProcessor
|
16
17
|
from docx.document import Document
|
17
18
|
from docx.oxml.table import CT_Tbl, CT_TcPr
|
@@ -19,8 +20,7 @@ from docx.oxml.text.paragraph import CT_P
|
|
19
20
|
from docx.table import Table, _Cell
|
20
21
|
from docx.text.paragraph import Paragraph
|
21
22
|
from lxml import etree
|
22
|
-
from
|
23
|
-
from .docx_paragraph import extract_runs # Needed such that method inserted into class
|
23
|
+
from simple_error_log import Errors
|
24
24
|
|
25
25
|
|
26
26
|
class RawDocx:
|
@@ -28,12 +28,17 @@ class RawDocx:
|
|
28
28
|
pass
|
29
29
|
|
30
30
|
def __init__(self, full_path: str):
|
31
|
+
install()
|
32
|
+
self.errors = Errors()
|
31
33
|
path = Path(full_path)
|
32
34
|
# path.stem, path.suffix[1:]
|
33
35
|
self.full_path = full_path
|
34
36
|
self.dir = path.parent
|
35
37
|
self.filename = path.name
|
36
38
|
self.image_path = os.path.join(self.dir, "images")
|
39
|
+
self.errors.debug(
|
40
|
+
f"RawDocx initialisation: full_path='{self.full_path}', dir='{self.dir}', image_path0'{self.image_path}', filename='{self.filename}"
|
41
|
+
)
|
37
42
|
self.image_rels = {}
|
38
43
|
self._organise_dir()
|
39
44
|
self.source_document = DocXProcessor(self.full_path)
|
@@ -46,28 +51,26 @@ class RawDocx:
|
|
46
51
|
except FileExistsError:
|
47
52
|
pass
|
48
53
|
except Exception as e:
|
49
|
-
|
54
|
+
self.errors.exception("Failed to create image directory", e)
|
50
55
|
|
51
56
|
def _process(self):
|
52
57
|
try:
|
53
|
-
self.
|
58
|
+
self._process_images()
|
54
59
|
for block_item in self._iter_block_items(self.source_document):
|
55
60
|
target_section = self.target_document.current_section()
|
56
61
|
if isinstance(block_item, Paragraph):
|
57
|
-
# print(f"PARA BLOCK: {block_item.text}")
|
58
62
|
self._process_paragraph(block_item, target_section, self.image_rels)
|
59
63
|
elif isinstance(block_item, Table):
|
60
64
|
self._process_table(block_item, target_section)
|
61
65
|
else:
|
62
|
-
|
66
|
+
self.errors.warning("Ignoring element")
|
63
67
|
raise ValueError
|
64
68
|
except Exception as e:
|
65
|
-
|
69
|
+
self.errors.exception("Exception raised processing document", e)
|
66
70
|
|
67
|
-
def
|
71
|
+
def _process_images(self):
|
68
72
|
# Extract images to image dir
|
69
73
|
self._extract_images()
|
70
|
-
# Save all 'rId:filenames' as references
|
71
74
|
for r in self.source_document.part.rels.values():
|
72
75
|
if isinstance(r._target, docx.parts.image.ImagePart):
|
73
76
|
self.image_rels[r.rId] = os.path.join(
|
@@ -91,9 +94,8 @@ class RawDocx:
|
|
91
94
|
|
92
95
|
for child in parent_elm.iterchildren():
|
93
96
|
if isinstance(child, str):
|
94
|
-
|
97
|
+
self.errors.warning(f"Ignoring eTree element {child}")
|
95
98
|
elif isinstance(child, CT_P):
|
96
|
-
# print(f"PARA: {child.text}")
|
97
99
|
yield Paragraph(child, parent)
|
98
100
|
elif isinstance(child, CT_Tbl):
|
99
101
|
yield Table(child, parent)
|
@@ -109,13 +111,12 @@ class RawDocx:
|
|
109
111
|
):
|
110
112
|
pass
|
111
113
|
else:
|
112
|
-
|
114
|
+
self.errors.warning(f"Ignoring eTree element {self._tree(child)}")
|
113
115
|
|
114
116
|
else:
|
115
117
|
raise ValueError(f"something's not right with a child {type(child)}")
|
116
118
|
|
117
119
|
def _tree(self, node, tab=1):
|
118
|
-
# print(f"{' ' * tab}{node.tag} {node.text}")
|
119
120
|
for child in node:
|
120
121
|
self._tree(child, tab + 1)
|
121
122
|
|
@@ -143,7 +144,10 @@ class RawDocx:
|
|
143
144
|
else:
|
144
145
|
h_span = 1
|
145
146
|
v_span = 1
|
146
|
-
|
147
|
+
if cell._tc is not None:
|
148
|
+
first = r_index == cell._tc.top and c_index == cell._tc.left
|
149
|
+
else:
|
150
|
+
first = r_index == 0 and c_index == 0
|
147
151
|
target_cell = RawTableCell(h_span, v_span, first)
|
148
152
|
target_row.add(target_cell)
|
149
153
|
for block_item in self._iter_block_items(cell):
|
@@ -155,7 +159,9 @@ class RawDocx:
|
|
155
159
|
if block_item.tag == CT_TcPr:
|
156
160
|
pass
|
157
161
|
else:
|
158
|
-
|
162
|
+
self.errors.warning(
|
163
|
+
f"Ignoring eTree element {block_item.tag}"
|
164
|
+
)
|
159
165
|
else:
|
160
166
|
raise self.LogicError(
|
161
167
|
f"something's not right with a child {type(block_item)}"
|
@@ -164,15 +170,15 @@ class RawDocx:
|
|
164
170
|
def _process_cell(self, paragraph, target_cell: RawTableCell):
|
165
171
|
if self._is_list(paragraph):
|
166
172
|
list_level = self.get_list_level(paragraph)
|
167
|
-
item = RawListItem(paragraph.extract_runs(), list_level)
|
173
|
+
item = RawListItem(paragraph.extract_runs(self.errors), list_level)
|
168
174
|
if target_cell.is_in_list():
|
169
175
|
list = target_cell.current_list()
|
170
176
|
else:
|
171
|
-
list = RawList()
|
177
|
+
list = RawList(self.errors)
|
172
178
|
target_cell.add(list)
|
173
179
|
list.add(item)
|
174
180
|
else:
|
175
|
-
target_paragraph = RawParagraph(paragraph.extract_runs())
|
181
|
+
target_paragraph = RawParagraph(paragraph.extract_runs(self.errors))
|
176
182
|
target_cell.add(target_paragraph)
|
177
183
|
|
178
184
|
def _process_paragraph(
|
@@ -183,41 +189,48 @@ class RawDocx:
|
|
183
189
|
target_section = RawSection(paragraph.text, paragraph.text, level)
|
184
190
|
self.target_document.add(target_section)
|
185
191
|
elif self._is_list(paragraph):
|
186
|
-
# print(f"START LIST: {paragraph.text}")
|
187
192
|
list_level = self.get_list_level(paragraph)
|
188
|
-
item = RawListItem(paragraph.extract_runs(), list_level)
|
193
|
+
item = RawListItem(paragraph.extract_runs(self.errors), list_level)
|
189
194
|
if target_section.is_in_list():
|
190
195
|
list = target_section.current_list()
|
191
196
|
else:
|
192
|
-
list = RawList()
|
197
|
+
list = RawList(self.errors)
|
193
198
|
target_section.add(list)
|
194
199
|
list.add(item)
|
195
200
|
elif "Graphic" in paragraph._p.xml:
|
196
201
|
for rId in image_rels:
|
197
202
|
if rId in paragraph._p.xml:
|
198
|
-
target_image = RawImage(image_rels[rId])
|
203
|
+
target_image = RawImage(image_rels[rId], self.errors)
|
199
204
|
target_section.add(target_image)
|
200
205
|
else:
|
201
|
-
|
202
|
-
target_paragraph = RawParagraph(paragraph.extract_runs())
|
206
|
+
target_paragraph = RawParagraph(paragraph.extract_runs(self.errors))
|
203
207
|
target_section.add(target_paragraph)
|
204
208
|
|
205
209
|
def get_list_level(self, paragraph):
|
206
210
|
list_level = paragraph._p.xpath("./w:pPr/w:numPr/w:ilvl/@w:val")
|
207
211
|
return int(str(list_level[0])) if list_level else 0
|
208
212
|
|
209
|
-
def _is_heading(self, text):
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
213
|
+
def _is_heading(self, text) -> tuple[bool, int]:
|
214
|
+
"""
|
215
|
+
Extract heading level from text containing "Heading <N>" pattern.
|
216
|
+
|
217
|
+
Args:
|
218
|
+
text: Text to analyze for heading pattern
|
219
|
+
|
220
|
+
Returns:
|
221
|
+
tuple[bool, int]: (success, level) where success indicates if heading
|
222
|
+
pattern was found and level is the extracted integer value
|
223
|
+
"""
|
224
|
+
if not text:
|
225
|
+
return False, 0
|
226
|
+
|
227
|
+
# Look for "Heading <N>" pattern where <N> is one or more digits
|
228
|
+
match = re.search(r"Heading\s+(\d+)", text, re.IGNORECASE)
|
229
|
+
if match:
|
217
230
|
try:
|
218
|
-
level = int(
|
231
|
+
level = int(match.group(1))
|
219
232
|
return True, level
|
220
|
-
except
|
233
|
+
except (ValueError, IndexError):
|
221
234
|
return True, 0
|
222
235
|
return False, 0
|
223
236
|
|
raw_docx/raw_image.py
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
import os
|
2
2
|
import base64
|
3
|
-
from
|
3
|
+
from simple_error_log import Errors
|
4
4
|
|
5
5
|
|
6
6
|
class RawImage:
|
7
7
|
FILE_TYPE_MAP = {".png": "png", ".jpg": "jpg", ".jpeg": "jpg"}
|
8
8
|
|
9
|
-
def __init__(self, filepath: str):
|
9
|
+
def __init__(self, filepath: str, errors: Errors):
|
10
|
+
self.errors = errors
|
10
11
|
self.filepath = filepath
|
11
12
|
|
12
13
|
def to_html(self):
|
@@ -21,7 +22,7 @@ class RawImage:
|
|
21
22
|
else:
|
22
23
|
return f"""<p style="color:red">Note: Unable to process embedded image of type '{file_extension}', image ignored.</p>"""
|
23
24
|
except Exception as e:
|
24
|
-
|
25
|
+
self.errors.exception("Exception converting image", e)
|
25
26
|
return (
|
26
27
|
"""<p style="color:red">Note: Error encountered processing image.</p>"""
|
27
28
|
)
|
raw_docx/raw_list.py
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
from .raw_list_item import RawListItem
|
2
|
-
from
|
2
|
+
from simple_error_log import Errors
|
3
3
|
|
4
4
|
|
5
5
|
class RawList:
|
6
|
-
def __init__(self, level=0):
|
6
|
+
def __init__(self, errors: Errors, level=0):
|
7
|
+
self.errors = errors
|
7
8
|
self.items = [] # List to store RawListItems and nested RawLists
|
8
9
|
self.level = level
|
9
10
|
|
@@ -13,15 +14,15 @@ class RawList:
|
|
13
14
|
elif item.level > self.level:
|
14
15
|
list = self.items[-1] if self.items else None
|
15
16
|
if not isinstance(list, RawList):
|
16
|
-
list = RawList(item.level)
|
17
|
+
list = RawList(self.errors, item.level)
|
17
18
|
self.items.append(list)
|
18
19
|
list.add(item)
|
19
20
|
if item.level > self.level + 1:
|
20
|
-
|
21
|
+
self.errors.warning(
|
21
22
|
f"Adding list item '{item}' to item but level jump greater than 1"
|
22
23
|
)
|
23
24
|
else:
|
24
|
-
|
25
|
+
self.errors.error(
|
25
26
|
f"Failed to add list item '{item}' to list '{self}', levels are in error"
|
26
27
|
)
|
27
28
|
|
raw_docx/raw_list_item.py
CHANGED
@@ -12,7 +12,8 @@ class RawListItem(RawParagraph):
|
|
12
12
|
return f"{' ' * self.level}{self.text}"
|
13
13
|
|
14
14
|
def to_html(self) -> str:
|
15
|
-
return f"{
|
15
|
+
return f"{self.text}"
|
16
|
+
# return f"{escape(self.text)}"
|
16
17
|
|
17
18
|
def to_dict(self) -> dict:
|
18
19
|
return {"type": "list_item", "text": self.text, "level": self.level}
|
raw_docx/raw_paragraph.py
CHANGED
@@ -11,7 +11,7 @@ class RawParagraph:
|
|
11
11
|
def to_html(self) -> str:
|
12
12
|
klass_list = " ".join(self.klasses)
|
13
13
|
open_tag = f'<p class="{klass_list}">' if self.klasses else "<p>"
|
14
|
-
return f"{open_tag}{
|
14
|
+
return f"{open_tag}{self.text}</p>"
|
15
15
|
|
16
16
|
def find(self, text: str) -> bool:
|
17
17
|
return True if text in self.text else False
|
@@ -31,5 +31,9 @@ class RawParagraph:
|
|
31
31
|
"classes": self.klasses,
|
32
32
|
}
|
33
33
|
|
34
|
+
def add_span(self, text: str, klass: str) -> None:
|
35
|
+
new_str = f'<span class="{klass}">{text}</span>'
|
36
|
+
self.text = new_str + self.text[len(text) :]
|
37
|
+
|
34
38
|
def _run_text(self) -> str:
|
35
39
|
return "".join([run.text for run in self.runs])
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: raw_docx
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.8.0
|
4
4
|
Summary: A package for processing and analyzing raw document formats
|
5
5
|
Home-page: https://github.com/daveih/raw_docx
|
6
6
|
Author: Dave Iberson-Hurst
|
@@ -17,13 +17,14 @@ Classifier: Programming Language :: Python :: 3.11
|
|
17
17
|
Requires-Python: >=3.8
|
18
18
|
Description-Content-Type: text/markdown
|
19
19
|
License-File: LICENSE
|
20
|
-
Requires-Dist: python-docx
|
21
|
-
Requires-Dist:
|
20
|
+
Requires-Dist: python-docx==1.1.2
|
21
|
+
Requires-Dist: simple_error_log==0.6.0
|
22
22
|
Dynamic: author
|
23
23
|
Dynamic: classifier
|
24
24
|
Dynamic: description
|
25
25
|
Dynamic: description-content-type
|
26
26
|
Dynamic: home-page
|
27
|
+
Dynamic: license-file
|
27
28
|
Dynamic: requires-dist
|
28
29
|
Dynamic: requires-python
|
29
30
|
Dynamic: summary
|
@@ -37,4 +38,4 @@ Simple package to build on top of python-docx to assist in the handling of word
|
|
37
38
|
Build as a normal package
|
38
39
|
|
39
40
|
- Build with `python3 -m build --sdist --wheel`
|
40
|
-
- Upload to pypi.org using `twine upload dist
|
41
|
+
- Upload to pypi.org using `twine upload dist/*`
|
@@ -0,0 +1,20 @@
|
|
1
|
+
raw_docx/__info__.py,sha256=onU36pd8pPYPNlWn6QKkq5qJkOLC2_M4_UeggrLuh-A,30
|
2
|
+
raw_docx/__init__.py,sha256=FE5cpoCK1EVhpz3LiOOs43l027PcuJN5RljdW0UWON0,591
|
3
|
+
raw_docx/raw_document.py,sha256=hUrnf6QZs9-yysnz1UmYZCYvhqdyPi3v2i-t5mu5KsI,2340
|
4
|
+
raw_docx/raw_docx.py,sha256=huZzOyfzkhAILa6MurNO6qpye4gy39FljT9m40rrAX4,10850
|
5
|
+
raw_docx/raw_image.py,sha256=IUUETwW73-guaa_v-cHpfw0_z69u9wfvEk7adm9hHJQ,1506
|
6
|
+
raw_docx/raw_list.py,sha256=bhssQX_oVf8uBmUbcrCIzIJ8pCvdEtdHOAQBNH0EEQQ,2282
|
7
|
+
raw_docx/raw_list_item.py,sha256=4Mn3rmnpXppJGAxk-9StLD60wszk5igg-TIbBz8sKW4,623
|
8
|
+
raw_docx/raw_paragraph.py,sha256=edFNwudoBWNWdj5b3ac0e6LFrFuZjU_ize70ToUGQN8,1233
|
9
|
+
raw_docx/raw_run.py,sha256=0PJHiZIm1QclZfjdsrPPLSL7_GYoX8jSa6JvcfcOcWc,479
|
10
|
+
raw_docx/raw_section.py,sha256=_ONvR5Fyuif4vZs1LnE7Y67pX29JKWM13YB8Wy8di9o,3942
|
11
|
+
raw_docx/raw_table.py,sha256=qm-Ap1AOHRuOxiUgHsI6uV4GeCNEJrE0Z3TZ8rXbffg,1579
|
12
|
+
raw_docx/raw_table_cell.py,sha256=pXe7FCfEmbqdktBGfkDrvMsbEie8FaGNQbRl_ooms0Q,1887
|
13
|
+
raw_docx/raw_table_row.py,sha256=m8SoLyVlKLjd_Vqa_U79A2wi8Wout8spgyusqJm79Kc,1297
|
14
|
+
raw_docx/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
+
raw_docx/docx/docx_paragraph.py,sha256=DPFzCG26y-6teL3KDnC_Ihmbs48OsHfD4fCD5Tj1O4A,2938
|
16
|
+
raw_docx-0.8.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
17
|
+
raw_docx-0.8.0.dist-info/METADATA,sha256=jf9pYmWbkI5JKYuucP9mps0Pp9CXgZsywPLHTEAv-L0,1237
|
18
|
+
raw_docx-0.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
19
|
+
raw_docx-0.8.0.dist-info/top_level.txt,sha256=Xl3dspPM9DBVj8clfdkHG7N4nNjNXeUmB4HcXAwOe60,9
|
20
|
+
raw_docx-0.8.0.dist-info/RECORD,,
|
raw_docx/__version__.py
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
__package_version__ = "0.6.0"
|
raw_docx/raw_logger.py
DELETED
@@ -1,67 +0,0 @@
|
|
1
|
-
import sys
|
2
|
-
import logging
|
3
|
-
from pathlib import Path
|
4
|
-
from typing import Optional
|
5
|
-
from pythonjsonlogger import jsonlogger
|
6
|
-
|
7
|
-
|
8
|
-
class RawLogger:
|
9
|
-
_instance = None
|
10
|
-
_initialized = False
|
11
|
-
|
12
|
-
def __new__(cls):
|
13
|
-
if cls._instance is None:
|
14
|
-
cls._instance = super().__new__(cls)
|
15
|
-
return cls._instance
|
16
|
-
|
17
|
-
def __init__(self):
|
18
|
-
if not RawLogger._initialized:
|
19
|
-
self.logger = logging.getLogger("raw_docx")
|
20
|
-
self.logger.setLevel(logging.INFO)
|
21
|
-
|
22
|
-
# Create JSON formatter
|
23
|
-
formatter = jsonlogger.JsonFormatter(
|
24
|
-
fmt="%(asctime)s %(name)s %(levelname)s %(message)s",
|
25
|
-
datefmt="%Y-%m-%d %H:%M:%S",
|
26
|
-
)
|
27
|
-
|
28
|
-
# Console handler
|
29
|
-
console_handler = logging.StreamHandler(sys.stdout)
|
30
|
-
console_handler.setFormatter(formatter)
|
31
|
-
self.logger.addHandler(console_handler)
|
32
|
-
|
33
|
-
RawLogger._initialized = True
|
34
|
-
|
35
|
-
def setup_file_logging(self, log_dir: Optional[str] = None):
|
36
|
-
"""Setup file logging in addition to console logging"""
|
37
|
-
if log_dir:
|
38
|
-
log_path = Path(log_dir)
|
39
|
-
log_path.mkdir(parents=True, exist_ok=True)
|
40
|
-
file_handler = logging.FileHandler(log_path / "raw_docx.log")
|
41
|
-
file_handler.setFormatter(
|
42
|
-
jsonlogger.JsonFormatter(
|
43
|
-
fmt="%(asctime)s %(name)s %(levelname)s %(message)s",
|
44
|
-
datefmt="%Y-%m-%d %H:%M:%S",
|
45
|
-
)
|
46
|
-
)
|
47
|
-
self.logger.addHandler(file_handler)
|
48
|
-
|
49
|
-
def info(self, message: str):
|
50
|
-
"""Log info message"""
|
51
|
-
self.logger.info(message)
|
52
|
-
|
53
|
-
def warning(self, message: str):
|
54
|
-
"""Log warning message"""
|
55
|
-
self.logger.warning(message)
|
56
|
-
|
57
|
-
def error(self, message: str):
|
58
|
-
"""Log error message"""
|
59
|
-
self.logger.error(message)
|
60
|
-
|
61
|
-
def exception(self, message: str, exc: Exception):
|
62
|
-
"""Log exception with message"""
|
63
|
-
self.logger.exception(message, exc_info=exc)
|
64
|
-
|
65
|
-
|
66
|
-
# Create singleton instance
|
67
|
-
logger = RawLogger()
|
raw_docx-0.6.0.dist-info/RECORD
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
raw_docx/__init__.py,sha256=UNQS1BnwzJphSfmaczmY4F0UL82uJPfeVA2QdD9koaQ,642
|
2
|
-
raw_docx/__version__.py,sha256=FybdcTppaiphhHrdlWtqDynrc95gNkil2evIm-RAjhc,30
|
3
|
-
raw_docx/docx_paragraph.py,sha256=2A0fqVNAWswJpI35WfaY2o07hP7Ks83NcpltAyLU31w,2752
|
4
|
-
raw_docx/raw_document.py,sha256=VLx0-Z9jGwdYHMU227AKaT8UDRY_OHD7b2BRuw71x6M,2340
|
5
|
-
raw_docx/raw_docx.py,sha256=LA94jADMXhRL1lcaz0-Fs69FYtAJdQLPJ57Lu92ZVf0,10258
|
6
|
-
raw_docx/raw_image.py,sha256=GvR2hfgNkNnQCmEZ8SbMDWm-_CAyfk56eWF5l2OxQDw,1451
|
7
|
-
raw_docx/raw_list.py,sha256=wA84muLMViYsj4gxMbwWM_aAQ9loYRixKgwPppJghrE,2209
|
8
|
-
raw_docx/raw_list_item.py,sha256=L8b_eaag0aFisHozxW0dh8yynR5i-PhsltTtjHBnlhQ,591
|
9
|
-
raw_docx/raw_logger.py,sha256=jKc5Ph3SNbXjO6sNq_q6BuUcZuIuKk8pbHp7mIFWRXg,2059
|
10
|
-
raw_docx/raw_paragraph.py,sha256=GOxq4n68mpG11kxw5_89UmGqvTV_BSa8E9A15BaEroI,1075
|
11
|
-
raw_docx/raw_run.py,sha256=0PJHiZIm1QclZfjdsrPPLSL7_GYoX8jSa6JvcfcOcWc,479
|
12
|
-
raw_docx/raw_section.py,sha256=_ONvR5Fyuif4vZs1LnE7Y67pX29JKWM13YB8Wy8di9o,3942
|
13
|
-
raw_docx/raw_table.py,sha256=qm-Ap1AOHRuOxiUgHsI6uV4GeCNEJrE0Z3TZ8rXbffg,1579
|
14
|
-
raw_docx/raw_table_cell.py,sha256=pXe7FCfEmbqdktBGfkDrvMsbEie8FaGNQbRl_ooms0Q,1887
|
15
|
-
raw_docx/raw_table_row.py,sha256=m8SoLyVlKLjd_Vqa_U79A2wi8Wout8spgyusqJm79Kc,1297
|
16
|
-
raw_docx-0.6.0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
17
|
-
raw_docx-0.6.0.dist-info/METADATA,sha256=5G_FlzjcZgMZhBet6rQFRYi-ZE2T2fpbQozmUlyUMBc,1204
|
18
|
-
raw_docx-0.6.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
19
|
-
raw_docx-0.6.0.dist-info/top_level.txt,sha256=Xl3dspPM9DBVj8clfdkHG7N4nNjNXeUmB4HcXAwOe60,9
|
20
|
-
raw_docx-0.6.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|