raw-docx 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- raw_docx/__info__.py +1 -0
- raw_docx/__init__.py +25 -0
- raw_docx/docx/__init__.py +0 -0
- raw_docx/{docx_paragraph.py → docx/docx_paragraph.py} +23 -22
- raw_docx/raw_docx.py +33 -32
- raw_docx/raw_image.py +4 -3
- raw_docx/raw_list.py +6 -5
- raw_docx/raw_list_item.py +2 -1
- raw_docx/raw_paragraph.py +5 -1
- {raw_docx-0.5.0.dist-info → raw_docx-0.7.0.dist-info}/METADATA +4 -3
- raw_docx-0.7.0.dist-info/RECORD +20 -0
- {raw_docx-0.5.0.dist-info → raw_docx-0.7.0.dist-info}/WHEEL +1 -1
- raw_docx/__version__.py +0 -1
- raw_docx/raw_logger.py +0 -67
- raw_docx-0.5.0.dist-info/RECORD +0 -20
- {raw_docx-0.5.0.dist-info → raw_docx-0.7.0.dist-info/licenses}/LICENSE +0 -0
- {raw_docx-0.5.0.dist-info → raw_docx-0.7.0.dist-info}/top_level.txt +0 -0
raw_docx/__info__.py
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
__package_version__ = "0.7.0"
|
raw_docx/__init__.py
CHANGED
@@ -0,0 +1,25 @@
|
|
1
|
+
from .raw_docx import RawDocx
|
2
|
+
from .raw_document import RawDocument
|
3
|
+
from .raw_image import RawImage
|
4
|
+
from .raw_list_item import RawListItem
|
5
|
+
from .raw_list import RawList
|
6
|
+
from .raw_paragraph import RawParagraph
|
7
|
+
from .raw_run import RawRun
|
8
|
+
from .raw_section import RawSection
|
9
|
+
from .raw_table_cell import RawTableCell
|
10
|
+
from .raw_table_row import RawTableRow
|
11
|
+
from .raw_table import RawTable
|
12
|
+
|
13
|
+
__all__ = [
|
14
|
+
"RawDocx",
|
15
|
+
"RawDocument",
|
16
|
+
"RawImage",
|
17
|
+
"RawList",
|
18
|
+
"RawListItem",
|
19
|
+
"RawParagraph",
|
20
|
+
"RawRun",
|
21
|
+
"RawSection",
|
22
|
+
"RawTableCell",
|
23
|
+
"RawTableRow",
|
24
|
+
"RawTable",
|
25
|
+
]
|
File without changes
|
@@ -1,31 +1,35 @@
|
|
1
1
|
from docx.text.paragraph import Paragraph
|
2
2
|
from docx.styles.style import ParagraphStyle
|
3
3
|
from docx.text.run import Run
|
4
|
-
from
|
5
|
-
from .raw_run import RawRun
|
4
|
+
from simple_error_log import Errors
|
5
|
+
from raw_docx.raw_run import RawRun
|
6
6
|
|
7
7
|
|
8
|
-
def extract_runs(paragraph: Paragraph) -> list[RawRun]:
|
8
|
+
def install():
|
9
|
+
setattr(Paragraph, "extract_runs", extract_runs)
|
10
|
+
|
11
|
+
|
12
|
+
def extract_runs(paragraph: Paragraph, errors: Errors) -> list[RawRun]:
|
9
13
|
if paragraph.text.startswith(
|
10
14
|
"This template is intended for interventional clinical trials. The template is suitable"
|
11
15
|
):
|
12
|
-
|
16
|
+
errors.info(f"Paragraph style {paragraph.style.name}")
|
13
17
|
data = [
|
14
18
|
{
|
15
19
|
"text": run.text,
|
16
|
-
"color": _get_run_color(paragraph.style, run),
|
17
|
-
"highlight": _get_highlight_color(run),
|
20
|
+
"color": _get_run_color(paragraph.style, run, errors),
|
21
|
+
"highlight": _get_highlight_color(run, errors),
|
18
22
|
"keep": True,
|
19
23
|
# "style": run.style.name if run.style else paragraph.style.name
|
20
24
|
"style": paragraph.style.name,
|
21
25
|
}
|
22
26
|
for run in paragraph.runs
|
23
27
|
]
|
24
|
-
data = _tidy_runs_color(data)
|
28
|
+
data = _tidy_runs_color(data, errors)
|
25
29
|
return [RawRun(x["text"], x["color"], x["highlight"], x["style"]) for x in data]
|
26
30
|
|
27
31
|
|
28
|
-
def _tidy_runs_color(data: list[dict]) -> list[dict]:
|
32
|
+
def _tidy_runs_color(data: list[dict], errors: Errors) -> list[dict]:
|
29
33
|
more = False
|
30
34
|
for index, run in enumerate(data):
|
31
35
|
if (
|
@@ -38,14 +42,14 @@ def _tidy_runs_color(data: list[dict]) -> list[dict]:
|
|
38
42
|
more = True
|
39
43
|
new_data = [x for x in data if x["keep"]]
|
40
44
|
if more:
|
41
|
-
new_data = _tidy_runs_color(new_data)
|
45
|
+
new_data = _tidy_runs_color(new_data, errors)
|
42
46
|
return new_data
|
43
47
|
|
44
48
|
|
45
|
-
def _get_run_color(paragraph: Paragraph, run: Run) -> str | None:
|
46
|
-
paragraph_color = _get_font_colour(paragraph)
|
47
|
-
font_color = _get_font_colour(run)
|
48
|
-
style_color = _run_style_color(run)
|
49
|
+
def _get_run_color(paragraph: Paragraph, run: Run, errors: Errors) -> str | None:
|
50
|
+
paragraph_color = _get_font_colour(paragraph, errors)
|
51
|
+
font_color = _get_font_colour(run, errors)
|
52
|
+
style_color = _run_style_color(run, errors)
|
49
53
|
if font_color:
|
50
54
|
result = str(font_color)
|
51
55
|
elif style_color:
|
@@ -55,15 +59,15 @@ def _get_run_color(paragraph: Paragraph, run: Run) -> str | None:
|
|
55
59
|
return result
|
56
60
|
|
57
61
|
|
58
|
-
def _get_highlight_color(run: Run) -> str | None:
|
62
|
+
def _get_highlight_color(run: Run, errors: Errors) -> str | None:
|
59
63
|
try:
|
60
64
|
return str(run.font.highlight_color)
|
61
65
|
except Exception as e:
|
62
|
-
|
66
|
+
errors.exception("Failed to get run highlight color", e)
|
63
67
|
return None
|
64
68
|
|
65
69
|
|
66
|
-
def _run_style_color(run: Run) -> str | None:
|
70
|
+
def _run_style_color(run: Run, errors: Errors) -> str | None:
|
67
71
|
try:
|
68
72
|
run_color = None
|
69
73
|
run_style = run.style
|
@@ -74,16 +78,13 @@ def _run_style_color(run: Run) -> str | None:
|
|
74
78
|
run_style = run_style.base_style
|
75
79
|
return run_color
|
76
80
|
except Exception as e:
|
77
|
-
|
81
|
+
errors.exception("Failed to get run style color", e)
|
78
82
|
return None
|
79
83
|
|
80
84
|
|
81
|
-
def _get_font_colour(item: Run | ParagraphStyle) -> str | None:
|
85
|
+
def _get_font_colour(item: Run | ParagraphStyle, errors: Errors) -> str | None:
|
82
86
|
try:
|
83
87
|
return item.font.color.rgb
|
84
88
|
except Exception as e:
|
85
|
-
|
89
|
+
errors.exception("Failed to get font color", e)
|
86
90
|
return None
|
87
|
-
|
88
|
-
|
89
|
-
setattr(Paragraph, "extract_runs", extract_runs)
|
raw_docx/raw_docx.py
CHANGED
@@ -3,15 +3,16 @@ import re
|
|
3
3
|
import docx
|
4
4
|
import zipfile
|
5
5
|
from pathlib import Path
|
6
|
-
from .raw_document import RawDocument
|
7
|
-
from .raw_section import RawSection
|
8
|
-
from .raw_paragraph import RawParagraph
|
9
|
-
from .raw_image import RawImage
|
10
|
-
from .raw_table import RawTable
|
11
|
-
from .raw_table_row import RawTableRow
|
12
|
-
from .raw_table_cell import RawTableCell
|
13
|
-
from .raw_list import RawList
|
14
|
-
from .raw_list_item import RawListItem
|
6
|
+
from raw_docx.raw_document import RawDocument
|
7
|
+
from raw_docx.raw_section import RawSection
|
8
|
+
from raw_docx.raw_paragraph import RawParagraph
|
9
|
+
from raw_docx.raw_image import RawImage
|
10
|
+
from raw_docx.raw_table import RawTable
|
11
|
+
from raw_docx.raw_table_row import RawTableRow
|
12
|
+
from raw_docx.raw_table_cell import RawTableCell
|
13
|
+
from raw_docx.raw_list import RawList
|
14
|
+
from raw_docx.raw_list_item import RawListItem
|
15
|
+
from raw_docx.docx.docx_paragraph import install
|
15
16
|
from docx import Document as DocXProcessor
|
16
17
|
from docx.document import Document
|
17
18
|
from docx.oxml.table import CT_Tbl, CT_TcPr
|
@@ -19,8 +20,7 @@ from docx.oxml.text.paragraph import CT_P
|
|
19
20
|
from docx.table import Table, _Cell
|
20
21
|
from docx.text.paragraph import Paragraph
|
21
22
|
from lxml import etree
|
22
|
-
from
|
23
|
-
from .docx_paragraph import extract_runs # Needed such that method inserted into class
|
23
|
+
from simple_error_log import Errors
|
24
24
|
|
25
25
|
|
26
26
|
class RawDocx:
|
@@ -28,12 +28,17 @@ class RawDocx:
|
|
28
28
|
pass
|
29
29
|
|
30
30
|
def __init__(self, full_path: str):
|
31
|
+
install()
|
32
|
+
self.errors = Errors()
|
31
33
|
path = Path(full_path)
|
32
34
|
# path.stem, path.suffix[1:]
|
33
35
|
self.full_path = full_path
|
34
36
|
self.dir = path.parent
|
35
37
|
self.filename = path.name
|
36
38
|
self.image_path = os.path.join(self.dir, "images")
|
39
|
+
self.errors.debug(
|
40
|
+
f"RawDocx initialisation: full_path='{self.full_path}', dir='{self.dir}', image_path0'{self.image_path}', filename='{self.filename}"
|
41
|
+
)
|
37
42
|
self.image_rels = {}
|
38
43
|
self._organise_dir()
|
39
44
|
self.source_document = DocXProcessor(self.full_path)
|
@@ -46,28 +51,26 @@ class RawDocx:
|
|
46
51
|
except FileExistsError:
|
47
52
|
pass
|
48
53
|
except Exception as e:
|
49
|
-
|
54
|
+
self.errors.exception("Failed to create image directory", e)
|
50
55
|
|
51
56
|
def _process(self):
|
52
57
|
try:
|
53
|
-
self.
|
58
|
+
self._process_images()
|
54
59
|
for block_item in self._iter_block_items(self.source_document):
|
55
60
|
target_section = self.target_document.current_section()
|
56
61
|
if isinstance(block_item, Paragraph):
|
57
|
-
# print(f"PARA BLOCK: {block_item.text}")
|
58
62
|
self._process_paragraph(block_item, target_section, self.image_rels)
|
59
63
|
elif isinstance(block_item, Table):
|
60
64
|
self._process_table(block_item, target_section)
|
61
65
|
else:
|
62
|
-
|
66
|
+
self.errors.warning("Ignoring element")
|
63
67
|
raise ValueError
|
64
68
|
except Exception as e:
|
65
|
-
|
69
|
+
self.errors.exception("Exception raised processing document", e)
|
66
70
|
|
67
|
-
def
|
71
|
+
def _process_images(self):
|
68
72
|
# Extract images to image dir
|
69
73
|
self._extract_images()
|
70
|
-
# Save all 'rId:filenames' as references
|
71
74
|
for r in self.source_document.part.rels.values():
|
72
75
|
if isinstance(r._target, docx.parts.image.ImagePart):
|
73
76
|
self.image_rels[r.rId] = os.path.join(
|
@@ -91,9 +94,8 @@ class RawDocx:
|
|
91
94
|
|
92
95
|
for child in parent_elm.iterchildren():
|
93
96
|
if isinstance(child, str):
|
94
|
-
|
97
|
+
self.errors.warning(f"Ignoring eTree element {child}")
|
95
98
|
elif isinstance(child, CT_P):
|
96
|
-
# print(f"PARA: {child.text}")
|
97
99
|
yield Paragraph(child, parent)
|
98
100
|
elif isinstance(child, CT_Tbl):
|
99
101
|
yield Table(child, parent)
|
@@ -109,13 +111,12 @@ class RawDocx:
|
|
109
111
|
):
|
110
112
|
pass
|
111
113
|
else:
|
112
|
-
|
114
|
+
self.errors.warning(f"Ignoring eTree element {self._tree(child)}")
|
113
115
|
|
114
116
|
else:
|
115
117
|
raise ValueError(f"something's not right with a child {type(child)}")
|
116
118
|
|
117
119
|
def _tree(self, node, tab=1):
|
118
|
-
# print(f"{' ' * tab}{node.tag} {node.text}")
|
119
120
|
for child in node:
|
120
121
|
self._tree(child, tab + 1)
|
121
122
|
|
@@ -155,7 +156,9 @@ class RawDocx:
|
|
155
156
|
if block_item.tag == CT_TcPr:
|
156
157
|
pass
|
157
158
|
else:
|
158
|
-
|
159
|
+
self.errors.warning(
|
160
|
+
f"Ignoring eTree element {block_item.tag}"
|
161
|
+
)
|
159
162
|
else:
|
160
163
|
raise self.LogicError(
|
161
164
|
f"something's not right with a child {type(block_item)}"
|
@@ -164,15 +167,15 @@ class RawDocx:
|
|
164
167
|
def _process_cell(self, paragraph, target_cell: RawTableCell):
|
165
168
|
if self._is_list(paragraph):
|
166
169
|
list_level = self.get_list_level(paragraph)
|
167
|
-
item = RawListItem(paragraph.extract_runs(), list_level)
|
170
|
+
item = RawListItem(paragraph.extract_runs(self.errors), list_level)
|
168
171
|
if target_cell.is_in_list():
|
169
172
|
list = target_cell.current_list()
|
170
173
|
else:
|
171
|
-
list = RawList()
|
174
|
+
list = RawList(self.errors)
|
172
175
|
target_cell.add(list)
|
173
176
|
list.add(item)
|
174
177
|
else:
|
175
|
-
target_paragraph = RawParagraph(paragraph.extract_runs())
|
178
|
+
target_paragraph = RawParagraph(paragraph.extract_runs(self.errors))
|
176
179
|
target_cell.add(target_paragraph)
|
177
180
|
|
178
181
|
def _process_paragraph(
|
@@ -183,23 +186,21 @@ class RawDocx:
|
|
183
186
|
target_section = RawSection(paragraph.text, paragraph.text, level)
|
184
187
|
self.target_document.add(target_section)
|
185
188
|
elif self._is_list(paragraph):
|
186
|
-
# print(f"START LIST: {paragraph.text}")
|
187
189
|
list_level = self.get_list_level(paragraph)
|
188
|
-
item = RawListItem(paragraph.extract_runs(), list_level)
|
190
|
+
item = RawListItem(paragraph.extract_runs(self.errors), list_level)
|
189
191
|
if target_section.is_in_list():
|
190
192
|
list = target_section.current_list()
|
191
193
|
else:
|
192
|
-
list = RawList()
|
194
|
+
list = RawList(self.errors)
|
193
195
|
target_section.add(list)
|
194
196
|
list.add(item)
|
195
197
|
elif "Graphic" in paragraph._p.xml:
|
196
198
|
for rId in image_rels:
|
197
199
|
if rId in paragraph._p.xml:
|
198
|
-
target_image = RawImage(image_rels[rId])
|
200
|
+
target_image = RawImage(image_rels[rId], self.errors)
|
199
201
|
target_section.add(target_image)
|
200
202
|
else:
|
201
|
-
|
202
|
-
target_paragraph = RawParagraph(paragraph.extract_runs())
|
203
|
+
target_paragraph = RawParagraph(paragraph.extract_runs(self.errors))
|
203
204
|
target_section.add(target_paragraph)
|
204
205
|
|
205
206
|
def get_list_level(self, paragraph):
|
raw_docx/raw_image.py
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
import os
|
2
2
|
import base64
|
3
|
-
from
|
3
|
+
from simple_error_log import Errors
|
4
4
|
|
5
5
|
|
6
6
|
class RawImage:
|
7
7
|
FILE_TYPE_MAP = {".png": "png", ".jpg": "jpg", ".jpeg": "jpg"}
|
8
8
|
|
9
|
-
def __init__(self, filepath: str):
|
9
|
+
def __init__(self, filepath: str, errors: Errors):
|
10
|
+
self.errors = errors
|
10
11
|
self.filepath = filepath
|
11
12
|
|
12
13
|
def to_html(self):
|
@@ -21,7 +22,7 @@ class RawImage:
|
|
21
22
|
else:
|
22
23
|
return f"""<p style="color:red">Note: Unable to process embedded image of type '{file_extension}', image ignored.</p>"""
|
23
24
|
except Exception as e:
|
24
|
-
|
25
|
+
self.errors.exception("Exception converting image", e)
|
25
26
|
return (
|
26
27
|
"""<p style="color:red">Note: Error encountered processing image.</p>"""
|
27
28
|
)
|
raw_docx/raw_list.py
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
from .raw_list_item import RawListItem
|
2
|
-
from
|
2
|
+
from simple_error_log import Errors
|
3
3
|
|
4
4
|
|
5
5
|
class RawList:
|
6
|
-
def __init__(self, level=0):
|
6
|
+
def __init__(self, errors: Errors, level=0):
|
7
|
+
self.errors = errors
|
7
8
|
self.items = [] # List to store RawListItems and nested RawLists
|
8
9
|
self.level = level
|
9
10
|
|
@@ -13,15 +14,15 @@ class RawList:
|
|
13
14
|
elif item.level > self.level:
|
14
15
|
list = self.items[-1] if self.items else None
|
15
16
|
if not isinstance(list, RawList):
|
16
|
-
list = RawList(item.level)
|
17
|
+
list = RawList(self.errors, item.level)
|
17
18
|
self.items.append(list)
|
18
19
|
list.add(item)
|
19
20
|
if item.level > self.level + 1:
|
20
|
-
|
21
|
+
self.errors.warning(
|
21
22
|
f"Adding list item '{item}' to item but level jump greater than 1"
|
22
23
|
)
|
23
24
|
else:
|
24
|
-
|
25
|
+
self.errors.error(
|
25
26
|
f"Failed to add list item '{item}' to list '{self}', levels are in error"
|
26
27
|
)
|
27
28
|
|
raw_docx/raw_list_item.py
CHANGED
@@ -12,7 +12,8 @@ class RawListItem(RawParagraph):
|
|
12
12
|
return f"{' ' * self.level}{self.text}"
|
13
13
|
|
14
14
|
def to_html(self) -> str:
|
15
|
-
return f"{
|
15
|
+
return f"{self.text}"
|
16
|
+
# return f"{escape(self.text)}"
|
16
17
|
|
17
18
|
def to_dict(self) -> dict:
|
18
19
|
return {"type": "list_item", "text": self.text, "level": self.level}
|
raw_docx/raw_paragraph.py
CHANGED
@@ -11,7 +11,7 @@ class RawParagraph:
|
|
11
11
|
def to_html(self) -> str:
|
12
12
|
klass_list = " ".join(self.klasses)
|
13
13
|
open_tag = f'<p class="{klass_list}">' if self.klasses else "<p>"
|
14
|
-
return f"{open_tag}{
|
14
|
+
return f"{open_tag}{self.text}</p>"
|
15
15
|
|
16
16
|
def find(self, text: str) -> bool:
|
17
17
|
return True if text in self.text else False
|
@@ -31,5 +31,9 @@ class RawParagraph:
|
|
31
31
|
"classes": self.klasses,
|
32
32
|
}
|
33
33
|
|
34
|
+
def add_span(self, text: str, klass: str) -> None:
|
35
|
+
new_str = f'<span class="{klass}">{text}</span>'
|
36
|
+
self.text = new_str + self.text[len(text) :]
|
37
|
+
|
34
38
|
def _run_text(self) -> str:
|
35
39
|
return "".join([run.text for run in self.runs])
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: raw_docx
|
3
|
-
Version: 0.5.0
|
3
|
+
Version: 0.7.0
|
4
4
|
Summary: A package for processing and analyzing raw document formats
|
5
5
|
Home-page: https://github.com/daveih/raw_docx
|
6
6
|
Author: Dave Iberson-Hurst
|
@@ -18,12 +18,13 @@ Requires-Python: >=3.8
|
|
18
18
|
Description-Content-Type: text/markdown
|
19
19
|
License-File: LICENSE
|
20
20
|
Requires-Dist: python-docx
|
21
|
-
Requires-Dist:
|
21
|
+
Requires-Dist: simple_error_log
|
22
22
|
Dynamic: author
|
23
23
|
Dynamic: classifier
|
24
24
|
Dynamic: description
|
25
25
|
Dynamic: description-content-type
|
26
26
|
Dynamic: home-page
|
27
|
+
Dynamic: license-file
|
27
28
|
Dynamic: requires-dist
|
28
29
|
Dynamic: requires-python
|
29
30
|
Dynamic: summary
|
@@ -0,0 +1,20 @@
|
|
1
|
+
raw_docx/__info__.py,sha256=JyImHLdD9tOVOKODJRy37_fBqVJ9Nup6yLR4gOOCtz8,30
|
2
|
+
raw_docx/__init__.py,sha256=FE5cpoCK1EVhpz3LiOOs43l027PcuJN5RljdW0UWON0,591
|
3
|
+
raw_docx/raw_document.py,sha256=VLx0-Z9jGwdYHMU227AKaT8UDRY_OHD7b2BRuw71x6M,2340
|
4
|
+
raw_docx/raw_docx.py,sha256=VhmwkP1kO5Bjr1WnfCyJE6JL728ZEHvStlQ78VuFFxQ,10379
|
5
|
+
raw_docx/raw_image.py,sha256=IUUETwW73-guaa_v-cHpfw0_z69u9wfvEk7adm9hHJQ,1506
|
6
|
+
raw_docx/raw_list.py,sha256=bhssQX_oVf8uBmUbcrCIzIJ8pCvdEtdHOAQBNH0EEQQ,2282
|
7
|
+
raw_docx/raw_list_item.py,sha256=4Mn3rmnpXppJGAxk-9StLD60wszk5igg-TIbBz8sKW4,623
|
8
|
+
raw_docx/raw_paragraph.py,sha256=edFNwudoBWNWdj5b3ac0e6LFrFuZjU_ize70ToUGQN8,1233
|
9
|
+
raw_docx/raw_run.py,sha256=0PJHiZIm1QclZfjdsrPPLSL7_GYoX8jSa6JvcfcOcWc,479
|
10
|
+
raw_docx/raw_section.py,sha256=_ONvR5Fyuif4vZs1LnE7Y67pX29JKWM13YB8Wy8di9o,3942
|
11
|
+
raw_docx/raw_table.py,sha256=qm-Ap1AOHRuOxiUgHsI6uV4GeCNEJrE0Z3TZ8rXbffg,1579
|
12
|
+
raw_docx/raw_table_cell.py,sha256=pXe7FCfEmbqdktBGfkDrvMsbEie8FaGNQbRl_ooms0Q,1887
|
13
|
+
raw_docx/raw_table_row.py,sha256=m8SoLyVlKLjd_Vqa_U79A2wi8Wout8spgyusqJm79Kc,1297
|
14
|
+
raw_docx/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
+
raw_docx/docx/docx_paragraph.py,sha256=DPFzCG26y-6teL3KDnC_Ihmbs48OsHfD4fCD5Tj1O4A,2938
|
16
|
+
raw_docx-0.7.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
17
|
+
raw_docx-0.7.0.dist-info/METADATA,sha256=aNG1bN6ZnwCWqRv0y2x0-vNhmolaJb8qm04N-gtVvQI,1224
|
18
|
+
raw_docx-0.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
19
|
+
raw_docx-0.7.0.dist-info/top_level.txt,sha256=Xl3dspPM9DBVj8clfdkHG7N4nNjNXeUmB4HcXAwOe60,9
|
20
|
+
raw_docx-0.7.0.dist-info/RECORD,,
|
raw_docx/__version__.py
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
__package_version__ = "0.5.0"
|
raw_docx/raw_logger.py
DELETED
@@ -1,67 +0,0 @@
|
|
1
|
-
import sys
|
2
|
-
import logging
|
3
|
-
from pathlib import Path
|
4
|
-
from typing import Optional
|
5
|
-
from pythonjsonlogger import jsonlogger
|
6
|
-
|
7
|
-
|
8
|
-
class RawLogger:
|
9
|
-
_instance = None
|
10
|
-
_initialized = False
|
11
|
-
|
12
|
-
def __new__(cls):
|
13
|
-
if cls._instance is None:
|
14
|
-
cls._instance = super().__new__(cls)
|
15
|
-
return cls._instance
|
16
|
-
|
17
|
-
def __init__(self):
|
18
|
-
if not RawLogger._initialized:
|
19
|
-
self.logger = logging.getLogger("raw_docx")
|
20
|
-
self.logger.setLevel(logging.INFO)
|
21
|
-
|
22
|
-
# Create JSON formatter
|
23
|
-
formatter = jsonlogger.JsonFormatter(
|
24
|
-
fmt="%(asctime)s %(name)s %(levelname)s %(message)s",
|
25
|
-
datefmt="%Y-%m-%d %H:%M:%S",
|
26
|
-
)
|
27
|
-
|
28
|
-
# Console handler
|
29
|
-
console_handler = logging.StreamHandler(sys.stdout)
|
30
|
-
console_handler.setFormatter(formatter)
|
31
|
-
self.logger.addHandler(console_handler)
|
32
|
-
|
33
|
-
RawLogger._initialized = True
|
34
|
-
|
35
|
-
def setup_file_logging(self, log_dir: Optional[str] = None):
|
36
|
-
"""Setup file logging in addition to console logging"""
|
37
|
-
if log_dir:
|
38
|
-
log_path = Path(log_dir)
|
39
|
-
log_path.mkdir(parents=True, exist_ok=True)
|
40
|
-
file_handler = logging.FileHandler(log_path / "raw_docx.log")
|
41
|
-
file_handler.setFormatter(
|
42
|
-
jsonlogger.JsonFormatter(
|
43
|
-
fmt="%(asctime)s %(name)s %(levelname)s %(message)s",
|
44
|
-
datefmt="%Y-%m-%d %H:%M:%S",
|
45
|
-
)
|
46
|
-
)
|
47
|
-
self.logger.addHandler(file_handler)
|
48
|
-
|
49
|
-
def info(self, message: str):
|
50
|
-
"""Log info message"""
|
51
|
-
self.logger.info(message)
|
52
|
-
|
53
|
-
def warning(self, message: str):
|
54
|
-
"""Log warning message"""
|
55
|
-
self.logger.warning(message)
|
56
|
-
|
57
|
-
def error(self, message: str):
|
58
|
-
"""Log error message"""
|
59
|
-
self.logger.error(message)
|
60
|
-
|
61
|
-
def exception(self, message: str, exc: Exception):
|
62
|
-
"""Log exception with message"""
|
63
|
-
self.logger.exception(message, exc_info=exc)
|
64
|
-
|
65
|
-
|
66
|
-
# Create singleton instance
|
67
|
-
logger = RawLogger()
|
raw_docx-0.5.0.dist-info/RECORD
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
raw_docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
raw_docx/__version__.py,sha256=Wi_yZdt3UP6QrJT-sQRbjTz0HB_pmH5mlybhCDYuBGo,30
|
3
|
-
raw_docx/docx_paragraph.py,sha256=2A0fqVNAWswJpI35WfaY2o07hP7Ks83NcpltAyLU31w,2752
|
4
|
-
raw_docx/raw_document.py,sha256=VLx0-Z9jGwdYHMU227AKaT8UDRY_OHD7b2BRuw71x6M,2340
|
5
|
-
raw_docx/raw_docx.py,sha256=LA94jADMXhRL1lcaz0-Fs69FYtAJdQLPJ57Lu92ZVf0,10258
|
6
|
-
raw_docx/raw_image.py,sha256=GvR2hfgNkNnQCmEZ8SbMDWm-_CAyfk56eWF5l2OxQDw,1451
|
7
|
-
raw_docx/raw_list.py,sha256=wA84muLMViYsj4gxMbwWM_aAQ9loYRixKgwPppJghrE,2209
|
8
|
-
raw_docx/raw_list_item.py,sha256=L8b_eaag0aFisHozxW0dh8yynR5i-PhsltTtjHBnlhQ,591
|
9
|
-
raw_docx/raw_logger.py,sha256=jKc5Ph3SNbXjO6sNq_q6BuUcZuIuKk8pbHp7mIFWRXg,2059
|
10
|
-
raw_docx/raw_paragraph.py,sha256=GOxq4n68mpG11kxw5_89UmGqvTV_BSa8E9A15BaEroI,1075
|
11
|
-
raw_docx/raw_run.py,sha256=0PJHiZIm1QclZfjdsrPPLSL7_GYoX8jSa6JvcfcOcWc,479
|
12
|
-
raw_docx/raw_section.py,sha256=_ONvR5Fyuif4vZs1LnE7Y67pX29JKWM13YB8Wy8di9o,3942
|
13
|
-
raw_docx/raw_table.py,sha256=qm-Ap1AOHRuOxiUgHsI6uV4GeCNEJrE0Z3TZ8rXbffg,1579
|
14
|
-
raw_docx/raw_table_cell.py,sha256=pXe7FCfEmbqdktBGfkDrvMsbEie8FaGNQbRl_ooms0Q,1887
|
15
|
-
raw_docx/raw_table_row.py,sha256=m8SoLyVlKLjd_Vqa_U79A2wi8Wout8spgyusqJm79Kc,1297
|
16
|
-
raw_docx-0.5.0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
17
|
-
raw_docx-0.5.0.dist-info/METADATA,sha256=Ug8ah1qQKDF8KPvZqp0FD0cxOPfOOy0i1JhFIq_x_c0,1204
|
18
|
-
raw_docx-0.5.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
19
|
-
raw_docx-0.5.0.dist-info/top_level.txt,sha256=Xl3dspPM9DBVj8clfdkHG7N4nNjNXeUmB4HcXAwOe60,9
|
20
|
-
raw_docx-0.5.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|