sharedkernel 1.8.0__tar.gz → 1.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/PKG-INFO +7 -1
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/README.md +2 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/setup.py +6 -2
- sharedkernel-1.9.0/sharedkernel/data_format_converter.py +108 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel.egg-info/PKG-INFO +7 -1
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel.egg-info/SOURCES.txt +1 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel.egg-info/requires.txt +4 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/setup.cfg +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/common.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/database/__init__.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/database/mongo_generic_repository.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/database/vector_database_repository/__init__.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/database/vector_database_repository/chroma_startegy.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/database/vector_database_repository/vector_database_repository.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/database/vector_database_repository/vector_database_strategy.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/date_converter.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/enum/__init__.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/enum/error_code.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/enum/vector_database_type.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/exception/__init__.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/exception/exception.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/exception/exception_handlers.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/jwt_service.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/normalizer/__init__.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/normalizer/number_normalizer.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/normalizer/phone_number_normalizer.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/objects/__init__.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/objects/base_document.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/objects/jwt_model.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/objects/result.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/regex_masking.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/s3_uploader.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/string_extentions.py +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel.egg-info/dependency_links.txt +0 -0
- {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sharedkernel
|
|
3
|
-
Version: 1.8.0
|
|
3
|
+
Version: 1.9.0
|
|
4
4
|
Summary: sharekernel is a shared package between all python projects
|
|
5
5
|
Author: Smilinno
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -15,11 +15,17 @@ Requires-Dist: sentry-sdk
|
|
|
15
15
|
Requires-Dist: jdatetime
|
|
16
16
|
Requires-Dist: persiantools
|
|
17
17
|
Requires-Dist: boto3
|
|
18
|
+
Requires-Dist: python-docx
|
|
19
|
+
Requires-Dist: mammoth
|
|
20
|
+
Requires-Dist: markdown
|
|
21
|
+
Requires-Dist: beautifulsoup4
|
|
18
22
|
|
|
19
23
|
# SharedKernel
|
|
20
24
|
this a shared kernel package
|
|
21
25
|
|
|
22
26
|
# Change Log
|
|
27
|
+
### Version 1.9.0
|
|
28
|
+
- Implement DataFormatConverter
|
|
23
29
|
### Version 1.8.0
|
|
24
30
|
- Implement persian number normalizer
|
|
25
31
|
### Version 1.7.3
|
|
@@ -32,10 +32,14 @@ setup(
|
|
|
32
32
|
"sentry-sdk",
|
|
33
33
|
"jdatetime",
|
|
34
34
|
"persiantools",
|
|
35
|
-
"boto3"
|
|
35
|
+
"boto3",
|
|
36
|
+
"python-docx",
|
|
37
|
+
"mammoth",
|
|
38
|
+
"markdown",
|
|
39
|
+
"beautifulsoup4"
|
|
36
40
|
],
|
|
37
41
|
# *strongly* suggested for sharing
|
|
38
|
-
version="1.8.0",
|
|
42
|
+
version="1.9.0",
|
|
39
43
|
description="sharekernel is a shared package between all python projects",
|
|
40
44
|
long_description=long_description,
|
|
41
45
|
long_description_content_type="text/markdown",
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
import markdown
|
|
3
|
+
|
|
4
|
+
from docx import Document
|
|
5
|
+
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
|
6
|
+
from docx.oxml.ns import qn
|
|
7
|
+
from docx.oxml import OxmlElement
|
|
8
|
+
from bs4 import BeautifulSoup, NavigableString
|
|
9
|
+
import mammoth
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DataFormatConverter(ABC):
|
|
13
|
+
@abstractmethod
|
|
14
|
+
def convert(input_path, output_path):
|
|
15
|
+
pass
|
|
16
|
+
|
|
17
|
+
def _get_out_title(self, title: str, file_format: str = ".docx") -> str:
|
|
18
|
+
return ".".join(title.split(".")[:-1]) + file_format
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Md2Docx(DataFormatConverter):
|
|
22
|
+
|
|
23
|
+
def __add_hyperlink(self, paragraph, url, text, color="0000FF", underline=True):
|
|
24
|
+
r_id = paragraph.part.relate_to(url, "hyperlink", is_external=True)
|
|
25
|
+
hyperlink = OxmlElement("w:hyperlink")
|
|
26
|
+
hyperlink.set(qn("r:id"), r_id)
|
|
27
|
+
|
|
28
|
+
new_run, rPr = OxmlElement("w:r"), OxmlElement("w:rPr")
|
|
29
|
+
if color:
|
|
30
|
+
color_element = OxmlElement("w:color")
|
|
31
|
+
color_element.set(qn("w:val"), color)
|
|
32
|
+
rPr.append(color_element)
|
|
33
|
+
if underline:
|
|
34
|
+
u = OxmlElement("w:u")
|
|
35
|
+
u.set(qn("w:val"), "single")
|
|
36
|
+
rPr.append(u)
|
|
37
|
+
new_run.append(rPr)
|
|
38
|
+
|
|
39
|
+
text_elem = OxmlElement("w:t")
|
|
40
|
+
text_elem.text = text
|
|
41
|
+
new_run.append(text_elem)
|
|
42
|
+
hyperlink.append(new_run)
|
|
43
|
+
paragraph._p.append(hyperlink)
|
|
44
|
+
|
|
45
|
+
def __set_right_to_left(self, paragraph):
|
|
46
|
+
paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
|
|
47
|
+
paragraph.paragraph_format.right_to_left = True
|
|
48
|
+
|
|
49
|
+
@staticmethod
|
|
50
|
+
def convert(input_path, output_path=None):
|
|
51
|
+
md2docx = Md2Docx()
|
|
52
|
+
|
|
53
|
+
with open(input_path, "r", encoding="utf-8") as f:
|
|
54
|
+
md_content = f.read()
|
|
55
|
+
|
|
56
|
+
output_file = output_path or md2docx._get_out_title(input_path)
|
|
57
|
+
|
|
58
|
+
html_content = markdown.markdown(md_content)
|
|
59
|
+
soup = BeautifulSoup(html_content, "html.parser")
|
|
60
|
+
doc = Document()
|
|
61
|
+
|
|
62
|
+
for element in soup:
|
|
63
|
+
if element.name == "h1":
|
|
64
|
+
paragraph = doc.add_paragraph(element.get_text(), style="Heading 1")
|
|
65
|
+
elif element.name == "h2":
|
|
66
|
+
paragraph = doc.add_paragraph(element.get_text(), style="Heading 2")
|
|
67
|
+
elif element.name == "p":
|
|
68
|
+
paragraph = doc.add_paragraph()
|
|
69
|
+
for part in element.contents:
|
|
70
|
+
if isinstance(part, NavigableString):
|
|
71
|
+
paragraph.add_run(str(part))
|
|
72
|
+
elif part.name == "a":
|
|
73
|
+
md2docx.__add_hyperlink(
|
|
74
|
+
paragraph, part["href"], part.get_text()
|
|
75
|
+
)
|
|
76
|
+
elif part.name == "img":
|
|
77
|
+
md2docx.__add_hyperlink(
|
|
78
|
+
paragraph, part["src"], part.get("alt", "Image")
|
|
79
|
+
)
|
|
80
|
+
md2docx.__set_right_to_left(paragraph)
|
|
81
|
+
|
|
82
|
+
doc.save(output_file)
|
|
83
|
+
return output_file
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class Docx2Md(DataFormatConverter):
|
|
87
|
+
def _get_out_title(self, title: str, file_format: str = ".txt") -> str:
|
|
88
|
+
return super()._get_out_title(title, file_format)
|
|
89
|
+
|
|
90
|
+
@staticmethod
|
|
91
|
+
def convert(input_path: str, output_path: str = None):
|
|
92
|
+
docx2md = Docx2Md()
|
|
93
|
+
with open(input_path, "rb") as docx:
|
|
94
|
+
result = mammoth.convert_to_markdown(
|
|
95
|
+
docx,
|
|
96
|
+
convert_image=mammoth.images.img_element(
|
|
97
|
+
lambda image: {"src": "images/" + image.content_type.split("/")[-1]}
|
|
98
|
+
),
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
output_path = output_path or docx2md._get_out_title(input_path)
|
|
102
|
+
|
|
103
|
+
with open(output_path, "w", encoding="utf-8") as md_file:
|
|
104
|
+
md_file.write(result.value)
|
|
105
|
+
|
|
106
|
+
if result.messages:
|
|
107
|
+
for message in result.messages:
|
|
108
|
+
print("Warning:", message)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sharedkernel
|
|
3
|
-
Version: 1.8.0
|
|
3
|
+
Version: 1.9.0
|
|
4
4
|
Summary: sharekernel is a shared package between all python projects
|
|
5
5
|
Author: Smilinno
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -15,11 +15,17 @@ Requires-Dist: sentry-sdk
|
|
|
15
15
|
Requires-Dist: jdatetime
|
|
16
16
|
Requires-Dist: persiantools
|
|
17
17
|
Requires-Dist: boto3
|
|
18
|
+
Requires-Dist: python-docx
|
|
19
|
+
Requires-Dist: mammoth
|
|
20
|
+
Requires-Dist: markdown
|
|
21
|
+
Requires-Dist: beautifulsoup4
|
|
18
22
|
|
|
19
23
|
# SharedKernel
|
|
20
24
|
this a shared kernel package
|
|
21
25
|
|
|
22
26
|
# Change Log
|
|
27
|
+
### Version 1.9.0
|
|
28
|
+
- Implement DataFormatConverter
|
|
23
29
|
### Version 1.8.0
|
|
24
30
|
- Implement persian number normalizer
|
|
25
31
|
### Version 1.7.3
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/normalizer/phone_number_normalizer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|