sharedkernel 1.7.3__tar.gz → 1.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/PKG-INFO +9 -1
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/README.md +4 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/setup.py +6 -2
- sharedkernel-1.9.0/sharedkernel/data_format_converter.py +108 -0
- sharedkernel-1.9.0/sharedkernel/normalizer/__init__.py +2 -0
- sharedkernel-1.9.0/sharedkernel/normalizer/number_normalizer.py +8 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/s3_uploader.py +0 -1
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel.egg-info/PKG-INFO +9 -1
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel.egg-info/SOURCES.txt +2 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel.egg-info/requires.txt +4 -0
- sharedkernel-1.7.3/sharedkernel/normalizer/__init__.py +0 -1
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/setup.cfg +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/common.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/database/__init__.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/database/mongo_generic_repository.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/database/vector_database_repository/__init__.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/database/vector_database_repository/chroma_startegy.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/database/vector_database_repository/vector_database_repository.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/database/vector_database_repository/vector_database_strategy.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/date_converter.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/enum/__init__.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/enum/error_code.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/enum/vector_database_type.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/exception/__init__.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/exception/exception.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/exception/exception_handlers.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/jwt_service.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/normalizer/phone_number_normalizer.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/objects/__init__.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/objects/base_document.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/objects/jwt_model.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/objects/result.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/regex_masking.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/string_extentions.py +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel.egg-info/dependency_links.txt +0 -0
- {sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sharedkernel
|
|
3
|
-
Version: 1.7.3
|
|
3
|
+
Version: 1.9.0
|
|
4
4
|
Summary: sharekernel is a shared package between all python projects
|
|
5
5
|
Author: Smilinno
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -15,11 +15,19 @@ Requires-Dist: sentry-sdk
|
|
|
15
15
|
Requires-Dist: jdatetime
|
|
16
16
|
Requires-Dist: persiantools
|
|
17
17
|
Requires-Dist: boto3
|
|
18
|
+
Requires-Dist: python-docx
|
|
19
|
+
Requires-Dist: mammoth
|
|
20
|
+
Requires-Dist: markdown
|
|
21
|
+
Requires-Dist: beautifulsoup4
|
|
18
22
|
|
|
19
23
|
# SharedKernel
|
|
20
24
|
this a shared kernel package
|
|
21
25
|
|
|
22
26
|
# Change Log
|
|
27
|
+
### Version 1.9.0
|
|
28
|
+
- Implement DataFormatConverter
|
|
29
|
+
### Version 1.8.0
|
|
30
|
+
- Implement persian number normalizer
|
|
23
31
|
### Version 1.7.3
|
|
24
32
|
- Add optional folder name for s3 uploader to save the file in
|
|
25
33
|
### Version 1.7.2
|
|
@@ -2,6 +2,10 @@
|
|
|
2
2
|
this a shared kernel package
|
|
3
3
|
|
|
4
4
|
# Change Log
|
|
5
|
+
### Version 1.9.0
|
|
6
|
+
- Implement DataFormatConverter
|
|
7
|
+
### Version 1.8.0
|
|
8
|
+
- Implement persian number normalizer
|
|
5
9
|
### Version 1.7.3
|
|
6
10
|
- Add optional folder name for s3 uploader to save the file in
|
|
7
11
|
### Version 1.7.2
|
|
@@ -32,10 +32,14 @@ setup(
|
|
|
32
32
|
"sentry-sdk",
|
|
33
33
|
"jdatetime",
|
|
34
34
|
"persiantools",
|
|
35
|
-
"boto3"
|
|
35
|
+
"boto3",
|
|
36
|
+
"python-docx",
|
|
37
|
+
"mammoth",
|
|
38
|
+
"markdown",
|
|
39
|
+
"beautifulsoup4"
|
|
36
40
|
],
|
|
37
41
|
# *strongly* suggested for sharing
|
|
38
|
-
version="1.7.3",
|
|
42
|
+
version="1.9.0",
|
|
39
43
|
description="sharekernel is a shared package between all python projects",
|
|
40
44
|
long_description=long_description,
|
|
41
45
|
long_description_content_type="text/markdown",
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
import markdown
|
|
3
|
+
|
|
4
|
+
from docx import Document
|
|
5
|
+
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
|
6
|
+
from docx.oxml.ns import qn
|
|
7
|
+
from docx.oxml import OxmlElement
|
|
8
|
+
from bs4 import BeautifulSoup, NavigableString
|
|
9
|
+
import mammoth
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DataFormatConverter(ABC):
    """Abstract base class for file-format converters.

    Concrete converters implement ``convert`` and may use
    ``_get_out_title`` to derive a default output path from the input path.
    """

    @staticmethod
    @abstractmethod
    def convert(input_path, output_path=None):
        """Convert the file at ``input_path`` and write it to ``output_path``.

        Declared static so the abstract signature matches converters that
        implement ``convert`` as a ``@staticmethod``.

        Args:
            input_path: Path of the file to convert.
            output_path: Destination path; implementations may derive one
                from ``input_path`` when it is omitted.
        """

    def _get_out_title(self, title: str, file_format: str = ".docx") -> str:
        """Return ``title`` with its extension replaced by ``file_format``.

        Only the extension of the final path component is replaced. A title
        without an extension keeps its full name and simply gains
        ``file_format``, and dots inside directory names are left alone.

        Args:
            title: Input file name or path.
            file_format: New extension, including the leading dot.

        Returns:
            The output file name or path.
        """
        # Function-scope import so this block does not depend on the
        # module-level import list.
        import os

        root, _old_extension = os.path.splitext(title)
        return root + file_format
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Md2Docx(DataFormatConverter):
    """Convert a Markdown file into a right-aligned ``.docx`` document.

    The Markdown is rendered to HTML with ``markdown`` and parsed with
    BeautifulSoup; each top-level HTML element is then written to the
    document as one or more paragraphs.
    """

    # Relationship type URI that OOXML uses for hyperlink relationships.
    _HYPERLINK_RELTYPE = (
        "http://schemas.openxmlformats.org/officeDocument/2006/"
        "relationships/hyperlink"
    )

    # HTML heading tag -> built-in Word heading style name.
    _HEADING_STYLES = {f"h{level}": f"Heading {level}" for level in range(1, 7)}

    def __add_hyperlink(self, paragraph, url, text, color="0000FF", underline=True):
        """Append an external hyperlink run to ``paragraph``.

        Args:
            paragraph: python-docx paragraph that receives the link.
            url: Link target, stored as an external relationship.
            text: Visible link text.
            color: Hex RGB colour of the link text; falsy to skip colouring.
            underline: Whether to underline the link text.
        """
        r_id = paragraph.part.relate_to(
            url, self._HYPERLINK_RELTYPE, is_external=True
        )
        hyperlink = OxmlElement("w:hyperlink")
        hyperlink.set(qn("r:id"), r_id)

        new_run, rPr = OxmlElement("w:r"), OxmlElement("w:rPr")
        if color:
            color_element = OxmlElement("w:color")
            color_element.set(qn("w:val"), color)
            rPr.append(color_element)
        if underline:
            u = OxmlElement("w:u")
            u.set(qn("w:val"), "single")
            rPr.append(u)
        new_run.append(rPr)

        text_elem = OxmlElement("w:t")
        # Keep leading/trailing spaces of the link text significant.
        text_elem.set(qn("xml:space"), "preserve")
        text_elem.text = text
        new_run.append(text_elem)
        hyperlink.append(new_run)
        paragraph._p.append(hyperlink)

    def __add_inline(self, paragraph, part):
        """Append one inline HTML node (text, link, image, other) to ``paragraph``."""
        if isinstance(part, NavigableString):
            paragraph.add_run(str(part))
            return

        if part.name == "a" and part.get("href"):
            self.__add_hyperlink(paragraph, part["href"], part.get_text())
        elif part.name == "img" and part.get("src"):
            # Images are not embedded; they are written as a link to the source.
            self.__add_hyperlink(paragraph, part["src"], part.get("alt") or "Image")
        else:
            # Any other inline tag (strong, em, code, ...) keeps its text
            # instead of being dropped.
            run = paragraph.add_run(part.get_text())
            if part.name in ("strong", "b"):
                run.bold = True
            elif part.name in ("em", "i"):
                run.italic = True

    def __set_right_to_left(self, paragraph):
        """Right-align ``paragraph`` and request right-to-left layout."""
        paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        # NOTE(review): ``right_to_left`` is not among the ParagraphFormat
        # properties listed in the python-docx documentation - confirm that
        # this assignment actually affects the generated document.
        paragraph.paragraph_format.right_to_left = True

    @staticmethod
    def convert(input_path, output_path=None):
        """Convert the Markdown file at ``input_path`` to a ``.docx`` file.

        Args:
            input_path: Path of the UTF-8 encoded Markdown file.
            output_path: Destination path. When omitted, the input path with
                its extension replaced by ``.docx`` is used.

        Returns:
            The path the document was saved to.
        """
        md2docx = Md2Docx()

        with open(input_path, "r", encoding="utf-8") as f:
            md_content = f.read()

        output_file = output_path or md2docx._get_out_title(input_path)

        html_content = markdown.markdown(md_content)
        soup = BeautifulSoup(html_content, "html.parser")
        doc = Document()

        for element in soup:
            # Top-level strings are the text between block elements.
            if isinstance(element, NavigableString):
                continue

            # Paragraphs created for this element only, so the right-to-left
            # setting is never applied to an unbound or stale paragraph.
            created = []
            heading_style = md2docx._HEADING_STYLES.get(element.name)

            if heading_style:
                created.append(doc.add_paragraph(element.get_text(), style=heading_style))
            elif element.name == "p":
                paragraph = doc.add_paragraph()
                for part in element.contents:
                    md2docx.__add_inline(paragraph, part)
                created.append(paragraph)
            elif element.name in ("ul", "ol"):
                list_style = "List Bullet" if element.name == "ul" else "List Number"
                for item in element.find_all("li", recursive=False):
                    paragraph = doc.add_paragraph(style=list_style)
                    for part in item.contents:
                        md2docx.__add_inline(paragraph, part)
                    created.append(paragraph)
            else:
                # Fallback for other block elements (blockquote, pre, ...):
                # keep their text rather than dropping it.
                text = element.get_text()
                if text.strip():
                    created.append(doc.add_paragraph(text))

            for paragraph in created:
                md2docx.__set_right_to_left(paragraph)

        doc.save(output_file)
        return output_file
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class Docx2Md(DataFormatConverter):
    """Convert a ``.docx`` file to Markdown text using ``mammoth``."""

    def _get_out_title(self, title: str, file_format: str = ".txt") -> str:
        """Return the default output path; the extension defaults to ``.txt``."""
        return super()._get_out_title(title, file_format)

    @staticmethod
    def convert(input_path: str, output_path: str = None):
        """Convert the ``.docx`` file at ``input_path`` to Markdown.

        Args:
            input_path: Path of the ``.docx`` file to read.
            output_path: Destination path. When omitted, the input path with
                its extension replaced by ``.txt`` is used.

        Returns:
            The path the Markdown text was written to, matching what
            ``Md2Docx.convert`` returns.
        """
        docx2md = Docx2Md()

        def image_source(image):
            # Images are not extracted; each one is referenced as
            # "images/<content-type subtype>", e.g. "images/png".
            return {"src": "images/" + image.content_type.split("/")[-1]}

        with open(input_path, "rb") as docx:
            result = mammoth.convert_to_markdown(
                docx,
                convert_image=mammoth.images.img_element(image_source),
            )

        output_path = output_path or docx2md._get_out_title(input_path)

        with open(output_path, "w", encoding="utf-8") as md_file:
            md_file.write(result.value)

        # Report the messages mammoth attached to the conversion result.
        for message in result.messages:
            print("Warning:", message)

        return output_path
|
|
@@ -51,7 +51,6 @@ class S3Uploader:
|
|
|
51
51
|
|
|
52
52
|
# Upload the file object
|
|
53
53
|
self.s3.upload_fileobj(file_obj, self.bucket, object_name, ExtraArgs={'ACL':'public-read'})
|
|
54
|
-
print(f"File object uploaded to {self.bucket} as {object_name}.")
|
|
55
54
|
|
|
56
55
|
# Construct the URL of the uploaded file
|
|
57
56
|
file_url = f"{self.endpoint_url}/{self.bucket}/{object_name}"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sharedkernel
|
|
3
|
-
Version: 1.7.3
|
|
3
|
+
Version: 1.9.0
|
|
4
4
|
Summary: sharekernel is a shared package between all python projects
|
|
5
5
|
Author: Smilinno
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -15,11 +15,19 @@ Requires-Dist: sentry-sdk
|
|
|
15
15
|
Requires-Dist: jdatetime
|
|
16
16
|
Requires-Dist: persiantools
|
|
17
17
|
Requires-Dist: boto3
|
|
18
|
+
Requires-Dist: python-docx
|
|
19
|
+
Requires-Dist: mammoth
|
|
20
|
+
Requires-Dist: markdown
|
|
21
|
+
Requires-Dist: beautifulsoup4
|
|
18
22
|
|
|
19
23
|
# SharedKernel
|
|
20
24
|
this a shared kernel package
|
|
21
25
|
|
|
22
26
|
# Change Log
|
|
27
|
+
### Version 1.9.0
|
|
28
|
+
- Implement DataFormatConverter
|
|
29
|
+
### Version 1.8.0
|
|
30
|
+
- Implement persian number normalizer
|
|
23
31
|
### Version 1.7.3
|
|
24
32
|
- Add optional folder name for s3 uploader to save the file in
|
|
25
33
|
### Version 1.7.2
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
README.md
|
|
2
2
|
setup.py
|
|
3
3
|
sharedkernel/common.py
|
|
4
|
+
sharedkernel/data_format_converter.py
|
|
4
5
|
sharedkernel/date_converter.py
|
|
5
6
|
sharedkernel/jwt_service.py
|
|
6
7
|
sharedkernel/regex_masking.py
|
|
@@ -24,6 +25,7 @@ sharedkernel/exception/__init__.py
|
|
|
24
25
|
sharedkernel/exception/exception.py
|
|
25
26
|
sharedkernel/exception/exception_handlers.py
|
|
26
27
|
sharedkernel/normalizer/__init__.py
|
|
28
|
+
sharedkernel/normalizer/number_normalizer.py
|
|
27
29
|
sharedkernel/normalizer/phone_number_normalizer.py
|
|
28
30
|
sharedkernel/objects/__init__.py
|
|
29
31
|
sharedkernel/objects/base_document.py
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from .phone_number_normalizer import PhoneNumberNormalizer
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sharedkernel-1.7.3 → sharedkernel-1.9.0}/sharedkernel/normalizer/phone_number_normalizer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|