sharedkernel 1.8.0__tar.gz → 1.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/PKG-INFO +7 -1
  2. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/README.md +2 -0
  3. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/setup.py +6 -2
  4. sharedkernel-1.9.0/sharedkernel/data_format_converter.py +108 -0
  5. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel.egg-info/PKG-INFO +7 -1
  6. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel.egg-info/SOURCES.txt +1 -0
  7. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel.egg-info/requires.txt +4 -0
  8. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/setup.cfg +0 -0
  9. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/common.py +0 -0
  10. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/database/__init__.py +0 -0
  11. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/database/mongo_generic_repository.py +0 -0
  12. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/database/vector_database_repository/__init__.py +0 -0
  13. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/database/vector_database_repository/chroma_startegy.py +0 -0
  14. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/database/vector_database_repository/vector_database_repository.py +0 -0
  15. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/database/vector_database_repository/vector_database_strategy.py +0 -0
  16. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/date_converter.py +0 -0
  17. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/enum/__init__.py +0 -0
  18. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/enum/error_code.py +0 -0
  19. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/enum/vector_database_type.py +0 -0
  20. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/exception/__init__.py +0 -0
  21. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/exception/exception.py +0 -0
  22. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/exception/exception_handlers.py +0 -0
  23. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/jwt_service.py +0 -0
  24. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/normalizer/__init__.py +0 -0
  25. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/normalizer/number_normalizer.py +0 -0
  26. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/normalizer/phone_number_normalizer.py +0 -0
  27. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/objects/__init__.py +0 -0
  28. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/objects/base_document.py +0 -0
  29. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/objects/jwt_model.py +0 -0
  30. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/objects/result.py +0 -0
  31. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/regex_masking.py +0 -0
  32. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/s3_uploader.py +0 -0
  33. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel/string_extentions.py +0 -0
  34. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel.egg-info/dependency_links.txt +0 -0
  35. {sharedkernel-1.8.0 → sharedkernel-1.9.0}/sharedkernel.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sharedkernel
3
- Version: 1.8.0
3
+ Version: 1.9.0
4
4
  Summary: sharekernel is a shared package between all python projects
5
5
  Author: Smilinno
6
6
  Description-Content-Type: text/markdown
@@ -15,11 +15,17 @@ Requires-Dist: sentry-sdk
15
15
  Requires-Dist: jdatetime
16
16
  Requires-Dist: persiantools
17
17
  Requires-Dist: boto3
18
+ Requires-Dist: python-docx
19
+ Requires-Dist: mammoth
20
+ Requires-Dist: markdown
21
+ Requires-Dist: beautifulsoup4
18
22
 
19
23
  # SharedKernel
20
24
  this a shared kernel package
21
25
 
22
26
  # Change Log
27
+ ### Version 1.9.0
28
+ - Implement DataFormatConverter
23
29
  ### Version 1.8.0
24
30
  - Implement persian number normalizer
25
31
  ### Version 1.7.3
@@ -2,6 +2,8 @@
2
2
  this a shared kernel package
3
3
 
4
4
  # Change Log
5
+ ### Version 1.9.0
6
+ - Implement DataFormatConverter
5
7
  ### Version 1.8.0
6
8
  - Implement persian number normalizer
7
9
  ### Version 1.7.3
@@ -32,10 +32,14 @@ setup(
32
32
  "sentry-sdk",
33
33
  "jdatetime",
34
34
  "persiantools",
35
- "boto3"
35
+ "boto3",
36
+ "python-docx",
37
+ "mammoth",
38
+ "markdown",
39
+ "beautifulsoup4"
36
40
  ],
37
41
  # *strongly* suggested for sharing
38
- version="1.8.0",
42
+ version="1.9.0",
39
43
  description="sharekernel is a shared package between all python projects",
40
44
  long_description=long_description,
41
45
  long_description_content_type="text/markdown",
@@ -0,0 +1,108 @@
1
+ from abc import ABC, abstractmethod
2
+ import markdown
3
+
4
+ from docx import Document
5
+ from docx.enum.text import WD_ALIGN_PARAGRAPH
6
+ from docx.oxml.ns import qn
7
+ from docx.oxml import OxmlElement
8
+ from bs4 import BeautifulSoup, NavigableString
9
+ import mammoth
10
+
11
+
12
class DataFormatConverter(ABC):
    """Abstract base for converters that turn one document file into another.

    Subclasses implement :meth:`convert` as a static method and may use
    :meth:`_get_out_title` to derive a default output path from the input path.
    """

    @staticmethod
    @abstractmethod
    def convert(input_path, output_path=None):
        """Convert the file at *input_path* and write the result.

        Args:
            input_path: Path of the file to read.
            output_path: Path of the file to write. Subclasses in this module
                derive a path from *input_path* when it is omitted.
        """

    def _get_out_title(self, title: str, file_format: str = ".docx") -> str:
        """Return *title* with its final extension replaced by *file_format*.

        Only the extension of the last path component is replaced, so dots in
        directory names are left alone, and a title without an extension simply
        gets *file_format* appended.

        Args:
            title: Input file name or path.
            file_format: New extension, including the leading dot.

        Returns:
            The derived output file name or path.
        """
        # Local import keeps this block self-contained; ``os`` is stdlib.
        import os

        stem, _ext = os.path.splitext(title)
        return stem + file_format
19
+
20
+
21
class Md2Docx(DataFormatConverter):
    """Convert a Markdown file into a right-aligned Word (.docx) document.

    The Markdown text is rendered to HTML with ``markdown``, parsed with
    BeautifulSoup, and every top-level heading or paragraph is written to a
    new python-docx ``Document``. Other top-level nodes (whitespace between
    blocks, lists, tables, ...) are skipped.
    """

    # Top-level tags written with the matching "Heading N" style.
    _HEADING_TAGS = ("h1", "h2", "h3", "h4", "h5", "h6")
    # Inline tags whose text is emitted as a bold / italic run.
    _BOLD_TAGS = ("strong", "b")
    _ITALIC_TAGS = ("em", "i")

    def __add_hyperlink(self, paragraph, url, text, color="0000FF", underline=True):
        """Append an external hyperlink run to *paragraph*.

        Args:
            paragraph: python-docx paragraph that receives the link.
            url: Link target; registered as an external relationship.
            text: Visible link text.
            color: Hex RGB run colour, or a falsy value for no colour.
            underline: Whether to underline the link text.
        """
        r_id = paragraph.part.relate_to(url, "hyperlink", is_external=True)
        hyperlink = OxmlElement("w:hyperlink")
        hyperlink.set(qn("r:id"), r_id)

        new_run, rPr = OxmlElement("w:r"), OxmlElement("w:rPr")
        if color:
            color_element = OxmlElement("w:color")
            color_element.set(qn("w:val"), color)
            rPr.append(color_element)
        if underline:
            u = OxmlElement("w:u")
            u.set(qn("w:val"), "single")
            rPr.append(u)
        new_run.append(rPr)

        text_elem = OxmlElement("w:t")
        # Without xml:space="preserve", leading/trailing spaces in the link
        # text are not kept when the document is read back.
        text_elem.set(qn("xml:space"), "preserve")
        text_elem.text = text
        new_run.append(text_elem)
        hyperlink.append(new_run)
        paragraph._p.append(hyperlink)

    def __add_inline(self, paragraph, part):
        """Append one child node of a ``<p>`` element to *paragraph*.

        Strings become plain runs, ``<a>``/``<img>`` become hyperlinks, and any
        other inline tag contributes its text so that no content is dropped.
        """
        if isinstance(part, NavigableString):
            paragraph.add_run(str(part))
            return

        if part.name == "a" and part.get("href"):
            self.__add_hyperlink(paragraph, part["href"], part.get_text())
            return

        if part.name == "img" and part.get("src"):
            # Images are not embedded; they are written as a link to the
            # source. An empty alt text would produce an invisible link.
            self.__add_hyperlink(paragraph, part["src"], part.get("alt") or "Image")
            return

        text = part.get_text()
        if not text:
            return
        run = paragraph.add_run(text)
        if part.name in self._BOLD_TAGS:
            run.bold = True
        elif part.name in self._ITALIC_TAGS:
            run.italic = True

    def __set_right_to_left(self, paragraph):
        """Right-align *paragraph* and flag it as right-to-left."""
        paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        # NOTE(review): ``right_to_left`` does not appear in python-docx's
        # documented ParagraphFormat API -- confirm this assignment has the
        # intended effect on the saved document.
        paragraph.paragraph_format.right_to_left = True

    @staticmethod
    def convert(input_path, output_path=None):
        """Convert the Markdown file at *input_path* to a .docx file.

        Args:
            input_path: Path of the UTF-8 Markdown file to read.
            output_path: Path of the .docx file to write. When omitted, it is
                derived from *input_path* via ``_get_out_title``.

        Returns:
            The path the document was saved to.
        """
        md2docx = Md2Docx()

        with open(input_path, "r", encoding="utf-8") as f:
            md_content = f.read()

        output_file = output_path or md2docx._get_out_title(input_path)

        html_content = markdown.markdown(md_content)
        soup = BeautifulSoup(html_content, "html.parser")
        doc = Document()

        for element in soup:
            tag_name = getattr(element, "name", None)
            if tag_name in md2docx._HEADING_TAGS:
                paragraph = doc.add_paragraph(
                    element.get_text(), style="Heading " + tag_name[1]
                )
            elif tag_name == "p":
                paragraph = doc.add_paragraph()
                for part in element.contents:
                    md2docx.__add_inline(paragraph, part)
            else:
                # Nothing was written for this node, so there is no paragraph
                # to format (avoids touching an unbound or stale paragraph).
                continue
            md2docx.__set_right_to_left(paragraph)

        doc.save(output_file)
        return output_file
84
+
85
+
86
class Docx2Md(DataFormatConverter):
    """Convert a Word (.docx) file to Markdown text using ``mammoth``."""

    def _get_out_title(self, title: str, file_format: str = ".txt") -> str:
        """Derive the output name like the base helper, defaulting to ``.txt``.

        NOTE(review): the default extension is ``.txt`` although the content
        written is Markdown -- confirm this is intended before changing it.
        """
        return super()._get_out_title(title, file_format)

    @staticmethod
    def convert(input_path: str, output_path: str = None):
        """Convert the .docx file at *input_path* to a Markdown text file.

        Args:
            input_path: Path of the .docx file to read.
            output_path: Path of the file to write. When omitted, it is derived
                from *input_path* via ``_get_out_title``.

        Returns:
            The path the Markdown text was written to.
        """
        # Local import keeps this block self-contained; ``logging`` is stdlib.
        import logging

        docx2md = Docx2Md()
        with open(input_path, "rb") as docx:
            result = mammoth.convert_to_markdown(
                docx,
                # NOTE(review): every image of a given content type maps to the
                # same src (e.g. "images/png") and no image file is written
                # here -- confirm this placeholder is what callers expect.
                convert_image=mammoth.images.img_element(
                    lambda image: {"src": "images/" + image.content_type.split("/")[-1]}
                ),
            )

        output_path = output_path or docx2md._get_out_title(input_path)

        with open(output_path, "w", encoding="utf-8") as md_file:
            md_file.write(result.value)

        # Report conversion messages through logging instead of stdout so the
        # host application decides where they go.
        logger = logging.getLogger(__name__)
        for message in result.messages:
            logger.warning("%s", message)

        # Return the path, matching Md2Docx.convert.
        return output_path
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sharedkernel
3
- Version: 1.8.0
3
+ Version: 1.9.0
4
4
  Summary: sharekernel is a shared package between all python projects
5
5
  Author: Smilinno
6
6
  Description-Content-Type: text/markdown
@@ -15,11 +15,17 @@ Requires-Dist: sentry-sdk
15
15
  Requires-Dist: jdatetime
16
16
  Requires-Dist: persiantools
17
17
  Requires-Dist: boto3
18
+ Requires-Dist: python-docx
19
+ Requires-Dist: mammoth
20
+ Requires-Dist: markdown
21
+ Requires-Dist: beautifulsoup4
18
22
 
19
23
  # SharedKernel
20
24
  this a shared kernel package
21
25
 
22
26
  # Change Log
27
+ ### Version 1.9.0
28
+ - Implement DataFormatConverter
23
29
  ### Version 1.8.0
24
30
  - Implement persian number normalizer
25
31
  ### Version 1.7.3
@@ -1,6 +1,7 @@
1
1
  README.md
2
2
  setup.py
3
3
  sharedkernel/common.py
4
+ sharedkernel/data_format_converter.py
4
5
  sharedkernel/date_converter.py
5
6
  sharedkernel/jwt_service.py
6
7
  sharedkernel/regex_masking.py
@@ -9,3 +9,7 @@ sentry-sdk
9
9
  jdatetime
10
10
  persiantools
11
11
  boto3
12
+ python-docx
13
+ mammoth
14
+ markdown
15
+ beautifulsoup4
File without changes