sharedkernel 1.8.0__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38):
  1. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/PKG-INFO +11 -2
  2. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/README.md +6 -0
  3. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/setup.py +6 -5
  4. sharedkernel-2.0.0/sharedkernel/data_format_converter.py +108 -0
  5. sharedkernel-2.0.0/sharedkernel/database/__init__.py +1 -0
  6. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel/database/mongo_generic_repository.py +2 -3
  7. sharedkernel-2.0.0/sharedkernel/enum/error_code.py +10 -0
  8. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel/objects/base_document.py +2 -3
  9. sharedkernel-2.0.0/sharedkernel/objects/json_string_model.py +27 -0
  10. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel/objects/result.py +7 -11
  11. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel.egg-info/PKG-INFO +11 -2
  12. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel.egg-info/SOURCES.txt +2 -5
  13. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel.egg-info/requires.txt +4 -1
  14. sharedkernel-1.8.0/sharedkernel/database/__init__.py +0 -2
  15. sharedkernel-1.8.0/sharedkernel/database/vector_database_repository/__init__.py +0 -2
  16. sharedkernel-1.8.0/sharedkernel/database/vector_database_repository/chroma_startegy.py +0 -39
  17. sharedkernel-1.8.0/sharedkernel/database/vector_database_repository/vector_database_repository.py +0 -28
  18. sharedkernel-1.8.0/sharedkernel/database/vector_database_repository/vector_database_strategy.py +0 -22
  19. sharedkernel-1.8.0/sharedkernel/enum/error_code.py +0 -14
  20. sharedkernel-1.8.0/sharedkernel/enum/vector_database_type.py +0 -5
  21. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/setup.cfg +0 -0
  22. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel/common.py +0 -0
  23. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel/date_converter.py +0 -0
  24. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel/enum/__init__.py +0 -0
  25. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel/exception/__init__.py +0 -0
  26. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel/exception/exception.py +0 -0
  27. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel/exception/exception_handlers.py +0 -0
  28. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel/jwt_service.py +0 -0
  29. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel/normalizer/__init__.py +0 -0
  30. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel/normalizer/number_normalizer.py +0 -0
  31. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel/normalizer/phone_number_normalizer.py +0 -0
  32. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel/objects/__init__.py +0 -0
  33. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel/objects/jwt_model.py +0 -0
  34. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel/regex_masking.py +0 -0
  35. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel/s3_uploader.py +0 -0
  36. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel/string_extentions.py +0 -0
  37. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel.egg-info/dependency_links.txt +0 -0
  38. {sharedkernel-1.8.0 → sharedkernel-2.0.0}/sharedkernel.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sharedkernel
3
- Version: 1.8.0
3
+ Version: 2.0.0
4
4
  Summary: sharekernel is a shared package between all python projects
5
5
  Author: Smilinno
6
6
  Description-Content-Type: text/markdown
@@ -9,17 +9,26 @@ Requires-Dist: requests
9
9
  Requires-Dist: pymongo
10
10
  Requires-Dist: fastapi==0.111.0
11
11
  Requires-Dist: PyJWT
12
- Requires-Dist: chromadb
13
12
  Requires-Dist: persian_tools
14
13
  Requires-Dist: sentry-sdk
15
14
  Requires-Dist: jdatetime
16
15
  Requires-Dist: persiantools
17
16
  Requires-Dist: boto3
17
+ Requires-Dist: python-docx
18
+ Requires-Dist: mammoth
19
+ Requires-Dist: markdown
20
+ Requires-Dist: beautifulsoup4
18
21
 
19
22
  # SharedKernel
20
23
  this a shared kernel package
21
24
 
22
25
  # Change Log
26
+ ### Version 2.0.0
27
+ - Update pydantic version
28
+ - Delete vector databases
29
+ - Implement JsonStringModel
30
+ ### Version 1.9.0
31
+ - Implement DataFormatConverter
23
32
  ### Version 1.8.0
24
33
  - Implement persian number normalizer
25
34
  ### Version 1.7.3
@@ -2,6 +2,12 @@
2
2
  this a shared kernel package
3
3
 
4
4
  # Change Log
5
+ ### Version 2.0.0
6
+ - Update pydantic version
7
+ - Delete vector databases
8
+ - Implement JsonStringModel
9
+ ### Version 1.9.0
10
+ - Implement DataFormatConverter
5
11
  ### Version 1.8.0
6
12
  - Implement persian number normalizer
7
13
  ### Version 1.7.3
@@ -13,7 +13,6 @@ setup(
13
13
  packages=[
14
14
  "sharedkernel",
15
15
  "sharedkernel.database",
16
- "sharedkernel.database.vector_database_repository",
17
16
  "sharedkernel.enum",
18
17
  "sharedkernel.exception",
19
18
  "sharedkernel.objects",
@@ -26,16 +25,18 @@ setup(
26
25
  "pymongo",
27
26
  "fastapi==0.111.0",
28
27
  "PyJWT",
29
- #"pymilvus",
30
- "chromadb",
31
28
  "persian_tools",
32
29
  "sentry-sdk",
33
30
  "jdatetime",
34
31
  "persiantools",
35
- "boto3"
32
+ "boto3",
33
+ "python-docx",
34
+ "mammoth",
35
+ "markdown",
36
+ "beautifulsoup4"
36
37
  ],
37
38
  # *strongly* suggested for sharing
38
- version="1.8.0",
39
+ version="2.0.0",
39
40
  description="sharekernel is a shared package between all python projects",
40
41
  long_description=long_description,
41
42
  long_description_content_type="text/markdown",
@@ -0,0 +1,108 @@
1
+ from abc import ABC, abstractmethod
2
+ import markdown
3
+
4
+ from docx import Document
5
+ from docx.enum.text import WD_ALIGN_PARAGRAPH
6
+ from docx.oxml.ns import qn
7
+ from docx.oxml import OxmlElement
8
+ from bs4 import BeautifulSoup, NavigableString
9
+ import mammoth
10
+
11
+
12
+ class DataFormatConverter(ABC):
13
+ @abstractmethod
14
+ def convert(input_path, output_path):
15
+ pass
16
+
17
+ def _get_out_title(self, title: str, file_format: str = ".docx") -> str:
18
+ return ".".join(title.split(".")[:-1]) + file_format
19
+
20
+
21
+ class Md2Docx(DataFormatConverter):
22
+
23
+ def __add_hyperlink(self, paragraph, url, text, color="0000FF", underline=True):
24
+ r_id = paragraph.part.relate_to(url, "hyperlink", is_external=True)
25
+ hyperlink = OxmlElement("w:hyperlink")
26
+ hyperlink.set(qn("r:id"), r_id)
27
+
28
+ new_run, rPr = OxmlElement("w:r"), OxmlElement("w:rPr")
29
+ if color:
30
+ color_element = OxmlElement("w:color")
31
+ color_element.set(qn("w:val"), color)
32
+ rPr.append(color_element)
33
+ if underline:
34
+ u = OxmlElement("w:u")
35
+ u.set(qn("w:val"), "single")
36
+ rPr.append(u)
37
+ new_run.append(rPr)
38
+
39
+ text_elem = OxmlElement("w:t")
40
+ text_elem.text = text
41
+ new_run.append(text_elem)
42
+ hyperlink.append(new_run)
43
+ paragraph._p.append(hyperlink)
44
+
45
+ def __set_right_to_left(self, paragraph):
46
+ paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
47
+ paragraph.paragraph_format.right_to_left = True
48
+
49
+ @staticmethod
50
+ def convert(input_path, output_path=None):
51
+ md2docx = Md2Docx()
52
+
53
+ with open(input_path, "r", encoding="utf-8") as f:
54
+ md_content = f.read()
55
+
56
+ output_file = output_path or md2docx._get_out_title(input_path)
57
+
58
+ html_content = markdown.markdown(md_content)
59
+ soup = BeautifulSoup(html_content, "html.parser")
60
+ doc = Document()
61
+
62
+ for element in soup:
63
+ if element.name == "h1":
64
+ paragraph = doc.add_paragraph(element.get_text(), style="Heading 1")
65
+ elif element.name == "h2":
66
+ paragraph = doc.add_paragraph(element.get_text(), style="Heading 2")
67
+ elif element.name == "p":
68
+ paragraph = doc.add_paragraph()
69
+ for part in element.contents:
70
+ if isinstance(part, NavigableString):
71
+ paragraph.add_run(str(part))
72
+ elif part.name == "a":
73
+ md2docx.__add_hyperlink(
74
+ paragraph, part["href"], part.get_text()
75
+ )
76
+ elif part.name == "img":
77
+ md2docx.__add_hyperlink(
78
+ paragraph, part["src"], part.get("alt", "Image")
79
+ )
80
+ md2docx.__set_right_to_left(paragraph)
81
+
82
+ doc.save(output_file)
83
+ return output_file
84
+
85
+
86
+ class Docx2Md(DataFormatConverter):
87
+ def _get_out_title(self, title: str, file_format: str = ".txt") -> str:
88
+ return super()._get_out_title(title, file_format)
89
+
90
+ @staticmethod
91
+ def convert(input_path: str, output_path: str = None):
92
+ docx2md = Docx2Md()
93
+ with open(input_path, "rb") as docx:
94
+ result = mammoth.convert_to_markdown(
95
+ docx,
96
+ convert_image=mammoth.images.img_element(
97
+ lambda image: {"src": "images/" + image.content_type.split("/")[-1]}
98
+ ),
99
+ )
100
+
101
+ output_path = output_path or docx2md._get_out_title(input_path)
102
+
103
+ with open(output_path, "w", encoding="utf-8") as md_file:
104
+ md_file.write(result.value)
105
+
106
+ if result.messages:
107
+ for message in result.messages:
108
+ print("Warning:", message)
@@ -0,0 +1 @@
1
+ # from .mongo_repository_base import MongoRepositoryBase
@@ -1,7 +1,7 @@
1
1
  from pymongo import MongoClient
2
2
  from bson import ObjectId
3
3
  from typing import Generic, TypeVar, List, Type
4
- from pydantic import BaseModel
4
+ from pydantic.v1 import BaseModel
5
5
  from sharedkernel.string_extentions import camel_to_snake
6
6
 
7
7
  T = TypeVar("T", bound=BaseModel)
@@ -21,8 +21,7 @@ class MongoGenericRepository(Generic[T]):
21
21
  def find_one(self, id: str) -> T:
22
22
  query = {"_id": ObjectId(id), "is_deleted": False}
23
23
  result = self.collection.find_one(query)
24
- return self._map_to_model(result) if result else None
25
-
24
+ return self._map_to_model(result) if result else None
26
25
 
27
26
  def insert_one(self, data: T) -> str:
28
27
  delattr(data, "id")
@@ -0,0 +1,10 @@
1
+ from enum import Enum
2
+
3
+
4
+ class ErrorCode(str, Enum):
5
+ Item_NotFound = "یافت نشد"
6
+ Internal_Server = "خطایی در سیستم رخ داده است"
7
+ UnAuthorized = "توکن دسترسی معتبر نمی باشد"
8
+ Success = "با موفقیت انجام شد"
9
+ Intents_Count_Should_Equal_One = "فقط یک اینتنت میتوانید وارد نمایید"
10
+ Unsupported_Date_Type = "تاریخ داده شده پیشتیبانی نمیشود."
@@ -1,10 +1,9 @@
1
1
  from pydantic import BaseModel, Field
2
- from typing import Optional
3
2
  from datetime import datetime
4
3
 
5
4
 
6
5
  class BaseDocument(BaseModel):
7
- id: Optional[str]
8
- is_deleted: bool = False
6
+ id: str | None = None
7
+ is_deleted: bool | None = False
9
8
  created_on: datetime = Field(default_factory=datetime.now)
10
9
  updated_on: datetime = Field(default_factory=datetime.now)
@@ -0,0 +1,27 @@
1
+ import json
2
+ from typing import Any
3
+ from pydantic import BaseModel, model_validator
4
+
5
+
6
+ class JsonStringModel(BaseModel):
7
+ """`BaseModel` subclass used to validate objects passed to API as JSON strings.
8
+
9
+ The primary use case is treating one of the form fields as JSON payload. We need to do that
10
+ in case of endpoints that accept both file and rich set of input parameters. We cannot receive
11
+ both file and JSON body because they use conflicting Content-Type header. It is
12
+ "multipart/form-data" for the former and "application/json" for the latter. If we require
13
+ a client to send a file, we have to use the data sent in form for input parameters.
14
+
15
+ Theoretically we could use several form fields to gather all the required input parameters.
16
+ However, some of them are nested in their nature and forms don't support that. That's why we
17
+ consider it better to just use one form field and process its content as JSON payload.
18
+
19
+ If a form field model inherits from this class, we will get API documentation and input
20
+ validation - just the way we get it for JSON body defined with `BaseModel`.
21
+ """
22
+ @model_validator(mode='before')
23
+ @classmethod
24
+ def validate_to_json(cls, value: Any) -> Any:
25
+ if isinstance(value, str):
26
+ return cls(**json.loads(value))
27
+ return value
@@ -1,10 +1,6 @@
1
- from pydantic import BaseModel
2
-
3
1
  from typing import Generic, TypeVar
4
-
5
2
  from pydantic import BaseModel
6
- from pydantic.generics import GenericModel
7
-
3
+ from pydantic import BaseModel
8
4
  from sharedkernel.enum.error_code import ErrorCode
9
5
 
10
6
  ResultT = TypeVar("ResultT")
@@ -13,16 +9,16 @@ ResultT = TypeVar("ResultT")
13
9
  class BaseResult(BaseModel):
14
10
  isSucceed:bool = True
15
11
  message: str = ErrorCode.Success.value
16
- errorCode: str = None
12
+ errorCode: str | None = None
17
13
 
18
- class Result(BaseResult,GenericModel, Generic[ResultT]):
19
- data: ResultT =None
14
+ class Result(BaseResult, Generic[ResultT]):
15
+ data: ResultT = None
20
16
 
21
17
  def __init__(
22
18
  self,
23
19
  isSucceed:bool = True,
24
- data: object = None,
20
+ data: object | None = None,
25
21
  message: str = ErrorCode.Success.value,
26
- errorCode: str = None
22
+ errorCode: str | None = None
27
23
  )-> None:
28
- super().__init__(isSucceed= isSucceed, data= data, message= message, errorCode= errorCode)
24
+ super().__init__(isSucceed=isSucceed, data=data, message=message, errorCode=errorCode)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sharedkernel
3
- Version: 1.8.0
3
+ Version: 2.0.0
4
4
  Summary: sharekernel is a shared package between all python projects
5
5
  Author: Smilinno
6
6
  Description-Content-Type: text/markdown
@@ -9,17 +9,26 @@ Requires-Dist: requests
9
9
  Requires-Dist: pymongo
10
10
  Requires-Dist: fastapi==0.111.0
11
11
  Requires-Dist: PyJWT
12
- Requires-Dist: chromadb
13
12
  Requires-Dist: persian_tools
14
13
  Requires-Dist: sentry-sdk
15
14
  Requires-Dist: jdatetime
16
15
  Requires-Dist: persiantools
17
16
  Requires-Dist: boto3
17
+ Requires-Dist: python-docx
18
+ Requires-Dist: mammoth
19
+ Requires-Dist: markdown
20
+ Requires-Dist: beautifulsoup4
18
21
 
19
22
  # SharedKernel
20
23
  this a shared kernel package
21
24
 
22
25
  # Change Log
26
+ ### Version 2.0.0
27
+ - Update pydantic version
28
+ - Delete vector databases
29
+ - Implement JsonStringModel
30
+ ### Version 1.9.0
31
+ - Implement DataFormatConverter
23
32
  ### Version 1.8.0
24
33
  - Implement persian number normalizer
25
34
  ### Version 1.7.3
@@ -1,6 +1,7 @@
1
1
  README.md
2
2
  setup.py
3
3
  sharedkernel/common.py
4
+ sharedkernel/data_format_converter.py
4
5
  sharedkernel/date_converter.py
5
6
  sharedkernel/jwt_service.py
6
7
  sharedkernel/regex_masking.py
@@ -13,13 +14,8 @@ sharedkernel.egg-info/requires.txt
13
14
  sharedkernel.egg-info/top_level.txt
14
15
  sharedkernel/database/__init__.py
15
16
  sharedkernel/database/mongo_generic_repository.py
16
- sharedkernel/database/vector_database_repository/__init__.py
17
- sharedkernel/database/vector_database_repository/chroma_startegy.py
18
- sharedkernel/database/vector_database_repository/vector_database_repository.py
19
- sharedkernel/database/vector_database_repository/vector_database_strategy.py
20
17
  sharedkernel/enum/__init__.py
21
18
  sharedkernel/enum/error_code.py
22
- sharedkernel/enum/vector_database_type.py
23
19
  sharedkernel/exception/__init__.py
24
20
  sharedkernel/exception/exception.py
25
21
  sharedkernel/exception/exception_handlers.py
@@ -28,5 +24,6 @@ sharedkernel/normalizer/number_normalizer.py
28
24
  sharedkernel/normalizer/phone_number_normalizer.py
29
25
  sharedkernel/objects/__init__.py
30
26
  sharedkernel/objects/base_document.py
27
+ sharedkernel/objects/json_string_model.py
31
28
  sharedkernel/objects/jwt_model.py
32
29
  sharedkernel/objects/result.py
@@ -3,9 +3,12 @@ requests
3
3
  pymongo
4
4
  fastapi==0.111.0
5
5
  PyJWT
6
- chromadb
7
6
  persian_tools
8
7
  sentry-sdk
9
8
  jdatetime
10
9
  persiantools
11
10
  boto3
11
+ python-docx
12
+ mammoth
13
+ markdown
14
+ beautifulsoup4
@@ -1,2 +0,0 @@
1
- from .vector_database_repository import VectorRepository
2
- # from .mongo_repository_base import MongoRepositoryBase
@@ -1,2 +0,0 @@
1
- from .chroma_startegy import ChromaStrategy
2
- from .vector_database_repository import VectorRepository
@@ -1,39 +0,0 @@
1
- import chromadb
2
- import numpy as np
3
- from chromadb.config import Settings
4
- from .vector_database_strategy import VectorDatabaseStrategy
5
- import uuid
6
-
7
-
8
- class ChromaStrategy(VectorDatabaseStrategy):
9
- def __init__(self, collection_name: str):
10
- self.collection_name = collection_name
11
- self.collection = None
12
-
13
- def connect(self, host: str = "localhost", port: int = 8000):
14
- client = chromadb.Client(
15
- Settings(
16
- chroma_api_impl="rest",
17
- chroma_server_host=host,
18
- chroma_server_http_port=port,
19
- )
20
- )
21
- self.collection = client.get_or_create_collection(self.collection_name)
22
-
23
- def insert_vector(self, vector: np.ndarray, metadata: dict) -> str:
24
- id = str(uuid.uuid4())
25
- self.collection.upsert(ids=id, embeddings=vector.tolist(), metadatas=[metadata])
26
-
27
- return id
28
-
29
- def search_vector(self, vector: np.ndarray, top_k: int):
30
- results = self.collection.query(vectors=[vector.tolist()], n_results=top_k)
31
- return results
32
-
33
- def get_vector_by_id(self, id: str):
34
- result = self.collection.get(ids=id)
35
-
36
- return result
37
-
38
- def delete_vector(self, id: str):
39
- self.collection.delete(ids=id)
@@ -1,28 +0,0 @@
1
- import numpy as np
2
- from .vector_database_strategy import VectorDatabaseStrategy
3
- # from .milvus_strategy import MilvusStrategy
4
- from .chroma_startegy import ChromaStrategy
5
- from sharedkernel.enum.vector_database_type import VectorDatabaseType
6
-
7
- class VectorRepository:
8
- def __init__(self, database_type: VectorDatabaseType, collection_name: str, **connection_params):
9
- self.strategy = self._get_strategy(database_type, collection_name)
10
- self.strategy.connect(**connection_params)
11
-
12
- def _get_strategy(self, database_type: VectorDatabaseType, collection_name: str) -> VectorDatabaseStrategy:
13
- # if database_type == VectorDatabaseType.MILVUS:
14
- # return MilvusStrategy(collection_name)
15
- # else:
16
- return ChromaStrategy(collection_name)
17
-
18
- def add_vector(self, vector: np.ndarray, metadata: dict) -> str:
19
- return self.strategy.insert_vector(vector, metadata)
20
-
21
- def find_similar_vectors(self, vector: np.ndarray, top_k: int):
22
- return self.strategy.search_vector(vector, top_k)
23
-
24
- def remove_vector(self, id: str):
25
- self.strategy.delete_vector(id)
26
-
27
- def get_vector_by_id(self, id: str):
28
- return self.strategy.get_vector_by_id(id)
@@ -1,22 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- import numpy as np
3
-
4
- class VectorDatabaseStrategy(ABC):
5
- @abstractmethod
6
- def connect(self, **kwargs):
7
- pass
8
- @abstractmethod
9
- def insert_vector(self, vector: np.ndarray, metadata: dict) -> str:
10
- pass
11
-
12
- @abstractmethod
13
- def search_vector(self, vector: np.ndarray, top_k: int):
14
- pass
15
-
16
- @abstractmethod
17
- def delete_vector(self, id: str):
18
- pass
19
-
20
- @abstractmethod
21
- def get_vector_by_id(self, id: str):
22
- pass
@@ -1,14 +0,0 @@
1
- from enum import Enum
2
-
3
- class ErrorCode(str, Enum):
4
- Item_NotFound= "یافت نشد"
5
-
6
- Internal_Server= "خطایی در سیستم رخ داده است"
7
-
8
- UnAuthorized= "توکن دسترسی معتبر نمی باشد"
9
-
10
- Success= "با موفقیت انجام شد"
11
-
12
- Intents_Count_Should_Equal_One= "فقط یک اینتنت میتوانید وارد نمایید"
13
-
14
- Unsupported_Date_Type = "تاریخ داده شده پیشتیبانی نمیشود."
@@ -1,5 +0,0 @@
1
- from enum import Enum
2
-
3
- class VectorDatabaseType(Enum):
4
- MILVUS = "milvus"
5
- CHROMA = "chroma"
File without changes