sharedkernel 2.6.4__tar.gz → 2.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/PKG-INFO +6 -2
  2. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/README.md +4 -0
  3. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/setup.py +5 -2
  4. sharedkernel-2.8.0/sharedkernel/chunker/chunk_rule.py +84 -0
  5. sharedkernel-2.8.0/sharedkernel/chunker/text_chunker.py +107 -0
  6. sharedkernel-2.8.0/sharedkernel/database/cache/__init__.py +0 -0
  7. sharedkernel-2.8.0/sharedkernel/database/cache/cache_repository.py +37 -0
  8. sharedkernel-2.8.0/sharedkernel/database/cache/redis_generic_cache.py +88 -0
  9. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel.egg-info/PKG-INFO +6 -2
  10. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel.egg-info/SOURCES.txt +5 -0
  11. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel.egg-info/requires.txt +1 -1
  12. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/setup.cfg +0 -0
  13. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/common.py +0 -0
  14. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/config.py +0 -0
  15. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/data_format_converter.py +0 -0
  16. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/database/__init__.py +0 -0
  17. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/database/audit_model.py +0 -0
  18. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/database/distributed_cache.py +0 -0
  19. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/database/mongo_generic_audit_repository.py +0 -0
  20. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/database/mongo_generic_repository.py +0 -0
  21. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/database/mongo_health_checker.py +0 -0
  22. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/database/pagination_response_dto.py +0 -0
  23. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/date_converter.py +0 -0
  24. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/diff_utils.py +0 -0
  25. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/enum/__init__.py +0 -0
  26. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/enum/error_code.py +0 -0
  27. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/enum/redis_mode_enum.py +0 -0
  28. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/enum/sort_order.py +0 -0
  29. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/exception/__init__.py +0 -0
  30. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/exception/exception.py +0 -0
  31. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/exception/exception_handlers.py +0 -0
  32. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/file_validation.py +0 -0
  33. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/ip_session_service.py +0 -0
  34. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/jwt_service.py +0 -0
  35. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/logger/log_decorator.py +0 -0
  36. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/logger/log_dto.py +0 -0
  37. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/logger/log_enums.py +0 -0
  38. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/logger/log_info.py +0 -0
  39. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/logger/log_middlewares.py +0 -0
  40. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/logger/logger_service.py +0 -0
  41. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/multipart_upload.py +0 -0
  42. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/normalizer/__init__.py +0 -0
  43. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/normalizer/number_normalizer.py +0 -0
  44. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/normalizer/phone_number_normalizer.py +0 -0
  45. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/normalizer/string_normalizer.py +0 -0
  46. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/objects/__init__.py +0 -0
  47. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/objects/base_document.py +0 -0
  48. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/objects/json_string_model.py +0 -0
  49. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/objects/jwt_model.py +0 -0
  50. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/objects/result.py +0 -0
  51. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/objects/user_info.py +0 -0
  52. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/regex_masking.py +0 -0
  53. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/s3_uploader.py +0 -0
  54. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel/string_extentions.py +0 -0
  55. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel.egg-info/dependency_links.txt +0 -0
  56. {sharedkernel-2.6.4 → sharedkernel-2.8.0}/sharedkernel.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sharedkernel
3
- Version: 2.6.4
3
+ Version: 2.8.0
4
4
  Summary: sharekernel is a shared package between all python projects
5
5
  Author: Smilinno
6
6
  Description-Content-Type: text/markdown
@@ -20,7 +20,7 @@ Requires-Dist: markdown
20
20
  Requires-Dist: beautifulsoup4
21
21
  Requires-Dist: deepdiff
22
22
  Requires-Dist: kombu
23
- Requires-Dist: redis
23
+ Requires-Dist: redis==8.0.0
24
24
  Dynamic: author
25
25
  Dynamic: description
26
26
  Dynamic: description-content-type
@@ -31,6 +31,10 @@ Dynamic: summary
31
31
  this is a shared kernel package
32
32
 
33
33
  # Change Log
34
+ ### Version 2.8.0
35
+ - Add chunker for TTS (text-to-speech)
36
+ ### Version 2.7.0
37
+ - Add Redis and cache repository
34
38
  ### Version 2.6.4
35
39
  - Add Verbose to log
36
40
  ### Version 2.6.3
@@ -2,6 +2,10 @@
2
2
  this is a shared kernel package
3
3
 
4
4
  # Change Log
5
+ ### Version 2.8.0
6
+ - Add chunker for TTS (text-to-speech)
7
+ ### Version 2.7.0
8
+ - Add Redis and cache repository
5
9
  ### Version 2.6.4
6
10
  - Add Verbose to log
7
11
  ### Version 2.6.3
@@ -13,11 +13,14 @@ setup(
13
13
  packages=[
14
14
  "sharedkernel",
15
15
  "sharedkernel.database",
16
+ "sharedkernel.database.cache",
16
17
  "sharedkernel.enum",
17
18
  "sharedkernel.exception",
18
19
  "sharedkernel.objects",
19
20
  "sharedkernel.normalizer",
20
21
  "sharedkernel.logger",
22
+ "sharedkernel.chunker",
23
+
21
24
  ],
22
25
  # Needed for dependencies
23
26
  install_requires=[
@@ -37,10 +40,10 @@ setup(
37
40
  "beautifulsoup4",
38
41
  "deepdiff",
39
42
  "kombu",
40
- "redis",
43
+ "redis==8.0.0",
41
44
  ],
42
45
  # *strongly* suggested for sharing
43
- version="2.6.4",
46
+ version="2.8.0",
44
47
  description="sharekernel is a shared package between all python projects",
45
48
  long_description=long_description,
46
49
  long_description_content_type="text/markdown",
@@ -0,0 +1,84 @@
1
+ from __future__ import annotations
2
+ import re
3
+ from typing import List, Optional, Protocol, Tuple
4
+
5
+
6
class SplitRule(Protocol):
    """Structural interface shared by all chunk split rules.

    A rule looks at one text segment and proposes where it should be cut.
    """

    def apply(
        self,
        segment: str,
        offset: int,
    ) -> Tuple[Optional[int], Optional[str], Optional[str]]:
        """Propose a split position for ``segment``.

        Args:
            segment: The piece of text to inspect.
            offset: Value the rules in this module add to their
                segment-relative index before returning it.

        Returns:
            A ``(split_index, phrase, method)`` triple. The rules in this
            module return ``(None, None, None)`` when they find no split.
        """
        ...
12
+
13
+
14
class PunctuationRule(SplitRule):
    """Cut right after the earliest punctuation character in a segment."""

    def __init__(self, punctuations: str = ".,;:!?"):
        # Stored as a set of single characters for cheap membership tests.
        self.punctuations = set(punctuations)

    def apply(
        self, segment: str, offset: int
    ) -> Tuple[Optional[int], Optional[str], Optional[str]]:
        """Locate the first configured punctuation character.

        Returns:
            ``(offset + index + 1, character, "punctuation")`` so the split
            falls just after the character, or ``(None, None, None)`` when
            the segment contains none of the configured characters.
        """
        hit = next(
            (
                (position, char)
                for position, char in enumerate(segment)
                if char in self.punctuations
            ),
            None,
        )
        if hit is None:
            return None, None, None

        position, char = hit
        return offset + position + 1, char, "punctuation"
27
+
28
+
29
+ class RegexRule(SplitRule):
30
+ """Split after the first regex match."""
31
+
32
+ def __init__(self, pattern: str):
33
+ self.regex = re.compile(pattern) if pattern else None
34
+
35
+ def apply(
36
+ self, segment: str, offset: int
37
+ ) -> Tuple[Optional[int], Optional[str], Optional[str]]:
38
+ if not self.regex:
39
+ return None, None, None
40
+ m = self.regex.search(segment)
41
+ if m:
42
+ return offset + m.end(), segment[m.start() : m.end()], "regex"
43
+ return None, None, None
44
+
45
+
46
+ class CutWordRule(SplitRule):
47
+ """Split after the first occurrence of any configured word/phrase."""
48
+
49
+ def __init__(self, cut_words: List[str]):
50
+ self.cut_words = [w.lower() for w in cut_words]
51
+
52
+ def apply(
53
+ self, segment: str, offset: int
54
+ ) -> Tuple[Optional[int], Optional[str], Optional[str]]:
55
+ lowered = segment.lower()
56
+ for word in self.cut_words:
57
+ idx = lowered.find(word)
58
+ if idx != -1:
59
+ phrase = segment[idx : idx + len(word)]
60
+ return offset + idx + len(word), phrase, "cut_word"
61
+ return None, None, None
62
+
63
+
64
+ class SpaceFallbackRule(SplitRule):
65
+ """Split near the center on space; otherwise hard-cut at max_len."""
66
+
67
+ def __init__(self, max_len: int):
68
+ self.max_len = max_len
69
+
70
+ def apply(
71
+ self, segment: str, offset: int
72
+ ) -> Tuple[Optional[int], Optional[str], Optional[str]]:
73
+ centre = len(segment) // 2
74
+ left = segment.rfind(" ", 0, centre)
75
+ right = segment.find(" ", centre)
76
+
77
+ if left != -1:
78
+ split = left
79
+ elif right != -1:
80
+ split = right
81
+ else:
82
+ split = self.max_len
83
+
84
+ return offset + split, None, "space_fallback"
@@ -0,0 +1,107 @@
1
+ from dataclasses import dataclass
2
+ from typing import List, Optional
3
+
4
+ from .chunk_rule import (
5
+ SplitRule,
6
+ )
7
+
8
+
9
@dataclass
class Chunk:
    """One piece of text emitted by TextChunker, with its provenance."""

    # The chunk content, i.e. the source text sliced as [begin:end].
    text: str
    # Sequence number of the chunk; TextChunker starts counting at 1.
    number: int
    # Start index of the chunk in the source text.
    begin: int
    # End index (exclusive) of the chunk in the source text.
    end: int
    # How the split was decided: the label returned by the rule that matched,
    # or "final" / "hard_cut" when TextChunker decided on its own.
    method: str
    # Text reported by the matching rule, or None when there is none.
    phrase: Optional[str]
19
+
20
+
21
+ class TextChunker:
22
+ """Split text into chunks based on ordered split rules."""
23
+
24
+ def __init__(self, min_len: int, max_len: int, rules: List[SplitRule]):
25
+ if min_len < 1:
26
+ raise ValueError("min_len must be >= 1")
27
+ if max_len < min_len:
28
+ raise ValueError("max_len must be >= min_len")
29
+
30
+ self.min_len = min_len
31
+ self.max_len = max_len
32
+ self.rules = rules
33
+
34
+ def chunk(self, text: str) -> List[Chunk]:
35
+ if not text:
36
+ return []
37
+
38
+ chunks: List[Chunk] = []
39
+ pos = 0
40
+ number = 1
41
+
42
+ while pos < len(text):
43
+ if len(text) - pos <= self.max_len:
44
+ chunk_text = text[pos:]
45
+ chunks.append(Chunk(chunk_text, number, pos, len(text), "final", None))
46
+ break
47
+
48
+ window_end = pos + self.max_len
49
+ window = text[pos:window_end]
50
+
51
+ split_idx: Optional[int] = None
52
+ phrase: Optional[str] = None
53
+ method: Optional[str] = None
54
+
55
+ for rule in self.rules:
56
+ candidate, cand_phrase, cand_method = rule.apply(window, pos)
57
+ if candidate is not None and candidate - pos >= self.min_len:
58
+ split_idx, phrase, method = candidate, cand_phrase, cand_method
59
+ break
60
+
61
+ if split_idx is None:
62
+ split_idx = pos + self.max_len
63
+ method = "hard_cut"
64
+
65
+ chunk_text = text[pos:split_idx]
66
+ chunks.append(Chunk(chunk_text, number, pos, split_idx, method, phrase))
67
+ pos = split_idx
68
+ number += 1
69
+
70
+ return chunks
71
+
72
+ def chunks(self, text: str):
73
+ """Yield chunks lazily, using the same logic as `chunk`."""
74
+ if not text:
75
+ return
76
+
77
+ pos = 0
78
+ number = 1
79
+
80
+ while pos < len(text):
81
+ if len(text) - pos <= self.max_len:
82
+ chunk_text = text[pos:]
83
+ yield Chunk(chunk_text, number, pos, len(text), "final", None)
84
+ break
85
+
86
+ window_end = pos + self.max_len
87
+ window = text[pos:window_end]
88
+
89
+ split_idx: Optional[int] = None
90
+ phrase: Optional[str] = None
91
+ method: Optional[str] = None
92
+
93
+ for rule in self.rules:
94
+ candidate, cand_phrase, cand_method = rule.apply(window, pos)
95
+ if candidate is not None and candidate - pos >= self.min_len:
96
+ split_idx, phrase, method = candidate, cand_phrase, cand_method
97
+ break
98
+
99
+ if split_idx is None:
100
+ split_idx = pos + self.max_len
101
+ method = "hard_cut"
102
+
103
+ chunk_text = text[pos:split_idx]
104
+ yield Chunk(chunk_text, number, pos, split_idx, method, phrase)
105
+
106
+ pos = split_idx
107
+ number += 1
@@ -0,0 +1,37 @@
1
+
2
+ from abc import ABC, abstractmethod
3
+ from typing import Generic, Optional, TypeVar
4
+ from pydantic import BaseModel
5
+
6
+ T = TypeVar("T", bound=BaseModel)
7
+
8
+
9
class CacheRepository(ABC, Generic[T]):
    """Abstract async cache interface for values of model type ``T``.

    Every operation is an abstract coroutine; concrete backends supply the
    storage.
    """

    @abstractmethod
    async def set(
        self,
        key: str,
        value: T,
        ttl_seconds: int | None = None,
    ) -> None:
        """Store ``value`` under ``key``, optionally with a TTL in seconds."""

    @abstractmethod
    async def get(self, key: str) -> Optional[T]:
        """Return the value stored under ``key``, or ``None``."""

    @abstractmethod
    async def exists(self, key: str) -> bool:
        """Report whether ``key`` is present."""

    @abstractmethod
    async def delete(self, key: str) -> int:
        """Remove ``key``; returns an int (presumably the removed count)."""

    @abstractmethod
    async def delete_group(self, key_prefix: str) -> int:
        """Remove entries by ``key_prefix``; returns an int (presumably the removed count)."""

    @abstractmethod
    async def clear(self) -> int:
        """Remove the repository's entries; returns an int (presumably the removed count)."""
@@ -0,0 +1,88 @@
1
+ from typing import Generic, Optional, Type, TypeVar
2
+ from pydantic import BaseModel
3
+ from redis.asyncio import Redis
4
+
5
+ from cache.cache_repository import CacheRepository
6
+
7
+ T = TypeVar("T", bound=BaseModel)
8
+
9
+
10
class RedisGenericCache(CacheRepository[T], Generic[T]):
    """Redis-backed cache that stores pydantic models as JSON strings.

    Every key is stored as ``"<namespace>:<key>"`` so several caches can
    share one Redis database. Replies are handled whether the client returns
    ``bytes`` (the default) or ``str`` (``decode_responses=True``).

    NOTE(review): this module imports ``CacheRepository`` from
    ``cache.cache_repository``. Inside the ``sharedkernel.database.cache``
    package that likely needs to be a relative import
    (``from .cache_repository import CacheRepository``) - confirm.
    """

    def __init__(
        self,
        client: Redis,
        namespace: str,
        model: Type[T],
        default_ttl_seconds: int | None = None,
    ):
        """Bind the cache to a client, a key namespace and a model class.

        Args:
            client: Async Redis client used for every operation.
            namespace: Prefix put in front of every key.
            model: Pydantic model class used to rebuild cached values.
            default_ttl_seconds: TTL applied when ``set`` gets no TTL of its
                own; ``None`` stores entries without expiry.
        """
        self.client = client
        self.namespace = namespace
        self.model = model
        self.default_ttl_seconds = default_ttl_seconds

    def _pref(self, key: str) -> str:
        """Return ``key`` with the namespace prefix applied."""
        return f"{self.namespace}:{key}"

    def _unpref(self, key: str) -> str:
        """Return ``key`` with the namespace prefix stripped, if present."""
        prefix = f"{self.namespace}:"
        return key.removeprefix(prefix)

    @staticmethod
    def _to_text(raw: str | bytes) -> str:
        """Decode a Redis reply to ``str``; ``str`` replies pass through."""
        if isinstance(raw, (bytes, bytearray)):
            return raw.decode()
        return raw

    def _serialize(self, value: T) -> str:
        """Dump the model to its JSON representation."""
        return value.model_dump_json()

    def _deserialize(self, raw: str | bytes) -> T:
        """Rebuild a model instance from the stored JSON."""
        # Calling .decode() unconditionally fails on str replies, which is
        # what a client created with decode_responses=True returns.
        return self.model.model_validate_json(self._to_text(raw))

    async def _delete_matching(self, pattern: str) -> int:
        """Delete every key matching ``pattern``; return how many were removed."""
        deleted = 0
        async for key in self.client.scan_iter(match=pattern):
            deleted += await self.client.delete(key)
        return deleted

    async def set(
        self,
        key: str,
        value: T,
        ttl_seconds: int | None = None,
    ) -> None:
        """Store ``value`` under ``key``.

        A ``None`` value is ignored silently. A falsy ``ttl_seconds``
        (``None`` or ``0``) falls back to ``default_ttl_seconds``; if that is
        falsy as well the entry is stored without expiry.
        """
        if value is None:
            return

        redis_key = self._pref(key)
        ttl = ttl_seconds if ttl_seconds else self.default_ttl_seconds

        if ttl:
            await self.client.set(redis_key, self._serialize(value), ex=ttl)
        else:
            await self.client.set(redis_key, self._serialize(value))

    async def get(self, key: str) -> Optional[T]:
        """Return the cached model for ``key``, or ``None`` when absent."""
        raw = await self.client.get(self._pref(key))
        if raw is None:
            return None
        return self._deserialize(raw)

    async def exists(self, key: str) -> bool:
        """Report whether ``key`` exists in this namespace."""
        return bool(await self.client.exists(self._pref(key)))

    async def delete(self, key: str) -> int:
        """Delete ``key`` and return the client's delete result."""
        return await self.client.delete(self._pref(key))

    async def delete_group(self, key_prefix: str) -> int:
        """Delete every key in the namespace starting with ``key_prefix``.

        The prefix is embedded in a SCAN match pattern as-is, so glob
        characters inside it are interpreted by Redis.
        """
        return await self._delete_matching(self._pref(f"{key_prefix}*"))

    async def clear(self) -> int:
        """Delete every key in the namespace; return how many were removed."""
        return await self._delete_matching(f"{self.namespace}:*")

    async def get_all_ids(self) -> list[str]:
        """Return all keys of the namespace with the prefix stripped."""
        result: list[str] = []

        async for raw in self.client.scan_iter(match=f"{self.namespace}:*"):
            result.append(self._unpref(self._to_text(raw)))

        return result
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sharedkernel
3
- Version: 2.6.4
3
+ Version: 2.8.0
4
4
  Summary: sharekernel is a shared package between all python projects
5
5
  Author: Smilinno
6
6
  Description-Content-Type: text/markdown
@@ -20,7 +20,7 @@ Requires-Dist: markdown
20
20
  Requires-Dist: beautifulsoup4
21
21
  Requires-Dist: deepdiff
22
22
  Requires-Dist: kombu
23
- Requires-Dist: redis
23
+ Requires-Dist: redis==8.0.0
24
24
  Dynamic: author
25
25
  Dynamic: description
26
26
  Dynamic: description-content-type
@@ -31,6 +31,10 @@ Dynamic: summary
31
31
  this is a shared kernel package
32
32
 
33
33
  # Change Log
34
+ ### Version 2.8.0
35
+ - Add chunker for TTS (text-to-speech)
36
+ ### Version 2.7.0
37
+ - Add Redis and cache repository
34
38
  ### Version 2.6.4
35
39
  - Add Verbose to log
36
40
  ### Version 2.6.3
@@ -17,6 +17,8 @@ sharedkernel.egg-info/SOURCES.txt
17
17
  sharedkernel.egg-info/dependency_links.txt
18
18
  sharedkernel.egg-info/requires.txt
19
19
  sharedkernel.egg-info/top_level.txt
20
+ sharedkernel/chunker/chunk_rule.py
21
+ sharedkernel/chunker/text_chunker.py
20
22
  sharedkernel/database/__init__.py
21
23
  sharedkernel/database/audit_model.py
22
24
  sharedkernel/database/distributed_cache.py
@@ -24,6 +26,9 @@ sharedkernel/database/mongo_generic_audit_repository.py
24
26
  sharedkernel/database/mongo_generic_repository.py
25
27
  sharedkernel/database/mongo_health_checker.py
26
28
  sharedkernel/database/pagination_response_dto.py
29
+ sharedkernel/database/cache/__init__.py
30
+ sharedkernel/database/cache/cache_repository.py
31
+ sharedkernel/database/cache/redis_generic_cache.py
27
32
  sharedkernel/enum/__init__.py
28
33
  sharedkernel/enum/error_code.py
29
34
  sharedkernel/enum/redis_mode_enum.py
@@ -14,4 +14,4 @@ markdown
14
14
  beautifulsoup4
15
15
  deepdiff
16
16
  kombu
17
- redis
17
+ redis==8.0.0
File without changes