praisonaiagents 0.0.73__py3-none-any.whl → 0.0.74__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- praisonaiagents/knowledge/chunking.py +69 -54
- {praisonaiagents-0.0.73.dist-info → praisonaiagents-0.0.74.dist-info}/METADATA +2 -2
- {praisonaiagents-0.0.73.dist-info → praisonaiagents-0.0.74.dist-info}/RECORD +5 -5
- {praisonaiagents-0.0.73.dist-info → praisonaiagents-0.0.74.dist-info}/WHEEL +0 -0
- {praisonaiagents-0.0.73.dist-info → praisonaiagents-0.0.74.dist-info}/top_level.txt +0 -0
@@ -7,21 +7,19 @@ class Chunking:
|
|
7
7
|
|
8
8
|
CHUNKER_PARAMS = {
|
9
9
|
'token': ['chunk_size', 'chunk_overlap', 'tokenizer'],
|
10
|
-
'
|
11
|
-
'
|
12
|
-
'semantic': ['chunk_size', 'embedding_model'
|
13
|
-
'sdpm': ['chunk_size', 'embedding_model'
|
14
|
-
'late': ['chunk_size', 'embedding_model'
|
15
|
-
'recursive': ['chunk_size', 'tokenizer']
|
10
|
+
'sentence': ['chunk_size', 'chunk_overlap', 'tokenizer_or_token_counter'],
|
11
|
+
'recursive': ['chunk_size', 'tokenizer_or_token_counter'],
|
12
|
+
'semantic': ['chunk_size', 'embedding_model'],
|
13
|
+
'sdpm': ['chunk_size', 'embedding_model'],
|
14
|
+
'late': ['chunk_size', 'embedding_model'],
|
16
15
|
}
|
17
16
|
|
18
17
|
@cached_property
|
19
18
|
def SUPPORTED_CHUNKERS(self) -> Dict[str, Any]:
|
20
19
|
"""Lazy load chunker classes."""
|
21
20
|
try:
|
22
|
-
from chonkie
|
21
|
+
from chonkie import (
|
23
22
|
TokenChunker,
|
24
|
-
WordChunker,
|
25
23
|
SentenceChunker,
|
26
24
|
SemanticChunker,
|
27
25
|
SDPMChunker,
|
@@ -35,7 +33,6 @@ class Chunking:
|
|
35
33
|
|
36
34
|
return {
|
37
35
|
'token': TokenChunker,
|
38
|
-
'word': WordChunker,
|
39
36
|
'sentence': SentenceChunker,
|
40
37
|
'semantic': SemanticChunker,
|
41
38
|
'sdpm': SDPMChunker,
|
@@ -48,7 +45,7 @@ class Chunking:
|
|
48
45
|
chunker_type: str = 'token',
|
49
46
|
chunk_size: int = 512,
|
50
47
|
chunk_overlap: int = 128,
|
51
|
-
|
48
|
+
tokenizer_or_token_counter: str = "gpt2",
|
52
49
|
embedding_model: Optional[Union[str, Any]] = None,
|
53
50
|
**kwargs
|
54
51
|
):
|
@@ -62,7 +59,7 @@ class Chunking:
|
|
62
59
|
self.chunker_type = chunker_type
|
63
60
|
self.chunk_size = chunk_size
|
64
61
|
self.chunk_overlap = chunk_overlap
|
65
|
-
self.
|
62
|
+
self.tokenizer_or_token_counter = tokenizer_or_token_counter
|
66
63
|
self._embedding_model = embedding_model
|
67
64
|
self.kwargs = kwargs
|
68
65
|
|
@@ -89,11 +86,10 @@ class Chunking:
|
|
89
86
|
if 'chunk_overlap' in allowed_params:
|
90
87
|
params['chunk_overlap'] = self.chunk_overlap
|
91
88
|
|
92
|
-
if '
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
params['tokenizer'] = self.tokenizer
|
89
|
+
if 'tokenizer_or_token_counter' in allowed_params:
|
90
|
+
params['tokenizer_or_token_counter'] = self.tokenizer_or_token_counter
|
91
|
+
elif 'tokenizer' in allowed_params:
|
92
|
+
params['tokenizer'] = self.tokenizer_or_token_counter
|
97
93
|
|
98
94
|
if 'embedding_model' in allowed_params:
|
99
95
|
params['embedding_model'] = self.embedding_model
|
@@ -115,63 +111,82 @@ class Chunking:
|
|
115
111
|
|
116
112
|
return self._chunker
|
117
113
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
114
|
+
# NOTE: OverlapRefinery is not supported, disabled for now
|
115
|
+
# As soon as Chonkie is updated to support it, we can re-enable it!
|
116
|
+
# Track in https://github.com/chonkie-inc/chonkie/issues/21
|
117
|
+
|
118
|
+
# def _get_overlap_refinery(self, context_size: Optional[int] = None, **kwargs):
|
119
|
+
# """Lazy load the overlap refinery."""
|
120
|
+
# try:
|
121
|
+
# from chonkie.refinery import OverlapRefinery
|
122
|
+
# except ImportError:
|
123
|
+
# raise ImportError("Failed to import OverlapRefinery from chonkie.refinery")
|
124
124
|
|
125
|
-
|
126
|
-
|
125
|
+
# if context_size is None:
|
126
|
+
# context_size = self.chunk_overlap
|
127
127
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
128
|
+
# return OverlapRefinery(
|
129
|
+
# context_size=context_size,
|
130
|
+
# tokenizer=self.chunker.tokenizer,
|
131
|
+
# **kwargs
|
132
|
+
# )
|
133
133
|
|
134
|
-
def add_overlap_context(
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
) -> List[Any]:
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
134
|
+
# def add_overlap_context(
|
135
|
+
# self,
|
136
|
+
# chunks: List[Any],
|
137
|
+
# context_size: int = None,
|
138
|
+
# mode: str = "suffix",
|
139
|
+
# merge_context: bool = True
|
140
|
+
# ) -> List[Any]:
|
141
|
+
# """Add overlap context to chunks using OverlapRefinery."""
|
142
|
+
# refinery = self._get_overlap_refinery(
|
143
|
+
# context_size=context_size,
|
144
|
+
# mode=mode,
|
145
|
+
# merge_context=merge_context
|
146
|
+
# )
|
147
|
+
# return refinery.refine(chunks)
|
148
148
|
|
149
149
|
def chunk(
|
150
150
|
self,
|
151
151
|
text: Union[str, List[str]],
|
152
|
-
|
153
|
-
|
152
|
+
# Disable context for now, as it's not supported
|
153
|
+
# add_context: bool = False,
|
154
|
+
# context_params: Optional[Dict[str, Any]] = None
|
155
|
+
**kwargs # Added to maintain compatibility with the original `chunk` method signature
|
154
156
|
) -> Union[List[Any], List[List[Any]]]:
|
155
157
|
"""Chunk text using the configured chunking strategy."""
|
156
158
|
chunks = self.chunker(text)
|
157
159
|
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
160
|
+
# NOTE: OverlapRefinery is not supported, disabled for now
|
161
|
+
# As soon as Chonkie is updated to support it, we can re-enable it!
|
162
|
+
# Track in https://github.com/chonkie-inc/chonkie/issues/21
|
163
|
+
|
164
|
+
# if add_context:
|
165
|
+
# context_params = context_params or {}
|
166
|
+
# if isinstance(text, str):
|
167
|
+
# chunks = self.add_overlap_context(chunks, **context_params)
|
168
|
+
# else:
|
169
|
+
# chunks = [self.add_overlap_context(c, **context_params) for c in chunks]
|
170
|
+
|
171
|
+
if 'add_context' in kwargs or 'context_params' in kwargs:
|
172
|
+
import warnings
|
173
|
+
warnings.warn(
|
174
|
+
"The `add_context` and `context_params` parameters are currently not supported for Chonkie as of version 1.0.2. They would be added in the future. Track in https://github.com/chonkie-inc/chonkie/issues/21",
|
175
|
+
UserWarning
|
176
|
+
)
|
164
177
|
|
165
178
|
return chunks
|
166
179
|
|
167
180
|
def __call__(
|
168
181
|
self,
|
169
182
|
text: Union[str, List[str]],
|
170
|
-
|
171
|
-
|
183
|
+
# Disable context for now, as it's not supported
|
184
|
+
# add_context: bool = False,
|
185
|
+
# context_params: Optional[Dict[str, Any]] = None
|
186
|
+
**kwargs # Added to maintain compatibility with the original `chunk` method signature
|
172
187
|
) -> Union[List[Any], List[List[Any]]]:
|
173
188
|
"""Make the Chunking instance callable."""
|
174
|
-
return self.chunk(text,
|
189
|
+
return self.chunk(text, **kwargs)
|
175
190
|
|
176
191
|
def __repr__(self) -> str:
|
177
192
|
"""String representation of the Chunking instance."""
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: praisonaiagents
|
3
|
-
Version: 0.0.73
|
3
|
+
Version: 0.0.74
|
4
4
|
Summary: Praison AI agents for completing complex tasks with Self Reflection Agents
|
5
5
|
Author: Mervin Praison
|
6
6
|
Requires-Dist: pydantic
|
@@ -15,7 +15,7 @@ Provides-Extra: knowledge
|
|
15
15
|
Requires-Dist: mem0ai>=0.1.0; extra == "knowledge"
|
16
16
|
Requires-Dist: chromadb==0.5.23; extra == "knowledge"
|
17
17
|
Requires-Dist: markitdown; extra == "knowledge"
|
18
|
-
Requires-Dist: chonkie; extra == "knowledge"
|
18
|
+
Requires-Dist: chonkie>=1.0.2; extra == "knowledge"
|
19
19
|
Provides-Extra: llm
|
20
20
|
Requires-Dist: litellm>=1.50.0; extra == "llm"
|
21
21
|
Requires-Dist: pydantic>=2.4.2; extra == "llm"
|
@@ -7,7 +7,7 @@ praisonaiagents/agents/__init__.py,sha256=_1d6Pqyk9EoBSo7E68sKyd1jDRlN1vxvVIRpoM
|
|
7
7
|
praisonaiagents/agents/agents.py,sha256=uAOHyn77noFvg3sYVFRhQUuc1LDpCMpfLND8CKOXAd4,37971
|
8
8
|
praisonaiagents/agents/autoagents.py,sha256=olYDn--rlJp-SckxILqmREkkgNlzCgEEcAUzfMj-54E,13518
|
9
9
|
praisonaiagents/knowledge/__init__.py,sha256=xL1Eh-a3xsHyIcU4foOWF-JdWYIYBALJH9bge0Ujuto,246
|
10
|
-
praisonaiagents/knowledge/chunking.py,sha256=
|
10
|
+
praisonaiagents/knowledge/chunking.py,sha256=G6wyHa7_8V0_7VpnrrUXbEmUmptlT16ISJYaxmkSgmU,7678
|
11
11
|
praisonaiagents/knowledge/knowledge.py,sha256=fQNREDiwdoisfIxJBLVkteXgq_8Gbypfc3UaZbxf5QY,13210
|
12
12
|
praisonaiagents/llm/__init__.py,sha256=ttPQQJQq6Tah-0updoEXDZFKWtJAM93rBWRoIgxRWO8,689
|
13
13
|
praisonaiagents/llm/llm.py,sha256=1WjHumxzuc8sj81NQ4uVEIetUOrb-i58HYLQW7vjV3M,87921
|
@@ -39,7 +39,7 @@ praisonaiagents/tools/xml_tools.py,sha256=iYTMBEk5l3L3ryQ1fkUnNVYK-Nnua2Kx2S0dxN
|
|
39
39
|
praisonaiagents/tools/yaml_tools.py,sha256=uogAZrhXV9O7xvspAtcTfpKSQYL2nlOTvCQXN94-G9A,14215
|
40
40
|
praisonaiagents/tools/yfinance_tools.py,sha256=s2PBj_1v7oQnOobo2fDbQBACEHl61ftG4beG6Z979ZE,8529
|
41
41
|
praisonaiagents/tools/train/data/generatecot.py,sha256=H6bNh-E2hqL5MW6kX3hqZ05g9ETKN2-kudSjiuU_SD8,19403
|
42
|
-
praisonaiagents-0.0.
|
43
|
-
praisonaiagents-0.0.
|
44
|
-
praisonaiagents-0.0.
|
45
|
-
praisonaiagents-0.0.
|
42
|
+
praisonaiagents-0.0.74.dist-info/METADATA,sha256=322am_MaL8PT1WHZn-uOxB7or2WJpHgnwLOdFVMxi9o,977
|
43
|
+
praisonaiagents-0.0.74.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
44
|
+
praisonaiagents-0.0.74.dist-info/top_level.txt,sha256=_HsRddrJ23iDx5TTqVUVvXG2HeHBL5voshncAMDGjtA,16
|
45
|
+
praisonaiagents-0.0.74.dist-info/RECORD,,
|
File without changes
|
File without changes
|