praisonaiagents 0.0.73__py3-none-any.whl → 0.0.74__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,21 +7,19 @@ class Chunking:
7
7
 
8
8
  CHUNKER_PARAMS = {
9
9
  'token': ['chunk_size', 'chunk_overlap', 'tokenizer'],
10
- 'word': ['chunk_size', 'chunk_overlap', 'tokenizer'],
11
- 'sentence': ['chunk_size', 'chunk_overlap', 'tokenizer'],
12
- 'semantic': ['chunk_size', 'embedding_model', 'tokenizer'],
13
- 'sdpm': ['chunk_size', 'embedding_model', 'tokenizer'],
14
- 'late': ['chunk_size', 'embedding_model', 'tokenizer'],
15
- 'recursive': ['chunk_size', 'tokenizer']
10
+ 'sentence': ['chunk_size', 'chunk_overlap', 'tokenizer_or_token_counter'],
11
+ 'recursive': ['chunk_size', 'tokenizer_or_token_counter'],
12
+ 'semantic': ['chunk_size', 'embedding_model'],
13
+ 'sdpm': ['chunk_size', 'embedding_model'],
14
+ 'late': ['chunk_size', 'embedding_model'],
16
15
  }
17
16
 
18
17
  @cached_property
19
18
  def SUPPORTED_CHUNKERS(self) -> Dict[str, Any]:
20
19
  """Lazy load chunker classes."""
21
20
  try:
22
- from chonkie.chunker import (
21
+ from chonkie import (
23
22
  TokenChunker,
24
- WordChunker,
25
23
  SentenceChunker,
26
24
  SemanticChunker,
27
25
  SDPMChunker,
@@ -35,7 +33,6 @@ class Chunking:
35
33
 
36
34
  return {
37
35
  'token': TokenChunker,
38
- 'word': WordChunker,
39
36
  'sentence': SentenceChunker,
40
37
  'semantic': SemanticChunker,
41
38
  'sdpm': SDPMChunker,
@@ -48,7 +45,7 @@ class Chunking:
48
45
  chunker_type: str = 'token',
49
46
  chunk_size: int = 512,
50
47
  chunk_overlap: int = 128,
51
- tokenizer: str = "gpt2",
48
+ tokenizer_or_token_counter: str = "gpt2",
52
49
  embedding_model: Optional[Union[str, Any]] = None,
53
50
  **kwargs
54
51
  ):
@@ -62,7 +59,7 @@ class Chunking:
62
59
  self.chunker_type = chunker_type
63
60
  self.chunk_size = chunk_size
64
61
  self.chunk_overlap = chunk_overlap
65
- self.tokenizer = tokenizer
62
+ self.tokenizer_or_token_counter = tokenizer_or_token_counter
66
63
  self._embedding_model = embedding_model
67
64
  self.kwargs = kwargs
68
65
 
@@ -89,11 +86,10 @@ class Chunking:
89
86
  if 'chunk_overlap' in allowed_params:
90
87
  params['chunk_overlap'] = self.chunk_overlap
91
88
 
92
- if 'tokenizer' in allowed_params:
93
- if self.chunker_type in ['semantic', 'sdpm', 'late']:
94
- params['tokenizer'] = self.embedding_model.get_tokenizer_or_token_counter()
95
- else:
96
- params['tokenizer'] = self.tokenizer
89
+ if 'tokenizer_or_token_counter' in allowed_params:
90
+ params['tokenizer_or_token_counter'] = self.tokenizer_or_token_counter
91
+ elif 'tokenizer' in allowed_params:
92
+ params['tokenizer'] = self.tokenizer_or_token_counter
97
93
 
98
94
  if 'embedding_model' in allowed_params:
99
95
  params['embedding_model'] = self.embedding_model
@@ -115,63 +111,82 @@ class Chunking:
115
111
 
116
112
  return self._chunker
117
113
 
118
- def _get_overlap_refinery(self, context_size: Optional[int] = None, **kwargs):
119
- """Lazy load the overlap refinery."""
120
- try:
121
- from chonkie.refinery import OverlapRefinery
122
- except ImportError:
123
- raise ImportError("Failed to import OverlapRefinery from chonkie.refinery")
114
+ # NOTE: OverlapRefinery is not supported, disabled for now
115
+ # As soon as Chonkie is updated to support it, we can re-enable it!
116
+ # Track in https://github.com/chonkie-inc/chonkie/issues/21
117
+
118
+ # def _get_overlap_refinery(self, context_size: Optional[int] = None, **kwargs):
119
+ # """Lazy load the overlap refinery."""
120
+ # try:
121
+ # from chonkie.refinery import OverlapRefinery
122
+ # except ImportError:
123
+ # raise ImportError("Failed to import OverlapRefinery from chonkie.refinery")
124
124
 
125
- if context_size is None:
126
- context_size = self.chunk_overlap
125
+ # if context_size is None:
126
+ # context_size = self.chunk_overlap
127
127
 
128
- return OverlapRefinery(
129
- context_size=context_size,
130
- tokenizer=self.chunker.tokenizer,
131
- **kwargs
132
- )
128
+ # return OverlapRefinery(
129
+ # context_size=context_size,
130
+ # tokenizer=self.chunker.tokenizer,
131
+ # **kwargs
132
+ # )
133
133
 
134
- def add_overlap_context(
135
- self,
136
- chunks: List[Any],
137
- context_size: int = None,
138
- mode: str = "suffix",
139
- merge_context: bool = True
140
- ) -> List[Any]:
141
- """Add overlap context to chunks using OverlapRefinery."""
142
- refinery = self._get_overlap_refinery(
143
- context_size=context_size,
144
- mode=mode,
145
- merge_context=merge_context
146
- )
147
- return refinery.refine(chunks)
134
+ # def add_overlap_context(
135
+ # self,
136
+ # chunks: List[Any],
137
+ # context_size: int = None,
138
+ # mode: str = "suffix",
139
+ # merge_context: bool = True
140
+ # ) -> List[Any]:
141
+ # """Add overlap context to chunks using OverlapRefinery."""
142
+ # refinery = self._get_overlap_refinery(
143
+ # context_size=context_size,
144
+ # mode=mode,
145
+ # merge_context=merge_context
146
+ # )
147
+ # return refinery.refine(chunks)
148
148
 
149
149
  def chunk(
150
150
  self,
151
151
  text: Union[str, List[str]],
152
- add_context: bool = False,
153
- context_params: Optional[Dict[str, Any]] = None
152
+ # Disable context for now, as it's not supported
153
+ # add_context: bool = False,
154
+ # context_params: Optional[Dict[str, Any]] = None
155
+ **kwargs # Added to maintain compatibility with the original `chunk` method signature
154
156
  ) -> Union[List[Any], List[List[Any]]]:
155
157
  """Chunk text using the configured chunking strategy."""
156
158
  chunks = self.chunker(text)
157
159
 
158
- if add_context:
159
- context_params = context_params or {}
160
- if isinstance(text, str):
161
- chunks = self.add_overlap_context(chunks, **context_params)
162
- else:
163
- chunks = [self.add_overlap_context(c, **context_params) for c in chunks]
160
+ # NOTE: OverlapRefinery is not supported, disabled for now
161
+ # As soon as Chonkie is updated to support it, we can re-enable it!
162
+ # Track in https://github.com/chonkie-inc/chonkie/issues/21
163
+
164
+ # if add_context:
165
+ # context_params = context_params or {}
166
+ # if isinstance(text, str):
167
+ # chunks = self.add_overlap_context(chunks, **context_params)
168
+ # else:
169
+ # chunks = [self.add_overlap_context(c, **context_params) for c in chunks]
170
+
171
+ if 'add_context' in kwargs or 'context_params' in kwargs:
172
+ import warnings
173
+ warnings.warn(
174
+ "The `add_context` and `context_params` parameters are currently not supported for Chonkie as of version 1.0.2. They would be added in the future. Track in https://github.com/chonkie-inc/chonkie/issues/21",
175
+ UserWarning
176
+ )
164
177
 
165
178
  return chunks
166
179
 
167
180
  def __call__(
168
181
  self,
169
182
  text: Union[str, List[str]],
170
- add_context: bool = False,
171
- context_params: Optional[Dict[str, Any]] = None
183
+ # Disable context for now, as it's not supported
184
+ # add_context: bool = False,
185
+ # context_params: Optional[Dict[str, Any]] = None
186
+ **kwargs # Added to maintain compatibility with the original `chunk` method signature
172
187
  ) -> Union[List[Any], List[List[Any]]]:
173
188
  """Make the Chunking instance callable."""
174
- return self.chunk(text, add_context, context_params)
189
+ return self.chunk(text, **kwargs)
175
190
 
176
191
  def __repr__(self) -> str:
177
192
  """String representation of the Chunking instance."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: praisonaiagents
3
- Version: 0.0.73
3
+ Version: 0.0.74
4
4
  Summary: Praison AI agents for completing complex tasks with Self Reflection Agents
5
5
  Author: Mervin Praison
6
6
  Requires-Dist: pydantic
@@ -15,7 +15,7 @@ Provides-Extra: knowledge
15
15
  Requires-Dist: mem0ai>=0.1.0; extra == "knowledge"
16
16
  Requires-Dist: chromadb==0.5.23; extra == "knowledge"
17
17
  Requires-Dist: markitdown; extra == "knowledge"
18
- Requires-Dist: chonkie; extra == "knowledge"
18
+ Requires-Dist: chonkie>=1.0.2; extra == "knowledge"
19
19
  Provides-Extra: llm
20
20
  Requires-Dist: litellm>=1.50.0; extra == "llm"
21
21
  Requires-Dist: pydantic>=2.4.2; extra == "llm"
@@ -7,7 +7,7 @@ praisonaiagents/agents/__init__.py,sha256=_1d6Pqyk9EoBSo7E68sKyd1jDRlN1vxvVIRpoM
7
7
  praisonaiagents/agents/agents.py,sha256=uAOHyn77noFvg3sYVFRhQUuc1LDpCMpfLND8CKOXAd4,37971
8
8
  praisonaiagents/agents/autoagents.py,sha256=olYDn--rlJp-SckxILqmREkkgNlzCgEEcAUzfMj-54E,13518
9
9
  praisonaiagents/knowledge/__init__.py,sha256=xL1Eh-a3xsHyIcU4foOWF-JdWYIYBALJH9bge0Ujuto,246
10
- praisonaiagents/knowledge/chunking.py,sha256=FzoNY0q8MkvG4gADqk4JcRhmH3lcEHbRdonDgitQa30,6624
10
+ praisonaiagents/knowledge/chunking.py,sha256=G6wyHa7_8V0_7VpnrrUXbEmUmptlT16ISJYaxmkSgmU,7678
11
11
  praisonaiagents/knowledge/knowledge.py,sha256=fQNREDiwdoisfIxJBLVkteXgq_8Gbypfc3UaZbxf5QY,13210
12
12
  praisonaiagents/llm/__init__.py,sha256=ttPQQJQq6Tah-0updoEXDZFKWtJAM93rBWRoIgxRWO8,689
13
13
  praisonaiagents/llm/llm.py,sha256=1WjHumxzuc8sj81NQ4uVEIetUOrb-i58HYLQW7vjV3M,87921
@@ -39,7 +39,7 @@ praisonaiagents/tools/xml_tools.py,sha256=iYTMBEk5l3L3ryQ1fkUnNVYK-Nnua2Kx2S0dxN
39
39
  praisonaiagents/tools/yaml_tools.py,sha256=uogAZrhXV9O7xvspAtcTfpKSQYL2nlOTvCQXN94-G9A,14215
40
40
  praisonaiagents/tools/yfinance_tools.py,sha256=s2PBj_1v7oQnOobo2fDbQBACEHl61ftG4beG6Z979ZE,8529
41
41
  praisonaiagents/tools/train/data/generatecot.py,sha256=H6bNh-E2hqL5MW6kX3hqZ05g9ETKN2-kudSjiuU_SD8,19403
42
- praisonaiagents-0.0.73.dist-info/METADATA,sha256=6Cwjw1SNW5j4ribad7S2mOlXZKW9o_6HRJhFSBVKmPY,970
43
- praisonaiagents-0.0.73.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
44
- praisonaiagents-0.0.73.dist-info/top_level.txt,sha256=_HsRddrJ23iDx5TTqVUVvXG2HeHBL5voshncAMDGjtA,16
45
- praisonaiagents-0.0.73.dist-info/RECORD,,
42
+ praisonaiagents-0.0.74.dist-info/METADATA,sha256=322am_MaL8PT1WHZn-uOxB7or2WJpHgnwLOdFVMxi9o,977
43
+ praisonaiagents-0.0.74.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
44
+ praisonaiagents-0.0.74.dist-info/top_level.txt,sha256=_HsRddrJ23iDx5TTqVUVvXG2HeHBL5voshncAMDGjtA,16
45
+ praisonaiagents-0.0.74.dist-info/RECORD,,