praisonaiagents 0.0.29__py3-none-any.whl → 0.0.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- praisonaiagents/__init__.py +4 -2
- praisonaiagents/agent/agent.py +679 -235
- praisonaiagents/agents/agents.py +169 -34
- praisonaiagents/knowledge/__init__.py +8 -0
- praisonaiagents/knowledge/chunking.py +182 -0
- praisonaiagents/knowledge/knowledge.py +321 -0
- praisonaiagents/llm/__init__.py +20 -0
- praisonaiagents/llm/llm.py +1023 -0
- praisonaiagents/main.py +46 -9
- praisonaiagents/memory/memory.py +6 -3
- praisonaiagents/process/process.py +206 -90
- praisonaiagents/task/task.py +104 -4
- praisonaiagents/tools/pandas_tools.py +3 -0
- praisonaiagents/tools/yfinance_tools.py +9 -1
- praisonaiagents-0.0.53.dist-info/METADATA +22 -0
- {praisonaiagents-0.0.29.dist-info → praisonaiagents-0.0.53.dist-info}/RECORD +18 -13
- praisonaiagents-0.0.29.dist-info/METADATA +0 -10
- {praisonaiagents-0.0.29.dist-info → praisonaiagents-0.0.53.dist-info}/WHEEL +0 -0
- {praisonaiagents-0.0.29.dist-info → praisonaiagents-0.0.53.dist-info}/top_level.txt +0 -0
praisonaiagents/agents/agents.py
CHANGED
@@ -12,6 +12,7 @@ from ..agent.agent import Agent
|
|
12
12
|
from ..task.task import Task
|
13
13
|
from ..process.process import Process, LoopItems
|
14
14
|
import asyncio
|
15
|
+
import uuid
|
15
16
|
|
16
17
|
# Set up logger
|
17
18
|
logger = logging.getLogger(__name__)
|
@@ -44,10 +45,29 @@ def process_video(video_path: str, seconds_per_frame=2):
|
|
44
45
|
return base64_frames
|
45
46
|
|
46
47
|
class PraisonAIAgents:
|
47
|
-
def __init__(self, agents, tasks=None, verbose=0, completion_checker=None, max_retries=5, process="sequential", manager_llm=None, memory=False, memory_config=None, embedder=None):
|
48
|
+
def __init__(self, agents, tasks=None, verbose=0, completion_checker=None, max_retries=5, process="sequential", manager_llm=None, memory=False, memory_config=None, embedder=None, user_id=None, max_iter=10):
|
49
|
+
# Add check at the start if memory is requested
|
50
|
+
if memory:
|
51
|
+
try:
|
52
|
+
from ..memory.memory import Memory
|
53
|
+
MEMORY_AVAILABLE = True
|
54
|
+
except ImportError:
|
55
|
+
raise ImportError(
|
56
|
+
"Memory features requested but memory dependencies not installed. "
|
57
|
+
"Please install with: pip install \"praisonaiagents[memory]\""
|
58
|
+
)
|
59
|
+
|
48
60
|
if not agents:
|
49
61
|
raise ValueError("At least one agent must be provided")
|
50
|
-
|
62
|
+
|
63
|
+
self.run_id = str(uuid.uuid4()) # Auto-generate run_id
|
64
|
+
self.user_id = user_id or "praison" # Optional user_id
|
65
|
+
self.max_iter = max_iter # Add max_iter parameter
|
66
|
+
|
67
|
+
# Pass user_id to each agent
|
68
|
+
for agent in agents:
|
69
|
+
agent.user_id = self.user_id
|
70
|
+
|
51
71
|
self.agents = agents
|
52
72
|
self.tasks = {}
|
53
73
|
if max_retries < 3:
|
@@ -211,20 +231,56 @@ class PraisonAIAgents:
|
|
211
231
|
|
212
232
|
executor_agent = task.agent
|
213
233
|
|
234
|
+
# Ensure tools are available from both task and agent
|
235
|
+
tools = task.tools or []
|
236
|
+
if executor_agent and executor_agent.tools:
|
237
|
+
tools.extend(executor_agent.tools)
|
238
|
+
|
214
239
|
task_prompt = f"""
|
215
240
|
You need to do the following task: {task.description}.
|
216
241
|
Expected Output: {task.expected_output}.
|
217
242
|
"""
|
218
243
|
if task.context:
|
219
|
-
context_results =
|
220
|
-
for
|
221
|
-
if
|
222
|
-
context_results
|
223
|
-
|
224
|
-
context_results
|
244
|
+
context_results = [] # Use list to avoid duplicates
|
245
|
+
for context_item in task.context:
|
246
|
+
if isinstance(context_item, str):
|
247
|
+
context_results.append(f"Input Content:\n{context_item}")
|
248
|
+
elif isinstance(context_item, list):
|
249
|
+
context_results.append(f"Input Content: {' '.join(str(x) for x in context_item)}")
|
250
|
+
elif hasattr(context_item, 'result'): # Task object
|
251
|
+
if context_item.result:
|
252
|
+
context_results.append(
|
253
|
+
f"Result of previous task {context_item.name if context_item.name else context_item.description}:\n{context_item.result.raw}"
|
254
|
+
)
|
255
|
+
else:
|
256
|
+
context_results.append(
|
257
|
+
f"Previous task {context_item.name if context_item.name else context_item.description} has no result yet."
|
258
|
+
)
|
259
|
+
elif isinstance(context_item, dict) and "vector_store" in context_item:
|
260
|
+
from ..knowledge.knowledge import Knowledge
|
261
|
+
try:
|
262
|
+
# Handle both string and dict configs
|
263
|
+
cfg = context_item["vector_store"]
|
264
|
+
if isinstance(cfg, str):
|
265
|
+
cfg = json.loads(cfg)
|
266
|
+
|
267
|
+
knowledge = Knowledge(config={"vector_store": cfg}, verbose=self.verbose)
|
268
|
+
|
269
|
+
# Only use user_id as filter
|
270
|
+
db_results = knowledge.search(
|
271
|
+
task.description,
|
272
|
+
user_id=self.user_id if self.user_id else None
|
273
|
+
)
|
274
|
+
context_results.append(f"[DB Context]: {str(db_results)}")
|
275
|
+
except Exception as e:
|
276
|
+
context_results.append(f"[Vector DB Error]: {e}")
|
277
|
+
|
278
|
+
# Join unique context results
|
279
|
+
unique_contexts = list(dict.fromkeys(context_results)) # Remove duplicates
|
225
280
|
task_prompt += f"""
|
226
|
-
|
227
|
-
|
281
|
+
Context:
|
282
|
+
|
283
|
+
{' '.join(unique_contexts)}
|
228
284
|
"""
|
229
285
|
task_prompt += "Please provide only the final result of your work. Do not add any conversation or extra explanation."
|
230
286
|
|
@@ -267,14 +323,14 @@ Here are the results of previous tasks that might be useful:\n
|
|
267
323
|
|
268
324
|
agent_output = await executor_agent.achat(
|
269
325
|
_get_multimodal_message(task_prompt, task.images),
|
270
|
-
tools=
|
326
|
+
tools=tools,
|
271
327
|
output_json=task.output_json,
|
272
328
|
output_pydantic=task.output_pydantic
|
273
329
|
)
|
274
330
|
else:
|
275
331
|
agent_output = await executor_agent.achat(
|
276
332
|
task_prompt,
|
277
|
-
tools=
|
333
|
+
tools=tools,
|
278
334
|
output_json=task.output_json,
|
279
335
|
output_pydantic=task.output_pydantic
|
280
336
|
)
|
@@ -334,7 +390,8 @@ Here are the results of previous tasks that might be useful:\n
|
|
334
390
|
task.status = "completed"
|
335
391
|
# Run execute_callback for memory operations
|
336
392
|
try:
|
337
|
-
|
393
|
+
# Use the new sync wrapper to avoid pending coroutine issues
|
394
|
+
task.execute_callback_sync(task_output)
|
338
395
|
except Exception as e:
|
339
396
|
logger.error(f"Error executing memory callback for task {task_id}: {e}")
|
340
397
|
logger.exception(e)
|
@@ -343,7 +400,12 @@ Here are the results of previous tasks that might be useful:\n
|
|
343
400
|
if task.callback:
|
344
401
|
try:
|
345
402
|
if asyncio.iscoroutinefunction(task.callback):
|
346
|
-
|
403
|
+
if asyncio.get_event_loop().is_running():
|
404
|
+
asyncio.create_task(task.callback(task_output))
|
405
|
+
else:
|
406
|
+
loop = asyncio.new_event_loop()
|
407
|
+
asyncio.set_event_loop(loop)
|
408
|
+
loop.run_until_complete(task.callback(task_output))
|
347
409
|
else:
|
348
410
|
task.callback(task_output)
|
349
411
|
except Exception as e:
|
@@ -376,15 +438,30 @@ Here are the results of previous tasks that might be useful:\n
|
|
376
438
|
tasks=self.tasks,
|
377
439
|
agents=self.agents,
|
378
440
|
manager_llm=self.manager_llm,
|
379
|
-
verbose=self.verbose
|
441
|
+
verbose=self.verbose,
|
442
|
+
max_iter=self.max_iter
|
380
443
|
)
|
381
444
|
|
382
445
|
if self.process == "workflow":
|
446
|
+
# Collect all tasks that should run in parallel
|
447
|
+
parallel_tasks = []
|
383
448
|
async for task_id in process.aworkflow():
|
384
|
-
if self.tasks[task_id].async_execution:
|
385
|
-
|
386
|
-
|
387
|
-
|
449
|
+
if self.tasks[task_id].async_execution and self.tasks[task_id].is_start:
|
450
|
+
parallel_tasks.append(task_id)
|
451
|
+
elif parallel_tasks:
|
452
|
+
# Execute collected parallel tasks
|
453
|
+
await asyncio.gather(*[self.arun_task(t) for t in parallel_tasks])
|
454
|
+
parallel_tasks = []
|
455
|
+
# Run the current non-parallel task
|
456
|
+
if self.tasks[task_id].async_execution:
|
457
|
+
await self.arun_task(task_id)
|
458
|
+
else:
|
459
|
+
self.run_task(task_id)
|
460
|
+
|
461
|
+
# Execute any remaining parallel tasks
|
462
|
+
if parallel_tasks:
|
463
|
+
await asyncio.gather(*[self.arun_task(t) for t in parallel_tasks])
|
464
|
+
|
388
465
|
elif self.process == "sequential":
|
389
466
|
async for task_id in process.asequential():
|
390
467
|
if self.tasks[task_id].async_execution:
|
@@ -400,8 +477,16 @@ Here are the results of previous tasks that might be useful:\n
|
|
400
477
|
else:
|
401
478
|
self.run_task(task_id)
|
402
479
|
|
403
|
-
async def astart(self):
|
480
|
+
async def astart(self, content=None, **kwargs):
|
404
481
|
"""Async version of start method"""
|
482
|
+
if content:
|
483
|
+
# Add content to context of all tasks
|
484
|
+
for task in self.tasks.values():
|
485
|
+
if isinstance(content, (str, list)):
|
486
|
+
if not task.context:
|
487
|
+
task.context = []
|
488
|
+
task.context.append(content)
|
489
|
+
|
405
490
|
await self.arun_all_tasks()
|
406
491
|
return {
|
407
492
|
"task_status": self.get_all_tasks_status(),
|
@@ -458,16 +543,48 @@ You need to do the following task: {task.description}.
|
|
458
543
|
Expected Output: {task.expected_output}.
|
459
544
|
"""
|
460
545
|
if task.context:
|
461
|
-
context_results =
|
462
|
-
for
|
463
|
-
if
|
464
|
-
context_results
|
465
|
-
|
466
|
-
context_results
|
546
|
+
context_results = [] # Use list to avoid duplicates
|
547
|
+
for context_item in task.context:
|
548
|
+
if isinstance(context_item, str):
|
549
|
+
context_results.append(f"Input Content:\n{context_item}")
|
550
|
+
elif isinstance(context_item, list):
|
551
|
+
context_results.append(f"Input Content: {' '.join(str(x) for x in context_item)}")
|
552
|
+
elif hasattr(context_item, 'result'): # Task object
|
553
|
+
if context_item.result:
|
554
|
+
context_results.append(
|
555
|
+
f"Result of previous task {context_item.name if context_item.name else context_item.description}:\n{context_item.result.raw}"
|
556
|
+
)
|
557
|
+
else:
|
558
|
+
context_results.append(
|
559
|
+
f"Previous task {context_item.name if context_item.name else context_item.description} has no result yet."
|
560
|
+
)
|
561
|
+
elif isinstance(context_item, dict) and "vector_store" in context_item:
|
562
|
+
from ..knowledge.knowledge import Knowledge
|
563
|
+
try:
|
564
|
+
# Handle both string and dict configs
|
565
|
+
cfg = context_item["vector_store"]
|
566
|
+
if isinstance(cfg, str):
|
567
|
+
cfg = json.loads(cfg)
|
568
|
+
|
569
|
+
knowledge = Knowledge(config={"vector_store": cfg}, verbose=self.verbose)
|
570
|
+
|
571
|
+
# Only use user_id as filter
|
572
|
+
db_results = knowledge.search(
|
573
|
+
task.description,
|
574
|
+
user_id=self.user_id if self.user_id else None
|
575
|
+
)
|
576
|
+
context_results.append(f"[DB Context]: {str(db_results)}")
|
577
|
+
except Exception as e:
|
578
|
+
context_results.append(f"[Vector DB Error]: {e}")
|
579
|
+
|
580
|
+
# Join unique context results
|
581
|
+
unique_contexts = list(dict.fromkeys(context_results)) # Remove duplicates
|
467
582
|
task_prompt += f"""
|
468
|
-
|
469
|
-
|
583
|
+
Context:
|
584
|
+
|
585
|
+
{' '.join(unique_contexts)}
|
470
586
|
"""
|
587
|
+
|
471
588
|
# Add memory context if available
|
472
589
|
if task.memory:
|
473
590
|
try:
|
@@ -596,8 +713,8 @@ Here are the results of previous tasks that might be useful:\n
|
|
596
713
|
task.status = "completed"
|
597
714
|
# Run execute_callback for memory operations
|
598
715
|
try:
|
599
|
-
|
600
|
-
|
716
|
+
# Use the new sync wrapper to avoid pending coroutine issues
|
717
|
+
task.execute_callback_sync(task_output)
|
601
718
|
except Exception as e:
|
602
719
|
logger.error(f"Error executing memory callback for task {task_id}: {e}")
|
603
720
|
logger.exception(e)
|
@@ -606,7 +723,12 @@ Here are the results of previous tasks that might be useful:\n
|
|
606
723
|
if task.callback:
|
607
724
|
try:
|
608
725
|
if asyncio.iscoroutinefunction(task.callback):
|
609
|
-
|
726
|
+
if asyncio.get_event_loop().is_running():
|
727
|
+
asyncio.create_task(task.callback(task_output))
|
728
|
+
else:
|
729
|
+
loop = asyncio.new_event_loop()
|
730
|
+
asyncio.set_event_loop(loop)
|
731
|
+
loop.run_until_complete(task.callback(task_output))
|
610
732
|
else:
|
611
733
|
task.callback(task_output)
|
612
734
|
except Exception as e:
|
@@ -639,7 +761,8 @@ Here are the results of previous tasks that might be useful:\n
|
|
639
761
|
tasks=self.tasks,
|
640
762
|
agents=self.agents,
|
641
763
|
manager_llm=self.manager_llm,
|
642
|
-
verbose=self.verbose
|
764
|
+
verbose=self.verbose,
|
765
|
+
max_iter=self.max_iter
|
643
766
|
)
|
644
767
|
|
645
768
|
if self.process == "workflow":
|
@@ -678,12 +801,24 @@ Here are the results of previous tasks that might be useful:\n
|
|
678
801
|
return str(agent[0])
|
679
802
|
return None
|
680
803
|
|
681
|
-
def start(self):
|
804
|
+
def start(self, content=None, **kwargs):
    """Start agent execution with optional content and config.

    Args:
        content: Optional ``str`` or ``list`` added to the context of every
            task before execution. Falsy values and values of any other
            type are ignored.
        **kwargs: Accepted for call-site compatibility; not used here.

    Returns:
        dict: ``"task_status"`` holds the value of
        ``self.get_all_tasks_status()`` and ``"task_results"`` maps each
        task id in ``self.tasks`` to ``self.get_task_result(task_id)``.
    """
    # The type check does not depend on the task, so do it once up front.
    if content and isinstance(content, (str, list)):
        for task in self.tasks.values():
            # Build a new list instead of appending in place: the existing
            # context may be a list owned by the caller (or shared between
            # several tasks) and must not be mutated behind their back.
            # This also tolerates a context given as a tuple.
            existing_context = task.context or []
            task.context = [*existing_context, content]

    # Run tasks as before
    self.run_all_tasks()
    return {
        "task_status": self.get_all_tasks_status(),
        "task_results": {task_id: self.get_task_result(task_id) for task_id in self.tasks}
    }
|
687
822
|
|
688
823
|
def set_state(self, key: str, value: Any) -> None:
|
689
824
|
"""Set a state value"""
|
@@ -0,0 +1,182 @@
|
|
1
|
+
from typing import List, Union, Optional, Dict, Any
|
2
|
+
from functools import cached_property
|
3
|
+
import importlib
|
4
|
+
|
5
|
+
class Chunking:
    """A unified class for text chunking with various chunking strategies.

    The concrete chunker classes come from the optional ``chonkie`` package
    and are imported lazily, so constructing a ``Chunking`` instance does not
    require ``chonkie`` to be installed; the first access to ``chunker``
    (directly or through ``chunk``/``__call__``) does.
    """

    # Constructor keywords that are forwarded to each chunker type.
    CHUNKER_PARAMS = {
        'token': ['chunk_size', 'chunk_overlap', 'tokenizer'],
        'word': ['chunk_size', 'chunk_overlap', 'tokenizer'],
        'sentence': ['chunk_size', 'chunk_overlap', 'tokenizer'],
        'semantic': ['chunk_size', 'embedding_model', 'tokenizer'],
        'sdpm': ['chunk_size', 'embedding_model', 'tokenizer'],
        'late': ['chunk_size', 'embedding_model', 'tokenizer'],
        'recursive': ['chunk_size', 'tokenizer']
    }

    # Embedding model loaded when an embedding-based chunker is requested
    # without an explicit ``embedding_model``.
    _DEFAULT_EMBEDDING_MODEL = "all-MiniLM-L6-v2"

    # Shared message for a missing/unusable chonkie installation.
    _INSTALL_HINT = (
        "chonkie package not found. Please install it using: pip install 'praisonaiagents[knowledge]'"
    )

    @cached_property
    def SUPPORTED_CHUNKERS(self) -> Dict[str, Any]:
        """Lazy load chunker classes.

        Returns:
            Mapping from chunker type name to the chonkie chunker class.

        Raises:
            ImportError: If the chunker classes cannot be imported from
                ``chonkie.chunker``. The original error is attached as
                ``__cause__`` so the real reason stays visible.
        """
        try:
            from chonkie.chunker import (
                TokenChunker,
                WordChunker,
                SentenceChunker,
                SemanticChunker,
                SDPMChunker,
                LateChunker,
                RecursiveChunker
            )
        except ImportError as err:
            raise ImportError(self._INSTALL_HINT) from err

        return {
            'token': TokenChunker,
            'word': WordChunker,
            'sentence': SentenceChunker,
            'semantic': SemanticChunker,
            'sdpm': SDPMChunker,
            'late': LateChunker,
            'recursive': RecursiveChunker
        }

    def __init__(
        self,
        chunker_type: str = 'token',
        chunk_size: int = 512,
        chunk_overlap: int = 128,
        tokenizer: str = "gpt2",
        embedding_model: Optional[Union[str, Any]] = None,
        **kwargs
    ):
        """Initialize the Chunking class.

        Args:
            chunker_type: One of the keys of ``CHUNKER_PARAMS``.
            chunk_size: Forwarded to the chunker as ``chunk_size``.
            chunk_overlap: Forwarded as ``chunk_overlap`` to chunker types
                that list it in ``CHUNKER_PARAMS``; also the default
                ``context_size`` for overlap refinement.
            tokenizer: Forwarded as ``tokenizer`` to chunker types that do
                not take an embedding model.
            embedding_model: Model name (``str``), a ready-made embeddings
                object, or ``None`` to use the default model for
                embedding-based chunkers.
            **kwargs: Extra options; only keys listed in ``CHUNKER_PARAMS``
                for the chosen type are forwarded to the chunker.

        Raises:
            ValueError: If ``chunker_type`` is not a key of
                ``CHUNKER_PARAMS``.
        """
        if chunker_type not in self.CHUNKER_PARAMS:
            raise ValueError(
                f"Unsupported chunker type: {chunker_type}. "
                f"Must be one of: {list(self.CHUNKER_PARAMS.keys())}"
            )

        self.chunker_type = chunker_type
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.tokenizer = tokenizer
        self._embedding_model = embedding_model
        self.kwargs = kwargs

        # Initialize these as None for lazy loading
        self._chunker = None
        self._embeddings = None

    def _uses_embeddings(self) -> bool:
        """Return True when the current chunker type takes an embedding model."""
        # Derived from CHUNKER_PARAMS so the table is the single source of truth.
        return 'embedding_model' in self.CHUNKER_PARAMS[self.chunker_type]

    @cached_property
    def embedding_model(self):
        """Lazy load the embedding model.

        Returns:
            The object passed as ``embedding_model`` when it is not a string;
            embeddings loaded through ``AutoEmbeddings.get_embeddings`` when
            it is a string, or when it is ``None`` and the chunker type takes
            an embedding model; otherwise ``None``.

        Raises:
            ImportError: If ``chonkie.embeddings`` cannot be imported.
        """
        if self._embedding_model is None:
            if not self._uses_embeddings():
                return None
            model_name = self._DEFAULT_EMBEDDING_MODEL
        elif isinstance(self._embedding_model, str):
            model_name = self._embedding_model
        else:
            # Already an embeddings object supplied by the caller.
            return self._embedding_model

        try:
            from chonkie.embeddings import AutoEmbeddings
        except ImportError as err:
            raise ImportError(self._INSTALL_HINT) from err
        return AutoEmbeddings.get_embeddings(model_name)

    def _get_chunker_params(self) -> Dict[str, Any]:
        """Get the appropriate parameters for the current chunker type."""
        allowed_params = self.CHUNKER_PARAMS[self.chunker_type]
        params = {'chunk_size': self.chunk_size}

        if 'chunk_overlap' in allowed_params:
            params['chunk_overlap'] = self.chunk_overlap

        if 'tokenizer' in allowed_params:
            if self._uses_embeddings():
                # Embedding-based chunkers take their tokenizer from the
                # embedding model; the ``tokenizer`` argument is not used.
                params['tokenizer'] = self.embedding_model.get_tokenizer_or_token_counter()
            else:
                params['tokenizer'] = self.tokenizer

        if 'embedding_model' in allowed_params:
            params['embedding_model'] = self.embedding_model

        # Add any additional kwargs that are in allowed_params.
        # NOTE(review): every name in the built-in CHUNKER_PARAMS table is
        # also a named __init__ parameter, so with that table this loop never
        # matches; it only takes effect if CHUNKER_PARAMS is extended.
        for key, value in self.kwargs.items():
            if key in allowed_params:
                params[key] = value

        return params

    @cached_property
    def chunker(self):
        """Lazy load the chunker instance."""
        if self._chunker is None:
            chunker_cls = self.SUPPORTED_CHUNKERS[self.chunker_type]
            self._chunker = chunker_cls(**self._get_chunker_params())

        return self._chunker

    def _get_overlap_refinery(self, context_size: Optional[int] = None, **kwargs):
        """Lazy load the overlap refinery.

        Args:
            context_size: Passed to ``OverlapRefinery``; defaults to
                ``self.chunk_overlap`` when ``None``.
            **kwargs: Passed through to ``OverlapRefinery``.

        Raises:
            ImportError: If ``OverlapRefinery`` cannot be imported.
        """
        try:
            from chonkie.refinery import OverlapRefinery
        except ImportError as err:
            raise ImportError("Failed to import OverlapRefinery from chonkie.refinery") from err

        if context_size is None:
            context_size = self.chunk_overlap

        return OverlapRefinery(
            context_size=context_size,
            tokenizer=self.chunker.tokenizer,
            **kwargs
        )

    def add_overlap_context(
        self,
        chunks: List[Any],
        context_size: Optional[int] = None,
        mode: str = "suffix",
        merge_context: bool = True
    ) -> List[Any]:
        """Add overlap context to chunks using OverlapRefinery.

        Args:
            chunks: Chunks to refine.
            context_size: Passed to the refinery; ``None`` means
                ``self.chunk_overlap``.
            mode: Passed to ``OverlapRefinery`` as ``mode``.
            merge_context: Passed to ``OverlapRefinery`` as ``merge_context``.

        Returns:
            The result of ``OverlapRefinery.refine(chunks)``.
        """
        refinery = self._get_overlap_refinery(
            context_size=context_size,
            mode=mode,
            merge_context=merge_context
        )
        return refinery.refine(chunks)

    def chunk(
        self,
        text: Union[str, List[str]],
        add_context: bool = False,
        context_params: Optional[Dict[str, Any]] = None
    ) -> Union[List[Any], List[List[Any]]]:
        """Chunk text using the configured chunking strategy.

        Args:
            text: A single string or a list of strings.
            add_context: When true, run the chunks through
                ``add_overlap_context``.
            context_params: Keyword arguments for ``add_overlap_context``.

        Returns:
            Whatever the underlying chunker returns for ``text``, optionally
            refined with overlap context.
        """
        chunks = self.chunker(text)

        if add_context:
            context_params = context_params or {}
            if isinstance(text, str):
                chunks = self.add_overlap_context(chunks, **context_params)
            else:
                # For a list input each element of ``chunks`` is refined
                # separately.
                chunks = [self.add_overlap_context(c, **context_params) for c in chunks]

        return chunks

    def __call__(
        self,
        text: Union[str, List[str]],
        add_context: bool = False,
        context_params: Optional[Dict[str, Any]] = None
    ) -> Union[List[Any], List[List[Any]]]:
        """Make the Chunking instance callable."""
        return self.chunk(text, add_context, context_params)

    def __repr__(self) -> str:
        """String representation of the Chunking instance."""
        return (
            f"Chunking(chunker_type='{self.chunker_type}', "
            f"chunk_size={self.chunk_size}, "
            f"chunk_overlap={self.chunk_overlap})"
        )
|