lionagi 0.0.209__py3-none-any.whl → 0.0.211__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. lionagi/__init__.py +2 -4
  2. lionagi/api_service/base_endpoint.py +65 -0
  3. lionagi/api_service/base_rate_limiter.py +121 -0
  4. lionagi/api_service/base_service.py +146 -0
  5. lionagi/api_service/chat_completion.py +6 -0
  6. lionagi/api_service/embeddings.py +6 -0
  7. lionagi/api_service/payload_package.py +47 -0
  8. lionagi/api_service/status_tracker.py +29 -0
  9. lionagi/core/__init__.py +3 -3
  10. lionagi/core/branch.py +22 -3
  11. lionagi/core/session.py +14 -2
  12. lionagi/schema/__init__.py +5 -8
  13. lionagi/schema/base_schema.py +821 -0
  14. lionagi/structures/graph.py +1 -1
  15. lionagi/structures/relationship.py +1 -1
  16. lionagi/structures/structure.py +1 -1
  17. lionagi/tools/tool_manager.py +0 -163
  18. lionagi/tools/tool_util.py +2 -1
  19. lionagi/utils/__init__.py +5 -6
  20. lionagi/utils/api_util.py +6 -1
  21. lionagi/version.py +1 -1
  22. {lionagi-0.0.209.dist-info → lionagi-0.0.211.dist-info}/METADATA +3 -18
  23. lionagi-0.0.211.dist-info/RECORD +56 -0
  24. lionagi/agents/planner.py +0 -1
  25. lionagi/agents/prompter.py +0 -1
  26. lionagi/agents/scorer.py +0 -1
  27. lionagi/agents/summarizer.py +0 -1
  28. lionagi/agents/validator.py +0 -1
  29. lionagi/bridge/__init__.py +0 -22
  30. lionagi/bridge/langchain.py +0 -195
  31. lionagi/bridge/llama_index.py +0 -266
  32. lionagi/datastores/__init__.py +0 -1
  33. lionagi/datastores/chroma.py +0 -1
  34. lionagi/datastores/deeplake.py +0 -1
  35. lionagi/datastores/elasticsearch.py +0 -1
  36. lionagi/datastores/lantern.py +0 -1
  37. lionagi/datastores/pinecone.py +0 -1
  38. lionagi/datastores/postgres.py +0 -1
  39. lionagi/datastores/qdrant.py +0 -1
  40. lionagi/iservices/anthropic.py +0 -79
  41. lionagi/iservices/anyscale.py +0 -0
  42. lionagi/iservices/azure.py +0 -1
  43. lionagi/iservices/bedrock.py +0 -0
  44. lionagi/iservices/everlyai.py +0 -0
  45. lionagi/iservices/gemini.py +0 -0
  46. lionagi/iservices/gpt4all.py +0 -0
  47. lionagi/iservices/huggingface.py +0 -0
  48. lionagi/iservices/litellm.py +0 -33
  49. lionagi/iservices/localai.py +0 -0
  50. lionagi/iservices/openllm.py +0 -0
  51. lionagi/iservices/openrouter.py +0 -44
  52. lionagi/iservices/perplexity.py +0 -0
  53. lionagi/iservices/predibase.py +0 -0
  54. lionagi/iservices/rungpt.py +0 -0
  55. lionagi/iservices/vllm.py +0 -0
  56. lionagi/iservices/xinference.py +0 -0
  57. lionagi/loaders/__init__.py +0 -18
  58. lionagi/loaders/chunker.py +0 -166
  59. lionagi/loaders/load_util.py +0 -240
  60. lionagi/loaders/reader.py +0 -122
  61. lionagi/models/__init__.py +0 -0
  62. lionagi/models/base_model.py +0 -0
  63. lionagi/models/imodel.py +0 -53
  64. lionagi/parsers/__init__.py +0 -1
  65. lionagi/schema/async_queue.py +0 -158
  66. lionagi/schema/base_condition.py +0 -1
  67. lionagi/schema/base_node.py +0 -422
  68. lionagi/schema/base_tool.py +0 -44
  69. lionagi/schema/data_logger.py +0 -131
  70. lionagi/schema/data_node.py +0 -88
  71. lionagi/schema/status_tracker.py +0 -37
  72. lionagi/tests/test_utils/test_encrypt_util.py +0 -323
  73. lionagi/utils/encrypt_util.py +0 -283
  74. lionagi-0.0.209.dist-info/RECORD +0 -98
  75. /lionagi/{agents → api_service}/__init__.py +0 -0
  76. /lionagi/{iservices → services}/__init__.py +0 -0
  77. /lionagi/{iservices → services}/base_service.py +0 -0
  78. /lionagi/{iservices → services}/mistralai.py +0 -0
  79. /lionagi/{iservices → services}/mlx_service.py +0 -0
  80. /lionagi/{iservices → services}/oai.py +0 -0
  81. /lionagi/{iservices → services}/ollama.py +0 -0
  82. /lionagi/{iservices → services}/services.py +0 -0
  83. /lionagi/{iservices → services}/transformers.py +0 -0
  84. {lionagi-0.0.209.dist-info → lionagi-0.0.211.dist-info}/LICENSE +0 -0
  85. {lionagi-0.0.209.dist-info → lionagi-0.0.211.dist-info}/WHEEL +0 -0
  86. {lionagi-0.0.209.dist-info → lionagi-0.0.211.dist-info}/top_level.txt +0 -0
@@ -1,266 +0,0 @@
1
- from typing import Union, Callable, List, Any, Dict, TypeVar
2
- from ..utils.sys_util import change_dict_key, install_import, is_package_installed
3
- from ..schema.data_node import DataNode
4
-
5
-
6
- T = TypeVar('T', bound='DataNode')
7
-
8
def from_llama_index(llama_node: Any, **kwargs: Any) -> T:
    """
    Build a DataNode from a Llama Index node.

    The node is serialized with its own ``to_dict`` method and the resulting
    dictionary is handed to ``DataNode.from_dict``.

    Args:
        llama_node (Any): The Llama Index node to convert.
        **kwargs: Forwarded unchanged to ``llama_node.to_dict``.

    Returns:
        T: The DataNode built from the serialized node.

    Example:
        llama_node = LlamaIndexNode(...)
        datanode = from_llama_index(llama_node, serialize_dates=True)
    """
    return DataNode.from_dict(llama_node.to_dict(**kwargs))
25
-
26
def to_llama_index_textnode(datanode: T, **kwargs: Any) -> Any:
    """
    Turn a DataNode into a Llama Index TextNode.

    The DataNode is dumped to a dictionary, its ``content`` and ``node_id``
    keys are renamed to ``text`` and ``id_``, the text is coerced to ``str``,
    and the extra keyword arguments are merged on top before the dictionary
    is passed to ``TextNode.from_dict``.

    Args:
        datanode (T): The DataNode to convert.
        **kwargs: Extra entries merged into the dictionary; they override
            entries of the same name coming from the DataNode.

    Returns:
        Any: The TextNode produced by ``TextNode.from_dict``.

    Raises:
        ImportError: If ``TextNode`` cannot be imported, even after the
            ``install_import`` attempt.

    Example:
        datanode = DataNode(...)
        textnode = to_llama_index_textnode(datanode, additional_arg=1)
    """
    try:
        from llama_index.schema import TextNode
    except ImportError:
        # Second chance: ask the project helper to install, then retry.
        try:
            install_import(
                package_name='llama_index',
                module_name='schema',
                import_name='TextNode',
            )
            from llama_index.schema import TextNode
        except Exception as e:
            raise ImportError(f'Unable to import required module from llama_index. Please make sure that llama_index is installed. Error: {e}')

    payload = datanode.to_dict()
    for old_name, new_name in (('content', 'text'), ('node_id', 'id_')):
        change_dict_key(payload, old_key=old_name, new_key=new_name)
    payload['text'] = str(payload['text'])

    # Build a new dict so caller-supplied kwargs win without touching payload.
    return TextNode.from_dict(dict(payload, **kwargs))
61
-
62
def get_llama_reader(reader: Union[str, Callable]) -> Callable:
    """
    Resolve a Llama Index reader.

    A non-string ``reader`` is returned untouched. The string
    ``'SimpleDirectoryReader'`` resolves to ``llama_index.SimpleDirectoryReader``;
    any other string is passed to ``llama_index.download_loader``. When
    llama_index cannot be imported, ``install_import`` is tried once and the
    import is repeated.

    Args:
        reader (Union[str, Callable]): The name of the reader or the reader itself.

    Returns:
        Callable: The resolved reader.

    Raises:
        ValueError: If the reader cannot be resolved for any reason, including
            a failed import (the underlying error is chained as the cause).

    Example:
        reader = get_llama_reader("SimpleDirectoryReader")
        # or for a custom function
        def custom_reader(): pass
        reader = get_llama_reader(custom_reader)
    """
    # Anything that is not a name is taken to be a ready-to-use reader.
    if not isinstance(reader, str):
        return reader

    try:
        if reader == 'SimpleDirectoryReader':
            try:
                from llama_index import SimpleDirectoryReader
            except ImportError:
                try:
                    install_import(
                        package_name='llama_index',
                        import_name='SimpleDirectoryReader',
                    )
                    from llama_index import SimpleDirectoryReader
                except Exception as e:
                    raise ImportError(f'Failed to import SimpleDirectoryReader. Error: {e}') from e
            return SimpleDirectoryReader

        try:
            from llama_index import download_loader
        except ImportError:
            try:
                install_import(
                    package_name='llama_index',
                    import_name='download_loader',
                )
                # The name must be imported again after installation;
                # otherwise it is unbound when used below.
                from llama_index import download_loader
            except Exception as e:
                raise ImportError(f'Failed to import download_loader from LlamaIndex. Error: {e}') from e
        return download_loader(reader)
    except Exception as e:
        # Callers are documented to receive ValueError for every failure.
        raise ValueError(f'Invalid reader: {reader}, Error: {e}') from e
115
-
116
def llama_index_reader(reader: Union[str, Callable],
                       reader_args: Union[List[Any], None] = None,
                       reader_kwargs: Union[Dict[str, Any], None] = None,
                       load_data_args: Union[List[Any], None] = None,
                       load_data_kwargs: Union[Dict[str, Any], None] = None) -> List[Any]:
    """
    Load documents with a Llama Index reader.

    The reader is resolved with ``get_llama_reader``, instantiated with
    ``reader_args`` / ``reader_kwargs``, and its ``load_data`` method is called
    with ``load_data_args`` / ``load_data_kwargs``.

    Args:
        reader (Union[str, Callable]): The name of the reader or the reader itself.
        reader_args (List[Any], optional): Positional arguments for the reader.
            ``None`` (the default) means no positional arguments.
        reader_kwargs (Dict[str, Any], optional): Keyword arguments for the reader.
            ``None`` (the default) means no keyword arguments.
        load_data_args (List[Any], optional): Positional arguments for ``load_data``.
        load_data_kwargs (Dict[str, Any], optional): Keyword arguments for ``load_data``.

    Returns:
        List[Any]: Whatever ``load_data`` returns.

    Raises:
        ValueError: If the reader is invalid, or if instantiating the reader
            or loading the data fails (the original error is chained).

    Example:
        documents = llama_index_reader("SimpleDirectoryReader", reader_args=["/path/to/data"])
    """
    # None defaults replace the shared mutable [] / {} defaults.
    reader_args = [] if reader_args is None else reader_args
    reader_kwargs = {} if reader_kwargs is None else reader_kwargs
    load_data_args = [] if load_data_args is None else load_data_args
    load_data_kwargs = {} if load_data_kwargs is None else load_data_kwargs

    reader = get_llama_reader(reader)

    try:
        loader = reader(*reader_args, **reader_kwargs)
        return loader.load_data(*load_data_args, **load_data_kwargs)
    except Exception as e:
        raise ValueError(f'Failed to read. Error: {e}') from e
149
-
150
def get_llama_parser(parser: Union[str, Callable]) -> Callable:
    """
    Resolve a Llama Index parser.

    A non-string ``parser`` is returned untouched, without importing
    llama_index. A string is looked up first in ``llama_index.node_parser``
    and then in ``llama_index.text_splitter``. For ``'CodeSplitter'`` the
    ``tree_sitter_languages`` package is installed via ``install_import``
    when ``is_package_installed`` reports it missing.

    Args:
        parser (Union[str, Callable]): The name of the parser or the parser itself.

    Returns:
        Callable: The resolved parser.

    Raises:
        ImportError: If the llama_index modules cannot be imported, even after
            the ``install_import`` attempt.
        ValueError: If the name is found in neither module.

    Example:
        parser = get_llama_parser("DefaultNodeParser")
        # or for a custom function
        def custom_parser(): pass
        parser = get_llama_parser(custom_parser)
    """
    # A ready-made parser needs no lookup, so llama_index is not required.
    if not isinstance(parser, str):
        return parser

    try:
        import llama_index.node_parser as node_parser
    except ImportError:
        try:
            install_import(
                package_name='llama_index',
                module_name='node_parser',
            )
            import llama_index.node_parser as node_parser
        except ImportError:
            raise ImportError('Failed to import Llama Index. Please install Llama Index to use this function.')
        except Exception as e:
            raise ValueError(f'Invalid node parser: {parser}. Error: {e}') from e

    try:
        import llama_index.text_splitter as text_splitter
    except ImportError:
        try:
            install_import(
                package_name='llama_index',
                module_name='text_splitter',
            )
            import llama_index.text_splitter as text_splitter
        except ImportError:
            raise ImportError('Failed to import Llama Index. Please install Llama Index to use this function.')

    try:
        if parser == 'CodeSplitter' and not is_package_installed('tree_sitter_languages'):
            install_import(package_name='tree_sitter_languages')

        found = getattr(node_parser, parser)
        if found is None:
            raise ImportError(f'Failed to import {parser} from Llama Index.')
        return found
    except Exception as e1:
        # Not available from node_parser: fall back to the text splitters.
        try:
            return getattr(text_splitter, parser)
        except Exception as e2:
            raise ValueError(f'Invalid node parser: {parser}. Error: {e1}, {e2}') from e2
214
-
215
-
216
def llama_index_node_parser(documents: List[Any],
                            parser: Union[str, Callable],
                            parser_args: Union[List[Any], None] = None,
                            parser_kwargs: Union[Dict[str, Any], None] = None,
                            parsing_kwargs: Union[Dict[str, Any], None] = None) -> List[Any]:
    """
    Parse documents into nodes with a Llama Index node parser.

    The parser is resolved with ``get_llama_parser``, instantiated with
    ``parser_args`` / ``parser_kwargs``, and ``get_nodes_from_documents`` is
    called on the instance. If an ImportError occurs, the missing module is
    installed with ``install_import`` and the whole sequence is retried once.
    For any other failure the parser's ``from_defaults`` constructor is tried
    as a fallback.

    Args:
        documents (List[Any]): The documents to parse.
        parser (Union[str, Callable]): The name of the parser or the parser itself.
        parser_args (List[Any], optional): Positional arguments for the parser.
            ``None`` (the default) means no positional arguments.
        parser_kwargs (Dict[str, Any], optional): Keyword arguments for the parser.
            ``None`` (the default) means no keyword arguments.
        parsing_kwargs (Dict[str, Any], optional): Keyword arguments for
            ``get_nodes_from_documents``.

    Returns:
        List[Any]: The nodes returned by ``get_nodes_from_documents``.

    Raises:
        ImportError: If a missing module cannot be identified, or cannot be
            installed and imported.
        ValueError: If both the direct construction and the ``from_defaults``
            fallback fail.

    Example:
        nodes = llama_index_node_parser(documents, "DefaultNodeParser")
    """
    # None defaults replace the shared mutable [] / {} defaults.
    parser_args = [] if parser_args is None else parser_args
    parser_kwargs = {} if parser_kwargs is None else parser_kwargs
    parsing_kwargs = {} if parsing_kwargs is None else parsing_kwargs

    try:
        parser = get_llama_parser(parser)
        parser_obj = parser(*parser_args, **parser_kwargs)
        return parser_obj.get_nodes_from_documents(documents, **parsing_kwargs)

    except ImportError as e:
        # Prefer the structured attribute; only parse the message as a
        # fallback, and never index blindly into the split result.
        module_name = getattr(e, 'name', None)
        if not module_name:
            pieces = str(e).split("'")
            module_name = pieces[-2] if len(pieces) >= 3 else None
        if not module_name:
            raise ImportError(f'Failed to determine the missing module. Error: {e}') from e

        try:
            install_import(package_name=module_name)
            parser = get_llama_parser(parser)
            parser_obj = parser(*parser_args, **parser_kwargs)
            return parser_obj.get_nodes_from_documents(documents, **parsing_kwargs)
        except Exception as retry_error:
            raise ImportError(
                f'Failed to install and import {module_name}. Error: {retry_error}'
            ) from retry_error

    except Exception as e1:
        # Some parsers are meant to be built through from_defaults.
        try:
            parser_obj = parser.from_defaults(*parser_args, **parser_kwargs)
            return parser_obj.get_nodes_from_documents(documents, **parsing_kwargs)
        except Exception as e2:
            raise ValueError(f'Failed to parse. Error: {e1}, {e2}') from e2
266
-
@@ -1 +0,0 @@
1
- # TODO
@@ -1 +0,0 @@
1
- # TODO
@@ -1 +0,0 @@
1
- # TODO
@@ -1 +0,0 @@
1
- # TODO
@@ -1 +0,0 @@
1
- # TODO
@@ -1 +0,0 @@
1
- # TODO
@@ -1 +0,0 @@
1
- # TODO
@@ -1 +0,0 @@
1
- # TODO
@@ -1,79 +0,0 @@
1
- from os import getenv
2
- from .base_service import BaseService, PayloadCreation
3
-
4
class AnthropicService(BaseService):
    """
    A service to interact with Anthropic's API endpoints.

    Attributes:
        base_url (str): The base URL for the Anthropic API.
        available_endpoints (list): A list of available API endpoints.
        schema (dict): The schema configuration for the API.
        key_scheme (str): The environment variable name for Anthropic API key.
        token_encoding_name (str): The default token encoding scheme.

    Examples:
        >>> service = AnthropicService(api_key="your_api_key")
        >>> asyncio.run(service.serve("Hello, world!", "chat/completions"))
        (payload, completion)
    """

    base_url = "https://api.anthropic.com/v1/"
    available_endpoints = ['chat/completions']
    schema = {}  # TODO
    key_scheme = "ANTHROPIC_API_KEY"
    token_encoding_name = "cl100k_base"

    def __init__(self, api_key=None, key_scheme=None, schema=None,
                 token_encoding_name: str = "cl100k_base", **kwargs):
        """
        Set up the service.

        Args:
            api_key: API key; when falsy, it is read from the environment
                variable named by ``key_scheme``.
            key_scheme: Name of the environment variable holding the key;
                defaults to the class-level ``key_scheme``.
            schema: Schema configuration; defaults to the class-level ``schema``.
            token_encoding_name: Token encoding name passed to the base class.
            **kwargs: Forwarded to ``BaseService.__init__``.
        """
        key_scheme = key_scheme or self.key_scheme
        super().__init__(
            api_key=api_key or getenv(key_scheme),
            schema=schema or self.schema,
            token_encoding_name=token_encoding_name,
            **kwargs
        )
        # Endpoints recorded by serve_chat after it initializes them.
        self.active_endpoint = []

    async def serve(self, input_, endpoint="chat/completions", method="post", **kwargs):
        """
        Serves the input using the specified endpoint.

        Args:
            input_: The input to be processed.
            endpoint: The API endpoint to use for processing.
            method: Accepted for interface compatibility; not used here.
            **kwargs: Additional keyword arguments to pass to the payload creation.

        Returns:
            A tuple containing the payload and the completion response from the API.

        Raises:
            ValueError: If ``endpoint`` is not "chat/completions".
        """
        if endpoint not in self.active_endpoint:
            await self.init_endpoint(endpoint)
        if endpoint == "chat/completions":
            return await self.serve_chat(input_, **kwargs)
        # Raise the error instead of returning it as if it were a result.
        raise ValueError(f'{endpoint} is currently not supported')

    async def serve_chat(self, messages, **kwargs):
        """
        Serves the chat completion request with the given messages.

        Args:
            messages: The messages to be included in the chat completion.
            **kwargs: Additional keyword arguments for payload creation.

        Returns:
            A tuple containing the payload and the completion response from the API.
        """
        if "chat/completions" not in self.active_endpoint:
            await self.init_endpoint("chat/completions")
            self.active_endpoint.append("chat/completions")
        payload = PayloadCreation.chat_completion(
            messages, self.endpoints["chat/completions"].config, self.schema["chat/completions"], **kwargs)

        try:
            completion = await self.call_api(payload, "chat/completions", "post")
            return payload, completion
        except Exception:
            # Count the failure, then let the original error propagate.
            self.status_tracker.num_tasks_failed += 1
            raise
File without changes
@@ -1 +0,0 @@
1
- # TODO
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -1,33 +0,0 @@
1
- from ..utils.sys_util import install_import, is_package_installed
2
- from .base_service import BaseService
3
-
4
-
5
class LiteLLMService(BaseService):
    """
    A chat service backed by litellm's ``acompletion`` coroutine.

    Attributes:
        acompletion: The ``litellm.acompletion`` callable.
        model (str): Model name passed to every completion call.
        kwargs (dict): Default keyword arguments for completion calls.
    """

    def __init__(self, model: str = None, **kwargs):
        """
        Set up the service and import litellm.

        Args:
            model (str): Model name passed to ``acompletion``.
            **kwargs: Default keyword arguments merged into every
                ``serve_chat`` call.

        Raises:
            ImportError: If litellm cannot be installed or imported.
        """
        super().__init__()

        try:
            if not is_package_installed('litellm'):
                install_import(
                    package_name='litellm',
                    import_name='acompletion'
                )
            from litellm import acompletion
            self.acompletion = acompletion
        except Exception as e:
            # Catch Exception rather than using a bare except, so that
            # KeyboardInterrupt/SystemExit pass through; name the package
            # that is actually missing.
            raise ImportError(
                'Unable to import required module from litellm. '
                'Please make sure that litellm is installed.'
            ) from e

        self.model = model
        self.kwargs = kwargs

    async def serve_chat(self, messages, **kwargs):
        """
        Request a chat completion for ``messages``.

        Args:
            messages: The messages to send.
            **kwargs: Per-call keyword arguments; they override the defaults
                given to the constructor.

        Returns:
            A tuple ``(payload, completion)`` where ``payload`` is
            ``{'messages': messages}``.
        """
        payload = {'messages': messages}
        kwargs = {**self.kwargs, **kwargs}

        try:
            completion = await self.acompletion(model=self.model, messages=messages, **kwargs)
            return payload, completion
        except Exception:
            # Count the failure, then let the original error propagate.
            self.status_tracker.num_tasks_failed += 1
            raise
33
-
File without changes
File without changes
@@ -1,44 +0,0 @@
1
- from os import getenv
2
- from ..configs.openrouter_configs import openrouter_schema
3
- from .base_service import BaseService, PayloadCreation
4
-
5
class OpenRouterService(BaseService):
    """
    A service to interact with OpenRouter's API endpoints.

    Attributes:
        base_url (str): The base URL for the OpenRouter API.
        available_endpoints (list): A list of available API endpoints.
        schema (dict): The schema configuration for the API.
        key_scheme (str): The environment variable name for the API key.
        token_encoding_name (str): The default token encoding scheme.
    """

    base_url = "https://openrouter.ai/api/v1/"
    available_endpoints = ['chat/completions']
    schema = openrouter_schema
    key_scheme = "OPENROUTER_API_KEY"
    token_encoding_name = "cl100k_base"

    def __init__(self, api_key=None, key_scheme=None, schema=None,
                 token_encoding_name: str = "cl100k_base", **kwargs):
        """
        Set up the service.

        Args:
            api_key: API key; when falsy, it is read from the environment
                variable named by ``key_scheme``.
            key_scheme: Name of the environment variable holding the key;
                defaults to the class-level ``key_scheme``.
            schema: Schema configuration; defaults to the class-level ``schema``.
            token_encoding_name: Token encoding name passed to the base class.
            **kwargs: Forwarded to ``BaseService.__init__``.
        """
        key_scheme = key_scheme or self.key_scheme
        super().__init__(
            api_key=api_key or getenv(key_scheme),
            schema=schema or self.schema,
            token_encoding_name=token_encoding_name, **kwargs
        )
        # Endpoints recorded by serve_chat after it initializes them.
        self.active_endpoint = []

    async def serve(self, input_, endpoint="chat/completions", method="post", **kwargs):
        """
        Serves the input using the specified endpoint.

        Args:
            input_: The input to be processed.
            endpoint: The API endpoint to use for processing.
            method: Accepted for interface compatibility; not used here.
            **kwargs: Additional keyword arguments to pass to the payload creation.

        Returns:
            A tuple containing the payload and the completion response from the API.

        Raises:
            ValueError: If ``endpoint`` is not "chat/completions".
        """
        if endpoint not in self.active_endpoint:
            await self.init_endpoint(endpoint)
        if endpoint == "chat/completions":
            return await self.serve_chat(input_, **kwargs)
        # Raise the error instead of returning it as if it were a result.
        raise ValueError(f'{endpoint} is currently not supported')

    async def serve_chat(self, messages, **kwargs):
        """
        Serves the chat completion request with the given messages.

        Args:
            messages: The messages to be included in the chat completion.
            **kwargs: Additional keyword arguments for payload creation.

        Returns:
            A tuple containing the payload and the completion response from the API.
        """
        endpoint = "chat/completions"

        if endpoint not in self.active_endpoint:
            await self.init_endpoint(endpoint)
            self.active_endpoint.append(endpoint)
        payload = PayloadCreation.chat_completion(
            messages, self.endpoints[endpoint].config, self.schema[endpoint], **kwargs)

        try:
            completion = await self.call_api(payload, endpoint, "post")
            return payload, completion
        except Exception:
            # Count the failure, then let the original error propagate.
            self.status_tracker.num_tasks_failed += 1
            raise
File without changes
File without changes
File without changes
lionagi/iservices/vllm.py DELETED
File without changes
File without changes
@@ -1,18 +0,0 @@
1
- from .load_util import dir_to_path, dir_to_nodes, chunk_text, read_text, file_to_chunks
2
- from .reader import load, ReaderType, text_reader
3
- from .chunker import chunk, datanodes_convert, ChunkerType, text_chunker
4
-
5
- __all__ = [
6
- 'load',
7
- 'chunk',
8
- 'datanodes_convert',
9
- 'text_reader',
10
- 'text_chunker',
11
- 'ReaderType',
12
- 'ChunkerType',
13
- 'dir_to_path',
14
- 'dir_to_nodes',
15
- 'chunk_text',
16
- 'read_text',
17
- 'file_to_chunks'
18
- ]
@@ -1,166 +0,0 @@
1
- from typing import Union, Callable
2
-
3
- from ..utils import lcall
4
- from ..schema import DataNode
5
- from ..bridge import langchain_text_splitter, from_langchain, llama_index_node_parser, from_llama_index
6
- from .load_util import ChunkerType, file_to_chunks
7
-
8
-
9
def datanodes_convert(documents, chunker_type):
    """
    Converts the DataNode entries of a list to the format a chunker expects.

    The list is modified in place: every DataNode (including instances of
    DataNode subclasses) is replaced by the result of ``to_llama_index()`` or
    ``to_langchain()``, depending on ``chunker_type``. Entries that are not
    DataNodes, and all entries when ``chunker_type`` is neither LLAMAINDEX nor
    LANGCHAIN, are left as they are.

    Args:
        documents: A list whose DataNode entries should be converted.
        chunker_type: The chunker type to determine the conversion format.

    Returns:
        The same list object, after conversion.

    Example usage:
        >>> documents = [DataNode(content="Example content")]
        >>> converted = datanodes_convert(documents, ChunkerType.LANGCHAIN)
    """
    for index, doc in enumerate(documents):
        # isinstance also covers DataNode subclasses, unlike type(...) ==.
        if not isinstance(doc, DataNode):
            continue
        if chunker_type == ChunkerType.LLAMAINDEX:
            documents[index] = doc.to_llama_index()
        elif chunker_type == ChunkerType.LANGCHAIN:
            documents[index] = doc.to_langchain()
    return documents
31
-
32
def text_chunker(documents, args, kwargs):
    """
    Split every document into chunk nodes.

    Each document is dumped with ``to_dict`` and passed to ``file_to_chunks``
    together with ``args`` and ``kwargs``. The ``node_id`` entry is removed
    from every resulting chunk, and a DataNode is then built from each one.

    Args:
        documents: The DataNode instances to split.
        args: Positional arguments forwarded to ``file_to_chunks``.
        kwargs: Keyword arguments forwarded to ``file_to_chunks``.

    Returns:
        A flat list holding the chunk DataNodes of all documents, in order.

    Example usage:
        >>> documents = [DataNode(content="Example content")]
        >>> args = []
        >>> kwargs = {"chunk_size": 100}
        >>> chunked_docs = text_chunker(documents, args, kwargs)
    """
    collected = []
    for document in documents:
        pieces = file_to_chunks(document.to_dict(), *args, **kwargs)
        # Drop node_id from each chunk before building its DataNode.
        lcall(pieces, lambda piece: piece.pop('node_id'))
        collected += lcall(pieces, lambda fields: DataNode(**fields))
    return collected
60
-
61
-
62
- def _datanode_parser(nodes, parser):
63
- """
64
- Parses raw data into DataNode instances using the provided parser function.
65
-
66
- Args:
67
- nodes: A list of raw data to be parsed.
68
- parser: A function that parses raw data into DataNode instances.
69
-
70
- Returns:
71
- A list of parsed DataNode instances.
72
-
73
- Raises:
74
- ValueError: If the parser function fails.
75
-
76
- Example usage:
77
- >>> raw_data = [{"content": "Example content"}]
78
- >>> parser = lambda x: [DataNode(**d) for d in x]
79
- >>> parsed_nodes = _datanode_parser(raw_data, parser)
80
- """
81
- try:
82
- nodes = parser(nodes)
83
- except Exception as e:
84
- raise ValueError(f'DataNode parser {parser} failed. Error:{e}')
85
- return nodes
86
-
87
-
88
def chunk(documents,
          chunker,
          chunker_type=ChunkerType.PLAIN,
          chunker_args=None,
          chunker_kwargs=None,
          chunking_kwargs=None,
          documents_convert_func=None,
          to_datanode: Union[bool, Callable] = True):
    """
    Chunks documents using the specified chunker and chunker type.

    Args:
        documents: The documents to be chunked.
        chunker: The chunker to use. With ChunkerType.PLAIN the string
            'text_chunker' selects the built-in ``text_chunker``.
        chunker_type: The type of the chunker. Defaults to ChunkerType.PLAIN.
        chunker_args: Positional arguments for the chunker. ``None`` (the
            default) means an empty list.
        chunker_kwargs: Keyword arguments for the chunker. ``None`` (the
            default) means an empty dict.
        chunking_kwargs: Additional keyword arguments for the chunking call
            (LLAMAINDEX and SELFDEFINED only). ``None`` means an empty dict.
        documents_convert_func: Optional function called as
            ``func(documents, 'langchain')`` or ``func(documents, 'llama_index')``
            before chunking. Defaults to None.
        to_datanode: ``True`` converts the result into DataNode instances, a
            callable is used as a custom converter, and any other value
            returns the chunker output unchanged. Not used for
            ChunkerType.PLAIN. Defaults to True.

    Returns:
        The chunked nodes.

    Raises:
        ValueError: If the chunker fails, if an unsupported chunker type is
            provided, or if ``to_datanode`` is True for a self-defined chunker.

    Example usage:
        >>> documents = ["Long text document...", "Another long text..."]
        >>> chunked_docs = chunk(documents, text_chunker, ChunkerType.PLAIN, chunker_args=[], chunker_kwargs={"chunk_size": 100})
    """
    # Fresh containers per call: the originals were shared mutable defaults
    # handed by reference to arbitrary chunkers.
    chunker_args = [] if chunker_args is None else chunker_args
    chunker_kwargs = {} if chunker_kwargs is None else chunker_kwargs
    chunking_kwargs = {} if chunking_kwargs is None else chunking_kwargs

    if chunker_type == ChunkerType.PLAIN:
        try:
            if chunker == 'text_chunker':
                chunker = text_chunker
            return chunker(documents, chunker_args, chunker_kwargs)
        except Exception as e:
            raise ValueError(f'Chunker {chunker} is currently not supported. Error: {e}') from e

    if chunker_type == ChunkerType.LANGCHAIN:
        if documents_convert_func:
            documents = documents_convert_func(documents, 'langchain')
        nodes = langchain_text_splitter(documents, chunker, chunker_args, chunker_kwargs)
        if to_datanode is True:
            if isinstance(documents, str):
                # Splitting a plain string yields plain strings.
                nodes = lcall(nodes, lambda x: DataNode(content=x))
            else:
                nodes = lcall(nodes, from_langchain)
        elif callable(to_datanode):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    if chunker_type == ChunkerType.LLAMAINDEX:
        if documents_convert_func:
            documents = documents_convert_func(documents, 'llama_index')
        nodes = llama_index_node_parser(documents, chunker, chunker_args, chunker_kwargs, chunking_kwargs)
        if to_datanode is True:
            nodes = lcall(nodes, from_llama_index)
        elif callable(to_datanode):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    if chunker_type == ChunkerType.SELFDEFINED:
        try:
            splitter = chunker(*chunker_args, **chunker_kwargs)
            nodes = splitter.split(documents, **chunking_kwargs)
        except Exception as e:
            raise ValueError(f'Self defined chunker {chunker} is not valid. Error: {e}') from e

        if to_datanode is True:
            # There is no default conversion for an unknown chunk format.
            raise ValueError('Please define a valid parser to DataNode.')
        if callable(to_datanode):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    raise ValueError(f'{chunker_type} is not supported. Please choose from {list(ChunkerType)}')
166
-