lionagi 0.0.313__py3-none-any.whl → 0.0.315__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- lionagi/__init__.py +3 -0
- lionagi/core/direct/__init__.py +2 -1
- lionagi/core/direct/cot.py +1 -0
- lionagi/core/direct/plan.py +0 -0
- lionagi/core/direct/predict.py +3 -2
- lionagi/core/direct/react.py +167 -0
- lionagi/core/direct/score.py +2 -1
- lionagi/core/direct/select.py +2 -1
- lionagi/core/direct/sentiment.py +1 -0
- lionagi/core/messages/schema.py +1 -1
- lionagi/core/prompt/action_template.py +26 -0
- lionagi/core/prompt/field_validator.py +40 -1
- lionagi/core/prompt/prompt_template.py +36 -40
- lionagi/core/prompt/scored_template.py +13 -0
- lionagi/core/tool/manual.py +1 -0
- lionagi/core/tool/tool_manager.py +3 -1
- lionagi/integrations/bridge/llamaindex_/node_parser.py +6 -9
- lionagi/integrations/bridge/pydantic_/pydantic_bridge.py +1 -0
- lionagi/integrations/chunker/__init__.py +0 -0
- lionagi/integrations/chunker/chunk.py +175 -0
- lionagi/integrations/loader/__init__.py +0 -0
- lionagi/integrations/loader/load.py +152 -0
- lionagi/integrations/loader/load_util.py +266 -0
- lionagi/integrations/provider/ollama.py +1 -1
- lionagi/version.py +1 -1
- {lionagi-0.0.313.dist-info → lionagi-0.0.315.dist-info}/METADATA +15 -19
- {lionagi-0.0.313.dist-info → lionagi-0.0.315.dist-info}/RECORD +30 -20
- lionagi/integrations/bridge/pydantic_/base_model.py +0 -7
- {lionagi-0.0.313.dist-info → lionagi-0.0.315.dist-info}/LICENSE +0 -0
- {lionagi-0.0.313.dist-info → lionagi-0.0.315.dist-info}/WHEEL +0 -0
- {lionagi-0.0.313.dist-info → lionagi-0.0.315.dist-info}/top_level.txt +0 -0
lionagi/__init__.py
CHANGED
@@ -8,6 +8,9 @@ from dotenv import load_dotenv
|
|
8
8
|
|
9
9
|
from .core import direct, Branch, Session, func_to_tool
|
10
10
|
from .integrations.provider.services import Services
|
11
|
+
from .integrations.chunker.chunk import chunk
|
12
|
+
from .integrations.loader.load import load
|
13
|
+
|
11
14
|
|
12
15
|
logger = logging.getLogger(__name__)
|
13
16
|
logger.setLevel(logging.INFO)
|
lionagi/core/direct/__init__.py
CHANGED
@@ -0,0 +1 @@
|
|
1
|
+
# TODO: chain of thoughts
|
File without changes
|
lionagi/core/direct/predict.py
CHANGED
@@ -6,9 +6,10 @@ using a language model. It includes fields for the input sentence, number of sen
|
|
6
6
|
confidence score, and reason for the prediction.
|
7
7
|
"""
|
8
8
|
|
9
|
-
from pydantic import Field
|
10
9
|
from lionagi.libs import func_call
|
11
|
-
from
|
10
|
+
from lionagi.integrations.bridge.pydantic_.pydantic_bridge import Field
|
11
|
+
|
12
|
+
from ..prompt.scored_template import ScoredTemplate
|
12
13
|
from ..branch import Branch
|
13
14
|
|
14
15
|
|
@@ -0,0 +1,167 @@
|
|
1
|
+
from lionagi.libs import func_call, convert, AsyncUtil
|
2
|
+
|
3
|
+
from lionagi.integrations.bridge.pydantic_.pydantic_bridge import Field
|
4
|
+
from ..prompt.action_template import ActionedTemplate
|
5
|
+
from ..branch import Branch
|
6
|
+
|
7
|
+
|
8
|
+
class ReactTemplate(ActionedTemplate):
|
9
|
+
template_name: str = "default_react"
|
10
|
+
sentence: str | list | dict = Field(
|
11
|
+
default_factory=str,
|
12
|
+
description="the given sentence(s) to reason and take actions on",
|
13
|
+
)
|
14
|
+
|
15
|
+
def __init__(
|
16
|
+
self,
|
17
|
+
sentence=None,
|
18
|
+
instruction=None,
|
19
|
+
confidence_score=False,
|
20
|
+
**kwargs,
|
21
|
+
):
|
22
|
+
super().__init__(**kwargs)
|
23
|
+
|
24
|
+
self.sentence = sentence
|
25
|
+
self.task = f"Think step by step. Perform reasoning and prepare actions with given tools only.Instruction: {instruction}. Absolutely DO NOT MAKE UP FUNCTIONS !!!"
|
26
|
+
|
27
|
+
if confidence_score:
|
28
|
+
self.output_fields.append("confidence_score")
|
29
|
+
|
30
|
+
|
31
|
+
async def _react(
|
32
|
+
sentence,
|
33
|
+
*,
|
34
|
+
instruction=None,
|
35
|
+
branch=None,
|
36
|
+
confidence_score=False,
|
37
|
+
retries=2,
|
38
|
+
delay=0.5,
|
39
|
+
backoff_factor=2,
|
40
|
+
default_value=None,
|
41
|
+
timeout=None,
|
42
|
+
branch_name=None,
|
43
|
+
system=None,
|
44
|
+
messages=None,
|
45
|
+
service=None,
|
46
|
+
sender=None,
|
47
|
+
llmconfig=None,
|
48
|
+
tools=None,
|
49
|
+
datalogger=None,
|
50
|
+
persist_path=None,
|
51
|
+
tool_manager=None,
|
52
|
+
return_branch=False,
|
53
|
+
**kwargs,
|
54
|
+
):
|
55
|
+
|
56
|
+
if "temperature" not in kwargs:
|
57
|
+
kwargs["temperature"] = 0.1
|
58
|
+
|
59
|
+
instruction = instruction or ""
|
60
|
+
|
61
|
+
branch = branch or Branch(
|
62
|
+
name=branch_name,
|
63
|
+
system=system,
|
64
|
+
messages=messages,
|
65
|
+
service=service,
|
66
|
+
sender=sender,
|
67
|
+
llmconfig=llmconfig,
|
68
|
+
tools=tools,
|
69
|
+
datalogger=datalogger,
|
70
|
+
persist_path=persist_path,
|
71
|
+
tool_manager=tool_manager,
|
72
|
+
)
|
73
|
+
|
74
|
+
_template = ReactTemplate(
|
75
|
+
sentence=sentence,
|
76
|
+
instruction=instruction,
|
77
|
+
confidence_score=confidence_score,
|
78
|
+
)
|
79
|
+
|
80
|
+
await func_call.rcall(
|
81
|
+
branch.chat,
|
82
|
+
prompt_template=_template,
|
83
|
+
retries=retries,
|
84
|
+
delay=delay,
|
85
|
+
backoff_factor=backoff_factor,
|
86
|
+
default=default_value,
|
87
|
+
timeout=timeout,
|
88
|
+
**kwargs,
|
89
|
+
)
|
90
|
+
|
91
|
+
if _template.action_needed:
|
92
|
+
actions = _template.actions
|
93
|
+
tasks = [branch.tool_manager.invoke(i.values()) for i in actions]
|
94
|
+
results = await AsyncUtil.execute_tasks(*tasks)
|
95
|
+
|
96
|
+
a = []
|
97
|
+
for idx, item in enumerate(actions):
|
98
|
+
res = {
|
99
|
+
"function": item["function"],
|
100
|
+
"arguments": item["arguments"],
|
101
|
+
"output": results[idx],
|
102
|
+
}
|
103
|
+
branch.add_message(response=res)
|
104
|
+
a.append(res)
|
105
|
+
|
106
|
+
_template.__setattr__("action_response", a)
|
107
|
+
|
108
|
+
return (_template, branch) if return_branch else _template
|
109
|
+
|
110
|
+
|
111
|
+
async def react(
|
112
|
+
sentence,
|
113
|
+
*,
|
114
|
+
instruction=None,
|
115
|
+
num_instances=1,
|
116
|
+
branch=None,
|
117
|
+
confidence_score=False,
|
118
|
+
retries=2,
|
119
|
+
delay=0.5,
|
120
|
+
backoff_factor=2,
|
121
|
+
default_value=None,
|
122
|
+
timeout=None,
|
123
|
+
branch_name=None,
|
124
|
+
system=None,
|
125
|
+
messages=None,
|
126
|
+
service=None,
|
127
|
+
sender=None,
|
128
|
+
llmconfig=None,
|
129
|
+
tools=None,
|
130
|
+
datalogger=None,
|
131
|
+
persist_path=None,
|
132
|
+
tool_manager=None,
|
133
|
+
return_branch=False,
|
134
|
+
**kwargs,
|
135
|
+
):
|
136
|
+
|
137
|
+
async def _inner(i=0):
|
138
|
+
return await _react(
|
139
|
+
sentence=sentence,
|
140
|
+
instruction=instruction,
|
141
|
+
num_instances=num_instances,
|
142
|
+
branch=branch,
|
143
|
+
confidence_score=confidence_score,
|
144
|
+
retries=retries,
|
145
|
+
delay=delay,
|
146
|
+
backoff_factor=backoff_factor,
|
147
|
+
default_value=default_value,
|
148
|
+
timeout=timeout,
|
149
|
+
branch_name=branch_name,
|
150
|
+
system=system,
|
151
|
+
messages=messages,
|
152
|
+
service=service,
|
153
|
+
sender=sender,
|
154
|
+
llmconfig=llmconfig,
|
155
|
+
tools=tools,
|
156
|
+
datalogger=datalogger,
|
157
|
+
persist_path=persist_path,
|
158
|
+
tool_manager=tool_manager,
|
159
|
+
return_branch=return_branch,
|
160
|
+
**kwargs,
|
161
|
+
)
|
162
|
+
|
163
|
+
if num_instances == 1:
|
164
|
+
return await _inner()
|
165
|
+
|
166
|
+
elif num_instances > 1:
|
167
|
+
return await func_call.alcall(range(num_instances), _inner)
|
lionagi/core/direct/score.py
CHANGED
@@ -12,7 +12,7 @@ ScoreTemplate class and a language model.
|
|
12
12
|
from pydantic import Field
|
13
13
|
import numpy as np
|
14
14
|
from lionagi.libs import func_call, convert
|
15
|
-
from ..prompt.
|
15
|
+
from ..prompt.scored_template import ScoredTemplate
|
16
16
|
from ..branch import Branch
|
17
17
|
|
18
18
|
|
@@ -183,6 +183,7 @@ async def _score(
|
|
183
183
|
|
184
184
|
async def score(
|
185
185
|
sentence,
|
186
|
+
*,
|
186
187
|
num_instances=1,
|
187
188
|
instruction=None,
|
188
189
|
score_range=(1, 10),
|
lionagi/core/direct/select.py
CHANGED
@@ -13,7 +13,7 @@ from enum import Enum
|
|
13
13
|
from pydantic import Field
|
14
14
|
|
15
15
|
from lionagi.libs import func_call, StringMatch
|
16
|
-
from ..prompt.
|
16
|
+
from ..prompt.scored_template import ScoredTemplate
|
17
17
|
from ..branch import Branch
|
18
18
|
|
19
19
|
|
@@ -39,6 +39,7 @@ class SelectTemplate(ScoredTemplate):
|
|
39
39
|
answer: Enum | str = Field(
|
40
40
|
default_factory=str, description="selection from given choices"
|
41
41
|
)
|
42
|
+
choices: list = Field(default_factory=list, description="the given choices")
|
42
43
|
|
43
44
|
signature: str = "sentence -> answer"
|
44
45
|
|
@@ -0,0 +1 @@
|
|
1
|
+
# TODO: sentiment analysis
|
lionagi/core/messages/schema.py
CHANGED
@@ -0,0 +1,26 @@
|
|
1
|
+
from typing import Any
|
2
|
+
from lionagi.integrations.bridge.pydantic_.pydantic_bridge import Field
|
3
|
+
|
4
|
+
from .scored_template import ScoredTemplate
|
5
|
+
|
6
|
+
|
7
|
+
class ActionRequest: ...
|
8
|
+
|
9
|
+
|
10
|
+
class ActionedTemplate(ScoredTemplate):
|
11
|
+
|
12
|
+
action_needed: bool | None = Field(
|
13
|
+
False, description="true if actions are needed else false"
|
14
|
+
)
|
15
|
+
|
16
|
+
actions: list[dict | ActionRequest | Any] | None = Field(
|
17
|
+
default_factory=list,
|
18
|
+
description="""provide The list of action(s) to take, each action in {"function": function_name, "arguments": {param1:..., param2:..., ...}}. Leave blank if no further actions are needed, you must use provided parameters for each action, DO NOT MAKE UP KWARG NAME!!!""",
|
19
|
+
)
|
20
|
+
|
21
|
+
answer: str | dict | Any | None = Field(
|
22
|
+
default_factory=str,
|
23
|
+
description="output answer to the questions asked if further actions are not needed, leave blank if an accurate answer cannot be provided from context during this step",
|
24
|
+
)
|
25
|
+
|
26
|
+
signature: str = "sentence -> reason, action_needed, actions, answer"
|
@@ -6,7 +6,45 @@ including numeric, boolean, string, and enum. It also provides a dictionary `val
|
|
6
6
|
maps data types to their corresponding validation functions.
|
7
7
|
"""
|
8
8
|
|
9
|
-
from lionagi.libs import convert, StringMatch
|
9
|
+
from lionagi.libs import convert, StringMatch, ParseUtil
|
10
|
+
|
11
|
+
|
12
|
+
def _has_action_keys(dict_):
|
13
|
+
return list(dict_.keys()) >= ["function", "arguments"]
|
14
|
+
|
15
|
+
|
16
|
+
def check_action_field(x, fix_=True, **kwargs):
|
17
|
+
if (
|
18
|
+
isinstance(x, list)
|
19
|
+
and convert.is_same_dtype(x, dict)
|
20
|
+
and all(_has_action_keys(y) for y in x)
|
21
|
+
):
|
22
|
+
return x
|
23
|
+
try:
|
24
|
+
x = _fix_action_field(x, fix_)
|
25
|
+
return x
|
26
|
+
except Exception as e:
|
27
|
+
raise ValueError("Invalid action field type.") from e
|
28
|
+
|
29
|
+
|
30
|
+
def _fix_action_field(x, discard_=True):
|
31
|
+
corrected = []
|
32
|
+
if isinstance(x, str):
|
33
|
+
x = ParseUtil.fuzzy_parse_json(x)
|
34
|
+
|
35
|
+
try:
|
36
|
+
x = convert.to_list(x)
|
37
|
+
|
38
|
+
for i in x:
|
39
|
+
i = convert.to_dict(i)
|
40
|
+
if _has_action_keys(i):
|
41
|
+
corrected.append(i)
|
42
|
+
elif not discard_:
|
43
|
+
raise ValueError(f"Invalid action field: {i}")
|
44
|
+
except Exception as e:
|
45
|
+
raise ValueError(f"Invalid action field: {e}") from e
|
46
|
+
|
47
|
+
return corrected
|
10
48
|
|
11
49
|
|
12
50
|
def check_number_field(x, fix_=True, **kwargs):
|
@@ -236,4 +274,5 @@ validation_funcs = {
|
|
236
274
|
"bool": check_bool_field,
|
237
275
|
"str": check_str_field,
|
238
276
|
"enum": check_enum_field,
|
277
|
+
"action": check_action_field,
|
239
278
|
}
|
@@ -207,6 +207,10 @@ class PromptTemplate(BaseComponent):
|
|
207
207
|
setattr(self, k, v_)
|
208
208
|
return True
|
209
209
|
|
210
|
+
if "lionagi.core.prompt.action_template.actionrequest" in str_:
|
211
|
+
self.__setattr__(k, validation_funcs["action"](v))
|
212
|
+
return True
|
213
|
+
|
210
214
|
elif "bool" in str_:
|
211
215
|
self.__setattr__(k, validation_funcs["bool"](v, fix_=fix_, **kwargs))
|
212
216
|
return True
|
@@ -227,48 +231,50 @@ class PromptTemplate(BaseComponent):
|
|
227
231
|
if k not in kwargs:
|
228
232
|
kwargs = {k: {}}
|
229
233
|
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
+
if self._field_has_choices(k):
|
235
|
+
self.choices[k] = self.model_fields[k].json_schema_extra["choices"]
|
236
|
+
if self._validate_field(
|
237
|
+
k, v, choices=self.choices[k], fix_=fix_, **kwargs[k]
|
234
238
|
):
|
235
|
-
self.choices[k] = self.model_fields[k].json_schema_extra["choices"]
|
236
|
-
if self._validate_field(
|
237
|
-
k, v, choices=self.choices[k], fix_=fix_, **kwargs[k]
|
238
|
-
):
|
239
|
-
continue
|
240
|
-
else:
|
241
|
-
raise ValueError(f"{k} has no choices")
|
242
|
-
|
243
|
-
except Exception as e:
|
244
|
-
if self._validate_field(k, v, fix_=fix_, **kwargs[k]):
|
245
239
|
continue
|
246
240
|
else:
|
247
|
-
raise ValueError(f"
|
241
|
+
raise ValueError(f"{k} has no choices")
|
242
|
+
|
243
|
+
elif self._validate_field(k, v, fix_=fix_, **kwargs[k]):
|
244
|
+
continue
|
245
|
+
else:
|
246
|
+
raise ValueError(f"failed to validate field {k}")
|
247
|
+
|
248
|
+
def _field_has_choices(self, k):
|
249
|
+
try:
|
250
|
+
a = (
|
251
|
+
self.model_fields[k].json_schema_extra["choices"] is not None
|
252
|
+
and "choices" in self.model_fields[k].json_schema_extra
|
253
|
+
)
|
254
|
+
return a if isinstance(a, bool) else False
|
255
|
+
except Exception:
|
256
|
+
return False
|
248
257
|
|
249
258
|
def _process_response(self, out_, fix_=True):
|
250
259
|
kwargs = self.out_validation_kwargs.copy()
|
251
260
|
for k, v in out_.items():
|
252
261
|
if k not in kwargs:
|
253
262
|
kwargs = {k: {}}
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
263
|
+
|
264
|
+
if self._field_has_choices(k):
|
265
|
+
self.choices[k] = self.model_fields[k].json_schema_extra["choices"]
|
266
|
+
if self._validate_field(
|
267
|
+
k, v, choices=self.choices[k], fix_=fix_, **kwargs[k]
|
258
268
|
):
|
259
|
-
self.choices[k] = self.model_fields[k].json_schema_extra["choices"]
|
260
|
-
if self._validate_field(
|
261
|
-
k, v, choices=self.choices[k], fix_=fix_, **kwargs[k]
|
262
|
-
):
|
263
|
-
continue
|
264
|
-
else:
|
265
|
-
raise ValueError(f"{k} has no choices")
|
266
|
-
|
267
|
-
except Exception as e:
|
268
|
-
if self._validate_field(k, v, fix_=fix_, **kwargs[k]):
|
269
269
|
continue
|
270
270
|
else:
|
271
|
-
raise ValueError(f"
|
271
|
+
raise ValueError(f"{k} has no choices")
|
272
|
+
|
273
|
+
elif self._validate_field(k, v, fix_=fix_, **kwargs[k]):
|
274
|
+
continue
|
275
|
+
|
276
|
+
else:
|
277
|
+
raise ValueError(f"failed to validate field {k} with value {v}")
|
272
278
|
|
273
279
|
@property
|
274
280
|
def in_(self):
|
@@ -288,16 +294,6 @@ class PromptTemplate(BaseComponent):
|
|
288
294
|
return self
|
289
295
|
|
290
296
|
|
291
|
-
class ScoredTemplate(PromptTemplate):
|
292
|
-
confidence_score: float | None = Field(
|
293
|
-
-1,
|
294
|
-
description="a numeric score between 0 to 1 formatted in num:0.2f",
|
295
|
-
)
|
296
|
-
reason: str | None = Field(
|
297
|
-
default_factory=str, description="brief reason for the given output"
|
298
|
-
)
|
299
|
-
|
300
|
-
|
301
297
|
# class Weather(PromptTemplate):
|
302
298
|
# sunny: bool = Field(True, description="true if the weather is sunny outside else false")
|
303
299
|
# rainy: bool = Field(False, description="true if it is raining outside else false")
|
@@ -0,0 +1,13 @@
|
|
1
|
+
from lionagi.integrations.bridge.pydantic_.pydantic_bridge import Field
|
2
|
+
|
3
|
+
from .prompt_template import PromptTemplate
|
4
|
+
|
5
|
+
|
6
|
+
class ScoredTemplate(PromptTemplate):
|
7
|
+
confidence_score: float | None = Field(
|
8
|
+
-1,
|
9
|
+
description="a numeric score between 0 to 1 formatted in num:0.2f",
|
10
|
+
)
|
11
|
+
reason: str | None = Field(
|
12
|
+
default_factory=str, description="brief reason for the given output"
|
13
|
+
)
|
lionagi/core/tool/manual.py
CHANGED
@@ -0,0 +1 @@
|
|
1
|
+
# TODO: tool manual, instruction on how to use the tool for LLM
|
@@ -38,7 +38,7 @@ class ToolManager:
|
|
38
38
|
def has_tools(self):
|
39
39
|
return self.registry != {}
|
40
40
|
|
41
|
-
def _register_tool(self, tool: Tool) -> None:
|
41
|
+
def _register_tool(self, tool: Tool | Callable) -> None:
|
42
42
|
"""
|
43
43
|
Registers a tool in the registry. Raises a TypeError if the object is not an instance of Tool.
|
44
44
|
|
@@ -48,6 +48,8 @@ class ToolManager:
|
|
48
48
|
Raises:
|
49
49
|
TypeError: If the provided object is not an instance of Tool.
|
50
50
|
"""
|
51
|
+
if isinstance(tool, Callable):
|
52
|
+
tool = func_to_tool(tool)[0]
|
51
53
|
if not isinstance(tool, Tool):
|
52
54
|
raise TypeError("Please register a Tool object.")
|
53
55
|
name = tool.schema_["function"]["name"]
|
@@ -29,19 +29,18 @@ def get_llama_index_node_parser(node_parser: Any):
|
|
29
29
|
import llama_index.core.node_parser
|
30
30
|
|
31
31
|
if not isinstance(node_parser, str) and not issubclass(node_parser, NodeParser):
|
32
|
-
raise TypeError(
|
32
|
+
raise TypeError("node_parser must be a string or NodeParser.")
|
33
33
|
|
34
34
|
if isinstance(node_parser, str):
|
35
35
|
if node_parser == "CodeSplitter":
|
36
36
|
SysUtil.check_import("tree_sitter_languages")
|
37
37
|
|
38
38
|
try:
|
39
|
-
|
40
|
-
return parser
|
39
|
+
return getattr(llama_index.core.node_parser, node_parser)
|
41
40
|
except Exception as e:
|
42
41
|
raise AttributeError(
|
43
42
|
f"llama_index_core has no such attribute:" f" {node_parser}, Error: {e}"
|
44
|
-
)
|
43
|
+
) from e
|
45
44
|
|
46
45
|
elif isinstance(node_parser, NodeParser):
|
47
46
|
return node_parser
|
@@ -75,10 +74,8 @@ def llama_index_parse_node(
|
|
75
74
|
parser = get_llama_index_node_parser(node_parser)
|
76
75
|
try:
|
77
76
|
parser = parser(*parser_args, **parser_kwargs)
|
78
|
-
except:
|
77
|
+
except Exception:
|
79
78
|
parser = parser.from_defaults(*parser_args, **parser_kwargs)
|
80
|
-
|
81
|
-
return nodes
|
82
|
-
|
79
|
+
return parser.get_nodes_from_documents(documents)
|
83
80
|
except Exception as e:
|
84
|
-
raise ValueError(f"Failed to parse. Error: {e}")
|
81
|
+
raise ValueError(f"Failed to parse. Error: {e}") from e
|
@@ -0,0 +1 @@
|
|
1
|
+
from pydantic import BaseModel, Field, ValidationError, AliasChoices, field_serializer
|
File without changes
|
@@ -0,0 +1,175 @@
|
|
1
|
+
from typing import Union, Callable
|
2
|
+
|
3
|
+
from lionagi.libs import func_call
|
4
|
+
from lionagi.core.schema import DataNode
|
5
|
+
from ..bridge.langchain_.langchain_bridge import LangchainBridge
|
6
|
+
from ..bridge.llamaindex_.llama_index_bridge import LlamaIndexBridge
|
7
|
+
|
8
|
+
|
9
|
+
from ..loader.load_util import ChunkerType, file_to_chunks, _datanode_parser
|
10
|
+
|
11
|
+
|
12
|
+
def datanodes_convert(documents, chunker_type):
|
13
|
+
|
14
|
+
for i in range(len(documents)):
|
15
|
+
if type(documents[i]) == DataNode:
|
16
|
+
if chunker_type == ChunkerType.LLAMAINDEX:
|
17
|
+
documents[i] = documents[i].to_llama_index()
|
18
|
+
elif chunker_type == ChunkerType.LANGCHAIN:
|
19
|
+
documents[i] = documents[i].to_langchain()
|
20
|
+
return documents
|
21
|
+
|
22
|
+
|
23
|
+
def text_chunker(documents, args, kwargs):
|
24
|
+
|
25
|
+
def chunk_node(node):
|
26
|
+
chunks = file_to_chunks(node.to_dict(), *args, **kwargs)
|
27
|
+
func_call.lcall(chunks, lambda chunk: chunk.pop("node_id"))
|
28
|
+
return [DataNode.from_obj({**chunk}) for chunk in chunks]
|
29
|
+
|
30
|
+
return [chunk_node(doc) for doc in documents]
|
31
|
+
|
32
|
+
|
33
|
+
def chunk(
|
34
|
+
documents,
|
35
|
+
chunker,
|
36
|
+
chunker_type=ChunkerType.PLAIN,
|
37
|
+
chunker_args=None,
|
38
|
+
chunker_kwargs=None,
|
39
|
+
chunking_kwargs=None,
|
40
|
+
documents_convert_func=None,
|
41
|
+
to_datanode: bool | Callable = True,
|
42
|
+
):
|
43
|
+
|
44
|
+
if chunker_args is None:
|
45
|
+
chunker_args = []
|
46
|
+
if chunker_kwargs is None:
|
47
|
+
chunker_kwargs = {}
|
48
|
+
if chunking_kwargs is None:
|
49
|
+
chunking_kwargs = {}
|
50
|
+
|
51
|
+
if chunker_type == ChunkerType.PLAIN:
|
52
|
+
return chunk_funcs[ChunkerType.PLAIN](
|
53
|
+
documents, chunker, chunker_args, chunker_kwargs
|
54
|
+
)
|
55
|
+
|
56
|
+
elif chunker_type == ChunkerType.LANGCHAIN:
|
57
|
+
return chunk_funcs[ChunkerType.LANGCHAIN](
|
58
|
+
documents,
|
59
|
+
documents_convert_func,
|
60
|
+
chunker,
|
61
|
+
chunker_args,
|
62
|
+
chunker_kwargs,
|
63
|
+
to_datanode,
|
64
|
+
)
|
65
|
+
|
66
|
+
elif chunker_type == ChunkerType.LLAMAINDEX:
|
67
|
+
return chunk_funcs[ChunkerType.LLAMAINDEX](
|
68
|
+
documents,
|
69
|
+
documents_convert_func,
|
70
|
+
chunker,
|
71
|
+
chunker_args,
|
72
|
+
chunker_kwargs,
|
73
|
+
to_datanode,
|
74
|
+
)
|
75
|
+
|
76
|
+
elif chunker_type == ChunkerType.SELFDEFINED:
|
77
|
+
return chunk_funcs[ChunkerType.SELFDEFINED](
|
78
|
+
documents,
|
79
|
+
chunker,
|
80
|
+
chunker_args,
|
81
|
+
chunker_kwargs,
|
82
|
+
chunking_kwargs,
|
83
|
+
to_datanode,
|
84
|
+
)
|
85
|
+
|
86
|
+
else:
|
87
|
+
raise ValueError(
|
88
|
+
f"{chunker_type} is not supported. Please choose from {list(ChunkerType)}"
|
89
|
+
)
|
90
|
+
|
91
|
+
|
92
|
+
def _self_defined_chunker(
|
93
|
+
documents,
|
94
|
+
chunker,
|
95
|
+
chunker_args,
|
96
|
+
chunker_kwargs,
|
97
|
+
chunking_kwargs,
|
98
|
+
to_datanode: bool | Callable,
|
99
|
+
):
|
100
|
+
try:
|
101
|
+
splitter = chunker(*chunker_args, **chunker_kwargs)
|
102
|
+
nodes = splitter.split(documents, **chunking_kwargs)
|
103
|
+
except Exception as e:
|
104
|
+
raise ValueError(
|
105
|
+
f"Self defined chunker {chunker} is not valid. Error: {e}"
|
106
|
+
) from e
|
107
|
+
|
108
|
+
if isinstance(to_datanode, bool) and to_datanode is True:
|
109
|
+
raise ValueError("Please define a valid parser to DataNode.")
|
110
|
+
elif isinstance(to_datanode, Callable):
|
111
|
+
nodes = _datanode_parser(nodes, to_datanode)
|
112
|
+
return nodes
|
113
|
+
|
114
|
+
|
115
|
+
def _llama_index_chunker(
|
116
|
+
documents,
|
117
|
+
documents_convert_func,
|
118
|
+
chunker,
|
119
|
+
chunker_args,
|
120
|
+
chunker_kwargs,
|
121
|
+
to_datanode: bool | Callable,
|
122
|
+
):
|
123
|
+
if documents_convert_func:
|
124
|
+
documents = documents_convert_func(documents, "llama_index")
|
125
|
+
nodes = LlamaIndexBridge.llama_index_parse_node(
|
126
|
+
documents, chunker, chunker_args, chunker_kwargs
|
127
|
+
)
|
128
|
+
|
129
|
+
if isinstance(to_datanode, bool) and to_datanode is True:
|
130
|
+
nodes = [DataNode.from_llama_index(i) for i in nodes]
|
131
|
+
elif isinstance(to_datanode, Callable):
|
132
|
+
nodes = _datanode_parser(nodes, to_datanode)
|
133
|
+
return nodes
|
134
|
+
|
135
|
+
|
136
|
+
def _langchain_chunker(
|
137
|
+
documents,
|
138
|
+
documents_convert_func,
|
139
|
+
chunker,
|
140
|
+
chunker_args,
|
141
|
+
chunker_kwargs,
|
142
|
+
to_datanode: bool | Callable,
|
143
|
+
):
|
144
|
+
if documents_convert_func:
|
145
|
+
documents = documents_convert_func(documents, "langchain")
|
146
|
+
nodes = LangchainBridge.langchain_text_splitter(
|
147
|
+
documents, chunker, chunker_args, chunker_kwargs
|
148
|
+
)
|
149
|
+
if isinstance(to_datanode, bool) and to_datanode is True:
|
150
|
+
if isinstance(documents, str):
|
151
|
+
nodes = [DataNode(content=i) for i in nodes]
|
152
|
+
else:
|
153
|
+
nodes = [DataNode.from_langchain(i) for i in nodes]
|
154
|
+
elif isinstance(to_datanode, Callable):
|
155
|
+
nodes = _datanode_parser(nodes, to_datanode)
|
156
|
+
return nodes
|
157
|
+
|
158
|
+
|
159
|
+
def _plain_chunker(documents, chunker, chunker_args, chunker_kwargs):
|
160
|
+
try:
|
161
|
+
if chunker == "text_chunker":
|
162
|
+
chunker = text_chunker
|
163
|
+
return chunker(documents, chunker_args, chunker_kwargs)
|
164
|
+
except Exception as e:
|
165
|
+
raise ValueError(
|
166
|
+
f"Reader {chunker} is currently not supported. Error: {e}"
|
167
|
+
) from e
|
168
|
+
|
169
|
+
|
170
|
+
chunk_funcs = {
|
171
|
+
ChunkerType.PLAIN: _plain_chunker,
|
172
|
+
ChunkerType.LANGCHAIN: _langchain_chunker,
|
173
|
+
ChunkerType.LLAMAINDEX: _llama_index_chunker,
|
174
|
+
ChunkerType.SELFDEFINED: _self_defined_chunker,
|
175
|
+
}
|
File without changes
|
@@ -0,0 +1,152 @@
|
|
1
|
+
from typing import Callable
|
2
|
+
|
3
|
+
from lionagi.core.schema import DataNode
|
4
|
+
from ..bridge.langchain_.langchain_bridge import LangchainBridge
|
5
|
+
from ..bridge.llamaindex_.llama_index_bridge import LlamaIndexBridge
|
6
|
+
|
7
|
+
from .load_util import dir_to_nodes, ReaderType, _datanode_parser
|
8
|
+
|
9
|
+
|
10
|
+
def text_reader(args, kwargs):
|
11
|
+
"""
|
12
|
+
Reads text files from a directory and converts them to DataNode instances.
|
13
|
+
|
14
|
+
Args:
|
15
|
+
args: Positional arguments for the dir_to_nodes function.
|
16
|
+
kwargs: Keyword arguments for the dir_to_nodes function.
|
17
|
+
|
18
|
+
Returns:
|
19
|
+
A list of DataNode instances.
|
20
|
+
|
21
|
+
Example usage:
|
22
|
+
>>> args = ['path/to/text/files']
|
23
|
+
>>> kwargs = {'file_extension': 'txt'}
|
24
|
+
>>> nodes = text_reader(args, kwargs)
|
25
|
+
"""
|
26
|
+
return dir_to_nodes(*args, **kwargs)
|
27
|
+
|
28
|
+
|
29
|
+
def load(
|
30
|
+
reader: str | Callable = "SimpleDirectoryReader",
|
31
|
+
input_dir=None,
|
32
|
+
input_files=None,
|
33
|
+
recursive: bool = False,
|
34
|
+
required_exts: list[str] = None,
|
35
|
+
reader_type=ReaderType.LLAMAINDEX,
|
36
|
+
reader_args=None,
|
37
|
+
reader_kwargs=None,
|
38
|
+
load_args=None,
|
39
|
+
load_kwargs=None,
|
40
|
+
to_datanode: bool | Callable = True,
|
41
|
+
):
|
42
|
+
|
43
|
+
if reader_args is None:
|
44
|
+
reader_args = []
|
45
|
+
if reader_kwargs is None:
|
46
|
+
reader_kwargs = {}
|
47
|
+
if load_args is None:
|
48
|
+
load_args = []
|
49
|
+
if load_kwargs is None:
|
50
|
+
load_kwargs = {}
|
51
|
+
|
52
|
+
if reader_type == ReaderType.PLAIN:
|
53
|
+
return read_funcs[ReaderType.PLAIN](reader, reader_args, reader_kwargs)
|
54
|
+
|
55
|
+
if reader_type == ReaderType.LANGCHAIN:
|
56
|
+
return read_funcs[ReaderType.LANGCHAIN](
|
57
|
+
reader, reader_args, reader_kwargs, to_datanode
|
58
|
+
)
|
59
|
+
|
60
|
+
elif reader_type == ReaderType.LLAMAINDEX:
|
61
|
+
if input_dir is not None:
|
62
|
+
reader_kwargs["input_dir"] = input_dir
|
63
|
+
if input_files is not None:
|
64
|
+
reader_kwargs["input_files"] = input_files
|
65
|
+
if recursive:
|
66
|
+
reader_kwargs["recursive"] = True
|
67
|
+
if required_exts is not None:
|
68
|
+
reader_kwargs["required_exts"] = required_exts
|
69
|
+
|
70
|
+
return read_funcs[ReaderType.LLAMAINDEX](
|
71
|
+
reader, reader_args, reader_kwargs, load_args, load_kwargs, to_datanode
|
72
|
+
)
|
73
|
+
|
74
|
+
elif reader_type == ReaderType.SELFDEFINED:
|
75
|
+
return read_funcs[ReaderType.SELFDEFINED](
|
76
|
+
reader, reader_args, reader_kwargs, load_args, load_kwargs, to_datanode
|
77
|
+
)
|
78
|
+
|
79
|
+
else:
|
80
|
+
raise ValueError(
|
81
|
+
f"{reader_type} is not supported. Please choose from {list(ReaderType)}"
|
82
|
+
)
|
83
|
+
|
84
|
+
|
85
|
+
def _plain_reader(reader, reader_args, reader_kwargs):
|
86
|
+
try:
|
87
|
+
if reader == "text_reader":
|
88
|
+
reader = text_reader
|
89
|
+
return reader(reader_args, reader_kwargs)
|
90
|
+
except Exception as e:
|
91
|
+
raise ValueError(
|
92
|
+
f"Reader {reader} is currently not supported. Error: {e}"
|
93
|
+
) from e
|
94
|
+
|
95
|
+
|
96
|
+
def _langchain_reader(reader, reader_args, reader_kwargs, to_datanode: bool | Callable):
|
97
|
+
nodes = LangchainBridge.langchain_loader(reader, reader_args, reader_kwargs)
|
98
|
+
if isinstance(to_datanode, bool) and to_datanode is True:
|
99
|
+
nodes = [DataNode.from_langchain(i) for i in nodes]
|
100
|
+
|
101
|
+
elif isinstance(to_datanode, Callable):
|
102
|
+
nodes = _datanode_parser(nodes, to_datanode)
|
103
|
+
return nodes
|
104
|
+
|
105
|
+
|
106
|
+
def _llama_index_reader(
|
107
|
+
reader,
|
108
|
+
reader_args,
|
109
|
+
reader_kwargs,
|
110
|
+
load_args,
|
111
|
+
load_kwargs,
|
112
|
+
to_datanode: bool | Callable,
|
113
|
+
):
|
114
|
+
nodes = LlamaIndexBridge.llama_index_read_data(
|
115
|
+
reader, reader_args, reader_kwargs, load_args, load_kwargs
|
116
|
+
)
|
117
|
+
if isinstance(to_datanode, bool) and to_datanode is True:
|
118
|
+
nodes = [DataNode.from_llama_index(i) for i in nodes]
|
119
|
+
elif isinstance(to_datanode, Callable):
|
120
|
+
nodes = _datanode_parser(nodes, to_datanode)
|
121
|
+
return nodes
|
122
|
+
|
123
|
+
|
124
|
+
def _self_defined_reader(
|
125
|
+
reader,
|
126
|
+
reader_args,
|
127
|
+
reader_kwargs,
|
128
|
+
load_args,
|
129
|
+
load_kwargs,
|
130
|
+
to_datanode: bool | Callable,
|
131
|
+
):
|
132
|
+
try:
|
133
|
+
loader = reader(*reader_args, **reader_kwargs)
|
134
|
+
nodes = loader.load(*load_args, **load_kwargs)
|
135
|
+
except Exception as e:
|
136
|
+
raise ValueError(
|
137
|
+
f"Self defined reader {reader} is not valid. Error: {e}"
|
138
|
+
) from e
|
139
|
+
|
140
|
+
if isinstance(to_datanode, bool) and to_datanode is True:
|
141
|
+
raise ValueError("Please define a valid parser to DataNode.")
|
142
|
+
elif isinstance(to_datanode, Callable):
|
143
|
+
nodes = _datanode_parser(nodes, to_datanode)
|
144
|
+
return nodes
|
145
|
+
|
146
|
+
|
147
|
+
read_funcs = {
|
148
|
+
ReaderType.PLAIN: _plain_reader,
|
149
|
+
ReaderType.LANGCHAIN: _langchain_reader,
|
150
|
+
ReaderType.LLAMAINDEX: _llama_index_reader,
|
151
|
+
ReaderType.SELFDEFINED: _self_defined_reader,
|
152
|
+
}
|
@@ -0,0 +1,266 @@
|
|
1
|
+
# use utils and schema
|
2
|
+
import math
|
3
|
+
from enum import Enum
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import List, Union, Dict, Any, Tuple
|
6
|
+
|
7
|
+
from lionagi.libs import convert, func_call
|
8
|
+
from lionagi.core.schema import DataNode
|
9
|
+
|
10
|
+
|
11
|
+
class ReaderType(str, Enum):
|
12
|
+
PLAIN = "plain"
|
13
|
+
LANGCHAIN = "langchain"
|
14
|
+
LLAMAINDEX = "llama_index"
|
15
|
+
SELFDEFINED = "self_defined"
|
16
|
+
|
17
|
+
|
18
|
+
class ChunkerType(str, Enum):
|
19
|
+
PLAIN = "plain" # default
|
20
|
+
LANGCHAIN = "langchain" # using langchain functions
|
21
|
+
LLAMAINDEX = "llama_index" # using llamaindex functions
|
22
|
+
SELFDEFINED = "self_defined" # create custom functions
|
23
|
+
|
24
|
+
|
25
|
+
def dir_to_path(
|
26
|
+
dir: str, ext: str, recursive: bool = False, flatten: bool = True
|
27
|
+
) -> List[Path]:
|
28
|
+
"""
|
29
|
+
Generates a list of file paths from a directory with the given file extension.
|
30
|
+
|
31
|
+
Parameters:
|
32
|
+
dir (str): The directory to search for files.
|
33
|
+
|
34
|
+
ext (str): The file extension to filter by.
|
35
|
+
|
36
|
+
recursive (bool): Whether to search subdirectories recursively. Defaults to False.
|
37
|
+
|
38
|
+
flatten (bool): Whether to flatten the list. Defaults to True.
|
39
|
+
|
40
|
+
Returns:
|
41
|
+
List[Path]: A list of Paths to the files.
|
42
|
+
|
43
|
+
Raises:
|
44
|
+
ValueError: If the directory or extension is invalid.
|
45
|
+
"""
|
46
|
+
|
47
|
+
def _dir_to_path(ext):
|
48
|
+
tem = "**/*" if recursive else "*"
|
49
|
+
return list(Path(dir).glob(tem + ext))
|
50
|
+
|
51
|
+
try:
|
52
|
+
return convert.to_list(
|
53
|
+
func_call.lcall(ext, _dir_to_path, flatten=True), flatten=flatten
|
54
|
+
)
|
55
|
+
except:
|
56
|
+
raise ValueError("Invalid directory or extension, please check the path")
|
57
|
+
|
58
|
+
|
59
|
+
def dir_to_nodes(
    dir: str,
    ext: Union[List[str], str],
    recursive: bool = False,
    flatten: bool = True,
    clean_text: bool = True,
) -> List[DataNode]:
    """
    Builds one DataNode for every file in a directory that matches the given extension(s).

    The matching paths are collected with `dir_to_path`, each file is read with `read_text`, and every resulting (content, metadata) pair is wrapped in a DataNode.

    Parameters:
        dir (str): The directory path from which to read files.
        ext: The file extension(s) to include. Can be a single string or a list of strings.
        recursive (bool, optional): Passed to `dir_to_path`; if True, subdirectories are searched as well. Defaults to False.
        flatten (bool, optional): Passed to `dir_to_path`. Defaults to True.
        clean_text (bool, optional): Passed to `read_text` as `clean`. Defaults to True.

    Returns:
        list: The DataNode objects created from the matching files.

    Example:
        nodes = dir_to_nodes("/path/to/dir", ".txt", recursive=True)
        # Reads every .txt file below /path/to/dir and wraps each one in a DataNode.
    """

    def _to_node(file_info):
        # file_info is the (content, metadata) pair produced by read_text
        return DataNode(content=file_info[0], metadata=file_info[1])

    matched_paths = dir_to_path(dir, ext, recursive, flatten)
    contents_with_metadata = func_call.lcall(
        matched_paths, read_text, clean=clean_text
    )
    return func_call.lcall(contents_with_metadata, _to_node)
|
90
|
+
|
91
|
+
|
92
|
+
def chunk_text(
    input: str, chunk_size: int, overlap: float, threshold: int
) -> List[Union[str, None]]:
    """
    Chunks the input text into smaller parts, with optional overlap and threshold for final chunk.

    Parameters:
        input (str): The input text to chunk. Non-string input is converted with `convert.to_str`.

        chunk_size (int): The size of each chunk, in characters. Must be positive.

        overlap (float): The amount of overlap between chunks. A value strictly between 0 and 1 is treated as a fraction of `chunk_size`; any other value is treated as a number of characters. Half of the overlap is added on each side of a chunk boundary.

        threshold (int): The minimum size of the final chunk. A shorter remainder is merged into the preceding chunk instead of becoming its own chunk.

    Returns:
        List[Union[str, None]]: A list of text chunks. Text that fits into a single chunk (including empty text) is returned as a one-element list.

    Raises:
        ValueError: If `chunk_size` is not positive, or if an error occurs during chunking.
    """

    def _chunk_n1():
        return [input]

    def _chunk_n2():
        # A remainder that is too small is not worth a chunk of its own.
        if len(input) - chunk_size <= threshold:
            return _chunk_n1()
        return [
            input[: chunk_size + overlap_size],
            # max(..., 0) keeps an oversized overlap from producing a negative
            # index, which would slice from the end of the text.
            input[max(chunk_size - overlap_size, 0) :],
        ]

    def _chunk_n3():
        chunks = [input[: chunk_size + overlap_size]]
        for i in range(1, n_chunks - 1):
            start_idx = max(chunk_size * i - overlap_size, 0)
            end_idx = chunk_size * (i + 1) + overlap_size
            chunks.append(input[start_idx:end_idx])

        last_start = chunk_size * (n_chunks - 1)
        if len(input) - last_start > threshold:
            chunks.append(input[max(last_start - overlap_size, 0) :])
        else:
            # The previous chunk already ends at last_start + overlap_size,
            # so append only the text that follows it.
            chunks[-1] += input[last_start + overlap_size :]

        return chunks

    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number")

    try:
        if not isinstance(input, str):
            input = convert.to_str(input)

        n_chunks = math.ceil(len(input) / chunk_size)

        if 0 < overlap < 1:
            # Fractional overlap (e.g. 0.1): scale by chunk_size first, since
            # int(overlap / 2) alone would truncate it to 0.
            overlap_size = int(chunk_size * overlap / 2)
        else:
            overlap_size = int(overlap / 2)

        # n_chunks is 0 for empty text; treat it like the single-chunk case
        # rather than falling through and returning None.
        if n_chunks <= 1:
            return _chunk_n1()

        if n_chunks == 2:
            return _chunk_n2()

        return _chunk_n3()

    except Exception as e:
        raise ValueError(f"An error occurred while chunking the text. {e}") from e
|
161
|
+
|
162
|
+
|
163
|
+
def read_text(filepath: Union[str, Path], clean: bool = True) -> Tuple[str, dict]:
    """
    Reads text from a file and optionally cleans it, returning the content and metadata.

    Parameters:
        filepath (Union[str, Path]): The path to the file to read.

        clean (bool): Whether to clean the text by replacing certain characters (backslashes, newlines, tabs and single quotes) with spaces. Defaults to True.

    Returns:
        Tuple[str, dict]: A tuple containing the content and metadata of the file. The metadata holds the keys "file", "size", "creation_date", "modified_date" and "last_accessed_date"; the dates are stringified `date` objects.

    Raises:
        FileNotFoundError: If the file cannot be found.

        PermissionError: If there are permissions issues.

        OSError: For other OS-related errors.
    """
    import os
    from datetime import datetime

    def _date_str(timestamp):
        # Only the calendar date of the timestamp is kept.
        return str(datetime.fromtimestamp(timestamp).date())

    def _get_metadata():
        return {
            "file": convert.to_str(filepath),
            "size": os.path.getsize(filepath),
            "creation_date": _date_str(os.path.getctime(filepath)),
            "modified_date": _date_str(os.path.getmtime(filepath)),
            "last_accessed_date": _date_str(os.path.getatime(filepath)),
        }

    # Errors from open/read propagate unchanged; the former
    # `except Exception as e: raise e` wrapper added nothing.
    with open(filepath, "r") as f:
        content = f.read()

    if clean:
        # Define characters to replace and their replacements
        # NOTE(review): the `" ": " "` entry is a no-op as written; it may have
        # been meant to collapse a double space -- confirm the intended key.
        replacements = {"\\": " ", "\n": " ", "\t": " ", " ": " ", "'": " "}
        for old, new in replacements.items():
            content = content.replace(old, new)

    return content, _get_metadata()
|
212
|
+
|
213
|
+
|
214
|
+
def _file_to_chunks(
    input: Dict[str, Any],
    field: str = "content",
    chunk_size: int = 1500,
    overlap: float = 0.1,
    threshold: int = 200,
) -> List[Dict[str, Any]]:
    """
    Splits one field of a record into chunks and returns one record per chunk.

    Parameters:
        input (Dict[str, Any]): The record to split. Every key except `field` is copied into each output record.

        field (str): The key whose value is chunked with `chunk_text`. Defaults to "content".

        chunk_size (int): Passed to `chunk_text`. Defaults to 1500.

        overlap (float): Passed to `chunk_text` and stored as "chunk_overlap". Defaults to 0.1.

        threshold (int): Passed to `chunk_text` and stored as "chunk_threshold". Defaults to 200.

    Returns:
        List[Dict[str, Any]]: One dict per chunk, carrying the copied keys plus "chunk_overlap", "chunk_threshold", "file_chunks" (total number of chunks), "chunk_id" (1-based), "chunk_size" (length of the chunk) and "chunk_<field>" (the chunk itself).

    Raises:
        ValueError: If anything fails while building the chunk records.
    """
    try:
        # Keys shared by every chunk record.
        shared = {key: value for key, value in input.items() if key != field}
        shared["chunk_overlap"] = overlap
        shared["chunk_threshold"] = threshold

        pieces = chunk_text(
            input[field], chunk_size=chunk_size, overlap=overlap, threshold=threshold
        )

        return [
            {
                **shared,
                "file_chunks": len(pieces),
                "chunk_id": position,
                "chunk_size": len(piece),
                f"chunk_{field}": piece,
            }
            for position, piece in enumerate(pieces, start=1)
        ]

    except Exception as e:
        raise ValueError(f"An error occurred while chunking the file. {e}") from e
|
243
|
+
|
244
|
+
|
245
|
+
# needs doing TODO
|
246
|
+
def file_to_chunks(
    input,
    # project='project',
    # output_dir='data/logs/sources/',
    chunk_func=_file_to_chunks,
    **kwargs,
):
    """
    Applies a chunking function to the input and returns a single flat list of results.

    Parameters:
        input: The item(s) passed to `func_call.lcall` for chunking.

        chunk_func: The function applied by `func_call.lcall`. Defaults to `_file_to_chunks`.

        **kwargs: Extra keyword arguments forwarded to `chunk_func` through `func_call.lcall`.

    Returns:
        The flattened list produced by `convert.to_list(..., flatten=True)`.
    """
    # Options not implemented yet:
    # out_to_csv=False,
    # filename=None,
    # verbose=True,
    # timestamp=True,
    # logger=None,
    chunked = func_call.lcall(input, chunk_func, **kwargs)
    return convert.to_list(chunked, flatten=True)
|
259
|
+
|
260
|
+
|
261
|
+
def _datanode_parser(nodes, parser):
|
262
|
+
|
263
|
+
try:
|
264
|
+
return parser(nodes)
|
265
|
+
except Exception as e:
|
266
|
+
raise ValueError(f"DataNode parser {parser} failed. Error:{e}") from e
|
lionagi/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.0.
|
1
|
+
__version__ = "0.0.315"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lionagi
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.315
|
4
4
|
Summary: Towards automated general intelligence.
|
5
5
|
Author: HaiyangLi
|
6
6
|
Author-email: Haiyang Li <ocean@lionagi.ai>
|
@@ -43,39 +43,35 @@ To contribute, you need to make a fork first, and then make pull request from yo
|
|
43
43
|
|
44
44
|
# LionAGI
|
45
45
|
|
46
|
-
|
47
|
-
**LionAGI is undergoing major transformation.**
|
46
|
+
**Powerful Intelligent Workflow Automation**
|
48
47
|
|
49
48
|
It is an intelligent agentic workflow automation framework. It introduces advanced ML models into any existing workflows and data infrastructure.
|
50
49
|
|
51
|
-
#### Currently, it can
|
52
50
|
|
53
|
-
|
54
|
-
|
55
|
-
-
|
51
|
+
### Currently, it can
|
52
|
+
|
53
|
+
- interact with almost any models including local*
|
54
|
+
- run interactions in parallel for most models (OpenRouter, OpenAI, Ollama, litellm...)
|
55
|
+
- produce structured pydantic outputs with flexible usage\*\*
|
56
56
|
- automate workflow via graph based agents
|
57
57
|
- use advanced prompting techniques, i.e. ReAct (reason-action)
|
58
58
|
- …
|
59
|
-
|
60
|
-
#### It aims to:
|
61
59
|
|
62
|
-
|
60
|
+
### It aims to:
|
61
|
+
|
62
|
+
- provide a centralized agent-managed framework for, "ML-powered tools coordination".
|
63
|
+
- The ways of coordination and the possible paths that can occur among nodes are what we also refer to as `workflow` (the concept of workflow is still in design).
|
63
64
|
- such that, people can utilize intelligence to solve their problems in real life.
|
64
65
|
- achieve the goal by dramatically lowering the barrier of entries for creating use-case/domain specific tools.
|
65
66
|
|
66
|
-
#### Why?
|
67
67
|
|
68
|
-
|
69
|
-
|
70
|
-
---
|
71
|
-
|
72
|
-
|
73
|
-
**Powerful Intelligent Workflow Automation**
|
68
|
+
All notebooks should run, as of 0.0.313,
|
74
69
|
|
75
|
-
|
70
|
+
\* if there are models on providers that have not been configured, you can do so by configuring your own AI providers, and endpoints.
|
76
71
|
|
72
|
+
\*\* Structured Input/Output, Graph based agent system, as well as more advanced prompting techniques are undergoing fast iterations...
|
77
73
|
|
78
|
-
|
74
|
+
### Why Automating Workflows?
|
79
75
|
|
80
76
|
Intelligent AI models, such as [Large Language Models (LLMs)](https://en.wikipedia.org/wiki/Large_language_model), have introduced new possibilities for human-computer interaction. LLMs are drawing a lot of attention worldwide due to their “one model fits all” nature and incredible performance. One way of using an LLM is as a search engine; however, this usage is complicated by the fact that LLMs [hallucinate](https://arxiv.org/abs/2311.05232).
|
81
77
|
|
@@ -1,5 +1,5 @@
|
|
1
|
-
lionagi/__init__.py,sha256=
|
2
|
-
lionagi/version.py,sha256=
|
1
|
+
lionagi/__init__.py,sha256=i6Ci7FebU2s4EVVnBFj1Dsi5RvP80JqeSqW-iripRPg,418
|
2
|
+
lionagi/version.py,sha256=Zazlk4sxt5cxFTrUeqVNVrVkGcIAkFTm-b9a6VLDqkw,24
|
3
3
|
lionagi/core/__init__.py,sha256=M5YXmJJiLcR5QB1VRmYvec14cHT6pKvxZOEs737BmP8,322
|
4
4
|
lionagi/core/agent/__init__.py,sha256=IVcw9yn_QMBJGBou1Atck98Us9uwPGFs-gERTv0RWew,59
|
5
5
|
lionagi/core/agent/base_agent.py,sha256=CRUpl7Zc5d2H9uCa17nMiFAnhKM_UH5Ujo1NHo3JAxg,3371
|
@@ -10,10 +10,14 @@ lionagi/core/branch/branch_flow_mixin.py,sha256=yXEfpxTaJ1aoDQQnCBYx5wShn9zt1ki8
|
|
10
10
|
lionagi/core/branch/executable_branch.py,sha256=Yi0t4fDNMa5UaHo15sX-zBchr5auvXOtSc0RnSpG2a8,12151
|
11
11
|
lionagi/core/branch/util.py,sha256=os7Qp7HpDfyyCvdkbBTyIQ3AYHfzUP0M684W4XMDHN4,11813
|
12
12
|
lionagi/core/branch/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
-
lionagi/core/direct/__init__.py,sha256=
|
14
|
-
lionagi/core/direct/
|
15
|
-
lionagi/core/direct/
|
16
|
-
lionagi/core/direct/
|
13
|
+
lionagi/core/direct/__init__.py,sha256=P17UfY3tLFgu0ncxMy4FRoVDlvOGUc7jzeowN41akBk,188
|
14
|
+
lionagi/core/direct/cot.py,sha256=3hz0CjFN2Bw5IW1tOh26fzd1UVrV_41KKIS7pzCd6ok,26
|
15
|
+
lionagi/core/direct/plan.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
|
+
lionagi/core/direct/predict.py,sha256=tkxvN9m_XOf3SW8xTi5yanXylV8xVNRn9a8DeGd9xgs,6355
|
17
|
+
lionagi/core/direct/react.py,sha256=IJ6sKgajCjhQ_UpJHf-j71tnVehEtIXFnyeB6bNlZwk,4196
|
18
|
+
lionagi/core/direct/score.py,sha256=QHO11WtAUfMEdfa1K-SRyn5uqf6_N0UmyCbEJsiqcQw,10328
|
19
|
+
lionagi/core/direct/select.py,sha256=pPwesq29C3JZ5J3piwjBHqjOCsEM4uChPKMGBRxtSTE,6127
|
20
|
+
lionagi/core/direct/sentiment.py,sha256=rNwBs-I2XICOwsXxFvfM1Tlc_afsVcRCNCXCxfxm_2k,27
|
17
21
|
lionagi/core/direct/utils.py,sha256=yqu4qv9aaU4qzUD9QovtN2m21QySzdMLmcBp5recWC0,2333
|
18
22
|
lionagi/core/direct/vote.py,sha256=tjs-EYDGlGB3J6d_nSl1oIuJYHtxncjustBbU_pXDqQ,2449
|
19
23
|
lionagi/core/flow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -30,10 +34,12 @@ lionagi/core/mail/__init__.py,sha256=FTMSvImGo3dQVnY1FYC_K0Ido9VwJa0DDJogR3nqSeM
|
|
30
34
|
lionagi/core/mail/mail_manager.py,sha256=M1l0eWdSj0oMLBeKIVqCJQ1Vn4GdIyf8tABagrsgwxc,3866
|
31
35
|
lionagi/core/mail/schema.py,sha256=qA5MsjJGeXQodEWOnKNF3l0_xnssNU3lroorUYuHq0Y,1625
|
32
36
|
lionagi/core/messages/__init__.py,sha256=NmK1xeR7xMgsbWxW-wm7d82QXKrRu6t7PKO5REpwO_k,99
|
33
|
-
lionagi/core/messages/schema.py,sha256=
|
37
|
+
lionagi/core/messages/schema.py,sha256=BCT9sv3I2CiYkqXlGyvQmKb4xoL5xvhInR8RYWxeIiI,19190
|
34
38
|
lionagi/core/prompt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
35
|
-
lionagi/core/prompt/
|
36
|
-
lionagi/core/prompt/
|
39
|
+
lionagi/core/prompt/action_template.py,sha256=B_lpTUeAL1x_hwhsvj5EEFU5hxi5Om7Gjtom4qVKtPo,1035
|
40
|
+
lionagi/core/prompt/field_validator.py,sha256=QqeINioagGh4Qj-guZaFX2exFezTTGUCfQLnrnWPEsQ,8090
|
41
|
+
lionagi/core/prompt/prompt_template.py,sha256=W6CRXW7fwaSNbj7f7V9Az7yXEtHcLcCZonuMqJmzL8s,11755
|
42
|
+
lionagi/core/prompt/scored_template.py,sha256=jrnMEB22eItERmunTPJbmJdgSMoh0pdHMXfJUAUa_S4,410
|
37
43
|
lionagi/core/schema/__init__.py,sha256=C2dNOGQdI0c8vISFTG0h_7jhVnCpcY9kJbPEBmPMJO8,521
|
38
44
|
lionagi/core/schema/action_node.py,sha256=uTjHLe-GmR1p_9rT_BCF0JC4FB9Dd1Rr5c_2pAL01o8,736
|
39
45
|
lionagi/core/schema/base_mixin.py,sha256=c4YAxN5pOvmBZqvL-0bBGrJ7T9LPItVqPNrTwkP0FHk,11059
|
@@ -46,8 +52,8 @@ lionagi/core/schema/structure.py,sha256=9BFKpkvrBGgZ88Zim7HqsK3AC8qserwN3FdzJyvJ
|
|
46
52
|
lionagi/core/session/__init__.py,sha256=A1W67UXOGAIRuSmFZPtGRqoPq0spTkjSiGCucOEEhTE,52
|
47
53
|
lionagi/core/session/session.py,sha256=vNqq5DbEEGfVaMPG8x8LAVnOXlZZ7jUeHgWnQtCRbqY,37975
|
48
54
|
lionagi/core/tool/__init__.py,sha256=hpv1NzjiPtpng2Ie_fXgIP-lVCSG1fTub-gO9Q41ee0,95
|
49
|
-
lionagi/core/tool/manual.py,sha256=
|
50
|
-
lionagi/core/tool/tool_manager.py,sha256=
|
55
|
+
lionagi/core/tool/manual.py,sha256=75XhIkEkVoscu5qKBm3cTWmSWDFN-uRmK-e26lbSEso,64
|
56
|
+
lionagi/core/tool/tool_manager.py,sha256=RcF4ktKC_3Ft3xgyr8tj4HuFwKOArJsM7uxK6GWVewY,11306
|
51
57
|
lionagi/integrations/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
52
58
|
lionagi/integrations/bridge/__init__.py,sha256=ee5IeCkDOD2uhbzqxg_xDxaG-Q3BPPZCYltBjzg2gSs,169
|
53
59
|
lionagi/integrations/bridge/langchain_/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -56,23 +62,27 @@ lionagi/integrations/bridge/langchain_/langchain_bridge.py,sha256=-lnJtyf4iJEwHK
|
|
56
62
|
lionagi/integrations/bridge/llamaindex_/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
57
63
|
lionagi/integrations/bridge/llamaindex_/index.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
58
64
|
lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py,sha256=SSnLSIu06xVsghTios01QKGrc2xCq3oC8fVNGO4QkF8,5064
|
59
|
-
lionagi/integrations/bridge/llamaindex_/node_parser.py,sha256=
|
65
|
+
lionagi/integrations/bridge/llamaindex_/node_parser.py,sha256=d8SPD6EMf9bZ6824jjeZOWmwm7BHBZQ0qGq1JnsKh9k,3458
|
60
66
|
lionagi/integrations/bridge/llamaindex_/reader.py,sha256=VxdTk5h3a3_5RQzN15q75XGli52umhz9gLUrKk1Sg90,8235
|
61
67
|
lionagi/integrations/bridge/llamaindex_/textnode.py,sha256=OszGitHZ36zbG4DCGWUnSV6EO7wChEH2VA5M50iBojs,2322
|
62
68
|
lionagi/integrations/bridge/pydantic_/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
63
|
-
lionagi/integrations/bridge/pydantic_/
|
64
|
-
lionagi/integrations/
|
69
|
+
lionagi/integrations/bridge/pydantic_/pydantic_bridge.py,sha256=TVh7sQX_LKERUvv1nxsA2JICY1S6ptPr3qFqzgHfGCY,87
|
70
|
+
lionagi/integrations/chunker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
71
|
+
lionagi/integrations/chunker/chunk.py,sha256=huLtaLVzs2Py6F9tFHdU0o67JncOiX4WfmmmFccbcgI,5107
|
65
72
|
lionagi/integrations/config/__init__.py,sha256=zzQGZe3H5vofcNWSjjoqe_gqHpCO8Yl7FefmrUpLqnw,133
|
66
73
|
lionagi/integrations/config/mlx_configs.py,sha256=xbostqjnk3aAN-qKyC54YBprHPA38C8YDevXMMEHXWY,44
|
67
74
|
lionagi/integrations/config/oai_configs.py,sha256=aoKx91Nv5eQU2F8v8EsALXQCEEfy3sfCgUYjCYEGJPU,2754
|
68
75
|
lionagi/integrations/config/ollama_configs.py,sha256=Np73p86bTJtxYwAj3lr5l8V9IMu7rHJPdyzHEqyzI2Q,17
|
69
76
|
lionagi/integrations/config/openrouter_configs.py,sha256=Sz4IHrriXoB8RQ0Pj23Q13Ps4AnZ0BWrh5DhL18NLwQ,1379
|
77
|
+
lionagi/integrations/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
78
|
+
lionagi/integrations/loader/load.py,sha256=XYG93HzxKvnWIrPq-MotY6JTiuB_11rSADjOq_OdeiM,4582
|
79
|
+
lionagi/integrations/loader/load_util.py,sha256=7VyoQ3z4kYscjqOGFYrhPq1U0yPIR9NHWCESfv2gmp4,8369
|
70
80
|
lionagi/integrations/provider/__init__.py,sha256=MJhnq2tkBRcMH-3utc0G-Co20MmsxLBbp3fUwHrJGQ8,198
|
71
81
|
lionagi/integrations/provider/litellm.py,sha256=l3sTtIPDeM_9soTLj9gpVfFWWDzFfIZ7rbVcuzeql2w,1181
|
72
82
|
lionagi/integrations/provider/mistralai.py,sha256=G-StbfrnUcWZvl0eRby6CZYXxmJf6BRMFzDaix-brmU,7
|
73
83
|
lionagi/integrations/provider/mlx_service.py,sha256=ilbBupRs3HeOeO2iwW0prVq57krPu9db8aYLgq_cN0A,1325
|
74
84
|
lionagi/integrations/provider/oai.py,sha256=ZxfVz-ZdpeYqXQFq_1_Lqlz4sG8zmp48MosGIpV5ggU,4222
|
75
|
-
lionagi/integrations/provider/ollama.py,sha256=
|
85
|
+
lionagi/integrations/provider/ollama.py,sha256=dOKqzqjpwYYGw9vGKDVzVh9PAem7oFG09cfKEEB__oc,1314
|
76
86
|
lionagi/integrations/provider/openrouter.py,sha256=ZEG2lLbp-qb7r95CFAuPz7EqAdsMuJxMJXfgWhaBHsk,1856
|
77
87
|
lionagi/integrations/provider/services.py,sha256=zLX_C0p1eI5cfxkxlxIVEsU0k_fknhV1-LoXPJZFE4w,5388
|
78
88
|
lionagi/integrations/provider/transformers.py,sha256=E6CTtm7pa3aa0IqYklWvhXz9YWlh3p_lgKaJXYfXeyA,3397
|
@@ -104,8 +114,8 @@ lionagi/tests/test_libs/test_func_call.py,sha256=xvs19YBNxqh3RbWLjQXY19L06b1_uZY
|
|
104
114
|
lionagi/tests/test_libs/test_nested.py,sha256=eEcE4BXJEkjoPZsd9-0rUxOJHjmu8W2hgVClUTwXEFY,13106
|
105
115
|
lionagi/tests/test_libs/test_parse.py,sha256=aa74kfOoJwDU7L7-59EcgBGYc5-OtafPIP2oGTI3Zrk,6814
|
106
116
|
lionagi/tests/test_libs/test_sys_util.py,sha256=Y-9jxLGxgbFNp78Z0PJyGUjRROMuRAG3Vo3i5LAH8Hs,7849
|
107
|
-
lionagi-0.0.
|
108
|
-
lionagi-0.0.
|
109
|
-
lionagi-0.0.
|
110
|
-
lionagi-0.0.
|
111
|
-
lionagi-0.0.
|
117
|
+
lionagi-0.0.315.dist-info/LICENSE,sha256=vfczrx-xFNkybZ7Ef-lGUnA1Vorky6wL4kwb1Fd5o3I,1089
|
118
|
+
lionagi-0.0.315.dist-info/METADATA,sha256=FVnSivifINUlYoYjEh7s01WKZ3h1Hn1AW_uKW3KfdLg,7934
|
119
|
+
lionagi-0.0.315.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
120
|
+
lionagi-0.0.315.dist-info/top_level.txt,sha256=szvch_d2jE1Lu9ZIKsl26Ll6BGfYfbOgt5lm-UpFSo4,8
|
121
|
+
lionagi-0.0.315.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|