lionagi 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/__init__.py +60 -5
- lionagi/core/__init__.py +0 -25
- lionagi/core/_setting/_setting.py +59 -0
- lionagi/core/action/__init__.py +14 -0
- lionagi/core/action/function_calling.py +136 -0
- lionagi/core/action/manual.py +1 -0
- lionagi/core/action/node.py +109 -0
- lionagi/core/action/tool.py +114 -0
- lionagi/core/action/tool_manager.py +356 -0
- lionagi/core/agent/base_agent.py +27 -13
- lionagi/core/agent/eval/evaluator.py +1 -0
- lionagi/core/agent/eval/vote.py +40 -0
- lionagi/core/agent/learn/learner.py +59 -0
- lionagi/core/agent/plan/unit_template.py +1 -0
- lionagi/core/collections/__init__.py +17 -0
- lionagi/core/{generic/data_logger.py → collections/_logger.py} +69 -55
- lionagi/core/collections/abc/__init__.py +53 -0
- lionagi/core/collections/abc/component.py +615 -0
- lionagi/core/collections/abc/concepts.py +297 -0
- lionagi/core/collections/abc/exceptions.py +150 -0
- lionagi/core/collections/abc/util.py +45 -0
- lionagi/core/collections/exchange.py +161 -0
- lionagi/core/collections/flow.py +426 -0
- lionagi/core/collections/model.py +419 -0
- lionagi/core/collections/pile.py +913 -0
- lionagi/core/collections/progression.py +236 -0
- lionagi/core/collections/util.py +64 -0
- lionagi/core/director/direct.py +314 -0
- lionagi/core/director/director.py +2 -0
- lionagi/core/{execute/branch_executor.py → engine/branch_engine.py} +134 -97
- lionagi/core/{execute/instruction_map_executor.py → engine/instruction_map_engine.py} +80 -55
- lionagi/{experimental/directive/evaluator → core/engine}/script_engine.py +17 -1
- lionagi/core/executor/base_executor.py +90 -0
- lionagi/core/{execute/structure_executor.py → executor/graph_executor.py} +62 -66
- lionagi/core/{execute → executor}/neo4j_executor.py +70 -67
- lionagi/core/generic/__init__.py +3 -33
- lionagi/core/generic/edge.py +29 -79
- lionagi/core/generic/edge_condition.py +16 -0
- lionagi/core/generic/graph.py +236 -0
- lionagi/core/generic/hyperedge.py +1 -0
- lionagi/core/generic/node.py +156 -221
- lionagi/core/generic/tree.py +48 -0
- lionagi/core/generic/tree_node.py +79 -0
- lionagi/core/mail/__init__.py +12 -0
- lionagi/core/mail/mail.py +25 -0
- lionagi/core/mail/mail_manager.py +139 -58
- lionagi/core/mail/package.py +45 -0
- lionagi/core/mail/start_mail.py +36 -0
- lionagi/core/message/__init__.py +19 -0
- lionagi/core/message/action_request.py +133 -0
- lionagi/core/message/action_response.py +135 -0
- lionagi/core/message/assistant_response.py +95 -0
- lionagi/core/message/instruction.py +234 -0
- lionagi/core/message/message.py +101 -0
- lionagi/core/message/system.py +86 -0
- lionagi/core/message/util.py +283 -0
- lionagi/core/report/__init__.py +4 -0
- lionagi/core/report/base.py +217 -0
- lionagi/core/report/form.py +231 -0
- lionagi/core/report/report.py +166 -0
- lionagi/core/report/util.py +28 -0
- lionagi/core/rule/_default.py +16 -0
- lionagi/core/rule/action.py +99 -0
- lionagi/core/rule/base.py +238 -0
- lionagi/core/rule/boolean.py +56 -0
- lionagi/core/rule/choice.py +47 -0
- lionagi/core/rule/mapping.py +96 -0
- lionagi/core/rule/number.py +71 -0
- lionagi/core/rule/rulebook.py +109 -0
- lionagi/core/rule/string.py +52 -0
- lionagi/core/rule/util.py +35 -0
- lionagi/core/session/branch.py +431 -0
- lionagi/core/session/directive_mixin.py +287 -0
- lionagi/core/session/session.py +229 -903
- lionagi/core/structure/__init__.py +1 -0
- lionagi/core/structure/chain.py +1 -0
- lionagi/core/structure/forest.py +1 -0
- lionagi/core/structure/graph.py +1 -0
- lionagi/core/structure/tree.py +1 -0
- lionagi/core/unit/__init__.py +5 -0
- lionagi/core/unit/parallel_unit.py +245 -0
- lionagi/core/unit/template/action.py +81 -0
- lionagi/core/unit/template/base.py +51 -0
- lionagi/core/unit/template/plan.py +84 -0
- lionagi/core/unit/template/predict.py +109 -0
- lionagi/core/unit/template/score.py +124 -0
- lionagi/core/unit/template/select.py +104 -0
- lionagi/core/unit/unit.py +362 -0
- lionagi/core/unit/unit_form.py +305 -0
- lionagi/core/unit/unit_mixin.py +1168 -0
- lionagi/core/unit/util.py +71 -0
- lionagi/core/validator/validator.py +364 -0
- lionagi/core/work/work.py +74 -0
- lionagi/core/work/work_function.py +92 -0
- lionagi/core/work/work_queue.py +81 -0
- lionagi/core/work/worker.py +195 -0
- lionagi/core/work/worklog.py +124 -0
- lionagi/experimental/compressor/base.py +46 -0
- lionagi/experimental/compressor/llm_compressor.py +247 -0
- lionagi/experimental/compressor/llm_summarizer.py +61 -0
- lionagi/experimental/compressor/util.py +70 -0
- lionagi/experimental/directive/__init__.py +19 -0
- lionagi/experimental/directive/parser/base_parser.py +69 -2
- lionagi/experimental/directive/{template_ → template}/base_template.py +17 -1
- lionagi/{libs/ln_tokenizer.py → experimental/directive/tokenizer.py} +16 -0
- lionagi/experimental/{directive/evaluator → evaluator}/ast_evaluator.py +16 -0
- lionagi/experimental/{directive/evaluator → evaluator}/base_evaluator.py +16 -0
- lionagi/experimental/knowledge/base.py +10 -0
- lionagi/experimental/memory/__init__.py +0 -0
- lionagi/experimental/strategies/__init__.py +0 -0
- lionagi/experimental/strategies/base.py +1 -0
- lionagi/integrations/bridge/langchain_/documents.py +4 -0
- lionagi/integrations/bridge/llamaindex_/index.py +30 -0
- lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py +6 -0
- lionagi/integrations/chunker/chunk.py +161 -24
- lionagi/integrations/config/oai_configs.py +34 -3
- lionagi/integrations/config/openrouter_configs.py +14 -2
- lionagi/integrations/loader/load.py +122 -21
- lionagi/integrations/loader/load_util.py +6 -77
- lionagi/integrations/provider/_mapping.py +46 -0
- lionagi/integrations/provider/litellm.py +2 -1
- lionagi/integrations/provider/mlx_service.py +16 -9
- lionagi/integrations/provider/oai.py +91 -4
- lionagi/integrations/provider/ollama.py +6 -5
- lionagi/integrations/provider/openrouter.py +115 -8
- lionagi/integrations/provider/services.py +2 -2
- lionagi/integrations/provider/transformers.py +18 -22
- lionagi/integrations/storage/__init__.py +3 -3
- lionagi/integrations/storage/neo4j.py +52 -60
- lionagi/integrations/storage/storage_util.py +44 -46
- lionagi/integrations/storage/structure_excel.py +43 -26
- lionagi/integrations/storage/to_excel.py +11 -4
- lionagi/libs/__init__.py +22 -1
- lionagi/libs/ln_api.py +75 -20
- lionagi/libs/ln_context.py +37 -0
- lionagi/libs/ln_convert.py +21 -9
- lionagi/libs/ln_func_call.py +69 -28
- lionagi/libs/ln_image.py +107 -0
- lionagi/libs/ln_nested.py +26 -11
- lionagi/libs/ln_parse.py +82 -23
- lionagi/libs/ln_queue.py +16 -0
- lionagi/libs/ln_tokenize.py +164 -0
- lionagi/libs/ln_validate.py +16 -0
- lionagi/libs/special_tokens.py +172 -0
- lionagi/libs/sys_util.py +95 -24
- lionagi/lions/coder/code_form.py +13 -0
- lionagi/lions/coder/coder.py +50 -3
- lionagi/lions/coder/util.py +30 -25
- lionagi/tests/libs/test_func_call.py +23 -21
- lionagi/tests/libs/test_nested.py +36 -21
- lionagi/tests/libs/test_parse.py +1 -1
- lionagi/tests/test_core/collections/__init__.py +0 -0
- lionagi/tests/test_core/collections/test_component.py +206 -0
- lionagi/tests/test_core/collections/test_exchange.py +138 -0
- lionagi/tests/test_core/collections/test_flow.py +145 -0
- lionagi/tests/test_core/collections/test_pile.py +171 -0
- lionagi/tests/test_core/collections/test_progression.py +129 -0
- lionagi/tests/test_core/generic/test_edge.py +67 -0
- lionagi/tests/test_core/generic/test_graph.py +96 -0
- lionagi/tests/test_core/generic/test_node.py +106 -0
- lionagi/tests/test_core/generic/test_tree_node.py +73 -0
- lionagi/tests/test_core/test_branch.py +115 -294
- lionagi/tests/test_core/test_form.py +46 -0
- lionagi/tests/test_core/test_report.py +105 -0
- lionagi/tests/test_core/test_validator.py +111 -0
- lionagi/version.py +1 -1
- lionagi-0.2.0.dist-info/LICENSE +202 -0
- lionagi-0.2.0.dist-info/METADATA +272 -0
- lionagi-0.2.0.dist-info/RECORD +240 -0
- lionagi/core/branch/base.py +0 -653
- lionagi/core/branch/branch.py +0 -474
- lionagi/core/branch/flow_mixin.py +0 -96
- lionagi/core/branch/util.py +0 -323
- lionagi/core/direct/__init__.py +0 -19
- lionagi/core/direct/cot.py +0 -123
- lionagi/core/direct/plan.py +0 -164
- lionagi/core/direct/predict.py +0 -166
- lionagi/core/direct/react.py +0 -171
- lionagi/core/direct/score.py +0 -279
- lionagi/core/direct/select.py +0 -170
- lionagi/core/direct/sentiment.py +0 -1
- lionagi/core/direct/utils.py +0 -110
- lionagi/core/direct/vote.py +0 -64
- lionagi/core/execute/base_executor.py +0 -47
- lionagi/core/flow/baseflow.py +0 -23
- lionagi/core/flow/monoflow/ReAct.py +0 -240
- lionagi/core/flow/monoflow/__init__.py +0 -9
- lionagi/core/flow/monoflow/chat.py +0 -95
- lionagi/core/flow/monoflow/chat_mixin.py +0 -253
- lionagi/core/flow/monoflow/followup.py +0 -215
- lionagi/core/flow/polyflow/__init__.py +0 -1
- lionagi/core/flow/polyflow/chat.py +0 -251
- lionagi/core/form/action_form.py +0 -26
- lionagi/core/form/field_validator.py +0 -287
- lionagi/core/form/form.py +0 -302
- lionagi/core/form/mixin.py +0 -214
- lionagi/core/form/scored_form.py +0 -13
- lionagi/core/generic/action.py +0 -26
- lionagi/core/generic/component.py +0 -532
- lionagi/core/generic/condition.py +0 -46
- lionagi/core/generic/mail.py +0 -90
- lionagi/core/generic/mailbox.py +0 -36
- lionagi/core/generic/relation.py +0 -70
- lionagi/core/generic/signal.py +0 -22
- lionagi/core/generic/structure.py +0 -362
- lionagi/core/generic/transfer.py +0 -20
- lionagi/core/generic/work.py +0 -40
- lionagi/core/graph/graph.py +0 -126
- lionagi/core/graph/tree.py +0 -190
- lionagi/core/mail/schema.py +0 -63
- lionagi/core/messages/schema.py +0 -325
- lionagi/core/tool/__init__.py +0 -5
- lionagi/core/tool/tool.py +0 -28
- lionagi/core/tool/tool_manager.py +0 -283
- lionagi/experimental/report/form.py +0 -64
- lionagi/experimental/report/report.py +0 -138
- lionagi/experimental/report/util.py +0 -47
- lionagi/experimental/tool/function_calling.py +0 -43
- lionagi/experimental/tool/manual.py +0 -66
- lionagi/experimental/tool/schema.py +0 -59
- lionagi/experimental/tool/tool_manager.py +0 -138
- lionagi/experimental/tool/util.py +0 -16
- lionagi/experimental/validator/rule.py +0 -139
- lionagi/experimental/validator/validator.py +0 -56
- lionagi/experimental/work/__init__.py +0 -10
- lionagi/experimental/work/async_queue.py +0 -54
- lionagi/experimental/work/schema.py +0 -73
- lionagi/experimental/work/work_function.py +0 -67
- lionagi/experimental/work/worker.py +0 -56
- lionagi/experimental/work2/form.py +0 -371
- lionagi/experimental/work2/report.py +0 -289
- lionagi/experimental/work2/schema.py +0 -30
- lionagi/experimental/work2/tests.py +0 -72
- lionagi/experimental/work2/work_function.py +0 -89
- lionagi/experimental/work2/worker.py +0 -12
- lionagi/integrations/bridge/llamaindex_/get_index.py +0 -294
- lionagi/tests/test_core/generic/test_component.py +0 -89
- lionagi/tests/test_core/test_base_branch.py +0 -426
- lionagi/tests/test_core/test_chat_flow.py +0 -63
- lionagi/tests/test_core/test_mail_manager.py +0 -75
- lionagi/tests/test_core/test_prompts.py +0 -51
- lionagi/tests/test_core/test_session.py +0 -254
- lionagi/tests/test_core/test_session_base_util.py +0 -313
- lionagi/tests/test_core/test_tool_manager.py +0 -95
- lionagi-0.1.2.dist-info/LICENSE +0 -9
- lionagi-0.1.2.dist-info/METADATA +0 -174
- lionagi-0.1.2.dist-info/RECORD +0 -206
- /lionagi/core/{branch → _setting}/__init__.py +0 -0
- /lionagi/core/{execute → agent/eval}/__init__.py +0 -0
- /lionagi/core/{flow → agent/learn}/__init__.py +0 -0
- /lionagi/core/{form → agent/plan}/__init__.py +0 -0
- /lionagi/core/{branch/executable_branch.py → agent/plan/plan.py} +0 -0
- /lionagi/core/{graph → director}/__init__.py +0 -0
- /lionagi/core/{messages → engine}/__init__.py +0 -0
- /lionagi/{experimental/directive/evaluator → core/engine}/sandbox_.py +0 -0
- /lionagi/{experimental/directive/evaluator → core/executor}/__init__.py +0 -0
- /lionagi/{experimental/directive/template_ → core/rule}/__init__.py +0 -0
- /lionagi/{experimental/report → core/unit/template}/__init__.py +0 -0
- /lionagi/{experimental/tool → core/validator}/__init__.py +0 -0
- /lionagi/{experimental/validator → core/work}/__init__.py +0 -0
- /lionagi/experimental/{work2 → compressor}/__init__.py +0 -0
- /lionagi/{core/flow/mono_chat_mixin.py → experimental/directive/template/__init__.py} +0 -0
- /lionagi/experimental/directive/{schema.py → template/schema.py} +0 -0
- /lionagi/experimental/{work2/util.py → evaluator/__init__.py} +0 -0
- /lionagi/experimental/{work2/work.py → knowledge/__init__.py} +0 -0
- /lionagi/{tests/libs/test_async.py → experimental/knowledge/graph.py} +0 -0
- {lionagi-0.1.2.dist-info → lionagi-0.2.0.dist-info}/WHEEL +0 -0
- {lionagi-0.1.2.dist-info → lionagi-0.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,195 @@
|
|
1
|
+
"""
|
2
|
+
Copyright 2024 HaiyangLi
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
"""
|
16
|
+
|
17
|
+
from abc import ABC
|
18
|
+
from functools import wraps
|
19
|
+
import asyncio
|
20
|
+
from lionagi import logging as _logging
|
21
|
+
from lionagi.libs.ln_func_call import pcall
|
22
|
+
from lionagi.core.work.work_function import WorkFunction
|
23
|
+
from lionagi.core.work.work import Work
|
24
|
+
|
25
|
+
|
26
|
+
class Worker(ABC):
|
27
|
+
"""
|
28
|
+
This class represents a worker that handles multiple work functions.
|
29
|
+
|
30
|
+
Attributes:
|
31
|
+
name (str): The name of the worker.
|
32
|
+
work_functions (dict[str, WorkFunction]): Dictionary mapping assignments to WorkFunction objects.
|
33
|
+
"""
|
34
|
+
|
35
|
+
name: str = "Worker"
|
36
|
+
work_functions: dict[str, WorkFunction] = {}
|
37
|
+
|
38
|
+
def __init__(self) -> None:
|
39
|
+
self.stopped = False
|
40
|
+
|
41
|
+
async def stop(self):
|
42
|
+
"""
|
43
|
+
Stops the worker and all associated work functions.
|
44
|
+
"""
|
45
|
+
self.stopped = True
|
46
|
+
_logging.info(f"Stopping worker {self.name}")
|
47
|
+
non_stopped_ = []
|
48
|
+
|
49
|
+
for func in self.work_functions.values():
|
50
|
+
worklog = func.worklog
|
51
|
+
await worklog.stop()
|
52
|
+
if not worklog.stopped:
|
53
|
+
non_stopped_.append(func.name)
|
54
|
+
|
55
|
+
if len(non_stopped_) > 0:
|
56
|
+
_logging.error(f"Could not stop worklogs: {non_stopped_}")
|
57
|
+
_logging.info(f"Stopped worker {self.name}")
|
58
|
+
|
59
|
+
async def is_progressable(self):
|
60
|
+
"""
|
61
|
+
Checks if any work function is progressable and the worker is not stopped.
|
62
|
+
|
63
|
+
Returns:
|
64
|
+
bool: True if any work function is progressable and the worker is not stopped, else False.
|
65
|
+
"""
|
66
|
+
|
67
|
+
return (
|
68
|
+
any([await i.is_progressable() for i in self.work_functions.values()])
|
69
|
+
and not self.stopped
|
70
|
+
)
|
71
|
+
|
72
|
+
async def process(self, refresh_time=1):
|
73
|
+
"""
|
74
|
+
Processes all work functions periodically.
|
75
|
+
|
76
|
+
Args:
|
77
|
+
refresh_time (int): Time interval between each process cycle.
|
78
|
+
"""
|
79
|
+
while await self.is_progressable():
|
80
|
+
await pcall([i.process(refresh_time) for i in self.work_functions.values()])
|
81
|
+
await asyncio.sleep(refresh_time)
|
82
|
+
|
83
|
+
# TODO: Implement process method
|
84
|
+
|
85
|
+
# async def process(self, refresh_time=1):
|
86
|
+
# while not self.stopped:
|
87
|
+
# tasks = [
|
88
|
+
# asyncio.create_task(func.process(refresh_time=refresh_time))
|
89
|
+
# for func in self.work_functions.values()
|
90
|
+
# ]
|
91
|
+
# await asyncio.wait(tasks)
|
92
|
+
# await asyncio.sleep(refresh_time)
|
93
|
+
|
94
|
+
async def _wrapper(
|
95
|
+
self,
|
96
|
+
*args,
|
97
|
+
func=None,
|
98
|
+
assignment=None,
|
99
|
+
capacity=None,
|
100
|
+
retry_kwargs=None,
|
101
|
+
guidance=None,
|
102
|
+
**kwargs,
|
103
|
+
):
|
104
|
+
"""
|
105
|
+
Internal wrapper to handle work function execution.
|
106
|
+
|
107
|
+
Args:
|
108
|
+
func (Callable): The function to be executed.
|
109
|
+
assignment (str): The assignment description.
|
110
|
+
capacity (int): Capacity for the work log.
|
111
|
+
retry_kwargs (dict): Retry arguments for the function.
|
112
|
+
guidance (str): Guidance or documentation for the function.
|
113
|
+
"""
|
114
|
+
if getattr(self, "work_functions", None) is None:
|
115
|
+
self.work_functions = {}
|
116
|
+
|
117
|
+
if func.__name__ not in self.work_functions:
|
118
|
+
self.work_functions[func.__name__] = WorkFunction(
|
119
|
+
assignment=assignment,
|
120
|
+
function=func,
|
121
|
+
retry_kwargs=retry_kwargs or {},
|
122
|
+
guidance=guidance or func.__doc__,
|
123
|
+
capacity=capacity,
|
124
|
+
)
|
125
|
+
|
126
|
+
work_func: WorkFunction = self.work_functions[func.__name__]
|
127
|
+
task = asyncio.create_task(work_func.perform(self, *args, **kwargs))
|
128
|
+
work = Work(async_task=task)
|
129
|
+
await work_func.worklog.append(work)
|
130
|
+
return True
|
131
|
+
|
132
|
+
|
133
|
+
def work(
|
134
|
+
assignment=None,
|
135
|
+
capacity=10,
|
136
|
+
guidance=None,
|
137
|
+
retry_kwargs=None,
|
138
|
+
refresh_time=1,
|
139
|
+
timeout=10,
|
140
|
+
):
|
141
|
+
"""
|
142
|
+
Decorator to mark a method as a work function.
|
143
|
+
|
144
|
+
Args:
|
145
|
+
assignment (str): The assignment description of the work function.
|
146
|
+
capacity (int): Capacity for the work log.
|
147
|
+
guidance (str): Guidance or documentation for the work function.
|
148
|
+
retry_kwargs (dict): Retry arguments for the work function.
|
149
|
+
refresh_time (int): Time interval between each process cycle.
|
150
|
+
timeout (int): Timeout for the work function.
|
151
|
+
"""
|
152
|
+
|
153
|
+
def decorator(func):
|
154
|
+
@wraps(func)
|
155
|
+
async def wrapper(
|
156
|
+
self: Worker,
|
157
|
+
*args,
|
158
|
+
func=func,
|
159
|
+
assignment=assignment,
|
160
|
+
capacity=capacity,
|
161
|
+
retry_kwargs=retry_kwargs,
|
162
|
+
guidance=guidance,
|
163
|
+
**kwargs,
|
164
|
+
):
|
165
|
+
retry_kwargs = retry_kwargs or {}
|
166
|
+
retry_kwargs["timeout"] = retry_kwargs.get("timeout", timeout)
|
167
|
+
return await self._wrapper(
|
168
|
+
*args,
|
169
|
+
func=func,
|
170
|
+
assignment=assignment,
|
171
|
+
capacity=capacity,
|
172
|
+
retry_kwargs=retry_kwargs,
|
173
|
+
guidance=guidance,
|
174
|
+
**kwargs,
|
175
|
+
)
|
176
|
+
|
177
|
+
return wrapper
|
178
|
+
|
179
|
+
return decorator
|
180
|
+
|
181
|
+
|
182
|
+
# # Example
|
183
|
+
# from lionagi import Session
|
184
|
+
# from lionagi.experimental.work.work_function import work
|
185
|
+
|
186
|
+
|
187
|
+
# class MyWorker(Worker):
|
188
|
+
|
189
|
+
# @work(assignment="instruction, context -> response")
|
190
|
+
# async def chat(instruction=None, context=None):
|
191
|
+
# session = Session()
|
192
|
+
# return await session.chat(instruction=instruction, context=context)
|
193
|
+
|
194
|
+
|
195
|
+
# await a.chat(instruction="Hello", context={})
|
@@ -0,0 +1,124 @@
|
|
1
|
+
"""
|
2
|
+
Copyright 2024 HaiyangLi
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
"""
|
16
|
+
|
17
|
+
from lionagi.core.collections.abc import Progressable
|
18
|
+
from lionagi.core.collections import pile, progression, Pile
|
19
|
+
from lionagi.core.work.work import Work, WorkStatus
|
20
|
+
from lionagi.core.work.work_queue import WorkQueue
|
21
|
+
|
22
|
+
|
23
|
+
class WorkLog(Progressable):
|
24
|
+
"""
|
25
|
+
A class representing a log of work items.
|
26
|
+
|
27
|
+
Attributes:
|
28
|
+
pile (Pile): A pile containing work items.
|
29
|
+
pending (Progression): A progression of pending work items.
|
30
|
+
queue (WorkQueue): A queue to manage the execution of work items.
|
31
|
+
"""
|
32
|
+
|
33
|
+
def __init__(self, capacity=10, workpile=None):
|
34
|
+
"""
|
35
|
+
Initializes a new instance of WorkLog.
|
36
|
+
|
37
|
+
Args:
|
38
|
+
capacity (int): The capacity of the work queue.
|
39
|
+
workpile (Pile, optional): An optional pile of initial work items.
|
40
|
+
"""
|
41
|
+
self.pile = (
|
42
|
+
workpile if workpile and isinstance(workpile, Pile) else pile({}, Work)
|
43
|
+
)
|
44
|
+
self.pending = progression(workpile) if workpile else progression()
|
45
|
+
self.queue = WorkQueue(capacity=capacity)
|
46
|
+
|
47
|
+
async def append(self, work: Work):
|
48
|
+
"""
|
49
|
+
Appends a new work item to the log.
|
50
|
+
|
51
|
+
Args:
|
52
|
+
work (Work): The work item to append.
|
53
|
+
"""
|
54
|
+
self.pile.append(work)
|
55
|
+
self.pending.append(work)
|
56
|
+
|
57
|
+
async def forward(self):
|
58
|
+
"""
|
59
|
+
Forwards pending work items to the queue if capacity allows.
|
60
|
+
"""
|
61
|
+
if not self.queue.available_capacity:
|
62
|
+
return
|
63
|
+
else:
|
64
|
+
while len(self.pending) > 0 and self.queue.available_capacity:
|
65
|
+
work: Work = self.pile[self.pending.popleft()]
|
66
|
+
work.status = WorkStatus.IN_PROGRESS
|
67
|
+
await self.queue.enqueue(work)
|
68
|
+
|
69
|
+
async def stop(self):
|
70
|
+
"""
|
71
|
+
Stops the work queue.
|
72
|
+
"""
|
73
|
+
await self.queue.stop()
|
74
|
+
|
75
|
+
@property
|
76
|
+
def pending_work(self):
|
77
|
+
"""
|
78
|
+
Retrieves the pile of pending work items.
|
79
|
+
|
80
|
+
Returns:
|
81
|
+
Pile: A pile of pending work items.
|
82
|
+
"""
|
83
|
+
return pile([i for i in self.pile if i.status == WorkStatus.PENDING])
|
84
|
+
|
85
|
+
@property
|
86
|
+
def stopped(self):
|
87
|
+
"""
|
88
|
+
Checks if the work queue is stopped.
|
89
|
+
|
90
|
+
Returns:
|
91
|
+
bool: True if the work queue is stopped, else False.
|
92
|
+
"""
|
93
|
+
return self.queue.stopped
|
94
|
+
|
95
|
+
@property
|
96
|
+
def completed_work(self):
|
97
|
+
"""
|
98
|
+
Retrieves the pile of completed work items.
|
99
|
+
|
100
|
+
Returns:
|
101
|
+
Pile: A pile of completed work items.
|
102
|
+
"""
|
103
|
+
return pile([i for i in self.pile if i.status == WorkStatus.COMPLETED])
|
104
|
+
|
105
|
+
def __contains__(self, work):
|
106
|
+
"""
|
107
|
+
Checks if a work item is in the pile.
|
108
|
+
|
109
|
+
Args:
|
110
|
+
work (Work): The work item to check.
|
111
|
+
|
112
|
+
Returns:
|
113
|
+
bool: True if the work item is in the pile, else False.
|
114
|
+
"""
|
115
|
+
return work in self.pile
|
116
|
+
|
117
|
+
def __iter__(self):
|
118
|
+
"""
|
119
|
+
Returns an iterator over the work pile.
|
120
|
+
|
121
|
+
Returns:
|
122
|
+
Iterator: An iterator over the work pile.
|
123
|
+
"""
|
124
|
+
return iter(self.pile)
|
@@ -0,0 +1,46 @@
|
|
1
|
+
from abc import ABC
|
2
|
+
from lionagi.core.collections import iModel
|
3
|
+
|
4
|
+
|
5
|
+
class TokenCompressor(ABC):
|
6
|
+
"""
|
7
|
+
NOTICE:
|
8
|
+
The token compressor system is inspired by LLMLingua.
|
9
|
+
https://github.com/microsoft/LLMLingua
|
10
|
+
|
11
|
+
MIT License
|
12
|
+
Copyright (c) Microsoft Corporation.
|
13
|
+
|
14
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
15
|
+
of this software and associated documentation files (the "Software"), to deal
|
16
|
+
in the Software without restriction, including without limitation the rights
|
17
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
18
|
+
copies of the Software, and to permit persons to whom the Software is
|
19
|
+
furnished to do so, subject to the following conditions:
|
20
|
+
|
21
|
+
Authors:
|
22
|
+
Huiqiang Jiang, Qianhui Wu, Chin-Yew Lin, Yuqing Yang, Lili Qiu
|
23
|
+
@inproceedings{jiang-etal-2023-llmlingua,
|
24
|
+
title = "{LLML}ingua: Compressing Prompts for Accelerated Inference of Large Language Models",
|
25
|
+
author = "Huiqiang Jiang and Qianhui Wu and Chin-Yew Lin and Yuqing Yang and Lili Qiu",
|
26
|
+
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
|
27
|
+
month = dec,
|
28
|
+
year = "2023",
|
29
|
+
publisher = "Association for Computational Linguistics",
|
30
|
+
url = "https://aclanthology.org/2023.emnlp-main.825",
|
31
|
+
doi = "10.18653/v1/2023.emnlp-main.825",
|
32
|
+
pages = "13358--13376",
|
33
|
+
}
|
34
|
+
|
35
|
+
LionAGI Modifications:
|
36
|
+
- Only borrowed the concept of token compression via perplexity
|
37
|
+
- Removed the dependency on the LLMLingua library
|
38
|
+
- use logprobs from GPT model to calculate perplexity
|
39
|
+
- added async ability to the functions
|
40
|
+
- used lionagi existing iModel class for API calls
|
41
|
+
"""
|
42
|
+
|
43
|
+
def __init__(self, imodel: iModel, tokenizer=None, splitter=None):
|
44
|
+
self.imodel = imodel
|
45
|
+
self.tokenizer = tokenizer
|
46
|
+
self.splitter = splitter
|
@@ -0,0 +1,247 @@
|
|
1
|
+
import asyncio
|
2
|
+
from lionagi import alcall
|
3
|
+
from lionagi.libs.ln_convert import to_list
|
4
|
+
import numpy as np
|
5
|
+
from lionagi.core.collections import iModel
|
6
|
+
from .base import TokenCompressor
|
7
|
+
from lionagi.libs.ln_tokenize import TokenizeUtil
|
8
|
+
from time import time
|
9
|
+
|
10
|
+
# inspired by LLMLingua, MIT License, Copyright (c) Microsoft Corporation.
|
11
|
+
# https://github.com/microsoft/LLMLingua
|
12
|
+
|
13
|
+
|
14
|
+
class LLMCompressor(TokenCompressor):
|
15
|
+
|
16
|
+
def __init__(
|
17
|
+
self,
|
18
|
+
imodel: iModel = None,
|
19
|
+
system_msg=None,
|
20
|
+
tokenizer=None, # must be a callable or object with a tokenize method
|
21
|
+
splitter=None, # must be a callable or object with a split/chunk/segment method
|
22
|
+
target_ratio=0.2,
|
23
|
+
n_samples=5, # the cumulative samples to take in each perplexity calculation
|
24
|
+
chunk_size=64,
|
25
|
+
max_tokens_per_sample=80,
|
26
|
+
min_compression_score=0, # (0-1) the minimum score to consider for compression, 0 means all
|
27
|
+
split_overlap=0,
|
28
|
+
split_threshold=0,
|
29
|
+
verbose=True,
|
30
|
+
):
|
31
|
+
imodel = imodel or iModel(model="gpt-3.5-turbo", temperature=0.3)
|
32
|
+
super().__init__(imodel=imodel, tokenizer=tokenizer, splitter=splitter)
|
33
|
+
self.system_msg = (
|
34
|
+
system_msg
|
35
|
+
or "Concisely summarize and compress the information for storage:"
|
36
|
+
)
|
37
|
+
self.target_ratio = target_ratio
|
38
|
+
self.n_samples = n_samples
|
39
|
+
self.chunk_size = chunk_size
|
40
|
+
self.max_tokens_per_sample = max_tokens_per_sample
|
41
|
+
self.min_compression_score = min_compression_score
|
42
|
+
self.verbose = verbose
|
43
|
+
self.split_overlap = split_overlap
|
44
|
+
self.split_threshold = split_threshold
|
45
|
+
|
46
|
+
def tokenize(self, text, encoding_name=None, return_byte=False, **kwargs):
|
47
|
+
"""
|
48
|
+
by default you can use `encoding_name` to be one of,
|
49
|
+
['gpt2', 'r50k_base', 'p50k_base', 'p50k_edit', 'cl100k_base', 'o200k_base']
|
50
|
+
|
51
|
+
or you can use `encoding_model` that tiktoken supports in their mapping such as "gpt-4o"
|
52
|
+
"""
|
53
|
+
if not self.tokenizer:
|
54
|
+
return TokenizeUtil.tokenize(
|
55
|
+
text,
|
56
|
+
encoding_model=self.imodel.iModel_name,
|
57
|
+
encoding_name=encoding_name,
|
58
|
+
return_byte=return_byte,
|
59
|
+
)
|
60
|
+
|
61
|
+
if hasattr(self.tokenizer, "tokenize"):
|
62
|
+
return self.tokenizer.tokenize(text, **kwargs)
|
63
|
+
|
64
|
+
return self.tokenizer(text, **kwargs)
|
65
|
+
|
66
|
+
def split(
|
67
|
+
self,
|
68
|
+
text,
|
69
|
+
chunk_size=None,
|
70
|
+
overlap=None,
|
71
|
+
threshold=None,
|
72
|
+
by_chars=False,
|
73
|
+
return_tokens=False,
|
74
|
+
return_byte=False,
|
75
|
+
**kwargs,
|
76
|
+
):
|
77
|
+
if not self.splitter:
|
78
|
+
splitter = (
|
79
|
+
TokenizeUtil.chunk_by_chars
|
80
|
+
if by_chars
|
81
|
+
else TokenizeUtil.chunk_by_tokens
|
82
|
+
)
|
83
|
+
return splitter(
|
84
|
+
text,
|
85
|
+
chunk_size or self.chunk_size,
|
86
|
+
overlap or self.split_overlap,
|
87
|
+
threshold or self.split_threshold,
|
88
|
+
return_tokens=return_tokens,
|
89
|
+
return_byte=return_byte,
|
90
|
+
)
|
91
|
+
|
92
|
+
a = [
|
93
|
+
getattr(self.splitter, i, None)
|
94
|
+
for i in ["split", "chunk", "segment"]
|
95
|
+
if i is not None
|
96
|
+
][0]
|
97
|
+
a = getattr(self.splitter, a)
|
98
|
+
return a(text, **kwargs)
|
99
|
+
|
100
|
+
async def rank_by_pplex(
|
101
|
+
self, items: list, initial_text=None, cumulative=False, n_samples=None, **kwargs
|
102
|
+
):
|
103
|
+
"""
|
104
|
+
rank a list of items according to their perplexity
|
105
|
+
an item can be a single token or a list of tokens
|
106
|
+
|
107
|
+
kwargs: additional arguments to pass to the model
|
108
|
+
"""
|
109
|
+
|
110
|
+
async def _get_item_perplexity(item):
|
111
|
+
item = item if isinstance(item, list) else [item]
|
112
|
+
item = (
|
113
|
+
item[: self.max_tokens_per_sample]
|
114
|
+
if len(item) > self.max_tokens_per_sample
|
115
|
+
else item
|
116
|
+
)
|
117
|
+
return await self.imodel.compute_perplexity(
|
118
|
+
initial_context=initial_text,
|
119
|
+
tokens=item,
|
120
|
+
n_samples=n_samples or self.n_samples,
|
121
|
+
system_msg=self.system_msg,
|
122
|
+
**kwargs,
|
123
|
+
)
|
124
|
+
|
125
|
+
if not isinstance(items, list):
|
126
|
+
items = self.tokenize(items)
|
127
|
+
|
128
|
+
if len(items) == 1:
|
129
|
+
return [items] # no need to rank a single item
|
130
|
+
|
131
|
+
_segments = []
|
132
|
+
_context = initial_text or ""
|
133
|
+
_task = []
|
134
|
+
|
135
|
+
if cumulative:
|
136
|
+
for i in items:
|
137
|
+
if isinstance(i, list):
|
138
|
+
_context += " " + " ".join(i).strip()
|
139
|
+
else:
|
140
|
+
_context += " " + i.strip()
|
141
|
+
|
142
|
+
_segments.append(_context)
|
143
|
+
else:
|
144
|
+
_segments = items
|
145
|
+
|
146
|
+
for i in _segments:
|
147
|
+
_task.append(asyncio.create_task(_get_item_perplexity(i)))
|
148
|
+
|
149
|
+
results = await asyncio.gather(*_task)
|
150
|
+
results = [(item, pplex) for item, pplex in zip(items, results)]
|
151
|
+
return sorted(results, key=lambda x: x[1]["logprobs"], reverse=True)
|
152
|
+
|
153
|
+
    async def compress(
        self,
        text,
        target_ratio=None,
        initial_text=None,
        cumulative=False,
        split_kwargs=None,
        split_overlap=None,
        split_threshold=None,
        rank_by="perplexity",
        min_compression_score=None,
        verbose=True,
        **kwargs,
    ):
        """Compress ``text`` by keeping only its highest-ranked chunks.

        The text is split into chunks, the chunks are ranked (currently only
        by perplexity), and chunks are greedily selected up to the target
        token budget, then re-joined into a single string.

        Args:
            text: the text to compress.
            target_ratio: fraction of the original token count to keep;
                falls back to ``self.target_ratio``.
            initial_text: optional context forwarded to the ranking step.
            cumulative: forwarded to ``rank_by_pplex`` (score each chunk
                together with its predecessors).
            split_kwargs: options for ``self.split``; when None, a default
                dict is built from the arguments below.
            split_overlap: chunk overlap used only when building defaults.
            split_threshold: chunk threshold used only when building defaults.
            rank_by: ranking method; only "perplexity" is supported.
            min_compression_score: minimum perplexity a chunk must exceed to
                be kept; falls back to ``self.min_compression_score``.
            verbose: when True, print a summary of the compression run.
            **kwargs: extra arguments forwarded to the ranking call.

        Returns:
            str: the compressed text.

        Raises:
            ValueError: if ``rank_by`` is not a supported ranking method.
        """
        start = time()
        # Build default split options only when the caller supplied none;
        # split_overlap/split_threshold are ignored otherwise.
        if split_kwargs is None:
            split_kwargs = {}
            split_kwargs["chunk_size"] = self.max_tokens_per_sample
            split_kwargs["overlap"] = split_overlap or 0
            split_kwargs["threshold"] = split_threshold or 0

        # Token count of the uncompressed input; used for the budget and stats.
        len_tokens = len(self.tokenize(text))

        items = self.split(text, return_tokens=True, **split_kwargs)

        if rank_by == "perplexity":
            ranked_items = await self.rank_by_pplex(
                items=items, initial_text=initial_text, cumulative=cumulative, **kwargs
            )

            prompt_tokens = sum([i[1]["num_prompt_tokens"] for i in ranked_items])

            num_completion_tokens = sum(
                [i[1]["num_completion_tokens"] for i in ranked_items]
            )

            # NOTE(review): hard-coded per-1M-token prices ($0.50 prompt /
            # $1.50 completion) — presumably gpt-3.5-turbo pricing; confirm
            # against the configured model.
            price = (
                prompt_tokens * 0.5 / 1000000 + num_completion_tokens * 1.5 / 1000000
            )

            selected_items = self.select_by_pplex(
                ranked_items=ranked_items,
                target_compression_ratio=target_ratio or self.target_ratio,
                original_length=len_tokens,
                min_pplex=min_compression_score or self.min_compression_score,
            )

            if verbose:
                msg = ""
                msg += f"Original Token number: {len_tokens}\n"

                # Count tokens in one selected item (tokenizing strings first).
                # Returns None for inputs that are neither str nor list.
                def _f(i):
                    if isinstance(i, str):
                        i = self.tokenize(i)

                    if isinstance(i, list):
                        return len(to_list(i, dropna=True, flatten=True))

                len_ = sum([_f(i) for i in selected_items])
                msg += f"Selected Token number: {len_}\n"
                msg += f"Token Compression Ratio: {len_ / len_tokens:.03f}\n"
                msg += f"Compression Time: {time() - start:.04f} seconds\n"
                msg += f"Compression Model: {self.imodel.iModel_name}\n"
                msg += f"Compression Method: {rank_by}\n"
                msg += f"Compression Usage: ${price:.05f}\n"
                print(msg)

            # Join the kept chunks and drop blank-line artifacts.
            a = "".join([i.strip() for i in selected_items]).strip()
            a = a.replace("\n\n", "")
            return a

        raise ValueError(f"Ranking method {rank_by} is not supported")
|
225
|
+
|
226
|
+
def select_by_pplex(
|
227
|
+
self, ranked_items, target_compression_ratio, original_length, min_pplex=None
|
228
|
+
):
|
229
|
+
min_pplex = min_pplex or 0
|
230
|
+
|
231
|
+
desired_length = int(original_length * target_compression_ratio)
|
232
|
+
|
233
|
+
items = []
|
234
|
+
current_length = 0
|
235
|
+
|
236
|
+
for item, info in ranked_items:
|
237
|
+
if info["perplexity"] > min_pplex:
|
238
|
+
item = self.tokenize(item) if isinstance(item, str) else item
|
239
|
+
item = item if isinstance(item, list) else [item]
|
240
|
+
item = to_list(item, dropna=True, flatten=True)
|
241
|
+
if current_length + len(item) > desired_length:
|
242
|
+
break
|
243
|
+
else:
|
244
|
+
current_length += len(item)
|
245
|
+
items.append("".join(item))
|
246
|
+
|
247
|
+
return items
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# from lionagi.core.collections import iModel
|
2
|
+
# from .base import TokenCompressor
|
3
|
+
|
4
|
+
|
5
|
+
# class LLMSummarizer(TokenCompressor):
|
6
|
+
|
7
|
+
# def __init__(
|
8
|
+
# self, imodel: iModel = None, system_msg=None, tokenizer=None, splitter=None,
|
9
|
+
# max_tokens=25, target_ratio=0.3
|
10
|
+
# ):
|
11
|
+
# imodel = imodel or iModel(model="gpt-3.5-turbo", max_tokens=max_tokens)
|
12
|
+
# super().__init__(imodel=imodel, tokenizer=tokenizer, splitter=splitter)
|
13
|
+
# self.system_msg = (
|
14
|
+
# system_msg
|
15
|
+
# or "Summarize the following sentence to be concise and informative:"
|
16
|
+
# )
|
17
|
+
# self.target_ratio = target_ratio
|
18
|
+
|
19
|
+
# async def summarize_sentence(self, sentence, **kwargs):
|
20
|
+
# messages = [
|
21
|
+
# {"role": "system", "content": self.system_msg},
|
22
|
+
# {"role": "user", "content": sentence},
|
23
|
+
# ]
|
24
|
+
# response = await self.imodel.call_chat_completion(messages, **kwargs)
|
25
|
+
# return response["choices"][0]["message"]["content"]
|
26
|
+
|
27
|
+
# def tokenize(self, text):
|
28
|
+
# tokenize_func = self.tokenizer or tokenize
|
29
|
+
# return tokenize_func(text)
|
30
|
+
|
31
|
+
# def split(self, text):
|
32
|
+
# split_func = self.splitter or split_into_segments
|
33
|
+
# return split_func(text)
|
34
|
+
|
35
|
+
# # Function to enforce maximum sentence length
|
36
|
+
# def enforce_max_sentence_length(self, sentence, max_words=25):
|
37
|
+
# words = self.tokenize(sentence)
|
38
|
+
# if len(words) > max_words:
|
39
|
+
# sentence = ' '.join(words[:max_words])
|
40
|
+
# return sentence
|
41
|
+
|
42
|
+
# async def summarize_text(self, text, max_length_per_sentence=25, target_ratio=None, **kwargs):
|
43
|
+
# sentences = self.split(text)
|
44
|
+
# summarized = await alcall(
|
45
|
+
# sentences, self.summarize_sentence, **kwargs
|
46
|
+
# )
|
47
|
+
# summarized = [
|
48
|
+
# self.enforce_max_sentence_length(sentence, max_length_per_sentence)
|
49
|
+
# for sentence in summarized
|
50
|
+
# ]
|
51
|
+
|
52
|
+
# original_length = len(self.tokenize(text))
|
53
|
+
# summarized_length = len(self.tokenize(' '.join(summarized)))
|
54
|
+
# current_ratio = summarized_length / original_length
|
55
|
+
|
56
|
+
# target_ratio = target_ratio or self.target_ratio
|
57
|
+
# if current_ratio > target_ratio:
|
58
|
+
# words_to_remove = int((current_ratio - target_ratio) * original_length)
|
59
|
+
# return ' '.join(summarized[:-words_to_remove])
|
60
|
+
|
61
|
+
# return ' '.join(summarized)
|