lionagi 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/__init__.py +60 -5
- lionagi/core/__init__.py +0 -25
- lionagi/core/_setting/_setting.py +59 -0
- lionagi/core/action/__init__.py +14 -0
- lionagi/core/action/function_calling.py +136 -0
- lionagi/core/action/manual.py +1 -0
- lionagi/core/action/node.py +109 -0
- lionagi/core/action/tool.py +114 -0
- lionagi/core/action/tool_manager.py +356 -0
- lionagi/core/agent/base_agent.py +27 -13
- lionagi/core/agent/eval/evaluator.py +1 -0
- lionagi/core/agent/eval/vote.py +40 -0
- lionagi/core/agent/learn/learner.py +59 -0
- lionagi/core/agent/plan/unit_template.py +1 -0
- lionagi/core/collections/__init__.py +17 -0
- lionagi/core/{generic/data_logger.py → collections/_logger.py} +69 -55
- lionagi/core/collections/abc/__init__.py +53 -0
- lionagi/core/collections/abc/component.py +615 -0
- lionagi/core/collections/abc/concepts.py +297 -0
- lionagi/core/collections/abc/exceptions.py +150 -0
- lionagi/core/collections/abc/util.py +45 -0
- lionagi/core/collections/exchange.py +161 -0
- lionagi/core/collections/flow.py +426 -0
- lionagi/core/collections/model.py +419 -0
- lionagi/core/collections/pile.py +913 -0
- lionagi/core/collections/progression.py +236 -0
- lionagi/core/collections/util.py +64 -0
- lionagi/core/director/direct.py +314 -0
- lionagi/core/director/director.py +2 -0
- lionagi/core/{execute/branch_executor.py → engine/branch_engine.py} +134 -97
- lionagi/core/{execute/instruction_map_executor.py → engine/instruction_map_engine.py} +80 -55
- lionagi/{experimental/directive/evaluator → core/engine}/script_engine.py +17 -1
- lionagi/core/executor/base_executor.py +90 -0
- lionagi/core/{execute/structure_executor.py → executor/graph_executor.py} +62 -66
- lionagi/core/{execute → executor}/neo4j_executor.py +70 -67
- lionagi/core/generic/__init__.py +3 -33
- lionagi/core/generic/edge.py +29 -79
- lionagi/core/generic/edge_condition.py +16 -0
- lionagi/core/generic/graph.py +236 -0
- lionagi/core/generic/hyperedge.py +1 -0
- lionagi/core/generic/node.py +156 -221
- lionagi/core/generic/tree.py +48 -0
- lionagi/core/generic/tree_node.py +79 -0
- lionagi/core/mail/__init__.py +12 -0
- lionagi/core/mail/mail.py +25 -0
- lionagi/core/mail/mail_manager.py +139 -58
- lionagi/core/mail/package.py +45 -0
- lionagi/core/mail/start_mail.py +36 -0
- lionagi/core/message/__init__.py +19 -0
- lionagi/core/message/action_request.py +133 -0
- lionagi/core/message/action_response.py +135 -0
- lionagi/core/message/assistant_response.py +95 -0
- lionagi/core/message/instruction.py +234 -0
- lionagi/core/message/message.py +101 -0
- lionagi/core/message/system.py +86 -0
- lionagi/core/message/util.py +283 -0
- lionagi/core/report/__init__.py +4 -0
- lionagi/core/report/base.py +217 -0
- lionagi/core/report/form.py +231 -0
- lionagi/core/report/report.py +166 -0
- lionagi/core/report/util.py +28 -0
- lionagi/core/rule/_default.py +16 -0
- lionagi/core/rule/action.py +99 -0
- lionagi/core/rule/base.py +238 -0
- lionagi/core/rule/boolean.py +56 -0
- lionagi/core/rule/choice.py +47 -0
- lionagi/core/rule/mapping.py +96 -0
- lionagi/core/rule/number.py +71 -0
- lionagi/core/rule/rulebook.py +109 -0
- lionagi/core/rule/string.py +52 -0
- lionagi/core/rule/util.py +35 -0
- lionagi/core/session/branch.py +431 -0
- lionagi/core/session/directive_mixin.py +287 -0
- lionagi/core/session/session.py +229 -903
- lionagi/core/structure/__init__.py +1 -0
- lionagi/core/structure/chain.py +1 -0
- lionagi/core/structure/forest.py +1 -0
- lionagi/core/structure/graph.py +1 -0
- lionagi/core/structure/tree.py +1 -0
- lionagi/core/unit/__init__.py +5 -0
- lionagi/core/unit/parallel_unit.py +245 -0
- lionagi/core/unit/template/action.py +81 -0
- lionagi/core/unit/template/base.py +51 -0
- lionagi/core/unit/template/plan.py +84 -0
- lionagi/core/unit/template/predict.py +109 -0
- lionagi/core/unit/template/score.py +124 -0
- lionagi/core/unit/template/select.py +104 -0
- lionagi/core/unit/unit.py +362 -0
- lionagi/core/unit/unit_form.py +305 -0
- lionagi/core/unit/unit_mixin.py +1168 -0
- lionagi/core/unit/util.py +71 -0
- lionagi/core/validator/validator.py +364 -0
- lionagi/core/work/work.py +76 -0
- lionagi/core/work/work_function.py +101 -0
- lionagi/core/work/work_queue.py +103 -0
- lionagi/core/work/worker.py +258 -0
- lionagi/core/work/worklog.py +120 -0
- lionagi/experimental/compressor/base.py +46 -0
- lionagi/experimental/compressor/llm_compressor.py +247 -0
- lionagi/experimental/compressor/llm_summarizer.py +61 -0
- lionagi/experimental/compressor/util.py +70 -0
- lionagi/experimental/directive/__init__.py +19 -0
- lionagi/experimental/directive/parser/base_parser.py +69 -2
- lionagi/experimental/directive/{template_ → template}/base_template.py +17 -1
- lionagi/{libs/ln_tokenizer.py → experimental/directive/tokenizer.py} +16 -0
- lionagi/experimental/{directive/evaluator → evaluator}/ast_evaluator.py +16 -0
- lionagi/experimental/{directive/evaluator → evaluator}/base_evaluator.py +16 -0
- lionagi/experimental/knowledge/base.py +10 -0
- lionagi/experimental/memory/__init__.py +0 -0
- lionagi/experimental/strategies/__init__.py +0 -0
- lionagi/experimental/strategies/base.py +1 -0
- lionagi/integrations/bridge/langchain_/documents.py +4 -0
- lionagi/integrations/bridge/llamaindex_/index.py +30 -0
- lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py +6 -0
- lionagi/integrations/chunker/chunk.py +161 -24
- lionagi/integrations/config/oai_configs.py +34 -3
- lionagi/integrations/config/openrouter_configs.py +14 -2
- lionagi/integrations/loader/load.py +122 -21
- lionagi/integrations/loader/load_util.py +6 -77
- lionagi/integrations/provider/_mapping.py +46 -0
- lionagi/integrations/provider/litellm.py +2 -1
- lionagi/integrations/provider/mlx_service.py +16 -9
- lionagi/integrations/provider/oai.py +91 -4
- lionagi/integrations/provider/ollama.py +6 -5
- lionagi/integrations/provider/openrouter.py +115 -8
- lionagi/integrations/provider/services.py +2 -2
- lionagi/integrations/provider/transformers.py +18 -22
- lionagi/integrations/storage/__init__.py +3 -3
- lionagi/integrations/storage/neo4j.py +52 -60
- lionagi/integrations/storage/storage_util.py +44 -46
- lionagi/integrations/storage/structure_excel.py +43 -26
- lionagi/integrations/storage/to_excel.py +11 -4
- lionagi/libs/__init__.py +22 -1
- lionagi/libs/ln_api.py +75 -20
- lionagi/libs/ln_context.py +37 -0
- lionagi/libs/ln_convert.py +21 -9
- lionagi/libs/ln_func_call.py +69 -28
- lionagi/libs/ln_image.py +107 -0
- lionagi/libs/ln_nested.py +26 -11
- lionagi/libs/ln_parse.py +82 -23
- lionagi/libs/ln_queue.py +16 -0
- lionagi/libs/ln_tokenize.py +164 -0
- lionagi/libs/ln_validate.py +16 -0
- lionagi/libs/special_tokens.py +172 -0
- lionagi/libs/sys_util.py +95 -24
- lionagi/lions/coder/code_form.py +13 -0
- lionagi/lions/coder/coder.py +50 -3
- lionagi/lions/coder/util.py +30 -25
- lionagi/tests/libs/test_func_call.py +23 -21
- lionagi/tests/libs/test_nested.py +36 -21
- lionagi/tests/libs/test_parse.py +1 -1
- lionagi/tests/test_core/collections/__init__.py +0 -0
- lionagi/tests/test_core/collections/test_component.py +206 -0
- lionagi/tests/test_core/collections/test_exchange.py +138 -0
- lionagi/tests/test_core/collections/test_flow.py +145 -0
- lionagi/tests/test_core/collections/test_pile.py +171 -0
- lionagi/tests/test_core/collections/test_progression.py +129 -0
- lionagi/tests/test_core/generic/test_edge.py +67 -0
- lionagi/tests/test_core/generic/test_graph.py +96 -0
- lionagi/tests/test_core/generic/test_node.py +106 -0
- lionagi/tests/test_core/generic/test_tree_node.py +73 -0
- lionagi/tests/test_core/test_branch.py +115 -294
- lionagi/tests/test_core/test_form.py +46 -0
- lionagi/tests/test_core/test_report.py +105 -0
- lionagi/tests/test_core/test_validator.py +111 -0
- lionagi/version.py +1 -1
- lionagi-0.2.1.dist-info/LICENSE +202 -0
- lionagi-0.2.1.dist-info/METADATA +272 -0
- lionagi-0.2.1.dist-info/RECORD +240 -0
- lionagi/core/branch/base.py +0 -653
- lionagi/core/branch/branch.py +0 -474
- lionagi/core/branch/flow_mixin.py +0 -96
- lionagi/core/branch/util.py +0 -323
- lionagi/core/direct/__init__.py +0 -19
- lionagi/core/direct/cot.py +0 -123
- lionagi/core/direct/plan.py +0 -164
- lionagi/core/direct/predict.py +0 -166
- lionagi/core/direct/react.py +0 -171
- lionagi/core/direct/score.py +0 -279
- lionagi/core/direct/select.py +0 -170
- lionagi/core/direct/sentiment.py +0 -1
- lionagi/core/direct/utils.py +0 -110
- lionagi/core/direct/vote.py +0 -64
- lionagi/core/execute/base_executor.py +0 -47
- lionagi/core/flow/baseflow.py +0 -23
- lionagi/core/flow/monoflow/ReAct.py +0 -240
- lionagi/core/flow/monoflow/__init__.py +0 -9
- lionagi/core/flow/monoflow/chat.py +0 -95
- lionagi/core/flow/monoflow/chat_mixin.py +0 -253
- lionagi/core/flow/monoflow/followup.py +0 -215
- lionagi/core/flow/polyflow/__init__.py +0 -1
- lionagi/core/flow/polyflow/chat.py +0 -251
- lionagi/core/form/action_form.py +0 -26
- lionagi/core/form/field_validator.py +0 -287
- lionagi/core/form/form.py +0 -302
- lionagi/core/form/mixin.py +0 -214
- lionagi/core/form/scored_form.py +0 -13
- lionagi/core/generic/action.py +0 -26
- lionagi/core/generic/component.py +0 -532
- lionagi/core/generic/condition.py +0 -46
- lionagi/core/generic/mail.py +0 -90
- lionagi/core/generic/mailbox.py +0 -36
- lionagi/core/generic/relation.py +0 -70
- lionagi/core/generic/signal.py +0 -22
- lionagi/core/generic/structure.py +0 -362
- lionagi/core/generic/transfer.py +0 -20
- lionagi/core/generic/work.py +0 -40
- lionagi/core/graph/graph.py +0 -126
- lionagi/core/graph/tree.py +0 -190
- lionagi/core/mail/schema.py +0 -63
- lionagi/core/messages/schema.py +0 -325
- lionagi/core/tool/__init__.py +0 -5
- lionagi/core/tool/tool.py +0 -28
- lionagi/core/tool/tool_manager.py +0 -283
- lionagi/experimental/report/form.py +0 -64
- lionagi/experimental/report/report.py +0 -138
- lionagi/experimental/report/util.py +0 -47
- lionagi/experimental/tool/function_calling.py +0 -43
- lionagi/experimental/tool/manual.py +0 -66
- lionagi/experimental/tool/schema.py +0 -59
- lionagi/experimental/tool/tool_manager.py +0 -138
- lionagi/experimental/tool/util.py +0 -16
- lionagi/experimental/validator/rule.py +0 -139
- lionagi/experimental/validator/validator.py +0 -56
- lionagi/experimental/work/__init__.py +0 -10
- lionagi/experimental/work/async_queue.py +0 -54
- lionagi/experimental/work/schema.py +0 -73
- lionagi/experimental/work/work_function.py +0 -67
- lionagi/experimental/work/worker.py +0 -56
- lionagi/experimental/work2/form.py +0 -371
- lionagi/experimental/work2/report.py +0 -289
- lionagi/experimental/work2/schema.py +0 -30
- lionagi/experimental/work2/tests.py +0 -72
- lionagi/experimental/work2/work_function.py +0 -89
- lionagi/experimental/work2/worker.py +0 -12
- lionagi/integrations/bridge/llamaindex_/get_index.py +0 -294
- lionagi/tests/test_core/generic/test_component.py +0 -89
- lionagi/tests/test_core/test_base_branch.py +0 -426
- lionagi/tests/test_core/test_chat_flow.py +0 -63
- lionagi/tests/test_core/test_mail_manager.py +0 -75
- lionagi/tests/test_core/test_prompts.py +0 -51
- lionagi/tests/test_core/test_session.py +0 -254
- lionagi/tests/test_core/test_session_base_util.py +0 -313
- lionagi/tests/test_core/test_tool_manager.py +0 -95
- lionagi-0.1.2.dist-info/LICENSE +0 -9
- lionagi-0.1.2.dist-info/METADATA +0 -174
- lionagi-0.1.2.dist-info/RECORD +0 -206
- /lionagi/core/{branch → _setting}/__init__.py +0 -0
- /lionagi/core/{execute → agent/eval}/__init__.py +0 -0
- /lionagi/core/{flow → agent/learn}/__init__.py +0 -0
- /lionagi/core/{form → agent/plan}/__init__.py +0 -0
- /lionagi/core/{branch/executable_branch.py → agent/plan/plan.py} +0 -0
- /lionagi/core/{graph → director}/__init__.py +0 -0
- /lionagi/core/{messages → engine}/__init__.py +0 -0
- /lionagi/{experimental/directive/evaluator → core/engine}/sandbox_.py +0 -0
- /lionagi/{experimental/directive/evaluator → core/executor}/__init__.py +0 -0
- /lionagi/{experimental/directive/template_ → core/rule}/__init__.py +0 -0
- /lionagi/{experimental/report → core/unit/template}/__init__.py +0 -0
- /lionagi/{experimental/tool → core/validator}/__init__.py +0 -0
- /lionagi/{experimental/validator → core/work}/__init__.py +0 -0
- /lionagi/experimental/{work2 → compressor}/__init__.py +0 -0
- /lionagi/{core/flow/mono_chat_mixin.py → experimental/directive/template/__init__.py} +0 -0
- /lionagi/experimental/directive/{schema.py → template/schema.py} +0 -0
- /lionagi/experimental/{work2/util.py → evaluator/__init__.py} +0 -0
- /lionagi/experimental/{work2/work.py → knowledge/__init__.py} +0 -0
- /lionagi/{tests/libs/test_async.py → experimental/knowledge/graph.py} +0 -0
- {lionagi-0.1.2.dist-info → lionagi-0.2.1.dist-info}/WHEEL +0 -0
- {lionagi-0.1.2.dist-info → lionagi-0.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,247 @@
|
|
1
|
+
import asyncio
|
2
|
+
from lionagi import alcall
|
3
|
+
from lionagi.libs.ln_convert import to_list
|
4
|
+
import numpy as np
|
5
|
+
from lionagi.core.collections import iModel
|
6
|
+
from .base import TokenCompressor
|
7
|
+
from lionagi.libs.ln_tokenize import TokenizeUtil
|
8
|
+
from time import time
|
9
|
+
|
10
|
+
# inspired by LLMLingua, MIT License, Copyright (c) Microsoft Corporation.
|
11
|
+
# https://github.com/microsoft/LLMLingua
|
12
|
+
|
13
|
+
|
14
|
+
class LLMCompressor(TokenCompressor):
    """Compress text by keeping only the chunks a language model scores highest.

    The text is split into chunks, each chunk is scored through
    ``imodel.compute_perplexity``, and the best-ranked chunks are kept until
    the token budget (``target_ratio`` of the original token count) is used up.
    """

    # USD per one million tokens, used only for the cost estimate printed by
    # ``compress``.
    # NOTE(review): these are applied regardless of the configured model;
    # presumably gpt-3.5-turbo list prices -- confirm before relying on them.
    _PROMPT_USD_PER_MILLION = 0.5
    _COMPLETION_USD_PER_MILLION = 1.5

    def __init__(
        self,
        imodel: iModel = None,
        system_msg=None,
        tokenizer=None,  # must be a callable or object with a tokenize method
        splitter=None,  # must be a callable or object with a split/chunk/segment method
        target_ratio=0.2,
        n_samples=5,  # the cumulative samples to take in each perplexity calculation
        chunk_size=64,
        max_tokens_per_sample=80,
        min_compression_score=0,  # (0-1) the minimum score to consider for compression, 0 means all
        split_overlap=0,
        split_threshold=0,
        verbose=True,
    ):
        """Store the compression settings and the model used for scoring.

        Args:
            imodel: Model used to score chunks. When falsy, a
                ``gpt-3.5-turbo`` model with ``temperature=0.3`` is created.
            system_msg: System message sent with every scoring request.
            tokenizer: Optional custom tokenizer; either a callable or an
                object exposing ``tokenize``.
            splitter: Optional custom splitter; either a callable or an
                object exposing ``split``, ``chunk`` or ``segment``.
            target_ratio: Default fraction of the original token count to keep.
            n_samples: Default ``n_samples`` forwarded to the model.
            chunk_size: Default chunk size used by ``split``.
            max_tokens_per_sample: Maximum tokens of one item sent for
                scoring; also the chunk size ``compress`` splits with.
            min_compression_score: Default threshold an item's perplexity
                must exceed to be selected.
            split_overlap: Default overlap used by ``split``.
            split_threshold: Default threshold used by ``split``.
            verbose: Stored on the instance.
                NOTE(review): ``compress`` reads its own ``verbose`` argument
                and does not consult this attribute -- confirm intent.
        """
        imodel = imodel or iModel(model="gpt-3.5-turbo", temperature=0.3)
        super().__init__(imodel=imodel, tokenizer=tokenizer, splitter=splitter)
        self.system_msg = (
            system_msg
            or "Concisely summarize and compress the information for storage:"
        )
        self.target_ratio = target_ratio
        self.n_samples = n_samples
        self.chunk_size = chunk_size
        self.max_tokens_per_sample = max_tokens_per_sample
        self.min_compression_score = min_compression_score
        self.verbose = verbose
        self.split_overlap = split_overlap
        self.split_threshold = split_threshold

    def tokenize(self, text, encoding_name=None, return_byte=False, **kwargs):
        """Tokenize ``text`` with the custom tokenizer or the built-in default.

        Without a custom tokenizer, ``TokenizeUtil.tokenize`` is called with
        the model name of ``self.imodel`` as ``encoding_model`` plus the given
        ``encoding_name`` and ``return_byte``. ``encoding_name`` may be one of
        ['gpt2', 'r50k_base', 'p50k_base', 'p50k_edit', 'cl100k_base',
        'o200k_base'].

        With a custom tokenizer only ``text`` and ``**kwargs`` are forwarded;
        ``encoding_name`` and ``return_byte`` are not passed on.
        """
        if not self.tokenizer:
            return TokenizeUtil.tokenize(
                text,
                encoding_model=self.imodel.iModel_name,
                encoding_name=encoding_name,
                return_byte=return_byte,
            )

        if hasattr(self.tokenizer, "tokenize"):
            return self.tokenizer.tokenize(text, **kwargs)

        return self.tokenizer(text, **kwargs)

    def split(
        self,
        text,
        chunk_size=None,
        overlap=None,
        threshold=None,
        by_chars=False,
        return_tokens=False,
        return_byte=False,
        **kwargs,
    ):
        """Split ``text`` into chunks.

        Without a custom splitter, ``TokenizeUtil.chunk_by_chars`` (when
        ``by_chars`` is true) or ``TokenizeUtil.chunk_by_tokens`` is used;
        falsy ``chunk_size`` / ``overlap`` / ``threshold`` fall back to the
        instance defaults.

        With a custom splitter, the first callable attribute among ``split``,
        ``chunk`` and ``segment`` is used, otherwise the splitter itself is
        called. Only ``text`` and ``**kwargs`` are forwarded to it; the named
        chunking parameters of this method are not passed on.

        Raises:
            TypeError: If the custom splitter is neither callable nor exposes
                one of the supported methods.
        """
        if not self.splitter:
            chunker = (
                TokenizeUtil.chunk_by_chars
                if by_chars
                else TokenizeUtil.chunk_by_tokens
            )
            return chunker(
                text,
                chunk_size or self.chunk_size,
                overlap or self.split_overlap,
                threshold or self.split_threshold,
                return_tokens=return_tokens,
                return_byte=return_byte,
            )

        # Look the method up by name; the previous code fed the looked-up
        # attribute back into getattr() as if it were a name, which always
        # raised TypeError for custom splitters.
        for method_name in ("split", "chunk", "segment"):
            method = getattr(self.splitter, method_name, None)
            if callable(method):
                return method(text, **kwargs)

        if callable(self.splitter):
            return self.splitter(text, **kwargs)

        raise TypeError(
            "splitter must be a callable or an object with a "
            "split/chunk/segment method"
        )

    async def rank_by_pplex(
        self, items: list, initial_text=None, cumulative=False, n_samples=None, **kwargs
    ):
        """Score every item with the model and return them ranked.

        An item can be a single token or a list of tokens. A non-list
        ``items`` value is tokenized first.

        Args:
            items: Items to rank.
            initial_text: Context passed to the model as ``initial_context``;
                in cumulative mode it also seeds the growing context.
            cumulative: When true, item *k* is scored as the concatenation of
                ``initial_text`` and items ``0..k`` instead of on its own.
            n_samples: Overrides ``self.n_samples`` when truthy.
            **kwargs: Additional arguments passed to the model.

        Returns:
            A list of ``(item, info)`` tuples, where ``info`` is whatever
            ``imodel.compute_perplexity`` returned, sorted in descending order
            of ``info["logprobs"]``.
        """

        async def _score(item):
            tokens = item if isinstance(item, list) else [item]
            return await self.imodel.compute_perplexity(
                initial_context=initial_text,
                # Slicing already returns the whole list when it is short.
                tokens=tokens[: self.max_tokens_per_sample],
                n_samples=n_samples or self.n_samples,
                system_msg=self.system_msg,
                **kwargs,
            )

        if not isinstance(items, list):
            items = self.tokenize(items)

        # A single item is scored like any other so that callers always get
        # (item, info) pairs; the old shortcut returned ``[items]``, which
        # made ``compress`` fail when indexing ``i[1]``.
        if cumulative:
            segments = []
            context = initial_text or ""
            for item in items:
                if isinstance(item, list):
                    context += " " + " ".join(item).strip()
                else:
                    context += " " + item.strip()
                segments.append(context)
        else:
            segments = items

        infos = await asyncio.gather(*(_score(segment) for segment in segments))

        # NOTE(review): ranking uses the "logprobs" field while selection in
        # ``select_by_pplex`` filters on "perplexity" -- confirm this key.
        return sorted(
            zip(items, infos), key=lambda pair: pair[1]["logprobs"], reverse=True
        )

    async def compress(
        self,
        text,
        target_ratio=None,
        initial_text=None,
        cumulative=False,
        split_kwargs=None,
        split_overlap=None,
        split_threshold=None,
        rank_by="perplexity",
        min_compression_score=None,
        verbose=True,
        **kwargs,
    ):
        """Compress ``text`` down to roughly ``target_ratio`` of its tokens.

        Args:
            text: Text to compress.
            target_ratio: Fraction of tokens to keep; falls back to
                ``self.target_ratio`` when falsy.
            initial_text: Context forwarded to ``rank_by_pplex``.
            cumulative: Forwarded to ``rank_by_pplex``.
            split_kwargs: Keyword arguments for ``split``. When ``None`` they
                are built from ``self.max_tokens_per_sample``,
                ``split_overlap`` and ``split_threshold``.
            split_overlap: Used only when ``split_kwargs`` is ``None``.
            split_threshold: Used only when ``split_kwargs`` is ``None``.
            rank_by: Ranking method; only ``"perplexity"`` is supported.
            min_compression_score: Falls back to
                ``self.min_compression_score`` when falsy.
            verbose: When true, print a summary of the compression run.
            **kwargs: Forwarded to ``rank_by_pplex`` (and on to the model).

        Returns:
            The selected chunks, each stripped, concatenated in rank order
            with every ``"\\n\\n"`` removed.

        Raises:
            ValueError: If ``rank_by`` is not ``"perplexity"``.
        """
        start = time()
        if split_kwargs is None:
            split_kwargs = {
                "chunk_size": self.max_tokens_per_sample,
                "overlap": split_overlap or 0,
                "threshold": split_threshold or 0,
            }

        len_tokens = len(self.tokenize(text))
        items = self.split(text, return_tokens=True, **split_kwargs)

        if rank_by != "perplexity":
            raise ValueError(f"Ranking method {rank_by} is not supported")

        ranked_items = await self.rank_by_pplex(
            items=items, initial_text=initial_text, cumulative=cumulative, **kwargs
        )

        prompt_tokens = sum(info["num_prompt_tokens"] for _, info in ranked_items)
        num_completion_tokens = sum(
            info["num_completion_tokens"] for _, info in ranked_items
        )
        price = (
            prompt_tokens * self._PROMPT_USD_PER_MILLION / 1000000
            + num_completion_tokens * self._COMPLETION_USD_PER_MILLION / 1000000
        )

        selected_items = self.select_by_pplex(
            ranked_items=ranked_items,
            target_compression_ratio=target_ratio or self.target_ratio,
            original_length=len_tokens,
            min_pplex=min_compression_score or self.min_compression_score,
        )

        if verbose:
            len_ = sum(self._count_tokens(i) for i in selected_items)
            # Guard the ratio so empty input reports 0 instead of raising
            # ZeroDivisionError.
            ratio = len_ / len_tokens if len_tokens else 0.0
            msg = ""
            msg += f"Original Token number: {len_tokens}\n"
            msg += f"Selected Token number: {len_}\n"
            msg += f"Token Compression Ratio: {ratio:.03f}\n"
            msg += f"Compression Time: {time() - start:.04f} seconds\n"
            msg += f"Compression Model: {self.imodel.iModel_name}\n"
            msg += f"Compression Method: {rank_by}\n"
            msg += f"Compression Usage: ${price:.05f}\n"
            print(msg)

        compressed = "".join([i.strip() for i in selected_items]).strip()
        return compressed.replace("\n\n", "")

    def _count_tokens(self, item):
        """Return the number of tokens in ``item`` (a string or token list)."""
        if isinstance(item, str):
            item = self.tokenize(item)
        return len(to_list(item, dropna=True, flatten=True))

    def select_by_pplex(
        self, ranked_items, target_compression_ratio, original_length, min_pplex=None
    ):
        """Pick ranked items, in order, until the token budget is exhausted.

        Args:
            ranked_items: Iterable of ``(item, info)`` pairs; ``info`` must
                contain a ``"perplexity"`` entry.
            target_compression_ratio: Fraction of ``original_length`` to keep.
            original_length: Token count of the uncompressed text.
            min_pplex: Items whose perplexity does not exceed this value are
                skipped; a falsy value means ``0``.

        Returns:
            A list of strings, one per selected item, each the concatenation
            of that item's tokens. Selection stops at the first eligible item
            that would push the total past the budget.
        """
        min_pplex = min_pplex or 0
        desired_length = int(original_length * target_compression_ratio)

        selected = []
        current_length = 0

        for item, info in ranked_items:
            if not info["perplexity"] > min_pplex:
                continue

            tokens = self.tokenize(item) if isinstance(item, str) else item
            tokens = tokens if isinstance(tokens, list) else [tokens]
            tokens = to_list(tokens, dropna=True, flatten=True)

            if current_length + len(tokens) > desired_length:
                break

            current_length += len(tokens)
            selected.append("".join(tokens))

        return selected
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# from lionagi.core.collections import iModel
|
2
|
+
# from .base import TokenCompressor
|
3
|
+
|
4
|
+
|
5
|
+
# class LLMSummarizer(TokenCompressor):
|
6
|
+
|
7
|
+
# def __init__(
|
8
|
+
# self, imodel: iModel = None, system_msg=None, tokenizer=None, splitter=None,
|
9
|
+
# max_tokens=25, target_ratio=0.3
|
10
|
+
# ):
|
11
|
+
# imodel = imodel or iModel(model="gpt-3.5-turbo", max_tokens=max_tokens)
|
12
|
+
# super().__init__(imodel=imodel, tokenizer=tokenizer, splitter=splitter)
|
13
|
+
# self.system_msg = (
|
14
|
+
# system_msg
|
15
|
+
# or "Summarize the following sentence to be concise and informative:"
|
16
|
+
# )
|
17
|
+
# self.target_ratio = target_ratio
|
18
|
+
|
19
|
+
# async def summarize_sentence(self, sentence, **kwargs):
|
20
|
+
# messages = [
|
21
|
+
# {"role": "system", "content": self.system_msg},
|
22
|
+
# {"role": "user", "content": sentence},
|
23
|
+
# ]
|
24
|
+
# response = await self.imodel.call_chat_completion(messages, **kwargs)
|
25
|
+
# return response["choices"][0]["message"]["content"]
|
26
|
+
|
27
|
+
# def tokenize(self, text):
|
28
|
+
# tokenize_func = self.tokenizer or tokenize
|
29
|
+
# return tokenize_func(text)
|
30
|
+
|
31
|
+
# def split(self, text):
|
32
|
+
# split_func = self.splitter or split_into_segments
|
33
|
+
# return split_func(text)
|
34
|
+
|
35
|
+
# # Function to enforce maximum sentence length
|
36
|
+
# def enforce_max_sentence_length(self, sentence, max_words=25):
|
37
|
+
# words = self.tokenize(sentence)
|
38
|
+
# if len(words) > max_words:
|
39
|
+
# sentence = ' '.join(words[:max_words])
|
40
|
+
# return sentence
|
41
|
+
|
42
|
+
# async def summarize_text(self, text, max_length_per_sentence=25, target_ratio=None, **kwargs):
|
43
|
+
# sentences = self.split(text)
|
44
|
+
# summarized = await alcall(
|
45
|
+
# sentences, self.summarize_sentence, **kwargs
|
46
|
+
# )
|
47
|
+
# summarized = [
|
48
|
+
# self.enforce_max_sentence_length(sentence, max_length_per_sentence)
|
49
|
+
# for sentence in summarized
|
50
|
+
# ]
|
51
|
+
|
52
|
+
# original_length = len(self.tokenize(text))
|
53
|
+
# summarized_length = len(self.tokenize(' '.join(summarized)))
|
54
|
+
# current_ratio = summarized_length / original_length
|
55
|
+
|
56
|
+
# target_ratio = target_ratio or self.target_ratio
|
57
|
+
# if current_ratio > target_ratio:
|
58
|
+
# words_to_remove = int((current_ratio - target_ratio) * original_length)
|
59
|
+
# return ' '.join(summarized[:-words_to_remove])
|
60
|
+
|
61
|
+
# return ' '.join(summarized)
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# import asyncio
|
2
|
+
# from lionagi import alcall
|
3
|
+
# from lionagi.libs.ln_convert import to_list
|
4
|
+
# import numpy as np
|
5
|
+
|
6
|
+
# def split_into_segments(text):
|
7
|
+
# segments = text.split(".") # Splitting on every period (surrounding whitespace is stripped below)
|
8
|
+
# return [segment.strip() for segment in segments if segment]
|
9
|
+
|
10
|
+
# # Tokenize the segment
|
11
|
+
# def tokenize(segment):
|
12
|
+
# tokens = segment.split() # Simple space-based tokenization
|
13
|
+
# return tokens
|
14
|
+
|
15
|
+
# async def calculate_perplexity(system_msg: str, imodel, tokens, initial_context=None, **kwargs):
|
16
|
+
# _tasks = []
|
17
|
+
# _context = initial_context or ""
|
18
|
+
# for i in range(len(tokens)):
|
19
|
+
# _context += " " + tokens[i]
|
20
|
+
# messages = [
|
21
|
+
# {"role": "system", "content": system_msg},
|
22
|
+
# {"role": "user", "content": _context},
|
23
|
+
# ]
|
24
|
+
# task = asyncio.create_task(
|
25
|
+
# imodel.call_chat_completion(
|
26
|
+
# messages=messages, logprobs=True, max_tokens=1, **kwargs
|
27
|
+
# )
|
28
|
+
# )
|
29
|
+
# _tasks.append(task)
|
30
|
+
|
31
|
+
# results = await asyncio.gather(*_tasks)
|
32
|
+
# logprobs = [
|
33
|
+
# result[1]["choices"][0]["logprobs"]["content"] for result in results
|
34
|
+
# ]
|
35
|
+
# logprobs = to_list(logprobs, flatten=True, dropna=True)
|
36
|
+
# logprobs = [lprob_["logprob"] for lprob_ in logprobs]
|
37
|
+
# return np.exp(np.mean(logprobs))
|
38
|
+
|
39
|
+
# async def rank_by_perplexity(
|
40
|
+
# text: str | list[str] = None, # if list we assume they are already well split
|
41
|
+
# initial_text=None,
|
42
|
+
|
43
|
+
# segments,
|
44
|
+
# initial_text=None,
|
45
|
+
# cumulative=False,
|
46
|
+
# **kwargs
|
47
|
+
# ):
|
48
|
+
# _segments = []
|
49
|
+
# _context = initial_text or ""
|
50
|
+
# _task = []
|
51
|
+
|
52
|
+
# if cumulative:
|
53
|
+
# for i in range(1, len(segments)):
|
54
|
+
# _context += " " + segments[i - 1]
|
55
|
+
# _segments.append(_context)
|
56
|
+
# else:
|
57
|
+
# _segments = segments
|
58
|
+
|
59
|
+
# for i in segments:
|
60
|
+
# _task.append(asyncio.create_task(
|
61
|
+
# calculate_perplexity(
|
62
|
+
# self.system_msg, self.imodel, self.tokenize(i), **kwargs)
|
63
|
+
# )
|
64
|
+
# )
|
65
|
+
# segment_perplexities = await asyncio.gather(*_task)
|
66
|
+
|
67
|
+
# return {
|
68
|
+
# segment: perplexity
|
69
|
+
# for segment, perplexity in zip(segments, segment_perplexities)
|
70
|
+
# }
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# from ..form.predict import predict
|
2
|
+
# from .select import select
|
3
|
+
# from ..form.score import score
|
4
|
+
# from ..form.react import react
|
5
|
+
# from .vote import vote
|
6
|
+
# from ..form.plan import plan
|
7
|
+
# from .cot import chain_of_thoughts, chain_of_react
|
8
|
+
|
9
|
+
|
10
|
+
# __all__ = [
|
11
|
+
# "predict",
|
12
|
+
# "select",
|
13
|
+
# "score",
|
14
|
+
# "vote",
|
15
|
+
# "react",
|
16
|
+
# "plan",
|
17
|
+
# "chain_of_thoughts",
|
18
|
+
# "chain_of_react",
|
19
|
+
# ]
|
@@ -1,7 +1,23 @@
|
|
1
|
+
"""
|
2
|
+
Copyright 2024 HaiyangLi
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
"""
|
16
|
+
|
1
17
|
from typing import List, Optional
|
2
18
|
|
3
|
-
from lionagi.
|
4
|
-
from ..schema import IfNode, TryNode, ForNode
|
19
|
+
from lionagi.experimental.directive.tokenizer import BaseToken
|
20
|
+
from ..template.schema import IfNode, TryNode, ForNode
|
5
21
|
|
6
22
|
|
7
23
|
class BaseDirectiveParser:
|
@@ -76,10 +92,19 @@ class BaseDirectiveParser:
|
|
76
92
|
self.next_token()
|
77
93
|
|
78
94
|
def skip_semicolon(self):
|
95
|
+
"""Skips a semicolon token if it is the current token."""
|
79
96
|
if self.current_token and self.current_token.value == ";":
|
80
97
|
self.next_token()
|
81
98
|
|
82
99
|
def parse_expression(self):
|
100
|
+
"""Parses an expression until a semicolon is encountered.
|
101
|
+
|
102
|
+
Returns:
|
103
|
+
str: The parsed expression as a string.
|
104
|
+
|
105
|
+
Raises:
|
106
|
+
SyntaxError: If a semicolon is not found at the end of the expression.
|
107
|
+
"""
|
83
108
|
expr = ""
|
84
109
|
while self.current_token and self.current_token.value != ";":
|
85
110
|
expr += self.current_token.value + " "
|
@@ -91,6 +116,11 @@ class BaseDirectiveParser:
|
|
91
116
|
return expr.strip()
|
92
117
|
|
93
118
|
def parse_if_block(self):
|
119
|
+
"""Parses a block of statements for an IF condition.
|
120
|
+
|
121
|
+
Returns:
|
122
|
+
list: The parsed block of statements as a list of strings.
|
123
|
+
"""
|
94
124
|
block = []
|
95
125
|
# Parse the block until 'ELSE', 'ENDIF', ensuring not to include semicolons as part of the block
|
96
126
|
while self.current_token and self.current_token.value not in ("ENDIF", "ELSE"):
|
@@ -103,6 +133,14 @@ class BaseDirectiveParser:
|
|
103
133
|
return block
|
104
134
|
|
105
135
|
def parse_if_statement(self):
|
136
|
+
"""Parses an IF statement.
|
137
|
+
|
138
|
+
Returns:
|
139
|
+
IfNode: The parsed IF statement as an IfNode object.
|
140
|
+
|
141
|
+
Raises:
|
142
|
+
SyntaxError: If the IF statement is not properly formed.
|
143
|
+
"""
|
106
144
|
if self.current_token.type != "KEYWORD" or self.current_token.value != "IF":
|
107
145
|
raise SyntaxError("Expected IF statement")
|
108
146
|
self.next_token() # Skip 'IF'
|
@@ -125,6 +163,14 @@ class BaseDirectiveParser:
|
|
125
163
|
return IfNode(condition, true_block, false_block)
|
126
164
|
|
127
165
|
def parse_for_statement(self):
|
166
|
+
"""Parses a FOR statement.
|
167
|
+
|
168
|
+
Returns:
|
169
|
+
ForNode: The parsed FOR statement as a ForNode object.
|
170
|
+
|
171
|
+
Raises:
|
172
|
+
SyntaxError: If the FOR statement is not properly formed.
|
173
|
+
"""
|
128
174
|
if self.current_token.type != "KEYWORD" or self.current_token.value != "FOR":
|
129
175
|
raise SyntaxError("Expected FOR statement")
|
130
176
|
self.next_token() # Skip 'FOR'
|
@@ -153,6 +199,11 @@ class BaseDirectiveParser:
|
|
153
199
|
return ForNode(iterator, collection, true_block)
|
154
200
|
|
155
201
|
def parse_for_block(self):
|
202
|
+
"""Parses a block of statements for a FOR loop.
|
203
|
+
|
204
|
+
Returns:
|
205
|
+
list: The parsed block of statements as a list of strings.
|
206
|
+
"""
|
156
207
|
block = []
|
157
208
|
# Skip initial 'DO' if present
|
158
209
|
if self.current_token and self.current_token.value == "DO":
|
@@ -173,6 +224,14 @@ class BaseDirectiveParser:
|
|
173
224
|
return block
|
174
225
|
|
175
226
|
def parse_try_statement(self):
|
227
|
+
"""Parses a TRY statement.
|
228
|
+
|
229
|
+
Returns:
|
230
|
+
TryNode: The parsed TRY statement as a TryNode object.
|
231
|
+
|
232
|
+
Raises:
|
233
|
+
SyntaxError: If the TRY statement is not properly formed.
|
234
|
+
"""
|
176
235
|
if self.current_token.type != "KEYWORD" or self.current_token.value != "TRY":
|
177
236
|
raise SyntaxError("Expected TRY statement")
|
178
237
|
self.next_token() # Skip 'TRY'
|
@@ -196,6 +255,14 @@ class BaseDirectiveParser:
|
|
196
255
|
return TryNode(try_block, except_block)
|
197
256
|
|
198
257
|
def parse_try_block(self, stop_keyword):
|
258
|
+
"""Parses a block of statements for a TRY or EXCEPT clause.
|
259
|
+
|
260
|
+
Args:
|
261
|
+
stop_keyword (str): The keyword that indicates the end of the block.
|
262
|
+
|
263
|
+
Returns:
|
264
|
+
list: The parsed block of statements as a list of strings.
|
265
|
+
"""
|
199
266
|
block = []
|
200
267
|
while self.current_token and self.current_token.value != stop_keyword:
|
201
268
|
if self.current_token.value == "DO":
|
@@ -1,10 +1,26 @@
|
|
1
|
+
"""
|
2
|
+
Copyright 2024 HaiyangLi
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
"""
|
16
|
+
|
1
17
|
from typing import Any, Dict
|
2
18
|
import re
|
3
19
|
|
4
20
|
from ..evaluator.base_evaluator import BaseEvaluator
|
5
21
|
|
6
22
|
|
7
|
-
class
|
23
|
+
class DirectiveTemplate:
|
8
24
|
"""Enhanced base template class for processing templates with conditionals and loops."""
|
9
25
|
|
10
26
|
def __init__(self, template_str: str):
|
@@ -1,3 +1,19 @@
|
|
1
|
+
"""
|
2
|
+
Copyright 2024 HaiyangLi
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
"""
|
16
|
+
|
1
17
|
import re
|
2
18
|
|
3
19
|
|
@@ -1,3 +1,19 @@
|
|
1
|
+
"""
|
2
|
+
Copyright 2024 HaiyangLi
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
"""
|
16
|
+
|
1
17
|
import ast
|
2
18
|
import operator
|
3
19
|
|
@@ -1,3 +1,19 @@
|
|
1
|
+
"""
|
2
|
+
Copyright 2024 HaiyangLi
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
"""
|
16
|
+
|
1
17
|
import ast
|
2
18
|
import operator
|
3
19
|
from typing import Any, Dict, Tuple, Callable
|
File without changes
|
File without changes
|
@@ -0,0 +1 @@
|
|
1
|
+
# TODO
|
@@ -28,6 +28,10 @@ def to_langchain_document(datanode: T, **kwargs: Any) -> Any:
|
|
28
28
|
SysUtil.change_dict_key(dnode, old_key="content", new_key="page_content")
|
29
29
|
SysUtil.change_dict_key(dnode, old_key="lc_id", new_key="id_")
|
30
30
|
dnode = {**dnode, **kwargs}
|
31
|
+
dnode = {k: v for k, v in dnode.items() if v is not None}
|
32
|
+
if "page_content" not in dnode:
|
33
|
+
dnode["page_content"] = ""
|
34
|
+
|
31
35
|
return LangchainDocument(**dnode)
|
32
36
|
|
33
37
|
|
@@ -0,0 +1,30 @@
|
|
1
|
+
class LlamaIndex:
    """Helper for building a llama_index index over a collection of nodes."""

    @classmethod
    def index(
        cls,
        nodes,
        llm_obj=None,
        llm_class=None,
        llm_kwargs=None,
        index_type=None,
        **kwargs,
    ):
        """Configure the llama_index LLM and build an index over ``nodes``.

        Args:
            nodes: Passed as the first positional argument to ``index_type``.
            llm_obj: A ready-made LLM instance. When falsy, one is created
                from ``llm_class`` and ``llm_kwargs``.
            llm_class: Callable used to create the LLM when ``llm_obj`` is
                not given. Falls back to ``llama_index.llms.openai.OpenAI``.
            llm_kwargs: Keyword arguments for ``llm_class``. ``model``
                defaults to ``"gpt-4o"`` when absent. The mapping passed by
                the caller is never modified.
            index_type: Callable used to build the index. Falls back to
                ``llama_index.core.VectorStoreIndex``.
            **kwargs: Extra keyword arguments forwarded to ``index_type``.

        Returns:
            Whatever ``index_type(nodes, **kwargs)`` returns.

        Note:
            This assigns the LLM to ``llama_index.core.Settings.llm``.
            NOTE(review): ``Settings`` is llama_index's shared configuration
            object, so the assignment presumably affects later llama_index
            calls as well - confirm against the llama_index documentation.
        """
        from llama_index.core import Settings

        if not llm_obj:
            if not llm_class:
                # Import the OpenAI integration only when it is actually the
                # fallback, so callers that supply their own LLM class do not
                # need the OpenAI package installed.
                from llama_index.llms.openai import OpenAI

                llm_class = OpenAI

            # Work on a copy so the default model is not written back into
            # the dictionary owned by the caller.
            llm_kwargs = dict(llm_kwargs or {})
            llm_kwargs.setdefault("model", "gpt-4o")
            llm_obj = llm_class(**llm_kwargs)

        Settings.llm = llm_obj

        if not index_type:
            from llama_index.core import VectorStoreIndex

            index_type = VectorStoreIndex

        return index_type(nodes, **kwargs)
|
@@ -100,3 +100,9 @@ class LlamaIndexBridge:
|
|
100
100
|
from .reader import get_llama_index_reader
|
101
101
|
|
102
102
|
return get_llama_index_reader(*args, **kwargs)
|
103
|
+
|
104
|
+
@staticmethod
def index(nodes, **kwargs):
    """Build an index over ``nodes`` by delegating to ``LlamaIndex.index``.

    The ``LlamaIndex`` helper is imported inside the function rather than
    at module import time.

    Args:
        nodes: Forwarded unchanged as the first argument.
        **kwargs: Forwarded unchanged as keyword arguments.

    Returns:
        The value returned by ``LlamaIndex.index(nodes, **kwargs)``.
    """
    from .index import LlamaIndex as _index_helper

    built_index = _index_helper.index(nodes, **kwargs)
    return built_index
|