lionagi 0.0.312__py3-none-any.whl → 0.2.1__py3-none-any.whl
- lionagi/__init__.py +61 -3
- lionagi/core/__init__.py +0 -14
- lionagi/core/_setting/_setting.py +59 -0
- lionagi/core/action/__init__.py +14 -0
- lionagi/core/action/function_calling.py +136 -0
- lionagi/core/action/manual.py +1 -0
- lionagi/core/action/node.py +109 -0
- lionagi/core/action/tool.py +114 -0
- lionagi/core/action/tool_manager.py +356 -0
- lionagi/core/agent/__init__.py +0 -3
- lionagi/core/agent/base_agent.py +45 -36
- lionagi/core/agent/eval/evaluator.py +1 -0
- lionagi/core/agent/eval/vote.py +40 -0
- lionagi/core/agent/learn/learner.py +59 -0
- lionagi/core/agent/plan/unit_template.py +1 -0
- lionagi/core/collections/__init__.py +17 -0
- lionagi/core/collections/_logger.py +319 -0
- lionagi/core/collections/abc/__init__.py +53 -0
- lionagi/core/collections/abc/component.py +615 -0
- lionagi/core/collections/abc/concepts.py +297 -0
- lionagi/core/collections/abc/exceptions.py +150 -0
- lionagi/core/collections/abc/util.py +45 -0
- lionagi/core/collections/exchange.py +161 -0
- lionagi/core/collections/flow.py +426 -0
- lionagi/core/collections/model.py +419 -0
- lionagi/core/collections/pile.py +913 -0
- lionagi/core/collections/progression.py +236 -0
- lionagi/core/collections/util.py +64 -0
- lionagi/core/director/direct.py +314 -0
- lionagi/core/director/director.py +2 -0
- lionagi/core/engine/branch_engine.py +333 -0
- lionagi/core/engine/instruction_map_engine.py +204 -0
- lionagi/core/engine/sandbox_.py +14 -0
- lionagi/core/engine/script_engine.py +99 -0
- lionagi/core/executor/base_executor.py +90 -0
- lionagi/core/executor/graph_executor.py +330 -0
- lionagi/core/executor/neo4j_executor.py +384 -0
- lionagi/core/generic/__init__.py +7 -0
- lionagi/core/generic/edge.py +112 -0
- lionagi/core/generic/edge_condition.py +16 -0
- lionagi/core/generic/graph.py +236 -0
- lionagi/core/generic/hyperedge.py +1 -0
- lionagi/core/generic/node.py +220 -0
- lionagi/core/generic/tree.py +48 -0
- lionagi/core/generic/tree_node.py +79 -0
- lionagi/core/mail/__init__.py +7 -3
- lionagi/core/mail/mail.py +25 -0
- lionagi/core/mail/mail_manager.py +142 -58
- lionagi/core/mail/package.py +45 -0
- lionagi/core/mail/start_mail.py +36 -0
- lionagi/core/message/__init__.py +19 -0
- lionagi/core/message/action_request.py +133 -0
- lionagi/core/message/action_response.py +135 -0
- lionagi/core/message/assistant_response.py +95 -0
- lionagi/core/message/instruction.py +234 -0
- lionagi/core/message/message.py +101 -0
- lionagi/core/message/system.py +86 -0
- lionagi/core/message/util.py +283 -0
- lionagi/core/report/__init__.py +4 -0
- lionagi/core/report/base.py +217 -0
- lionagi/core/report/form.py +231 -0
- lionagi/core/report/report.py +166 -0
- lionagi/core/report/util.py +28 -0
- lionagi/core/rule/__init__.py +0 -0
- lionagi/core/rule/_default.py +16 -0
- lionagi/core/rule/action.py +99 -0
- lionagi/core/rule/base.py +238 -0
- lionagi/core/rule/boolean.py +56 -0
- lionagi/core/rule/choice.py +47 -0
- lionagi/core/rule/mapping.py +96 -0
- lionagi/core/rule/number.py +71 -0
- lionagi/core/rule/rulebook.py +109 -0
- lionagi/core/rule/string.py +52 -0
- lionagi/core/rule/util.py +35 -0
- lionagi/core/session/__init__.py +0 -3
- lionagi/core/session/branch.py +431 -0
- lionagi/core/session/directive_mixin.py +287 -0
- lionagi/core/session/session.py +230 -902
- lionagi/core/structure/__init__.py +1 -0
- lionagi/core/structure/chain.py +1 -0
- lionagi/core/structure/forest.py +1 -0
- lionagi/core/structure/graph.py +1 -0
- lionagi/core/structure/tree.py +1 -0
- lionagi/core/unit/__init__.py +5 -0
- lionagi/core/unit/parallel_unit.py +245 -0
- lionagi/core/unit/template/__init__.py +0 -0
- lionagi/core/unit/template/action.py +81 -0
- lionagi/core/unit/template/base.py +51 -0
- lionagi/core/unit/template/plan.py +84 -0
- lionagi/core/unit/template/predict.py +109 -0
- lionagi/core/unit/template/score.py +124 -0
- lionagi/core/unit/template/select.py +104 -0
- lionagi/core/unit/unit.py +362 -0
- lionagi/core/unit/unit_form.py +305 -0
- lionagi/core/unit/unit_mixin.py +1168 -0
- lionagi/core/unit/util.py +71 -0
- lionagi/core/validator/__init__.py +0 -0
- lionagi/core/validator/validator.py +364 -0
- lionagi/core/work/__init__.py +0 -0
- lionagi/core/work/work.py +76 -0
- lionagi/core/work/work_function.py +101 -0
- lionagi/core/work/work_queue.py +103 -0
- lionagi/core/work/worker.py +258 -0
- lionagi/core/work/worklog.py +120 -0
- lionagi/experimental/__init__.py +0 -0
- lionagi/experimental/compressor/__init__.py +0 -0
- lionagi/experimental/compressor/base.py +46 -0
- lionagi/experimental/compressor/llm_compressor.py +247 -0
- lionagi/experimental/compressor/llm_summarizer.py +61 -0
- lionagi/experimental/compressor/util.py +70 -0
- lionagi/experimental/directive/__init__.py +19 -0
- lionagi/experimental/directive/parser/__init__.py +0 -0
- lionagi/experimental/directive/parser/base_parser.py +282 -0
- lionagi/experimental/directive/template/__init__.py +0 -0
- lionagi/experimental/directive/template/base_template.py +79 -0
- lionagi/experimental/directive/template/schema.py +36 -0
- lionagi/experimental/directive/tokenizer.py +73 -0
- lionagi/experimental/evaluator/__init__.py +0 -0
- lionagi/experimental/evaluator/ast_evaluator.py +131 -0
- lionagi/experimental/evaluator/base_evaluator.py +218 -0
- lionagi/experimental/knowledge/__init__.py +0 -0
- lionagi/experimental/knowledge/base.py +10 -0
- lionagi/experimental/knowledge/graph.py +0 -0
- lionagi/experimental/memory/__init__.py +0 -0
- lionagi/experimental/strategies/__init__.py +0 -0
- lionagi/experimental/strategies/base.py +1 -0
- lionagi/integrations/bridge/autogen_/__init__.py +0 -0
- lionagi/integrations/bridge/autogen_/autogen_.py +124 -0
- lionagi/integrations/bridge/langchain_/documents.py +4 -0
- lionagi/integrations/bridge/llamaindex_/index.py +30 -0
- lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py +6 -0
- lionagi/integrations/bridge/llamaindex_/llama_pack.py +227 -0
- lionagi/integrations/bridge/llamaindex_/node_parser.py +6 -9
- lionagi/integrations/bridge/pydantic_/pydantic_bridge.py +1 -0
- lionagi/integrations/bridge/transformers_/__init__.py +0 -0
- lionagi/integrations/bridge/transformers_/install_.py +36 -0
- lionagi/integrations/chunker/__init__.py +0 -0
- lionagi/integrations/chunker/chunk.py +312 -0
- lionagi/integrations/config/oai_configs.py +38 -7
- lionagi/integrations/config/ollama_configs.py +1 -1
- lionagi/integrations/config/openrouter_configs.py +14 -2
- lionagi/integrations/loader/__init__.py +0 -0
- lionagi/integrations/loader/load.py +253 -0
- lionagi/integrations/loader/load_util.py +195 -0
- lionagi/integrations/provider/_mapping.py +46 -0
- lionagi/integrations/provider/litellm.py +2 -1
- lionagi/integrations/provider/mlx_service.py +16 -9
- lionagi/integrations/provider/oai.py +91 -4
- lionagi/integrations/provider/ollama.py +7 -6
- lionagi/integrations/provider/openrouter.py +115 -8
- lionagi/integrations/provider/services.py +2 -2
- lionagi/integrations/provider/transformers.py +18 -22
- lionagi/integrations/storage/__init__.py +3 -0
- lionagi/integrations/storage/neo4j.py +665 -0
- lionagi/integrations/storage/storage_util.py +287 -0
- lionagi/integrations/storage/structure_excel.py +285 -0
- lionagi/integrations/storage/to_csv.py +63 -0
- lionagi/integrations/storage/to_excel.py +83 -0
- lionagi/libs/__init__.py +26 -1
- lionagi/libs/ln_api.py +78 -23
- lionagi/libs/ln_context.py +37 -0
- lionagi/libs/ln_convert.py +21 -9
- lionagi/libs/ln_func_call.py +69 -28
- lionagi/libs/ln_image.py +107 -0
- lionagi/libs/ln_knowledge_graph.py +405 -0
- lionagi/libs/ln_nested.py +26 -11
- lionagi/libs/ln_parse.py +110 -14
- lionagi/libs/ln_queue.py +117 -0
- lionagi/libs/ln_tokenize.py +164 -0
- lionagi/{core/prompt/field_validator.py → libs/ln_validate.py} +79 -14
- lionagi/libs/special_tokens.py +172 -0
- lionagi/libs/sys_util.py +107 -2
- lionagi/lions/__init__.py +0 -0
- lionagi/lions/coder/__init__.py +0 -0
- lionagi/lions/coder/add_feature.py +20 -0
- lionagi/lions/coder/base_prompts.py +22 -0
- lionagi/lions/coder/code_form.py +13 -0
- lionagi/lions/coder/coder.py +168 -0
- lionagi/lions/coder/util.py +96 -0
- lionagi/lions/researcher/__init__.py +0 -0
- lionagi/lions/researcher/data_source/__init__.py +0 -0
- lionagi/lions/researcher/data_source/finhub_.py +191 -0
- lionagi/lions/researcher/data_source/google_.py +199 -0
- lionagi/lions/researcher/data_source/wiki_.py +96 -0
- lionagi/lions/researcher/data_source/yfinance_.py +21 -0
- lionagi/tests/integrations/__init__.py +0 -0
- lionagi/tests/libs/__init__.py +0 -0
- lionagi/tests/libs/test_field_validators.py +353 -0
- lionagi/tests/{test_libs → libs}/test_func_call.py +23 -21
- lionagi/tests/{test_libs → libs}/test_nested.py +36 -21
- lionagi/tests/{test_libs → libs}/test_parse.py +1 -1
- lionagi/tests/libs/test_queue.py +67 -0
- lionagi/tests/test_core/collections/__init__.py +0 -0
- lionagi/tests/test_core/collections/test_component.py +206 -0
- lionagi/tests/test_core/collections/test_exchange.py +138 -0
- lionagi/tests/test_core/collections/test_flow.py +145 -0
- lionagi/tests/test_core/collections/test_pile.py +171 -0
- lionagi/tests/test_core/collections/test_progression.py +129 -0
- lionagi/tests/test_core/generic/__init__.py +0 -0
- lionagi/tests/test_core/generic/test_edge.py +67 -0
- lionagi/tests/test_core/generic/test_graph.py +96 -0
- lionagi/tests/test_core/generic/test_node.py +106 -0
- lionagi/tests/test_core/generic/test_tree_node.py +73 -0
- lionagi/tests/test_core/test_branch.py +115 -292
- lionagi/tests/test_core/test_form.py +46 -0
- lionagi/tests/test_core/test_report.py +105 -0
- lionagi/tests/test_core/test_validator.py +111 -0
- lionagi/version.py +1 -1
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/LICENSE +12 -11
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/METADATA +19 -118
- lionagi-0.2.1.dist-info/RECORD +240 -0
- lionagi/core/branch/__init__.py +0 -4
- lionagi/core/branch/base_branch.py +0 -654
- lionagi/core/branch/branch.py +0 -471
- lionagi/core/branch/branch_flow_mixin.py +0 -96
- lionagi/core/branch/executable_branch.py +0 -347
- lionagi/core/branch/util.py +0 -323
- lionagi/core/direct/__init__.py +0 -6
- lionagi/core/direct/predict.py +0 -161
- lionagi/core/direct/score.py +0 -278
- lionagi/core/direct/select.py +0 -169
- lionagi/core/direct/utils.py +0 -87
- lionagi/core/direct/vote.py +0 -64
- lionagi/core/flow/base/baseflow.py +0 -23
- lionagi/core/flow/monoflow/ReAct.py +0 -238
- lionagi/core/flow/monoflow/__init__.py +0 -9
- lionagi/core/flow/monoflow/chat.py +0 -95
- lionagi/core/flow/monoflow/chat_mixin.py +0 -263
- lionagi/core/flow/monoflow/followup.py +0 -214
- lionagi/core/flow/polyflow/__init__.py +0 -1
- lionagi/core/flow/polyflow/chat.py +0 -248
- lionagi/core/mail/schema.py +0 -56
- lionagi/core/messages/__init__.py +0 -3
- lionagi/core/messages/schema.py +0 -533
- lionagi/core/prompt/prompt_template.py +0 -316
- lionagi/core/schema/__init__.py +0 -22
- lionagi/core/schema/action_node.py +0 -29
- lionagi/core/schema/base_mixin.py +0 -296
- lionagi/core/schema/base_node.py +0 -199
- lionagi/core/schema/condition.py +0 -24
- lionagi/core/schema/data_logger.py +0 -354
- lionagi/core/schema/data_node.py +0 -93
- lionagi/core/schema/prompt_template.py +0 -67
- lionagi/core/schema/structure.py +0 -910
- lionagi/core/tool/__init__.py +0 -3
- lionagi/core/tool/tool_manager.py +0 -280
- lionagi/integrations/bridge/pydantic_/base_model.py +0 -7
- lionagi/tests/test_core/test_base_branch.py +0 -427
- lionagi/tests/test_core/test_chat_flow.py +0 -63
- lionagi/tests/test_core/test_mail_manager.py +0 -75
- lionagi/tests/test_core/test_prompts.py +0 -51
- lionagi/tests/test_core/test_session.py +0 -254
- lionagi/tests/test_core/test_session_base_util.py +0 -312
- lionagi/tests/test_core/test_tool_manager.py +0 -95
- lionagi-0.0.312.dist-info/RECORD +0 -111
- /lionagi/core/{branch/base → _setting}/__init__.py +0 -0
- /lionagi/core/{flow → agent/eval}/__init__.py +0 -0
- /lionagi/core/{flow/base → agent/learn}/__init__.py +0 -0
- /lionagi/core/{prompt → agent/plan}/__init__.py +0 -0
- /lionagi/core/{tool/manual.py → agent/plan/plan.py} +0 -0
- /lionagi/{tests/test_integrations → core/director}/__init__.py +0 -0
- /lionagi/{tests/test_libs → core/engine}/__init__.py +0 -0
- /lionagi/{tests/test_libs/test_async.py → core/executor/__init__.py} +0 -0
- /lionagi/tests/{test_libs → libs}/test_api.py +0 -0
- /lionagi/tests/{test_libs → libs}/test_convert.py +0 -0
- /lionagi/tests/{test_libs → libs}/test_sys_util.py +0 -0
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/WHEEL +0 -0
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/top_level.txt +0 -0
lionagi/experimental/compressor/llm_compressor.py
@@ -0,0 +1,247 @@
+import asyncio
+from lionagi import alcall
+from lionagi.libs.ln_convert import to_list
+import numpy as np
+from lionagi.core.collections import iModel
+from .base import TokenCompressor
+from lionagi.libs.ln_tokenize import TokenizeUtil
+from time import time
+
+# inspired by LLMLingua, MIT License, Copyright (c) Microsoft Corporation.
+# https://github.com/microsoft/LLMLingua
+
+
+class LLMCompressor(TokenCompressor):
+
+    def __init__(
+        self,
+        imodel: iModel = None,
+        system_msg=None,
+        tokenizer=None,  # must be a callable or object with a tokenize method
+        splitter=None,  # must be a callable or object with a split/chunk/segment method
+        target_ratio=0.2,
+        n_samples=5,  # the cumulative samples to take in each perplexity calculation
+        chunk_size=64,
+        max_tokens_per_sample=80,
+        min_compression_score=0,  # (0-1) the minimum score to consider for compression, 0 means all
+        split_overlap=0,
+        split_threshold=0,
+        verbose=True,
+    ):
+        imodel = imodel or iModel(model="gpt-3.5-turbo", temperature=0.3)
+        super().__init__(imodel=imodel, tokenizer=tokenizer, splitter=splitter)
+        self.system_msg = (
+            system_msg
+            or "Concisely summarize and compress the information for storage:"
+        )
+        self.target_ratio = target_ratio
+        self.n_samples = n_samples
+        self.chunk_size = chunk_size
+        self.max_tokens_per_sample = max_tokens_per_sample
+        self.min_compression_score = min_compression_score
+        self.verbose = verbose
+        self.split_overlap = split_overlap
+        self.split_threshold = split_threshold
+
+    def tokenize(self, text, encoding_name=None, return_byte=False, **kwargs):
+        """
+        By default, `encoding_name` can be one of
+        ['gpt2', 'r50k_base', 'p50k_base', 'p50k_edit', 'cl100k_base', 'o200k_base'],
+
+        or any `encoding_model` that tiktoken supports in its model mapping, such as "gpt-4o".
+        """
+        if not self.tokenizer:
+            return TokenizeUtil.tokenize(
+                text,
+                encoding_model=self.imodel.iModel_name,
+                encoding_name=encoding_name,
+                return_byte=return_byte,
+            )
+
+        if hasattr(self.tokenizer, "tokenize"):
+            return self.tokenizer.tokenize(text, **kwargs)
+
+        return self.tokenizer(text, **kwargs)
+
+    def split(
+        self,
+        text,
+        chunk_size=None,
+        overlap=None,
+        threshold=None,
+        by_chars=False,
+        return_tokens=False,
+        return_byte=False,
+        **kwargs,
+    ):
+        if not self.splitter:
+            splitter = (
+                TokenizeUtil.chunk_by_chars
+                if by_chars
+                else TokenizeUtil.chunk_by_tokens
+            )
+            return splitter(
+                text,
+                chunk_size or self.chunk_size,
+                overlap or self.split_overlap,
+                threshold or self.split_threshold,
+                return_tokens=return_tokens,
+                return_byte=return_byte,
+            )
+
+        # use the first split-like method the custom splitter exposes
+        # (the original looked the bound method up with getattr a second time,
+        # which would have raised a TypeError)
+        method = next(
+            getattr(self.splitter, name)
+            for name in ["split", "chunk", "segment"]
+            if hasattr(self.splitter, name)
+        )
+        return method(text, **kwargs)
+
+    async def rank_by_pplex(
+        self, items: list, initial_text=None, cumulative=False, n_samples=None, **kwargs
+    ):
+        """
+        Rank a list of items by perplexity;
+        an item can be a single token or a list of tokens.
+
+        kwargs: additional arguments to pass to the model
+        """
+
+        async def _get_item_perplexity(item):
+            item = item if isinstance(item, list) else [item]
+            item = (
+                item[: self.max_tokens_per_sample]
+                if len(item) > self.max_tokens_per_sample
+                else item
+            )
+            return await self.imodel.compute_perplexity(
+                initial_context=initial_text,
+                tokens=item,
+                n_samples=n_samples or self.n_samples,
+                system_msg=self.system_msg,
+                **kwargs,
+            )
+
+        if not isinstance(items, list):
+            items = self.tokenize(items)
+
+        if len(items) == 1:
+            return [items]  # no need to rank a single item
+
+        _segments = []
+        _context = initial_text or ""
+        _task = []
+
+        if cumulative:
+            for i in items:
+                if isinstance(i, list):
+                    _context += " " + " ".join(i).strip()
+                else:
+                    _context += " " + i.strip()
+
+                _segments.append(_context)
+        else:
+            _segments = items
+
+        for i in _segments:
+            _task.append(asyncio.create_task(_get_item_perplexity(i)))
+
+        results = await asyncio.gather(*_task)
+        results = [(item, pplex) for item, pplex in zip(items, results)]
+        return sorted(results, key=lambda x: x[1]["logprobs"], reverse=True)
+
+    async def compress(
+        self,
+        text,
+        target_ratio=None,
+        initial_text=None,
+        cumulative=False,
+        split_kwargs=None,
+        split_overlap=None,
+        split_threshold=None,
+        rank_by="perplexity",
+        min_compression_score=None,
+        verbose=True,
+        **kwargs,
+    ):
+        start = time()
+        if split_kwargs is None:
+            split_kwargs = {}
+            split_kwargs["chunk_size"] = self.max_tokens_per_sample
+            split_kwargs["overlap"] = split_overlap or 0
+            split_kwargs["threshold"] = split_threshold or 0
+
+        len_tokens = len(self.tokenize(text))
+
+        items = self.split(text, return_tokens=True, **split_kwargs)
+
+        if rank_by == "perplexity":
+            ranked_items = await self.rank_by_pplex(
+                items=items, initial_text=initial_text, cumulative=cumulative, **kwargs
+            )
+
+            prompt_tokens = sum([i[1]["num_prompt_tokens"] for i in ranked_items])
+
+            num_completion_tokens = sum(
+                [i[1]["num_completion_tokens"] for i in ranked_items]
+            )
+
+            price = (
+                prompt_tokens * 0.5 / 1000000 + num_completion_tokens * 1.5 / 1000000
+            )
+
+            selected_items = self.select_by_pplex(
+                ranked_items=ranked_items,
+                target_compression_ratio=target_ratio or self.target_ratio,
+                original_length=len_tokens,
+                min_pplex=min_compression_score or self.min_compression_score,
+            )
+
+            if verbose:
+                msg = ""
+                msg += f"Original Token number: {len_tokens}\n"
+
+                def _f(i):
+                    if isinstance(i, str):
+                        i = self.tokenize(i)
+
+                    if isinstance(i, list):
+                        return len(to_list(i, dropna=True, flatten=True))
+
+                len_ = sum([_f(i) for i in selected_items])
+                msg += f"Selected Token number: {len_}\n"
+                msg += f"Token Compression Ratio: {len_ / len_tokens:.03f}\n"
+                msg += f"Compression Time: {time() - start:.04f} seconds\n"
+                msg += f"Compression Model: {self.imodel.iModel_name}\n"
+                msg += f"Compression Method: {rank_by}\n"
+                msg += f"Compression Usage: ${price:.05f}\n"
+                print(msg)
+
+            a = "".join([i.strip() for i in selected_items]).strip()
+            a = a.replace("\n\n", "")
+            return a
+
+        raise ValueError(f"Ranking method {rank_by} is not supported")
+
+    def select_by_pplex(
+        self, ranked_items, target_compression_ratio, original_length, min_pplex=None
+    ):
+        min_pplex = min_pplex or 0
+
+        desired_length = int(original_length * target_compression_ratio)
+
+        items = []
+        current_length = 0
+
+        for item, info in ranked_items:
+            if info["perplexity"] > min_pplex:
+                item = self.tokenize(item) if isinstance(item, str) else item
+                item = item if isinstance(item, list) else [item]
+                item = to_list(item, dropna=True, flatten=True)
+                if current_length + len(item) > desired_length:
+                    break
+                else:
+                    current_length += len(item)
+                    items.append("".join(item))

+        return items
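For orientation, here is a minimal usage sketch of the new compressor (not part of the diff). It assumes an OpenAI API key is configured for the default iModel; the module path is inferred from the file list above.

import asyncio

from lionagi.core.collections import iModel
from lionagi.experimental.compressor.llm_compressor import LLMCompressor


async def main():
    text = "The quick brown fox jumps over the lazy dog. " * 200  # any long document
    compressor = LLMCompressor(
        imodel=iModel(model="gpt-3.5-turbo", temperature=0.3),
        target_ratio=0.2,  # keep roughly 20% of the original tokens
        chunk_size=64,  # tokens per segment scored for perplexity
    )
    compressed = await compressor.compress(text)
    print(compressed)


asyncio.run(main())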
lionagi/experimental/compressor/llm_summarizer.py
@@ -0,0 +1,61 @@
+# from lionagi.core.collections import iModel
+# from .base import TokenCompressor
+
+
+# class LLMSummarizer(TokenCompressor):
+
+#     def __init__(
+#         self, imodel: iModel = None, system_msg=None, tokenizer=None, splitter=None,
+#         max_tokens=25, target_ratio=0.3
+#     ):
+#         imodel = imodel or iModel(model="gpt-3.5-turbo", max_tokens=max_tokens)
+#         super().__init__(imodel=imodel, tokenizer=tokenizer, splitter=splitter)
+#         self.system_msg = (
+#             system_msg
+#             or "Summarize the following sentence to be concise and informative:"
+#         )
+#         self.target_ratio = target_ratio
+
+#     async def summarize_sentence(self, sentence, **kwargs):
+#         messages = [
+#             {"role": "system", "content": self.system_msg},
+#             {"role": "user", "content": sentence},
+#         ]
+#         response = await self.imodel.call_chat_completion(messages, **kwargs)
+#         return response["choices"][0]["message"]["content"]
+
+#     def tokenize(self, text):
+#         tokenize_func = self.tokenizer or tokenize
+#         return tokenize_func(text)
+
+#     def split(self, text):
+#         split_func = self.splitter or split_into_segments
+#         return split_func(text)
+
+#     # Function to enforce maximum sentence length
+#     def enforce_max_sentence_length(self, sentence, max_words=25):
+#         words = self.tokenize(sentence)
+#         if len(words) > max_words:
+#             sentence = ' '.join(words[:max_words])
+#         return sentence
+
+#     async def summarize_text(self, text, max_length_per_sentence=25, target_ratio=None, **kwargs):
+#         sentences = self.split(text)
+#         summarized = await alcall(
+#             sentences, self.summarize_sentence, **kwargs
+#         )
+#         summarized = [
+#             self.enforce_max_sentence_length(sentence, max_length_per_sentence)
+#             for sentence in summarized
+#         ]
+
+#         original_length = len(self.tokenize(text))
+#         summarized_length = len(self.tokenize(' '.join(summarized)))
+#         current_ratio = summarized_length / original_length
+
+#         target_ratio = target_ratio or self.target_ratio
+#         if current_ratio > target_ratio:
+#             words_to_remove = int((current_ratio - target_ratio) * original_length)
+#             return ' '.join(summarized[:-words_to_remove])
+
+#         return ' '.join(summarized)
lionagi/experimental/compressor/util.py
@@ -0,0 +1,70 @@
+# import asyncio
+# from lionagi import alcall
+# from lionagi.libs.ln_convert import to_list
+# import numpy as np
+
+# def split_into_segments(text):
+#     segments = text.split(".")  # Splitting by period followed by a space
+#     return [segment.strip() for segment in segments if segment]
+
+# # Tokenize the segment
+# def tokenize(segment):
+#     tokens = segment.split()  # Simple space-based tokenization
+#     return tokens
+
+# async def calculate_perplexity(system_msg: str, imodel, tokens, initial_context=None, **kwargs):
+#     _tasks = []
+#     _context = initial_context or ""
+#     for i in range(len(tokens)):
+#         _context += " " + tokens[i]
+#         messages = [
+#             {"role": "system", "content": system_msg},
+#             {"role": "user", "content": _context},
+#         ]
+#         task = asyncio.create_task(
+#             imodel.call_chat_completion(
+#                 messages=messages, logprobs=True, max_tokens=1, **kwargs
+#             )
+#         )
+#         _tasks.append(task)
+
+#     results = await asyncio.gather(*_tasks)
+#     logprobs = [
+#         result[1]["choices"][0]["logprobs"]["content"] for result in results
+#     ]
+#     logprobs = to_list(logprobs, flatten=True, dropna=True)
+#     logprobs = [lprob_["logprob"] for lprob_ in logprobs]
+#     return np.exp(np.mean(logprobs))
+
+# async def rank_by_perplexity(
+#     text: str | list[str] = None,  # if list we assume they are already well split
+#     initial_text=None,
+
+#     segments,
+#     initial_text=None,
+#     cumulative=False,
+#     **kwargs
+# ):
+#     _segments = []
+#     _context = initial_text or ""
+#     _task = []
+
+#     if cumulative:
+#         for i in range(1, len(segments)):
+#             _context += " " + segments[i - 1]
+#             _segments.append(_context)
+#     else:
+#         _segments = segments
+
+#     for i in segments:
+#         _task.append(asyncio.create_task(
+#             calculate_perplexity(
+#                 self.system_msg, self.imodel, self.tokenize(i), **kwargs)
+#             )
+#         )
+#     segment_perplexities = await asyncio.gather(*_task)
+
+#     return {
+#         segment: perplexity
+#         for segment, perplexity in zip(segments, segment_perplexities)
+#     }
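A note on the perplexity math used by these helpers: given per-token log probabilities, perplexity is conventionally exp(-mean(logprob)). The commented `calculate_perplexity` above returns np.exp(np.mean(logprobs)), the reciprocal of that quantity; the two are monotonically related, so segments are still ordered consistently (just in reverse). A minimal standalone sketch, assuming natural-log probabilities as returned by OpenAI-style `logprobs` fields:

import math


def perplexity(logprobs: list[float]) -> float:
    # Exponentiated negative mean log-likelihood; lower means the
    # model finds the text more predictable.
    return math.exp(-sum(logprobs) / len(logprobs))


print(perplexity([-0.1, -2.3, -0.7]))  # ~2.81 for three sample token logprobs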
lionagi/experimental/directive/__init__.py
@@ -0,0 +1,19 @@
+# from ..form.predict import predict
+# from .select import select
+# from ..form.score import score
+# from ..form.react import react
+# from .vote import vote
+# from ..form.plan import plan
+# from .cot import chain_of_thoughts, chain_of_react
+
+
+# __all__ = [
+#     "predict",
+#     "select",
+#     "score",
+#     "vote",
+#     "react",
+#     "plan",
+#     "chain_of_thoughts",
+#     "chain_of_react",
+# ]
lionagi/experimental/directive/parser/__init__.py — File without changes
lionagi/experimental/directive/parser/base_parser.py
@@ -0,0 +1,282 @@
+"""
+Copyright 2024 HaiyangLi
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from typing import List, Optional
+
+from lionagi.experimental.directive.tokenizer import BaseToken
+from ..template.schema import IfNode, TryNode, ForNode
+
+
+class BaseDirectiveParser:
+    """A base parser with lookahead, error recovery, and backtracking support.
+
+    Attributes:
+        tokens (List[BaseToken]): A list of tokens to be parsed.
+        current_token_index (int): The index of the current token in the tokens list.
+        current_token (Optional[BaseToken]): The current token being processed.
+
+    Examples:
+        >>> tokenizer = BaseTokenizer("IF x > 10 THEN DO something ENDIF")
+        >>> tokens = tokenizer.get_tokens()
+        >>> parser = BaseDirectiveParser(tokens)
+        >>> print(parser.current_token)
+        BaseToken(KEYWORD, IF)
+    """
+
+    def __init__(self, tokens: List[BaseToken]):
+        self.tokens = tokens
+        self.current_token_index = -1
+        self.current_token: Optional[BaseToken] = None
+        self.next_token()
+
+    def next_token(self) -> None:
+        """Advances to the next token in the list."""
+        self.current_token_index += 1
+        if self.current_token_index < len(self.tokens):
+            self.current_token = self.tokens[self.current_token_index]
+        else:
+            self.current_token = None
+
+    def peek_next_token(self, offset: int = 1) -> BaseToken | None:
+        """Peeks at the next token without consuming it.
+
+        Args:
+            offset (int): The number of tokens to look ahead.
+
+        Returns:
+            Optional[BaseToken]: The token at the specified lookahead offset, or None if end of list.
+        """
+        peek_index = self.current_token_index + offset
+        if peek_index < len(self.tokens):
+            return self.tokens[peek_index]
+        else:
+            return None
+
+    def skip_until(self, token_types: List[str]) -> None:
+        """Skips tokens until a token of the specified type is found.
+
+        Args:
+            token_types (List[str]): A list of token types to stop skipping.
+        """
+        while self.current_token and self.current_token.type not in token_types:
+            self.next_token()
+
+    def mark(self) -> int:
+        """Marks the current position in the token list for potential backtracking.
+
+        Returns:
+            int: The current token index.
+        """
+        return self.current_token_index
+
+    def reset_to_mark(self, mark: int) -> None:
+        """Resets the parser to a previously marked position.
+
+        Args:
+            mark (int): The token index to reset to.
+        """
+        self.current_token_index = mark - 1
+        self.next_token()
+
+    def skip_semicolon(self):
+        """Skips a semicolon token if it is the current token."""
+        if self.current_token and self.current_token.value == ";":
+            self.next_token()
+
+    def parse_expression(self):
+        """Parses an expression until a semicolon is encountered.
+
+        Returns:
+            str: The parsed expression as a string.
+
+        Raises:
+            SyntaxError: If a semicolon is not found at the end of the expression.
+        """
+        expr = ""
+        while self.current_token and self.current_token.value != ";":
+            expr += self.current_token.value + " "
+            self.next_token()
+        # Expecting a semicolon at the end of the condition
+        if self.current_token.value != ";":
+            raise SyntaxError("Expected ';' at the end of the condition")
+        self.next_token()  # Move past the semicolon to the next part of the statement
+        return expr.strip()
+
+    def parse_if_block(self):
+        """Parses a block of statements for an IF condition.
+
+        Returns:
+            list: The parsed block of statements as a list of strings.
+        """
+        block = []
+        # Parse the block until 'ELSE' or 'ENDIF', ensuring not to include semicolons as part of the block
+        while self.current_token and self.current_token.value not in ("ENDIF", "ELSE"):
+            if self.current_token.value == "DO":
+                self.next_token()  # Move past 'DO' to get to the action
+            block.append(self.current_token.value)  # Add the action to the block
+            self.next_token()  # Move to the next token, which could be a semicolon or the next action
+            if self.current_token.value == ";":
+                self.next_token()  # Move past the semicolon
+        return block
+
+    def parse_if_statement(self):
+        """Parses an IF statement.
+
+        Returns:
+            IfNode: The parsed IF statement as an IfNode object.
+
+        Raises:
+            SyntaxError: If the IF statement is not properly formed.
+        """
+        if self.current_token.type != "KEYWORD" or self.current_token.value != "IF":
+            raise SyntaxError("Expected IF statement")
+        self.next_token()  # Skip 'IF'
+
+        condition = self.parse_expression()  # Now properly ends after the semicolon
+
+        true_block = []
+        if self.current_token.value == "DO":
+            true_block = self.parse_if_block()  # Parse true block after 'DO'
+
+        false_block = None
+        if self.current_token and self.current_token.value == "ELSE":
+            self.next_token()  # Skip 'ELSE', expect 'DO' next for the false block
+            self.skip_semicolon()
+            if self.current_token.value != "DO":
+                raise SyntaxError("Expected 'DO' after 'ELSE'")
+            self.next_token()  # Skip 'DO'
+            false_block = self.parse_if_block()  # Parse false block
+
+        return IfNode(condition, true_block, false_block)
+
+    def parse_for_statement(self):
+        """Parses a FOR statement.
+
+        Returns:
+            ForNode: The parsed FOR statement as a ForNode object.
+
+        Raises:
+            SyntaxError: If the FOR statement is not properly formed.
+        """
+        if self.current_token.type != "KEYWORD" or self.current_token.value != "FOR":
+            raise SyntaxError("Expected FOR statement")
+        self.next_token()  # Skip 'FOR'
+
+        # Parse the iterator variable
+        if self.current_token.type != "IDENTIFIER":
+            raise SyntaxError("Expected iterator variable after FOR")
+        iterator = self.current_token.value
+        self.next_token()  # Move past the iterator variable
+
+        # Expect and skip 'IN' keyword
+        if self.current_token.type != "KEYWORD" or self.current_token.value != "IN":
+            raise SyntaxError("Expected 'IN' after iterator variable")
+        self.next_token()  # Move past 'IN'
+
+        # Parse the collection
+        if self.current_token.type not in ["IDENTIFIER", "LITERAL"]:
+            raise SyntaxError("Expected collection after 'IN'")
+        collection = self.current_token.value
+        self.next_token()  # Move past the collection
+
+        # Now, parse the block of statements to execute
+        true_block = self.parse_for_block()
+
+        # Construct and return a ForNode
+        return ForNode(iterator, collection, true_block)
+
+    def parse_for_block(self):
+        """Parses a block of statements for a FOR loop.
+
+        Returns:
+            list: The parsed block of statements as a list of strings.
+        """
+        block = []
+        # Skip initial 'DO' if present
+        if self.current_token and self.current_token.value == "DO":
+            self.next_token()
+
+        while self.current_token and self.current_token.value not in ("ENDFOR",):
+            if self.current_token.value == ";":
+                # If a semicolon is encountered, skip it and move to the next token
+                self.next_token()
+                continue
+            # Add the current token to the block unless it's a 'DO' or ';'
+            if self.current_token.value != "DO":
+                block.append(self.current_token.value)
+            self.next_token()
+
+        # The loop exits when 'ENDFOR' is encountered; move past it for subsequent parsing
+        self.next_token()  # Skip 'ENDFOR'
+        return block
+
+    def parse_try_statement(self):
+        """Parses a TRY statement.
+
+        Returns:
+            TryNode: The parsed TRY statement as a TryNode object.
+
+        Raises:
+            SyntaxError: If the TRY statement is not properly formed.
+        """
+        if self.current_token.type != "KEYWORD" or self.current_token.value != "TRY":
+            raise SyntaxError("Expected TRY statement")
+        self.next_token()  # Skip 'TRY'
+
+        try_block = self.parse_try_block("EXCEPT")  # Parse the try block until 'EXCEPT'
+
+        # Now expecting 'EXCEPT' keyword
+        if not (self.current_token and self.current_token.value == "EXCEPT"):
+            raise SyntaxError("Expected 'EXCEPT' after try block")
+        self.next_token()  # Move past 'EXCEPT'
+
+        except_block = self.parse_try_block(
+            "ENDTRY"
+        )  # Parse the except block until 'ENDTRY'
+
+        # Ensure we are correctly positioned after 'ENDTRY'
+        if self.current_token and self.current_token.value != "ENDTRY":
+            raise SyntaxError("Expected 'ENDTRY' at the end of except block")
+        self.next_token()  # Move past 'ENDTRY' for subsequent parsing
+
+        return TryNode(try_block, except_block)
+
+    def parse_try_block(self, stop_keyword):
+        """Parses a block of statements for a TRY or EXCEPT clause.
+
+        Args:
+            stop_keyword (str): The keyword that indicates the end of the block.
+
+        Returns:
+            list: The parsed block of statements as a list of strings.
+        """
+        block = []
+        while self.current_token and self.current_token.value != stop_keyword:
+            if self.current_token.value == "DO":
+                self.next_token()  # Move past 'DO' to get to the action
+            elif self.current_token.value == ";":
+                self.next_token()  # Move past the semicolon
+                continue  # Skip adding ';' to the block
+            else:
+                block.append(self.current_token.value)  # Add the action to the block
+                self.next_token()
+
+        return block
+
+
+# "IF condition1 && condition2; DO action2; ELSE; DO action3; ENDIF;"
+# "FOR input_ IN collections; DO action(input_); ENDFOR;"
+# "TRY; DO action(); EXCEPT; DO action(input_); ENDTRY;"
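To make the parser's contract concrete, here is a hypothetical walkthrough of a simplified variant of the first example string above. The stand-in token type is an assumption: the parser only reads `.type` and `.value`, so a namedtuple suffices here; the real `BaseToken` lives in lionagi/experimental/directive/tokenizer.py, and the import path is inferred from the file list above.

from collections import namedtuple

from lionagi.experimental.directive.parser.base_parser import BaseDirectiveParser

# Duck-typed stand-in for BaseToken (hypothetical; only .type and .value are read).
Tok = namedtuple("Tok", ["type", "value"])

# Tokens for: "IF condition1; DO action2; ELSE; DO action3; ENDIF;"
tokens = [
    Tok("KEYWORD", "IF"), Tok("IDENTIFIER", "condition1"), Tok("OPERATOR", ";"),
    Tok("KEYWORD", "DO"), Tok("IDENTIFIER", "action2"), Tok("OPERATOR", ";"),
    Tok("KEYWORD", "ELSE"), Tok("OPERATOR", ";"),
    Tok("KEYWORD", "DO"), Tok("IDENTIFIER", "action3"), Tok("OPERATOR", ";"),
    Tok("KEYWORD", "ENDIF"), Tok("OPERATOR", ";"),
]

node = BaseDirectiveParser(tokens).parse_if_statement()
# node is built as IfNode("condition1", ["action2"], ["action3"])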
lionagi/experimental/directive/template/__init__.py — File without changes