lionagi 0.0.312__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/__init__.py +61 -3
- lionagi/core/__init__.py +0 -14
- lionagi/core/_setting/_setting.py +59 -0
- lionagi/core/action/__init__.py +14 -0
- lionagi/core/action/function_calling.py +136 -0
- lionagi/core/action/manual.py +1 -0
- lionagi/core/action/node.py +109 -0
- lionagi/core/action/tool.py +114 -0
- lionagi/core/action/tool_manager.py +356 -0
- lionagi/core/agent/__init__.py +0 -3
- lionagi/core/agent/base_agent.py +45 -36
- lionagi/core/agent/eval/evaluator.py +1 -0
- lionagi/core/agent/eval/vote.py +40 -0
- lionagi/core/agent/learn/learner.py +59 -0
- lionagi/core/agent/plan/unit_template.py +1 -0
- lionagi/core/collections/__init__.py +17 -0
- lionagi/core/collections/_logger.py +319 -0
- lionagi/core/collections/abc/__init__.py +53 -0
- lionagi/core/collections/abc/component.py +615 -0
- lionagi/core/collections/abc/concepts.py +297 -0
- lionagi/core/collections/abc/exceptions.py +150 -0
- lionagi/core/collections/abc/util.py +45 -0
- lionagi/core/collections/exchange.py +161 -0
- lionagi/core/collections/flow.py +426 -0
- lionagi/core/collections/model.py +419 -0
- lionagi/core/collections/pile.py +913 -0
- lionagi/core/collections/progression.py +236 -0
- lionagi/core/collections/util.py +64 -0
- lionagi/core/director/direct.py +314 -0
- lionagi/core/director/director.py +2 -0
- lionagi/core/engine/branch_engine.py +333 -0
- lionagi/core/engine/instruction_map_engine.py +204 -0
- lionagi/core/engine/sandbox_.py +14 -0
- lionagi/core/engine/script_engine.py +99 -0
- lionagi/core/executor/base_executor.py +90 -0
- lionagi/core/executor/graph_executor.py +330 -0
- lionagi/core/executor/neo4j_executor.py +384 -0
- lionagi/core/generic/__init__.py +7 -0
- lionagi/core/generic/edge.py +112 -0
- lionagi/core/generic/edge_condition.py +16 -0
- lionagi/core/generic/graph.py +236 -0
- lionagi/core/generic/hyperedge.py +1 -0
- lionagi/core/generic/node.py +220 -0
- lionagi/core/generic/tree.py +48 -0
- lionagi/core/generic/tree_node.py +79 -0
- lionagi/core/mail/__init__.py +7 -3
- lionagi/core/mail/mail.py +25 -0
- lionagi/core/mail/mail_manager.py +142 -58
- lionagi/core/mail/package.py +45 -0
- lionagi/core/mail/start_mail.py +36 -0
- lionagi/core/message/__init__.py +19 -0
- lionagi/core/message/action_request.py +133 -0
- lionagi/core/message/action_response.py +135 -0
- lionagi/core/message/assistant_response.py +95 -0
- lionagi/core/message/instruction.py +234 -0
- lionagi/core/message/message.py +101 -0
- lionagi/core/message/system.py +86 -0
- lionagi/core/message/util.py +283 -0
- lionagi/core/report/__init__.py +4 -0
- lionagi/core/report/base.py +217 -0
- lionagi/core/report/form.py +231 -0
- lionagi/core/report/report.py +166 -0
- lionagi/core/report/util.py +28 -0
- lionagi/core/rule/__init__.py +0 -0
- lionagi/core/rule/_default.py +16 -0
- lionagi/core/rule/action.py +99 -0
- lionagi/core/rule/base.py +238 -0
- lionagi/core/rule/boolean.py +56 -0
- lionagi/core/rule/choice.py +47 -0
- lionagi/core/rule/mapping.py +96 -0
- lionagi/core/rule/number.py +71 -0
- lionagi/core/rule/rulebook.py +109 -0
- lionagi/core/rule/string.py +52 -0
- lionagi/core/rule/util.py +35 -0
- lionagi/core/session/__init__.py +0 -3
- lionagi/core/session/branch.py +431 -0
- lionagi/core/session/directive_mixin.py +287 -0
- lionagi/core/session/session.py +230 -902
- lionagi/core/structure/__init__.py +1 -0
- lionagi/core/structure/chain.py +1 -0
- lionagi/core/structure/forest.py +1 -0
- lionagi/core/structure/graph.py +1 -0
- lionagi/core/structure/tree.py +1 -0
- lionagi/core/unit/__init__.py +5 -0
- lionagi/core/unit/parallel_unit.py +245 -0
- lionagi/core/unit/template/__init__.py +0 -0
- lionagi/core/unit/template/action.py +81 -0
- lionagi/core/unit/template/base.py +51 -0
- lionagi/core/unit/template/plan.py +84 -0
- lionagi/core/unit/template/predict.py +109 -0
- lionagi/core/unit/template/score.py +124 -0
- lionagi/core/unit/template/select.py +104 -0
- lionagi/core/unit/unit.py +362 -0
- lionagi/core/unit/unit_form.py +305 -0
- lionagi/core/unit/unit_mixin.py +1168 -0
- lionagi/core/unit/util.py +71 -0
- lionagi/core/validator/__init__.py +0 -0
- lionagi/core/validator/validator.py +364 -0
- lionagi/core/work/__init__.py +0 -0
- lionagi/core/work/work.py +76 -0
- lionagi/core/work/work_function.py +101 -0
- lionagi/core/work/work_queue.py +103 -0
- lionagi/core/work/worker.py +258 -0
- lionagi/core/work/worklog.py +120 -0
- lionagi/experimental/__init__.py +0 -0
- lionagi/experimental/compressor/__init__.py +0 -0
- lionagi/experimental/compressor/base.py +46 -0
- lionagi/experimental/compressor/llm_compressor.py +247 -0
- lionagi/experimental/compressor/llm_summarizer.py +61 -0
- lionagi/experimental/compressor/util.py +70 -0
- lionagi/experimental/directive/__init__.py +19 -0
- lionagi/experimental/directive/parser/__init__.py +0 -0
- lionagi/experimental/directive/parser/base_parser.py +282 -0
- lionagi/experimental/directive/template/__init__.py +0 -0
- lionagi/experimental/directive/template/base_template.py +79 -0
- lionagi/experimental/directive/template/schema.py +36 -0
- lionagi/experimental/directive/tokenizer.py +73 -0
- lionagi/experimental/evaluator/__init__.py +0 -0
- lionagi/experimental/evaluator/ast_evaluator.py +131 -0
- lionagi/experimental/evaluator/base_evaluator.py +218 -0
- lionagi/experimental/knowledge/__init__.py +0 -0
- lionagi/experimental/knowledge/base.py +10 -0
- lionagi/experimental/knowledge/graph.py +0 -0
- lionagi/experimental/memory/__init__.py +0 -0
- lionagi/experimental/strategies/__init__.py +0 -0
- lionagi/experimental/strategies/base.py +1 -0
- lionagi/integrations/bridge/autogen_/__init__.py +0 -0
- lionagi/integrations/bridge/autogen_/autogen_.py +124 -0
- lionagi/integrations/bridge/langchain_/documents.py +4 -0
- lionagi/integrations/bridge/llamaindex_/index.py +30 -0
- lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py +6 -0
- lionagi/integrations/bridge/llamaindex_/llama_pack.py +227 -0
- lionagi/integrations/bridge/llamaindex_/node_parser.py +6 -9
- lionagi/integrations/bridge/pydantic_/pydantic_bridge.py +1 -0
- lionagi/integrations/bridge/transformers_/__init__.py +0 -0
- lionagi/integrations/bridge/transformers_/install_.py +36 -0
- lionagi/integrations/chunker/__init__.py +0 -0
- lionagi/integrations/chunker/chunk.py +312 -0
- lionagi/integrations/config/oai_configs.py +38 -7
- lionagi/integrations/config/ollama_configs.py +1 -1
- lionagi/integrations/config/openrouter_configs.py +14 -2
- lionagi/integrations/loader/__init__.py +0 -0
- lionagi/integrations/loader/load.py +253 -0
- lionagi/integrations/loader/load_util.py +195 -0
- lionagi/integrations/provider/_mapping.py +46 -0
- lionagi/integrations/provider/litellm.py +2 -1
- lionagi/integrations/provider/mlx_service.py +16 -9
- lionagi/integrations/provider/oai.py +91 -4
- lionagi/integrations/provider/ollama.py +7 -6
- lionagi/integrations/provider/openrouter.py +115 -8
- lionagi/integrations/provider/services.py +2 -2
- lionagi/integrations/provider/transformers.py +18 -22
- lionagi/integrations/storage/__init__.py +3 -0
- lionagi/integrations/storage/neo4j.py +665 -0
- lionagi/integrations/storage/storage_util.py +287 -0
- lionagi/integrations/storage/structure_excel.py +285 -0
- lionagi/integrations/storage/to_csv.py +63 -0
- lionagi/integrations/storage/to_excel.py +83 -0
- lionagi/libs/__init__.py +26 -1
- lionagi/libs/ln_api.py +78 -23
- lionagi/libs/ln_context.py +37 -0
- lionagi/libs/ln_convert.py +21 -9
- lionagi/libs/ln_func_call.py +69 -28
- lionagi/libs/ln_image.py +107 -0
- lionagi/libs/ln_knowledge_graph.py +405 -0
- lionagi/libs/ln_nested.py +26 -11
- lionagi/libs/ln_parse.py +110 -14
- lionagi/libs/ln_queue.py +117 -0
- lionagi/libs/ln_tokenize.py +164 -0
- lionagi/{core/prompt/field_validator.py → libs/ln_validate.py} +79 -14
- lionagi/libs/special_tokens.py +172 -0
- lionagi/libs/sys_util.py +107 -2
- lionagi/lions/__init__.py +0 -0
- lionagi/lions/coder/__init__.py +0 -0
- lionagi/lions/coder/add_feature.py +20 -0
- lionagi/lions/coder/base_prompts.py +22 -0
- lionagi/lions/coder/code_form.py +13 -0
- lionagi/lions/coder/coder.py +168 -0
- lionagi/lions/coder/util.py +96 -0
- lionagi/lions/researcher/__init__.py +0 -0
- lionagi/lions/researcher/data_source/__init__.py +0 -0
- lionagi/lions/researcher/data_source/finhub_.py +191 -0
- lionagi/lions/researcher/data_source/google_.py +199 -0
- lionagi/lions/researcher/data_source/wiki_.py +96 -0
- lionagi/lions/researcher/data_source/yfinance_.py +21 -0
- lionagi/tests/integrations/__init__.py +0 -0
- lionagi/tests/libs/__init__.py +0 -0
- lionagi/tests/libs/test_field_validators.py +353 -0
- lionagi/tests/{test_libs → libs}/test_func_call.py +23 -21
- lionagi/tests/{test_libs → libs}/test_nested.py +36 -21
- lionagi/tests/{test_libs → libs}/test_parse.py +1 -1
- lionagi/tests/libs/test_queue.py +67 -0
- lionagi/tests/test_core/collections/__init__.py +0 -0
- lionagi/tests/test_core/collections/test_component.py +206 -0
- lionagi/tests/test_core/collections/test_exchange.py +138 -0
- lionagi/tests/test_core/collections/test_flow.py +145 -0
- lionagi/tests/test_core/collections/test_pile.py +171 -0
- lionagi/tests/test_core/collections/test_progression.py +129 -0
- lionagi/tests/test_core/generic/__init__.py +0 -0
- lionagi/tests/test_core/generic/test_edge.py +67 -0
- lionagi/tests/test_core/generic/test_graph.py +96 -0
- lionagi/tests/test_core/generic/test_node.py +106 -0
- lionagi/tests/test_core/generic/test_tree_node.py +73 -0
- lionagi/tests/test_core/test_branch.py +115 -292
- lionagi/tests/test_core/test_form.py +46 -0
- lionagi/tests/test_core/test_report.py +105 -0
- lionagi/tests/test_core/test_validator.py +111 -0
- lionagi/version.py +1 -1
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/LICENSE +12 -11
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/METADATA +19 -118
- lionagi-0.2.1.dist-info/RECORD +240 -0
- lionagi/core/branch/__init__.py +0 -4
- lionagi/core/branch/base_branch.py +0 -654
- lionagi/core/branch/branch.py +0 -471
- lionagi/core/branch/branch_flow_mixin.py +0 -96
- lionagi/core/branch/executable_branch.py +0 -347
- lionagi/core/branch/util.py +0 -323
- lionagi/core/direct/__init__.py +0 -6
- lionagi/core/direct/predict.py +0 -161
- lionagi/core/direct/score.py +0 -278
- lionagi/core/direct/select.py +0 -169
- lionagi/core/direct/utils.py +0 -87
- lionagi/core/direct/vote.py +0 -64
- lionagi/core/flow/base/baseflow.py +0 -23
- lionagi/core/flow/monoflow/ReAct.py +0 -238
- lionagi/core/flow/monoflow/__init__.py +0 -9
- lionagi/core/flow/monoflow/chat.py +0 -95
- lionagi/core/flow/monoflow/chat_mixin.py +0 -263
- lionagi/core/flow/monoflow/followup.py +0 -214
- lionagi/core/flow/polyflow/__init__.py +0 -1
- lionagi/core/flow/polyflow/chat.py +0 -248
- lionagi/core/mail/schema.py +0 -56
- lionagi/core/messages/__init__.py +0 -3
- lionagi/core/messages/schema.py +0 -533
- lionagi/core/prompt/prompt_template.py +0 -316
- lionagi/core/schema/__init__.py +0 -22
- lionagi/core/schema/action_node.py +0 -29
- lionagi/core/schema/base_mixin.py +0 -296
- lionagi/core/schema/base_node.py +0 -199
- lionagi/core/schema/condition.py +0 -24
- lionagi/core/schema/data_logger.py +0 -354
- lionagi/core/schema/data_node.py +0 -93
- lionagi/core/schema/prompt_template.py +0 -67
- lionagi/core/schema/structure.py +0 -910
- lionagi/core/tool/__init__.py +0 -3
- lionagi/core/tool/tool_manager.py +0 -280
- lionagi/integrations/bridge/pydantic_/base_model.py +0 -7
- lionagi/tests/test_core/test_base_branch.py +0 -427
- lionagi/tests/test_core/test_chat_flow.py +0 -63
- lionagi/tests/test_core/test_mail_manager.py +0 -75
- lionagi/tests/test_core/test_prompts.py +0 -51
- lionagi/tests/test_core/test_session.py +0 -254
- lionagi/tests/test_core/test_session_base_util.py +0 -312
- lionagi/tests/test_core/test_tool_manager.py +0 -95
- lionagi-0.0.312.dist-info/RECORD +0 -111
- /lionagi/core/{branch/base → _setting}/__init__.py +0 -0
- /lionagi/core/{flow → agent/eval}/__init__.py +0 -0
- /lionagi/core/{flow/base → agent/learn}/__init__.py +0 -0
- /lionagi/core/{prompt → agent/plan}/__init__.py +0 -0
- /lionagi/core/{tool/manual.py → agent/plan/plan.py} +0 -0
- /lionagi/{tests/test_integrations → core/director}/__init__.py +0 -0
- /lionagi/{tests/test_libs → core/engine}/__init__.py +0 -0
- /lionagi/{tests/test_libs/test_async.py → core/executor/__init__.py} +0 -0
- /lionagi/tests/{test_libs → libs}/test_api.py +0 -0
- /lionagi/tests/{test_libs → libs}/test_convert.py +0 -0
- /lionagi/tests/{test_libs → libs}/test_sys_util.py +0 -0
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/WHEEL +0 -0
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/top_level.txt +0 -0
lionagi/experimental/compressor/llm_compressor.py
@@ -0,0 +1,247 @@
+import asyncio
+from lionagi import alcall
+from lionagi.libs.ln_convert import to_list
+import numpy as np
+from lionagi.core.collections import iModel
+from .base import TokenCompressor
+from lionagi.libs.ln_tokenize import TokenizeUtil
+from time import time
+
+# inspired by LLMLingua, MIT License, Copyright (c) Microsoft Corporation.
+# https://github.com/microsoft/LLMLingua
+
+
+class LLMCompressor(TokenCompressor):
+
+    def __init__(
+        self,
+        imodel: iModel = None,
+        system_msg=None,
+        tokenizer=None,  # must be a callable or object with a tokenize method
+        splitter=None,  # must be a callable or object with a split/chunk/segment method
+        target_ratio=0.2,
+        n_samples=5,  # the cumulative samples to take in each perplexity calculation
+        chunk_size=64,
+        max_tokens_per_sample=80,
+        min_compression_score=0,  # (0-1) the minimum score to consider for compression, 0 means all
+        split_overlap=0,
+        split_threshold=0,
+        verbose=True,
+    ):
+        imodel = imodel or iModel(model="gpt-3.5-turbo", temperature=0.3)
+        super().__init__(imodel=imodel, tokenizer=tokenizer, splitter=splitter)
+        self.system_msg = (
+            system_msg
+            or "Concisely summarize and compress the information for storage:"
+        )
+        self.target_ratio = target_ratio
+        self.n_samples = n_samples
+        self.chunk_size = chunk_size
+        self.max_tokens_per_sample = max_tokens_per_sample
+        self.min_compression_score = min_compression_score
+        self.verbose = verbose
+        self.split_overlap = split_overlap
+        self.split_threshold = split_threshold
+
+    def tokenize(self, text, encoding_name=None, return_byte=False, **kwargs):
+        """
+        by default you can use `encoding_name` to be one of,
+        ['gpt2', 'r50k_base', 'p50k_base', 'p50k_edit', 'cl100k_base', 'o200k_base']
+
+        or you can use `encoding_model` that tiktoken supports in their mapping such as "gpt-4o"
+        """
+        if not self.tokenizer:
+            return TokenizeUtil.tokenize(
+                text,
+                encoding_model=self.imodel.iModel_name,
+                encoding_name=encoding_name,
+                return_byte=return_byte,
+            )
+
+        if hasattr(self.tokenizer, "tokenize"):
+            return self.tokenizer.tokenize(text, **kwargs)
+
+        return self.tokenizer(text, **kwargs)
+
+    def split(
+        self,
+        text,
+        chunk_size=None,
+        overlap=None,
+        threshold=None,
+        by_chars=False,
+        return_tokens=False,
+        return_byte=False,
+        **kwargs,
+    ):
+        if not self.splitter:
+            splitter = (
+                TokenizeUtil.chunk_by_chars
+                if by_chars
+                else TokenizeUtil.chunk_by_tokens
+            )
+            return splitter(
+                text,
+                chunk_size or self.chunk_size,
+                overlap or self.split_overlap,
+                threshold or self.split_threshold,
+                return_tokens=return_tokens,
+                return_byte=return_byte,
+            )
+
+        a = [
+            getattr(self.splitter, i, None)
+            for i in ["split", "chunk", "segment"]
+            if i is not None
+        ][0]
+        a = getattr(self.splitter, a)
+        return a(text, **kwargs)
+
+    async def rank_by_pplex(
+        self, items: list, initial_text=None, cumulative=False, n_samples=None, **kwargs
+    ):
+        """
+        rank a list of items according to their perplexity
+        an item can be a single token or a list of tokens
+
+        kwargs: additional arguments to pass to the model
+        """
+
+        async def _get_item_perplexity(item):
+            item = item if isinstance(item, list) else [item]
+            item = (
+                item[: self.max_tokens_per_sample]
+                if len(item) > self.max_tokens_per_sample
+                else item
+            )
+            return await self.imodel.compute_perplexity(
+                initial_context=initial_text,
+                tokens=item,
+                n_samples=n_samples or self.n_samples,
+                system_msg=self.system_msg,
+                **kwargs,
+            )
+
+        if not isinstance(items, list):
+            items = self.tokenize(items)
+
+        if len(items) == 1:
+            return [items]  # no need to rank a single item
+
+        _segments = []
+        _context = initial_text or ""
+        _task = []
+
+        if cumulative:
+            for i in items:
+                if isinstance(i, list):
+                    _context += " " + " ".join(i).strip()
+                else:
+                    _context += " " + i.strip()
+
+                _segments.append(_context)
+        else:
+            _segments = items
+
+        for i in _segments:
+            _task.append(asyncio.create_task(_get_item_perplexity(i)))
+
+        results = await asyncio.gather(*_task)
+        results = [(item, pplex) for item, pplex in zip(items, results)]
+        return sorted(results, key=lambda x: x[1]["logprobs"], reverse=True)
+
+    async def compress(
+        self,
+        text,
+        target_ratio=None,
+        initial_text=None,
+        cumulative=False,
+        split_kwargs=None,
+        split_overlap=None,
+        split_threshold=None,
+        rank_by="perplexity",
+        min_compression_score=None,
+        verbose=True,
+        **kwargs,
+    ):
+        start = time()
+        if split_kwargs is None:
+            split_kwargs = {}
+            split_kwargs["chunk_size"] = self.max_tokens_per_sample
+            split_kwargs["overlap"] = split_overlap or 0
+            split_kwargs["threshold"] = split_threshold or 0
+
+        len_tokens = len(self.tokenize(text))
+
+        items = self.split(text, return_tokens=True, **split_kwargs)
+
+        if rank_by == "perplexity":
+            ranked_items = await self.rank_by_pplex(
+                items=items, initial_text=initial_text, cumulative=cumulative, **kwargs
+            )
+
+            prompt_tokens = sum([i[1]["num_prompt_tokens"] for i in ranked_items])
+
+            num_completion_tokens = sum(
+                [i[1]["num_completion_tokens"] for i in ranked_items]
+            )
+
+            price = (
+                prompt_tokens * 0.5 / 1000000 + num_completion_tokens * 1.5 / 1000000
+            )
+
+            selected_items = self.select_by_pplex(
+                ranked_items=ranked_items,
+                target_compression_ratio=target_ratio or self.target_ratio,
+                original_length=len_tokens,
+                min_pplex=min_compression_score or self.min_compression_score,
+            )
+
+            if verbose:
+                msg = ""
+                msg += f"Original Token number: {len_tokens}\n"
+
+                def _f(i):
+                    if isinstance(i, str):
+                        i = self.tokenize(i)
+
+                    if isinstance(i, list):
+                        return len(to_list(i, dropna=True, flatten=True))
+
+                len_ = sum([_f(i) for i in selected_items])
+                msg += f"Selected Token number: {len_}\n"
+                msg += f"Token Compression Ratio: {len_ / len_tokens:.03f}\n"
+                msg += f"Compression Time: {time() - start:.04f} seconds\n"
+                msg += f"Compression Model: {self.imodel.iModel_name}\n"
+                msg += f"Compression Method: {rank_by}\n"
+                msg += f"Compression Usage: ${price:.05f}\n"
+                print(msg)
+
+            a = "".join([i.strip() for i in selected_items]).strip()
+            a = a.replace("\n\n", "")
+            return a
+
+        raise ValueError(f"Ranking method {rank_by} is not supported")
+
+    def select_by_pplex(
+        self, ranked_items, target_compression_ratio, original_length, min_pplex=None
+    ):
+        min_pplex = min_pplex or 0
+
+        desired_length = int(original_length * target_compression_ratio)
+
+        items = []
+        current_length = 0
+
+        for item, info in ranked_items:
+            if info["perplexity"] > min_pplex:
+                item = self.tokenize(item) if isinstance(item, str) else item
+                item = item if isinstance(item, list) else [item]
+                item = to_list(item, dropna=True, flatten=True)
+                if current_length + len(item) > desired_length:
+                    break
+                else:
+                    current_length += len(item)
+                    items.append("".join(item))
+
+        return items
lionagi/experimental/compressor/llm_summarizer.py
@@ -0,0 +1,61 @@
+# from lionagi.core.collections import iModel
+# from .base import TokenCompressor
+
+
+# class LLMSummarizer(TokenCompressor):
+
+#     def __init__(
+#         self, imodel: iModel = None, system_msg=None, tokenizer=None, splitter=None,
+#         max_tokens=25, target_ratio=0.3
+#     ):
+#         imodel = imodel or iModel(model="gpt-3.5-turbo", max_tokens=max_tokens)
+#         super().__init__(imodel=imodel, tokenizer=tokenizer, splitter=splitter)
+#         self.system_msg = (
+#             system_msg
+#             or "Summarize the following sentence to be concise and informative:"
+#         )
+#         self.target_ratio = target_ratio
+
+#     async def summarize_sentence(self, sentence, **kwargs):
+#         messages = [
+#             {"role": "system", "content": self.system_msg},
+#             {"role": "user", "content": sentence},
+#         ]
+#         response = await self.imodel.call_chat_completion(messages, **kwargs)
+#         return response["choices"][0]["message"]["content"]
+
+#     def tokenize(self, text):
+#         tokenize_func = self.tokenizer or tokenize
+#         return tokenize_func(text)
+
+#     def split(self, text):
+#         split_func = self.splitter or split_into_segments
+#         return split_func(text)
+
+#     # Function to enforce maximum sentence length
+#     def enforce_max_sentence_length(self, sentence, max_words=25):
+#         words = self.tokenize(sentence)
+#         if len(words) > max_words:
+#             sentence = ' '.join(words[:max_words])
+#         return sentence
+
+#     async def summarize_text(self, text, max_length_per_sentence=25, target_ratio=None, **kwargs):
+#         sentences = self.split(text)
+#         summarized = await alcall(
+#             sentences, self.summarize_sentence, **kwargs
+#         )
+#         summarized = [
+#             self.enforce_max_sentence_length(sentence, max_length_per_sentence)
+#             for sentence in summarized
+#         ]
+
+#         original_length = len(self.tokenize(text))
+#         summarized_length = len(self.tokenize(' '.join(summarized)))
+#         current_ratio = summarized_length / original_length
+
+#         target_ratio = target_ratio or self.target_ratio
+#         if current_ratio > target_ratio:
+#             words_to_remove = int((current_ratio - target_ratio) * original_length)
+#             return ' '.join(summarized[:-words_to_remove])
+
+#         return ' '.join(summarized)
lionagi/experimental/compressor/util.py
@@ -0,0 +1,70 @@
+# import asyncio
+# from lionagi import alcall
+# from lionagi.libs.ln_convert import to_list
+# import numpy as np
+
+# def split_into_segments(text):
+#     segments = text.split(".")  # Splitting by period followed by a space
+#     return [segment.strip() for segment in segments if segment]
+
+# # Tokenize the segment
+# def tokenize(segment):
+#     tokens = segment.split()  # Simple space-based tokenization
+#     return tokens
+
+# async def calculate_perplexity(system_msg: str, imodel, tokens, initial_context=None, **kwargs):
+#     _tasks = []
+#     _context = initial_context or ""
+#     for i in range(len(tokens)):
+#         _context += " " + tokens[i]
+#         messages = [
+#             {"role": "system", "content": system_msg},
+#             {"role": "user", "content": _context},
+#         ]
+#         task = asyncio.create_task(
+#             imodel.call_chat_completion(
+#                 messages=messages, logprobs=True, max_tokens=1, **kwargs
+#             )
+#         )
+#         _tasks.append(task)
+
+#     results = await asyncio.gather(*_tasks)
+#     logprobs = [
+#         result[1]["choices"][0]["logprobs"]["content"] for result in results
+#     ]
+#     logprobs = to_list(logprobs, flatten=True, dropna=True)
+#     logprobs = [lprob_["logprob"] for lprob_ in logprobs]
+#     return np.exp(np.mean(logprobs))
+
+# async def rank_by_perplexity(
+#     text: str | list[str] = None,  # if list we assume they are already well split
+#     initial_text=None,
+
+#     segments,
+#     initial_text=None,
+#     cumulative=False,
+#     **kwargs
+# ):
+#     _segments = []
+#     _context = initial_text or ""
+#     _task = []
+
+#     if cumulative:
+#         for i in range(1, len(segments)):
+#             _context += " " + segments[i - 1]
+#             _segments.append(_context)
+#     else:
+#         _segments = segments
+
+#     for i in segments:
+#         _task.append(asyncio.create_task(
+#             calculate_perplexity(
+#                 self.system_msg, self.imodel, self.tokenize(i), **kwargs)
+#             )
+#         )
+#     segment_perplexities = await asyncio.gather(*_task)
+
+#     return {
+#         segment: perplexity
+#         for segment, perplexity in zip(segments, segment_perplexities)
+#     }
lionagi/experimental/directive/__init__.py
@@ -0,0 +1,19 @@
+# from ..form.predict import predict
+# from .select import select
+# from ..form.score import score
+# from ..form.react import react
+# from .vote import vote
+# from ..form.plan import plan
+# from .cot import chain_of_thoughts, chain_of_react
+
+
+# __all__ = [
+#     "predict",
+#     "select",
+#     "score",
+#     "vote",
+#     "react",
+#     "plan",
+#     "chain_of_thoughts",
+#     "chain_of_react",
+# ]
lionagi/experimental/directive/parser/__init__.py
File without changes
lionagi/experimental/directive/parser/base_parser.py
@@ -0,0 +1,282 @@
+"""
+Copyright 2024 HaiyangLi
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from typing import List, Optional
+
+from lionagi.experimental.directive.tokenizer import BaseToken
+from ..template.schema import IfNode, TryNode, ForNode
+
+
+class BaseDirectiveParser:
+    """A base parser with lookahead, error recovery, and backtracking support.
+
+    Attributes:
+        tokens (List[BaseToken]): A list of tokens to be parsed.
+        current_token_index (int): The index of the current token in the tokens list.
+        current_token (Optional[BaseToken]): The current token being processed.
+
+    Examples:
+        >>> tokenizer = BaseTokenizer("IF x > 10 THEN DO something ENDIF")
+        >>> tokens = tokenizer.get_tokens()
+        >>> parser = BaseParser(tokens)
+        >>> print(parser.current_token)
+        BaseToken(KEYWORD, IF)
+    """
+
+    def __init__(self, tokens: List[BaseToken]):
+        self.tokens = tokens
+        self.current_token_index = -1
+        self.current_token: Optional[BaseToken] = None
+        self.next_token()
+
+    def next_token(self) -> None:
+        """Advances to the next token in the list."""
+        self.current_token_index += 1
+        if self.current_token_index < len(self.tokens):
+            self.current_token = self.tokens[self.current_token_index]
+        else:
+            self.current_token = None
+
+    def peek_next_token(self, offset: int = 1) -> BaseToken | None:
+        """Peeks at the next token without consuming it.
+
+        Args:
+            offset (int): The number of tokens to look ahead.
+
+        Returns:
+            Optional[BaseToken]: The token at the specified lookahead offset, or None if end of list.
+        """
+        peek_index = self.current_token_index + offset
+        if peek_index < len(self.tokens):
+            return self.tokens[peek_index]
+        else:
+            return None
+
+    def skip_until(self, token_types: List[str]) -> None:
+        """Skips tokens until a token of the specified type is found.
+
+        Args:
+            token_types (List[str]): A list of token types to stop skipping.
+        """
+        while self.current_token and self.current_token.type not in token_types:
+            self.next_token()
+
+    def mark(self) -> int:
+        """Marks the current position in the token list for potential backtracking.
+
+        Returns:
+            int: The current token index.
+        """
+        return self.current_token_index
+
+    def reset_to_mark(self, mark: int) -> None:
+        """Resets the parser to a previously marked position.
+
+        Args:
+            mark (int): The token index to reset to.
+        """
+        self.current_token_index = mark - 1
+        self.next_token()
+
+    def skip_semicolon(self):
+        """Skips a semicolon token if it is the current token."""
+        if self.current_token and self.current_token.value == ";":
+            self.next_token()
+
+    def parse_expression(self):
+        """Parses an expression until a semicolon is encountered.
+
+        Returns:
+            str: The parsed expression as a string.
+
+        Raises:
+            SyntaxError: If a semicolon is not found at the end of the expression.
+        """
+        expr = ""
+        while self.current_token and self.current_token.value != ";":
+            expr += self.current_token.value + " "
+            self.next_token()
+        # Expecting a semicolon at the end of the condition
+        if self.current_token.value != ";":
+            raise SyntaxError("Expected ';' at the end of the condition")
+        self.next_token()  # Move past the semicolon to the next part of the statement
+        return expr.strip()
+
+    def parse_if_block(self):
+        """Parses a block of statements for an IF condition.
+
+        Returns:
+            list: The parsed block of statements as a list of strings.
+        """
+        block = []
+        # Parse the block until 'ELSE', 'ENDIF', ensuring not to include semicolons as part of the block
+        while self.current_token and self.current_token.value not in ("ENDIF", "ELSE"):
+            if self.current_token.value == "DO":
+                self.next_token()  # Move past 'DO' to get to the action
+            block.append(self.current_token.value)  # Add the action to the block
+            self.next_token()  # Move to the next token, which could be a semicolon or the next action
+            if self.current_token.value == ";":
+                self.next_token()  # Move past the semicolon
+        return block
+
+    def parse_if_statement(self):
+        """Parses an IF statement.
+
+        Returns:
+            IfNode: The parsed IF statement as an IfNode object.
+
+        Raises:
+            SyntaxError: If the IF statement is not properly formed.
+        """
+        if self.current_token.type != "KEYWORD" or self.current_token.value != "IF":
+            raise SyntaxError("Expected IF statement")
+        self.next_token()  # Skip 'IF'
+
+        condition = self.parse_expression()  # Now properly ends after the semicolon
+
+        true_block = []
+        if self.current_token.value == "DO":
+            true_block = self.parse_if_block()  # Parse true block after 'DO'
+
+        false_block = None
+        if self.current_token and self.current_token.value == "ELSE":
+            self.next_token()  # Skip 'ELSE', expect 'DO' next for the false block
+            self.skip_semicolon()
+            if self.current_token.value != "DO":
+                raise SyntaxError("Expected 'DO' after 'ELSE'")
+            self.next_token()  # Skip 'DO'
+            false_block = self.parse_if_block()  # Parse false block
+
+        return IfNode(condition, true_block, false_block)
+
+    def parse_for_statement(self):
+        """Parses a FOR statement.
+
+        Returns:
+            ForNode: The parsed FOR statement as a ForNode object.
+
+        Raises:
+            SyntaxError: If the FOR statement is not properly formed.
+        """
+        if self.current_token.type != "KEYWORD" or self.current_token.value != "FOR":
+            raise SyntaxError("Expected FOR statement")
+        self.next_token()  # Skip 'FOR'
+
+        # Parse the iterator variable
+        if self.current_token.type != "IDENTIFIER":
+            raise SyntaxError("Expected iterator variable after FOR")
+        iterator = self.current_token.value
+        self.next_token()  # Move past the iterator variable
+
+        # Expect and skip 'IN' keyword
+        if self.current_token.type != "KEYWORD" or self.current_token.value != "IN":
+            raise SyntaxError("Expected 'IN' after iterator variable")
+        self.next_token()  # Move past 'IN'
+
+        # Parse the collection
+        if self.current_token.type not in ["IDENTIFIER", "LITERAL"]:
+            raise SyntaxError("Expected collection after 'IN'")
+        collection = self.current_token.value
+        self.next_token()  # Move past the collection
+
+        # Now, parse the block of statements to execute
+        true_block = self.parse_for_block()
+
+        # Construct and return a ForNode
+        return ForNode(iterator, collection, true_block)
+
+    def parse_for_block(self):
+        """Parses a block of statements for a FOR loop.
+
+        Returns:
+            list: The parsed block of statements as a list of strings.
+        """
+        block = []
+        # Skip initial 'DO' if present
+        if self.current_token and self.current_token.value == "DO":
+            self.next_token()
+
+        while self.current_token and self.current_token.value not in ("ENDFOR",):
+            if self.current_token.value == ";":
+                # If a semicolon is encountered, skip it and move to the next token
+                self.next_token()
+                continue
+            # Add the current token to the block unless it's a 'DO' or ';'
+            if self.current_token.value != "DO":
+                block.append(self.current_token.value)
+            self.next_token()
+
+        # The loop exits when 'ENDFOR' is encountered; move past it for subsequent parsing
+        self.next_token()  # Skip 'ENDFOR'
+        return block
+
+    def parse_try_statement(self):
+        """Parses a TRY statement.
+
+        Returns:
+            TryNode: The parsed TRY statement as a TryNode object.
+
+        Raises:
+            SyntaxError: If the TRY statement is not properly formed.
+        """
+        if self.current_token.type != "KEYWORD" or self.current_token.value != "TRY":
+            raise SyntaxError("Expected TRY statement")
+        self.next_token()  # Skip 'TRY'
+
+        try_block = self.parse_try_block("EXCEPT")  # Parse the try block until 'EXCEPT'
+
+        # Now expecting 'EXCEPT' keyword
+        if not (self.current_token and self.current_token.value == "EXCEPT"):
+            raise SyntaxError("Expected 'EXCEPT' after try block")
+        self.next_token()  # Move past 'EXCEPT'
+
+        except_block = self.parse_try_block(
+            "ENDTRY"
+        )  # Parse the except block until 'ENDTRY'
+
+        # Ensure we are correctly positioned after 'ENDTRY'
+        if self.current_token and self.current_token.value != "ENDTRY":
+            raise SyntaxError("Expected 'ENDTRY' at the end of except block")
+        self.next_token()  # Move past 'ENDTRY' for subsequent parsing
+
+        return TryNode(try_block, except_block)
+
+    def parse_try_block(self, stop_keyword):
+        """Parses a block of statements for a TRY or EXCEPT clause.
+
+        Args:
+            stop_keyword (str): The keyword that indicates the end of the block.
+
+        Returns:
+            list: The parsed block of statements as a list of strings.
+        """
+        block = []
+        while self.current_token and self.current_token.value != stop_keyword:
+            if self.current_token.value == "DO":
+                self.next_token()  # Move past 'DO' to get to the action
+            elif self.current_token.value == ";":
+                self.next_token()  # Move past the semicolon
+                continue  # Skip adding ';' to the block
+            else:
+                block.append(self.current_token.value)  # Add the action to the block
+                self.next_token()
+
+        return block
+
+
+# "IF condition1 && condition2; DO action2; ELSE; DO action3; ENDIF;"
+# "FOR input_ IN collections; DO action(input_); ENDFOR;"
+# "TRY; DO action(); EXCEPT; DO action(input_); ENDTRY;"
lionagi/experimental/directive/template/__init__.py
File without changes