lionagi 0.0.312__py3-none-any.whl → 0.2.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (268) hide show
  1. lionagi/__init__.py +61 -3
  2. lionagi/core/__init__.py +0 -14
  3. lionagi/core/_setting/_setting.py +59 -0
  4. lionagi/core/action/__init__.py +14 -0
  5. lionagi/core/action/function_calling.py +136 -0
  6. lionagi/core/action/manual.py +1 -0
  7. lionagi/core/action/node.py +109 -0
  8. lionagi/core/action/tool.py +114 -0
  9. lionagi/core/action/tool_manager.py +356 -0
  10. lionagi/core/agent/__init__.py +0 -3
  11. lionagi/core/agent/base_agent.py +45 -36
  12. lionagi/core/agent/eval/evaluator.py +1 -0
  13. lionagi/core/agent/eval/vote.py +40 -0
  14. lionagi/core/agent/learn/learner.py +59 -0
  15. lionagi/core/agent/plan/unit_template.py +1 -0
  16. lionagi/core/collections/__init__.py +17 -0
  17. lionagi/core/collections/_logger.py +319 -0
  18. lionagi/core/collections/abc/__init__.py +53 -0
  19. lionagi/core/collections/abc/component.py +615 -0
  20. lionagi/core/collections/abc/concepts.py +297 -0
  21. lionagi/core/collections/abc/exceptions.py +150 -0
  22. lionagi/core/collections/abc/util.py +45 -0
  23. lionagi/core/collections/exchange.py +161 -0
  24. lionagi/core/collections/flow.py +426 -0
  25. lionagi/core/collections/model.py +419 -0
  26. lionagi/core/collections/pile.py +913 -0
  27. lionagi/core/collections/progression.py +236 -0
  28. lionagi/core/collections/util.py +64 -0
  29. lionagi/core/director/direct.py +314 -0
  30. lionagi/core/director/director.py +2 -0
  31. lionagi/core/engine/branch_engine.py +333 -0
  32. lionagi/core/engine/instruction_map_engine.py +204 -0
  33. lionagi/core/engine/sandbox_.py +14 -0
  34. lionagi/core/engine/script_engine.py +99 -0
  35. lionagi/core/executor/base_executor.py +90 -0
  36. lionagi/core/executor/graph_executor.py +330 -0
  37. lionagi/core/executor/neo4j_executor.py +384 -0
  38. lionagi/core/generic/__init__.py +7 -0
  39. lionagi/core/generic/edge.py +112 -0
  40. lionagi/core/generic/edge_condition.py +16 -0
  41. lionagi/core/generic/graph.py +236 -0
  42. lionagi/core/generic/hyperedge.py +1 -0
  43. lionagi/core/generic/node.py +220 -0
  44. lionagi/core/generic/tree.py +48 -0
  45. lionagi/core/generic/tree_node.py +79 -0
  46. lionagi/core/mail/__init__.py +7 -3
  47. lionagi/core/mail/mail.py +25 -0
  48. lionagi/core/mail/mail_manager.py +142 -58
  49. lionagi/core/mail/package.py +45 -0
  50. lionagi/core/mail/start_mail.py +36 -0
  51. lionagi/core/message/__init__.py +19 -0
  52. lionagi/core/message/action_request.py +133 -0
  53. lionagi/core/message/action_response.py +135 -0
  54. lionagi/core/message/assistant_response.py +95 -0
  55. lionagi/core/message/instruction.py +234 -0
  56. lionagi/core/message/message.py +101 -0
  57. lionagi/core/message/system.py +86 -0
  58. lionagi/core/message/util.py +283 -0
  59. lionagi/core/report/__init__.py +4 -0
  60. lionagi/core/report/base.py +217 -0
  61. lionagi/core/report/form.py +231 -0
  62. lionagi/core/report/report.py +166 -0
  63. lionagi/core/report/util.py +28 -0
  64. lionagi/core/rule/__init__.py +0 -0
  65. lionagi/core/rule/_default.py +16 -0
  66. lionagi/core/rule/action.py +99 -0
  67. lionagi/core/rule/base.py +238 -0
  68. lionagi/core/rule/boolean.py +56 -0
  69. lionagi/core/rule/choice.py +47 -0
  70. lionagi/core/rule/mapping.py +96 -0
  71. lionagi/core/rule/number.py +71 -0
  72. lionagi/core/rule/rulebook.py +109 -0
  73. lionagi/core/rule/string.py +52 -0
  74. lionagi/core/rule/util.py +35 -0
  75. lionagi/core/session/__init__.py +0 -3
  76. lionagi/core/session/branch.py +431 -0
  77. lionagi/core/session/directive_mixin.py +287 -0
  78. lionagi/core/session/session.py +230 -902
  79. lionagi/core/structure/__init__.py +1 -0
  80. lionagi/core/structure/chain.py +1 -0
  81. lionagi/core/structure/forest.py +1 -0
  82. lionagi/core/structure/graph.py +1 -0
  83. lionagi/core/structure/tree.py +1 -0
  84. lionagi/core/unit/__init__.py +5 -0
  85. lionagi/core/unit/parallel_unit.py +245 -0
  86. lionagi/core/unit/template/__init__.py +0 -0
  87. lionagi/core/unit/template/action.py +81 -0
  88. lionagi/core/unit/template/base.py +51 -0
  89. lionagi/core/unit/template/plan.py +84 -0
  90. lionagi/core/unit/template/predict.py +109 -0
  91. lionagi/core/unit/template/score.py +124 -0
  92. lionagi/core/unit/template/select.py +104 -0
  93. lionagi/core/unit/unit.py +362 -0
  94. lionagi/core/unit/unit_form.py +305 -0
  95. lionagi/core/unit/unit_mixin.py +1168 -0
  96. lionagi/core/unit/util.py +71 -0
  97. lionagi/core/validator/__init__.py +0 -0
  98. lionagi/core/validator/validator.py +364 -0
  99. lionagi/core/work/__init__.py +0 -0
  100. lionagi/core/work/work.py +76 -0
  101. lionagi/core/work/work_function.py +101 -0
  102. lionagi/core/work/work_queue.py +103 -0
  103. lionagi/core/work/worker.py +258 -0
  104. lionagi/core/work/worklog.py +120 -0
  105. lionagi/experimental/__init__.py +0 -0
  106. lionagi/experimental/compressor/__init__.py +0 -0
  107. lionagi/experimental/compressor/base.py +46 -0
  108. lionagi/experimental/compressor/llm_compressor.py +247 -0
  109. lionagi/experimental/compressor/llm_summarizer.py +61 -0
  110. lionagi/experimental/compressor/util.py +70 -0
  111. lionagi/experimental/directive/__init__.py +19 -0
  112. lionagi/experimental/directive/parser/__init__.py +0 -0
  113. lionagi/experimental/directive/parser/base_parser.py +282 -0
  114. lionagi/experimental/directive/template/__init__.py +0 -0
  115. lionagi/experimental/directive/template/base_template.py +79 -0
  116. lionagi/experimental/directive/template/schema.py +36 -0
  117. lionagi/experimental/directive/tokenizer.py +73 -0
  118. lionagi/experimental/evaluator/__init__.py +0 -0
  119. lionagi/experimental/evaluator/ast_evaluator.py +131 -0
  120. lionagi/experimental/evaluator/base_evaluator.py +218 -0
  121. lionagi/experimental/knowledge/__init__.py +0 -0
  122. lionagi/experimental/knowledge/base.py +10 -0
  123. lionagi/experimental/knowledge/graph.py +0 -0
  124. lionagi/experimental/memory/__init__.py +0 -0
  125. lionagi/experimental/strategies/__init__.py +0 -0
  126. lionagi/experimental/strategies/base.py +1 -0
  127. lionagi/integrations/bridge/autogen_/__init__.py +0 -0
  128. lionagi/integrations/bridge/autogen_/autogen_.py +124 -0
  129. lionagi/integrations/bridge/langchain_/documents.py +4 -0
  130. lionagi/integrations/bridge/llamaindex_/index.py +30 -0
  131. lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py +6 -0
  132. lionagi/integrations/bridge/llamaindex_/llama_pack.py +227 -0
  133. lionagi/integrations/bridge/llamaindex_/node_parser.py +6 -9
  134. lionagi/integrations/bridge/pydantic_/pydantic_bridge.py +1 -0
  135. lionagi/integrations/bridge/transformers_/__init__.py +0 -0
  136. lionagi/integrations/bridge/transformers_/install_.py +36 -0
  137. lionagi/integrations/chunker/__init__.py +0 -0
  138. lionagi/integrations/chunker/chunk.py +312 -0
  139. lionagi/integrations/config/oai_configs.py +38 -7
  140. lionagi/integrations/config/ollama_configs.py +1 -1
  141. lionagi/integrations/config/openrouter_configs.py +14 -2
  142. lionagi/integrations/loader/__init__.py +0 -0
  143. lionagi/integrations/loader/load.py +253 -0
  144. lionagi/integrations/loader/load_util.py +195 -0
  145. lionagi/integrations/provider/_mapping.py +46 -0
  146. lionagi/integrations/provider/litellm.py +2 -1
  147. lionagi/integrations/provider/mlx_service.py +16 -9
  148. lionagi/integrations/provider/oai.py +91 -4
  149. lionagi/integrations/provider/ollama.py +7 -6
  150. lionagi/integrations/provider/openrouter.py +115 -8
  151. lionagi/integrations/provider/services.py +2 -2
  152. lionagi/integrations/provider/transformers.py +18 -22
  153. lionagi/integrations/storage/__init__.py +3 -0
  154. lionagi/integrations/storage/neo4j.py +665 -0
  155. lionagi/integrations/storage/storage_util.py +287 -0
  156. lionagi/integrations/storage/structure_excel.py +285 -0
  157. lionagi/integrations/storage/to_csv.py +63 -0
  158. lionagi/integrations/storage/to_excel.py +83 -0
  159. lionagi/libs/__init__.py +26 -1
  160. lionagi/libs/ln_api.py +78 -23
  161. lionagi/libs/ln_context.py +37 -0
  162. lionagi/libs/ln_convert.py +21 -9
  163. lionagi/libs/ln_func_call.py +69 -28
  164. lionagi/libs/ln_image.py +107 -0
  165. lionagi/libs/ln_knowledge_graph.py +405 -0
  166. lionagi/libs/ln_nested.py +26 -11
  167. lionagi/libs/ln_parse.py +110 -14
  168. lionagi/libs/ln_queue.py +117 -0
  169. lionagi/libs/ln_tokenize.py +164 -0
  170. lionagi/{core/prompt/field_validator.py → libs/ln_validate.py} +79 -14
  171. lionagi/libs/special_tokens.py +172 -0
  172. lionagi/libs/sys_util.py +107 -2
  173. lionagi/lions/__init__.py +0 -0
  174. lionagi/lions/coder/__init__.py +0 -0
  175. lionagi/lions/coder/add_feature.py +20 -0
  176. lionagi/lions/coder/base_prompts.py +22 -0
  177. lionagi/lions/coder/code_form.py +13 -0
  178. lionagi/lions/coder/coder.py +168 -0
  179. lionagi/lions/coder/util.py +96 -0
  180. lionagi/lions/researcher/__init__.py +0 -0
  181. lionagi/lions/researcher/data_source/__init__.py +0 -0
  182. lionagi/lions/researcher/data_source/finhub_.py +191 -0
  183. lionagi/lions/researcher/data_source/google_.py +199 -0
  184. lionagi/lions/researcher/data_source/wiki_.py +96 -0
  185. lionagi/lions/researcher/data_source/yfinance_.py +21 -0
  186. lionagi/tests/integrations/__init__.py +0 -0
  187. lionagi/tests/libs/__init__.py +0 -0
  188. lionagi/tests/libs/test_field_validators.py +353 -0
  189. lionagi/tests/{test_libs → libs}/test_func_call.py +23 -21
  190. lionagi/tests/{test_libs → libs}/test_nested.py +36 -21
  191. lionagi/tests/{test_libs → libs}/test_parse.py +1 -1
  192. lionagi/tests/libs/test_queue.py +67 -0
  193. lionagi/tests/test_core/collections/__init__.py +0 -0
  194. lionagi/tests/test_core/collections/test_component.py +206 -0
  195. lionagi/tests/test_core/collections/test_exchange.py +138 -0
  196. lionagi/tests/test_core/collections/test_flow.py +145 -0
  197. lionagi/tests/test_core/collections/test_pile.py +171 -0
  198. lionagi/tests/test_core/collections/test_progression.py +129 -0
  199. lionagi/tests/test_core/generic/__init__.py +0 -0
  200. lionagi/tests/test_core/generic/test_edge.py +67 -0
  201. lionagi/tests/test_core/generic/test_graph.py +96 -0
  202. lionagi/tests/test_core/generic/test_node.py +106 -0
  203. lionagi/tests/test_core/generic/test_tree_node.py +73 -0
  204. lionagi/tests/test_core/test_branch.py +115 -292
  205. lionagi/tests/test_core/test_form.py +46 -0
  206. lionagi/tests/test_core/test_report.py +105 -0
  207. lionagi/tests/test_core/test_validator.py +111 -0
  208. lionagi/version.py +1 -1
  209. {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/LICENSE +12 -11
  210. {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/METADATA +19 -118
  211. lionagi-0.2.1.dist-info/RECORD +240 -0
  212. lionagi/core/branch/__init__.py +0 -4
  213. lionagi/core/branch/base_branch.py +0 -654
  214. lionagi/core/branch/branch.py +0 -471
  215. lionagi/core/branch/branch_flow_mixin.py +0 -96
  216. lionagi/core/branch/executable_branch.py +0 -347
  217. lionagi/core/branch/util.py +0 -323
  218. lionagi/core/direct/__init__.py +0 -6
  219. lionagi/core/direct/predict.py +0 -161
  220. lionagi/core/direct/score.py +0 -278
  221. lionagi/core/direct/select.py +0 -169
  222. lionagi/core/direct/utils.py +0 -87
  223. lionagi/core/direct/vote.py +0 -64
  224. lionagi/core/flow/base/baseflow.py +0 -23
  225. lionagi/core/flow/monoflow/ReAct.py +0 -238
  226. lionagi/core/flow/monoflow/__init__.py +0 -9
  227. lionagi/core/flow/monoflow/chat.py +0 -95
  228. lionagi/core/flow/monoflow/chat_mixin.py +0 -263
  229. lionagi/core/flow/monoflow/followup.py +0 -214
  230. lionagi/core/flow/polyflow/__init__.py +0 -1
  231. lionagi/core/flow/polyflow/chat.py +0 -248
  232. lionagi/core/mail/schema.py +0 -56
  233. lionagi/core/messages/__init__.py +0 -3
  234. lionagi/core/messages/schema.py +0 -533
  235. lionagi/core/prompt/prompt_template.py +0 -316
  236. lionagi/core/schema/__init__.py +0 -22
  237. lionagi/core/schema/action_node.py +0 -29
  238. lionagi/core/schema/base_mixin.py +0 -296
  239. lionagi/core/schema/base_node.py +0 -199
  240. lionagi/core/schema/condition.py +0 -24
  241. lionagi/core/schema/data_logger.py +0 -354
  242. lionagi/core/schema/data_node.py +0 -93
  243. lionagi/core/schema/prompt_template.py +0 -67
  244. lionagi/core/schema/structure.py +0 -910
  245. lionagi/core/tool/__init__.py +0 -3
  246. lionagi/core/tool/tool_manager.py +0 -280
  247. lionagi/integrations/bridge/pydantic_/base_model.py +0 -7
  248. lionagi/tests/test_core/test_base_branch.py +0 -427
  249. lionagi/tests/test_core/test_chat_flow.py +0 -63
  250. lionagi/tests/test_core/test_mail_manager.py +0 -75
  251. lionagi/tests/test_core/test_prompts.py +0 -51
  252. lionagi/tests/test_core/test_session.py +0 -254
  253. lionagi/tests/test_core/test_session_base_util.py +0 -312
  254. lionagi/tests/test_core/test_tool_manager.py +0 -95
  255. lionagi-0.0.312.dist-info/RECORD +0 -111
  256. /lionagi/core/{branch/base → _setting}/__init__.py +0 -0
  257. /lionagi/core/{flow → agent/eval}/__init__.py +0 -0
  258. /lionagi/core/{flow/base → agent/learn}/__init__.py +0 -0
  259. /lionagi/core/{prompt → agent/plan}/__init__.py +0 -0
  260. /lionagi/core/{tool/manual.py → agent/plan/plan.py} +0 -0
  261. /lionagi/{tests/test_integrations → core/director}/__init__.py +0 -0
  262. /lionagi/{tests/test_libs → core/engine}/__init__.py +0 -0
  263. /lionagi/{tests/test_libs/test_async.py → core/executor/__init__.py} +0 -0
  264. /lionagi/tests/{test_libs → libs}/test_api.py +0 -0
  265. /lionagi/tests/{test_libs → libs}/test_convert.py +0 -0
  266. /lionagi/tests/{test_libs → libs}/test_sys_util.py +0 -0
  267. {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/WHEEL +0 -0
  268. {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,247 @@
1
+ import asyncio
2
+ from lionagi import alcall
3
+ from lionagi.libs.ln_convert import to_list
4
+ import numpy as np
5
+ from lionagi.core.collections import iModel
6
+ from .base import TokenCompressor
7
+ from lionagi.libs.ln_tokenize import TokenizeUtil
8
+ from time import time
9
+
10
+ # inspired by LLMLingua, MIT License, Copyright (c) Microsoft Corporation.
11
+ # https://github.com/microsoft/LLMLingua
12
+
13
+
14
class LLMCompressor(TokenCompressor):
    """Token compressor that ranks chunks of text by model perplexity.

    The text is split into chunks, each chunk is scored through
    ``imodel.compute_perplexity``, and the highest ranked chunks are kept
    until the target fraction of the original token count is reached.
    Inspired by LLMLingua (see the attribution comment above the class).
    """

    def __init__(
        self,
        imodel: iModel = None,
        system_msg=None,
        tokenizer=None,  # must be a callable or object with a tokenize method
        splitter=None,  # must be a callable or object with a split/chunk/segment method
        target_ratio=0.2,
        n_samples=5,  # the cumulative samples to take in each perplexity calculation
        chunk_size=64,
        max_tokens_per_sample=80,
        min_compression_score=0,  # (0-1) the minimum score to consider for compression, 0 means all
        split_overlap=0,
        split_threshold=0,
        verbose=True,
    ):
        """Store the compression settings and hand model/tokenizer/splitter to the base class.

        When ``imodel`` is falsy a default ``iModel(model="gpt-3.5-turbo",
        temperature=0.3)`` is created. When ``system_msg`` is falsy a default
        summarisation prompt is used.
        """
        imodel = imodel or iModel(model="gpt-3.5-turbo", temperature=0.3)
        super().__init__(imodel=imodel, tokenizer=tokenizer, splitter=splitter)
        self.system_msg = (
            system_msg
            or "Concisely summarize and compress the information for storage:"
        )
        self.target_ratio = target_ratio
        self.n_samples = n_samples
        self.chunk_size = chunk_size
        self.max_tokens_per_sample = max_tokens_per_sample
        self.min_compression_score = min_compression_score
        self.verbose = verbose
        self.split_overlap = split_overlap
        self.split_threshold = split_threshold

    def tokenize(self, text, encoding_name=None, return_byte=False, **kwargs):
        """Tokenize ``text`` with the configured tokenizer, or ``TokenizeUtil`` if none is set.

        by default you can use `encoding_name` to be one of,
        ['gpt2', 'r50k_base', 'p50k_base', 'p50k_edit', 'cl100k_base', 'o200k_base']

        or you can use `encoding_model` that tiktoken supports in their mapping such as "gpt-4o"

        ``encoding_name`` and ``return_byte`` are only used on the default
        path; ``**kwargs`` are only forwarded to a custom tokenizer.
        """
        if not self.tokenizer:
            return TokenizeUtil.tokenize(
                text,
                encoding_model=self.imodel.iModel_name,
                encoding_name=encoding_name,
                return_byte=return_byte,
            )

        if hasattr(self.tokenizer, "tokenize"):
            return self.tokenizer.tokenize(text, **kwargs)

        return self.tokenizer(text, **kwargs)

    def split(
        self,
        text,
        chunk_size=None,
        overlap=None,
        threshold=None,
        by_chars=False,
        return_tokens=False,
        return_byte=False,
        **kwargs,
    ):
        """Split ``text`` into chunks.

        Without a custom splitter, ``TokenizeUtil.chunk_by_chars`` (when
        ``by_chars``) or ``TokenizeUtil.chunk_by_tokens`` is used; falsy
        ``chunk_size`` / ``overlap`` / ``threshold`` fall back to the values
        stored on the instance.

        With a custom splitter, the first callable attribute among ``split``,
        ``chunk`` and ``segment`` is invoked as ``method(text, **kwargs)``; if
        none exists and the splitter itself is callable it is called directly.
        Only ``**kwargs`` are forwarded on this path.

        Raises:
            TypeError: if the custom splitter is neither callable nor exposes
                one of the supported methods.
        """
        if not self.splitter:
            splitter = (
                TokenizeUtil.chunk_by_chars
                if by_chars
                else TokenizeUtil.chunk_by_tokens
            )
            return splitter(
                text,
                chunk_size or self.chunk_size,
                overlap or self.split_overlap,
                threshold or self.split_threshold,
                return_tokens=return_tokens,
                return_byte=return_byte,
            )

        # Look the method up by name and call it. The previous code fetched
        # the attribute values and then used one of them as an attribute
        # name, which raised TypeError for every custom splitter.
        for method_name in ("split", "chunk", "segment"):
            method = getattr(self.splitter, method_name, None)
            if callable(method):
                return method(text, **kwargs)

        if callable(self.splitter):
            return self.splitter(text, **kwargs)

        raise TypeError(
            "splitter must be a callable or an object with a "
            "split/chunk/segment method"
        )

    async def rank_by_pplex(
        self, items: list, initial_text=None, cumulative=False, n_samples=None, **kwargs
    ):
        """
        rank a list of items according to their perplexity
        an item can be a single token or a list of tokens

        kwargs: additional arguments to pass to the model

        Returns a list of ``(item, info)`` tuples, where ``info`` is whatever
        ``imodel.compute_perplexity`` returned for that item, sorted in
        descending order of ``info["logprobs"]``. A single item is scored like
        any other so that the return shape is always the same.
        """

        async def _get_item_perplexity(item):
            item = item if isinstance(item, list) else [item]
            # Slicing is a no-op for items already within the limit.
            item = item[: self.max_tokens_per_sample]
            return await self.imodel.compute_perplexity(
                initial_context=initial_text,
                tokens=item,
                n_samples=n_samples or self.n_samples,
                system_msg=self.system_msg,
                **kwargs,
            )

        if not isinstance(items, list):
            items = self.tokenize(items)

        _segments = []
        _context = initial_text or ""

        if cumulative:
            # Each segment is the running concatenation of everything so far.
            for i in items:
                if isinstance(i, list):
                    _context += " " + " ".join(i).strip()
                else:
                    _context += " " + i.strip()

                _segments.append(_context)
        else:
            _segments = items

        _task = [
            asyncio.create_task(_get_item_perplexity(segment))
            for segment in _segments
        ]

        results = await asyncio.gather(*_task)
        results = list(zip(items, results))
        return sorted(results, key=lambda x: x[1]["logprobs"], reverse=True)

    async def compress(
        self,
        text,
        target_ratio=None,
        initial_text=None,
        cumulative=False,
        split_kwargs=None,
        split_overlap=None,
        split_threshold=None,
        rank_by="perplexity",
        min_compression_score=None,
        verbose=None,
        **kwargs,
    ):
        """Compress ``text`` by keeping its highest ranked chunks.

        Args:
            text: the text to compress.
            target_ratio: fraction of the original token count to keep;
                falls back to ``self.target_ratio`` when falsy.
            initial_text: context passed through to ``rank_by_pplex``.
            cumulative: passed through to ``rank_by_pplex``.
            split_kwargs: keyword arguments for ``self.split``. When omitted,
                chunks of ``self.max_tokens_per_sample`` are requested with
                ``split_overlap`` / ``split_threshold`` (0 when falsy). A
                caller-supplied dict is used as given.
            rank_by: ranking method; only ``"perplexity"`` is supported.
            min_compression_score: minimum perplexity for an item to be
                kept; falls back to ``self.min_compression_score``.
            verbose: print a summary report. ``None`` (default) defers to the
                ``verbose`` flag given to the constructor.
            **kwargs: forwarded to ``rank_by_pplex``.

        Returns:
            The selected chunks, stripped and concatenated, with blank-line
            separators (``"\\n\\n"``) removed.

        Raises:
            ValueError: if ``rank_by`` is not ``"perplexity"``.
        """
        # Fail fast, before any tokenisation or model calls are made.
        if rank_by != "perplexity":
            raise ValueError(f"Ranking method {rank_by} is not supported")

        if verbose is None:
            verbose = self.verbose

        start = time()
        if split_kwargs is None:
            split_kwargs = {
                "chunk_size": self.max_tokens_per_sample,
                "overlap": split_overlap or 0,
                "threshold": split_threshold or 0,
            }

        len_tokens = len(self.tokenize(text))

        items = self.split(text, return_tokens=True, **split_kwargs)

        ranked_items = await self.rank_by_pplex(
            items=items, initial_text=initial_text, cumulative=cumulative, **kwargs
        )

        prompt_tokens = sum(i[1]["num_prompt_tokens"] for i in ranked_items)
        num_completion_tokens = sum(
            i[1]["num_completion_tokens"] for i in ranked_items
        )

        # NOTE(review): the rates are hard-coded (0.5 / 1.5 per million
        # prompt / completion tokens) and do not depend on the configured
        # model - confirm they match the model actually in use.
        price = (
            prompt_tokens * 0.5 / 1000000 + num_completion_tokens * 1.5 / 1000000
        )

        selected_items = self.select_by_pplex(
            ranked_items=ranked_items,
            target_compression_ratio=target_ratio or self.target_ratio,
            original_length=len_tokens,
            min_pplex=min_compression_score or self.min_compression_score,
        )

        if verbose:

            def _f(i):
                if isinstance(i, str):
                    i = self.tokenize(i)

                if isinstance(i, list):
                    return len(to_list(i, dropna=True, flatten=True))

                # A custom tokenizer may return a non-list container; the
                # report must not crash because of that.
                return len(i) if hasattr(i, "__len__") else 0

            len_ = sum(_f(i) for i in selected_items)
            # Empty input has zero tokens; avoid dividing by zero.
            ratio = len_ / len_tokens if len_tokens else 0.0

            msg = ""
            msg += f"Original Token number: {len_tokens}\n"
            msg += f"Selected Token number: {len_}\n"
            msg += f"Token Compression Ratio: {ratio:.03f}\n"
            msg += f"Compression Time: {time() - start:.04f} seconds\n"
            msg += f"Compression Model: {self.imodel.iModel_name}\n"
            msg += f"Compression Method: {rank_by}\n"
            msg += f"Compression Usage: ${price:.05f}\n"
            print(msg)

        a = "".join([i.strip() for i in selected_items]).strip()
        a = a.replace("\n\n", "")
        return a

    def select_by_pplex(
        self, ranked_items, target_compression_ratio, original_length, min_pplex=None
    ):
        """Pick ranked items, in order, until the token budget is used up.

        Args:
            ranked_items: iterable of ``(item, info)`` pairs; ``info`` must
                contain a ``"perplexity"`` entry.
            target_compression_ratio: fraction of ``original_length`` to keep.
            original_length: token count of the uncompressed text.
            min_pplex: items whose perplexity is not strictly greater than
                this are skipped; falsy values are treated as 0.

        Returns:
            A list of strings, one per selected item (its tokens joined
            without a separator). Selection stops at the first eligible item
            that would exceed the budget.
        """
        min_pplex = min_pplex or 0

        desired_length = int(original_length * target_compression_ratio)

        items = []
        current_length = 0

        for item, info in ranked_items:
            if info["perplexity"] > min_pplex:
                item = self.tokenize(item) if isinstance(item, str) else item
                item = item if isinstance(item, list) else [item]
                item = to_list(item, dropna=True, flatten=True)
                if current_length + len(item) > desired_length:
                    break
                current_length += len(item)
                items.append("".join(item))

        return items
@@ -0,0 +1,61 @@
1
+ # from lionagi.core.collections import iModel
2
+ # from .base import TokenCompressor
3
+
4
+
5
+ # class LLMSummarizer(TokenCompressor):
6
+
7
+ # def __init__(
8
+ # self, imodel: iModel = None, system_msg=None, tokenizer=None, splitter=None,
9
+ # max_tokens=25, target_ratio=0.3
10
+ # ):
11
+ # imodel = imodel or iModel(model="gpt-3.5-turbo", max_tokens=max_tokens)
12
+ # super().__init__(imodel=imodel, tokenizer=tokenizer, splitter=splitter)
13
+ # self.system_msg = (
14
+ # system_msg
15
+ # or "Summarize the following sentence to be concise and informative:"
16
+ # )
17
+ # self.target_ratio = target_ratio
18
+
19
+ # async def summarize_sentence(self, sentence, **kwargs):
20
+ # messages = [
21
+ # {"role": "system", "content": self.system_msg},
22
+ # {"role": "user", "content": sentence},
23
+ # ]
24
+ # response = await self.imodel.call_chat_completion(messages, **kwargs)
25
+ # return response["choices"][0]["message"]["content"]
26
+
27
+ # def tokenize(self, text):
28
+ # tokenize_func = self.tokenizer or tokenize
29
+ # return tokenize_func(text)
30
+
31
+ # def split(self, text):
32
+ # split_func = self.splitter or split_into_segments
33
+ # return split_func(text)
34
+
35
+ # # Function to enforce maximum sentence length
36
+ # def enforce_max_sentence_length(self, sentence, max_words=25):
37
+ # words = self.tokenize(sentence)
38
+ # if len(words) > max_words:
39
+ # sentence = ' '.join(words[:max_words])
40
+ # return sentence
41
+
42
+ # async def summarize_text(self, text, max_length_per_sentence=25, target_ratio=None, **kwargs):
43
+ # sentences = self.split(text)
44
+ # summarized = await alcall(
45
+ # sentences, self.summarize_sentence, **kwargs
46
+ # )
47
+ # summarized = [
48
+ # self.enforce_max_sentence_length(sentence, max_length_per_sentence)
49
+ # for sentence in summarized
50
+ # ]
51
+
52
+ # original_length = len(self.tokenize(text))
53
+ # summarized_length = len(self.tokenize(' '.join(summarized)))
54
+ # current_ratio = summarized_length / original_length
55
+
56
+ # target_ratio = target_ratio or self.target_ratio
57
+ # if current_ratio > target_ratio:
58
+ # words_to_remove = int((current_ratio - target_ratio) * original_length)
59
+ # return ' '.join(summarized[:-words_to_remove])
60
+
61
+ # return ' '.join(summarized)
@@ -0,0 +1,70 @@
1
+ # import asyncio
2
+ # from lionagi import alcall
3
+ # from lionagi.libs.ln_convert import to_list
4
+ # import numpy as np
5
+
6
+ # def split_into_segments(text):
7
+ # segments = text.split(".") # Splitting by period followed by a space
8
+ # return [segment.strip() for segment in segments if segment]
9
+
10
+ # # Tokenize the segment
11
+ # def tokenize(segment):
12
+ # tokens = segment.split() # Simple space-based tokenization
13
+ # return tokens
14
+
15
+ # async def calculate_perplexity(system_msg: str, imodel, tokens, initial_context=None, **kwargs):
16
+ # _tasks = []
17
+ # _context = initial_context or ""
18
+ # for i in range(len(tokens)):
19
+ # _context += " " + tokens[i]
20
+ # messages = [
21
+ # {"role": "system", "content": system_msg},
22
+ # {"role": "user", "content": _context},
23
+ # ]
24
+ # task = asyncio.create_task(
25
+ # imodel.call_chat_completion(
26
+ # messages=messages, logprobs=True, max_tokens=1, **kwargs
27
+ # )
28
+ # )
29
+ # _tasks.append(task)
30
+
31
+ # results = await asyncio.gather(*_tasks)
32
+ # logprobs = [
33
+ # result[1]["choices"][0]["logprobs"]["content"] for result in results
34
+ # ]
35
+ # logprobs = to_list(logprobs, flatten=True, dropna=True)
36
+ # logprobs = [lprob_["logprob"] for lprob_ in logprobs]
37
+ # return np.exp(np.mean(logprobs))
38
+
39
+ # async def rank_by_perplexity(
40
+ # text: str | list[str] = None, # if list we assume they are already well split
41
+ # initial_text=None,
42
+
43
+ # segments,
44
+ # initial_text=None,
45
+ # cumulative=False,
46
+ # **kwargs
47
+ # ):
48
+ # _segments = []
49
+ # _context = initial_text or ""
50
+ # _task = []
51
+
52
+ # if cumulative:
53
+ # for i in range(1, len(segments)):
54
+ # _context += " " + segments[i - 1]
55
+ # _segments.append(_context)
56
+ # else:
57
+ # _segments = segments
58
+
59
+ # for i in segments:
60
+ # _task.append(asyncio.create_task(
61
+ # calculate_perplexity(
62
+ # self.system_msg, self.imodel, self.tokenize(i), **kwargs)
63
+ # )
64
+ # )
65
+ # segment_perplexities = await asyncio.gather(*_task)
66
+
67
+ # return {
68
+ # segment: perplexity
69
+ # for segment, perplexity in zip(segments, segment_perplexities)
70
+ # }
@@ -0,0 +1,19 @@
1
+ # from ..form.predict import predict
2
+ # from .select import select
3
+ # from ..form.score import score
4
+ # from ..form.react import react
5
+ # from .vote import vote
6
+ # from ..form.plan import plan
7
+ # from .cot import chain_of_thoughts, chain_of_react
8
+
9
+
10
+ # __all__ = [
11
+ # "predict",
12
+ # "select",
13
+ # "score",
14
+ # "vote",
15
+ # "react",
16
+ # "plan",
17
+ # "chain_of_thoughts",
18
+ # "chain_of_react",
19
+ # ]
File without changes
@@ -0,0 +1,282 @@
1
+ """
2
+ Copyright 2024 HaiyangLi
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ from typing import List, Optional
18
+
19
+ from lionagi.experimental.directive.tokenizer import BaseToken
20
+ from ..template.schema import IfNode, TryNode, ForNode
21
+
22
+
23
class BaseDirectiveParser:
    """A base parser with lookahead, error recovery, and backtracking support.

    The parser walks a flat list of tokens. Every token is expected to expose
    a ``type`` and a ``value`` attribute; statements are delimited by ``;``
    tokens and blocks are closed by ``ENDIF`` / ``ENDFOR`` / ``ENDTRY``.

    Attributes:
        tokens (List[BaseToken]): A list of tokens to be parsed.
        current_token_index (int): The index of the current token in the tokens list.
        current_token (Optional[BaseToken]): The current token being processed,
            or None once the input is exhausted.

    Examples:
        >>> tokenizer = BaseTokenizer("IF x > 10 THEN DO something ENDIF")
        >>> tokens = tokenizer.get_tokens()
        >>> parser = BaseDirectiveParser(tokens)
        >>> print(parser.current_token)
        BaseToken(KEYWORD, IF)
    """

    def __init__(self, tokens: "List[BaseToken]"):
        self.tokens = tokens
        # Start one position before the first token so that the initial
        # next_token() call lands on index 0 (or on None for empty input).
        self.current_token_index = -1
        self.current_token: "Optional[BaseToken]" = None
        self.next_token()

    def next_token(self) -> None:
        """Advances to the next token in the list.

        Once the end of the list is passed, ``current_token`` becomes None.
        """
        self.current_token_index += 1
        if self.current_token_index < len(self.tokens):
            self.current_token = self.tokens[self.current_token_index]
        else:
            self.current_token = None

    def peek_next_token(self, offset: int = 1) -> "Optional[BaseToken]":
        """Peeks at the next token without consuming it.

        Args:
            offset (int): The number of tokens to look ahead.

        Returns:
            Optional[BaseToken]: The token at the specified lookahead offset,
                or None if that position is outside the token list.
        """
        peek_index = self.current_token_index + offset
        # The lower bound prevents a negative index from silently wrapping
        # around to the end of the list.
        if 0 <= peek_index < len(self.tokens):
            return self.tokens[peek_index]
        return None

    def skip_until(self, token_types: List[str]) -> None:
        """Skips tokens until a token of the specified type is found.

        Stops at the end of input if no matching token exists.

        Args:
            token_types (List[str]): A list of token types to stop skipping.
        """
        while self.current_token and self.current_token.type not in token_types:
            self.next_token()

    def mark(self) -> int:
        """Marks the current position in the token list for potential backtracking.

        Returns:
            int: The current token index.
        """
        return self.current_token_index

    def reset_to_mark(self, mark: int) -> None:
        """Resets the parser to a previously marked position.

        Args:
            mark (int): The token index to reset to.
        """
        # Step back one position and let next_token() reload current_token.
        self.current_token_index = mark - 1
        self.next_token()

    def skip_semicolon(self):
        """Skips a semicolon token if it is the current token."""
        if self.current_token and self.current_token.value == ";":
            self.next_token()

    def _current_value(self):
        """Returns the current token's value, or None when input is exhausted."""
        token = self.current_token
        return token.value if token else None

    def _at_keyword(self, value: str) -> bool:
        """Tells whether the current token is the KEYWORD token `value`."""
        token = self.current_token
        return bool(token) and token.type == "KEYWORD" and token.value == value

    def parse_expression(self):
        """Parses an expression until a semicolon is encountered.

        Returns:
            str: The parsed expression as a string (token values joined by spaces).

        Raises:
            SyntaxError: If a semicolon is not found at the end of the expression.
        """
        parts = []
        while self.current_token and self.current_token.value != ";":
            parts.append(self.current_token.value)
            self.next_token()
        # The loop stops either on ';' or at end of input; only the latter is
        # an error. Checking for the missing token avoids an AttributeError.
        if not self.current_token:
            raise SyntaxError("Expected ';' at the end of the condition")
        self.next_token()  # Move past the semicolon to the next part of the statement
        return " ".join(parts).strip()

    def parse_if_block(self):
        """Parses a block of statements for an IF condition.

        Parsing stops, without consuming it, at 'ELSE' or 'ENDIF', or at the
        end of input.

        Returns:
            list: The parsed block of statements as a list of strings.

        Raises:
            SyntaxError: If a 'DO' is the last token, with no action after it.
        """
        block = []
        while self.current_token and self.current_token.value not in ("ENDIF", "ELSE"):
            if self.current_token.value == "DO":
                self.next_token()  # Move past 'DO' to get to the action
                if not self.current_token:
                    raise SyntaxError("Expected an action after 'DO'")
            block.append(self.current_token.value)  # Add the action to the block
            self.next_token()
            # The action may be followed by a semicolon; it is not part of the block.
            self.skip_semicolon()
        return block

    def parse_if_statement(self):
        """Parses an IF statement.

        Returns:
            IfNode: The parsed IF statement as an IfNode object.

        Raises:
            SyntaxError: If the IF statement is not properly formed.
        """
        if not self._at_keyword("IF"):
            raise SyntaxError("Expected IF statement")
        self.next_token()  # Skip 'IF'

        condition = self.parse_expression()  # Ends just after the semicolon

        true_block = []
        if self._current_value() == "DO":
            true_block = self.parse_if_block()  # Parse true block after 'DO'

        false_block = None
        if self._current_value() == "ELSE":
            self.next_token()  # Skip 'ELSE', expect 'DO' next for the false block
            self.skip_semicolon()
            if self._current_value() != "DO":
                raise SyntaxError("Expected 'DO' after 'ELSE'")
            self.next_token()  # Skip 'DO'
            false_block = self.parse_if_block()  # Parse false block

        # NOTE(review): unlike ENDFOR/ENDTRY, a trailing 'ENDIF' is neither
        # required nor consumed here; the parser is left positioned on it.
        # Kept as-is because callers may rely on that position -- confirm.
        return IfNode(condition, true_block, false_block)

    def parse_for_statement(self):
        """Parses a FOR statement.

        Returns:
            ForNode: The parsed FOR statement as a ForNode object.

        Raises:
            SyntaxError: If the FOR statement is not properly formed.
        """
        if not self._at_keyword("FOR"):
            raise SyntaxError("Expected FOR statement")
        self.next_token()  # Skip 'FOR'

        # Parse the iterator variable
        if not self.current_token or self.current_token.type != "IDENTIFIER":
            raise SyntaxError("Expected iterator variable after FOR")
        iterator = self.current_token.value
        self.next_token()  # Move past the iterator variable

        # Expect and skip 'IN' keyword
        if not self._at_keyword("IN"):
            raise SyntaxError("Expected 'IN' after iterator variable")
        self.next_token()  # Move past 'IN'

        # Parse the collection
        if not self.current_token or self.current_token.type not in (
            "IDENTIFIER",
            "LITERAL",
        ):
            raise SyntaxError("Expected collection after 'IN'")
        collection = self.current_token.value
        self.next_token()  # Move past the collection

        # Now, parse the block of statements to execute
        true_block = self.parse_for_block()

        # Construct and return a ForNode
        return ForNode(iterator, collection, true_block)

    def parse_for_block(self):
        """Parses a block of statements for a FOR loop.

        Semicolons and 'DO' tokens are dropped; every other token value up to
        'ENDFOR' is collected. The 'ENDFOR' token itself is consumed.

        Returns:
            list: The parsed block of statements as a list of strings.
        """
        block = []
        # Skip initial 'DO' if present
        if self.current_token and self.current_token.value == "DO":
            self.next_token()

        while self.current_token and self.current_token.value != "ENDFOR":
            # Separators and 'DO' markers are not part of the block.
            if self.current_token.value not in (";", "DO"):
                block.append(self.current_token.value)
            self.next_token()

        # NOTE(review): a missing 'ENDFOR' is tolerated (the loop also ends at
        # end of input); kept lenient to preserve existing behavior.
        self.next_token()  # Skip 'ENDFOR'
        return block

    def parse_try_statement(self):
        """Parses a TRY statement.

        Returns:
            TryNode: The parsed TRY statement as a TryNode object.

        Raises:
            SyntaxError: If the TRY statement is not properly formed, including
                a missing 'EXCEPT' or 'ENDTRY'.
        """
        if not self._at_keyword("TRY"):
            raise SyntaxError("Expected TRY statement")
        self.next_token()  # Skip 'TRY'

        try_block = self.parse_try_block("EXCEPT")  # Parse the try block until 'EXCEPT'

        # Now expecting 'EXCEPT' keyword
        if self._current_value() != "EXCEPT":
            raise SyntaxError("Expected 'EXCEPT' after try block")
        self.next_token()  # Move past 'EXCEPT'

        except_block = self.parse_try_block("ENDTRY")  # Parse until 'ENDTRY'

        # parse_try_block stops on 'ENDTRY' or at end of input, so the only
        # failure mode is running out of tokens; that must be reported.
        if self._current_value() != "ENDTRY":
            raise SyntaxError("Expected 'ENDTRY' at the end of except block")
        self.next_token()  # Move past 'ENDTRY' for subsequent parsing

        return TryNode(try_block, except_block)

    def parse_try_block(self, stop_keyword):
        """Parses a block of statements for a TRY or EXCEPT clause.

        'DO' markers and semicolons are skipped; every other token value is
        collected. The stop keyword is not consumed.

        Args:
            stop_keyword (str): The keyword that indicates the end of the block.

        Returns:
            list: The parsed block of statements as a list of strings.
        """
        block = []
        while self.current_token and self.current_token.value != stop_keyword:
            if self.current_token.value not in ("DO", ";"):
                block.append(self.current_token.value)  # Add the action to the block
            self.next_token()

        return block
278
+
279
+
280
+ # "IF condition1 && condition2; DO action2; ELSE; DO action3; ENDIF;"
281
+ # "FOR input_ IN collections; DO action(input_); ENDFOR;"
282
+ # "TRY; DO action(); EXCEPT; DO action(input_); ENDTRY;"
File without changes