lionagi 0.0.312__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. lionagi/__init__.py +61 -3
  2. lionagi/core/__init__.py +0 -14
  3. lionagi/core/_setting/_setting.py +59 -0
  4. lionagi/core/action/__init__.py +14 -0
  5. lionagi/core/action/function_calling.py +136 -0
  6. lionagi/core/action/manual.py +1 -0
  7. lionagi/core/action/node.py +109 -0
  8. lionagi/core/action/tool.py +114 -0
  9. lionagi/core/action/tool_manager.py +356 -0
  10. lionagi/core/agent/__init__.py +0 -3
  11. lionagi/core/agent/base_agent.py +45 -36
  12. lionagi/core/agent/eval/evaluator.py +1 -0
  13. lionagi/core/agent/eval/vote.py +40 -0
  14. lionagi/core/agent/learn/learner.py +59 -0
  15. lionagi/core/agent/plan/unit_template.py +1 -0
  16. lionagi/core/collections/__init__.py +17 -0
  17. lionagi/core/collections/_logger.py +319 -0
  18. lionagi/core/collections/abc/__init__.py +53 -0
  19. lionagi/core/collections/abc/component.py +615 -0
  20. lionagi/core/collections/abc/concepts.py +297 -0
  21. lionagi/core/collections/abc/exceptions.py +150 -0
  22. lionagi/core/collections/abc/util.py +45 -0
  23. lionagi/core/collections/exchange.py +161 -0
  24. lionagi/core/collections/flow.py +426 -0
  25. lionagi/core/collections/model.py +419 -0
  26. lionagi/core/collections/pile.py +913 -0
  27. lionagi/core/collections/progression.py +236 -0
  28. lionagi/core/collections/util.py +64 -0
  29. lionagi/core/director/direct.py +314 -0
  30. lionagi/core/director/director.py +2 -0
  31. lionagi/core/engine/branch_engine.py +333 -0
  32. lionagi/core/engine/instruction_map_engine.py +204 -0
  33. lionagi/core/engine/sandbox_.py +14 -0
  34. lionagi/core/engine/script_engine.py +99 -0
  35. lionagi/core/executor/base_executor.py +90 -0
  36. lionagi/core/executor/graph_executor.py +330 -0
  37. lionagi/core/executor/neo4j_executor.py +384 -0
  38. lionagi/core/generic/__init__.py +7 -0
  39. lionagi/core/generic/edge.py +112 -0
  40. lionagi/core/generic/edge_condition.py +16 -0
  41. lionagi/core/generic/graph.py +236 -0
  42. lionagi/core/generic/hyperedge.py +1 -0
  43. lionagi/core/generic/node.py +220 -0
  44. lionagi/core/generic/tree.py +48 -0
  45. lionagi/core/generic/tree_node.py +79 -0
  46. lionagi/core/mail/__init__.py +7 -3
  47. lionagi/core/mail/mail.py +25 -0
  48. lionagi/core/mail/mail_manager.py +142 -58
  49. lionagi/core/mail/package.py +45 -0
  50. lionagi/core/mail/start_mail.py +36 -0
  51. lionagi/core/message/__init__.py +19 -0
  52. lionagi/core/message/action_request.py +133 -0
  53. lionagi/core/message/action_response.py +135 -0
  54. lionagi/core/message/assistant_response.py +95 -0
  55. lionagi/core/message/instruction.py +234 -0
  56. lionagi/core/message/message.py +101 -0
  57. lionagi/core/message/system.py +86 -0
  58. lionagi/core/message/util.py +283 -0
  59. lionagi/core/report/__init__.py +4 -0
  60. lionagi/core/report/base.py +217 -0
  61. lionagi/core/report/form.py +231 -0
  62. lionagi/core/report/report.py +166 -0
  63. lionagi/core/report/util.py +28 -0
  64. lionagi/core/rule/__init__.py +0 -0
  65. lionagi/core/rule/_default.py +16 -0
  66. lionagi/core/rule/action.py +99 -0
  67. lionagi/core/rule/base.py +238 -0
  68. lionagi/core/rule/boolean.py +56 -0
  69. lionagi/core/rule/choice.py +47 -0
  70. lionagi/core/rule/mapping.py +96 -0
  71. lionagi/core/rule/number.py +71 -0
  72. lionagi/core/rule/rulebook.py +109 -0
  73. lionagi/core/rule/string.py +52 -0
  74. lionagi/core/rule/util.py +35 -0
  75. lionagi/core/session/__init__.py +0 -3
  76. lionagi/core/session/branch.py +431 -0
  77. lionagi/core/session/directive_mixin.py +287 -0
  78. lionagi/core/session/session.py +230 -902
  79. lionagi/core/structure/__init__.py +1 -0
  80. lionagi/core/structure/chain.py +1 -0
  81. lionagi/core/structure/forest.py +1 -0
  82. lionagi/core/structure/graph.py +1 -0
  83. lionagi/core/structure/tree.py +1 -0
  84. lionagi/core/unit/__init__.py +5 -0
  85. lionagi/core/unit/parallel_unit.py +245 -0
  86. lionagi/core/unit/template/__init__.py +0 -0
  87. lionagi/core/unit/template/action.py +81 -0
  88. lionagi/core/unit/template/base.py +51 -0
  89. lionagi/core/unit/template/plan.py +84 -0
  90. lionagi/core/unit/template/predict.py +109 -0
  91. lionagi/core/unit/template/score.py +124 -0
  92. lionagi/core/unit/template/select.py +104 -0
  93. lionagi/core/unit/unit.py +362 -0
  94. lionagi/core/unit/unit_form.py +305 -0
  95. lionagi/core/unit/unit_mixin.py +1168 -0
  96. lionagi/core/unit/util.py +71 -0
  97. lionagi/core/validator/__init__.py +0 -0
  98. lionagi/core/validator/validator.py +364 -0
  99. lionagi/core/work/__init__.py +0 -0
  100. lionagi/core/work/work.py +76 -0
  101. lionagi/core/work/work_function.py +101 -0
  102. lionagi/core/work/work_queue.py +103 -0
  103. lionagi/core/work/worker.py +258 -0
  104. lionagi/core/work/worklog.py +120 -0
  105. lionagi/experimental/__init__.py +0 -0
  106. lionagi/experimental/compressor/__init__.py +0 -0
  107. lionagi/experimental/compressor/base.py +46 -0
  108. lionagi/experimental/compressor/llm_compressor.py +247 -0
  109. lionagi/experimental/compressor/llm_summarizer.py +61 -0
  110. lionagi/experimental/compressor/util.py +70 -0
  111. lionagi/experimental/directive/__init__.py +19 -0
  112. lionagi/experimental/directive/parser/__init__.py +0 -0
  113. lionagi/experimental/directive/parser/base_parser.py +282 -0
  114. lionagi/experimental/directive/template/__init__.py +0 -0
  115. lionagi/experimental/directive/template/base_template.py +79 -0
  116. lionagi/experimental/directive/template/schema.py +36 -0
  117. lionagi/experimental/directive/tokenizer.py +73 -0
  118. lionagi/experimental/evaluator/__init__.py +0 -0
  119. lionagi/experimental/evaluator/ast_evaluator.py +131 -0
  120. lionagi/experimental/evaluator/base_evaluator.py +218 -0
  121. lionagi/experimental/knowledge/__init__.py +0 -0
  122. lionagi/experimental/knowledge/base.py +10 -0
  123. lionagi/experimental/knowledge/graph.py +0 -0
  124. lionagi/experimental/memory/__init__.py +0 -0
  125. lionagi/experimental/strategies/__init__.py +0 -0
  126. lionagi/experimental/strategies/base.py +1 -0
  127. lionagi/integrations/bridge/autogen_/__init__.py +0 -0
  128. lionagi/integrations/bridge/autogen_/autogen_.py +124 -0
  129. lionagi/integrations/bridge/langchain_/documents.py +4 -0
  130. lionagi/integrations/bridge/llamaindex_/index.py +30 -0
  131. lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py +6 -0
  132. lionagi/integrations/bridge/llamaindex_/llama_pack.py +227 -0
  133. lionagi/integrations/bridge/llamaindex_/node_parser.py +6 -9
  134. lionagi/integrations/bridge/pydantic_/pydantic_bridge.py +1 -0
  135. lionagi/integrations/bridge/transformers_/__init__.py +0 -0
  136. lionagi/integrations/bridge/transformers_/install_.py +36 -0
  137. lionagi/integrations/chunker/__init__.py +0 -0
  138. lionagi/integrations/chunker/chunk.py +312 -0
  139. lionagi/integrations/config/oai_configs.py +38 -7
  140. lionagi/integrations/config/ollama_configs.py +1 -1
  141. lionagi/integrations/config/openrouter_configs.py +14 -2
  142. lionagi/integrations/loader/__init__.py +0 -0
  143. lionagi/integrations/loader/load.py +253 -0
  144. lionagi/integrations/loader/load_util.py +195 -0
  145. lionagi/integrations/provider/_mapping.py +46 -0
  146. lionagi/integrations/provider/litellm.py +2 -1
  147. lionagi/integrations/provider/mlx_service.py +16 -9
  148. lionagi/integrations/provider/oai.py +91 -4
  149. lionagi/integrations/provider/ollama.py +7 -6
  150. lionagi/integrations/provider/openrouter.py +115 -8
  151. lionagi/integrations/provider/services.py +2 -2
  152. lionagi/integrations/provider/transformers.py +18 -22
  153. lionagi/integrations/storage/__init__.py +3 -0
  154. lionagi/integrations/storage/neo4j.py +665 -0
  155. lionagi/integrations/storage/storage_util.py +287 -0
  156. lionagi/integrations/storage/structure_excel.py +285 -0
  157. lionagi/integrations/storage/to_csv.py +63 -0
  158. lionagi/integrations/storage/to_excel.py +83 -0
  159. lionagi/libs/__init__.py +26 -1
  160. lionagi/libs/ln_api.py +78 -23
  161. lionagi/libs/ln_context.py +37 -0
  162. lionagi/libs/ln_convert.py +21 -9
  163. lionagi/libs/ln_func_call.py +69 -28
  164. lionagi/libs/ln_image.py +107 -0
  165. lionagi/libs/ln_knowledge_graph.py +405 -0
  166. lionagi/libs/ln_nested.py +26 -11
  167. lionagi/libs/ln_parse.py +110 -14
  168. lionagi/libs/ln_queue.py +117 -0
  169. lionagi/libs/ln_tokenize.py +164 -0
  170. lionagi/{core/prompt/field_validator.py → libs/ln_validate.py} +79 -14
  171. lionagi/libs/special_tokens.py +172 -0
  172. lionagi/libs/sys_util.py +107 -2
  173. lionagi/lions/__init__.py +0 -0
  174. lionagi/lions/coder/__init__.py +0 -0
  175. lionagi/lions/coder/add_feature.py +20 -0
  176. lionagi/lions/coder/base_prompts.py +22 -0
  177. lionagi/lions/coder/code_form.py +13 -0
  178. lionagi/lions/coder/coder.py +168 -0
  179. lionagi/lions/coder/util.py +96 -0
  180. lionagi/lions/researcher/__init__.py +0 -0
  181. lionagi/lions/researcher/data_source/__init__.py +0 -0
  182. lionagi/lions/researcher/data_source/finhub_.py +191 -0
  183. lionagi/lions/researcher/data_source/google_.py +199 -0
  184. lionagi/lions/researcher/data_source/wiki_.py +96 -0
  185. lionagi/lions/researcher/data_source/yfinance_.py +21 -0
  186. lionagi/tests/integrations/__init__.py +0 -0
  187. lionagi/tests/libs/__init__.py +0 -0
  188. lionagi/tests/libs/test_field_validators.py +353 -0
  189. lionagi/tests/{test_libs → libs}/test_func_call.py +23 -21
  190. lionagi/tests/{test_libs → libs}/test_nested.py +36 -21
  191. lionagi/tests/{test_libs → libs}/test_parse.py +1 -1
  192. lionagi/tests/libs/test_queue.py +67 -0
  193. lionagi/tests/test_core/collections/__init__.py +0 -0
  194. lionagi/tests/test_core/collections/test_component.py +206 -0
  195. lionagi/tests/test_core/collections/test_exchange.py +138 -0
  196. lionagi/tests/test_core/collections/test_flow.py +145 -0
  197. lionagi/tests/test_core/collections/test_pile.py +171 -0
  198. lionagi/tests/test_core/collections/test_progression.py +129 -0
  199. lionagi/tests/test_core/generic/__init__.py +0 -0
  200. lionagi/tests/test_core/generic/test_edge.py +67 -0
  201. lionagi/tests/test_core/generic/test_graph.py +96 -0
  202. lionagi/tests/test_core/generic/test_node.py +106 -0
  203. lionagi/tests/test_core/generic/test_tree_node.py +73 -0
  204. lionagi/tests/test_core/test_branch.py +115 -292
  205. lionagi/tests/test_core/test_form.py +46 -0
  206. lionagi/tests/test_core/test_report.py +105 -0
  207. lionagi/tests/test_core/test_validator.py +111 -0
  208. lionagi/version.py +1 -1
  209. {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/LICENSE +12 -11
  210. {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/METADATA +19 -118
  211. lionagi-0.2.1.dist-info/RECORD +240 -0
  212. lionagi/core/branch/__init__.py +0 -4
  213. lionagi/core/branch/base_branch.py +0 -654
  214. lionagi/core/branch/branch.py +0 -471
  215. lionagi/core/branch/branch_flow_mixin.py +0 -96
  216. lionagi/core/branch/executable_branch.py +0 -347
  217. lionagi/core/branch/util.py +0 -323
  218. lionagi/core/direct/__init__.py +0 -6
  219. lionagi/core/direct/predict.py +0 -161
  220. lionagi/core/direct/score.py +0 -278
  221. lionagi/core/direct/select.py +0 -169
  222. lionagi/core/direct/utils.py +0 -87
  223. lionagi/core/direct/vote.py +0 -64
  224. lionagi/core/flow/base/baseflow.py +0 -23
  225. lionagi/core/flow/monoflow/ReAct.py +0 -238
  226. lionagi/core/flow/monoflow/__init__.py +0 -9
  227. lionagi/core/flow/monoflow/chat.py +0 -95
  228. lionagi/core/flow/monoflow/chat_mixin.py +0 -263
  229. lionagi/core/flow/monoflow/followup.py +0 -214
  230. lionagi/core/flow/polyflow/__init__.py +0 -1
  231. lionagi/core/flow/polyflow/chat.py +0 -248
  232. lionagi/core/mail/schema.py +0 -56
  233. lionagi/core/messages/__init__.py +0 -3
  234. lionagi/core/messages/schema.py +0 -533
  235. lionagi/core/prompt/prompt_template.py +0 -316
  236. lionagi/core/schema/__init__.py +0 -22
  237. lionagi/core/schema/action_node.py +0 -29
  238. lionagi/core/schema/base_mixin.py +0 -296
  239. lionagi/core/schema/base_node.py +0 -199
  240. lionagi/core/schema/condition.py +0 -24
  241. lionagi/core/schema/data_logger.py +0 -354
  242. lionagi/core/schema/data_node.py +0 -93
  243. lionagi/core/schema/prompt_template.py +0 -67
  244. lionagi/core/schema/structure.py +0 -910
  245. lionagi/core/tool/__init__.py +0 -3
  246. lionagi/core/tool/tool_manager.py +0 -280
  247. lionagi/integrations/bridge/pydantic_/base_model.py +0 -7
  248. lionagi/tests/test_core/test_base_branch.py +0 -427
  249. lionagi/tests/test_core/test_chat_flow.py +0 -63
  250. lionagi/tests/test_core/test_mail_manager.py +0 -75
  251. lionagi/tests/test_core/test_prompts.py +0 -51
  252. lionagi/tests/test_core/test_session.py +0 -254
  253. lionagi/tests/test_core/test_session_base_util.py +0 -312
  254. lionagi/tests/test_core/test_tool_manager.py +0 -95
  255. lionagi-0.0.312.dist-info/RECORD +0 -111
  256. /lionagi/core/{branch/base → _setting}/__init__.py +0 -0
  257. /lionagi/core/{flow → agent/eval}/__init__.py +0 -0
  258. /lionagi/core/{flow/base → agent/learn}/__init__.py +0 -0
  259. /lionagi/core/{prompt → agent/plan}/__init__.py +0 -0
  260. /lionagi/core/{tool/manual.py → agent/plan/plan.py} +0 -0
  261. /lionagi/{tests/test_integrations → core/director}/__init__.py +0 -0
  262. /lionagi/{tests/test_libs → core/engine}/__init__.py +0 -0
  263. /lionagi/{tests/test_libs/test_async.py → core/executor/__init__.py} +0 -0
  264. /lionagi/tests/{test_libs → libs}/test_api.py +0 -0
  265. /lionagi/tests/{test_libs → libs}/test_convert.py +0 -0
  266. /lionagi/tests/{test_libs → libs}/test_sys_util.py +0 -0
  267. {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/WHEEL +0 -0
  268. {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/top_level.txt +0 -0
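
The hunks below cover items 108-114 in the list above: the new experimental compressor modules and the directive parser. They build on the new `iModel` wrapper from `lionagi.core.collections`. As a rough orientation, construction looks like this; the keyword arguments are the ones used in the compressor code below, everything else is illustrative:

from lionagi.core.collections import iModel

# Chat-model wrapper; these arguments mirror the defaults used in
# llm_compressor.py below.
imodel = iModel(model="gpt-3.5-turbo", temperature=0.3)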
lionagi/experimental/compressor/llm_compressor.py
@@ -0,0 +1,247 @@
+ import asyncio
+ from lionagi import alcall
+ from lionagi.libs.ln_convert import to_list
+ import numpy as np
+ from lionagi.core.collections import iModel
+ from .base import TokenCompressor
+ from lionagi.libs.ln_tokenize import TokenizeUtil
+ from time import time
+
+ # inspired by LLMLingua, MIT License, Copyright (c) Microsoft Corporation.
+ # https://github.com/microsoft/LLMLingua
+
+
+ class LLMCompressor(TokenCompressor):
+
+     def __init__(
+         self,
+         imodel: iModel = None,
+         system_msg=None,
+         tokenizer=None,  # must be a callable or object with a tokenize method
+         splitter=None,  # must be a callable or object with a split/chunk/segment method
+         target_ratio=0.2,
+         n_samples=5,  # the cumulative samples to take in each perplexity calculation
+         chunk_size=64,
+         max_tokens_per_sample=80,
+         min_compression_score=0,  # (0-1) the minimum score to consider for compression, 0 means all
+         split_overlap=0,
+         split_threshold=0,
+         verbose=True,
+     ):
+         imodel = imodel or iModel(model="gpt-3.5-turbo", temperature=0.3)
+         super().__init__(imodel=imodel, tokenizer=tokenizer, splitter=splitter)
+         self.system_msg = (
+             system_msg
+             or "Concisely summarize and compress the information for storage:"
+         )
+         self.target_ratio = target_ratio
+         self.n_samples = n_samples
+         self.chunk_size = chunk_size
+         self.max_tokens_per_sample = max_tokens_per_sample
+         self.min_compression_score = min_compression_score
+         self.verbose = verbose
+         self.split_overlap = split_overlap
+         self.split_threshold = split_threshold
+
+     def tokenize(self, text, encoding_name=None, return_byte=False, **kwargs):
+         """
+         By default, `encoding_name` can be one of
+         ['gpt2', 'r50k_base', 'p50k_base', 'p50k_edit', 'cl100k_base', 'o200k_base'],
+         or you can pass an `encoding_model` that tiktoken supports in its model
+         mapping, such as "gpt-4o".
+         """
+         if not self.tokenizer:
+             return TokenizeUtil.tokenize(
+                 text,
+                 encoding_model=self.imodel.iModel_name,
+                 encoding_name=encoding_name,
+                 return_byte=return_byte,
+             )
+
+         if hasattr(self.tokenizer, "tokenize"):
+             return self.tokenizer.tokenize(text, **kwargs)
+
+         return self.tokenizer(text, **kwargs)
+
+     def split(
+         self,
+         text,
+         chunk_size=None,
+         overlap=None,
+         threshold=None,
+         by_chars=False,
+         return_tokens=False,
+         return_byte=False,
+         **kwargs,
+     ):
+         if not self.splitter:
+             splitter = (
+                 TokenizeUtil.chunk_by_chars
+                 if by_chars
+                 else TokenizeUtil.chunk_by_tokens
+             )
+             return splitter(
+                 text,
+                 chunk_size or self.chunk_size,
+                 overlap or self.split_overlap,
+                 threshold or self.split_threshold,
+                 return_tokens=return_tokens,
+                 return_byte=return_byte,
+             )
+
+         # use the first split/chunk/segment method the custom splitter provides
+         method = next(
+             getattr(self.splitter, name)
+             for name in ("split", "chunk", "segment")
+             if hasattr(self.splitter, name)
+         )
+         return method(text, **kwargs)
+
+     async def rank_by_pplex(
+         self, items: list, initial_text=None, cumulative=False, n_samples=None, **kwargs
+     ):
+         """
+         Rank a list of items by their perplexity.
+         An item can be a single token or a list of tokens.
+
+         kwargs: additional arguments to pass to the model.
+         """
+
+         async def _get_item_perplexity(item):
+             item = item if isinstance(item, list) else [item]
+             item = (
+                 item[: self.max_tokens_per_sample]
+                 if len(item) > self.max_tokens_per_sample
+                 else item
+             )
+             return await self.imodel.compute_perplexity(
+                 initial_context=initial_text,
+                 tokens=item,
+                 n_samples=n_samples or self.n_samples,
+                 system_msg=self.system_msg,
+                 **kwargs,
+             )
+
+         if not isinstance(items, list):
+             items = self.tokenize(items)
+
+         if len(items) == 1:
+             return [items]  # no need to rank a single item
+
+         _segments = []
+         _context = initial_text or ""
+         _task = []
+
+         if cumulative:
+             for i in items:
+                 if isinstance(i, list):
+                     _context += " " + " ".join(i).strip()
+                 else:
+                     _context += " " + i.strip()
+
+                 _segments.append(_context)
+         else:
+             _segments = items
+
+         for i in _segments:
+             _task.append(asyncio.create_task(_get_item_perplexity(i)))
+
+         results = await asyncio.gather(*_task)
+         results = [(item, pplex) for item, pplex in zip(items, results)]
+         return sorted(results, key=lambda x: x[1]["logprobs"], reverse=True)
+
+     async def compress(
+         self,
+         text,
+         target_ratio=None,
+         initial_text=None,
+         cumulative=False,
+         split_kwargs=None,
+         split_overlap=None,
+         split_threshold=None,
+         rank_by="perplexity",
+         min_compression_score=None,
+         verbose=True,
+         **kwargs,
+     ):
+         start = time()
+         if split_kwargs is None:
+             split_kwargs = {}
+             split_kwargs["chunk_size"] = self.max_tokens_per_sample
+             split_kwargs["overlap"] = split_overlap or 0
+             split_kwargs["threshold"] = split_threshold or 0
+
+         len_tokens = len(self.tokenize(text))
+
+         items = self.split(text, return_tokens=True, **split_kwargs)
+
+         if rank_by == "perplexity":
+             ranked_items = await self.rank_by_pplex(
+                 items=items, initial_text=initial_text, cumulative=cumulative, **kwargs
+             )
+
+             prompt_tokens = sum([i[1]["num_prompt_tokens"] for i in ranked_items])
+
+             num_completion_tokens = sum(
+                 [i[1]["num_completion_tokens"] for i in ranked_items]
+             )
+
+             price = (
+                 prompt_tokens * 0.5 / 1000000 + num_completion_tokens * 1.5 / 1000000
+             )
+
+             selected_items = self.select_by_pplex(
+                 ranked_items=ranked_items,
+                 target_compression_ratio=target_ratio or self.target_ratio,
+                 original_length=len_tokens,
+                 min_pplex=min_compression_score or self.min_compression_score,
+             )
+
+             if verbose:
+                 msg = ""
+                 msg += f"Original Token number: {len_tokens}\n"
+
+                 def _f(i):
+                     if isinstance(i, str):
+                         i = self.tokenize(i)
+
+                     if isinstance(i, list):
+                         return len(to_list(i, dropna=True, flatten=True))
+
+                 len_ = sum([_f(i) for i in selected_items])
+                 msg += f"Selected Token number: {len_}\n"
+                 msg += f"Token Compression Ratio: {len_ / len_tokens:.03f}\n"
+                 msg += f"Compression Time: {time() - start:.04f} seconds\n"
+                 msg += f"Compression Model: {self.imodel.iModel_name}\n"
+                 msg += f"Compression Method: {rank_by}\n"
+                 msg += f"Compression Usage: ${price:.05f}\n"
+                 print(msg)
+
+             a = "".join([i.strip() for i in selected_items]).strip()
+             a = a.replace("\n\n", "")
+             return a
+
+         raise ValueError(f"Ranking method {rank_by} is not supported")
+
+     def select_by_pplex(
+         self, ranked_items, target_compression_ratio, original_length, min_pplex=None
+     ):
+         min_pplex = min_pplex or 0
+
+         desired_length = int(original_length * target_compression_ratio)
+
+         items = []
+         current_length = 0
+
+         for item, info in ranked_items:
+             if info["perplexity"] > min_pplex:
+                 item = self.tokenize(item) if isinstance(item, str) else item
+                 item = item if isinstance(item, list) else [item]
+                 item = to_list(item, dropna=True, flatten=True)
+                 if current_length + len(item) > desired_length:
+                     break
+                 else:
+                     current_length += len(item)
+                     items.append("".join(item))
+
+         return items
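
As a quick orientation, here is a minimal usage sketch of the class added above. The import path, the constructor parameters, and the async `compress` method come from this hunk; the sample text and the asyncio driver are illustrative assumptions:

import asyncio

from lionagi.experimental.compressor.llm_compressor import LLMCompressor


async def main():
    # target_ratio=0.2 keeps roughly 20% of the original tokens
    compressor = LLMCompressor(target_ratio=0.2, verbose=True)
    compressed = await compressor.compress("some long text to compress ...")
    print(compressed)


asyncio.run(main())

Note that `compress` ranks chunks by perplexity and keeps the highest-ranked ones until the target token budget is reached, so the output is a selection of the original text rather than a paraphrase.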
lionagi/experimental/compressor/llm_summarizer.py
@@ -0,0 +1,61 @@
+ # from lionagi.core.collections import iModel
+ # from .base import TokenCompressor
+
+
+ # class LLMSummarizer(TokenCompressor):
+
+ #     def __init__(
+ #         self, imodel: iModel = None, system_msg=None, tokenizer=None, splitter=None,
+ #         max_tokens=25, target_ratio=0.3
+ #     ):
+ #         imodel = imodel or iModel(model="gpt-3.5-turbo", max_tokens=max_tokens)
+ #         super().__init__(imodel=imodel, tokenizer=tokenizer, splitter=splitter)
+ #         self.system_msg = (
+ #             system_msg
+ #             or "Summarize the following sentence to be concise and informative:"
+ #         )
+ #         self.target_ratio = target_ratio
+
+ #     async def summarize_sentence(self, sentence, **kwargs):
+ #         messages = [
+ #             {"role": "system", "content": self.system_msg},
+ #             {"role": "user", "content": sentence},
+ #         ]
+ #         response = await self.imodel.call_chat_completion(messages, **kwargs)
+ #         return response["choices"][0]["message"]["content"]
+
+ #     def tokenize(self, text):
+ #         tokenize_func = self.tokenizer or tokenize
+ #         return tokenize_func(text)
+
+ #     def split(self, text):
+ #         split_func = self.splitter or split_into_segments
+ #         return split_func(text)
+
+ #     # Function to enforce maximum sentence length
+ #     def enforce_max_sentence_length(self, sentence, max_words=25):
+ #         words = self.tokenize(sentence)
+ #         if len(words) > max_words:
+ #             sentence = ' '.join(words[:max_words])
+ #         return sentence
+
+ #     async def summarize_text(self, text, max_length_per_sentence=25, target_ratio=None, **kwargs):
+ #         sentences = self.split(text)
+ #         summarized = await alcall(
+ #             sentences, self.summarize_sentence, **kwargs
+ #         )
+ #         summarized = [
+ #             self.enforce_max_sentence_length(sentence, max_length_per_sentence)
+ #             for sentence in summarized
+ #         ]
+
+ #         original_length = len(self.tokenize(text))
+ #         summarized_length = len(self.tokenize(' '.join(summarized)))
+ #         current_ratio = summarized_length / original_length
+
+ #         target_ratio = target_ratio or self.target_ratio
+ #         if current_ratio > target_ratio:
+ #             words_to_remove = int((current_ratio - target_ratio) * original_length)
+ #             return ' '.join(summarized[:-words_to_remove])
+
+ #         return ' '.join(summarized)
lionagi/experimental/compressor/util.py
@@ -0,0 +1,70 @@
+ # import asyncio
+ # from lionagi import alcall
+ # from lionagi.libs.ln_convert import to_list
+ # import numpy as np
+
+ # def split_into_segments(text):
+ #     segments = text.split(".")  # Split by period
+ #     return [segment.strip() for segment in segments if segment]
+
+ # # Tokenize the segment
+ # def tokenize(segment):
+ #     tokens = segment.split()  # Simple space-based tokenization
+ #     return tokens
+
+ # async def calculate_perplexity(system_msg: str, imodel, tokens, initial_context=None, **kwargs):
+ #     _tasks = []
+ #     _context = initial_context or ""
+ #     for i in range(len(tokens)):
+ #         _context += " " + tokens[i]
+ #         messages = [
+ #             {"role": "system", "content": system_msg},
+ #             {"role": "user", "content": _context},
+ #         ]
+ #         task = asyncio.create_task(
+ #             imodel.call_chat_completion(
+ #                 messages=messages, logprobs=True, max_tokens=1, **kwargs
+ #             )
+ #         )
+ #         _tasks.append(task)
+
+ #     results = await asyncio.gather(*_tasks)
+ #     logprobs = [
+ #         result[1]["choices"][0]["logprobs"]["content"] for result in results
+ #     ]
+ #     logprobs = to_list(logprobs, flatten=True, dropna=True)
+ #     logprobs = [lprob_["logprob"] for lprob_ in logprobs]
+ #     return np.exp(np.mean(logprobs))
+
+ # async def rank_by_perplexity(
+ #     segments,  # str | list[str]; if a list, we assume it is already well split
+ #     initial_text=None,
+ #     cumulative=False,
+ #     **kwargs
+ # ):
+ #     _segments = []
+ #     _context = initial_text or ""
+ #     _task = []
+
+ #     if cumulative:
+ #         for i in range(1, len(segments)):
+ #             _context += " " + segments[i - 1]
+ #             _segments.append(_context)
+ #     else:
+ #         _segments = segments
+
+ #     for i in segments:
+ #         _task.append(asyncio.create_task(
+ #             calculate_perplexity(
+ #                 self.system_msg, self.imodel, self.tokenize(i), **kwargs)
+ #             )
+ #         )
+ #     segment_perplexities = await asyncio.gather(*_task)
+
+ #     return {
+ #         segment: perplexity
+ #         for segment, perplexity in zip(segments, segment_perplexities)
+ #     }
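
The commented-out `calculate_perplexity` helper above boils down to `np.exp(np.mean(logprobs))` over the collected token log-probabilities. A self-contained sketch of that arithmetic with made-up values; note that the textbook definition of perplexity negates the mean log-probability, while the sign here follows the helper as written:

import numpy as np

# Hypothetical per-token log-probabilities as returned by the chat API.
logprobs = [-0.5, -1.2, -0.8, -0.3]

# Mirrors the commented-out helper: exp(mean(logprobs)).
print(np.exp(np.mean(logprobs)))  # exp(-0.7) ≈ 0.4966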
lionagi/experimental/directive/__init__.py
@@ -0,0 +1,19 @@
+ # from ..form.predict import predict
+ # from .select import select
+ # from ..form.score import score
+ # from ..form.react import react
+ # from .vote import vote
+ # from ..form.plan import plan
+ # from .cot import chain_of_thoughts, chain_of_react
+
+
+ # __all__ = [
+ #     "predict",
+ #     "select",
+ #     "score",
+ #     "vote",
+ #     "react",
+ #     "plan",
+ #     "chain_of_thoughts",
+ #     "chain_of_react",
+ # ]
lionagi/experimental/directive/parser/__init__.py: File without changes
lionagi/experimental/directive/parser/base_parser.py
@@ -0,0 +1,282 @@
+ """
+ Copyright 2024 HaiyangLi
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ """
+
+ from typing import List, Optional
+
+ from lionagi.experimental.directive.tokenizer import BaseToken
+ from ..template.schema import IfNode, TryNode, ForNode
+
+
+ class BaseDirectiveParser:
+     """A base parser with lookahead, error recovery, and backtracking support.
+
+     Attributes:
+         tokens (List[BaseToken]): A list of tokens to be parsed.
+         current_token_index (int): The index of the current token in the tokens list.
+         current_token (Optional[BaseToken]): The current token being processed.
+
+     Examples:
+         >>> tokenizer = BaseTokenizer("IF x > 10 THEN DO something ENDIF")
+         >>> tokens = tokenizer.get_tokens()
+         >>> parser = BaseDirectiveParser(tokens)
+         >>> print(parser.current_token)
+         BaseToken(KEYWORD, IF)
+     """
+
+     def __init__(self, tokens: List[BaseToken]):
+         self.tokens = tokens
+         self.current_token_index = -1
+         self.current_token: Optional[BaseToken] = None
+         self.next_token()
+
+     def next_token(self) -> None:
+         """Advances to the next token in the list."""
+         self.current_token_index += 1
+         if self.current_token_index < len(self.tokens):
+             self.current_token = self.tokens[self.current_token_index]
+         else:
+             self.current_token = None
+
+     def peek_next_token(self, offset: int = 1) -> BaseToken | None:
+         """Peeks at the next token without consuming it.
+
+         Args:
+             offset (int): The number of tokens to look ahead.
+
+         Returns:
+             Optional[BaseToken]: The token at the specified lookahead offset, or None if end of list.
+         """
+         peek_index = self.current_token_index + offset
+         if peek_index < len(self.tokens):
+             return self.tokens[peek_index]
+         else:
+             return None
+
+     def skip_until(self, token_types: List[str]) -> None:
+         """Skips tokens until a token of one of the specified types is found.
+
+         Args:
+             token_types (List[str]): A list of token types to stop skipping at.
+         """
+         while self.current_token and self.current_token.type not in token_types:
+             self.next_token()
+
+     def mark(self) -> int:
+         """Marks the current position in the token list for potential backtracking.
+
+         Returns:
+             int: The current token index.
+         """
+         return self.current_token_index
+
+     def reset_to_mark(self, mark: int) -> None:
+         """Resets the parser to a previously marked position.
+
+         Args:
+             mark (int): The token index to reset to.
+         """
+         self.current_token_index = mark - 1
+         self.next_token()
+
+     def skip_semicolon(self):
+         """Skips a semicolon token if it is the current token."""
+         if self.current_token and self.current_token.value == ";":
+             self.next_token()
+
+     def parse_expression(self):
+         """Parses an expression until a semicolon is encountered.
+
+         Returns:
+             str: The parsed expression as a string.
+
+         Raises:
+             SyntaxError: If a semicolon is not found at the end of the expression.
+         """
+         expr = ""
+         while self.current_token and self.current_token.value != ";":
+             expr += self.current_token.value + " "
+             self.next_token()
+         # Expecting a semicolon at the end of the condition
+         if self.current_token is None or self.current_token.value != ";":
+             raise SyntaxError("Expected ';' at the end of the condition")
+         self.next_token()  # Move past the semicolon to the next part of the statement
+         return expr.strip()
+
+     def parse_if_block(self):
+         """Parses a block of statements for an IF condition.
+
+         Returns:
+             list: The parsed block of statements as a list of strings.
+         """
+         block = []
+         # Parse the block until 'ELSE' or 'ENDIF', ensuring not to include semicolons as part of the block
+         while self.current_token and self.current_token.value not in ("ENDIF", "ELSE"):
+             if self.current_token.value == "DO":
+                 self.next_token()  # Move past 'DO' to get to the action
+             block.append(self.current_token.value)  # Add the action to the block
+             self.next_token()  # Move to the next token, which could be a semicolon or the next action
+             if self.current_token and self.current_token.value == ";":
+                 self.next_token()  # Move past the semicolon
+         return block
+
+     def parse_if_statement(self):
+         """Parses an IF statement.
+
+         Returns:
+             IfNode: The parsed IF statement as an IfNode object.
+
+         Raises:
+             SyntaxError: If the IF statement is not properly formed.
+         """
+         if self.current_token.type != "KEYWORD" or self.current_token.value != "IF":
+             raise SyntaxError("Expected IF statement")
+         self.next_token()  # Skip 'IF'
+
+         condition = self.parse_expression()  # Now properly ends after the semicolon
+
+         true_block = []
+         if self.current_token and self.current_token.value == "DO":
+             true_block = self.parse_if_block()  # Parse true block after 'DO'
+
+         false_block = None
+         if self.current_token and self.current_token.value == "ELSE":
+             self.next_token()  # Skip 'ELSE', expect 'DO' next for the false block
+             self.skip_semicolon()
+             if self.current_token is None or self.current_token.value != "DO":
+                 raise SyntaxError("Expected 'DO' after 'ELSE'")
+             self.next_token()  # Skip 'DO'
+             false_block = self.parse_if_block()  # Parse false block
+
+         return IfNode(condition, true_block, false_block)
+
+     def parse_for_statement(self):
+         """Parses a FOR statement.
+
+         Returns:
+             ForNode: The parsed FOR statement as a ForNode object.
+
+         Raises:
+             SyntaxError: If the FOR statement is not properly formed.
+         """
+         if self.current_token.type != "KEYWORD" or self.current_token.value != "FOR":
+             raise SyntaxError("Expected FOR statement")
+         self.next_token()  # Skip 'FOR'
+
+         # Parse the iterator variable
+         if self.current_token.type != "IDENTIFIER":
+             raise SyntaxError("Expected iterator variable after FOR")
+         iterator = self.current_token.value
+         self.next_token()  # Move past the iterator variable
+
+         # Expect and skip 'IN' keyword
+         if self.current_token.type != "KEYWORD" or self.current_token.value != "IN":
+             raise SyntaxError("Expected 'IN' after iterator variable")
+         self.next_token()  # Move past 'IN'
+
+         # Parse the collection
+         if self.current_token.type not in ["IDENTIFIER", "LITERAL"]:
+             raise SyntaxError("Expected collection after 'IN'")
+         collection = self.current_token.value
+         self.next_token()  # Move past the collection
+
+         # Now, parse the block of statements to execute
+         true_block = self.parse_for_block()
+
+         # Construct and return a ForNode
+         return ForNode(iterator, collection, true_block)
+
+     def parse_for_block(self):
+         """Parses a block of statements for a FOR loop.
+
+         Returns:
+             list: The parsed block of statements as a list of strings.
+         """
+         block = []
+         # Skip initial 'DO' if present
+         if self.current_token and self.current_token.value == "DO":
+             self.next_token()
+
+         while self.current_token and self.current_token.value not in ("ENDFOR",):
+             if self.current_token.value == ";":
+                 # If a semicolon is encountered, skip it and move to the next token
+                 self.next_token()
+                 continue
+             # Add the current token to the block unless it's a 'DO' or ';'
+             if self.current_token.value != "DO":
+                 block.append(self.current_token.value)
+             self.next_token()
+
+         # The loop exits when 'ENDFOR' is encountered; move past it for subsequent parsing
+         self.next_token()  # Skip 'ENDFOR'
+         return block
+
+     def parse_try_statement(self):
+         """Parses a TRY statement.
+
+         Returns:
+             TryNode: The parsed TRY statement as a TryNode object.
+
+         Raises:
+             SyntaxError: If the TRY statement is not properly formed.
+         """
+         if self.current_token.type != "KEYWORD" or self.current_token.value != "TRY":
+             raise SyntaxError("Expected TRY statement")
+         self.next_token()  # Skip 'TRY'
+
+         try_block = self.parse_try_block("EXCEPT")  # Parse the try block until 'EXCEPT'
+
+         # Now expecting 'EXCEPT' keyword
+         if not (self.current_token and self.current_token.value == "EXCEPT"):
+             raise SyntaxError("Expected 'EXCEPT' after try block")
+         self.next_token()  # Move past 'EXCEPT'
+
+         except_block = self.parse_try_block(
+             "ENDTRY"
+         )  # Parse the except block until 'ENDTRY'
+
+         # Ensure we are correctly positioned after 'ENDTRY'
+         if self.current_token and self.current_token.value != "ENDTRY":
+             raise SyntaxError("Expected 'ENDTRY' at the end of except block")
+         self.next_token()  # Move past 'ENDTRY' for subsequent parsing
+
+         return TryNode(try_block, except_block)
+
+     def parse_try_block(self, stop_keyword):
+         """Parses a block of statements for a TRY or EXCEPT clause.
+
+         Args:
+             stop_keyword (str): The keyword that indicates the end of the block.
+
+         Returns:
+             list: The parsed block of statements as a list of strings.
+         """
+         block = []
+         while self.current_token and self.current_token.value != stop_keyword:
+             if self.current_token.value == "DO":
+                 self.next_token()  # Move past 'DO' to get to the action
+             elif self.current_token.value == ";":
+                 self.next_token()  # Move past the semicolon
+                 continue  # Skip adding ';' to the block
+             else:
+                 block.append(self.current_token.value)  # Add the action to the block
+                 self.next_token()
+
+         return block
+
+
+ # "IF condition1 && condition2; DO action2; ELSE; DO action3; ENDIF;"
+ # "FOR input_ IN collections; DO action(input_); ENDFOR;"
+ # "TRY; DO action(); EXCEPT; DO action(input_); ENDTRY;"
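
Based on the class docstring and the trailing example strings above, usage looks roughly like the following. `BaseTokenizer` and its `get_tokens` method appear only in the docstring example, not in this hunk, so treat them as assumptions:

from lionagi.experimental.directive.tokenizer import BaseTokenizer
from lionagi.experimental.directive.parser.base_parser import BaseDirectiveParser

# Tokenize a semicolon-terminated directive string (per the examples above),
# then parse it into an IfNode(condition, true_block, false_block).
tokens = BaseTokenizer("IF x > 10; DO action1; ELSE; DO action2; ENDIF;").get_tokens()
parser = BaseDirectiveParser(tokens)
if_node = parser.parse_if_statement()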
lionagi/experimental/directive/template/__init__.py: File without changes