lionagi 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. lionagi/__init__.py +60 -5
  2. lionagi/core/__init__.py +0 -25
  3. lionagi/core/_setting/_setting.py +59 -0
  4. lionagi/core/action/__init__.py +14 -0
  5. lionagi/core/action/function_calling.py +136 -0
  6. lionagi/core/action/manual.py +1 -0
  7. lionagi/core/action/node.py +109 -0
  8. lionagi/core/action/tool.py +114 -0
  9. lionagi/core/action/tool_manager.py +356 -0
  10. lionagi/core/agent/base_agent.py +27 -13
  11. lionagi/core/agent/eval/evaluator.py +1 -0
  12. lionagi/core/agent/eval/vote.py +40 -0
  13. lionagi/core/agent/learn/learner.py +59 -0
  14. lionagi/core/agent/plan/unit_template.py +1 -0
  15. lionagi/core/collections/__init__.py +17 -0
  16. lionagi/core/{generic/data_logger.py → collections/_logger.py} +69 -55
  17. lionagi/core/collections/abc/__init__.py +53 -0
  18. lionagi/core/collections/abc/component.py +615 -0
  19. lionagi/core/collections/abc/concepts.py +297 -0
  20. lionagi/core/collections/abc/exceptions.py +150 -0
  21. lionagi/core/collections/abc/util.py +45 -0
  22. lionagi/core/collections/exchange.py +161 -0
  23. lionagi/core/collections/flow.py +426 -0
  24. lionagi/core/collections/model.py +419 -0
  25. lionagi/core/collections/pile.py +913 -0
  26. lionagi/core/collections/progression.py +236 -0
  27. lionagi/core/collections/util.py +64 -0
  28. lionagi/core/director/direct.py +314 -0
  29. lionagi/core/director/director.py +2 -0
  30. lionagi/core/{execute/branch_executor.py → engine/branch_engine.py} +134 -97
  31. lionagi/core/{execute/instruction_map_executor.py → engine/instruction_map_engine.py} +80 -55
  32. lionagi/{experimental/directive/evaluator → core/engine}/script_engine.py +17 -1
  33. lionagi/core/executor/base_executor.py +90 -0
  34. lionagi/core/{execute/structure_executor.py → executor/graph_executor.py} +62 -66
  35. lionagi/core/{execute → executor}/neo4j_executor.py +70 -67
  36. lionagi/core/generic/__init__.py +3 -33
  37. lionagi/core/generic/edge.py +29 -79
  38. lionagi/core/generic/edge_condition.py +16 -0
  39. lionagi/core/generic/graph.py +236 -0
  40. lionagi/core/generic/hyperedge.py +1 -0
  41. lionagi/core/generic/node.py +156 -221
  42. lionagi/core/generic/tree.py +48 -0
  43. lionagi/core/generic/tree_node.py +79 -0
  44. lionagi/core/mail/__init__.py +12 -0
  45. lionagi/core/mail/mail.py +25 -0
  46. lionagi/core/mail/mail_manager.py +139 -58
  47. lionagi/core/mail/package.py +45 -0
  48. lionagi/core/mail/start_mail.py +36 -0
  49. lionagi/core/message/__init__.py +19 -0
  50. lionagi/core/message/action_request.py +133 -0
  51. lionagi/core/message/action_response.py +135 -0
  52. lionagi/core/message/assistant_response.py +95 -0
  53. lionagi/core/message/instruction.py +234 -0
  54. lionagi/core/message/message.py +101 -0
  55. lionagi/core/message/system.py +86 -0
  56. lionagi/core/message/util.py +283 -0
  57. lionagi/core/report/__init__.py +4 -0
  58. lionagi/core/report/base.py +217 -0
  59. lionagi/core/report/form.py +231 -0
  60. lionagi/core/report/report.py +166 -0
  61. lionagi/core/report/util.py +28 -0
  62. lionagi/core/rule/_default.py +16 -0
  63. lionagi/core/rule/action.py +99 -0
  64. lionagi/core/rule/base.py +238 -0
  65. lionagi/core/rule/boolean.py +56 -0
  66. lionagi/core/rule/choice.py +47 -0
  67. lionagi/core/rule/mapping.py +96 -0
  68. lionagi/core/rule/number.py +71 -0
  69. lionagi/core/rule/rulebook.py +109 -0
  70. lionagi/core/rule/string.py +52 -0
  71. lionagi/core/rule/util.py +35 -0
  72. lionagi/core/session/branch.py +431 -0
  73. lionagi/core/session/directive_mixin.py +287 -0
  74. lionagi/core/session/session.py +229 -903
  75. lionagi/core/structure/__init__.py +1 -0
  76. lionagi/core/structure/chain.py +1 -0
  77. lionagi/core/structure/forest.py +1 -0
  78. lionagi/core/structure/graph.py +1 -0
  79. lionagi/core/structure/tree.py +1 -0
  80. lionagi/core/unit/__init__.py +5 -0
  81. lionagi/core/unit/parallel_unit.py +245 -0
  82. lionagi/core/unit/template/action.py +81 -0
  83. lionagi/core/unit/template/base.py +51 -0
  84. lionagi/core/unit/template/plan.py +84 -0
  85. lionagi/core/unit/template/predict.py +109 -0
  86. lionagi/core/unit/template/score.py +124 -0
  87. lionagi/core/unit/template/select.py +104 -0
  88. lionagi/core/unit/unit.py +362 -0
  89. lionagi/core/unit/unit_form.py +305 -0
  90. lionagi/core/unit/unit_mixin.py +1168 -0
  91. lionagi/core/unit/util.py +71 -0
  92. lionagi/core/validator/validator.py +364 -0
  93. lionagi/core/work/work.py +76 -0
  94. lionagi/core/work/work_function.py +101 -0
  95. lionagi/core/work/work_queue.py +103 -0
  96. lionagi/core/work/worker.py +258 -0
  97. lionagi/core/work/worklog.py +120 -0
  98. lionagi/experimental/compressor/base.py +46 -0
  99. lionagi/experimental/compressor/llm_compressor.py +247 -0
  100. lionagi/experimental/compressor/llm_summarizer.py +61 -0
  101. lionagi/experimental/compressor/util.py +70 -0
  102. lionagi/experimental/directive/__init__.py +19 -0
  103. lionagi/experimental/directive/parser/base_parser.py +69 -2
  104. lionagi/experimental/directive/{template_ → template}/base_template.py +17 -1
  105. lionagi/{libs/ln_tokenizer.py → experimental/directive/tokenizer.py} +16 -0
  106. lionagi/experimental/{directive/evaluator → evaluator}/ast_evaluator.py +16 -0
  107. lionagi/experimental/{directive/evaluator → evaluator}/base_evaluator.py +16 -0
  108. lionagi/experimental/knowledge/base.py +10 -0
  109. lionagi/experimental/memory/__init__.py +0 -0
  110. lionagi/experimental/strategies/__init__.py +0 -0
  111. lionagi/experimental/strategies/base.py +1 -0
  112. lionagi/integrations/bridge/langchain_/documents.py +4 -0
  113. lionagi/integrations/bridge/llamaindex_/index.py +30 -0
  114. lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py +6 -0
  115. lionagi/integrations/chunker/chunk.py +161 -24
  116. lionagi/integrations/config/oai_configs.py +34 -3
  117. lionagi/integrations/config/openrouter_configs.py +14 -2
  118. lionagi/integrations/loader/load.py +122 -21
  119. lionagi/integrations/loader/load_util.py +6 -77
  120. lionagi/integrations/provider/_mapping.py +46 -0
  121. lionagi/integrations/provider/litellm.py +2 -1
  122. lionagi/integrations/provider/mlx_service.py +16 -9
  123. lionagi/integrations/provider/oai.py +91 -4
  124. lionagi/integrations/provider/ollama.py +6 -5
  125. lionagi/integrations/provider/openrouter.py +115 -8
  126. lionagi/integrations/provider/services.py +2 -2
  127. lionagi/integrations/provider/transformers.py +18 -22
  128. lionagi/integrations/storage/__init__.py +3 -3
  129. lionagi/integrations/storage/neo4j.py +52 -60
  130. lionagi/integrations/storage/storage_util.py +44 -46
  131. lionagi/integrations/storage/structure_excel.py +43 -26
  132. lionagi/integrations/storage/to_excel.py +11 -4
  133. lionagi/libs/__init__.py +22 -1
  134. lionagi/libs/ln_api.py +75 -20
  135. lionagi/libs/ln_context.py +37 -0
  136. lionagi/libs/ln_convert.py +21 -9
  137. lionagi/libs/ln_func_call.py +69 -28
  138. lionagi/libs/ln_image.py +107 -0
  139. lionagi/libs/ln_nested.py +26 -11
  140. lionagi/libs/ln_parse.py +82 -23
  141. lionagi/libs/ln_queue.py +16 -0
  142. lionagi/libs/ln_tokenize.py +164 -0
  143. lionagi/libs/ln_validate.py +16 -0
  144. lionagi/libs/special_tokens.py +172 -0
  145. lionagi/libs/sys_util.py +95 -24
  146. lionagi/lions/coder/code_form.py +13 -0
  147. lionagi/lions/coder/coder.py +50 -3
  148. lionagi/lions/coder/util.py +30 -25
  149. lionagi/tests/libs/test_func_call.py +23 -21
  150. lionagi/tests/libs/test_nested.py +36 -21
  151. lionagi/tests/libs/test_parse.py +1 -1
  152. lionagi/tests/test_core/collections/__init__.py +0 -0
  153. lionagi/tests/test_core/collections/test_component.py +206 -0
  154. lionagi/tests/test_core/collections/test_exchange.py +138 -0
  155. lionagi/tests/test_core/collections/test_flow.py +145 -0
  156. lionagi/tests/test_core/collections/test_pile.py +171 -0
  157. lionagi/tests/test_core/collections/test_progression.py +129 -0
  158. lionagi/tests/test_core/generic/test_edge.py +67 -0
  159. lionagi/tests/test_core/generic/test_graph.py +96 -0
  160. lionagi/tests/test_core/generic/test_node.py +106 -0
  161. lionagi/tests/test_core/generic/test_tree_node.py +73 -0
  162. lionagi/tests/test_core/test_branch.py +115 -294
  163. lionagi/tests/test_core/test_form.py +46 -0
  164. lionagi/tests/test_core/test_report.py +105 -0
  165. lionagi/tests/test_core/test_validator.py +111 -0
  166. lionagi/version.py +1 -1
  167. lionagi-0.2.1.dist-info/LICENSE +202 -0
  168. lionagi-0.2.1.dist-info/METADATA +272 -0
  169. lionagi-0.2.1.dist-info/RECORD +240 -0
  170. lionagi/core/branch/base.py +0 -653
  171. lionagi/core/branch/branch.py +0 -474
  172. lionagi/core/branch/flow_mixin.py +0 -96
  173. lionagi/core/branch/util.py +0 -323
  174. lionagi/core/direct/__init__.py +0 -19
  175. lionagi/core/direct/cot.py +0 -123
  176. lionagi/core/direct/plan.py +0 -164
  177. lionagi/core/direct/predict.py +0 -166
  178. lionagi/core/direct/react.py +0 -171
  179. lionagi/core/direct/score.py +0 -279
  180. lionagi/core/direct/select.py +0 -170
  181. lionagi/core/direct/sentiment.py +0 -1
  182. lionagi/core/direct/utils.py +0 -110
  183. lionagi/core/direct/vote.py +0 -64
  184. lionagi/core/execute/base_executor.py +0 -47
  185. lionagi/core/flow/baseflow.py +0 -23
  186. lionagi/core/flow/monoflow/ReAct.py +0 -240
  187. lionagi/core/flow/monoflow/__init__.py +0 -9
  188. lionagi/core/flow/monoflow/chat.py +0 -95
  189. lionagi/core/flow/monoflow/chat_mixin.py +0 -253
  190. lionagi/core/flow/monoflow/followup.py +0 -215
  191. lionagi/core/flow/polyflow/__init__.py +0 -1
  192. lionagi/core/flow/polyflow/chat.py +0 -251
  193. lionagi/core/form/action_form.py +0 -26
  194. lionagi/core/form/field_validator.py +0 -287
  195. lionagi/core/form/form.py +0 -302
  196. lionagi/core/form/mixin.py +0 -214
  197. lionagi/core/form/scored_form.py +0 -13
  198. lionagi/core/generic/action.py +0 -26
  199. lionagi/core/generic/component.py +0 -532
  200. lionagi/core/generic/condition.py +0 -46
  201. lionagi/core/generic/mail.py +0 -90
  202. lionagi/core/generic/mailbox.py +0 -36
  203. lionagi/core/generic/relation.py +0 -70
  204. lionagi/core/generic/signal.py +0 -22
  205. lionagi/core/generic/structure.py +0 -362
  206. lionagi/core/generic/transfer.py +0 -20
  207. lionagi/core/generic/work.py +0 -40
  208. lionagi/core/graph/graph.py +0 -126
  209. lionagi/core/graph/tree.py +0 -190
  210. lionagi/core/mail/schema.py +0 -63
  211. lionagi/core/messages/schema.py +0 -325
  212. lionagi/core/tool/__init__.py +0 -5
  213. lionagi/core/tool/tool.py +0 -28
  214. lionagi/core/tool/tool_manager.py +0 -283
  215. lionagi/experimental/report/form.py +0 -64
  216. lionagi/experimental/report/report.py +0 -138
  217. lionagi/experimental/report/util.py +0 -47
  218. lionagi/experimental/tool/function_calling.py +0 -43
  219. lionagi/experimental/tool/manual.py +0 -66
  220. lionagi/experimental/tool/schema.py +0 -59
  221. lionagi/experimental/tool/tool_manager.py +0 -138
  222. lionagi/experimental/tool/util.py +0 -16
  223. lionagi/experimental/validator/rule.py +0 -139
  224. lionagi/experimental/validator/validator.py +0 -56
  225. lionagi/experimental/work/__init__.py +0 -10
  226. lionagi/experimental/work/async_queue.py +0 -54
  227. lionagi/experimental/work/schema.py +0 -73
  228. lionagi/experimental/work/work_function.py +0 -67
  229. lionagi/experimental/work/worker.py +0 -56
  230. lionagi/experimental/work2/form.py +0 -371
  231. lionagi/experimental/work2/report.py +0 -289
  232. lionagi/experimental/work2/schema.py +0 -30
  233. lionagi/experimental/work2/tests.py +0 -72
  234. lionagi/experimental/work2/work_function.py +0 -89
  235. lionagi/experimental/work2/worker.py +0 -12
  236. lionagi/integrations/bridge/llamaindex_/get_index.py +0 -294
  237. lionagi/tests/test_core/generic/test_component.py +0 -89
  238. lionagi/tests/test_core/test_base_branch.py +0 -426
  239. lionagi/tests/test_core/test_chat_flow.py +0 -63
  240. lionagi/tests/test_core/test_mail_manager.py +0 -75
  241. lionagi/tests/test_core/test_prompts.py +0 -51
  242. lionagi/tests/test_core/test_session.py +0 -254
  243. lionagi/tests/test_core/test_session_base_util.py +0 -313
  244. lionagi/tests/test_core/test_tool_manager.py +0 -95
  245. lionagi-0.1.2.dist-info/LICENSE +0 -9
  246. lionagi-0.1.2.dist-info/METADATA +0 -174
  247. lionagi-0.1.2.dist-info/RECORD +0 -206
  248. /lionagi/core/{branch → _setting}/__init__.py +0 -0
  249. /lionagi/core/{execute → agent/eval}/__init__.py +0 -0
  250. /lionagi/core/{flow → agent/learn}/__init__.py +0 -0
  251. /lionagi/core/{form → agent/plan}/__init__.py +0 -0
  252. /lionagi/core/{branch/executable_branch.py → agent/plan/plan.py} +0 -0
  253. /lionagi/core/{graph → director}/__init__.py +0 -0
  254. /lionagi/core/{messages → engine}/__init__.py +0 -0
  255. /lionagi/{experimental/directive/evaluator → core/engine}/sandbox_.py +0 -0
  256. /lionagi/{experimental/directive/evaluator → core/executor}/__init__.py +0 -0
  257. /lionagi/{experimental/directive/template_ → core/rule}/__init__.py +0 -0
  258. /lionagi/{experimental/report → core/unit/template}/__init__.py +0 -0
  259. /lionagi/{experimental/tool → core/validator}/__init__.py +0 -0
  260. /lionagi/{experimental/validator → core/work}/__init__.py +0 -0
  261. /lionagi/experimental/{work2 → compressor}/__init__.py +0 -0
  262. /lionagi/{core/flow/mono_chat_mixin.py → experimental/directive/template/__init__.py} +0 -0
  263. /lionagi/experimental/directive/{schema.py → template/schema.py} +0 -0
  264. /lionagi/experimental/{work2/util.py → evaluator/__init__.py} +0 -0
  265. /lionagi/experimental/{work2/work.py → knowledge/__init__.py} +0 -0
  266. /lionagi/{tests/libs/test_async.py → experimental/knowledge/graph.py} +0 -0
  267. {lionagi-0.1.2.dist-info → lionagi-0.2.1.dist-info}/WHEEL +0 -0
  268. {lionagi-0.1.2.dist-info → lionagi-0.2.1.dist-info}/top_level.txt +0 -0
lionagi/experimental/compressor/llm_compressor.py
@@ -0,0 +1,247 @@
+ import asyncio
+ from lionagi import alcall
+ from lionagi.libs.ln_convert import to_list
+ import numpy as np
+ from lionagi.core.collections import iModel
+ from .base import TokenCompressor
+ from lionagi.libs.ln_tokenize import TokenizeUtil
+ from time import time
+
+ # inspired by LLMLingua, MIT License, Copyright (c) Microsoft Corporation.
+ # https://github.com/microsoft/LLMLingua
+
+
+ class LLMCompressor(TokenCompressor):
+
+     def __init__(
+         self,
+         imodel: iModel = None,
+         system_msg=None,
+         tokenizer=None,  # must be a callable or object with a tokenize method
+         splitter=None,  # must be a callable or object with a split/chunk/segment method
+         target_ratio=0.2,
+         n_samples=5,  # the cumulative samples to take in each perplexity calculation
+         chunk_size=64,
+         max_tokens_per_sample=80,
+         min_compression_score=0,  # (0-1) the minimum score to consider for compression, 0 means all
+         split_overlap=0,
+         split_threshold=0,
+         verbose=True,
+     ):
+         imodel = imodel or iModel(model="gpt-3.5-turbo", temperature=0.3)
+         super().__init__(imodel=imodel, tokenizer=tokenizer, splitter=splitter)
+         self.system_msg = (
+             system_msg
+             or "Concisely summarize and compress the information for storage:"
+         )
+         self.target_ratio = target_ratio
+         self.n_samples = n_samples
+         self.chunk_size = chunk_size
+         self.max_tokens_per_sample = max_tokens_per_sample
+         self.min_compression_score = min_compression_score
+         self.verbose = verbose
+         self.split_overlap = split_overlap
+         self.split_threshold = split_threshold
+
+     def tokenize(self, text, encoding_name=None, return_byte=False, **kwargs):
+         """
+         by default you can use `encoding_name` to be one of,
+         ['gpt2', 'r50k_base', 'p50k_base', 'p50k_edit', 'cl100k_base', 'o200k_base']
+
+         or you can use `encoding_model` that tiktoken supports in their mapping such as "gpt-4o"
+         """
+         if not self.tokenizer:
+             return TokenizeUtil.tokenize(
+                 text,
+                 encoding_model=self.imodel.iModel_name,
+                 encoding_name=encoding_name,
+                 return_byte=return_byte,
+             )
+
+         if hasattr(self.tokenizer, "tokenize"):
+             return self.tokenizer.tokenize(text, **kwargs)
+
+         return self.tokenizer(text, **kwargs)
+
+     def split(
+         self,
+         text,
+         chunk_size=None,
+         overlap=None,
+         threshold=None,
+         by_chars=False,
+         return_tokens=False,
+         return_byte=False,
+         **kwargs,
+     ):
+         if not self.splitter:
+             splitter = (
+                 TokenizeUtil.chunk_by_chars
+                 if by_chars
+                 else TokenizeUtil.chunk_by_tokens
+             )
+             return splitter(
+                 text,
+                 chunk_size or self.chunk_size,
+                 overlap or self.split_overlap,
+                 threshold or self.split_threshold,
+                 return_tokens=return_tokens,
+                 return_byte=return_byte,
+             )
+
+         a = [
+             getattr(self.splitter, i, None)
+             for i in ["split", "chunk", "segment"]
+             if i is not None
+         ][0]
+         a = getattr(self.splitter, a)
+         return a(text, **kwargs)
+
+     async def rank_by_pplex(
+         self, items: list, initial_text=None, cumulative=False, n_samples=None, **kwargs
+     ):
+         """
+         rank a list of items according to their perplexity
+         an item can be a single token or a list of tokens
+
+         kwargs: additional arguments to pass to the model
+         """
+
+         async def _get_item_perplexity(item):
+             item = item if isinstance(item, list) else [item]
+             item = (
+                 item[: self.max_tokens_per_sample]
+                 if len(item) > self.max_tokens_per_sample
+                 else item
+             )
+             return await self.imodel.compute_perplexity(
+                 initial_context=initial_text,
+                 tokens=item,
+                 n_samples=n_samples or self.n_samples,
+                 system_msg=self.system_msg,
+                 **kwargs,
+             )
+
+         if not isinstance(items, list):
+             items = self.tokenize(items)
+
+         if len(items) == 1:
+             return [items]  # no need to rank a single item
+
+         _segments = []
+         _context = initial_text or ""
+         _task = []
+
+         if cumulative:
+             for i in items:
+                 if isinstance(i, list):
+                     _context += " " + " ".join(i).strip()
+                 else:
+                     _context += " " + i.strip()
+
+                 _segments.append(_context)
+         else:
+             _segments = items
+
+         for i in _segments:
+             _task.append(asyncio.create_task(_get_item_perplexity(i)))
+
+         results = await asyncio.gather(*_task)
+         results = [(item, pplex) for item, pplex in zip(items, results)]
+         return sorted(results, key=lambda x: x[1]["logprobs"], reverse=True)
+
+     async def compress(
+         self,
+         text,
+         target_ratio=None,
+         initial_text=None,
+         cumulative=False,
+         split_kwargs=None,
+         split_overlap=None,
+         split_threshold=None,
+         rank_by="perplexity",
+         min_compression_score=None,
+         verbose=True,
+         **kwargs,
+     ):
+         start = time()
+         if split_kwargs is None:
+             split_kwargs = {}
+             split_kwargs["chunk_size"] = self.max_tokens_per_sample
+             split_kwargs["overlap"] = split_overlap or 0
+             split_kwargs["threshold"] = split_threshold or 0
+
+         len_tokens = len(self.tokenize(text))
+
+         items = self.split(text, return_tokens=True, **split_kwargs)
+
+         if rank_by == "perplexity":
+             ranked_items = await self.rank_by_pplex(
+                 items=items, initial_text=initial_text, cumulative=cumulative, **kwargs
+             )
+
+             prompt_tokens = sum([i[1]["num_prompt_tokens"] for i in ranked_items])
+
+             num_completion_tokens = sum(
+                 [i[1]["num_completion_tokens"] for i in ranked_items]
+             )
+
+             price = (
+                 prompt_tokens * 0.5 / 1000000 + num_completion_tokens * 1.5 / 1000000
+             )
+
+             selected_items = self.select_by_pplex(
+                 ranked_items=ranked_items,
+                 target_compression_ratio=target_ratio or self.target_ratio,
+                 original_length=len_tokens,
+                 min_pplex=min_compression_score or self.min_compression_score,
+             )
+
+             if verbose:
+                 msg = ""
+                 msg += f"Original Token number: {len_tokens}\n"
+
+                 def _f(i):
+                     if isinstance(i, str):
+                         i = self.tokenize(i)
+
+                     if isinstance(i, list):
+                         return len(to_list(i, dropna=True, flatten=True))
+
+                 len_ = sum([_f(i) for i in selected_items])
+                 msg += f"Selected Token number: {len_}\n"
+                 msg += f"Token Compression Ratio: {len_ / len_tokens:.03f}\n"
+                 msg += f"Compression Time: {time() - start:.04f} seconds\n"
+                 msg += f"Compression Model: {self.imodel.iModel_name}\n"
+                 msg += f"Compression Method: {rank_by}\n"
+                 msg += f"Compression Usage: ${price:.05f}\n"
+                 print(msg)
+
+             a = "".join([i.strip() for i in selected_items]).strip()
+             a = a.replace("\n\n", "")
+             return a
+
+         raise ValueError(f"Ranking method {rank_by} is not supported")
+
+     def select_by_pplex(
+         self, ranked_items, target_compression_ratio, original_length, min_pplex=None
+     ):
+         min_pplex = min_pplex or 0
+
+         desired_length = int(original_length * target_compression_ratio)
+
+         items = []
+         current_length = 0
+
+         for item, info in ranked_items:
+             if info["perplexity"] > min_pplex:
+                 item = self.tokenize(item) if isinstance(item, str) else item
+                 item = item if isinstance(item, list) else [item]
+                 item = to_list(item, dropna=True, flatten=True)
+                 if current_length + len(item) > desired_length:
+                     break
+                 else:
+                     current_length += len(item)
+                     items.append("".join(item))
+
+         return items
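
A minimal usage sketch of the new compressor, based only on the signatures visible in this hunk; the input text is a placeholder, not part of the diff:

import asyncio

from lionagi.experimental.compressor.llm_compressor import LLMCompressor

async def main():
    # With no imodel argument, the constructor above falls back to
    # iModel(model="gpt-3.5-turbo", temperature=0.3); target_ratio=0.2
    # keeps roughly 20% of the original tokens.
    compressor = LLMCompressor(target_ratio=0.2)
    long_text = "..."  # placeholder; any long document
    compressed = await compressor.compress(long_text)
    print(compressed)

asyncio.run(main())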
lionagi/experimental/compressor/llm_summarizer.py
@@ -0,0 +1,61 @@
+ # from lionagi.core.collections import iModel
+ # from .base import TokenCompressor
+
+
+ # class LLMSummarizer(TokenCompressor):
+
+ #     def __init__(
+ #         self, imodel: iModel = None, system_msg=None, tokenizer=None, splitter=None,
+ #         max_tokens=25, target_ratio=0.3
+ #     ):
+ #         imodel = imodel or iModel(model="gpt-3.5-turbo", max_tokens=max_tokens)
+ #         super().__init__(imodel=imodel, tokenizer=tokenizer, splitter=splitter)
+ #         self.system_msg = (
+ #             system_msg
+ #             or "Summarize the following sentence to be concise and informative:"
+ #         )
+ #         self.target_ratio = target_ratio
+
+ #     async def summarize_sentence(self, sentence, **kwargs):
+ #         messages = [
+ #             {"role": "system", "content": self.system_msg},
+ #             {"role": "user", "content": sentence},
+ #         ]
+ #         response = await self.imodel.call_chat_completion(messages, **kwargs)
+ #         return response["choices"][0]["message"]["content"]
+
+ #     def tokenize(self, text):
+ #         tokenize_func = self.tokenizer or tokenize
+ #         return tokenize_func(text)
+
+ #     def split(self, text):
+ #         split_func = self.splitter or split_into_segments
+ #         return split_func(text)
+
+ #     # Function to enforce maximum sentence length
+ #     def enforce_max_sentence_length(self, sentence, max_words=25):
+ #         words = self.tokenize(sentence)
+ #         if len(words) > max_words:
+ #             sentence = ' '.join(words[:max_words])
+ #         return sentence
+
+ #     async def summarize_text(self, text, max_length_per_sentence=25, target_ratio=None, **kwargs):
+ #         sentences = self.split(text)
+ #         summarized = await alcall(
+ #             sentences, self.summarize_sentence, **kwargs
+ #         )
+ #         summarized = [
+ #             self.enforce_max_sentence_length(sentence, max_length_per_sentence)
+ #             for sentence in summarized
+ #         ]
+
+ #         original_length = len(self.tokenize(text))
+ #         summarized_length = len(self.tokenize(' '.join(summarized)))
+ #         current_ratio = summarized_length / original_length
+
+ #         target_ratio = target_ratio or self.target_ratio
+ #         if current_ratio > target_ratio:
+ #             words_to_remove = int((current_ratio - target_ratio) * original_length)
+ #             return ' '.join(summarized[:-words_to_remove])
+
+ #         return ' '.join(summarized)
lionagi/experimental/compressor/util.py
@@ -0,0 +1,70 @@
+ # import asyncio
+ # from lionagi import alcall
+ # from lionagi.libs.ln_convert import to_list
+ # import numpy as np
+
+ # def split_into_segments(text):
+ #     segments = text.split(".")  # Splitting by period followed by a space
+ #     return [segment.strip() for segment in segments if segment]
+
+ # # Tokenize the segment
+ # def tokenize(segment):
+ #     tokens = segment.split()  # Simple space-based tokenization
+ #     return tokens
+
+ # async def calculate_perplexity(system_msg: str, imodel, tokens, initial_context=None, **kwargs):
+ #     _tasks = []
+ #     _context = initial_context or ""
+ #     for i in range(len(tokens)):
+ #         _context += " " + tokens[i]
+ #         messages = [
+ #             {"role": "system", "content": system_msg},
+ #             {"role": "user", "content": _context},
+ #         ]
+ #         task = asyncio.create_task(
+ #             imodel.call_chat_completion(
+ #                 messages=messages, logprobs=True, max_tokens=1, **kwargs
+ #             )
+ #         )
+ #         _tasks.append(task)
+
+ #     results = await asyncio.gather(*_tasks)
+ #     logprobs = [
+ #         result[1]["choices"][0]["logprobs"]["content"] for result in results
+ #     ]
+ #     logprobs = to_list(logprobs, flatten=True, dropna=True)
+ #     logprobs = [lprob_["logprob"] for lprob_ in logprobs]
+ #     return np.exp(np.mean(logprobs))
+
+ # async def rank_by_perplexity(
+ #     text: str | list[str] = None,  # if list we assume they are already well split
+ #     initial_text=None,
+
+ #     segments,
+ #     initial_text=None,
+ #     cumulative=False,
+ #     **kwargs
+ # ):
+ #     _segments = []
+ #     _context = initial_text or ""
+ #     _task = []
+
+ #     if cumulative:
+ #         for i in range(1, len(segments)):
+ #             _context += " " + segments[i - 1]
+ #             _segments.append(_context)
+ #     else:
+ #         _segments = segments
+
+ #     for i in segments:
+ #         _task.append(asyncio.create_task(
+ #             calculate_perplexity(
+ #                 self.system_msg, self.imodel, self.tokenize(i), **kwargs)
+ #         )
+ #         )
+ #     segment_perplexities = await asyncio.gather(*_task)
+
+ #     return {
+ #         segment: perplexity
+ #         for segment, perplexity in zip(segments, segment_perplexities)
+ #     }
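
For orientation: the commented-out calculate_perplexity above exponentiates the mean token logprob, i.e. it returns the geometric-mean token probability. The textbook perplexity negates the mean, so the two quantities rank segments in exactly opposite order. A standalone sketch of the conventional form (NumPy assumed):

import numpy as np

def perplexity(logprobs: list[float]) -> float:
    # Conventional definition: PPL = exp(-mean(log p_i)).
    # The helper above returns np.exp(np.mean(logprobs)) instead.
    return float(np.exp(-np.mean(logprobs)))

print(perplexity([-0.1, -2.3, -0.7]))  # ~2.81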
lionagi/experimental/directive/__init__.py
@@ -0,0 +1,19 @@
+ # from ..form.predict import predict
+ # from .select import select
+ # from ..form.score import score
+ # from ..form.react import react
+ # from .vote import vote
+ # from ..form.plan import plan
+ # from .cot import chain_of_thoughts, chain_of_react
+
+
+ # __all__ = [
+ #     "predict",
+ #     "select",
+ #     "score",
+ #     "vote",
+ #     "react",
+ #     "plan",
+ #     "chain_of_thoughts",
+ #     "chain_of_react",
+ # ]
lionagi/experimental/directive/parser/base_parser.py
@@ -1,7 +1,23 @@
+ """
+ Copyright 2024 HaiyangLi
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ """
+
  from typing import List, Optional

- from lionagi.libs.ln_tokenizer import BaseToken
- from ..schema import IfNode, TryNode, ForNode
+ from lionagi.experimental.directive.tokenizer import BaseToken
+ from ..template.schema import IfNode, TryNode, ForNode


  class BaseDirectiveParser:
@@ -76,10 +92,19 @@ class BaseDirectiveParser:
          self.next_token()

      def skip_semicolon(self):
+         """Skips a semicolon token if it is the current token."""
          if self.current_token and self.current_token.value == ";":
              self.next_token()

      def parse_expression(self):
+         """Parses an expression until a semicolon is encountered.
+
+         Returns:
+             str: The parsed expression as a string.
+
+         Raises:
+             SyntaxError: If a semicolon is not found at the end of the expression.
+         """
          expr = ""
          while self.current_token and self.current_token.value != ";":
              expr += self.current_token.value + " "
@@ -91,6 +116,11 @@ class BaseDirectiveParser:
          return expr.strip()

      def parse_if_block(self):
+         """Parses a block of statements for an IF condition.
+
+         Returns:
+             list: The parsed block of statements as a list of strings.
+         """
          block = []
          # Parse the block until 'ELSE', 'ENDIF', ensuring not to include semicolons as part of the block
          while self.current_token and self.current_token.value not in ("ENDIF", "ELSE"):
@@ -103,6 +133,14 @@ class BaseDirectiveParser:
          return block

      def parse_if_statement(self):
+         """Parses an IF statement.
+
+         Returns:
+             IfNode: The parsed IF statement as an IfNode object.
+
+         Raises:
+             SyntaxError: If the IF statement is not properly formed.
+         """
          if self.current_token.type != "KEYWORD" or self.current_token.value != "IF":
              raise SyntaxError("Expected IF statement")
          self.next_token()  # Skip 'IF'
@@ -125,6 +163,14 @@ class BaseDirectiveParser:
          return IfNode(condition, true_block, false_block)

      def parse_for_statement(self):
+         """Parses a FOR statement.
+
+         Returns:
+             ForNode: The parsed FOR statement as a ForNode object.
+
+         Raises:
+             SyntaxError: If the FOR statement is not properly formed.
+         """
          if self.current_token.type != "KEYWORD" or self.current_token.value != "FOR":
              raise SyntaxError("Expected FOR statement")
          self.next_token()  # Skip 'FOR'
@@ -153,6 +199,11 @@ class BaseDirectiveParser:
          return ForNode(iterator, collection, true_block)

      def parse_for_block(self):
+         """Parses a block of statements for a FOR loop.
+
+         Returns:
+             list: The parsed block of statements as a list of strings.
+         """
          block = []
          # Skip initial 'DO' if present
          if self.current_token and self.current_token.value == "DO":
@@ -173,6 +224,14 @@ class BaseDirectiveParser:
          return block

      def parse_try_statement(self):
+         """Parses a TRY statement.
+
+         Returns:
+             TryNode: The parsed TRY statement as a TryNode object.
+
+         Raises:
+             SyntaxError: If the TRY statement is not properly formed.
+         """
          if self.current_token.type != "KEYWORD" or self.current_token.value != "TRY":
              raise SyntaxError("Expected TRY statement")
          self.next_token()  # Skip 'TRY'
@@ -196,6 +255,14 @@ class BaseDirectiveParser:
          return TryNode(try_block, except_block)

      def parse_try_block(self, stop_keyword):
+         """Parses a block of statements for a TRY or EXCEPT clause.
+
+         Args:
+             stop_keyword (str): The keyword that indicates the end of the block.
+
+         Returns:
+             list: The parsed block of statements as a list of strings.
+         """
          block = []
          while self.current_token and self.current_token.value != stop_keyword:
              if self.current_token.value == "DO":
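
Taken together, the parser methods above imply a small directive grammar: semicolon-terminated expressions, DO-introduced blocks, and IF/ELSE/ENDIF, FOR, and TRY keywords. A hypothetical directive in that grammar, inferred from the keyword checks rather than taken from the package's documentation:

IF temperature > 100; DO alert; ELSE; DO log; ENDIF;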
lionagi/experimental/directive/template/base_template.py
@@ -1,10 +1,26 @@
+ """
+ Copyright 2024 HaiyangLi
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ """
+
  from typing import Any, Dict
  import re

  from ..evaluator.base_evaluator import BaseEvaluator


- class BaseDirectiveTemplate:
+ class DirectiveTemplate:
      """Enhanced base template class for processing templates with conditionals and loops."""

      def __init__(self, template_str: str):
lionagi/experimental/directive/tokenizer.py
@@ -1,3 +1,19 @@
+ """
+ Copyright 2024 HaiyangLi
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ """
+
  import re


lionagi/experimental/evaluator/ast_evaluator.py
@@ -1,3 +1,19 @@
+ """
+ Copyright 2024 HaiyangLi
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ """
+
  import ast
  import operator

lionagi/experimental/evaluator/base_evaluator.py
@@ -1,3 +1,19 @@
+ """
+ Copyright 2024 HaiyangLi
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ """
+
  import ast
  import operator
  from typing import Any, Dict, Tuple, Callable
lionagi/experimental/knowledge/base.py
@@ -0,0 +1,10 @@
+ from ..collections.abc import Component, Field
+ from ..collections import Pile, pile
+ from ..generic import Node
+
+
+ class Knowledge(Component):
+
+     knowledge_base: Pile[Node] = Field(
+         default_factory=pile,
+     )
lionagi/experimental/memory/__init__.py
File without changes

lionagi/experimental/strategies/__init__.py
File without changes
lionagi/experimental/strategies/base.py
@@ -0,0 +1 @@
+ # TODO
lionagi/integrations/bridge/langchain_/documents.py
@@ -28,6 +28,10 @@ def to_langchain_document(datanode: T, **kwargs: Any) -> Any:
      SysUtil.change_dict_key(dnode, old_key="content", new_key="page_content")
      SysUtil.change_dict_key(dnode, old_key="lc_id", new_key="id_")
      dnode = {**dnode, **kwargs}
+     dnode = {k: v for k, v in dnode.items() if v is not None}
+     if "page_content" not in dnode:
+         dnode["page_content"] = ""
+
      return LangchainDocument(**dnode)


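The three added lines harden to_langchain_document: None values are dropped so they cannot override LangchainDocument defaults, and page_content, which langchain's Document expects, is guaranteed to exist. The same pattern in isolation, with made-up field values:

# Drop None values, then guarantee the required key exists.
dnode = {"page_content": None, "id_": "doc-1", "metadata": {"source": "x"}}
dnode = {k: v for k, v in dnode.items() if v is not None}
if "page_content" not in dnode:
    dnode["page_content"] = ""
# -> {'id_': 'doc-1', 'metadata': {'source': 'x'}, 'page_content': ''}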
lionagi/integrations/bridge/llamaindex_/index.py
@@ -0,0 +1,30 @@
+ class LlamaIndex:
+
+     @classmethod
+     def index(
+         cls,
+         nodes,
+         llm_obj=None,
+         llm_class=None,
+         llm_kwargs=None,
+         index_type=None,
+         **kwargs,
+     ):
+         from llama_index.core import Settings
+         from llama_index.llms.openai import OpenAI
+
+         if not llm_obj:
+             llm_class = llm_class or OpenAI
+             llm_kwargs = llm_kwargs or {}
+             if "model" not in llm_kwargs:
+                 llm_kwargs["model"] = "gpt-4o"
+             llm_obj = llm_class(**llm_kwargs)
+
+         Settings.llm = llm_obj
+
+         if not index_type:
+             from llama_index.core import VectorStoreIndex
+
+             index_type = VectorStoreIndex
+
+         return index_type(nodes, **kwargs)
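
A hedged usage sketch of the new helper; the document content is illustrative, and an OpenAI API key is assumed to be configured for the default LLM and embedding settings:

from llama_index.core import Document
from lionagi.integrations.bridge.llamaindex_.index import LlamaIndex

nodes = [Document(text="hello world")]  # illustrative input
# With no overrides this sets Settings.llm to OpenAI(model="gpt-4o")
# and builds a VectorStoreIndex, per the defaults above.
index = LlamaIndex.index(nodes)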
lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py
@@ -100,3 +100,9 @@ class LlamaIndexBridge:
          from .reader import get_llama_index_reader

          return get_llama_index_reader(*args, **kwargs)
+
+     @staticmethod
+     def index(nodes, **kwargs):
+         from .index import LlamaIndex
+
+         return LlamaIndex.index(nodes, **kwargs)