lionagi 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (257)
  1. lionagi/__init__.py +60 -5
  2. lionagi/core/__init__.py +0 -25
  3. lionagi/core/_setting/_setting.py +59 -0
  4. lionagi/core/action/__init__.py +14 -0
  5. lionagi/core/action/function_calling.py +136 -0
  6. lionagi/core/action/manual.py +1 -0
  7. lionagi/core/action/node.py +109 -0
  8. lionagi/core/action/tool.py +114 -0
  9. lionagi/core/action/tool_manager.py +356 -0
  10. lionagi/core/agent/base_agent.py +27 -13
  11. lionagi/core/agent/eval/evaluator.py +1 -0
  12. lionagi/core/agent/eval/vote.py +40 -0
  13. lionagi/core/agent/learn/learner.py +59 -0
  14. lionagi/core/agent/plan/unit_template.py +1 -0
  15. lionagi/core/collections/__init__.py +17 -0
  16. lionagi/core/{generic/data_logger.py → collections/_logger.py} +69 -55
  17. lionagi/core/collections/abc/__init__.py +53 -0
  18. lionagi/core/collections/abc/component.py +615 -0
  19. lionagi/core/collections/abc/concepts.py +297 -0
  20. lionagi/core/collections/abc/exceptions.py +150 -0
  21. lionagi/core/collections/abc/util.py +45 -0
  22. lionagi/core/collections/exchange.py +161 -0
  23. lionagi/core/collections/flow.py +426 -0
  24. lionagi/core/collections/model.py +419 -0
  25. lionagi/core/collections/pile.py +913 -0
  26. lionagi/core/collections/progression.py +236 -0
  27. lionagi/core/collections/util.py +64 -0
  28. lionagi/core/director/direct.py +314 -0
  29. lionagi/core/director/director.py +2 -0
  30. lionagi/core/{execute/branch_executor.py → engine/branch_engine.py} +134 -97
  31. lionagi/core/{execute/instruction_map_executor.py → engine/instruction_map_engine.py} +80 -55
  32. lionagi/{experimental/directive/evaluator → core/engine}/script_engine.py +17 -1
  33. lionagi/core/executor/base_executor.py +90 -0
  34. lionagi/core/{execute/structure_executor.py → executor/graph_executor.py} +83 -67
  35. lionagi/core/{execute → executor}/neo4j_executor.py +70 -67
  36. lionagi/core/generic/__init__.py +3 -33
  37. lionagi/core/generic/edge.py +42 -92
  38. lionagi/core/generic/edge_condition.py +16 -0
  39. lionagi/core/generic/graph.py +236 -0
  40. lionagi/core/generic/hyperedge.py +1 -0
  41. lionagi/core/generic/node.py +156 -221
  42. lionagi/core/generic/tree.py +48 -0
  43. lionagi/core/generic/tree_node.py +79 -0
  44. lionagi/core/mail/__init__.py +12 -0
  45. lionagi/core/mail/mail.py +25 -0
  46. lionagi/core/mail/mail_manager.py +139 -58
  47. lionagi/core/mail/package.py +45 -0
  48. lionagi/core/mail/start_mail.py +36 -0
  49. lionagi/core/message/__init__.py +19 -0
  50. lionagi/core/message/action_request.py +133 -0
  51. lionagi/core/message/action_response.py +135 -0
  52. lionagi/core/message/assistant_response.py +95 -0
  53. lionagi/core/message/instruction.py +234 -0
  54. lionagi/core/message/message.py +101 -0
  55. lionagi/core/message/system.py +86 -0
  56. lionagi/core/message/util.py +283 -0
  57. lionagi/core/report/__init__.py +4 -0
  58. lionagi/core/report/base.py +217 -0
  59. lionagi/core/report/form.py +231 -0
  60. lionagi/core/report/report.py +166 -0
  61. lionagi/core/report/util.py +28 -0
  62. lionagi/core/rule/_default.py +16 -0
  63. lionagi/core/rule/action.py +99 -0
  64. lionagi/core/rule/base.py +238 -0
  65. lionagi/core/rule/boolean.py +56 -0
  66. lionagi/core/rule/choice.py +47 -0
  67. lionagi/core/rule/mapping.py +96 -0
  68. lionagi/core/rule/number.py +71 -0
  69. lionagi/core/rule/rulebook.py +109 -0
  70. lionagi/core/rule/string.py +52 -0
  71. lionagi/core/rule/util.py +35 -0
  72. lionagi/core/session/branch.py +431 -0
  73. lionagi/core/session/directive_mixin.py +287 -0
  74. lionagi/core/session/session.py +229 -903
  75. lionagi/core/structure/__init__.py +1 -0
  76. lionagi/core/structure/chain.py +1 -0
  77. lionagi/core/structure/forest.py +1 -0
  78. lionagi/core/structure/graph.py +1 -0
  79. lionagi/core/structure/tree.py +1 -0
  80. lionagi/core/unit/__init__.py +5 -0
  81. lionagi/core/unit/parallel_unit.py +245 -0
  82. lionagi/core/unit/template/action.py +81 -0
  83. lionagi/core/unit/template/base.py +51 -0
  84. lionagi/core/unit/template/plan.py +84 -0
  85. lionagi/core/unit/template/predict.py +109 -0
  86. lionagi/core/unit/template/score.py +124 -0
  87. lionagi/core/unit/template/select.py +104 -0
  88. lionagi/core/unit/unit.py +362 -0
  89. lionagi/core/unit/unit_form.py +305 -0
  90. lionagi/core/unit/unit_mixin.py +1168 -0
  91. lionagi/core/unit/util.py +71 -0
  92. lionagi/core/validator/validator.py +364 -0
  93. lionagi/core/work/work.py +74 -0
  94. lionagi/core/work/work_function.py +92 -0
  95. lionagi/core/work/work_queue.py +81 -0
  96. lionagi/core/work/worker.py +195 -0
  97. lionagi/core/work/worklog.py +124 -0
  98. lionagi/experimental/compressor/base.py +46 -0
  99. lionagi/experimental/compressor/llm_compressor.py +247 -0
  100. lionagi/experimental/compressor/llm_summarizer.py +61 -0
  101. lionagi/experimental/compressor/util.py +70 -0
  102. lionagi/experimental/directive/__init__.py +19 -0
  103. lionagi/experimental/directive/parser/base_parser.py +69 -2
  104. lionagi/experimental/directive/{template_ → template}/base_template.py +17 -1
  105. lionagi/{libs/ln_tokenizer.py → experimental/directive/tokenizer.py} +16 -0
  106. lionagi/experimental/{directive/evaluator → evaluator}/ast_evaluator.py +16 -0
  107. lionagi/experimental/{directive/evaluator → evaluator}/base_evaluator.py +16 -0
  108. lionagi/experimental/knowledge/__init__.py +0 -0
  109. lionagi/experimental/knowledge/base.py +10 -0
  110. lionagi/experimental/knowledge/graph.py +0 -0
  111. lionagi/experimental/memory/__init__.py +0 -0
  112. lionagi/experimental/strategies/__init__.py +0 -0
  113. lionagi/experimental/strategies/base.py +1 -0
  114. lionagi/integrations/bridge/langchain_/documents.py +4 -0
  115. lionagi/integrations/bridge/llamaindex_/index.py +30 -0
  116. lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py +6 -0
  117. lionagi/integrations/chunker/chunk.py +161 -24
  118. lionagi/integrations/config/oai_configs.py +34 -3
  119. lionagi/integrations/config/openrouter_configs.py +14 -2
  120. lionagi/integrations/loader/load.py +122 -21
  121. lionagi/integrations/loader/load_util.py +6 -77
  122. lionagi/integrations/provider/_mapping.py +46 -0
  123. lionagi/integrations/provider/litellm.py +2 -1
  124. lionagi/integrations/provider/mlx_service.py +16 -9
  125. lionagi/integrations/provider/oai.py +91 -4
  126. lionagi/integrations/provider/ollama.py +6 -5
  127. lionagi/integrations/provider/openrouter.py +115 -8
  128. lionagi/integrations/provider/services.py +2 -2
  129. lionagi/integrations/provider/transformers.py +18 -22
  130. lionagi/integrations/storage/__init__.py +3 -3
  131. lionagi/integrations/storage/neo4j.py +52 -60
  132. lionagi/integrations/storage/storage_util.py +45 -47
  133. lionagi/integrations/storage/structure_excel.py +285 -0
  134. lionagi/integrations/storage/to_excel.py +23 -7
  135. lionagi/libs/__init__.py +26 -1
  136. lionagi/libs/ln_api.py +75 -20
  137. lionagi/libs/ln_context.py +37 -0
  138. lionagi/libs/ln_convert.py +21 -9
  139. lionagi/libs/ln_func_call.py +69 -28
  140. lionagi/libs/ln_image.py +107 -0
  141. lionagi/libs/ln_nested.py +26 -11
  142. lionagi/libs/ln_parse.py +82 -23
  143. lionagi/libs/ln_queue.py +16 -0
  144. lionagi/libs/ln_tokenize.py +164 -0
  145. lionagi/libs/ln_validate.py +16 -0
  146. lionagi/libs/special_tokens.py +172 -0
  147. lionagi/libs/sys_util.py +95 -24
  148. lionagi/lions/coder/code_form.py +13 -0
  149. lionagi/lions/coder/coder.py +50 -3
  150. lionagi/lions/coder/util.py +30 -25
  151. lionagi/tests/libs/test_func_call.py +23 -21
  152. lionagi/tests/libs/test_nested.py +36 -21
  153. lionagi/tests/libs/test_parse.py +1 -1
  154. lionagi/tests/test_core/collections/__init__.py +0 -0
  155. lionagi/tests/test_core/collections/test_component.py +206 -0
  156. lionagi/tests/test_core/collections/test_exchange.py +138 -0
  157. lionagi/tests/test_core/collections/test_flow.py +145 -0
  158. lionagi/tests/test_core/collections/test_pile.py +171 -0
  159. lionagi/tests/test_core/collections/test_progression.py +129 -0
  160. lionagi/tests/test_core/generic/__init__.py +0 -0
  161. lionagi/tests/test_core/generic/test_edge.py +67 -0
  162. lionagi/tests/test_core/generic/test_graph.py +96 -0
  163. lionagi/tests/test_core/generic/test_node.py +106 -0
  164. lionagi/tests/test_core/generic/test_tree_node.py +73 -0
  165. lionagi/tests/test_core/test_branch.py +115 -294
  166. lionagi/tests/test_core/test_form.py +46 -0
  167. lionagi/tests/test_core/test_report.py +105 -0
  168. lionagi/tests/test_core/test_validator.py +111 -0
  169. lionagi/version.py +1 -1
  170. lionagi-0.2.0.dist-info/LICENSE +202 -0
  171. lionagi-0.2.0.dist-info/METADATA +272 -0
  172. lionagi-0.2.0.dist-info/RECORD +240 -0
  173. lionagi/core/branch/base.py +0 -653
  174. lionagi/core/branch/branch.py +0 -474
  175. lionagi/core/branch/flow_mixin.py +0 -96
  176. lionagi/core/branch/util.py +0 -323
  177. lionagi/core/direct/__init__.py +0 -19
  178. lionagi/core/direct/cot.py +0 -123
  179. lionagi/core/direct/plan.py +0 -164
  180. lionagi/core/direct/predict.py +0 -166
  181. lionagi/core/direct/react.py +0 -171
  182. lionagi/core/direct/score.py +0 -279
  183. lionagi/core/direct/select.py +0 -170
  184. lionagi/core/direct/sentiment.py +0 -1
  185. lionagi/core/direct/utils.py +0 -110
  186. lionagi/core/direct/vote.py +0 -64
  187. lionagi/core/execute/base_executor.py +0 -47
  188. lionagi/core/flow/baseflow.py +0 -23
  189. lionagi/core/flow/monoflow/ReAct.py +0 -238
  190. lionagi/core/flow/monoflow/__init__.py +0 -9
  191. lionagi/core/flow/monoflow/chat.py +0 -95
  192. lionagi/core/flow/monoflow/chat_mixin.py +0 -253
  193. lionagi/core/flow/monoflow/followup.py +0 -213
  194. lionagi/core/flow/polyflow/__init__.py +0 -1
  195. lionagi/core/flow/polyflow/chat.py +0 -251
  196. lionagi/core/form/action_form.py +0 -26
  197. lionagi/core/form/field_validator.py +0 -287
  198. lionagi/core/form/form.py +0 -302
  199. lionagi/core/form/mixin.py +0 -214
  200. lionagi/core/form/scored_form.py +0 -13
  201. lionagi/core/generic/action.py +0 -26
  202. lionagi/core/generic/component.py +0 -455
  203. lionagi/core/generic/condition.py +0 -44
  204. lionagi/core/generic/mail.py +0 -90
  205. lionagi/core/generic/mailbox.py +0 -36
  206. lionagi/core/generic/relation.py +0 -70
  207. lionagi/core/generic/signal.py +0 -22
  208. lionagi/core/generic/structure.py +0 -362
  209. lionagi/core/generic/transfer.py +0 -20
  210. lionagi/core/generic/work.py +0 -40
  211. lionagi/core/graph/graph.py +0 -126
  212. lionagi/core/graph/tree.py +0 -190
  213. lionagi/core/mail/schema.py +0 -63
  214. lionagi/core/messages/schema.py +0 -325
  215. lionagi/core/tool/__init__.py +0 -5
  216. lionagi/core/tool/tool.py +0 -28
  217. lionagi/core/tool/tool_manager.py +0 -282
  218. lionagi/experimental/tool/function_calling.py +0 -43
  219. lionagi/experimental/tool/manual.py +0 -66
  220. lionagi/experimental/tool/schema.py +0 -59
  221. lionagi/experimental/tool/tool_manager.py +0 -138
  222. lionagi/experimental/tool/util.py +0 -16
  223. lionagi/experimental/work/_logger.py +0 -25
  224. lionagi/experimental/work/schema.py +0 -30
  225. lionagi/experimental/work/tests.py +0 -72
  226. lionagi/experimental/work/work_function.py +0 -89
  227. lionagi/experimental/work/worker.py +0 -12
  228. lionagi/integrations/bridge/llamaindex_/get_index.py +0 -294
  229. lionagi/tests/test_core/test_base_branch.py +0 -426
  230. lionagi/tests/test_core/test_chat_flow.py +0 -63
  231. lionagi/tests/test_core/test_mail_manager.py +0 -75
  232. lionagi/tests/test_core/test_prompts.py +0 -51
  233. lionagi/tests/test_core/test_session.py +0 -254
  234. lionagi/tests/test_core/test_session_base_util.py +0 -313
  235. lionagi/tests/test_core/test_tool_manager.py +0 -95
  236. lionagi-0.1.1.dist-info/LICENSE +0 -9
  237. lionagi-0.1.1.dist-info/METADATA +0 -174
  238. lionagi-0.1.1.dist-info/RECORD +0 -190
  239. /lionagi/core/{branch → _setting}/__init__.py +0 -0
  240. /lionagi/core/{execute → agent/eval}/__init__.py +0 -0
  241. /lionagi/core/{flow → agent/learn}/__init__.py +0 -0
  242. /lionagi/core/{form → agent/plan}/__init__.py +0 -0
  243. /lionagi/core/{branch/executable_branch.py → agent/plan/plan.py} +0 -0
  244. /lionagi/core/{graph → director}/__init__.py +0 -0
  245. /lionagi/core/{messages → engine}/__init__.py +0 -0
  246. /lionagi/{experimental/directive/evaluator → core/engine}/sandbox_.py +0 -0
  247. /lionagi/{experimental/directive/evaluator → core/executor}/__init__.py +0 -0
  248. /lionagi/{experimental/directive/template_ → core/rule}/__init__.py +0 -0
  249. /lionagi/{experimental/tool → core/unit/template}/__init__.py +0 -0
  250. /lionagi/{experimental/work → core/validator}/__init__.py +0 -0
  251. /lionagi/core/{flow/mono_chat_mixin.py → work/__init__.py} +0 -0
  252. /lionagi/experimental/{work/exchange.py → compressor/__init__.py} +0 -0
  253. /lionagi/experimental/{work/util.py → directive/template/__init__.py} +0 -0
  254. /lionagi/experimental/directive/{schema.py → template/schema.py} +0 -0
  255. /lionagi/{tests/libs/test_async.py → experimental/evaluator/__init__.py} +0 -0
  256. {lionagi-0.1.1.dist-info → lionagi-0.2.0.dist-info}/WHEEL +0 -0
  257. {lionagi-0.1.1.dist-info → lionagi-0.2.0.dist-info}/top_level.txt +0 -0
lionagi/libs/ln_parse.py CHANGED
@@ -1,9 +1,23 @@
1
+ """
2
+ Copyright 2024 HaiyangLi
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
1
17
  from collections.abc import Callable
2
18
  import re
3
19
  import inspect
4
20
  import itertools
5
- import contextlib
6
- from functools import singledispatchmethod
7
21
  from typing import Any
8
22
  import numpy as np
9
23
  import lionagi.libs.ln_convert as convert
@@ -103,12 +117,12 @@ class ParseUtil:
103
117
  # inspired by langchain_core.output_parsers.json (MIT License)
104
118
  # https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/output_parsers/json.py
105
119
  @staticmethod
106
- def extract_code_block(
120
+ def extract_json_block(
107
121
  str_to_parse: str,
108
122
  language: str | None = None,
109
123
  regex_pattern: str | None = None,
110
124
  *,
111
- parser: Callable[[str], Any],
125
+ parser: Callable[[str], Any] = None,
112
126
  ) -> Any:
113
127
  """
114
128
  Extracts and parses a code block from Markdown content.
@@ -148,9 +162,37 @@ class ParseUtil:
148
162
  raise ValueError(
149
163
  f"No {language or 'specified'} code block found in the Markdown content."
150
164
  )
165
+ if not match:
166
+ str_to_parse = str_to_parse.strip()
167
+ if str_to_parse.startswith("```json\n") and str_to_parse.endswith("\n```"):
168
+ str_to_parse = str_to_parse[8:-4].strip()
151
169
 
170
+ parser = parser or ParseUtil.fuzzy_parse_json
152
171
  return parser(code_str)
153
172
 
173
@staticmethod
def extract_code_blocks(code: str) -> str:
    """
    Collects the contents of every fenced code block in a Markdown string.

    A fence is a line starting with three backticks, optionally preceded by
    up to three spaces of indentation (the CommonMark limit), so fences that
    sit inside list items are recognised too. Anything following the
    backticks on a fence line (for example a language tag) is ignored, and
    the fence lines themselves are never part of the output.

    Args:
        code (str): Markdown text that may contain fenced code blocks.

    Returns:
        str: The bodies of all code blocks in order of appearance, joined by
            a blank line. An empty string when no fenced block is present.
            A final block that is never closed is still returned if it has
            at least one line.
    """

    def _is_fence(line: str) -> bool:
        # CommonMark allows at most three spaces of indentation before a fence.
        stripped = line.lstrip(" ")
        return len(line) - len(stripped) <= 3 and stripped.startswith("```")

    code_blocks = []
    current_block = []
    inside_code_block = False

    for line in code.split("\n"):
        if _is_fence(line):
            if inside_code_block:
                # Closing fence: flush the lines gathered for this block.
                code_blocks.append("\n".join(current_block))
                current_block = []
            inside_code_block = not inside_code_block
        elif inside_code_block:
            current_block.append(line)

    # An unterminated trailing block is kept rather than silently dropped.
    if current_block:
        code_blocks.append("\n".join(current_block))

    return "\n\n".join(code_blocks)
195
+
154
196
  @staticmethod
155
197
  def md_to_json(
156
198
  str_to_parse: str,
@@ -181,7 +223,7 @@ class ParseUtil:
181
223
  >>> md_to_json('```json\\n{"key": "value"}\\n```', expected_keys=['key'])
182
224
  {'key': 'value'}
183
225
  """
184
- json_obj = ParseUtil.extract_code_block(
226
+ json_obj = ParseUtil.extract_json_block(
185
227
  str_to_parse, language="json", parser=parser or ParseUtil.fuzzy_parse_json
186
228
  )
187
229
 
@@ -385,7 +427,9 @@ class ParseUtil:
385
427
  return type_mapping.get(py_type, "object")
386
428
 
387
429
  @staticmethod
388
- def _func_to_schema(func, style="google"):
430
+ def _func_to_schema(
431
+ func, style="google", func_description=None, params_description=None
432
+ ):
389
433
  """
390
434
  Generates a schema description for a given function, using typing hints and
391
435
  docstrings. The schema includes the function's name, description, and parameters.
@@ -412,9 +456,11 @@ class ParseUtil:
412
456
  """
413
457
  # Extracting function name and docstring details
414
458
  func_name = func.__name__
415
- func_description, params_description = ParseUtil._extract_docstring_details(
416
- func, style
417
- )
459
+
460
+ if not func_description:
461
+ func_description, _ = ParseUtil._extract_docstring_details(func, style)
462
+ if not params_description:
463
+ _, params_description = ParseUtil._extract_docstring_details(func, style)
418
464
 
419
465
  # Extracting parameters with typing hints
420
466
  sig = inspect.signature(func)
@@ -634,7 +680,7 @@ class StringMatch:
634
680
  # Calculate Jaro-Winkler similarity scores for each potential match
635
681
  scores = np.array(
636
682
  [
637
- score_func(convert.to_str(word), correct_word)
683
+ score_func(str(word), str(correct_word))
638
684
  for correct_word in correct_words_list
639
685
  ]
640
686
  )
@@ -648,26 +694,39 @@ class StringMatch:
648
694
 
649
695
  if isinstance(out_, str):
650
696
  # first try to parse it straight as a fuzzy json
697
+
651
698
  try:
652
699
  out_ = ParseUtil.fuzzy_parse_json(out_)
653
- except Exception:
700
+ return StringMatch.correct_dict_keys(keys, out_)
701
+
702
+ except:
654
703
  try:
655
- # if failed we try to extract the json block and parse it
656
704
  out_ = ParseUtil.md_to_json(out_)
705
+ return StringMatch.correct_dict_keys(keys, out_)
706
+
657
707
  except Exception:
658
- # if still failed we try to extract the json block using re and parse it again
659
- match = re.search(r"```json\n({.*?})\n```", out_, re.DOTALL)
660
- if match:
661
- out_ = match.group(1)
662
- try:
663
- out_ = ParseUtil.fuzzy_parse_json(out_)
664
- except:
708
+ try:
709
+ # if failed we try to extract the json block and parse it
710
+ out_ = ParseUtil.md_to_json(out_)
711
+ return StringMatch.correct_dict_keys(keys, out_)
712
+
713
+ except Exception:
714
+ # if still failed we try to extract the json block using re and parse it again
715
+ match = re.search(r"```json\n({.*?})\n```", out_, re.DOTALL)
716
+ if match:
717
+ out_ = match.group(1)
665
718
  try:
666
- out_ = ParseUtil.fuzzy_parse_json(
667
- out_.replace("'", '"')
668
- )
719
+ out_ = ParseUtil.fuzzy_parse_json(out_)
720
+ return StringMatch.correct_dict_keys(keys, out_)
721
+
669
722
  except:
670
- pass
723
+ try:
724
+ out_ = ParseUtil.fuzzy_parse_json(
725
+ out_.replace("'", '"')
726
+ )
727
+ return StringMatch.correct_dict_keys(keys, out_)
728
+ except:
729
+ pass
671
730
 
672
731
  if isinstance(out_, dict):
673
732
  try:
lionagi/libs/ln_queue.py CHANGED
@@ -1,3 +1,19 @@
1
+ """
2
+ Copyright 2024 HaiyangLi
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
1
17
  """
2
18
  A class that manages asynchronous task processing with controlled concurrency.
3
19
  """
@@ -0,0 +1,164 @@
1
+ import tiktoken
2
+ import math
3
+ from .ln_convert import to_str
4
+ from .special_tokens import disallowed_tokens
5
+
6
+
7
class TokenizeUtil:
    """
    Static helpers for tokenizing text with tiktoken and for splitting text
    into overlapping chunks by character count or by token count.
    """

    @staticmethod
    def _drop_token_sequences(codes, sequences):
        """
        Removes every contiguous occurrence of the given token-id sequences.

        Args:
            codes (list[int]): Token ids to filter.
            sequences (list[list[int]]): Token-id sequences to remove.

        Returns:
            list[int]: ``codes`` without the matched sequences. At each
                position the longest matching sequence is removed.
        """
        # Index sequences by their first id so each position only inspects
        # the candidates that can possibly match there.
        by_head = {}
        for seq in sequences:
            if seq:
                by_head.setdefault(seq[0], []).append(seq)
        for group in by_head.values():
            group.sort(key=len, reverse=True)

        kept = []
        i = 0
        total = len(codes)
        while i < total:
            for seq in by_head.get(codes[i], ()):
                if codes[i : i + len(seq)] == seq:
                    i += len(seq)
                    break
            else:
                kept.append(codes[i])
                i += 1
        return kept

    @staticmethod
    def tokenize(
        text,
        encoding_model=None,
        encoding_name=None,
        return_byte=False,
        disallowed_tokens=disallowed_tokens,
    ):
        """
        Tokenizes text with a tiktoken encoding.

        Args:
            text (str): The text to tokenize.
            encoding_model (str, optional): Model name used to look up the
                encoding. If the lookup fails, ``encoding_name`` (or
                "cl100k_base") is used instead.
            encoding_name (str, optional): Name of the tiktoken encoding.
                A missing or unknown name falls back to "cl100k_base".
            return_byte (bool): If True, return the integer token ids
                instead of the decoded token strings.
            disallowed_tokens (list[str], optional): Strings whose encoded
                token sequences are removed from the result. A falsy value
                disables the filtering.

        Returns:
            list: Token ids if ``return_byte`` is True, otherwise each token
                id decoded individually to a string.
        """
        if encoding_model:
            try:
                encoding_name = tiktoken.encoding_name_for_model(encoding_model)
            except Exception:
                # Unknown model: keep the caller's encoding name, if any.
                encoding_name = encoding_name or "cl100k_base"

        # Previously an unknown encoding name left the encoding unset and
        # crashed on first use; fall back to the default instead.
        if not encoding_name or encoding_name not in tiktoken.list_encoding_names():
            encoding_name = "cl100k_base"
        encoding = tiktoken.get_encoding(encoding_name)

        codes = encoding.encode(text)
        if disallowed_tokens:
            # Each disallowed string may encode to several ids, so whole id
            # sequences are matched; comparing single ids against the list
            # of sequences never matched anything.
            special_encodings = [
                encoding.encode(token) for token in disallowed_tokens
            ]
            codes = TokenizeUtil._drop_token_sequences(codes, special_encodings)

        if return_byte:
            return codes

        return [encoding.decode([code]) for code in codes]

    @staticmethod
    def chunk_by_chars(
        text: str, chunk_size: int, overlap: float, threshold: int
    ) -> list[str | None]:
        """
        Chunks the input text into smaller parts, with optional overlap and
        a threshold for the final chunk.

        Args:
            text (str): The input text to chunk. Non-string input is
                converted with ``to_str`` first.
            chunk_size (int): The size of each chunk in characters.
            overlap (float): The amount of overlap between chunks; half of
                it is added on each side of a chunk boundary.
            threshold (int): The minimum size of the final chunk. A shorter
                remainder is merged into the previous chunk.

        Returns:
            list[str | None]: A list of text chunks. Text that fits in one
                chunk (including empty text) is returned as a single-item
                list.

        Raises:
            ValueError: If an error occurs during chunking.
        """

        def _chunk_n1():
            return [text]

        def _chunk_n2():
            # A remainder that is too short is not worth its own chunk.
            if len(text) - chunk_size <= threshold:
                return _chunk_n1()
            return [
                text[: chunk_size + overlap_size],
                text[chunk_size - overlap_size :],
            ]

        def _chunk_n3():
            chunks = [text[: chunk_size + overlap_size]]
            for i in range(1, n_chunks - 1):
                start_idx = chunk_size * i - overlap_size
                end_idx = chunk_size * (i + 1) + overlap_size
                chunks.append(text[start_idx:end_idx])

            last_start = chunk_size * (n_chunks - 1)
            if len(text) - last_start > threshold:
                chunks.append(text[last_start - overlap_size :])
            else:
                # Merge the short tail into the previous chunk, starting
                # where that chunk ended so nothing is duplicated.
                chunks[-1] += text[last_start + overlap_size :]

            return chunks

        try:
            if not isinstance(text, str):
                text = to_str(text)

            n_chunks = math.ceil(len(text) / chunk_size)
            # NOTE(review): chunk_by_tokens scales overlap by chunk_size,
            # while here it is used as an absolute character count, so a
            # fractional overlap yields 0 - confirm the intended unit.
            overlap_size = int(overlap / 2)

            # <= 1 also covers empty text, which used to return None.
            if n_chunks <= 1:
                return _chunk_n1()
            if n_chunks == 2:
                return _chunk_n2()
            return _chunk_n3()

        except Exception as e:
            raise ValueError(
                f"An error occurred while chunking the text. {e}"
            ) from e

    @staticmethod
    def chunk_by_tokens(
        text: str,
        chunk_size: int,
        overlap: float,
        threshold: int,  # minimum size of the final chunk in number of tokens
        encoding_model=None,
        encoding_name=None,
        return_tokens=False,
        return_byte=False,
    ) -> list[str | None]:
        """
        Chunks the input text by token count, with optional overlap and a
        threshold for the final chunk.

        Args:
            text (str): The input text to chunk.
            chunk_size (int): The size of each chunk in tokens.
            overlap (float): Overlap as a fraction of ``chunk_size``; half
                of it is added on each side of a chunk boundary.
            threshold (int): The minimum size of the final chunk in tokens.
                A shorter remainder is merged into the previous chunk.
            encoding_model (str, optional): Passed to ``tokenize``.
            encoding_name (str, optional): Passed to ``tokenize``.
            return_tokens (bool): If True, return lists of tokens instead
                of joined strings.
            return_byte (bool): Passed to ``tokenize``; if True the tokens
                are integer ids.

        Returns:
            list: One entry per chunk, either a string (tokens joined with
                a single space) or a list of tokens. Text that fits in one
                chunk is returned as a single-item list.
        """
        tokens = TokenizeUtil.tokenize(
            text, encoding_model, encoding_name, return_byte=return_byte
        )

        n_chunks = math.ceil(len(tokens) / chunk_size)
        overlap_size = int(overlap * chunk_size / 2)

        # NOTE(review): joining with " " inserts spaces between sub-word
        # tokens and fails for integer ids (return_byte=True without
        # return_tokens) - confirm whether "".join was intended.

        def _single():
            # Always a list, so callers can iterate chunks uniformly; this
            # used to be a bare string, or None for empty input.
            return [text] if not return_tokens else [tokens]

        if n_chunks <= 1:
            return _single()

        if n_chunks == 2:
            # Compare the true remainder (as chunk_by_chars does); the
            # modulo is 0 when the length is an exact multiple.
            if len(tokens) - chunk_size <= threshold:
                return _single()
            chunks = [
                tokens[: chunk_size + overlap_size],
                tokens[chunk_size - overlap_size :],
            ]
            if return_tokens:
                return chunks
            return [" ".join(chunk).strip() for chunk in chunks]

        chunks = [tokens[: chunk_size + overlap_size]]
        for i in range(1, n_chunks - 1):
            start_idx = chunk_size * i - overlap_size
            end_idx = chunk_size * (i + 1) + overlap_size
            chunks.append(tokens[start_idx:end_idx])

        last_start = chunk_size * (n_chunks - 1)
        if len(tokens) - last_start > threshold:
            chunks.append(tokens[last_start - overlap_size :])
        else:
            # Append only what the previous chunk does not already hold.
            # Slicing by the negated remainder re-added the overlap and,
            # for a remainder of 0, the entire token list.
            chunks[-1] += tokens[last_start + overlap_size :]

        if return_tokens:
            return chunks
        return [" ".join(chunk) for chunk in chunks]
@@ -1,3 +1,19 @@
1
+ """
2
+ Copyright 2024 HaiyangLi
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
1
17
  """
2
18
  This module provides functions for validating and fixing field values based on their data types.
3
19
 
@@ -0,0 +1,172 @@
1
+ # disallowed special tokens
2
+
3
+ disallowed_tokens = [
4
+ "▄",
5
+ "▅",
6
+ "▆",
7
+ "▇",
8
+ "█",
9
+ "▏",
10
+ "▎",
11
+ "▍",
12
+ "▌",
13
+ "▋",
14
+ "▊",
15
+ "▉",
16
+ "▔",
17
+ "▕",
18
+ "▁▁",
19
+ "▁▂",
20
+ "▁▃",
21
+ "▁▄",
22
+ "▁▅",
23
+ "▁▆",
24
+ "▁▇",
25
+ "▁█",
26
+ "▁▏",
27
+ "▁▎",
28
+ "▁▍",
29
+ "▁▌",
30
+ "▁▋",
31
+ "▁▊",
32
+ "▁▉",
33
+ "▁▔",
34
+ "▁▕",
35
+ "▄▄",
36
+ "▄▅",
37
+ "▄▆",
38
+ "▄▇",
39
+ "▄█",
40
+ "▄▏",
41
+ "▄▎",
42
+ "▄▍",
43
+ "▄▌",
44
+ "▄▋",
45
+ "▄▊",
46
+ "▄▉",
47
+ "▄▔",
48
+ "▄▕",
49
+ "▅▅",
50
+ "▅▆",
51
+ "▅▇",
52
+ "▅█",
53
+ "▅▏",
54
+ "▅▎",
55
+ "▅▍",
56
+ "▅▌",
57
+ "▅▋",
58
+ "▅▊",
59
+ "▅▉",
60
+ "▅▔",
61
+ "▅▕",
62
+ "▆▆",
63
+ "▆▇",
64
+ "▆█",
65
+ "▆▏",
66
+ "▆▎",
67
+ "▆▍",
68
+ "▆▌",
69
+ "▆▋",
70
+ "▆▊",
71
+ "▆▉",
72
+ "▆▔",
73
+ "▆▕",
74
+ "▇▇",
75
+ "▇█",
76
+ "▇▏",
77
+ "▇▎",
78
+ "▇▍",
79
+ "▇▌",
80
+ "▇▋",
81
+ "▇▊",
82
+ "▇▉",
83
+ "▇▔",
84
+ "▇▕",
85
+ "██",
86
+ "█▏",
87
+ "█▎",
88
+ "█▍",
89
+ "█▌",
90
+ "█▋",
91
+ "█▊",
92
+ "█▉",
93
+ "█",
94
+ "█▔",
95
+ "█▕",
96
+ "▏▏",
97
+ "▏▎",
98
+ "▏▍",
99
+ "▏▌",
100
+ "▏▋",
101
+ "▏▊",
102
+ "▏▉",
103
+ "▏",
104
+ "▏▔",
105
+ "▏▕",
106
+ "▎▎",
107
+ "▎▍",
108
+ "▎▌",
109
+ "▎▋",
110
+ "▎▊",
111
+ "▎▉",
112
+ "▎",
113
+ "▎▔",
114
+ "▎▕",
115
+ "▍▍",
116
+ "▍▌",
117
+ "▍▋",
118
+ "▍▊",
119
+ "▍▉",
120
+ "▍",
121
+ "▍▔",
122
+ "▍▕",
123
+ "▌▌",
124
+ "▌▋",
125
+ "▌▊",
126
+ "▌▉",
127
+ "▌",
128
+ "▌▔",
129
+ "▌▕",
130
+ "▋▋",
131
+ "▋▊",
132
+ "▋▉",
133
+ "▋",
134
+ "▋▔",
135
+ "▋▕",
136
+ "▊▊",
137
+ "▊▉",
138
+ "▊",
139
+ "▊▔",
140
+ "▊▕",
141
+ "▉▉",
142
+ "▉",
143
+ "▉▔",
144
+ "▉▕",
145
+ "▔▔",
146
+ "▔▕",
147
+ "▕▕",
148
+ "▁▁▁",
149
+ "▁▁▂",
150
+ "▁▁▃",
151
+ "▁▁▄",
152
+ "▁▁▅",
153
+ "▁▁▆",
154
+ "▁▁▇",
155
+ "▁▁█",
156
+ "▁▁▏",
157
+ "▁▁▎",
158
+ "▁▁▍",
159
+ "▁▁▌",
160
+ "▁▁▋",
161
+ "▁▁▊",
162
+ "▁▁▉",
163
+ "▁▁▔",
164
+ "▁▁▕",
165
+ "▁▂▂",
166
+ "▁▂▃",
167
+ "▁▂▄",
168
+ "▁▂▅",
169
+ "▁▂▆",
170
+ "▁▂▇",
171
+ "▁▂█",
172
+ ]
lionagi/libs/sys_util.py CHANGED
@@ -1,28 +1,17 @@
1
1
  """
2
- MIT License
3
-
4
- Copyright (c) 2023 HaiyangLi quantocean.li@gmail.com
5
-
6
- Permission is hereby granted, free of charge, to any person
7
- obtaining a copy of this software and associated documentation
8
- files (the "Software"), to deal in the Software without
9
- restriction, including without limitation the rights to use,
10
- copy, modify, merge, publish, distribute, sublicense, and/or
11
- sell copies of the Software, and to permit persons to whom
12
- the Software is furnished to do so, subject to the following
13
- conditions:
14
-
15
- The above copyright notice and this permission notice shall be
16
- included in all copies or substantial portions of the Software.
17
-
18
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20
- OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
22
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
23
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
- SOFTWARE.
2
+ Copyright 2024 HaiyangLi
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
26
15
  """
27
16
 
28
17
  import copy
@@ -430,6 +419,19 @@ class SysUtil:
430
419
 
431
420
  @staticmethod
432
421
  def list_files(dir_path: Path | str, extension: str = None) -> list[Path]:
422
+ """
423
+ Lists all files in a specified directory with an optional filter for file extensions.
424
+
425
+ Args:
426
+ dir_path (Path | str): The directory path where files are listed.
427
+ extension (str, optional): Filter files by extension. Default is None, which lists all files.
428
+
429
+ Returns:
430
+ list[Path]: A list of Path objects representing files in the directory.
431
+
432
+ Raises:
433
+ NotADirectoryError: If the provided dir_path is not a directory.
434
+ """
433
435
  dir_path = Path(dir_path)
434
436
  if not dir_path.is_dir():
435
437
  raise NotADirectoryError(f"{dir_path} is not a directory.")
@@ -440,6 +442,16 @@ class SysUtil:
440
442
 
441
443
  @staticmethod
442
444
  def copy_file(src: Path | str, dest: Path | str) -> None:
445
+ """
446
+ Copies a file from a source path to a destination path.
447
+
448
+ Args:
449
+ src (Path | str): The source file path.
450
+ dest (Path | str): The destination file path.
451
+
452
+ Raises:
453
+ FileNotFoundError: If the source file does not exist or is not a file.
454
+ """
443
455
  from shutil import copy2
444
456
 
445
457
  src, dest = Path(src), Path(dest)
@@ -450,6 +462,18 @@ class SysUtil:
450
462
 
451
463
  @staticmethod
452
464
  def get_size(path: Path | str) -> int:
465
+ """
466
+ Gets the size of a file or total size of files in a directory.
467
+
468
+ Args:
469
+ path (Path | str): The file or directory path.
470
+
471
+ Returns:
472
+ int: The size in bytes.
473
+
474
+ Raises:
475
+ FileNotFoundError: If the path does not exist.
476
+ """
453
477
  path = Path(path)
454
478
  if path.is_file():
455
479
  return path.stat().st_size
@@ -457,3 +481,50 @@ class SysUtil:
457
481
  return sum(f.stat().st_size for f in path.glob("**/*") if f.is_file())
458
482
  else:
459
483
  raise FileNotFoundError(f"{path} does not exist.")
484
+
485
@staticmethod
def save_to_file(
    text: str,
    directory: Path | str,
    filename: str,
    timestamp: bool = True,
    dir_exist_ok: bool = True,
    time_prefix: bool = False,
    custom_timestamp_format: str | None = None,
    random_hash_digits: int = 0,
    verbose: bool = True,
) -> bool:
    """
    Saves text to a file whose path is built by ``SysUtil.create_path``.

    The path-related arguments are forwarded unchanged to
    ``SysUtil.create_path``; see that method for their exact effect.

    Args:
        text (str): The text to save.
        directory (Path | str): The directory path to save the file.
        filename (str): The filename for the saved text.
        timestamp (bool): Forwarded to ``create_path``. Default is True.
        dir_exist_ok (bool): Forwarded to ``create_path``. Default is True.
        time_prefix (bool): Forwarded to ``create_path``. Default is False.
        custom_timestamp_format (str | None): Forwarded to ``create_path``.
            Default is None.
        random_hash_digits (int): Forwarded to ``create_path``. Default is 0.
        verbose (bool): If True, prints the file path after saving.
            Default is True.

    Returns:
        bool: True if the text was successfully saved.
    """
    file_path = SysUtil.create_path(
        directory=directory,
        filename=filename,
        timestamp=timestamp,
        dir_exist_ok=dir_exist_ok,
        time_prefix=time_prefix,
        custom_timestamp_format=custom_timestamp_format,
        random_hash_digits=random_hash_digits,
    )

    # Pin the encoding: the platform default can differ between machines
    # and may be unable to represent non-ASCII text.
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(text)

    if verbose:
        print(f"Text saved to: {file_path}")

    return True