lionagi 0.0.316__py3-none-any.whl → 0.1.1__py3-none-any.whl

Files changed (157)
  1. lionagi/core/__init__.py +19 -8
  2. lionagi/core/agent/__init__.py +0 -3
  3. lionagi/core/agent/base_agent.py +25 -30
  4. lionagi/core/branch/__init__.py +0 -4
  5. lionagi/core/branch/{base_branch.py → base.py} +12 -13
  6. lionagi/core/branch/branch.py +22 -19
  7. lionagi/core/branch/executable_branch.py +0 -347
  8. lionagi/core/branch/{branch_flow_mixin.py → flow_mixin.py} +5 -5
  9. lionagi/core/direct/__init__.py +10 -1
  10. lionagi/core/direct/cot.py +61 -26
  11. lionagi/core/direct/plan.py +10 -8
  12. lionagi/core/direct/predict.py +5 -5
  13. lionagi/core/direct/react.py +8 -8
  14. lionagi/core/direct/score.py +4 -4
  15. lionagi/core/direct/select.py +4 -4
  16. lionagi/core/direct/utils.py +7 -4
  17. lionagi/core/direct/vote.py +2 -2
  18. lionagi/core/execute/base_executor.py +47 -0
  19. lionagi/core/execute/branch_executor.py +296 -0
  20. lionagi/core/execute/instruction_map_executor.py +179 -0
  21. lionagi/core/execute/neo4j_executor.py +381 -0
  22. lionagi/core/execute/structure_executor.py +314 -0
  23. lionagi/core/flow/monoflow/ReAct.py +20 -20
  24. lionagi/core/flow/monoflow/chat.py +6 -6
  25. lionagi/core/flow/monoflow/chat_mixin.py +23 -33
  26. lionagi/core/flow/monoflow/followup.py +14 -15
  27. lionagi/core/flow/polyflow/chat.py +15 -12
  28. lionagi/core/{prompt/action_template.py → form/action_form.py} +2 -2
  29. lionagi/core/{prompt → form}/field_validator.py +40 -31
  30. lionagi/core/form/form.py +302 -0
  31. lionagi/core/form/mixin.py +214 -0
  32. lionagi/core/{prompt/scored_template.py → form/scored_form.py} +2 -2
  33. lionagi/core/generic/__init__.py +37 -0
  34. lionagi/core/generic/action.py +26 -0
  35. lionagi/core/generic/component.py +455 -0
  36. lionagi/core/generic/condition.py +44 -0
  37. lionagi/core/generic/data_logger.py +305 -0
  38. lionagi/core/generic/edge.py +162 -0
  39. lionagi/core/generic/mail.py +90 -0
  40. lionagi/core/generic/mailbox.py +36 -0
  41. lionagi/core/generic/node.py +285 -0
  42. lionagi/core/generic/relation.py +70 -0
  43. lionagi/core/generic/signal.py +22 -0
  44. lionagi/core/generic/structure.py +362 -0
  45. lionagi/core/generic/transfer.py +20 -0
  46. lionagi/core/generic/work.py +40 -0
  47. lionagi/core/graph/graph.py +126 -0
  48. lionagi/core/graph/tree.py +190 -0
  49. lionagi/core/mail/__init__.py +0 -8
  50. lionagi/core/mail/mail_manager.py +15 -12
  51. lionagi/core/mail/schema.py +9 -2
  52. lionagi/core/messages/__init__.py +0 -3
  53. lionagi/core/messages/schema.py +17 -225
  54. lionagi/core/session/__init__.py +0 -3
  55. lionagi/core/session/session.py +24 -22
  56. lionagi/core/tool/__init__.py +3 -1
  57. lionagi/core/tool/tool.py +28 -0
  58. lionagi/core/tool/tool_manager.py +75 -75
  59. lionagi/experimental/directive/evaluator/__init__.py +0 -0
  60. lionagi/experimental/directive/evaluator/ast_evaluator.py +115 -0
  61. lionagi/experimental/directive/evaluator/base_evaluator.py +202 -0
  62. lionagi/experimental/directive/evaluator/sandbox_.py +14 -0
  63. lionagi/experimental/directive/evaluator/script_engine.py +83 -0
  64. lionagi/experimental/directive/parser/__init__.py +0 -0
  65. lionagi/experimental/directive/parser/base_parser.py +215 -0
  66. lionagi/experimental/directive/schema.py +36 -0
  67. lionagi/experimental/directive/template_/__init__.py +0 -0
  68. lionagi/experimental/directive/template_/base_template.py +63 -0
  69. lionagi/experimental/tool/__init__.py +0 -0
  70. lionagi/experimental/tool/function_calling.py +43 -0
  71. lionagi/experimental/tool/manual.py +66 -0
  72. lionagi/experimental/tool/schema.py +59 -0
  73. lionagi/experimental/tool/tool_manager.py +138 -0
  74. lionagi/experimental/tool/util.py +16 -0
  75. lionagi/experimental/work/__init__.py +0 -0
  76. lionagi/experimental/work/_logger.py +25 -0
  77. lionagi/experimental/work/exchange.py +0 -0
  78. lionagi/experimental/work/schema.py +30 -0
  79. lionagi/experimental/work/tests.py +72 -0
  80. lionagi/experimental/work/util.py +0 -0
  81. lionagi/experimental/work/work_function.py +89 -0
  82. lionagi/experimental/work/worker.py +12 -0
  83. lionagi/integrations/bridge/autogen_/__init__.py +0 -0
  84. lionagi/integrations/bridge/autogen_/autogen_.py +124 -0
  85. lionagi/integrations/bridge/llamaindex_/get_index.py +294 -0
  86. lionagi/integrations/bridge/llamaindex_/llama_pack.py +227 -0
  87. lionagi/integrations/bridge/transformers_/__init__.py +0 -0
  88. lionagi/integrations/bridge/transformers_/install_.py +36 -0
  89. lionagi/integrations/chunker/chunk.py +7 -7
  90. lionagi/integrations/config/oai_configs.py +5 -5
  91. lionagi/integrations/config/ollama_configs.py +1 -1
  92. lionagi/integrations/config/openrouter_configs.py +1 -1
  93. lionagi/integrations/loader/load.py +6 -6
  94. lionagi/integrations/loader/load_util.py +8 -8
  95. lionagi/integrations/storage/__init__.py +3 -0
  96. lionagi/integrations/storage/neo4j.py +673 -0
  97. lionagi/integrations/storage/storage_util.py +289 -0
  98. lionagi/integrations/storage/to_csv.py +63 -0
  99. lionagi/integrations/storage/to_excel.py +67 -0
  100. lionagi/libs/ln_api.py +3 -3
  101. lionagi/libs/ln_knowledge_graph.py +405 -0
  102. lionagi/libs/ln_parse.py +43 -6
  103. lionagi/libs/ln_queue.py +101 -0
  104. lionagi/libs/ln_tokenizer.py +57 -0
  105. lionagi/libs/ln_validate.py +288 -0
  106. lionagi/libs/sys_util.py +29 -7
  107. lionagi/lions/__init__.py +0 -0
  108. lionagi/lions/coder/__init__.py +0 -0
  109. lionagi/lions/coder/add_feature.py +20 -0
  110. lionagi/lions/coder/base_prompts.py +22 -0
  111. lionagi/lions/coder/coder.py +121 -0
  112. lionagi/lions/coder/util.py +91 -0
  113. lionagi/lions/researcher/__init__.py +0 -0
  114. lionagi/lions/researcher/data_source/__init__.py +0 -0
  115. lionagi/lions/researcher/data_source/finhub_.py +191 -0
  116. lionagi/lions/researcher/data_source/google_.py +199 -0
  117. lionagi/lions/researcher/data_source/wiki_.py +96 -0
  118. lionagi/lions/researcher/data_source/yfinance_.py +21 -0
  119. lionagi/tests/integrations/__init__.py +0 -0
  120. lionagi/tests/libs/__init__.py +0 -0
  121. lionagi/tests/libs/test_async.py +0 -0
  122. lionagi/tests/libs/test_field_validators.py +353 -0
  123. lionagi/tests/libs/test_queue.py +67 -0
  124. lionagi/tests/test_core/test_base_branch.py +0 -1
  125. lionagi/tests/test_core/test_branch.py +2 -0
  126. lionagi/tests/test_core/test_session_base_util.py +1 -0
  127. lionagi/version.py +1 -1
  128. {lionagi-0.0.316.dist-info → lionagi-0.1.1.dist-info}/METADATA +1 -1
  129. lionagi-0.1.1.dist-info/RECORD +190 -0
  130. lionagi/core/prompt/prompt_template.py +0 -312
  131. lionagi/core/schema/__init__.py +0 -22
  132. lionagi/core/schema/action_node.py +0 -29
  133. lionagi/core/schema/base_mixin.py +0 -296
  134. lionagi/core/schema/base_node.py +0 -199
  135. lionagi/core/schema/condition.py +0 -24
  136. lionagi/core/schema/data_logger.py +0 -354
  137. lionagi/core/schema/data_node.py +0 -93
  138. lionagi/core/schema/prompt_template.py +0 -67
  139. lionagi/core/schema/structure.py +0 -912
  140. lionagi/core/tool/manual.py +0 -1
  141. lionagi-0.0.316.dist-info/RECORD +0 -121
  142. /lionagi/core/{branch/base → execute}/__init__.py +0 -0
  143. /lionagi/core/flow/{base/baseflow.py → baseflow.py} +0 -0
  144. /lionagi/core/flow/{base/__init__.py → mono_chat_mixin.py} +0 -0
  145. /lionagi/core/{prompt → form}/__init__.py +0 -0
  146. /lionagi/{tests/test_integrations → core/graph}/__init__.py +0 -0
  147. /lionagi/{tests/test_libs → experimental}/__init__.py +0 -0
  148. /lionagi/{tests/test_libs/test_async.py → experimental/directive/__init__.py} +0 -0
  149. /lionagi/tests/{test_libs → libs}/test_api.py +0 -0
  150. /lionagi/tests/{test_libs → libs}/test_convert.py +0 -0
  151. /lionagi/tests/{test_libs → libs}/test_func_call.py +0 -0
  152. /lionagi/tests/{test_libs → libs}/test_nested.py +0 -0
  153. /lionagi/tests/{test_libs → libs}/test_parse.py +0 -0
  154. /lionagi/tests/{test_libs → libs}/test_sys_util.py +0 -0
  155. {lionagi-0.0.316.dist-info → lionagi-0.1.1.dist-info}/LICENSE +0 -0
  156. {lionagi-0.0.316.dist-info → lionagi-0.1.1.dist-info}/WHEEL +0 -0
  157. {lionagi-0.0.316.dist-info → lionagi-0.1.1.dist-info}/top_level.txt +0 -0
lionagi/libs/ln_knowledge_graph.py ADDED
@@ -0,0 +1,405 @@
+ import math
+ from lionagi.libs import CallDecorator as cd
+
+
+ class KnowledgeBase:
+     """
+     A class to represent a Knowledge Base (KB) containing entities, relations, and sources.
+
+     Attributes:
+         entities (dict): A dictionary of entities in the KB, where the keys are entity titles, and the values are
+             entity information (excluding the title).
+         relations (list): A list of relations in the KB, where each relation is a dictionary containing information
+             about the relation (head, type, tail) and metadata (article_url and spans).
+         sources (dict): A dictionary of information about the sources of relations, where the keys are article URLs,
+             and the values are source data (article_title and article_publish_date).
+
+     Methods:
+         merge_with_kb(kb2): Merge another Knowledge Base (kb2) into this KB.
+         are_relations_equal(r1, r2): Check if two relations (r1 and r2) are equal.
+         exists_relation(r1): Check if a relation (r1) already exists in the KB.
+         merge_relations(r2): Merge the information from relation r2 into an existing relation in the KB.
+         get_wikipedia_data(candidate_entity): Get data for a candidate entity from Wikipedia.
+         add_entity(e): Add an entity to the KB.
+         add_relation(r, article_title, article_publish_date): Add a relation to the KB.
+         print(): Print the entities, relations, and sources in the KB.
+         extract_relations_from_model_output(text): Extract relations from the model output text.
+     """
+
+     def __init__(self):
+         """
+         Initialize an empty Knowledge Base (KB) with empty dictionaries for entities, relations, and sources.
+         """
+         self.entities = {}  # { entity_title: {...} }
+         self.relations = []  # [ head: entity_title, type: ..., tail: entity_title,
+         #     meta: { article_url: { spans: [...] } } ]
+         self.sources = {}  # { article_url: {...} }
+
+     def merge_with_kb(self, kb2):
+         """
+         Merge another Knowledge Base (KB) into this KB.
+
+         Args:
+             kb2 (KnowledgeBase): The Knowledge Base (KB) to merge into this KB.
+         """
+         for r in kb2.relations:
+             article_url = list(r["meta"].keys())[0]
+             source_data = kb2.sources[article_url]
+             self.add_relation(
+                 r, source_data["article_title"], source_data["article_publish_date"]
+             )
+
+     def are_relations_equal(self, r1, r2):
+         """
+         Check if two relations (r1 and r2) are equal.
+
+         Args:
+             r1 (dict): The first relation to compare.
+             r2 (dict): The second relation to compare.
+
+         Returns:
+             bool: True if the relations are equal, False otherwise.
+         """
+         return all(r1[attr] == r2[attr] for attr in ["head", "type", "tail"])
+
+     def exists_relation(self, r1):
+         """
+         Check if a relation (r1) already exists in the KB.
+
+         Args:
+             r1 (dict): The relation to check for existence in the KB.
+
+         Returns:
+             bool: True if the relation exists in the KB, False otherwise.
+         """
+         return any(self.are_relations_equal(r1, r2) for r2 in self.relations)
+
+     def merge_relations(self, r2):
+         """
+         Merge the information from relation r2 into an existing relation in the KB.
+
+         Args:
+             r2 (dict): The relation to merge into an existing relation in the KB.
+         """
+         r1 = [r for r in self.relations if self.are_relations_equal(r2, r)][0]
+
+         # if different article
+         article_url = list(r2["meta"].keys())[0]
+         if article_url not in r1["meta"]:
+             r1["meta"][article_url] = r2["meta"][article_url]
+
+         # if existing article
+         else:
+             spans_to_add = [
+                 span
+                 for span in r2["meta"][article_url]["spans"]
+                 if span not in r1["meta"][article_url]["spans"]
+             ]
+             r1["meta"][article_url]["spans"] += spans_to_add
+
+     @cd.cache(maxsize=10000)
+     def get_wikipedia_data(self, candidate_entity):
+         """
+         Get data for a candidate entity from Wikipedia.
+
+         Args:
+             candidate_entity (str): The candidate entity title.
+
+         Returns:
+             dict: A dictionary containing information about the candidate entity (title, url, summary).
+                 None if the entity does not exist in Wikipedia.
+         """
+         try:
+             from lionagi.libs import SysUtil
+
+             SysUtil.check_import("wikipedia")
+             import wikipedia  # type: ignore
+         except Exception as e:
+             raise Exception(f"wikipedia package is not installed: {e}")
+
+         try:
+             page = wikipedia.page(candidate_entity, auto_suggest=False)
+             entity_data = {
+                 "title": page.title,
+                 "url": page.url,
+                 "summary": page.summary,
+             }
+             return entity_data
+         except:
+             return None
+
+     def add_entity(self, e):
+         """
+         Add an entity to the KB.
+
+         Args:
+             e (dict): A dictionary containing information about the entity (title and additional attributes).
+         """
+         self.entities[e["title"]] = {k: v for k, v in e.items() if k != "title"}
+
+     def add_relation(self, r, article_title, article_publish_date):
+         """
+         Add a relation to the KB.
+
+         Args:
+             r (dict): A dictionary containing information about the relation (head, type, tail, and metadata).
+             article_title (str): The title of the article containing the relation.
+             article_publish_date (str): The publish date of the article.
+         """
+         # check on wikipedia
+         candidate_entities = [r["head"], r["tail"]]
+         entities = [self.get_wikipedia_data(ent) for ent in candidate_entities]
+
+         # if one entity does not exist, stop
+         if any(ent is None for ent in entities):
+             return
+
+         # manage new entities
+         for e in entities:
+             self.add_entity(e)
+
+         # rename relation entities with their wikipedia titles
+         r["head"] = entities[0]["title"]
+         r["tail"] = entities[1]["title"]
+
+         # add source if not in kb
+         article_url = list(r["meta"].keys())[0]
+         if article_url not in self.sources:
+             self.sources[article_url] = {
+                 "article_title": article_title,
+                 "article_publish_date": article_publish_date,
+             }
+
+         # manage new relation
+         if not self.exists_relation(r):
+             self.relations.append(r)
+         else:
+             self.merge_relations(r)
+
+     def print(self):
+         """
+         Print the entities, relations, and sources in the KB.
+
+         Returns:
+             None
+         """
+         print("Entities:")
+         for e in self.entities.items():
+             print(f"  {e}")
+         print("Relations:")
+         for r in self.relations:
+             print(f"  {r}")
+         print("Sources:")
+         for s in self.sources.items():
+             print(f"  {s}")
+
+     @staticmethod
+     def extract_relations_from_model_output(text):
+         """
+         Extract relations from the model output text.
+
+         Args:
+             text (str): The model output text containing relations.
+
+         Returns:
+             list: A list of dictionaries, where each dictionary represents a relation (head, type, tail).
+         """
+         relations = []
+         relation, subject, object_ = "", "", ""
+         text = text.strip()
+         current = "x"
+         text_replaced = text.replace("<s>", "").replace("<pad>", "").replace("</s>", "")
+         for token in text_replaced.split():
+             if token == "<triplet>":
+                 current = "t"
+                 if relation != "":
+                     relations.append(
+                         {
+                             "head": subject.strip(),
+                             "type": relation.strip(),
+                             "tail": object_.strip(),
+                         }
+                     )
+                     relation = ""
+                 subject = ""
+             elif token == "<subj>":
+                 current = "s"
+                 if relation != "":
+                     relations.append(
+                         {
+                             "head": subject.strip(),
+                             "type": relation.strip(),
+                             "tail": object_.strip(),
+                         }
+                     )
+                 object_ = ""
+             elif token == "<obj>":
+                 current = "o"
+                 relation = ""
+             else:
+                 if current == "t":
+                     subject += " " + token
+                 elif current == "s":
+                     object_ += " " + token
+                 elif current == "o":
+                     relation += " " + token
+         if subject != "" and relation != "" and object_ != "":
+             relations.append(
+                 {
+                     "head": subject.strip(),
+                     "type": relation.strip(),
+                     "tail": object_.strip(),
+                 }
+             )
+         return relations
+
+
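
The parser above consumes REBEL-style decoder output, in which <triplet>, <subj>, and <obj> markers delimit the head entity, tail entity, and relation type. A minimal sketch of what it yields on a typical decoded string; the sample string is invented for illustration:

from lionagi.libs.ln_knowledge_graph import KnowledgeBase

# Invented REBEL-style output; the <s>, <pad>, and </s> wrappers are stripped by the parser.
decoded = "<s><triplet> Punta Cana <subj> Dominican Republic <obj> country</s>"
print(KnowledgeBase.extract_relations_from_model_output(decoded))
# [{'head': 'Punta Cana', 'type': 'country', 'tail': 'Dominican Republic'}]
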
+ class KGTripletExtractor:
+     """
+     A class to perform knowledge graph triplet extraction from text using a pre-trained model.
+
+     Methods:
+         text_to_wiki_kb(text, model=None, tokenizer=None, device='cpu', span_length=512,
+                         article_title=None, article_publish_date=None, verbose=False):
+             Extract knowledge graph triplets from text and create a KnowledgeBase (KB) containing entities and relations.
+     """
+
+     @staticmethod
+     def text_to_wiki_kb(
+         text,
+         model=None,
+         tokenizer=None,
+         device="cpu",
+         span_length=512,
+         article_title=None,
+         article_publish_date=None,
+         verbose=False,
+     ):
+         """
+         Extract knowledge graph triplets from text and create a KnowledgeBase (KB) containing entities and relations.
+
+         Args:
+             text (str): The input text from which triplets will be extracted.
+             model (AutoModelForSeq2SeqLM, optional): The pre-trained model for triplet extraction. Defaults to None.
+             tokenizer (AutoTokenizer, optional): The tokenizer for the model. Defaults to None.
+             device (str, optional): The device to run the model on (e.g., 'cpu', 'cuda'). Defaults to 'cpu'.
+             span_length (int, optional): The maximum span length for input text segmentation. Defaults to 512.
+             article_title (str, optional): The title of the article containing the input text. Defaults to None.
+             article_publish_date (str, optional): The publish date of the article. Defaults to None.
+             verbose (bool, optional): Whether to enable verbose mode for debugging. Defaults to False.
+
+         Returns:
+             KnowledgeBase: A KnowledgeBase (KB) containing extracted entities, relations, and sources.
+         """
+         from lionagi.integrations.bridge.transformers_.install_ import (
+             install_transformers,
+         )
+
+         try:
+             from transformers import AutoModelForSeq2SeqLM, AutoTokenizer  # type: ignore
+         except ImportError:
+             install_transformers()
+             from transformers import AutoModelForSeq2SeqLM, AutoTokenizer  # type: ignore
+         import torch  # type: ignore
+
+         if not any([model, tokenizer]):
+             tokenizer = AutoTokenizer.from_pretrained("Babelscape/rebel-large")
+             model = AutoModelForSeq2SeqLM.from_pretrained("Babelscape/rebel-large")
+             model.to(device)
+
+         inputs = tokenizer([text], return_tensors="pt")
+
+         num_tokens = len(inputs["input_ids"][0])
+         if verbose:
+             print(f"Input has {num_tokens} tokens")
+         num_spans = math.ceil(num_tokens / span_length)
+         if verbose:
+             print(f"Input has {num_spans} spans")
+         overlap = math.ceil(
+             (num_spans * span_length - num_tokens) / max(num_spans - 1, 1)
+         )
+         spans_boundaries = []
+         start = 0
+         for i in range(num_spans):
+             spans_boundaries.append(
+                 [start + span_length * i, start + span_length * (i + 1)]
+             )
+             start -= overlap
+         if verbose:
+             print(f"Span boundaries are {spans_boundaries}")
+
+         # transform input with spans
+         tensor_ids = [
+             inputs["input_ids"][0][boundary[0] : boundary[1]]
+             for boundary in spans_boundaries
+         ]
+         tensor_masks = [
+             inputs["attention_mask"][0][boundary[0] : boundary[1]]
+             for boundary in spans_boundaries
+         ]
+
+         inputs = {
+             "input_ids": torch.stack(tensor_ids).to(device),
+             "attention_mask": torch.stack(tensor_masks).to(device),
+         }
+
+         # generate relations
+         num_return_sequences = 3
+         gen_kwargs = {
+             "max_length": 512,
+             "length_penalty": 0,
+             "num_beams": 3,
+             "num_return_sequences": num_return_sequences,
+         }
+         generated_tokens = model.generate(
+             **inputs,
+             **gen_kwargs,
+         )
+
+         # decode relations
+         decoded_preds = tokenizer.batch_decode(
+             generated_tokens, skip_special_tokens=False
+         )
+
+         # create kb
+         kb = KnowledgeBase()
+         i = 0
+         for sentence_pred in decoded_preds:
+             current_span_index = i // num_return_sequences
+             relations = KnowledgeBase.extract_relations_from_model_output(sentence_pred)
+             for relation in relations:
+                 relation["meta"] = {
+                     "article_url": {"spans": [spans_boundaries[current_span_index]]}
+                 }
+                 kb.add_relation(relation, article_title, article_publish_date)
+             i += 1
+         return kb
+
+
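
The span-windowing arithmetic in text_to_wiki_kb is easiest to see with concrete numbers: the overlap is back-computed so that num_spans windows of span_length tokens exactly cover the input. A worked sketch with made-up sizes:

import math

# Made-up sizes: a 700-token input cut into 512-token windows.
num_tokens, span_length = 700, 512
num_spans = math.ceil(num_tokens / span_length)  # 2
# Two windows hold 1024 tokens, 324 more than the input has, so the
# surplus becomes overlap spread across the single gap between windows.
overlap = math.ceil((num_spans * span_length - num_tokens) / max(num_spans - 1, 1))
spans_boundaries, start = [], 0
for i in range(num_spans):
    spans_boundaries.append([start + span_length * i, start + span_length * (i + 1)])
    start -= overlap
print(spans_boundaries)  # [[0, 512], [188, 700]]
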
+ class KGraph:
+     """
+     A class representing a Knowledge Graph (KGraph) for extracting relations from text.
+
+     Methods:
+         text_to_wiki_kb(text, model=None, tokenizer=None, device='cpu', span_length=512, article_title=None,
+                         article_publish_date=None, verbose=False):
+             Extract relations from input text and create a Knowledge Base (KB) containing entities and relations.
+     """
+
+     @staticmethod
+     def text_to_wiki_kb(text, **kwargs):
+         """
+         Extract relations from input text and create a Knowledge Base (KB) containing entities and relations.
+
+         Args:
+             text (str): The input text from which relations are extracted.
+             **kwargs: Additional keyword arguments passed to the underlying extraction method.
+
+         Returns:
+             KnowledgeBase: A Knowledge Base (KB) containing entities and relations extracted from the input text.
+         """
+         return KGTripletExtractor.text_to_wiki_kb(text, **kwargs)
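
KGraph.text_to_wiki_kb is the public entry point of this new module. A minimal usage sketch, assuming the optional transformers, torch, and wikipedia dependencies are installed; the sample sentence is invented, and the first call downloads the Babelscape/rebel-large weights:

from lionagi.libs.ln_knowledge_graph import KGraph

text = "Napoleon Bonaparte was born on the island of Corsica."
kb = KGraph.text_to_wiki_kb(text, device="cpu", verbose=True)
kb.print()  # entities, relations, and sources grounded against Wikipedia
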
lionagi/libs/ln_parse.py CHANGED
@@ -1,11 +1,14 @@
+ from collections.abc import Callable
  import re
  import inspect
  import itertools
- from collections.abc import Callable
+ import contextlib
+ from functools import singledispatchmethod
  from typing import Any
  import numpy as np
  import lionagi.libs.ln_convert as convert

+
  md_json_char_map = {"\n": "\\n", "\r": "\\r", "\t": "\\t", '"': '\\"'}


@@ -590,14 +593,15 @@ class StringMatch:
          return d[m][n]

      @staticmethod
-     def correct_keys(output_fields, out_, score_func=None):
+     def correct_dict_keys(keys: dict | list[str], dict_, score_func=None):
          if score_func is None:
              score_func = StringMatch.jaro_winkler_similarity
-         fields_set = set(output_fields.keys())
+
+         fields_set = set(keys if isinstance(keys, list) else keys.keys())
          corrected_out = {}
          used_keys = set()

-         for k, v in out_.items():
+         for k, v in dict_.items():
              if k in fields_set:
                  corrected_out[k] = v
                  fields_set.remove(k)  # Remove the matched key
@@ -614,8 +618,8 @@ class StringMatch:
              fields_set.remove(best_match)  # Remove the matched key
              used_keys.add(best_match)

-         if len(used_keys) < len(out_):
-             for k, v in out_.items():
+         if len(used_keys) < len(dict_):
+             for k, v in dict_.items():
                  if k not in used_keys:
                      corrected_out[k] = v

@@ -637,3 +641,36 @@ class StringMatch:
          # Find the index of the highest score
          max_score_index = np.argmax(scores)
          return correct_words_list[max_score_index]
+
+     @staticmethod
+     def force_validate_dict(x, keys: dict | list[str]) -> dict:
+         out_ = x
+
+         if isinstance(out_, str):
+             # first, try to parse the whole string as fuzzy json
+             try:
+                 out_ = ParseUtil.fuzzy_parse_json(out_)
+             except Exception:
+                 try:
+                     # if that fails, try to extract the json block and parse it
+                     out_ = ParseUtil.md_to_json(out_)
+                 except Exception:
+                     # if that still fails, extract the json block with a regex and parse it again
+                     match = re.search(r"```json\n({.*?})\n```", out_, re.DOTALL)
+                     if match:
+                         out_ = match.group(1)
+                         try:
+                             out_ = ParseUtil.fuzzy_parse_json(out_)
+                         except Exception:
+                             try:
+                                 out_ = ParseUtil.fuzzy_parse_json(
+                                     out_.replace("'", '"')
+                                 )
+                             except Exception:
+                                 pass
+
+         if isinstance(out_, dict):
+             try:
+                 return StringMatch.correct_dict_keys(keys, out_)
+             except Exception as e:
+                 raise ValueError(f"Failed to force_validate_dict for input: {x}") from e
lionagi/libs/ln_queue.py ADDED
@@ -0,0 +1,101 @@
+ """
+ A module that manages asynchronous task processing with controlled concurrency.
+ """
+
+ from typing import Any, Callable
+ import asyncio
+ from lionagi.libs import func_call
+
+
+ class AsyncQueue:
+     """
+     This class handles the enqueueing and processing of tasks with a limit on
+     how many can run simultaneously, using an asyncio.Queue for task storage and
+     an asyncio.Semaphore to limit concurrency.
+
+     Attributes:
+         queue (asyncio.Queue): The queue to store tasks.
+         _stop_event (asyncio.Event): Event to signal processing should stop.
+         max_concurrent_tasks (int): Maximum number of tasks processed concurrently.
+         semaphore (asyncio.Semaphore): Controls concurrent access to task execution.
+     """
+
+     def __init__(self, max_concurrent_tasks=5):
+         """
+         Initializes the AsyncQueue with a concurrency limit.
+
+         Args:
+             max_concurrent_tasks (int): The maximum number of concurrent tasks
+                 allowed. Default is 5.
+         """
+         self.queue = asyncio.Queue()
+         self._stop_event = asyncio.Event()
+         self.max_concurrent_tasks = max_concurrent_tasks
+         self.semaphore = asyncio.Semaphore(max_concurrent_tasks)
+
+     async def enqueue(self, input_: Any) -> None:
+         """
+         Enqueues an item to be processed asynchronously.
+
+         Args:
+             input_ (Any): The item to be enqueued.
+         """
+         await self.queue.put(input_)
+
+     async def dequeue(self) -> Any:
+         """
+         Dequeues an item for processing.
+
+         Returns:
+             Any: The dequeued item.
+         """
+         return await self.queue.get()
+
+     async def join(self) -> None:
+         """Waits for all items in the queue to be processed."""
+         await self.queue.join()
+
+     async def stop(self) -> None:
+         """Signals the queue to stop processing new items."""
+         self._stop_event.set()
+
+     def stopped(self) -> bool:
+         """
+         Checks if the stop signal has been issued.
+
+         Returns:
+             bool: True if the queue has been stopped, otherwise False.
+         """
+         return self._stop_event.is_set()
+
+     async def process_requests(self, func: Callable, retry_kwargs: dict = {}) -> None:
+         """
+         Processes tasks from the queue using the provided function with retries.
+
+         This method continuously processes tasks from the queue using the specified
+         function until a stop event is triggered. Handles concurrency using a
+         semaphore and manages task completion.
+
+         Args:
+             func (Callable): The function to process each task.
+             retry_kwargs (dict): Keyword arguments for retry behavior. Default is
+                 an empty dictionary.
+         """
+         tasks = set()
+         while not self.stopped():
+             if len(tasks) >= self.max_concurrent_tasks:
+                 # asyncio.wait returns (done, pending); prune the finished tasks
+                 done, _ = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
+                 tasks.difference_update(done)
+
+             async with self.semaphore:
+                 input_ = await self.dequeue()
+                 if input_ is None:
+                     await self.stop()
+                     break
+                 task = asyncio.create_task(
+                     func_call.rcall(func, input_, **retry_kwargs)
+                 )
+                 tasks.add(task)
+
+         if tasks:
+             await asyncio.wait(tasks)
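
A short usage sketch of the new AsyncQueue: items are enqueued ahead of time, followed by a None sentinel that process_requests interprets as the stop signal. The worker coroutine here is invented:

import asyncio
from lionagi.libs.ln_queue import AsyncQueue

async def worker(x):  # invented task function; any async callable works
    await asyncio.sleep(0.1)
    print(f"processed {x}")

async def main():
    queue = AsyncQueue(max_concurrent_tasks=3)
    for i in range(10):
        await queue.enqueue(i)
    await queue.enqueue(None)  # sentinel: stop once the queue drains
    await queue.process_requests(worker)

asyncio.run(main())
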
lionagi/libs/ln_tokenizer.py ADDED
@@ -0,0 +1,57 @@
+ import re
+
+
+ class BaseToken:
+     def __init__(self, type_, value):
+         self.type = type_
+         self.value = value
+
+     def __repr__(self):
+         return f"BaseDirectiveToken({self.type}, {self.value})"
+
+
+ class BaseTokenizer:
+     TOKEN_TYPES = {
+         "KEYWORD": r"\b(BEGIN|END|IF|ELSE|FOR|IN|TRY|EXCEPT|ENDIF|ENDFOR|ENDTRY|DO)\b",
+         "OPERATOR": r"(==|!=|>=|<=|>|<|&&|\|\||!)",
+         "FUNCTION_CALL": r"\b[a-zA-Z_][a-zA-Z0-9_]*\b\((.*?)\)",
+         "LITERAL": r'(\d+|\'.*?\'|".*?")',
+         "IDENTIFIER": r"\b[a-zA-Z_][a-zA-Z0-9_]*\b",
+         "PUNCTUATION": r"(;|,|\(|\))",
+         "WHITESPACE": r"\s+",
+     }
+
+     def __init__(self, script):
+         self.script = script
+         self.tokens = []
+         self.tokenize()
+
+     @property
+     def is_empty(self):
+         return self.tokens == []
+
+     def tokenize(self):
+         position = 0
+         while position < len(self.script):
+             match = None
+             for type_, pattern in self.TOKEN_TYPES.items():
+                 regex = re.compile(pattern)
+                 match = regex.match(self.script, position)
+                 if match:
+                     if type_ != "WHITESPACE":  # Ignore whitespace
+                         token = BaseToken(type_, match.group())
+                         self.tokens.append(token)
+                     position = match.end()  # Move past the matched token
+                     break
+             if not match:  # No match found, unrecognized token
+                 raise SyntaxError(f"Unexpected character: {self.script[position]}")
+
+     def get_tokens(self):
+         if self.is_empty:
+             try:
+                 self.tokenize()
+             except SyntaxError as e:
+                 print(e)
+                 return []
+         return self.tokens
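
A quick sketch of the tokenizer on a directive-style script; the script string is invented, and WHITESPACE matches are discarded during tokenization:

from lionagi.libs.ln_tokenizer import BaseTokenizer

script = 'IF temperature >= 100; DO alert("too hot"); ENDIF'
tokens = BaseTokenizer(script).get_tokens()
print([(t.type, t.value) for t in tokens])
# [('KEYWORD', 'IF'), ('IDENTIFIER', 'temperature'), ('OPERATOR', '>='),
#  ('LITERAL', '100'), ('PUNCTUATION', ';'), ('KEYWORD', 'DO'),
#  ('FUNCTION_CALL', 'alert("too hot")'), ('PUNCTUATION', ';'), ('KEYWORD', 'ENDIF')]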