llm-ie 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
llm_ie/__init__.py CHANGED
@@ -1,9 +1,9 @@
  from .data_types import LLMInformationExtractionFrame, LLMInformationExtractionDocument
- from .engines import LlamaCppInferenceEngine, OllamaInferenceEngine, HuggingFaceHubInferenceEngine, OpenAIInferenceEngine, LiteLLMInferenceEngine
+ from .engines import LlamaCppInferenceEngine, OllamaInferenceEngine, HuggingFaceHubInferenceEngine, OpenAIInferenceEngine, AzureOpenAIInferenceEngine, LiteLLMInferenceEngine
  from .extractors import BasicFrameExtractor, ReviewFrameExtractor, SentenceFrameExtractor, SentenceReviewFrameExtractor, SentenceCoTFrameExtractor, BinaryRelationExtractor, MultiClassRelationExtractor
  from .prompt_editor import PromptEditor

  __all__ = ["LLMInformationExtractionFrame", "LLMInformationExtractionDocument",
- "LlamaCppInferenceEngine", "OllamaInferenceEngine", "HuggingFaceHubInferenceEngine", "OpenAIInferenceEngine", "LiteLLMInferenceEngine",
+ "LlamaCppInferenceEngine", "OllamaInferenceEngine", "HuggingFaceHubInferenceEngine", "OpenAIInferenceEngine", "AzureOpenAIInferenceEngine", "LiteLLMInferenceEngine",
  "BasicFrameExtractor", "ReviewFrameExtractor", "SentenceFrameExtractor", "SentenceReviewFrameExtractor", "SentenceCoTFrameExtractor", "BinaryRelationExtractor", "MultiClassRelationExtractor",
  "PromptEditor"]
llm_ie/data_types.py CHANGED
@@ -204,7 +204,7 @@ class LLMInformationExtractionDocument:
  # Add frame
  frame_clone = frame.copy()
  if create_id:
- frame_clone.doc_id = f"{self.doc_id}_{len(self.frames)}"
+ frame_clone.frame_id = str(len(self.frames))

  self.frames.append(frame_clone)
  return True
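This is a bug fix: with `create_id=True`, the old code overwrote the cloned frame's `doc_id`, while the new code assigns a sequential, string-valued `frame_id` derived from the current frame count. A hypothetical sketch of the corrected behavior; the constructor arguments below are illustrative assumptions, not taken from this diff:

```python
from llm_ie import LLMInformationExtractionFrame, LLMInformationExtractionDocument

doc = LLMInformationExtractionDocument(doc_id="note_1", text="Patient denies fever.")
frame = LLMInformationExtractionFrame(frame_id="", start=15, end=20,
                                      entity_text="fever", attr={})

doc.add_frame(frame, create_id=True)  # the stored clone now gets frame_id "0"
doc.add_frame(frame, create_id=True)  # the next clone gets frame_id "1"
```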
llm_ie/engines.py CHANGED
@@ -290,9 +290,88 @@ class OpenAIInferenceEngine(InferenceEngine):
  if stream:
  res = ''
  for chunk in response:
- if chunk.choices[0].delta.content is not None:
- res += chunk.choices[0].delta.content
- print(chunk.choices[0].delta.content, end="", flush=True)
+ if len(chunk.choices) > 0:
+ if chunk.choices[0].delta.content is not None:
+ res += chunk.choices[0].delta.content
+ print(chunk.choices[0].delta.content, end="", flush=True)
+ return res
+
+ return response.choices[0].message.content
+
+
+ async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, **kwrs) -> str:
+ """
+ Async version of chat method. Streaming is not supported.
+ """
+ response = await self.async_client.chat.completions.create(
+ model=self.model,
+ messages=messages,
+ max_tokens=max_new_tokens,
+ temperature=temperature,
+ stream=False,
+ **kwrs
+ )
+
+ return response.choices[0].message.content
+
+
+ class AzureOpenAIInferenceEngine(InferenceEngine):
+ def __init__(self, model:str, api_version:str, **kwrs):
+ """
+ The Azure OpenAI API inference engine.
+ For parameters and documentation, refer to
+ - https://azure.microsoft.com/en-us/products/ai-services/openai-service
+ - https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart
+
+ Parameters:
+ ----------
+ model : str
+ model name as described in https://platform.openai.com/docs/models
+ api_version : str
+ the Azure OpenAI API version
+ """
+ if importlib.util.find_spec("openai") is None:
+ raise ImportError("OpenAI Python API library not found. Please install OpenAI (```pip install openai```).")
+
+ from openai import AzureOpenAI, AsyncAzureOpenAI
+ self.model = model
+ self.api_version = api_version
+ self.client = AzureOpenAI(api_version=self.api_version,
+ **kwrs)
+ self.async_client = AsyncAzureOpenAI(api_version=self.api_version,
+ **kwrs)
+
+ def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
+ """
+ This method inputs chat messages and outputs LLM generated text.
+
+ Parameters:
+ ----------
+ messages : List[Dict[str,str]]
+ a list of dict with role and content. role must be one of {"system", "user", "assistant"}
+ max_new_tokens : int, Optional
+ the max number of new tokens LLM can generate.
+ temperature : float, Optional
+ the temperature for token sampling.
+ stream : bool, Optional
+ if True, LLM generated text will be printed in terminal in real-time.
+ """
+ response = self.client.chat.completions.create(
+ model=self.model,
+ messages=messages,
+ max_tokens=max_new_tokens,
+ temperature=temperature,
+ stream=stream,
+ **kwrs
+ )
+
+ if stream:
+ res = ''
+ for chunk in response:
+ if len(chunk.choices) > 0:
+ if chunk.choices[0].delta.content is not None:
+ res += chunk.choices[0].delta.content
+ print(chunk.choices[0].delta.content, end="", flush=True)
  return res

  return response.choices[0].message.content
@@ -312,6 +391,7 @@ class OpenAIInferenceEngine(InferenceEngine):
  )

  return response.choices[0].message.content
+

  class LiteLLMInferenceEngine(InferenceEngine):
  def __init__(self, model:str=None, base_url:str=None, api_key:str=None):
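Two additions here: `chat_async` runs a non-streaming completion on the async client, and the new `AzureOpenAIInferenceEngine` wraps `AzureOpenAI`/`AsyncAzureOpenAI`, which pick up the endpoint and key from the standard Azure environment variables (see the README changes below). A minimal usage sketch; the `api_version` string is an assumption for illustration:

```python
import asyncio
from llm_ie.engines import AzureOpenAIInferenceEngine

# Assumes AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY are exported in the shell.
engine = AzureOpenAIInferenceEngine(model="gpt-4o-mini",
                                    api_version="2024-02-01")  # assumed version string

async def main() -> None:
    # chat_async mirrors chat() but does not support streaming.
    reply = await engine.chat_async(
        messages=[{"role": "user", "content": "Reply with the word 'ready'."}],
        max_new_tokens=32,
        temperature=0.0,
    )
    print(reply)

asyncio.run(main())
```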
llm_ie/extractors.py CHANGED
@@ -59,7 +59,7 @@ class Extractor:
  text_content : Union[str, Dict[str,str]]
  the input text content to put in prompt template.
  If str, the prompt template must have only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
- If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
+ If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}. All values must be str.

  Returns : str
  a user prompt.
@@ -73,6 +73,10 @@ class Extractor:
  prompt = pattern.sub(text, self.prompt_template)

  elif isinstance(text_content, dict):
+ # Check if all values are str
+ if not all([isinstance(v, str) for v in text_content.values()]):
+ raise ValueError("All values in text_content must be str.")
+ # Check if all keys are in the prompt template
  placeholders = pattern.findall(self.prompt_template)
  if len(placeholders) != len(text_content):
  raise ValueError(f"Expect text_content ({len(text_content)}) and prompt template placeholder ({len(placeholders)}) to have equal size.")
@@ -422,6 +426,13 @@ class BasicFrameExtractor(FrameExtractor):
  Return : str
  a list of frames.
  """
+ if isinstance(text_content, str):
+ text = text_content
+ elif isinstance(text_content, dict):
+ if document_key is None:
+ raise ValueError("document_key must be provided when text_content is dict.")
+ text = text_content[document_key]
+
  frame_list = []
  gen_text = self.extract(text_content=text_content,
  max_new_tokens=max_new_tokens,
@@ -435,11 +446,6 @@ class BasicFrameExtractor(FrameExtractor):
  entity_json.append(entity)
  else:
  warnings.warn(f'Extractor output "{entity}" does not have entity_key ("{entity_key}"). This frame will be dropped.', RuntimeWarning)
-
- if isinstance(text_content, str):
- text = text_content
- elif isinstance(text_content, dict):
- text = text_content[document_key]

  spans = self._find_entity_spans(text=text,
  entities=[e[entity_key] for e in entity_json],
@@ -645,6 +651,8 @@ class SentenceFrameExtractor(FrameExtractor):
  if isinstance(text_content, str):
  sentences = self._get_sentences(text_content)
  elif isinstance(text_content, dict):
+ if document_key is None:
+ raise ValueError("document_key must be provided when text_content is dict.")
  sentences = self._get_sentences(text_content[document_key])
  # construct chat messages
  messages = []
@@ -715,6 +723,8 @@ class SentenceFrameExtractor(FrameExtractor):
  if isinstance(text_content, str):
  sentences = self._get_sentences(text_content)
  elif isinstance(text_content, dict):
+ if document_key is None:
+ raise ValueError("document_key must be provided when text_content is dict.")
  sentences = self._get_sentences(text_content[document_key])
  # construct chat messages
  base_messages = []
@@ -933,6 +943,8 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
  if isinstance(text_content, str):
  sentences = self._get_sentences(text_content)
  elif isinstance(text_content, dict):
+ if document_key is None:
+ raise ValueError("document_key must be provided when text_content is dict.")
  sentences = self._get_sentences(text_content[document_key])
  # construct chat messages
  messages = []
@@ -1025,6 +1037,8 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
  if isinstance(text_content, str):
  sentences = self._get_sentences(text_content)
  elif isinstance(text_content, dict):
+ if document_key is None:
+ raise ValueError("document_key must be provided when text_content is dict.")
  sentences = self._get_sentences(text_content[document_key])
  # construct chat messages
  base_messages = []
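The common thread in these hunks: dict-valued `text_content` is now validated up front. Every value must be a `str`, and `document_key` is required so the extractor knows which entry holds the document text (previously a missing `document_key` fell through to a `KeyError`). A sketch of the new behavior; the prompt template and engine choice are illustrative assumptions:

```python
from llm_ie import OpenAIInferenceEngine, BasicFrameExtractor

engine = OpenAIInferenceEngine(model="gpt-4o-mini")
extractor = BasicFrameExtractor(
    inference_engine=engine,
    prompt_template="Note: {{note}}\nGuideline: {{guideline}}",  # assumed template
)

content = {"note": "Patient denies fever.", "guideline": "Extract symptoms."}

# Raises ValueError: all values in text_content must be str.
# extractor.extract_frames(text_content={**content, "guideline": 42},
#                          entity_key="entity_text", document_key="note")

# Raises ValueError: document_key must be provided when text_content is dict.
# extractor.extract_frames(text_content=content, entity_key="entity_text")

frames = extractor.extract_frames(text_content=content,
                                  entity_key="entity_text",
                                  document_key="note")
```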
llm_ie/prompt_editor.py CHANGED
@@ -67,7 +67,7 @@ class PromptEditor:
  return prompt


- def rewrite(self, draft:str) -> str:
+ def rewrite(self, draft:str, **kwrs) -> str:
  """
  This method inputs a prompt draft and rewrites it following the extractor's guideline.
  """
@@ -79,10 +79,10 @@
  prompt_template=rewrite_prompt_template)
  messages = [{"role": "system", "content": self.system_prompt},
  {"role": "user", "content": prompt}]
- res = self.inference_engine.chat(messages, stream=True)
+ res = self.inference_engine.chat(messages, stream=True, **kwrs)
  return res

- def comment(self, draft:str) -> str:
+ def comment(self, draft:str, **kwrs) -> str:
  """
  This method inputs a prompt draft and comments on it following the extractor's guideline.
  """
@@ -94,11 +94,11 @@
  prompt_template=comment_prompt_template)
  messages = [{"role": "system", "content": self.system_prompt},
  {"role": "user", "content": prompt}]
- res = self.inference_engine.chat(messages, stream=True)
+ res = self.inference_engine.chat(messages, stream=True, **kwrs)
  return res


- def _terminal_chat(self):
+ def _terminal_chat(self, **kwrs):
  """
  This method runs an interactive chat session in the terminal to help users write prompt templates.
  """
@@ -126,11 +126,11 @@
  # Chat
  messages.append({"role": "user", "content": user_input})
  print(f"{Fore.BLUE}Assistant: {Style.RESET_ALL}", end="")
- response = self.inference_engine.chat(messages, stream=True)
+ response = self.inference_engine.chat(messages, stream=True, **kwrs)
  messages.append({"role": "assistant", "content": response})


- def _IPython_chat(self):
+ def _IPython_chat(self, **kwrs):
  """
  This method runs an interactive chat session in Jupyter/IPython using ipywidgets to help users write prompt templates.
  """
@@ -186,7 +186,7 @@ class PromptEditor:

  # Get assistant's response and append it to conversation
  print("Assistant: ", end="")
- response = self.inference_engine.chat(messages, stream=True)
+ response = self.inference_engine.chat(messages, stream=True, **kwrs)
  messages.append({"role": "assistant", "content": response})

  # Display the assistant's response
@@ -200,11 +200,11 @@
  display(input_box)
  display(output_area)

- def chat(self):
+ def chat(self, **kwrs):
  """
  External method that detects the environment and calls the appropriate chat method.
  """
  if 'ipykernel' in sys.modules:
- self._IPython_chat()
+ self._IPython_chat(**kwrs)
  else:
- self._terminal_chat()
+ self._terminal_chat(**kwrs)
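Every PromptEditor entry point now forwards keyword arguments to the engine's `chat()`, so generation settings such as `max_new_tokens` and `temperature` can be tuned per call. A brief sketch, under the assumption that PromptEditor is constructed from an inference engine and an extractor class:

```python
from llm_ie import OpenAIInferenceEngine, BasicFrameExtractor, PromptEditor

engine = OpenAIInferenceEngine(model="gpt-4o-mini")
editor = PromptEditor(engine, BasicFrameExtractor)  # construction assumed for illustration

draft = "Extract symptom mentions and output a JSON list."

# Extra kwargs now reach inference_engine.chat():
rewritten = editor.rewrite(draft, max_new_tokens=4096, temperature=0.2)
feedback = editor.comment(draft, max_new_tokens=1024)
editor.chat(temperature=0.5)  # interactive session, terminal or IPython
```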
llm_ie-0.4.1.dist-info/METADATA → llm_ie-0.4.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: llm-ie
- Version: 0.4.1
+ Version: 0.4.3
  Summary: An LLM-powered tool that transforms everyday language into robust information extraction pipelines.
  License: MIT
  Author: Enshuo (David) Hsu
@@ -11,6 +11,7 @@ Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Requires-Dist: colorama (>=0.4.6,<0.5.0)
  Requires-Dist: json_repair (>=0.30,<0.31)
+ Requires-Dist: nest_asyncio (>=0.1.6,<0.2.0)
  Requires-Dist: nltk (>=3.8,<4.0)
  Description-Content-Type: text/markdown

@@ -39,6 +40,7 @@ An LLM-powered tool that transforms everyday language into robust information ex
  - Concurrent LLM inferencing to speed up frame and relation extraction.
  - Support for LiteLLM.
  - [v0.4.1](https://github.com/daviden1013/llm-ie/releases/tag/v0.4.1) (Jan 25, 2025): Added filters, table view, and some new features to the visualization tool (make sure to update [ie-viz](https://github.com/daviden1013/ie-viz)).
+ - [v0.4.3](https://github.com/daviden1013/llm-ie/releases/tag/v0.4.3) (Feb 7, 2025): Added Azure OpenAI support.

  ## Table of Contents
  - [Overview](#overview)
@@ -99,6 +101,20 @@ inference_engine = OpenAIInferenceEngine(model="gpt-4o-mini")
  ```
  </details>

+ <details>
+ <summary><img src=doc_asset/readme_img/Azure_icon.png width=32 /> Azure OpenAI API</summary>
+
+ Follow the [Azure AI Services Quickstart](https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line%2Ckeyless%2Ctypescript-keyless%2Cpython-new&pivots=programming-language-python) to set up the endpoint and API key.
+
+ ```python
+ from llm_ie.engines import AzureOpenAIInferenceEngine
+
+ inference_engine = AzureOpenAIInferenceEngine(model="gpt-4o-mini",
+ api_version="<your api version>")
+ ```
+
+ </details>
+
  <details>
  <summary>🤗 Huggingface_hub</summary>

@@ -324,6 +340,22 @@ from llm_ie.engines import OpenAIInferenceEngine
  inference_engine = OpenAIInferenceEngine(model="gpt-4o-mini")
  ```

+ #### <img src=doc_asset/readme_img/Azure_icon.png width=32 /> Azure OpenAI API
+ In bash, save the endpoint name and API key to the environment variables `AZURE_OPENAI_ENDPOINT` and `AZURE_OPENAI_API_KEY`.
+ ```
+ export AZURE_OPENAI_API_KEY="<your_API_key>"
+ export AZURE_OPENAI_ENDPOINT="<your_endpoint>"
+ ```
+
+ In Python, create the inference engine and specify the model name. For available models, refer to the [OpenAI webpage](https://platform.openai.com/docs/models).
+ For more parameters, see the [Azure OpenAI reference](https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart).
+
+ ```python
+ from llm_ie.engines import AzureOpenAIInferenceEngine
+
+ inference_engine = AzureOpenAIInferenceEngine(model="gpt-4o-mini", api_version="<your api version>")
+ ```
+
  #### 🤗 huggingface_hub
  The ```model``` can be a model id hosted on the Hugging Face Hub or a URL to a deployed Inference Endpoint. Refer to the [Inference Client](https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client) documentation for more details.

llm_ie-0.4.1.dist-info/RECORD → llm_ie-0.4.3.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
- llm_ie/__init__.py,sha256=zzxVgfX0Nrx3LgDDZl1XsJychb70LfQnYc5WUxL7gbo,932
+ llm_ie/__init__.py,sha256=FnpYVn9C7t1q8EJbTup4FtxmGd1m8ZyaCXH6nRqbroY,990
  llm_ie/asset/PromptEditor_prompts/chat.txt,sha256=Fq62voV0JQ8xBRcxS1Nmdd7DkHs1fGYb-tmNwctZZK0,118
  llm_ie/asset/PromptEditor_prompts/comment.txt,sha256=C_lxx-dlOlFJ__jkHKosZ8HsNAeV1aowh2B36nIipBY,159
  llm_ie/asset/PromptEditor_prompts/rewrite.txt,sha256=JAwY9vm1jSmKf2qcLBYUvrSmME2EJH36bALmkwZDWYQ,178
@@ -14,10 +14,10 @@ llm_ie/asset/prompt_guide/ReviewFrameExtractor_prompt_guide.txt,sha256=m7iX4Qjsf
  llm_ie/asset/prompt_guide/SentenceCoTFrameExtractor_prompt_guide.txt,sha256=T4NsO33s3KSJml-klzXAJiYox0kiuxGo-ou2a2Ig2SY,14225
  llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt,sha256=oKH_QeDgpw771ZdHk3L7DYz2Jvfm7OolUoTiJyMJI30,9541
  llm_ie/asset/prompt_guide/SentenceReviewFrameExtractor_prompt_guide.txt,sha256=oKH_QeDgpw771ZdHk3L7DYz2Jvfm7OolUoTiJyMJI30,9541
- llm_ie/data_types.py,sha256=3-FsQPlcUwMQLA9IOM5qaCFtWfiwrS80w-18XKgPN5w,15729
- llm_ie/engines.py,sha256=lz2HODoqlndgezdT76diXKN_wgb7mjl6hX3JuCwsH-g,15191
- llm_ie/extractors.py,sha256=CpEuSqzlYd3u8Qwiu7Qdd26iII2pci1nNKxGz8sv1ZU,84506
- llm_ie/prompt_editor.py,sha256=pw_FOsEeWxFJ1p5lYR93cTNMqKQ-YZHzgBmRbPm7aNE,9486
- llm_ie-0.4.1.dist-info/METADATA,sha256=9oa9Li6ailbEqLENTC4F4DyUQApswK0ecpS0NKEhXVM,52527
- llm_ie-0.4.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- llm_ie-0.4.1.dist-info/RECORD,,
+ llm_ie/data_types.py,sha256=_Kt4Er1SMj1jg8U8TCXFJH_64prur-IbFngHKmZgWr8,15717
+ llm_ie/engines.py,sha256=qESKa2f_2VZ-HSCDZ6R2gKFwPBN0j2qv1vcTYpWXJTM,18319
+ llm_ie/extractors.py,sha256=ueSt8jBKLnqOxu8FuqyYqEERugzd6FsI0r-pY8EboHw,85426
+ llm_ie/prompt_editor.py,sha256=pHRbg_yFZdoV63r3pvf0TsLfgH2EVJvzUQEVDH1Hj0s,9570
+ llm_ie-0.4.3.dist-info/METADATA,sha256=ErEWRa3WYFakjwcUmQvHUomp_EiZho7nlB9XQFgGu9A,53980
+ llm_ie-0.4.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ llm_ie-0.4.3.dist-info/RECORD,,
llm_ie-0.4.1.dist-info/WHEEL → llm_ie-0.4.3.dist-info/WHEEL
File without changes