llm-ie 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
llm_ie/__init__.py CHANGED
@@ -0,0 +1,9 @@
+ from .data_types import LLMInformationExtractionFrame, LLMInformationExtractionDocument
+ from .engines import LlamaCppInferenceEngine, OllamaInferenceEngine, HuggingFaceHubInferenceEngine, OpenAIInferenceEngine, LiteLLMInferenceEngine
+ from .extractors import BasicFrameExtractor, ReviewFrameExtractor, SentenceFrameExtractor, SentenceReviewFrameExtractor, SentenceCoTFrameExtractor, BinaryRelationExtractor, MultiClassRelationExtractor
+ from .prompt_editor import PromptEditor
+
+ __all__ = ["LLMInformationExtractionFrame", "LLMInformationExtractionDocument",
+            "LlamaCppInferenceEngine", "OllamaInferenceEngine", "HuggingFaceHubInferenceEngine", "OpenAIInferenceEngine", "LiteLLMInferenceEngine",
+            "BasicFrameExtractor", "ReviewFrameExtractor", "SentenceFrameExtractor", "SentenceReviewFrameExtractor", "SentenceCoTFrameExtractor", "BinaryRelationExtractor", "MultiClassRelationExtractor",
+            "PromptEditor"]
llm_ie/engines.py CHANGED
@@ -1,5 +1,7 @@
  import abc
- from typing import List, Dict
+ import importlib
+ from typing import List, Dict, Union
+

  class InferenceEngine:
      @abc.abstractmethod
@@ -104,6 +106,9 @@ class LlamaCppInferenceEngine(InferenceEngine):
          return response['choices'][0]['message']['content']


+
+
+
  class OllamaInferenceEngine(InferenceEngine):
      def __init__(self, model_name:str, num_ctx:int=4096, keep_alive:int=300, **kwrs):
          """
@@ -118,8 +123,12 @@ class OllamaInferenceEngine(InferenceEngine):
          keep_alive : int, Optional
              seconds to hold the LLM after the last API call.
          """
-         import ollama
-         self.ollama = ollama
+         if importlib.util.find_spec("ollama") is None:
+             raise ImportError("ollama-python not found. Please install ollama-python (```pip install ollama```).")
+
+         from ollama import Client, AsyncClient
+         self.client = Client(**kwrs)
+         self.async_client = AsyncClient(**kwrs)
          self.model_name = model_name
          self.num_ctx = num_ctx
          self.keep_alive = keep_alive
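Because `**kwrs` is now forwarded to `ollama.Client` and `ollama.AsyncClient` rather than the module-level API, client options such as `host` can be set at construction time. A minimal sketch, assuming a local Ollama server and an illustrative model tag:

```python
from llm_ie.engines import OllamaInferenceEngine

# Extra keyword arguments go to ollama.Client / ollama.AsyncClient.
# The host below is the default local Ollama endpoint; the model tag is illustrative.
engine = OllamaInferenceEngine(
    model_name="llama3.1:8b",
    num_ctx=4096,
    keep_alive=300,
    host="http://localhost:11434",
)

# Synchronous chat, matching the signature shown in the next hunk.
print(engine.chat(
    messages=[{"role": "user", "content": "Say hello."}],
    max_new_tokens=64,
    temperature=0.0,
))
```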
@@ -139,7 +148,7 @@ class OllamaInferenceEngine(InferenceEngine):
          stream : bool, Optional
              if True, LLM generated text will be printed in terminal in real-time.
          """
-         response = self.ollama.chat(
+         response = self.client.chat(
              model=self.model_name,
              messages=messages,
              options={'temperature':temperature, 'num_ctx': self.num_ctx, 'num_predict': max_new_tokens, **kwrs},
@@ -155,16 +164,35 @@ class OllamaInferenceEngine(InferenceEngine):
              return res

          return response['message']['content']
+
+
+     async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, **kwrs) -> str:
+         """
+         Async version of chat method. Streaming is not supported.
+         """
+         response = await self.async_client.chat(
+             model=self.model_name,
+             messages=messages,
+             options={'temperature':temperature, 'num_ctx': self.num_ctx, 'num_predict': max_new_tokens, **kwrs},
+             stream=False,
+             keep_alive=self.keep_alive
+         )
+
+         return response['message']['content']


  class HuggingFaceHubInferenceEngine(InferenceEngine):
-     def __init__(self, **kwrs):
+     def __init__(self, model:str=None, token:Union[str, bool]=None, base_url:str=None, api_key:str=None, **kwrs):
          """
          The Huggingface_hub InferenceClient inference engine.
          For parameters and documentation, refer to https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client
          """
-         from huggingface_hub import InferenceClient
-         self.client = InferenceClient(**kwrs)
+         if importlib.util.find_spec("huggingface_hub") is None:
+             raise ImportError("huggingface-hub not found. Please install huggingface-hub (```pip install huggingface-hub```).")
+
+         from huggingface_hub import InferenceClient, AsyncInferenceClient
+         self.client = InferenceClient(model=model, token=token, base_url=base_url, api_key=api_key, **kwrs)
+         self.client_async = AsyncInferenceClient(model=model, token=token, base_url=base_url, api_key=api_key, **kwrs)

      def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
          """
@@ -197,12 +225,29 @@ class HuggingFaceHubInferenceEngine(InferenceEngine):
              return res

          return response.choices[0].message.content
+
+     async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, **kwrs) -> str:
+         """
+         Async version of chat method. Streaming is not supported.
+         """
+         response = await self.client_async.chat.completions.create(
+             messages=messages,
+             max_tokens=max_new_tokens,
+             temperature=temperature,
+             stream=False,
+             **kwrs
+         )
+
+         return response.choices[0].message.content


  class OpenAIInferenceEngine(InferenceEngine):
      def __init__(self, model:str, **kwrs):
          """
-         The OpenAI API inference engine.
+         The OpenAI API inference engine. Supports OpenAI models and OpenAI compatible servers:
+         - vLLM OpenAI compatible server (https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html)
+         - Llama.cpp OpenAI compatible server (https://llama-cpp-python.readthedocs.io/en/latest/server/)
+
          For parameters and documentation, refer to https://platform.openai.com/docs/api-reference/introduction

          Parameters:
@@ -210,8 +255,12 @@ class OpenAIInferenceEngine(InferenceEngine):
          model_name : str
              model name as described in https://platform.openai.com/docs/models
          """
-         from openai import OpenAI
+         if importlib.util.find_spec("openai") is None:
+             raise ImportError("OpenAI Python API library not found. Please install OpenAI (```pip install openai```).")
+
+         from openai import OpenAI, AsyncOpenAI
          self.client = OpenAI(**kwrs)
+         self.async_client = AsyncOpenAI(**kwrs)
          self.model = model

      def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
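The updated docstring spells out that OpenAI-compatible endpoints work, since the extra keyword arguments pass straight through to `openai.OpenAI` / `AsyncOpenAI`. A sketch pointing the engine at a vLLM server; the URL, API key, and served model name are placeholders:

```python
from llm_ie.engines import OpenAIInferenceEngine

# base_url and api_key are forwarded to openai.OpenAI / AsyncOpenAI, so a vLLM
# or llama.cpp OpenAI-compatible server can stand in for api.openai.com.
engine = OpenAIInferenceEngine(
    model="meta-llama/Llama-3.1-8B-Instruct",  # whatever name the server exposes (placeholder)
    base_url="http://localhost:8000/v1",
    api_key="EMPTY",
)

print(engine.chat(
    messages=[{"role": "user", "content": "Extract the diagnosis from: 'Pt with T2DM.'"}],
    max_new_tokens=128,
))
```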
@@ -246,4 +295,97 @@ class OpenAIInferenceEngine(InferenceEngine):
                      print(chunk.choices[0].delta.content, end="", flush=True)
              return res

+         return response.choices[0].message.content
+
+
+     async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, **kwrs) -> str:
+         """
+         Async version of chat method. Streaming is not supported.
+         """
+         response = await self.async_client.chat.completions.create(
+             model=self.model,
+             messages=messages,
+             max_tokens=max_new_tokens,
+             temperature=temperature,
+             stream=False,
+             **kwrs
+         )
+
+         return response.choices[0].message.content
+
+
+ class LiteLLMInferenceEngine(InferenceEngine):
+     def __init__(self, model:str=None, base_url:str=None, api_key:str=None):
+         """
+         The LiteLLM inference engine.
+         For parameters and documentation, refer to https://github.com/BerriAI/litellm?tab=readme-ov-file
+
+         Parameters:
+         ----------
+         model : str
+             the model name
+         base_url : str, Optional
+             the base url for the LLM server
+         api_key : str, Optional
+             the API key for the LLM server
+         """
+         if importlib.util.find_spec("litellm") is None:
+             raise ImportError("litellm not found. Please install litellm (```pip install litellm```).")
+
+         import litellm
+         self.litellm = litellm
+         self.model = model
+         self.base_url = base_url
+         self.api_key = api_key
+
+     def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
+         """
+         This method inputs chat messages and outputs LLM generated text.
+
+         Parameters:
+         ----------
+         messages : List[Dict[str,str]]
+             a list of dict with role and content. role must be one of {"system", "user", "assistant"}
+         max_new_tokens : str, Optional
+             the max number of new tokens LLM can generate.
+         temperature : float, Optional
+             the temperature for token sampling.
+         stream : bool, Optional
+             if True, LLM generated text will be printed in terminal in real-time.
+         """
+         response = self.litellm.completion(
+             model=self.model,
+             messages=messages,
+             max_tokens=max_new_tokens,
+             temperature=temperature,
+             stream=stream,
+             base_url=self.base_url,
+             api_key=self.api_key,
+             **kwrs
+         )
+
+         if stream:
+             res = ''
+             for chunk in response:
+                 if chunk.choices[0].delta.content is not None:
+                     res += chunk.choices[0].delta.content
+                     print(chunk.choices[0].delta.content, end="", flush=True)
+             return res
+
+         return response.choices[0].message.content
+
+     async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, **kwrs) -> str:
+         """
+         Async version of chat method. Streaming is not supported.
+         """
+         response = await self.litellm.acompletion(
+             model=self.model,
+             messages=messages,
+             max_tokens=max_new_tokens,
+             temperature=temperature,
+             stream=False,
+             base_url=self.base_url,
+             api_key=self.api_key,
+             **kwrs
+         )
+
          return response.choices[0].message.content
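The new `LiteLLMInferenceEngine` exposes the same `chat` / `chat_async` interface through LiteLLM's provider routing. A minimal sketch; the `ollama/...` model string and local base URL are illustrative, and any provider LiteLLM supports could be substituted:

```python
import asyncio
from llm_ie.engines import LiteLLMInferenceEngine

# LiteLLM routes by the model prefix; "ollama/..." assumes a local Ollama server.
engine = LiteLLMInferenceEngine(
    model="ollama/llama3.1:8b",
    base_url="http://localhost:11434",
)

# Synchronous call (pass stream=True to print tokens as they arrive).
print(engine.chat(
    messages=[{"role": "user", "content": "Define information extraction in one sentence."}],
    max_new_tokens=128,
))

# Async call, mirroring the chat_async method added to the other engines in this diff.
async def main():
    return await engine.chat_async(
        messages=[{"role": "user", "content": "Name one common entity type."}],
        max_new_tokens=64,
    )

print(asyncio.run(main()))
```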