llm-ie 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_ie/__init__.py +9 -0
- llm_ie/engines.py +151 -9
- llm_ie/extractors.py +545 -151
- llm_ie/prompt_editor.py +17 -2
- {llm_ie-0.3.5.dist-info → llm_ie-0.4.0.dist-info}/METADATA +341 -103
- {llm_ie-0.3.5.dist-info → llm_ie-0.4.0.dist-info}/RECORD +7 -7
- {llm_ie-0.3.5.dist-info → llm_ie-0.4.0.dist-info}/WHEEL +0 -0
llm_ie/__init__.py
CHANGED
```diff
@@ -0,0 +1,9 @@
+from .data_types import LLMInformationExtractionFrame, LLMInformationExtractionDocument
+from .engines import LlamaCppInferenceEngine, OllamaInferenceEngine, HuggingFaceHubInferenceEngine, OpenAIInferenceEngine, LiteLLMInferenceEngine
+from .extractors import BasicFrameExtractor, ReviewFrameExtractor, SentenceFrameExtractor, SentenceReviewFrameExtractor, SentenceCoTFrameExtractor, BinaryRelationExtractor, MultiClassRelationExtractor
+from .prompt_editor import PromptEditor
+
+__all__ = ["LLMInformationExtractionFrame", "LLMInformationExtractionDocument",
+           "LlamaCppInferenceEngine", "OllamaInferenceEngine", "HuggingFaceHubInferenceEngine", "OpenAIInferenceEngine", "LiteLLMInferenceEngine",
+           "BasicFrameExtractor", "ReviewFrameExtractor", "SentenceFrameExtractor", "SentenceReviewFrameExtractor", "SentenceCoTFrameExtractor", "BinaryRelationExtractor", "MultiClassRelationExtractor",
+           "PromptEditor"]
```
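With these exports, the 0.4.0 engines and extractors are importable directly from the package root. A minimal sketch of the new import path, assuming a locally served Ollama model (the model tag and prompt below are illustrative, not part of the diff):

```python
from llm_ie import OllamaInferenceEngine

# Constructor and chat() signatures as added in llm_ie/engines.py below;
# "llama3.1:8b" is a placeholder model tag and assumes a running Ollama server.
engine = OllamaInferenceEngine(model_name="llama3.1:8b", num_ctx=4096, keep_alive=300)

reply = engine.chat(
    messages=[{"role": "user", "content": "Extract the medication names from: aspirin 81 mg daily."}],
    max_new_tokens=256,
    temperature=0.0,
)
print(reply)
```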
llm_ie/engines.py
CHANGED
```diff
@@ -1,5 +1,7 @@
 import abc
-
+import importlib
+from typing import List, Dict, Union
+
 
 class InferenceEngine:
     @abc.abstractmethod
@@ -104,6 +106,9 @@ class LlamaCppInferenceEngine(InferenceEngine):
         return response['choices'][0]['message']['content']
 
 
+
+
+
 class OllamaInferenceEngine(InferenceEngine):
     def __init__(self, model_name:str, num_ctx:int=4096, keep_alive:int=300, **kwrs):
         """
@@ -118,8 +123,12 @@ class OllamaInferenceEngine(InferenceEngine):
         keep_alive : int, Optional
             seconds to hold the LLM after the last API call.
         """
-
-
+        if importlib.util.find_spec("ollama") is None:
+            raise ImportError("ollama-python not found. Please install ollama-python (```pip install ollama```).")
+
+        from ollama import Client, AsyncClient
+        self.client = Client(**kwrs)
+        self.async_client = AsyncClient(**kwrs)
         self.model_name = model_name
         self.num_ctx = num_ctx
         self.keep_alive = keep_alive
@@ -139,7 +148,7 @@ class OllamaInferenceEngine(InferenceEngine):
         stream : bool, Optional
             if True, LLM generated text will be printed in terminal in real-time.
         """
-        response = self.
+        response = self.client.chat(
             model=self.model_name,
             messages=messages,
             options={'temperature':temperature, 'num_ctx': self.num_ctx, 'num_predict': max_new_tokens, **kwrs},
@@ -155,16 +164,35 @@ class OllamaInferenceEngine(InferenceEngine):
             return res
 
         return response['message']['content']
+
+
+    async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, **kwrs) -> str:
+        """
+        Async version of chat method. Streaming is not supported.
+        """
+        response = await self.async_client.chat(
+            model=self.model_name,
+            messages=messages,
+            options={'temperature':temperature, 'num_ctx': self.num_ctx, 'num_predict': max_new_tokens, **kwrs},
+            stream=False,
+            keep_alive=self.keep_alive
+        )
+
+        return response['message']['content']
 
 
 class HuggingFaceHubInferenceEngine(InferenceEngine):
-    def __init__(self, **kwrs):
+    def __init__(self, model:str=None, token:Union[str, bool]=None, base_url:str=None, api_key:str=None, **kwrs):
         """
         The Huggingface_hub InferenceClient inference engine.
         For parameters and documentation, refer to https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client
         """
-
-
+        if importlib.util.find_spec("huggingface_hub") is None:
+            raise ImportError("huggingface-hub not found. Please install huggingface-hub (```pip install huggingface-hub```).")
+
+        from huggingface_hub import InferenceClient, AsyncInferenceClient
+        self.client = InferenceClient(model=model, token=token, base_url=base_url, api_key=api_key, **kwrs)
+        self.client_async = AsyncInferenceClient(model=model, token=token, base_url=base_url, api_key=api_key, **kwrs)
 
     def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
         """
```
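The hunks above wire OllamaInferenceEngine to ollama's Client/AsyncClient and add a non-streaming chat_async method. A concurrency sketch under those assumptions (placeholder model tag and prompts; requires a reachable Ollama server):

```python
import asyncio
from llm_ie import OllamaInferenceEngine

# chat_async() signature as added above; model tag and prompts are placeholders.
engine = OllamaInferenceEngine(model_name="llama3.1:8b")

async def run_batch(prompts):
    # Issue several non-streaming requests concurrently through AsyncClient.
    tasks = [
        engine.chat_async(messages=[{"role": "user", "content": p}], max_new_tokens=128)
        for p in prompts
    ]
    return await asyncio.gather(*tasks)

print(asyncio.run(run_batch(["Summarize sentence A.", "Summarize sentence B."])))
```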
```diff
@@ -197,12 +225,29 @@ class HuggingFaceHubInferenceEngine(InferenceEngine):
             return res
 
         return response.choices[0].message.content
+
+    async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, **kwrs) -> str:
+        """
+        Async version of chat method. Streaming is not supported.
+        """
+        response = await self.client_async.chat.completions.create(
+            messages=messages,
+            max_tokens=max_new_tokens,
+            temperature=temperature,
+            stream=False,
+            **kwrs
+        )
+
+        return response.choices[0].message.content
 
 
 class OpenAIInferenceEngine(InferenceEngine):
     def __init__(self, model:str, **kwrs):
         """
-        The OpenAI API inference engine.
+        The OpenAI API inference engine. Supports OpenAI models and OpenAI compatible servers:
+        - vLLM OpenAI compatible server (https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html)
+        - Llama.cpp OpenAI compatible server (https://llama-cpp-python.readthedocs.io/en/latest/server/)
+
         For parameters and documentation, refer to https://platform.openai.com/docs/api-reference/introduction
 
         Parameters:
@@ -210,8 +255,12 @@ class OpenAIInferenceEngine(InferenceEngine):
         model_name : str
             model name as described in https://platform.openai.com/docs/models
         """
-
+        if importlib.util.find_spec("openai") is None:
+            raise ImportError("OpenAI Python API library not found. Please install OpanAI (```pip install openai```).")
+
+        from openai import OpenAI, AsyncOpenAI
         self.client = OpenAI(**kwrs)
+        self.async_client = AsyncOpenAI(**kwrs)
         self.model = model
 
     def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
```
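Because OpenAIInferenceEngine forwards **kwrs to openai.OpenAI / AsyncOpenAI, the OpenAI-compatible servers named in the new docstring can be reached by passing base_url and api_key. A sketch assuming a vLLM or llama.cpp server at a placeholder address:

```python
from llm_ie import OpenAIInferenceEngine

# base_url/api_key pass through **kwrs to openai.OpenAI(); the endpoint,
# key, and model name are placeholders for a local OpenAI-compatible server.
engine = OpenAIInferenceEngine(
    model="meta-llama/Llama-3.1-8B-Instruct",
    base_url="http://localhost:8000/v1",
    api_key="EMPTY",
)

print(engine.chat(messages=[{"role": "user", "content": "Say hello."}], max_new_tokens=64))
```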
```diff
@@ -246,4 +295,97 @@ class OpenAIInferenceEngine(InferenceEngine):
                     print(chunk.choices[0].delta.content, end="", flush=True)
             return res
 
+        return response.choices[0].message.content
+
+
+    async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, **kwrs) -> str:
+        """
+        Async version of chat method. Streaming is not supported.
+        """
+        response = await self.async_client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            max_tokens=max_new_tokens,
+            temperature=temperature,
+            stream=False,
+            **kwrs
+        )
+
+        return response.choices[0].message.content
+
+
+class LiteLLMInferenceEngine(InferenceEngine):
+    def __init__(self, model:str=None, base_url:str=None, api_key:str=None):
+        """
+        The LiteLLM inference engine.
+        For parameters and documentation, refer to https://github.com/BerriAI/litellm?tab=readme-ov-file
+
+        Parameters:
+        ----------
+        model : str
+            the model name
+        base_url : str, Optional
+            the base url for the LLM server
+        api_key : str, Optional
+            the API key for the LLM server
+        """
+        if importlib.util.find_spec("litellm") is None:
+            raise ImportError("litellm not found. Please install litellm (```pip install litellm```).")
+
+        import litellm
+        self.litellm = litellm
+        self.model = model
+        self.base_url = base_url
+        self.api_key = api_key
+
+    def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
+        """
+        This method inputs chat messages and outputs LLM generated text.
+
+        Parameters:
+        ----------
+        messages : List[Dict[str,str]]
+            a list of dict with role and content. role must be one of {"system", "user", "assistant"}
+        max_new_tokens : str, Optional
+            the max number of new tokens LLM can generate.
+        temperature : float, Optional
+            the temperature for token sampling.
+        stream : bool, Optional
+            if True, LLM generated text will be printed in terminal in real-time.
+        """
+        response = self.litellm.completion(
+            model=self.model,
+            messages=messages,
+            max_tokens=max_new_tokens,
+            temperature=temperature,
+            stream=stream,
+            base_url=self.base_url,
+            api_key=self.api_key,
+            **kwrs
+        )
+
+        if stream:
+            res = ''
+            for chunk in response:
+                if chunk.choices[0].delta.content is not None:
+                    res += chunk.choices[0].delta.content
+                    print(chunk.choices[0].delta.content, end="", flush=True)
+            return res
+
+        return response.choices[0].message.content
+
+    async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, **kwrs) -> str:
+        """
+        Async version of chat method. Streaming is not supported.
+        """
+        response = await self.litellm.acompletion(
+            model=self.model,
+            messages=messages,
+            max_tokens=max_new_tokens,
+            temperature=temperature,
+            stream=False,
+            base_url=self.base_url,
+            api_key=self.api_key,
+            **kwrs
+        )
+
         return response.choices[0].message.content
```
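The new LiteLLMInferenceEngine routes chat and chat_async through litellm.completion / litellm.acompletion, so any provider litellm supports can be used behind the same interface. A minimal async sketch (the "openai/gpt-4o-mini" model string is a placeholder and assumes the relevant provider API key is set in the environment):

```python
import asyncio
from llm_ie import LiteLLMInferenceEngine

# Model string follows litellm's provider/model convention; it is a placeholder
# and assumes OPENAI_API_KEY (or the relevant provider key) is exported.
engine = LiteLLMInferenceEngine(model="openai/gpt-4o-mini")

async def main():
    return await engine.chat_async(
        messages=[{"role": "user", "content": "List three symptoms of influenza."}],
        max_new_tokens=128,
        temperature=0.0,
    )

print(asyncio.run(main()))
```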