mseep_txtai-9.1.1-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mseep_txtai-9.1.1.dist-info/METADATA +262 -0
- mseep_txtai-9.1.1.dist-info/RECORD +251 -0
- mseep_txtai-9.1.1.dist-info/WHEEL +5 -0
- mseep_txtai-9.1.1.dist-info/licenses/LICENSE +190 -0
- mseep_txtai-9.1.1.dist-info/top_level.txt +1 -0
- txtai/__init__.py +16 -0
- txtai/agent/__init__.py +12 -0
- txtai/agent/base.py +54 -0
- txtai/agent/factory.py +39 -0
- txtai/agent/model.py +107 -0
- txtai/agent/placeholder.py +16 -0
- txtai/agent/tool/__init__.py +7 -0
- txtai/agent/tool/embeddings.py +69 -0
- txtai/agent/tool/factory.py +130 -0
- txtai/agent/tool/function.py +49 -0
- txtai/ann/__init__.py +7 -0
- txtai/ann/base.py +153 -0
- txtai/ann/dense/__init__.py +11 -0
- txtai/ann/dense/annoy.py +72 -0
- txtai/ann/dense/factory.py +76 -0
- txtai/ann/dense/faiss.py +233 -0
- txtai/ann/dense/hnsw.py +104 -0
- txtai/ann/dense/numpy.py +164 -0
- txtai/ann/dense/pgvector.py +323 -0
- txtai/ann/dense/sqlite.py +303 -0
- txtai/ann/dense/torch.py +38 -0
- txtai/ann/sparse/__init__.py +7 -0
- txtai/ann/sparse/factory.py +61 -0
- txtai/ann/sparse/ivfsparse.py +377 -0
- txtai/ann/sparse/pgsparse.py +56 -0
- txtai/api/__init__.py +18 -0
- txtai/api/application.py +134 -0
- txtai/api/authorization.py +53 -0
- txtai/api/base.py +159 -0
- txtai/api/cluster.py +295 -0
- txtai/api/extension.py +19 -0
- txtai/api/factory.py +40 -0
- txtai/api/responses/__init__.py +7 -0
- txtai/api/responses/factory.py +30 -0
- txtai/api/responses/json.py +56 -0
- txtai/api/responses/messagepack.py +51 -0
- txtai/api/route.py +41 -0
- txtai/api/routers/__init__.py +25 -0
- txtai/api/routers/agent.py +38 -0
- txtai/api/routers/caption.py +42 -0
- txtai/api/routers/embeddings.py +280 -0
- txtai/api/routers/entity.py +42 -0
- txtai/api/routers/extractor.py +28 -0
- txtai/api/routers/labels.py +47 -0
- txtai/api/routers/llm.py +61 -0
- txtai/api/routers/objects.py +42 -0
- txtai/api/routers/openai.py +191 -0
- txtai/api/routers/rag.py +61 -0
- txtai/api/routers/reranker.py +46 -0
- txtai/api/routers/segmentation.py +42 -0
- txtai/api/routers/similarity.py +48 -0
- txtai/api/routers/summary.py +46 -0
- txtai/api/routers/tabular.py +42 -0
- txtai/api/routers/textractor.py +42 -0
- txtai/api/routers/texttospeech.py +33 -0
- txtai/api/routers/transcription.py +42 -0
- txtai/api/routers/translation.py +46 -0
- txtai/api/routers/upload.py +36 -0
- txtai/api/routers/workflow.py +28 -0
- txtai/app/__init__.py +5 -0
- txtai/app/base.py +821 -0
- txtai/archive/__init__.py +9 -0
- txtai/archive/base.py +104 -0
- txtai/archive/compress.py +51 -0
- txtai/archive/factory.py +25 -0
- txtai/archive/tar.py +49 -0
- txtai/archive/zip.py +35 -0
- txtai/cloud/__init__.py +8 -0
- txtai/cloud/base.py +106 -0
- txtai/cloud/factory.py +70 -0
- txtai/cloud/hub.py +101 -0
- txtai/cloud/storage.py +125 -0
- txtai/console/__init__.py +5 -0
- txtai/console/__main__.py +22 -0
- txtai/console/base.py +264 -0
- txtai/data/__init__.py +10 -0
- txtai/data/base.py +138 -0
- txtai/data/labels.py +42 -0
- txtai/data/questions.py +135 -0
- txtai/data/sequences.py +48 -0
- txtai/data/texts.py +68 -0
- txtai/data/tokens.py +28 -0
- txtai/database/__init__.py +14 -0
- txtai/database/base.py +342 -0
- txtai/database/client.py +227 -0
- txtai/database/duckdb.py +150 -0
- txtai/database/embedded.py +76 -0
- txtai/database/encoder/__init__.py +8 -0
- txtai/database/encoder/base.py +37 -0
- txtai/database/encoder/factory.py +56 -0
- txtai/database/encoder/image.py +43 -0
- txtai/database/encoder/serialize.py +28 -0
- txtai/database/factory.py +77 -0
- txtai/database/rdbms.py +569 -0
- txtai/database/schema/__init__.py +6 -0
- txtai/database/schema/orm.py +99 -0
- txtai/database/schema/statement.py +98 -0
- txtai/database/sql/__init__.py +8 -0
- txtai/database/sql/aggregate.py +178 -0
- txtai/database/sql/base.py +189 -0
- txtai/database/sql/expression.py +404 -0
- txtai/database/sql/token.py +342 -0
- txtai/database/sqlite.py +57 -0
- txtai/embeddings/__init__.py +7 -0
- txtai/embeddings/base.py +1107 -0
- txtai/embeddings/index/__init__.py +14 -0
- txtai/embeddings/index/action.py +15 -0
- txtai/embeddings/index/autoid.py +92 -0
- txtai/embeddings/index/configuration.py +71 -0
- txtai/embeddings/index/documents.py +86 -0
- txtai/embeddings/index/functions.py +155 -0
- txtai/embeddings/index/indexes.py +199 -0
- txtai/embeddings/index/indexids.py +60 -0
- txtai/embeddings/index/reducer.py +104 -0
- txtai/embeddings/index/stream.py +67 -0
- txtai/embeddings/index/transform.py +205 -0
- txtai/embeddings/search/__init__.py +11 -0
- txtai/embeddings/search/base.py +344 -0
- txtai/embeddings/search/errors.py +9 -0
- txtai/embeddings/search/explain.py +120 -0
- txtai/embeddings/search/ids.py +61 -0
- txtai/embeddings/search/query.py +69 -0
- txtai/embeddings/search/scan.py +196 -0
- txtai/embeddings/search/terms.py +46 -0
- txtai/graph/__init__.py +10 -0
- txtai/graph/base.py +769 -0
- txtai/graph/factory.py +61 -0
- txtai/graph/networkx.py +275 -0
- txtai/graph/query.py +181 -0
- txtai/graph/rdbms.py +113 -0
- txtai/graph/topics.py +166 -0
- txtai/models/__init__.py +9 -0
- txtai/models/models.py +268 -0
- txtai/models/onnx.py +133 -0
- txtai/models/pooling/__init__.py +9 -0
- txtai/models/pooling/base.py +141 -0
- txtai/models/pooling/cls.py +28 -0
- txtai/models/pooling/factory.py +144 -0
- txtai/models/pooling/late.py +173 -0
- txtai/models/pooling/mean.py +33 -0
- txtai/models/pooling/muvera.py +164 -0
- txtai/models/registry.py +37 -0
- txtai/models/tokendetection.py +122 -0
- txtai/pipeline/__init__.py +17 -0
- txtai/pipeline/audio/__init__.py +11 -0
- txtai/pipeline/audio/audiomixer.py +58 -0
- txtai/pipeline/audio/audiostream.py +94 -0
- txtai/pipeline/audio/microphone.py +244 -0
- txtai/pipeline/audio/signal.py +186 -0
- txtai/pipeline/audio/texttoaudio.py +60 -0
- txtai/pipeline/audio/texttospeech.py +553 -0
- txtai/pipeline/audio/transcription.py +212 -0
- txtai/pipeline/base.py +23 -0
- txtai/pipeline/data/__init__.py +10 -0
- txtai/pipeline/data/filetohtml.py +206 -0
- txtai/pipeline/data/htmltomd.py +414 -0
- txtai/pipeline/data/segmentation.py +178 -0
- txtai/pipeline/data/tabular.py +155 -0
- txtai/pipeline/data/textractor.py +139 -0
- txtai/pipeline/data/tokenizer.py +112 -0
- txtai/pipeline/factory.py +77 -0
- txtai/pipeline/hfmodel.py +111 -0
- txtai/pipeline/hfpipeline.py +96 -0
- txtai/pipeline/image/__init__.py +7 -0
- txtai/pipeline/image/caption.py +55 -0
- txtai/pipeline/image/imagehash.py +90 -0
- txtai/pipeline/image/objects.py +80 -0
- txtai/pipeline/llm/__init__.py +11 -0
- txtai/pipeline/llm/factory.py +86 -0
- txtai/pipeline/llm/generation.py +173 -0
- txtai/pipeline/llm/huggingface.py +218 -0
- txtai/pipeline/llm/litellm.py +90 -0
- txtai/pipeline/llm/llama.py +152 -0
- txtai/pipeline/llm/llm.py +75 -0
- txtai/pipeline/llm/rag.py +477 -0
- txtai/pipeline/nop.py +14 -0
- txtai/pipeline/tensors.py +52 -0
- txtai/pipeline/text/__init__.py +13 -0
- txtai/pipeline/text/crossencoder.py +70 -0
- txtai/pipeline/text/entity.py +140 -0
- txtai/pipeline/text/labels.py +137 -0
- txtai/pipeline/text/lateencoder.py +103 -0
- txtai/pipeline/text/questions.py +48 -0
- txtai/pipeline/text/reranker.py +57 -0
- txtai/pipeline/text/similarity.py +83 -0
- txtai/pipeline/text/summary.py +98 -0
- txtai/pipeline/text/translation.py +298 -0
- txtai/pipeline/train/__init__.py +7 -0
- txtai/pipeline/train/hfonnx.py +196 -0
- txtai/pipeline/train/hftrainer.py +398 -0
- txtai/pipeline/train/mlonnx.py +63 -0
- txtai/scoring/__init__.py +12 -0
- txtai/scoring/base.py +188 -0
- txtai/scoring/bm25.py +29 -0
- txtai/scoring/factory.py +95 -0
- txtai/scoring/pgtext.py +181 -0
- txtai/scoring/sif.py +32 -0
- txtai/scoring/sparse.py +218 -0
- txtai/scoring/terms.py +499 -0
- txtai/scoring/tfidf.py +358 -0
- txtai/serialize/__init__.py +10 -0
- txtai/serialize/base.py +85 -0
- txtai/serialize/errors.py +9 -0
- txtai/serialize/factory.py +29 -0
- txtai/serialize/messagepack.py +42 -0
- txtai/serialize/pickle.py +98 -0
- txtai/serialize/serializer.py +46 -0
- txtai/util/__init__.py +7 -0
- txtai/util/resolver.py +32 -0
- txtai/util/sparsearray.py +62 -0
- txtai/util/template.py +16 -0
- txtai/vectors/__init__.py +8 -0
- txtai/vectors/base.py +476 -0
- txtai/vectors/dense/__init__.py +12 -0
- txtai/vectors/dense/external.py +55 -0
- txtai/vectors/dense/factory.py +121 -0
- txtai/vectors/dense/huggingface.py +44 -0
- txtai/vectors/dense/litellm.py +86 -0
- txtai/vectors/dense/llama.py +84 -0
- txtai/vectors/dense/m2v.py +67 -0
- txtai/vectors/dense/sbert.py +92 -0
- txtai/vectors/dense/words.py +211 -0
- txtai/vectors/recovery.py +57 -0
- txtai/vectors/sparse/__init__.py +7 -0
- txtai/vectors/sparse/base.py +90 -0
- txtai/vectors/sparse/factory.py +55 -0
- txtai/vectors/sparse/sbert.py +34 -0
- txtai/version.py +6 -0
- txtai/workflow/__init__.py +8 -0
- txtai/workflow/base.py +184 -0
- txtai/workflow/execute.py +99 -0
- txtai/workflow/factory.py +42 -0
- txtai/workflow/task/__init__.py +18 -0
- txtai/workflow/task/base.py +490 -0
- txtai/workflow/task/console.py +24 -0
- txtai/workflow/task/export.py +64 -0
- txtai/workflow/task/factory.py +89 -0
- txtai/workflow/task/file.py +28 -0
- txtai/workflow/task/image.py +36 -0
- txtai/workflow/task/retrieve.py +61 -0
- txtai/workflow/task/service.py +102 -0
- txtai/workflow/task/storage.py +110 -0
- txtai/workflow/task/stream.py +33 -0
- txtai/workflow/task/template.py +116 -0
- txtai/workflow/task/url.py +20 -0
- txtai/workflow/task/workflow.py +14 -0
txtai/pipeline/llm/huggingface.py
@@ -0,0 +1,218 @@
"""
Hugging Face module
"""

from threading import Thread

from transformers import AutoModelForImageTextToText, TextIteratorStreamer

from ...models import Models

from ..hfpipeline import HFPipeline

from .generation import Generation


class HFGeneration(Generation):
    """
    Hugging Face Transformers generative model.
    """

    def __init__(self, path, template=None, **kwargs):
        # Call parent constructor
        super().__init__(path, template, **kwargs)

        # Create HuggingFace LLM pipeline
        self.llm = HFLLM(path, **kwargs)

    def isvision(self):
        return isinstance(self.llm.pipeline.model, AutoModelForImageTextToText)

    def stream(self, texts, maxlength, stream, stop, **kwargs):
        yield from self.llm(texts, maxlength=maxlength, stream=stream, stop=stop, **kwargs)


class HFLLM(HFPipeline):
    """
    Hugging Face Transformers large language model (LLM) pipeline. This pipeline autodetects if the model path
    is a text generation or sequence to sequence model.
    """

    def __init__(self, path=None, quantize=False, gpu=True, model=None, task=None, **kwargs):
        super().__init__(self.task(path, task, **kwargs), path, quantize, gpu, model, **kwargs)

        # Load tokenizer, if necessary
        self.pipeline.tokenizer = self.pipeline.tokenizer if self.pipeline.tokenizer else Models.tokenizer(path, **kwargs)

    def __call__(self, text, prefix=None, maxlength=512, workers=0, stream=False, stop=None, **kwargs):
        """
        Generates text. Supports the following input formats:

          - String or list of strings (instruction-tuned models must follow chat templates)
          - List of dictionaries with `role` and `content` key-values or lists of lists

        Args:
            text: text|list
            prefix: optional prefix to prepend to text elements
            maxlength: maximum sequence length
            workers: number of concurrent workers to use for processing data, defaults to None
            stream: stream response if True, defaults to False
            stop: list of stop strings
            kwargs: additional generation keyword arguments

        Returns:
            generated text
        """

        # List of texts
        texts = text if isinstance(text, list) else [text]

        # Add prefix, if necessary
        if prefix:
            texts = [f"{prefix}{x}" for x in texts]

        # Combine all keyword arguments
        args, kwargs = self.parameters(texts, maxlength, workers, stop, **kwargs)

        # Stream response
        if stream:
            return StreamingResponse(self.pipeline, texts, stop, **kwargs)()

        # Run pipeline and extract generated text
        results = [self.extract(result) for result in self.pipeline(*args, **kwargs)]

        return results[0] if isinstance(text, str) else results

    def parameters(self, texts, maxlength, workers, stop, **kwargs):
        """
        Builds a list of arguments and a combined parameter dictionary to use as keyword arguments.

        Args:
            texts: input texts
            maxlength: maximum sequence length
            workers: number of concurrent workers to use for processing data, defaults to None
            stop: list of stop strings
            kwargs: additional generation keyword arguments

        Returns:
            args, kwargs
        """

        # Set defaults and get underlying model
        defaults, model = {"max_length": maxlength, "max_new_tokens": None, "num_workers": workers}, self.pipeline.model

        # Set parameters for vision models and return
        if self.pipeline.task == "image-text-to-text":
            # Maxlength has to be large enough to accommodate images
            defaults["max_length"] = max(maxlength, 2048)

            # Set default token id
            tokenid = model.generation_config.pad_token_id
            model.generation_config.pad_token_id = tokenid if tokenid else model.generation_config.eos_token_id

            # Vision models take all arguments as keyword arguments
            return [], {**{"text": texts, "truncation": True}, **defaults, **kwargs}

        # Add pad token if it's missing from model config
        if not model.config.pad_token_id:
            tokenid = model.config.eos_token_id
            tokenid = tokenid[0] if isinstance(tokenid, list) else tokenid

            # Set pad_token_id parameter
            defaults["pad_token_id"] = tokenid

            # Update tokenizer for batching
            if "batch_size" in kwargs and self.pipeline.tokenizer.pad_token_id is None:
                self.pipeline.tokenizer.pad_token_id = tokenid
                self.pipeline.tokenizer.padding_side = "left"

        # Set tokenizer when stop strings is set
        if stop:
            defaults["tokenizer"] = self.pipeline.tokenizer

        return [texts], {**defaults, **kwargs}

    def extract(self, result):
        """
        Extracts generated text from a pipeline result.

        Args:
            result: pipeline result

        Returns:
            generated text
        """

        # Extract output from list, if necessary
        result = result[0] if isinstance(result, list) else result
        text = result["generated_text"]
        return text[-1]["content"] if isinstance(text, list) else text

    def task(self, path, task, **kwargs):
        """
        Get the pipeline task name.

        Args:
            path: model path input
            task: task name
            kwargs: optional additional keyword arguments

        Returns:
            pipeline task name
        """

        # Mapping from txtai to Hugging Face pipeline tasks
        mapping = {"language-generation": "text-generation", "sequence-sequence": "text2text-generation", "vision": "image-text-to-text"}

        # Attempt to resolve task
        if path and not task:
            task = Models.task(path, **kwargs)

        # Map to Hugging Face task. Default to text2text-generation pipeline when task not resolved.
        return mapping.get(task, "text2text-generation")


class Generator(HFLLM):
    """
    Generate text with a causal language model.
    """

    def __init__(self, path=None, quantize=False, gpu=True, model=None, **kwargs):
        super().__init__(path, quantize, gpu, model, "language-generation", **kwargs)


class Sequences(HFLLM):
    """
    Generate text with a sequence-sequence model.
    """

    def __init__(self, path=None, quantize=False, gpu=True, model=None, **kwargs):
        super().__init__(path, quantize, gpu, model, "sequence-sequence", **kwargs)


class StreamingResponse:
    """
    Generate text as a streaming response.
    """

    def __init__(self, pipeline, texts, stop, **kwargs):
        # Create streamer
        self.stream = TextIteratorStreamer(pipeline.tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=5)
        kwargs["streamer"] = self.stream
        kwargs["stop_strings"] = stop

        # Create thread
        self.thread = Thread(target=pipeline, args=[texts], kwargs=kwargs)

        # Store number of inputs
        self.length = len(texts)

    def __call__(self):
        # Start the process
        self.thread.start()

        return self

    def __iter__(self):
        for _ in range(self.length):
            yield from self.stream
txtai/pipeline/llm/litellm.py
@@ -0,0 +1,90 @@
"""
LiteLLM module
"""

from transformers.utils import cached_file

# Conditional import
try:
    import litellm as api

    LITELLM = True
except ImportError:
    LITELLM = False

from .generation import Generation


class LiteLLM(Generation):
    """
    LiteLLM generative model.
    """

    @staticmethod
    def ismodel(path):
        """
        Checks if path is a LiteLLM model.

        Args:
            path: input path

        Returns:
            True if this is a LiteLLM model, False otherwise
        """

        # pylint: disable=W0702
        if isinstance(path, str) and LITELLM:
            debug = api.suppress_debug_info
            try:
                # Suppress debug messages for this test
                api.suppress_debug_info = True
                return api.get_llm_provider(path) and not LiteLLM.ishub(path)
            except:
                return False
            finally:
                # Restore debug info value to original value
                api.suppress_debug_info = debug

        return False

    @staticmethod
    def ishub(path):
        """
        Checks if path is available on the HF Hub.

        Args:
            path: input path

        Returns:
            True if this is a model on the HF Hub
        """

        # pylint: disable=W0702
        try:
            return cached_file(path_or_repo_id=path, filename="config.json") is not None if "/" in path else False
        except:
            return False

    def __init__(self, path, template=None, **kwargs):
        super().__init__(path, template, **kwargs)

        if not LITELLM:
            raise ImportError('LiteLLM is not available - install "pipeline" extra to enable')

        # Ignore common pipeline parameters
        self.kwargs = {k: v for k, v in self.kwargs.items() if k not in ["quantize", "gpu", "model", "task"]}

    def stream(self, texts, maxlength, stream, stop, **kwargs):
        for text in texts:
            # LLM API call
            result = api.completion(
                model=self.path,
                messages=[{"content": text, "role": "prompt"}] if isinstance(text, str) else text,
                max_tokens=maxlength,
                stream=stream,
                stop=stop,
                **{**self.kwargs, **kwargs}
            )

            # Stream response
            yield from self.response(result if stream else [result])
txtai/pipeline/llm/llama.py
@@ -0,0 +1,152 @@
"""
Llama module
"""

import os

from huggingface_hub import hf_hub_download

# Conditional import
try:
    import llama_cpp as llama

    LLAMA_CPP = True
except ImportError:
    LLAMA_CPP = False

from .generation import Generation


class LlamaCpp(Generation):
    """
    llama.cpp generative model.
    """

    @staticmethod
    def ismodel(path):
        """
        Checks if path is a llama.cpp model.

        Args:
            path: input path

        Returns:
            True if this is a llama.cpp model, False otherwise
        """

        return isinstance(path, str) and path.lower().endswith(".gguf")

    def __init__(self, path, template=None, **kwargs):
        super().__init__(path, template, **kwargs)

        if not LLAMA_CPP:
            raise ImportError('llama.cpp is not available - install "pipeline" extra to enable')

        # Check if this is a local path, otherwise download from the HF Hub
        path = path if os.path.exists(path) else self.download(path)

        # Create llama.cpp instance
        self.llm = self.create(path, **kwargs)

    def stream(self, texts, maxlength, stream, stop, **kwargs):
        for text in texts:
            yield from (
                self.messages(text, maxlength, stream, stop, **kwargs)
                if isinstance(text, list)
                else self.prompt(text, maxlength, stream, stop, **kwargs)
            )

    def download(self, path):
        """
        Downloads path from the Hugging Face Hub.

        Args:
            path: full model path

        Returns:
            local cached model path
        """

        # Split into parts
        parts = path.split("/")

        # Calculate repo id split
        repo = 2 if len(parts) > 2 else 1

        # Download and cache file
        return hf_hub_download(repo_id="/".join(parts[:repo]), filename="/".join(parts[repo:]))

    def create(self, path, **kwargs):
        """
        Creates a new llama.cpp model instance.

        Args:
            path: path to model
            kwargs: additional keyword args

        Returns:
            llama.cpp instance
        """

        # Default n_ctx=0 if not already set. This sets n_ctx = n_ctx_train.
        kwargs["n_ctx"] = kwargs.get("n_ctx", 0)

        # Default GPU layers if not already set
        kwargs["n_gpu_layers"] = kwargs.get("n_gpu_layers", -1 if kwargs.get("gpu", os.environ.get("LLAMA_NO_METAL") != "1") else 0)

        # Default verbose flag
        kwargs["verbose"] = kwargs.get("verbose", False)

        # Create llama.cpp instance
        try:
            return llama.Llama(model_path=path, **kwargs)
        except ValueError as e:
            # Fallback to default n_ctx when not enough memory for n_ctx = n_ctx_train
            if not kwargs["n_ctx"]:
                kwargs.pop("n_ctx")
                return llama.Llama(model_path=path, **kwargs)

            # Raise exception if n_ctx manually specified
            raise e

    def messages(self, messages, maxlength, stream, stop, **kwargs):
        """
        Processes a list of messages.

        Args:
            messages: list of dictionaries with `role` and `content` key-values
            maxlength: maximum sequence length
            stream: stream response if True, defaults to False
            stop: list of stop strings
            kwargs: additional generation keyword arguments

        Returns:
            generated text
        """

        # LLM call with messages
        result = self.llm.create_chat_completion(messages=messages, max_tokens=maxlength, stream=stream, stop=stop, **kwargs)

        # Stream response
        yield from self.response(result if stream else [result])

    def prompt(self, text, maxlength, stream, stop, **kwargs):
        """
        Processes a prompt.

        Args:
            text: prompt text
            maxlength: maximum sequence length
            stream: stream response if True, defaults to False
            stop: list of stop strings
            kwargs: additional generation keyword arguments

        Returns:
            generated text
        """

        # LLM call with prompt
        result = self.llm(text, max_tokens=maxlength, stream=stream, stop=stop, **kwargs)

        # Stream response
        yield from self.response(result if stream else [result])
txtai/pipeline/llm/llm.py
@@ -0,0 +1,75 @@
"""
LLM module
"""

import logging

from .factory import GenerationFactory

from ..base import Pipeline

# Logging configuration
logger = logging.getLogger(__name__)


class LLM(Pipeline):
    """
    Pipeline for running large language models (LLMs). This class supports the following LLM backends:

      - Local LLMs with Hugging Face Transformers
      - Local LLMs with llama.cpp
      - Remote API LLMs with LiteLLM
      - Custom generation implementations
    """

    def __init__(self, path=None, method=None, **kwargs):
        """
        Creates a new LLM.

        Args:
            path: model path
            method: llm model framework, infers from path if not provided
            kwargs: model keyword arguments
        """

        # Default LLM if not provided
        path = path if path else "google/flan-t5-base"

        # Generation instance
        self.generator = GenerationFactory.create(path, method, **kwargs)

    def __call__(self, text, maxlength=512, stream=False, stop=None, defaultrole="prompt", stripthink=False, **kwargs):
        """
        Generates text. Supports the following input formats:

          - String or list of strings (instruction-tuned models must follow chat templates)
          - List of dictionaries with `role` and `content` key-values or lists of lists

        Args:
            text: text|list
            maxlength: maximum sequence length
            stream: stream response if True, defaults to False
            stop: list of stop strings, defaults to None
            defaultrole: default role to apply to text inputs (prompt for raw prompts (default) or user for user chat messages)
            stripthink: strip thinking tags, defaults to False
            kwargs: additional generation keyword arguments

        Returns:
            generated text
        """

        # Debug logging
        logger.debug(text)

        # Run LLM generation
        return self.generator(text, maxlength, stream, stop, defaultrole, stripthink, **kwargs)

    def isvision(self):
        """
        Returns True if this LLM supports vision operations.

        Returns:
            True if this is a vision model
        """

        return self.generator.isvision()