mseep-txtai 9.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251)
  1. mseep_txtai-9.1.1.dist-info/METADATA +262 -0
  2. mseep_txtai-9.1.1.dist-info/RECORD +251 -0
  3. mseep_txtai-9.1.1.dist-info/WHEEL +5 -0
  4. mseep_txtai-9.1.1.dist-info/licenses/LICENSE +190 -0
  5. mseep_txtai-9.1.1.dist-info/top_level.txt +1 -0
  6. txtai/__init__.py +16 -0
  7. txtai/agent/__init__.py +12 -0
  8. txtai/agent/base.py +54 -0
  9. txtai/agent/factory.py +39 -0
  10. txtai/agent/model.py +107 -0
  11. txtai/agent/placeholder.py +16 -0
  12. txtai/agent/tool/__init__.py +7 -0
  13. txtai/agent/tool/embeddings.py +69 -0
  14. txtai/agent/tool/factory.py +130 -0
  15. txtai/agent/tool/function.py +49 -0
  16. txtai/ann/__init__.py +7 -0
  17. txtai/ann/base.py +153 -0
  18. txtai/ann/dense/__init__.py +11 -0
  19. txtai/ann/dense/annoy.py +72 -0
  20. txtai/ann/dense/factory.py +76 -0
  21. txtai/ann/dense/faiss.py +233 -0
  22. txtai/ann/dense/hnsw.py +104 -0
  23. txtai/ann/dense/numpy.py +164 -0
  24. txtai/ann/dense/pgvector.py +323 -0
  25. txtai/ann/dense/sqlite.py +303 -0
  26. txtai/ann/dense/torch.py +38 -0
  27. txtai/ann/sparse/__init__.py +7 -0
  28. txtai/ann/sparse/factory.py +61 -0
  29. txtai/ann/sparse/ivfsparse.py +377 -0
  30. txtai/ann/sparse/pgsparse.py +56 -0
  31. txtai/api/__init__.py +18 -0
  32. txtai/api/application.py +134 -0
  33. txtai/api/authorization.py +53 -0
  34. txtai/api/base.py +159 -0
  35. txtai/api/cluster.py +295 -0
  36. txtai/api/extension.py +19 -0
  37. txtai/api/factory.py +40 -0
  38. txtai/api/responses/__init__.py +7 -0
  39. txtai/api/responses/factory.py +30 -0
  40. txtai/api/responses/json.py +56 -0
  41. txtai/api/responses/messagepack.py +51 -0
  42. txtai/api/route.py +41 -0
  43. txtai/api/routers/__init__.py +25 -0
  44. txtai/api/routers/agent.py +38 -0
  45. txtai/api/routers/caption.py +42 -0
  46. txtai/api/routers/embeddings.py +280 -0
  47. txtai/api/routers/entity.py +42 -0
  48. txtai/api/routers/extractor.py +28 -0
  49. txtai/api/routers/labels.py +47 -0
  50. txtai/api/routers/llm.py +61 -0
  51. txtai/api/routers/objects.py +42 -0
  52. txtai/api/routers/openai.py +191 -0
  53. txtai/api/routers/rag.py +61 -0
  54. txtai/api/routers/reranker.py +46 -0
  55. txtai/api/routers/segmentation.py +42 -0
  56. txtai/api/routers/similarity.py +48 -0
  57. txtai/api/routers/summary.py +46 -0
  58. txtai/api/routers/tabular.py +42 -0
  59. txtai/api/routers/textractor.py +42 -0
  60. txtai/api/routers/texttospeech.py +33 -0
  61. txtai/api/routers/transcription.py +42 -0
  62. txtai/api/routers/translation.py +46 -0
  63. txtai/api/routers/upload.py +36 -0
  64. txtai/api/routers/workflow.py +28 -0
  65. txtai/app/__init__.py +5 -0
  66. txtai/app/base.py +821 -0
  67. txtai/archive/__init__.py +9 -0
  68. txtai/archive/base.py +104 -0
  69. txtai/archive/compress.py +51 -0
  70. txtai/archive/factory.py +25 -0
  71. txtai/archive/tar.py +49 -0
  72. txtai/archive/zip.py +35 -0
  73. txtai/cloud/__init__.py +8 -0
  74. txtai/cloud/base.py +106 -0
  75. txtai/cloud/factory.py +70 -0
  76. txtai/cloud/hub.py +101 -0
  77. txtai/cloud/storage.py +125 -0
  78. txtai/console/__init__.py +5 -0
  79. txtai/console/__main__.py +22 -0
  80. txtai/console/base.py +264 -0
  81. txtai/data/__init__.py +10 -0
  82. txtai/data/base.py +138 -0
  83. txtai/data/labels.py +42 -0
  84. txtai/data/questions.py +135 -0
  85. txtai/data/sequences.py +48 -0
  86. txtai/data/texts.py +68 -0
  87. txtai/data/tokens.py +28 -0
  88. txtai/database/__init__.py +14 -0
  89. txtai/database/base.py +342 -0
  90. txtai/database/client.py +227 -0
  91. txtai/database/duckdb.py +150 -0
  92. txtai/database/embedded.py +76 -0
  93. txtai/database/encoder/__init__.py +8 -0
  94. txtai/database/encoder/base.py +37 -0
  95. txtai/database/encoder/factory.py +56 -0
  96. txtai/database/encoder/image.py +43 -0
  97. txtai/database/encoder/serialize.py +28 -0
  98. txtai/database/factory.py +77 -0
  99. txtai/database/rdbms.py +569 -0
  100. txtai/database/schema/__init__.py +6 -0
  101. txtai/database/schema/orm.py +99 -0
  102. txtai/database/schema/statement.py +98 -0
  103. txtai/database/sql/__init__.py +8 -0
  104. txtai/database/sql/aggregate.py +178 -0
  105. txtai/database/sql/base.py +189 -0
  106. txtai/database/sql/expression.py +404 -0
  107. txtai/database/sql/token.py +342 -0
  108. txtai/database/sqlite.py +57 -0
  109. txtai/embeddings/__init__.py +7 -0
  110. txtai/embeddings/base.py +1107 -0
  111. txtai/embeddings/index/__init__.py +14 -0
  112. txtai/embeddings/index/action.py +15 -0
  113. txtai/embeddings/index/autoid.py +92 -0
  114. txtai/embeddings/index/configuration.py +71 -0
  115. txtai/embeddings/index/documents.py +86 -0
  116. txtai/embeddings/index/functions.py +155 -0
  117. txtai/embeddings/index/indexes.py +199 -0
  118. txtai/embeddings/index/indexids.py +60 -0
  119. txtai/embeddings/index/reducer.py +104 -0
  120. txtai/embeddings/index/stream.py +67 -0
  121. txtai/embeddings/index/transform.py +205 -0
  122. txtai/embeddings/search/__init__.py +11 -0
  123. txtai/embeddings/search/base.py +344 -0
  124. txtai/embeddings/search/errors.py +9 -0
  125. txtai/embeddings/search/explain.py +120 -0
  126. txtai/embeddings/search/ids.py +61 -0
  127. txtai/embeddings/search/query.py +69 -0
  128. txtai/embeddings/search/scan.py +196 -0
  129. txtai/embeddings/search/terms.py +46 -0
  130. txtai/graph/__init__.py +10 -0
  131. txtai/graph/base.py +769 -0
  132. txtai/graph/factory.py +61 -0
  133. txtai/graph/networkx.py +275 -0
  134. txtai/graph/query.py +181 -0
  135. txtai/graph/rdbms.py +113 -0
  136. txtai/graph/topics.py +166 -0
  137. txtai/models/__init__.py +9 -0
  138. txtai/models/models.py +268 -0
  139. txtai/models/onnx.py +133 -0
  140. txtai/models/pooling/__init__.py +9 -0
  141. txtai/models/pooling/base.py +141 -0
  142. txtai/models/pooling/cls.py +28 -0
  143. txtai/models/pooling/factory.py +144 -0
  144. txtai/models/pooling/late.py +173 -0
  145. txtai/models/pooling/mean.py +33 -0
  146. txtai/models/pooling/muvera.py +164 -0
  147. txtai/models/registry.py +37 -0
  148. txtai/models/tokendetection.py +122 -0
  149. txtai/pipeline/__init__.py +17 -0
  150. txtai/pipeline/audio/__init__.py +11 -0
  151. txtai/pipeline/audio/audiomixer.py +58 -0
  152. txtai/pipeline/audio/audiostream.py +94 -0
  153. txtai/pipeline/audio/microphone.py +244 -0
  154. txtai/pipeline/audio/signal.py +186 -0
  155. txtai/pipeline/audio/texttoaudio.py +60 -0
  156. txtai/pipeline/audio/texttospeech.py +553 -0
  157. txtai/pipeline/audio/transcription.py +212 -0
  158. txtai/pipeline/base.py +23 -0
  159. txtai/pipeline/data/__init__.py +10 -0
  160. txtai/pipeline/data/filetohtml.py +206 -0
  161. txtai/pipeline/data/htmltomd.py +414 -0
  162. txtai/pipeline/data/segmentation.py +178 -0
  163. txtai/pipeline/data/tabular.py +155 -0
  164. txtai/pipeline/data/textractor.py +139 -0
  165. txtai/pipeline/data/tokenizer.py +112 -0
  166. txtai/pipeline/factory.py +77 -0
  167. txtai/pipeline/hfmodel.py +111 -0
  168. txtai/pipeline/hfpipeline.py +96 -0
  169. txtai/pipeline/image/__init__.py +7 -0
  170. txtai/pipeline/image/caption.py +55 -0
  171. txtai/pipeline/image/imagehash.py +90 -0
  172. txtai/pipeline/image/objects.py +80 -0
  173. txtai/pipeline/llm/__init__.py +11 -0
  174. txtai/pipeline/llm/factory.py +86 -0
  175. txtai/pipeline/llm/generation.py +173 -0
  176. txtai/pipeline/llm/huggingface.py +218 -0
  177. txtai/pipeline/llm/litellm.py +90 -0
  178. txtai/pipeline/llm/llama.py +152 -0
  179. txtai/pipeline/llm/llm.py +75 -0
  180. txtai/pipeline/llm/rag.py +477 -0
  181. txtai/pipeline/nop.py +14 -0
  182. txtai/pipeline/tensors.py +52 -0
  183. txtai/pipeline/text/__init__.py +13 -0
  184. txtai/pipeline/text/crossencoder.py +70 -0
  185. txtai/pipeline/text/entity.py +140 -0
  186. txtai/pipeline/text/labels.py +137 -0
  187. txtai/pipeline/text/lateencoder.py +103 -0
  188. txtai/pipeline/text/questions.py +48 -0
  189. txtai/pipeline/text/reranker.py +57 -0
  190. txtai/pipeline/text/similarity.py +83 -0
  191. txtai/pipeline/text/summary.py +98 -0
  192. txtai/pipeline/text/translation.py +298 -0
  193. txtai/pipeline/train/__init__.py +7 -0
  194. txtai/pipeline/train/hfonnx.py +196 -0
  195. txtai/pipeline/train/hftrainer.py +398 -0
  196. txtai/pipeline/train/mlonnx.py +63 -0
  197. txtai/scoring/__init__.py +12 -0
  198. txtai/scoring/base.py +188 -0
  199. txtai/scoring/bm25.py +29 -0
  200. txtai/scoring/factory.py +95 -0
  201. txtai/scoring/pgtext.py +181 -0
  202. txtai/scoring/sif.py +32 -0
  203. txtai/scoring/sparse.py +218 -0
  204. txtai/scoring/terms.py +499 -0
  205. txtai/scoring/tfidf.py +358 -0
  206. txtai/serialize/__init__.py +10 -0
  207. txtai/serialize/base.py +85 -0
  208. txtai/serialize/errors.py +9 -0
  209. txtai/serialize/factory.py +29 -0
  210. txtai/serialize/messagepack.py +42 -0
  211. txtai/serialize/pickle.py +98 -0
  212. txtai/serialize/serializer.py +46 -0
  213. txtai/util/__init__.py +7 -0
  214. txtai/util/resolver.py +32 -0
  215. txtai/util/sparsearray.py +62 -0
  216. txtai/util/template.py +16 -0
  217. txtai/vectors/__init__.py +8 -0
  218. txtai/vectors/base.py +476 -0
  219. txtai/vectors/dense/__init__.py +12 -0
  220. txtai/vectors/dense/external.py +55 -0
  221. txtai/vectors/dense/factory.py +121 -0
  222. txtai/vectors/dense/huggingface.py +44 -0
  223. txtai/vectors/dense/litellm.py +86 -0
  224. txtai/vectors/dense/llama.py +84 -0
  225. txtai/vectors/dense/m2v.py +67 -0
  226. txtai/vectors/dense/sbert.py +92 -0
  227. txtai/vectors/dense/words.py +211 -0
  228. txtai/vectors/recovery.py +57 -0
  229. txtai/vectors/sparse/__init__.py +7 -0
  230. txtai/vectors/sparse/base.py +90 -0
  231. txtai/vectors/sparse/factory.py +55 -0
  232. txtai/vectors/sparse/sbert.py +34 -0
  233. txtai/version.py +6 -0
  234. txtai/workflow/__init__.py +8 -0
  235. txtai/workflow/base.py +184 -0
  236. txtai/workflow/execute.py +99 -0
  237. txtai/workflow/factory.py +42 -0
  238. txtai/workflow/task/__init__.py +18 -0
  239. txtai/workflow/task/base.py +490 -0
  240. txtai/workflow/task/console.py +24 -0
  241. txtai/workflow/task/export.py +64 -0
  242. txtai/workflow/task/factory.py +89 -0
  243. txtai/workflow/task/file.py +28 -0
  244. txtai/workflow/task/image.py +36 -0
  245. txtai/workflow/task/retrieve.py +61 -0
  246. txtai/workflow/task/service.py +102 -0
  247. txtai/workflow/task/storage.py +110 -0
  248. txtai/workflow/task/stream.py +33 -0
  249. txtai/workflow/task/template.py +116 -0
  250. txtai/workflow/task/url.py +20 -0
  251. txtai/workflow/task/workflow.py +14 -0
txtai/pipeline/llm/huggingface.py
@@ -0,0 +1,218 @@
+ """
+ Hugging Face module
+ """
+
+ from threading import Thread
+
+ from transformers import AutoModelForImageTextToText, TextIteratorStreamer
+
+ from ...models import Models
+
+ from ..hfpipeline import HFPipeline
+
+ from .generation import Generation
+
+
+ class HFGeneration(Generation):
+     """
+     Hugging Face Transformers generative model.
+     """
+
+     def __init__(self, path, template=None, **kwargs):
+         # Call parent constructor
+         super().__init__(path, template, **kwargs)
+
+         # Create Hugging Face LLM pipeline
+         self.llm = HFLLM(path, **kwargs)
+
+     def isvision(self):
+         return isinstance(self.llm.pipeline.model, AutoModelForImageTextToText)
+
+     def stream(self, texts, maxlength, stream, stop, **kwargs):
+         yield from self.llm(texts, maxlength=maxlength, stream=stream, stop=stop, **kwargs)
+
+
+ class HFLLM(HFPipeline):
+     """
+     Hugging Face Transformers large language model (LLM) pipeline. This pipeline autodetects whether the model path
+     is a text generation or sequence-to-sequence model.
+     """
+
+     def __init__(self, path=None, quantize=False, gpu=True, model=None, task=None, **kwargs):
+         super().__init__(self.task(path, task, **kwargs), path, quantize, gpu, model, **kwargs)
+
+         # Load tokenizer, if necessary
+         self.pipeline.tokenizer = self.pipeline.tokenizer if self.pipeline.tokenizer else Models.tokenizer(path, **kwargs)
+
+     def __call__(self, text, prefix=None, maxlength=512, workers=0, stream=False, stop=None, **kwargs):
+         """
+         Generates text. Supports the following input formats:
+
+           - String or list of strings (instruction-tuned models must follow chat templates)
+           - List of dictionaries with `role` and `content` key-values or lists of lists
+
+         Args:
+             text: text|list
+             prefix: optional prefix to prepend to text elements
+             maxlength: maximum sequence length
+             workers: number of concurrent workers to use for processing data, defaults to 0
+             stream: stream response if True, defaults to False
+             stop: list of stop strings
+             kwargs: additional generation keyword arguments
+
+         Returns:
+             generated text
+         """
+
+         # List of texts
+         texts = text if isinstance(text, list) else [text]
+
+         # Add prefix, if necessary
+         if prefix:
+             texts = [f"{prefix}{x}" for x in texts]
+
+         # Combine all keyword arguments
+         args, kwargs = self.parameters(texts, maxlength, workers, stop, **kwargs)
+
+         # Stream response
+         if stream:
+             return StreamingResponse(self.pipeline, texts, stop, **kwargs)()
+
+         # Run pipeline and extract generated text
+         results = [self.extract(result) for result in self.pipeline(*args, **kwargs)]
+
+         return results[0] if isinstance(text, str) else results
+
+     def parameters(self, texts, maxlength, workers, stop, **kwargs):
+         """
+         Builds a list of arguments and a combined parameter dictionary to use as keyword arguments.
+
+         Args:
+             texts: input texts
+             maxlength: maximum sequence length
+             workers: number of concurrent workers to use for processing data, defaults to 0
+             stop: list of stop strings
+             kwargs: additional generation keyword arguments
+
+         Returns:
+             args, kwargs
+         """
+
+         # Set defaults and get underlying model
+         defaults, model = {"max_length": maxlength, "max_new_tokens": None, "num_workers": workers}, self.pipeline.model
+
+         # Set parameters for vision models and return
+         if self.pipeline.task == "image-text-to-text":
+             # Maxlength has to be large enough to accommodate images
+             defaults["max_length"] = max(maxlength, 2048)
+
+             # Set default token id
+             tokenid = model.generation_config.pad_token_id
+             model.generation_config.pad_token_id = tokenid if tokenid else model.generation_config.eos_token_id
+
+             # Vision models take all arguments as keyword arguments
+             return [], {**{"text": texts, "truncation": True}, **defaults, **kwargs}
+
+         # Add pad token if it's missing from model config
+         if not model.config.pad_token_id:
+             tokenid = model.config.eos_token_id
+             tokenid = tokenid[0] if isinstance(tokenid, list) else tokenid
+
+             # Set pad_token_id parameter
+             defaults["pad_token_id"] = tokenid
+
+             # Update tokenizer for batching
+             if "batch_size" in kwargs and self.pipeline.tokenizer.pad_token_id is None:
+                 self.pipeline.tokenizer.pad_token_id = tokenid
+                 self.pipeline.tokenizer.padding_side = "left"
+
+         # Set tokenizer when stop strings are set
+         if stop:
+             defaults["tokenizer"] = self.pipeline.tokenizer
+
+         return [texts], {**defaults, **kwargs}
+
+     def extract(self, result):
+         """
+         Extracts generated text from a pipeline result.
+
+         Args:
+             result: pipeline result
+
+         Returns:
+             generated text
+         """
+
+         # Extract output from list, if necessary
+         result = result[0] if isinstance(result, list) else result
+         text = result["generated_text"]
+         return text[-1]["content"] if isinstance(text, list) else text
+
+     def task(self, path, task, **kwargs):
+         """
+         Gets the pipeline task name.
+
+         Args:
+             path: model path input
+             task: task name
+             kwargs: optional additional keyword arguments
+
+         Returns:
+             pipeline task name
+         """
+
+         # Mapping from txtai to Hugging Face pipeline tasks
+         mapping = {"language-generation": "text-generation", "sequence-sequence": "text2text-generation", "vision": "image-text-to-text"}
+
+         # Attempt to resolve task
+         if path and not task:
+             task = Models.task(path, **kwargs)
+
+         # Map to Hugging Face task. Default to text2text-generation pipeline when task not resolved.
+         return mapping.get(task, "text2text-generation")
+
+
+ class Generator(HFLLM):
+     """
+     Generates text with a causal language model.
+     """
+
+     def __init__(self, path=None, quantize=False, gpu=True, model=None, **kwargs):
+         super().__init__(path, quantize, gpu, model, "language-generation", **kwargs)
+
+
+ class Sequences(HFLLM):
+     """
+     Generates text with a sequence-to-sequence model.
+     """
+
+     def __init__(self, path=None, quantize=False, gpu=True, model=None, **kwargs):
+         super().__init__(path, quantize, gpu, model, "sequence-sequence", **kwargs)
+
+
+ class StreamingResponse:
+     """
+     Generates text as a streaming response.
+     """
+
+     def __init__(self, pipeline, texts, stop, **kwargs):
+         # Create streamer
+         self.stream = TextIteratorStreamer(pipeline.tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=5)
+         kwargs["streamer"] = self.stream
+         kwargs["stop_strings"] = stop
+
+         # Create generation thread
+         self.thread = Thread(target=pipeline, args=[texts], kwargs=kwargs)
+
+         # Store number of inputs
+         self.length = len(texts)
+
+     def __call__(self):
+         # Start the generation thread
+         self.thread.start()
+
+         return self
+
+     def __iter__(self):
+         for _ in range(self.length):
+             yield from self.stream
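
For reference, a minimal usage sketch of the HFLLM pipeline added above. The model id and prompts are examples only, not part of the package, and output depends on the model:

    from txtai.pipeline.llm.huggingface import HFLLM

    # Resolves to the text2text-generation task for this seq2seq model
    llm = HFLLM("google/flan-t5-small")

    # Single string in, single string out
    print(llm("Translate to French: Hello world"))

    # stream=True returns an iterator backed by TextIteratorStreamer
    for chunk in llm("Describe vector search", stream=True):
        print(chunk, end="")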
txtai/pipeline/llm/litellm.py
@@ -0,0 +1,90 @@
+ """
+ LiteLLM module
+ """
+
+ from transformers.utils import cached_file
+
+ # Conditional import
+ try:
+     import litellm as api
+
+     LITELLM = True
+ except ImportError:
+     LITELLM = False
+
+ from .generation import Generation
+
+
+ class LiteLLM(Generation):
+     """
+     LiteLLM generative model.
+     """
+
+     @staticmethod
+     def ismodel(path):
+         """
+         Checks if path is a LiteLLM model.
+
+         Args:
+             path: input path
+
+         Returns:
+             True if this is a LiteLLM model, False otherwise
+         """
+
+         # pylint: disable=W0702
+         if isinstance(path, str) and LITELLM:
+             debug = api.suppress_debug_info
+             try:
+                 # Suppress debug messages for this test
+                 api.suppress_debug_info = True
+                 return api.get_llm_provider(path) and not LiteLLM.ishub(path)
+             except:
+                 return False
+             finally:
+                 # Restore debug info to original value
+                 api.suppress_debug_info = debug
+
+         return False
+
+     @staticmethod
+     def ishub(path):
+         """
+         Checks if path is available on the HF Hub.
+
+         Args:
+             path: input path
+
+         Returns:
+             True if this is a model on the HF Hub
+         """
+
+         # pylint: disable=W0702
+         try:
+             return cached_file(path_or_repo_id=path, filename="config.json") is not None if "/" in path else False
+         except:
+             return False
+
+     def __init__(self, path, template=None, **kwargs):
+         super().__init__(path, template, **kwargs)
+
+         if not LITELLM:
+             raise ImportError('LiteLLM is not available - install "pipeline" extra to enable')
+
+         # Ignore common pipeline parameters
+         self.kwargs = {k: v for k, v in self.kwargs.items() if k not in ["quantize", "gpu", "model", "task"]}
+
+     def stream(self, texts, maxlength, stream, stop, **kwargs):
+         for text in texts:
+             # LLM API call
+             result = api.completion(
+                 model=self.path,
+                 messages=[{"content": text, "role": "prompt"}] if isinstance(text, str) else text,
+                 max_tokens=maxlength,
+                 stream=stream,
+                 stop=stop,
+                 **{**self.kwargs, **kwargs}
+             )
+
+             # Stream response
+             yield from self.response(result if stream else [result])
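
A quick sketch of the ismodel check above. The model ids are examples; results assume litellm is installed and the HF Hub is reachable:

    from txtai.pipeline.llm.litellm import LiteLLM

    # Recognized provider model id that is not a HF Hub repo
    print(LiteLLM.ismodel("gpt-4o-mini"))           # True

    # Resolves on the HF Hub, so the Transformers backend handles it instead
    print(LiteLLM.ismodel("google/flan-t5-small"))  # False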
txtai/pipeline/llm/llama.py
@@ -0,0 +1,152 @@
+ """
+ Llama module
+ """
+
+ import os
+
+ from huggingface_hub import hf_hub_download
+
+ # Conditional import
+ try:
+     import llama_cpp as llama
+
+     LLAMA_CPP = True
+ except ImportError:
+     LLAMA_CPP = False
+
+ from .generation import Generation
+
+
+ class LlamaCpp(Generation):
+     """
+     llama.cpp generative model.
+     """
+
+     @staticmethod
+     def ismodel(path):
+         """
+         Checks if path is a llama.cpp model.
+
+         Args:
+             path: input path
+
+         Returns:
+             True if this is a llama.cpp model, False otherwise
+         """
+
+         return isinstance(path, str) and path.lower().endswith(".gguf")
+
+     def __init__(self, path, template=None, **kwargs):
+         super().__init__(path, template, **kwargs)
+
+         if not LLAMA_CPP:
+             raise ImportError('llama.cpp is not available - install "pipeline" extra to enable')
+
+         # Check if this is a local path, otherwise download from the HF Hub
+         path = path if os.path.exists(path) else self.download(path)
+
+         # Create llama.cpp instance
+         self.llm = self.create(path, **kwargs)
+
+     def stream(self, texts, maxlength, stream, stop, **kwargs):
+         for text in texts:
+             yield from (
+                 self.messages(text, maxlength, stream, stop, **kwargs)
+                 if isinstance(text, list)
+                 else self.prompt(text, maxlength, stream, stop, **kwargs)
+             )
+
+     def download(self, path):
+         """
+         Downloads path from the Hugging Face Hub.
+
+         Args:
+             path: full model path
+
+         Returns:
+             local cached model path
+         """
+
+         # Split into parts
+         parts = path.split("/")
+
+         # Calculate repo id split
+         repo = 2 if len(parts) > 2 else 1
+
+         # Download and cache file
+         return hf_hub_download(repo_id="/".join(parts[:repo]), filename="/".join(parts[repo:]))
+
+     def create(self, path, **kwargs):
+         """
+         Creates a new llama.cpp model instance.
+
+         Args:
+             path: path to model
+             kwargs: additional keyword args
+
+         Returns:
+             llama.cpp instance
+         """
+
+         # Default n_ctx=0 if not already set. This sets n_ctx = n_ctx_train.
+         kwargs["n_ctx"] = kwargs.get("n_ctx", 0)
+
+         # Default GPU layers if not already set
+         kwargs["n_gpu_layers"] = kwargs.get("n_gpu_layers", -1 if kwargs.get("gpu", os.environ.get("LLAMA_NO_METAL") != "1") else 0)
+
+         # Default verbose flag
+         kwargs["verbose"] = kwargs.get("verbose", False)
+
+         # Create llama.cpp instance
+         try:
+             return llama.Llama(model_path=path, **kwargs)
+         except ValueError as e:
+             # Fall back to default n_ctx when there is not enough memory for n_ctx = n_ctx_train
+             if not kwargs["n_ctx"]:
+                 kwargs.pop("n_ctx")
+                 return llama.Llama(model_path=path, **kwargs)
+
+             # Raise exception if n_ctx was manually specified
+             raise e
+
+     def messages(self, messages, maxlength, stream, stop, **kwargs):
+         """
+         Processes a list of messages.
+
+         Args:
+             messages: list of dictionaries with `role` and `content` key-values
+             maxlength: maximum sequence length
+             stream: stream response if True, defaults to False
+             stop: list of stop strings
+             kwargs: additional generation keyword arguments
+
+         Returns:
+             generated text
+         """
+
+         # LLM call with messages
+         result = self.llm.create_chat_completion(messages=messages, max_tokens=maxlength, stream=stream, stop=stop, **kwargs)
+
+         # Stream response
+         yield from self.response(result if stream else [result])
+
+     def prompt(self, text, maxlength, stream, stop, **kwargs):
+         """
+         Processes a prompt.
+
+         Args:
+             text: prompt text
+             maxlength: maximum sequence length
+             stream: stream response if True, defaults to False
+             stop: list of stop strings
+             kwargs: additional generation keyword arguments
+
+         Returns:
+             generated text
+         """
+
+         # LLM call with prompt
+         result = self.llm(text, max_tokens=maxlength, stream=stream, stop=stop, **kwargs)
+
+         # Stream response
+         yield from self.response(result if stream else [result])
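
The download method above maps a combined repo/file path onto hf_hub_download arguments. A short sketch with an example GGUF path (the repo id and filename are illustrative, and the first call downloads the model):

    from txtai.pipeline.llm.llama import LlamaCpp

    # ismodel only checks the .gguf extension
    print(LlamaCpp.ismodel("TheBloke/Mistral-7B-v0.1-GGUF/mistral-7b-v0.1.Q4_K_M.gguf"))  # True

    # "TheBloke/Mistral-7B-v0.1-GGUF" becomes repo_id, the remainder the filename;
    # the cached file is then loaded with llama.cpp
    llm = LlamaCpp("TheBloke/Mistral-7B-v0.1-GGUF/mistral-7b-v0.1.Q4_K_M.gguf")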
txtai/pipeline/llm/llm.py
@@ -0,0 +1,75 @@
+ """
+ LLM module
+ """
+
+ import logging
+
+ from .factory import GenerationFactory
+
+ from ..base import Pipeline
+
+ # Logging configuration
+ logger = logging.getLogger(__name__)
+
+
+ class LLM(Pipeline):
+     """
+     Pipeline for running large language models (LLMs). This class supports the following LLM backends:
+
+       - Local LLMs with Hugging Face Transformers
+       - Local LLMs with llama.cpp
+       - Remote API LLMs with LiteLLM
+       - Custom generation implementations
+     """
+
+     def __init__(self, path=None, method=None, **kwargs):
+         """
+         Creates a new LLM.
+
+         Args:
+             path: model path
+             method: LLM framework, inferred from path if not provided
+             kwargs: model keyword arguments
+         """
+
+         # Default LLM if not provided
+         path = path if path else "google/flan-t5-base"
+
+         # Generation instance
+         self.generator = GenerationFactory.create(path, method, **kwargs)
+
+     def __call__(self, text, maxlength=512, stream=False, stop=None, defaultrole="prompt", stripthink=False, **kwargs):
+         """
+         Generates text. Supports the following input formats:
+
+           - String or list of strings (instruction-tuned models must follow chat templates)
+           - List of dictionaries with `role` and `content` key-values or lists of lists
+
+         Args:
+             text: text|list
+             maxlength: maximum sequence length
+             stream: stream response if True, defaults to False
+             stop: list of stop strings, defaults to None
+             defaultrole: default role to apply to text inputs, either prompt for raw prompts (default) or user for chat messages
+             stripthink: strip thinking tags, defaults to False
+             kwargs: additional generation keyword arguments
+
+         Returns:
+             generated text
+         """
+
+         # Debug logging
+         logger.debug(text)
+
+         # Run LLM generation
+         return self.generator(text, maxlength, stream, stop, defaultrole, stripthink, **kwargs)
+
+     def isvision(self):
+         """
+         Returns True if this LLM supports vision operations.
+
+         Returns:
+             True if this is a vision model
+         """
+
+         return self.generator.isvision()
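
A minimal usage sketch of the LLM pipeline, assuming the class is re-exported from txtai.pipeline as in upstream txtai; model ids and prompts are examples:

    from txtai.pipeline import LLM

    # Defaults to google/flan-t5-base when no path is given
    llm = LLM()
    print(llm("Translate to French: Hello"))

    # Chat messages with an instruction-tuned model; defaultrole="user"
    # wraps plain strings as user messages instead of raw prompts
    llm = LLM("meta-llama/Llama-3.2-1B-Instruct")
    print(llm([{"role": "user", "content": "Tell me a joke"}], maxlength=256))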