mseep-txtai 9.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251) hide show
  1. mseep_txtai-9.1.1.dist-info/METADATA +262 -0
  2. mseep_txtai-9.1.1.dist-info/RECORD +251 -0
  3. mseep_txtai-9.1.1.dist-info/WHEEL +5 -0
  4. mseep_txtai-9.1.1.dist-info/licenses/LICENSE +190 -0
  5. mseep_txtai-9.1.1.dist-info/top_level.txt +1 -0
  6. txtai/__init__.py +16 -0
  7. txtai/agent/__init__.py +12 -0
  8. txtai/agent/base.py +54 -0
  9. txtai/agent/factory.py +39 -0
  10. txtai/agent/model.py +107 -0
  11. txtai/agent/placeholder.py +16 -0
  12. txtai/agent/tool/__init__.py +7 -0
  13. txtai/agent/tool/embeddings.py +69 -0
  14. txtai/agent/tool/factory.py +130 -0
  15. txtai/agent/tool/function.py +49 -0
  16. txtai/ann/__init__.py +7 -0
  17. txtai/ann/base.py +153 -0
  18. txtai/ann/dense/__init__.py +11 -0
  19. txtai/ann/dense/annoy.py +72 -0
  20. txtai/ann/dense/factory.py +76 -0
  21. txtai/ann/dense/faiss.py +233 -0
  22. txtai/ann/dense/hnsw.py +104 -0
  23. txtai/ann/dense/numpy.py +164 -0
  24. txtai/ann/dense/pgvector.py +323 -0
  25. txtai/ann/dense/sqlite.py +303 -0
  26. txtai/ann/dense/torch.py +38 -0
  27. txtai/ann/sparse/__init__.py +7 -0
  28. txtai/ann/sparse/factory.py +61 -0
  29. txtai/ann/sparse/ivfsparse.py +377 -0
  30. txtai/ann/sparse/pgsparse.py +56 -0
  31. txtai/api/__init__.py +18 -0
  32. txtai/api/application.py +134 -0
  33. txtai/api/authorization.py +53 -0
  34. txtai/api/base.py +159 -0
  35. txtai/api/cluster.py +295 -0
  36. txtai/api/extension.py +19 -0
  37. txtai/api/factory.py +40 -0
  38. txtai/api/responses/__init__.py +7 -0
  39. txtai/api/responses/factory.py +30 -0
  40. txtai/api/responses/json.py +56 -0
  41. txtai/api/responses/messagepack.py +51 -0
  42. txtai/api/route.py +41 -0
  43. txtai/api/routers/__init__.py +25 -0
  44. txtai/api/routers/agent.py +38 -0
  45. txtai/api/routers/caption.py +42 -0
  46. txtai/api/routers/embeddings.py +280 -0
  47. txtai/api/routers/entity.py +42 -0
  48. txtai/api/routers/extractor.py +28 -0
  49. txtai/api/routers/labels.py +47 -0
  50. txtai/api/routers/llm.py +61 -0
  51. txtai/api/routers/objects.py +42 -0
  52. txtai/api/routers/openai.py +191 -0
  53. txtai/api/routers/rag.py +61 -0
  54. txtai/api/routers/reranker.py +46 -0
  55. txtai/api/routers/segmentation.py +42 -0
  56. txtai/api/routers/similarity.py +48 -0
  57. txtai/api/routers/summary.py +46 -0
  58. txtai/api/routers/tabular.py +42 -0
  59. txtai/api/routers/textractor.py +42 -0
  60. txtai/api/routers/texttospeech.py +33 -0
  61. txtai/api/routers/transcription.py +42 -0
  62. txtai/api/routers/translation.py +46 -0
  63. txtai/api/routers/upload.py +36 -0
  64. txtai/api/routers/workflow.py +28 -0
  65. txtai/app/__init__.py +5 -0
  66. txtai/app/base.py +821 -0
  67. txtai/archive/__init__.py +9 -0
  68. txtai/archive/base.py +104 -0
  69. txtai/archive/compress.py +51 -0
  70. txtai/archive/factory.py +25 -0
  71. txtai/archive/tar.py +49 -0
  72. txtai/archive/zip.py +35 -0
  73. txtai/cloud/__init__.py +8 -0
  74. txtai/cloud/base.py +106 -0
  75. txtai/cloud/factory.py +70 -0
  76. txtai/cloud/hub.py +101 -0
  77. txtai/cloud/storage.py +125 -0
  78. txtai/console/__init__.py +5 -0
  79. txtai/console/__main__.py +22 -0
  80. txtai/console/base.py +264 -0
  81. txtai/data/__init__.py +10 -0
  82. txtai/data/base.py +138 -0
  83. txtai/data/labels.py +42 -0
  84. txtai/data/questions.py +135 -0
  85. txtai/data/sequences.py +48 -0
  86. txtai/data/texts.py +68 -0
  87. txtai/data/tokens.py +28 -0
  88. txtai/database/__init__.py +14 -0
  89. txtai/database/base.py +342 -0
  90. txtai/database/client.py +227 -0
  91. txtai/database/duckdb.py +150 -0
  92. txtai/database/embedded.py +76 -0
  93. txtai/database/encoder/__init__.py +8 -0
  94. txtai/database/encoder/base.py +37 -0
  95. txtai/database/encoder/factory.py +56 -0
  96. txtai/database/encoder/image.py +43 -0
  97. txtai/database/encoder/serialize.py +28 -0
  98. txtai/database/factory.py +77 -0
  99. txtai/database/rdbms.py +569 -0
  100. txtai/database/schema/__init__.py +6 -0
  101. txtai/database/schema/orm.py +99 -0
  102. txtai/database/schema/statement.py +98 -0
  103. txtai/database/sql/__init__.py +8 -0
  104. txtai/database/sql/aggregate.py +178 -0
  105. txtai/database/sql/base.py +189 -0
  106. txtai/database/sql/expression.py +404 -0
  107. txtai/database/sql/token.py +342 -0
  108. txtai/database/sqlite.py +57 -0
  109. txtai/embeddings/__init__.py +7 -0
  110. txtai/embeddings/base.py +1107 -0
  111. txtai/embeddings/index/__init__.py +14 -0
  112. txtai/embeddings/index/action.py +15 -0
  113. txtai/embeddings/index/autoid.py +92 -0
  114. txtai/embeddings/index/configuration.py +71 -0
  115. txtai/embeddings/index/documents.py +86 -0
  116. txtai/embeddings/index/functions.py +155 -0
  117. txtai/embeddings/index/indexes.py +199 -0
  118. txtai/embeddings/index/indexids.py +60 -0
  119. txtai/embeddings/index/reducer.py +104 -0
  120. txtai/embeddings/index/stream.py +67 -0
  121. txtai/embeddings/index/transform.py +205 -0
  122. txtai/embeddings/search/__init__.py +11 -0
  123. txtai/embeddings/search/base.py +344 -0
  124. txtai/embeddings/search/errors.py +9 -0
  125. txtai/embeddings/search/explain.py +120 -0
  126. txtai/embeddings/search/ids.py +61 -0
  127. txtai/embeddings/search/query.py +69 -0
  128. txtai/embeddings/search/scan.py +196 -0
  129. txtai/embeddings/search/terms.py +46 -0
  130. txtai/graph/__init__.py +10 -0
  131. txtai/graph/base.py +769 -0
  132. txtai/graph/factory.py +61 -0
  133. txtai/graph/networkx.py +275 -0
  134. txtai/graph/query.py +181 -0
  135. txtai/graph/rdbms.py +113 -0
  136. txtai/graph/topics.py +166 -0
  137. txtai/models/__init__.py +9 -0
  138. txtai/models/models.py +268 -0
  139. txtai/models/onnx.py +133 -0
  140. txtai/models/pooling/__init__.py +9 -0
  141. txtai/models/pooling/base.py +141 -0
  142. txtai/models/pooling/cls.py +28 -0
  143. txtai/models/pooling/factory.py +144 -0
  144. txtai/models/pooling/late.py +173 -0
  145. txtai/models/pooling/mean.py +33 -0
  146. txtai/models/pooling/muvera.py +164 -0
  147. txtai/models/registry.py +37 -0
  148. txtai/models/tokendetection.py +122 -0
  149. txtai/pipeline/__init__.py +17 -0
  150. txtai/pipeline/audio/__init__.py +11 -0
  151. txtai/pipeline/audio/audiomixer.py +58 -0
  152. txtai/pipeline/audio/audiostream.py +94 -0
  153. txtai/pipeline/audio/microphone.py +244 -0
  154. txtai/pipeline/audio/signal.py +186 -0
  155. txtai/pipeline/audio/texttoaudio.py +60 -0
  156. txtai/pipeline/audio/texttospeech.py +553 -0
  157. txtai/pipeline/audio/transcription.py +212 -0
  158. txtai/pipeline/base.py +23 -0
  159. txtai/pipeline/data/__init__.py +10 -0
  160. txtai/pipeline/data/filetohtml.py +206 -0
  161. txtai/pipeline/data/htmltomd.py +414 -0
  162. txtai/pipeline/data/segmentation.py +178 -0
  163. txtai/pipeline/data/tabular.py +155 -0
  164. txtai/pipeline/data/textractor.py +139 -0
  165. txtai/pipeline/data/tokenizer.py +112 -0
  166. txtai/pipeline/factory.py +77 -0
  167. txtai/pipeline/hfmodel.py +111 -0
  168. txtai/pipeline/hfpipeline.py +96 -0
  169. txtai/pipeline/image/__init__.py +7 -0
  170. txtai/pipeline/image/caption.py +55 -0
  171. txtai/pipeline/image/imagehash.py +90 -0
  172. txtai/pipeline/image/objects.py +80 -0
  173. txtai/pipeline/llm/__init__.py +11 -0
  174. txtai/pipeline/llm/factory.py +86 -0
  175. txtai/pipeline/llm/generation.py +173 -0
  176. txtai/pipeline/llm/huggingface.py +218 -0
  177. txtai/pipeline/llm/litellm.py +90 -0
  178. txtai/pipeline/llm/llama.py +152 -0
  179. txtai/pipeline/llm/llm.py +75 -0
  180. txtai/pipeline/llm/rag.py +477 -0
  181. txtai/pipeline/nop.py +14 -0
  182. txtai/pipeline/tensors.py +52 -0
  183. txtai/pipeline/text/__init__.py +13 -0
  184. txtai/pipeline/text/crossencoder.py +70 -0
  185. txtai/pipeline/text/entity.py +140 -0
  186. txtai/pipeline/text/labels.py +137 -0
  187. txtai/pipeline/text/lateencoder.py +103 -0
  188. txtai/pipeline/text/questions.py +48 -0
  189. txtai/pipeline/text/reranker.py +57 -0
  190. txtai/pipeline/text/similarity.py +83 -0
  191. txtai/pipeline/text/summary.py +98 -0
  192. txtai/pipeline/text/translation.py +298 -0
  193. txtai/pipeline/train/__init__.py +7 -0
  194. txtai/pipeline/train/hfonnx.py +196 -0
  195. txtai/pipeline/train/hftrainer.py +398 -0
  196. txtai/pipeline/train/mlonnx.py +63 -0
  197. txtai/scoring/__init__.py +12 -0
  198. txtai/scoring/base.py +188 -0
  199. txtai/scoring/bm25.py +29 -0
  200. txtai/scoring/factory.py +95 -0
  201. txtai/scoring/pgtext.py +181 -0
  202. txtai/scoring/sif.py +32 -0
  203. txtai/scoring/sparse.py +218 -0
  204. txtai/scoring/terms.py +499 -0
  205. txtai/scoring/tfidf.py +358 -0
  206. txtai/serialize/__init__.py +10 -0
  207. txtai/serialize/base.py +85 -0
  208. txtai/serialize/errors.py +9 -0
  209. txtai/serialize/factory.py +29 -0
  210. txtai/serialize/messagepack.py +42 -0
  211. txtai/serialize/pickle.py +98 -0
  212. txtai/serialize/serializer.py +46 -0
  213. txtai/util/__init__.py +7 -0
  214. txtai/util/resolver.py +32 -0
  215. txtai/util/sparsearray.py +62 -0
  216. txtai/util/template.py +16 -0
  217. txtai/vectors/__init__.py +8 -0
  218. txtai/vectors/base.py +476 -0
  219. txtai/vectors/dense/__init__.py +12 -0
  220. txtai/vectors/dense/external.py +55 -0
  221. txtai/vectors/dense/factory.py +121 -0
  222. txtai/vectors/dense/huggingface.py +44 -0
  223. txtai/vectors/dense/litellm.py +86 -0
  224. txtai/vectors/dense/llama.py +84 -0
  225. txtai/vectors/dense/m2v.py +67 -0
  226. txtai/vectors/dense/sbert.py +92 -0
  227. txtai/vectors/dense/words.py +211 -0
  228. txtai/vectors/recovery.py +57 -0
  229. txtai/vectors/sparse/__init__.py +7 -0
  230. txtai/vectors/sparse/base.py +90 -0
  231. txtai/vectors/sparse/factory.py +55 -0
  232. txtai/vectors/sparse/sbert.py +34 -0
  233. txtai/version.py +6 -0
  234. txtai/workflow/__init__.py +8 -0
  235. txtai/workflow/base.py +184 -0
  236. txtai/workflow/execute.py +99 -0
  237. txtai/workflow/factory.py +42 -0
  238. txtai/workflow/task/__init__.py +18 -0
  239. txtai/workflow/task/base.py +490 -0
  240. txtai/workflow/task/console.py +24 -0
  241. txtai/workflow/task/export.py +64 -0
  242. txtai/workflow/task/factory.py +89 -0
  243. txtai/workflow/task/file.py +28 -0
  244. txtai/workflow/task/image.py +36 -0
  245. txtai/workflow/task/retrieve.py +61 -0
  246. txtai/workflow/task/service.py +102 -0
  247. txtai/workflow/task/storage.py +110 -0
  248. txtai/workflow/task/stream.py +33 -0
  249. txtai/workflow/task/template.py +116 -0
  250. txtai/workflow/task/url.py +20 -0
  251. txtai/workflow/task/workflow.py +14 -0
@@ -0,0 +1,25 @@
1
+ """
2
+ Router imports
3
+ """
4
+
5
+ from . import agent
6
+ from . import caption
7
+ from . import embeddings
8
+ from . import entity
9
+ from . import extractor
10
+ from . import labels
11
+ from . import llm
12
+ from . import objects
13
+ from . import openai
14
+ from . import rag
15
+ from . import reranker
16
+ from . import segmentation
17
+ from . import similarity
18
+ from . import summary
19
+ from . import tabular
20
+ from . import textractor
21
+ from . import texttospeech
22
+ from . import transcription
23
+ from . import translation
24
+ from . import workflow
25
+ from . import upload
@@ -0,0 +1,38 @@
1
+ """
2
+ Defines API paths for agent endpoints.
3
+ """
4
+
5
+ from typing import Optional
6
+
7
+ from fastapi import APIRouter, Body
8
+ from fastapi.responses import StreamingResponse
9
+
10
+ from .. import application
11
+ from ..route import EncodingAPIRoute
12
+
13
+ router = APIRouter(route_class=EncodingAPIRoute)
14
+
15
+
16
@router.post("/agent")
def agent(name: str = Body(...), text: str = Body(...), maxlength: Optional[int] = Body(default=None), stream: Optional[bool] = Body(default=None)):
    """
    Runs the agent registered under name with the supplied text.

    Args:
        name: agent name
        text: instructions to run
        maxlength: maximum sequence length, only forwarded when set to a truthy value
        stream: when truthy, the agent output is wrapped in a StreamingResponse

    Returns:
        agent output, or a StreamingResponse over that output when stream is truthy
    """

    # Forward only the options that were actually set (stream first, then maxlength)
    options = {}
    if stream:
        options["stream"] = stream
    if maxlength:
        options["maxlength"] = maxlength

    output = application.get().agent(name, text, **options)

    # Streaming requests wrap the output, everything else is returned unchanged
    if stream:
        return StreamingResponse(output)

    return output
@@ -0,0 +1,42 @@
1
+ """
2
+ Defines API paths for caption endpoints.
3
+ """
4
+
5
+ from typing import List
6
+
7
+ from fastapi import APIRouter, Body
8
+
9
+ from .. import application
10
+ from ..route import EncodingAPIRoute
11
+
12
+ router = APIRouter(route_class=EncodingAPIRoute)
13
+
14
+
15
@router.get("/caption")
def caption(file: str):
    """
    Runs the "caption" pipeline for a single image file.

    Args:
        file: file to process

    Returns:
        output of the caption pipeline
    """

    # The file is handed to the pipeline as a one-element argument tuple
    arguments = (file,)
    return application.get().pipeline("caption", arguments)
28
+
29
+
30
@router.post("/batchcaption")
def batchcaption(files: List[str] = Body(...)):
    """
    Runs the "caption" pipeline for a list of image files.

    Args:
        files: list of files to process

    Returns:
        output of the caption pipeline
    """

    # The whole list is handed to the pipeline as a single positional argument
    arguments = (files,)
    return application.get().pipeline("caption", arguments)
@@ -0,0 +1,280 @@
1
+ """
2
+ Defines API paths for embeddings endpoints.
3
+ """
4
+
5
+ from io import BytesIO
6
+ from typing import List, Optional
7
+
8
+ import PIL
9
+
10
+ from fastapi import APIRouter, Body, File, Form, HTTPException, Request, UploadFile
11
+ from fastapi.encoders import jsonable_encoder
12
+
13
+ from .. import application
14
+ from ..responses import ResponseFactory
15
+ from ..route import EncodingAPIRoute
16
+
17
+ from ...app import ReadOnlyError
18
+ from ...graph import Graph
19
+
20
+ router = APIRouter(route_class=EncodingAPIRoute)
21
+
22
+
23
@router.get("/search")
def search(query: str, request: Request):
    """
    Searches for the documents most similar to the input query. Depending on whether a database
    is available, this runs either an index search or an index + database search.

    Args:
        query: input query
        request: FastAPI request

    Returns:
        list of {id: value, score: value} for index search, list of dict for an index + database search
    """

    matches = application.get().search(query, request=request)

    def passthrough(value):
        # Leave the value untouched so the standard encoder does not convert it
        return value

    # Binary content and images are skipped by the encoder, graphs are exported via savedict()
    encoders = {bytes: passthrough, BytesIO: passthrough, PIL.Image.Image: passthrough, Graph: lambda graph: graph.savedict()}
    encoded = jsonable_encoder(matches, custom_encoder=encoders)

    # Build the raw response directly to prevent duplicate encoding
    return ResponseFactory.create(request)(encoded)
48
+
49
+
50
# pylint: disable=W0621
@router.post("/batchsearch")
def batchsearch(
    request: Request,
    queries: List[str] = Body(...),
    limit: Optional[int] = Body(default=None),
    weights: Optional[float] = Body(default=None),
    index: Optional[str] = Body(default=None),
    parameters: Optional[List[dict]] = Body(default=None),
    graph: bool = Body(default=False),
):
    """
    Finds documents most similar to the input queries. This method will run either an index search
    or an index + database search depending on if a database is available.

    Args:
        request: FastAPI request, used to select the response encoding
        queries: input queries
        limit: maximum results
        weights: hybrid score weights, if applicable
        index: index name, if applicable
        parameters: list of dicts of named parameters to bind to placeholders
        graph: return graph results if True

    Returns:
        list of {id: value, score: value} per query for index search, list of dict per query for an index + database search
    """

    # Execute search, optional arguments are passed through as None when not provided
    results = application.get().batchsearch(queries, limit, weights, index, parameters, graph)

    # Encode using standard FastAPI encoder but skip certain classes
    results = jsonable_encoder(
        results, custom_encoder={bytes: lambda x: x, BytesIO: lambda x: x, PIL.Image.Image: lambda x: x, Graph: lambda x: x.savedict()}
    )

    # Return raw response to prevent duplicate encoding
    response = ResponseFactory.create(request)
    return response(results)
88
+
89
+
90
@router.post("/add")
def add(documents: List[dict] = Body(...)):
    """
    Queues a batch of documents for indexing.

    Args:
        documents: list of {id: value, text: value, tags: value}

    Raises:
        HTTPException: 403 when the application raises ReadOnlyError
    """

    try:
        application.get().add(documents)
    except ReadOnlyError as error:
        # Surface the read-only message to the client as a forbidden response
        message = error.args[0]
        raise HTTPException(status_code=403, detail=message) from error
103
+
104
+
105
@router.post("/addobject")
def addobject(data: List[bytes] = File(), uid: Optional[List[str]] = Form(default=None), field: Optional[str] = Form(default=None)):
    """
    Adds a batch of binary documents for indexing.

    Args:
        data: list of binary objects
        uid: optional list of corresponding ids, must have the same length as data when provided
        field: optional object field name

    Raises:
        HTTPException: 422 when uid is provided and its length differs from data,
                       403 when the application raises ReadOnlyError
    """

    # Ids are optional but when present there must be exactly one per object
    if uid and len(data) != len(uid):
        raise HTTPException(status_code=422, detail="Length of data and uid lists must match")

    try:
        # Add objects
        application.get().addobject(data, uid, field)
    except ReadOnlyError as e:
        raise HTTPException(status_code=403, detail=e.args[0]) from e
124
+
125
+
126
@router.post("/addimage")
def addimage(data: List[UploadFile] = File(), uid: List[str] = Form(), field: Optional[str] = Form(default=None)):
    """
    Adds a batch of images for indexing.

    Args:
        data: list of uploaded image files
        uid: list of corresponding ids
        field: optional object field name

    Raises:
        HTTPException: 422 when the data and uid lists differ in length,
                       403 when the application raises ReadOnlyError
    """

    # There must be exactly one id per uploaded image
    if uid and len(data) != len(uid):
        raise HTTPException(status_code=422, detail="Length of data and uid lists must match")

    try:
        # Open each upload as a PIL image and add the images as objects
        application.get().addobject([PIL.Image.open(content.file) for content in data], uid, field)
    except ReadOnlyError as e:
        raise HTTPException(status_code=403, detail=e.args[0]) from e
145
+
146
+
147
@router.get("/index")
def index():
    """
    Builds an embeddings index from the documents batched so far.

    Raises:
        HTTPException: 403 when the application raises ReadOnlyError
    """

    try:
        application.get().index()
    except ReadOnlyError as error:
        # Read-only applications reject the request as forbidden
        message = error.args[0]
        raise HTTPException(status_code=403, detail=message) from error
157
+
158
+
159
@router.get("/upsert")
def upsert():
    """
    Runs an embeddings upsert for the documents batched so far.

    Raises:
        HTTPException: 403 when the application raises ReadOnlyError
    """

    try:
        application.get().upsert()
    except ReadOnlyError as error:
        # Read-only applications reject the request as forbidden
        message = error.args[0]
        raise HTTPException(status_code=403, detail=message) from error
169
+
170
+
171
@router.post("/delete")
def delete(ids: List = Body(...)):
    """
    Deletes ids from the embeddings index.

    Args:
        ids: list of ids to delete

    Returns:
        ids deleted

    Raises:
        HTTPException: 403 when the application raises ReadOnlyError
    """

    try:
        deleted = application.get().delete(ids)
    except ReadOnlyError as error:
        # Read-only applications reject the request as forbidden
        message = error.args[0]
        raise HTTPException(status_code=403, detail=message) from error

    return deleted
187
+
188
+
189
@router.post("/reindex")
def reindex(config: dict = Body(...), function: Optional[str] = Body(default=None)):
    """
    Recreates this embeddings index using config. This method only works if document content storage is enabled.

    Args:
        config: new config
        function: optional function to prepare content for indexing

    Raises:
        HTTPException: 403 when the application raises ReadOnlyError
    """

    try:
        application.get().reindex(config, function)
    except ReadOnlyError as e:
        # Read-only applications reject the request as forbidden
        raise HTTPException(status_code=403, detail=e.args[0]) from e
203
+
204
+
205
@router.get("/count")
def count():
    """
    Reports how many elements the embeddings index holds.

    Returns:
        number of elements in embeddings index
    """

    app = application.get()
    return app.count()
215
+
216
+
217
@router.post("/explain")
def explain(query: str = Body(...), texts: Optional[List[str]] = Body(default=None), limit: Optional[int] = Body(default=None)):
    """
    Explains the importance of each input token in text for a query.

    Args:
        query: query text
        texts: optional list of text
        limit: optional limit, passed through to the application explain call

    Returns:
        list of dict where a higher scores represents higher importance relative to the query
    """

    return application.get().explain(query, texts, limit)
231
+
232
+
233
@router.post("/batchexplain")
def batchexplain(queries: List[str] = Body(...), texts: Optional[List[str]] = Body(default=None), limit: Optional[int] = Body(default=None)):
    """
    Explains the importance of each input token in text for a list of queries.

    Args:
        queries: list of query text
        texts: optional list of text
        limit: optional limit, passed through to the application batchexplain call

    Returns:
        list of dict where a higher scores represents higher importance relative to the query
    """

    return application.get().batchexplain(queries, texts, limit)
247
+
248
+
249
@router.get("/transform")
def transform(text: str, category: Optional[str] = None, index: Optional[str] = None):
    """
    Converts text into an embeddings array.

    Args:
        text: input text
        category: category for instruction-based embeddings
        index: index name, if applicable

    Returns:
        embeddings array
    """

    app = application.get()
    return app.transform(text, category, index)
264
+
265
+
266
@router.post("/batchtransform")
def batchtransform(texts: List[str] = Body(...), category: Optional[str] = None, index: Optional[str] = None):
    """
    Converts a list of text into embeddings arrays.

    Args:
        texts: list of text
        category: category for instruction-based embeddings
        index: index name, if applicable

    Returns:
        embeddings arrays
    """

    app = application.get()
    return app.batchtransform(texts, category, index)
@@ -0,0 +1,42 @@
1
+ """
2
+ Defines API paths for entity endpoints.
3
+ """
4
+
5
+ from typing import List
6
+
7
+ from fastapi import APIRouter, Body
8
+
9
+ from .. import application
10
+ from ..route import EncodingAPIRoute
11
+
12
+ router = APIRouter(route_class=EncodingAPIRoute)
13
+
14
+
15
@router.get("/entity")
def entity(text: str):
    """
    Runs the "entity" pipeline, a token classifier, on a single text.

    Args:
        text: input text

    Returns:
        list of (entity, entity type, score) per text element
    """

    # The text is handed to the pipeline as a one-element argument tuple
    arguments = (text,)
    return application.get().pipeline("entity", arguments)
28
+
29
+
30
@router.post("/batchentity")
def batchentity(texts: List[str] = Body(...)):
    """
    Runs the "entity" pipeline, a token classifier, on a list of text.

    Args:
        texts: list of text

    Returns:
        list of (entity, entity type, score) per text element
    """

    # The whole list is handed to the pipeline as a single positional argument
    arguments = (texts,)
    return application.get().pipeline("entity", arguments)
@@ -0,0 +1,28 @@
1
+ """
2
+ Defines API paths for extractor endpoints.
3
+ """
4
+
5
+ from typing import List, Optional
6
+
7
+ from fastapi import APIRouter, Body
8
+
9
+ from .. import application
10
+ from ..route import EncodingAPIRoute
11
+
12
+ router = APIRouter(route_class=EncodingAPIRoute)
13
+
14
+
15
@router.post("/extract")
def extract(queue: List[dict] = Body(...), texts: Optional[List[str]] = Body(default=None)):
    """
    Extracts answers for a queue of input questions.

    Args:
        queue: list of {name: value, query: value, question: value, snippet: value}
        texts: optional list of text

    Returns:
        list of {name: value, answer: value}
    """

    app = application.get()
    return app.extract(queue, texts)
@@ -0,0 +1,47 @@
1
+ """
2
+ Defines API paths for labels endpoints.
3
+ """
4
+
5
+ from typing import List
6
+
7
+ from fastapi import APIRouter, Body
8
+
9
+ from .. import application
10
+ from ..route import EncodingAPIRoute
11
+
12
+ router = APIRouter(route_class=EncodingAPIRoute)
13
+
14
+
15
@router.post("/label")
def label(text: str = Body(...), labels: List[str] = Body(...)):
    """
    Runs a zero shot classifier on text with a list of candidate labels. The result is a list of
    {id: value, score: value} sorted by highest score, where id is the index in labels.

    Args:
        text: input text
        labels: list of labels

    Returns:
        list of {id: value, score: value} per text element
    """

    app = application.get()
    return app.label(text, labels)
30
+
31
+
32
@router.post("/batchlabel")
def batchlabel(texts: List[str] = Body(...), labels: List[str] = Body(...)):
    """
    Runs a zero shot classifier on a list of text with a list of candidate labels. The result is
    a list of {id: value, score: value} sorted by highest score, where id is the index in labels
    per text element.

    Args:
        texts: list of text
        labels: list of labels

    Returns:
        list of {id: value, score: value} per text element
    """

    # Same application method as the single-text endpoint, called with the full list
    app = application.get()
    return app.label(texts, labels)
@@ -0,0 +1,61 @@
1
+ """
2
+ Defines API paths for llm endpoints.
3
+ """
4
+
5
+ from typing import List, Optional
6
+
7
+ from fastapi import APIRouter, Body
8
+ from fastapi.responses import StreamingResponse
9
+
10
+ from .. import application
11
+ from ..route import EncodingAPIRoute
12
+
13
+ router = APIRouter(route_class=EncodingAPIRoute)
14
+
15
+
16
@router.get("/llm")
def llm(text: str, maxlength: Optional[int] = None, stream: Optional[bool] = False):
    """
    Runs the "llm" pipeline for the input text.

    Args:
        text: input text
        maxlength: optional response max length, only forwarded when set to a truthy value
        stream: streams response if True

    Returns:
        pipeline output, or a StreamingResponse over that output when stream is truthy
    """

    # Forward only the options that were actually set (stream first, then maxlength)
    options = {}
    if stream:
        options["stream"] = stream
    if maxlength:
        options["maxlength"] = maxlength

    output = application.get().pipeline("llm", text, **options)

    # Streaming requests wrap the output, everything else is returned unchanged
    if stream:
        return StreamingResponse(output)

    return output
38
+
39
+
40
@router.post("/batchllm")
def batchllm(texts: List[str] = Body(...), maxlength: Optional[int] = Body(default=None), stream: Optional[bool] = Body(default=False)):
    """
    Runs the "llm" pipeline for a list of input texts.

    Args:
        texts: input texts
        maxlength: optional response max length, only forwarded when set to a truthy value
        stream: streams response if True

    Returns:
        pipeline output, or a StreamingResponse over that output when stream is truthy
    """

    # Forward only the options that were actually set (stream first, then maxlength)
    options = {}
    if stream:
        options["stream"] = stream
    if maxlength:
        options["maxlength"] = maxlength

    output = application.get().pipeline("llm", texts, **options)

    # Streaming requests wrap the output, everything else is returned unchanged
    if stream:
        return StreamingResponse(output)

    return output
@@ -0,0 +1,42 @@
1
+ """
2
+ Defines API paths for objects endpoints.
3
+ """
4
+
5
+ from typing import List
6
+
7
+ from fastapi import APIRouter, Body
8
+
9
+ from .. import application
10
+ from ..route import EncodingAPIRoute
11
+
12
+ router = APIRouter(route_class=EncodingAPIRoute)
13
+
14
+
15
@router.get("/objects")
def objects(file: str):
    """
    Runs the "objects" pipeline (object detection/image classification) for a single image file.

    Args:
        file: file to process

    Returns:
        list of (label, score) elements
    """

    # The file is handed to the pipeline as a one-element argument tuple
    arguments = (file,)
    return application.get().pipeline("objects", arguments)
28
+
29
+
30
@router.post("/batchobjects")
def batchobjects(files: List[str] = Body(...)):
    """
    Runs the "objects" pipeline (object detection/image classification) for a list of image files.

    Args:
        files: list of files to process

    Returns:
        list of (label, score) elements
    """

    # The whole list is handed to the pipeline as a single positional argument
    arguments = (files,)
    return application.get().pipeline("objects", arguments)