mseep-txtai 9.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mseep_txtai-9.1.1.dist-info/METADATA +262 -0
- mseep_txtai-9.1.1.dist-info/RECORD +251 -0
- mseep_txtai-9.1.1.dist-info/WHEEL +5 -0
- mseep_txtai-9.1.1.dist-info/licenses/LICENSE +190 -0
- mseep_txtai-9.1.1.dist-info/top_level.txt +1 -0
- txtai/__init__.py +16 -0
- txtai/agent/__init__.py +12 -0
- txtai/agent/base.py +54 -0
- txtai/agent/factory.py +39 -0
- txtai/agent/model.py +107 -0
- txtai/agent/placeholder.py +16 -0
- txtai/agent/tool/__init__.py +7 -0
- txtai/agent/tool/embeddings.py +69 -0
- txtai/agent/tool/factory.py +130 -0
- txtai/agent/tool/function.py +49 -0
- txtai/ann/__init__.py +7 -0
- txtai/ann/base.py +153 -0
- txtai/ann/dense/__init__.py +11 -0
- txtai/ann/dense/annoy.py +72 -0
- txtai/ann/dense/factory.py +76 -0
- txtai/ann/dense/faiss.py +233 -0
- txtai/ann/dense/hnsw.py +104 -0
- txtai/ann/dense/numpy.py +164 -0
- txtai/ann/dense/pgvector.py +323 -0
- txtai/ann/dense/sqlite.py +303 -0
- txtai/ann/dense/torch.py +38 -0
- txtai/ann/sparse/__init__.py +7 -0
- txtai/ann/sparse/factory.py +61 -0
- txtai/ann/sparse/ivfsparse.py +377 -0
- txtai/ann/sparse/pgsparse.py +56 -0
- txtai/api/__init__.py +18 -0
- txtai/api/application.py +134 -0
- txtai/api/authorization.py +53 -0
- txtai/api/base.py +159 -0
- txtai/api/cluster.py +295 -0
- txtai/api/extension.py +19 -0
- txtai/api/factory.py +40 -0
- txtai/api/responses/__init__.py +7 -0
- txtai/api/responses/factory.py +30 -0
- txtai/api/responses/json.py +56 -0
- txtai/api/responses/messagepack.py +51 -0
- txtai/api/route.py +41 -0
- txtai/api/routers/__init__.py +25 -0
- txtai/api/routers/agent.py +38 -0
- txtai/api/routers/caption.py +42 -0
- txtai/api/routers/embeddings.py +280 -0
- txtai/api/routers/entity.py +42 -0
- txtai/api/routers/extractor.py +28 -0
- txtai/api/routers/labels.py +47 -0
- txtai/api/routers/llm.py +61 -0
- txtai/api/routers/objects.py +42 -0
- txtai/api/routers/openai.py +191 -0
- txtai/api/routers/rag.py +61 -0
- txtai/api/routers/reranker.py +46 -0
- txtai/api/routers/segmentation.py +42 -0
- txtai/api/routers/similarity.py +48 -0
- txtai/api/routers/summary.py +46 -0
- txtai/api/routers/tabular.py +42 -0
- txtai/api/routers/textractor.py +42 -0
- txtai/api/routers/texttospeech.py +33 -0
- txtai/api/routers/transcription.py +42 -0
- txtai/api/routers/translation.py +46 -0
- txtai/api/routers/upload.py +36 -0
- txtai/api/routers/workflow.py +28 -0
- txtai/app/__init__.py +5 -0
- txtai/app/base.py +821 -0
- txtai/archive/__init__.py +9 -0
- txtai/archive/base.py +104 -0
- txtai/archive/compress.py +51 -0
- txtai/archive/factory.py +25 -0
- txtai/archive/tar.py +49 -0
- txtai/archive/zip.py +35 -0
- txtai/cloud/__init__.py +8 -0
- txtai/cloud/base.py +106 -0
- txtai/cloud/factory.py +70 -0
- txtai/cloud/hub.py +101 -0
- txtai/cloud/storage.py +125 -0
- txtai/console/__init__.py +5 -0
- txtai/console/__main__.py +22 -0
- txtai/console/base.py +264 -0
- txtai/data/__init__.py +10 -0
- txtai/data/base.py +138 -0
- txtai/data/labels.py +42 -0
- txtai/data/questions.py +135 -0
- txtai/data/sequences.py +48 -0
- txtai/data/texts.py +68 -0
- txtai/data/tokens.py +28 -0
- txtai/database/__init__.py +14 -0
- txtai/database/base.py +342 -0
- txtai/database/client.py +227 -0
- txtai/database/duckdb.py +150 -0
- txtai/database/embedded.py +76 -0
- txtai/database/encoder/__init__.py +8 -0
- txtai/database/encoder/base.py +37 -0
- txtai/database/encoder/factory.py +56 -0
- txtai/database/encoder/image.py +43 -0
- txtai/database/encoder/serialize.py +28 -0
- txtai/database/factory.py +77 -0
- txtai/database/rdbms.py +569 -0
- txtai/database/schema/__init__.py +6 -0
- txtai/database/schema/orm.py +99 -0
- txtai/database/schema/statement.py +98 -0
- txtai/database/sql/__init__.py +8 -0
- txtai/database/sql/aggregate.py +178 -0
- txtai/database/sql/base.py +189 -0
- txtai/database/sql/expression.py +404 -0
- txtai/database/sql/token.py +342 -0
- txtai/database/sqlite.py +57 -0
- txtai/embeddings/__init__.py +7 -0
- txtai/embeddings/base.py +1107 -0
- txtai/embeddings/index/__init__.py +14 -0
- txtai/embeddings/index/action.py +15 -0
- txtai/embeddings/index/autoid.py +92 -0
- txtai/embeddings/index/configuration.py +71 -0
- txtai/embeddings/index/documents.py +86 -0
- txtai/embeddings/index/functions.py +155 -0
- txtai/embeddings/index/indexes.py +199 -0
- txtai/embeddings/index/indexids.py +60 -0
- txtai/embeddings/index/reducer.py +104 -0
- txtai/embeddings/index/stream.py +67 -0
- txtai/embeddings/index/transform.py +205 -0
- txtai/embeddings/search/__init__.py +11 -0
- txtai/embeddings/search/base.py +344 -0
- txtai/embeddings/search/errors.py +9 -0
- txtai/embeddings/search/explain.py +120 -0
- txtai/embeddings/search/ids.py +61 -0
- txtai/embeddings/search/query.py +69 -0
- txtai/embeddings/search/scan.py +196 -0
- txtai/embeddings/search/terms.py +46 -0
- txtai/graph/__init__.py +10 -0
- txtai/graph/base.py +769 -0
- txtai/graph/factory.py +61 -0
- txtai/graph/networkx.py +275 -0
- txtai/graph/query.py +181 -0
- txtai/graph/rdbms.py +113 -0
- txtai/graph/topics.py +166 -0
- txtai/models/__init__.py +9 -0
- txtai/models/models.py +268 -0
- txtai/models/onnx.py +133 -0
- txtai/models/pooling/__init__.py +9 -0
- txtai/models/pooling/base.py +141 -0
- txtai/models/pooling/cls.py +28 -0
- txtai/models/pooling/factory.py +144 -0
- txtai/models/pooling/late.py +173 -0
- txtai/models/pooling/mean.py +33 -0
- txtai/models/pooling/muvera.py +164 -0
- txtai/models/registry.py +37 -0
- txtai/models/tokendetection.py +122 -0
- txtai/pipeline/__init__.py +17 -0
- txtai/pipeline/audio/__init__.py +11 -0
- txtai/pipeline/audio/audiomixer.py +58 -0
- txtai/pipeline/audio/audiostream.py +94 -0
- txtai/pipeline/audio/microphone.py +244 -0
- txtai/pipeline/audio/signal.py +186 -0
- txtai/pipeline/audio/texttoaudio.py +60 -0
- txtai/pipeline/audio/texttospeech.py +553 -0
- txtai/pipeline/audio/transcription.py +212 -0
- txtai/pipeline/base.py +23 -0
- txtai/pipeline/data/__init__.py +10 -0
- txtai/pipeline/data/filetohtml.py +206 -0
- txtai/pipeline/data/htmltomd.py +414 -0
- txtai/pipeline/data/segmentation.py +178 -0
- txtai/pipeline/data/tabular.py +155 -0
- txtai/pipeline/data/textractor.py +139 -0
- txtai/pipeline/data/tokenizer.py +112 -0
- txtai/pipeline/factory.py +77 -0
- txtai/pipeline/hfmodel.py +111 -0
- txtai/pipeline/hfpipeline.py +96 -0
- txtai/pipeline/image/__init__.py +7 -0
- txtai/pipeline/image/caption.py +55 -0
- txtai/pipeline/image/imagehash.py +90 -0
- txtai/pipeline/image/objects.py +80 -0
- txtai/pipeline/llm/__init__.py +11 -0
- txtai/pipeline/llm/factory.py +86 -0
- txtai/pipeline/llm/generation.py +173 -0
- txtai/pipeline/llm/huggingface.py +218 -0
- txtai/pipeline/llm/litellm.py +90 -0
- txtai/pipeline/llm/llama.py +152 -0
- txtai/pipeline/llm/llm.py +75 -0
- txtai/pipeline/llm/rag.py +477 -0
- txtai/pipeline/nop.py +14 -0
- txtai/pipeline/tensors.py +52 -0
- txtai/pipeline/text/__init__.py +13 -0
- txtai/pipeline/text/crossencoder.py +70 -0
- txtai/pipeline/text/entity.py +140 -0
- txtai/pipeline/text/labels.py +137 -0
- txtai/pipeline/text/lateencoder.py +103 -0
- txtai/pipeline/text/questions.py +48 -0
- txtai/pipeline/text/reranker.py +57 -0
- txtai/pipeline/text/similarity.py +83 -0
- txtai/pipeline/text/summary.py +98 -0
- txtai/pipeline/text/translation.py +298 -0
- txtai/pipeline/train/__init__.py +7 -0
- txtai/pipeline/train/hfonnx.py +196 -0
- txtai/pipeline/train/hftrainer.py +398 -0
- txtai/pipeline/train/mlonnx.py +63 -0
- txtai/scoring/__init__.py +12 -0
- txtai/scoring/base.py +188 -0
- txtai/scoring/bm25.py +29 -0
- txtai/scoring/factory.py +95 -0
- txtai/scoring/pgtext.py +181 -0
- txtai/scoring/sif.py +32 -0
- txtai/scoring/sparse.py +218 -0
- txtai/scoring/terms.py +499 -0
- txtai/scoring/tfidf.py +358 -0
- txtai/serialize/__init__.py +10 -0
- txtai/serialize/base.py +85 -0
- txtai/serialize/errors.py +9 -0
- txtai/serialize/factory.py +29 -0
- txtai/serialize/messagepack.py +42 -0
- txtai/serialize/pickle.py +98 -0
- txtai/serialize/serializer.py +46 -0
- txtai/util/__init__.py +7 -0
- txtai/util/resolver.py +32 -0
- txtai/util/sparsearray.py +62 -0
- txtai/util/template.py +16 -0
- txtai/vectors/__init__.py +8 -0
- txtai/vectors/base.py +476 -0
- txtai/vectors/dense/__init__.py +12 -0
- txtai/vectors/dense/external.py +55 -0
- txtai/vectors/dense/factory.py +121 -0
- txtai/vectors/dense/huggingface.py +44 -0
- txtai/vectors/dense/litellm.py +86 -0
- txtai/vectors/dense/llama.py +84 -0
- txtai/vectors/dense/m2v.py +67 -0
- txtai/vectors/dense/sbert.py +92 -0
- txtai/vectors/dense/words.py +211 -0
- txtai/vectors/recovery.py +57 -0
- txtai/vectors/sparse/__init__.py +7 -0
- txtai/vectors/sparse/base.py +90 -0
- txtai/vectors/sparse/factory.py +55 -0
- txtai/vectors/sparse/sbert.py +34 -0
- txtai/version.py +6 -0
- txtai/workflow/__init__.py +8 -0
- txtai/workflow/base.py +184 -0
- txtai/workflow/execute.py +99 -0
- txtai/workflow/factory.py +42 -0
- txtai/workflow/task/__init__.py +18 -0
- txtai/workflow/task/base.py +490 -0
- txtai/workflow/task/console.py +24 -0
- txtai/workflow/task/export.py +64 -0
- txtai/workflow/task/factory.py +89 -0
- txtai/workflow/task/file.py +28 -0
- txtai/workflow/task/image.py +36 -0
- txtai/workflow/task/retrieve.py +61 -0
- txtai/workflow/task/service.py +102 -0
- txtai/workflow/task/storage.py +110 -0
- txtai/workflow/task/stream.py +33 -0
- txtai/workflow/task/template.py +116 -0
- txtai/workflow/task/url.py +20 -0
- txtai/workflow/task/workflow.py +14 -0
txtai/api/routers/__init__.py
ADDED
@@ -0,0 +1,25 @@
+"""
+Router imports
+"""
+
+from . import agent
+from . import caption
+from . import embeddings
+from . import entity
+from . import extractor
+from . import labels
+from . import llm
+from . import objects
+from . import openai
+from . import rag
+from . import reranker
+from . import segmentation
+from . import similarity
+from . import summary
+from . import tabular
+from . import textractor
+from . import texttospeech
+from . import transcription
+from . import translation
+from . import workflow
+from . import upload
txtai/api/routers/agent.py
ADDED
@@ -0,0 +1,38 @@
+"""
+Defines API paths for agent endpoints.
+"""
+
+from typing import Optional
+
+from fastapi import APIRouter, Body
+from fastapi.responses import StreamingResponse
+
+from .. import application
+from ..route import EncodingAPIRoute
+
+router = APIRouter(route_class=EncodingAPIRoute)
+
+
+@router.post("/agent")
+def agent(name: str = Body(...), text: str = Body(...), maxlength: Optional[int] = Body(default=None), stream: Optional[bool] = Body(default=None)):
+    """
+    Executes a named agent for input text.
+
+    Args:
+        name: agent name
+        text: instructions to run
+        maxlength: maximum sequence length
+        stream: stream response if True, defaults to False
+
+    Returns:
+        response text
+    """
+
+    # Build keyword arguments
+    kwargs = {key: value for key, value in [("stream", stream), ("maxlength", maxlength)] if value}
+
+    # Run agent
+    result = application.get().agent(name, text, **kwargs)
+
+    # Handle both standard and streaming responses
+    return StreamingResponse(result) if stream else result
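A minimal client-side sketch of calling the /agent endpoint defined above. The base URL and the agent name "researcher" are assumptions; the agent itself must be configured on the server.

```python
# Hedged usage sketch: POST /agent with a named agent and instructions.
# Base URL and agent name are assumptions, not part of the package.
import requests

response = requests.post(
    "http://localhost:8000/agent",
    json={"name": "researcher", "text": "Summarize the latest txtai release notes"},
    timeout=120,
)
print(response.json())
```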
txtai/api/routers/caption.py
ADDED
@@ -0,0 +1,42 @@
+"""
+Defines API paths for caption endpoints.
+"""
+
+from typing import List
+
+from fastapi import APIRouter, Body
+
+from .. import application
+from ..route import EncodingAPIRoute
+
+router = APIRouter(route_class=EncodingAPIRoute)
+
+
+@router.get("/caption")
+def caption(file: str):
+    """
+    Builds captions for images.
+
+    Args:
+        file: file to process
+
+    Returns:
+        list of captions
+    """
+
+    return application.get().pipeline("caption", (file,))
+
+
+@router.post("/batchcaption")
+def batchcaption(files: List[str] = Body(...)):
+    """
+    Builds captions for images.
+
+    Args:
+        files: list of files to process
+
+    Returns:
+        list of captions
+    """
+
+    return application.get().pipeline("caption", (files,))
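A minimal sketch of the caption endpoints above. The base URL and image paths are assumptions; the paths must be readable by the server process.

```python
# Hedged usage sketch: GET /caption for one image, POST /batchcaption for several.
import requests

base = "http://localhost:8000"  # assumption

# Single image
print(requests.get(f"{base}/caption", params={"file": "images/cat.jpg"}, timeout=60).json())

# Batch of images: the request body is the raw JSON list of file paths
print(requests.post(f"{base}/batchcaption", json=["images/cat.jpg", "images/dog.jpg"], timeout=60).json())
```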
txtai/api/routers/embeddings.py
ADDED
@@ -0,0 +1,280 @@
+"""
+Defines API paths for embeddings endpoints.
+"""
+
+from io import BytesIO
+from typing import List, Optional
+
+import PIL
+
+from fastapi import APIRouter, Body, File, Form, HTTPException, Request, UploadFile
+from fastapi.encoders import jsonable_encoder
+
+from .. import application
+from ..responses import ResponseFactory
+from ..route import EncodingAPIRoute
+
+from ...app import ReadOnlyError
+from ...graph import Graph
+
+router = APIRouter(route_class=EncodingAPIRoute)
+
+
+@router.get("/search")
+def search(query: str, request: Request):
+    """
+    Finds documents most similar to the input query. This method will run either an index search
+    or an index + database search depending on if a database is available.
+
+    Args:
+        query: input query
+        request: FastAPI request
+
+    Returns:
+        list of {id: value, score: value} for index search, list of dict for an index + database search
+    """
+
+    # Execute search
+    results = application.get().search(query, request=request)
+
+    # Encode using standard FastAPI encoder but skip certain classes
+    results = jsonable_encoder(
+        results, custom_encoder={bytes: lambda x: x, BytesIO: lambda x: x, PIL.Image.Image: lambda x: x, Graph: lambda x: x.savedict()}
+    )
+
+    # Return raw response to prevent duplicate encoding
+    response = ResponseFactory.create(request)
+    return response(results)
+
+
+# pylint: disable=W0621
+@router.post("/batchsearch")
+def batchsearch(
+    request: Request,
+    queries: List[str] = Body(...),
+    limit: int = Body(default=None),
+    weights: float = Body(default=None),
+    index: str = Body(default=None),
+    parameters: List[dict] = Body(default=None),
+    graph: bool = Body(default=False),
+):
+    """
+    Finds documents most similar to the input queries. This method will run either an index search
+    or an index + database search depending on if a database is available.
+
+    Args:
+        queries: input queries
+        limit: maximum results
+        weights: hybrid score weights, if applicable
+        index: index name, if applicable
+        parameters: list of dicts of named parameters to bind to placeholders
+        graph: return graph results if True
+
+    Returns:
+        list of {id: value, score: value} per query for index search, list of dict per query for an index + database search
+    """
+
+    # Execute search
+    results = application.get().batchsearch(queries, limit, weights, index, parameters, graph)
+
+    # Encode using standard FastAPI encoder but skip certain classes
+    results = jsonable_encoder(
+        results, custom_encoder={bytes: lambda x: x, BytesIO: lambda x: x, PIL.Image.Image: lambda x: x, Graph: lambda x: x.savedict()}
+    )
+
+    # Return raw response to prevent duplicate encoding
+    response = ResponseFactory.create(request)
+    return response(results)
+
+
+@router.post("/add")
+def add(documents: List[dict] = Body(...)):
+    """
+    Adds a batch of documents for indexing.
+
+    Args:
+        documents: list of {id: value, text: value, tags: value}
+    """
+
+    try:
+        application.get().add(documents)
+    except ReadOnlyError as e:
+        raise HTTPException(status_code=403, detail=e.args[0]) from e
+
+
+@router.post("/addobject")
+def addobject(data: List[bytes] = File(), uid: List[str] = Form(default=None), field: str = Form(default=None)):
+    """
+    Adds a batch of binary documents for indexing.
+
+    Args:
+        data: list of binary objects
+        uid: list of corresponding ids
+        field: optional object field name
+    """
+
+    if uid and len(data) != len(uid):
+        raise HTTPException(status_code=422, detail="Length of data and document lists must match")
+
+    try:
+        # Add objects
+        application.get().addobject(data, uid, field)
+    except ReadOnlyError as e:
+        raise HTTPException(status_code=403, detail=e.args[0]) from e
+
+
+@router.post("/addimage")
+def addimage(data: List[UploadFile] = File(), uid: List[str] = Form(), field: str = Form(default=None)):
+    """
+    Adds a batch of images for indexing.
+
+    Args:
+        data: list of images
+        uid: list of corresponding ids
+        field: optional object field name
+    """
+
+    if uid and len(data) != len(uid):
+        raise HTTPException(status_code=422, detail="Length of data and uid lists must match")
+
+    try:
+        # Add images
+        application.get().addobject([PIL.Image.open(content.file) for content in data], uid, field)
+    except ReadOnlyError as e:
+        raise HTTPException(status_code=403, detail=e.args[0]) from e
+
+
+@router.get("/index")
+def index():
+    """
+    Builds an embeddings index for previously batched documents.
+    """
+
+    try:
+        application.get().index()
+    except ReadOnlyError as e:
+        raise HTTPException(status_code=403, detail=e.args[0]) from e
+
+
+@router.get("/upsert")
+def upsert():
+    """
+    Runs an embeddings upsert operation for previously batched documents.
+    """
+
+    try:
+        application.get().upsert()
+    except ReadOnlyError as e:
+        raise HTTPException(status_code=403, detail=e.args[0]) from e
+
+
+@router.post("/delete")
+def delete(ids: List = Body(...)):
+    """
+    Deletes from an embeddings index. Returns list of ids deleted.
+
+    Args:
+        ids: list of ids to delete
+
+    Returns:
+        ids deleted
+    """
+
+    try:
+        return application.get().delete(ids)
+    except ReadOnlyError as e:
+        raise HTTPException(status_code=403, detail=e.args[0]) from e
+
+
+@router.post("/reindex")
+def reindex(config: dict = Body(...), function: str = Body(default=None)):
+    """
+    Recreates this embeddings index using config. This method only works if document content storage is enabled.
+
+    Args:
+        config: new config
+        function: optional function to prepare content for indexing
+    """
+
+    try:
+        application.get().reindex(config, function)
+    except ReadOnlyError as e:
+        raise HTTPException(status_code=403, detail=e.args[0]) from e
+
+
+@router.get("/count")
+def count():
+    """
+    Total number of elements in this embeddings index.
+
+    Returns:
+        number of elements in embeddings index
+    """
+
+    return application.get().count()
+
+
+@router.post("/explain")
+def explain(query: str = Body(...), texts: List[str] = Body(default=None), limit: int = Body(default=None)):
+    """
+    Explains the importance of each input token in text for a query.
+
+    Args:
+        query: query text
+        texts: list of text
+
+    Returns:
+        list of dict where a higher score represents higher importance relative to the query
+    """
+
+    return application.get().explain(query, texts, limit)
+
+
+@router.post("/batchexplain")
+def batchexplain(queries: List[str] = Body(...), texts: List[str] = Body(default=None), limit: int = Body(default=None)):
+    """
+    Explains the importance of each input token in text for a list of queries.
+
+    Args:
+        queries: list of queries
+        texts: list of text
+
+    Returns:
+        list of dict where a higher score represents higher importance relative to the query
+    """
+
+    return application.get().batchexplain(queries, texts, limit)
+
+
+@router.get("/transform")
+def transform(text: str, category: Optional[str] = None, index: Optional[str] = None):
+    """
+    Transforms text into an embeddings array.
+
+    Args:
+        text: input text
+        category: category for instruction-based embeddings
+        index: index name, if applicable
+
+    Returns:
+        embeddings array
+    """
+
+    return application.get().transform(text, category, index)
+
+
+@router.post("/batchtransform")
+def batchtransform(texts: List[str] = Body(...), category: Optional[str] = None, index: Optional[str] = None):
+    """
+    Transforms list of text into embeddings arrays.
+
+    Args:
+        texts: list of text
+        category: category for instruction-based embeddings
+        index: index name, if applicable
+
+    Returns:
+        embeddings arrays
+    """
+
+    return application.get().batchtransform(texts, category, index)
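A minimal sketch of the add, index and search flow exposed by the router above. It assumes a writable (non read-only) txtai API instance at an assumed base URL.

```python
# Hedged usage sketch: queue documents, build the index, then search.
import requests

base = "http://localhost:8000"  # assumption

# Queue documents for indexing (list of {id, text, tags} dicts)
requests.post(f"{base}/add", json=[{"id": "1", "text": "US tops 5 million confirmed virus cases"}], timeout=60)

# Build the embeddings index from the queued documents
requests.get(f"{base}/index", timeout=60)

# Search; returns [{"id": ..., "score": ...}] or full rows when content storage is enabled
print(requests.get(f"{base}/search", params={"query": "virus cases"}, timeout=60).json())
```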
txtai/api/routers/entity.py
ADDED
@@ -0,0 +1,42 @@
+"""
+Defines API paths for entity endpoints.
+"""
+
+from typing import List
+
+from fastapi import APIRouter, Body
+
+from .. import application
+from ..route import EncodingAPIRoute
+
+router = APIRouter(route_class=EncodingAPIRoute)
+
+
+@router.get("/entity")
+def entity(text: str):
+    """
+    Applies a token classifier to text.
+
+    Args:
+        text: input text
+
+    Returns:
+        list of (entity, entity type, score) per text element
+    """
+
+    return application.get().pipeline("entity", (text,))
+
+
+@router.post("/batchentity")
+def batchentity(texts: List[str] = Body(...)):
+    """
+    Applies a token classifier to text.
+
+    Args:
+        texts: list of text
+
+    Returns:
+        list of (entity, entity type, score) per text element
+    """
+
+    return application.get().pipeline("entity", (texts,))
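A minimal sketch of calling the /entity endpoint above. The base URL and input sentence are assumptions.

```python
# Hedged usage sketch: GET /entity runs the token classifier on one input string.
import requests

response = requests.get(
    "http://localhost:8000/entity",  # assumption
    params={"text": "Canada's last fully intact ice shelf has suddenly collapsed"},
    timeout=60,
)
# Expected shape: list of (entity, entity type, score) tuples serialized as lists
print(response.json())
```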
txtai/api/routers/extractor.py
ADDED
@@ -0,0 +1,28 @@
+"""
+Defines API paths for extractor endpoints.
+"""
+
+from typing import List, Optional
+
+from fastapi import APIRouter, Body
+
+from .. import application
+from ..route import EncodingAPIRoute
+
+router = APIRouter(route_class=EncodingAPIRoute)
+
+
+@router.post("/extract")
+def extract(queue: List[dict] = Body(...), texts: Optional[List[str]] = Body(default=None)):
+    """
+    Extracts answers to input questions.
+
+    Args:
+        queue: list of {name: value, query: value, question: value, snippet: value}
+        texts: optional list of text
+
+    Returns:
+        list of {name: value, answer: value}
+    """
+
+    return application.get().extract(queue, texts)
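A minimal sketch of the /extract endpoint above. The queue entries and texts are illustrative values; the base URL is an assumption.

```python
# Hedged usage sketch: POST /extract with a question queue and optional texts.
import requests

queue = [{"name": "cases", "query": "confirmed cases", "question": "How many confirmed cases?", "snippet": False}]
texts = ["US tops 5 million confirmed virus cases"]

response = requests.post(
    "http://localhost:8000/extract",  # assumption
    json={"queue": queue, "texts": texts},
    timeout=60,
)
# Expected shape: [{"name": "cases", "answer": ...}]
print(response.json())
```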
txtai/api/routers/labels.py
ADDED
@@ -0,0 +1,47 @@
+"""
+Defines API paths for labels endpoints.
+"""
+
+from typing import List
+
+from fastapi import APIRouter, Body
+
+from .. import application
+from ..route import EncodingAPIRoute
+
+router = APIRouter(route_class=EncodingAPIRoute)
+
+
+@router.post("/label")
+def label(text: str = Body(...), labels: List[str] = Body(...)):
+    """
+    Applies a zero shot classifier to text using a list of labels. Returns a list of
+    {id: value, score: value} sorted by highest score, where id is the index in labels.
+
+    Args:
+        text: input text
+        labels: list of labels
+
+    Returns:
+        list of {id: value, score: value} per text element
+    """
+
+    return application.get().label(text, labels)
+
+
+@router.post("/batchlabel")
+def batchlabel(texts: List[str] = Body(...), labels: List[str] = Body(...)):
+    """
+    Applies a zero shot classifier to list of text using a list of labels. Returns a list of
+    {id: value, score: value} sorted by highest score, where id is the index in labels per
+    text element.
+
+    Args:
+        texts: list of text
+        labels: list of labels
+
+    Returns:
+        list of {id: value, score: value} per text element
+    """
+
+    return application.get().label(texts, labels)
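A minimal sketch of the zero-shot label endpoints above. The base URL and label set are assumptions.

```python
# Hedged usage sketch: POST /label for one text, POST /batchlabel for several.
import requests

base = "http://localhost:8000"  # assumption
labels = ["positive", "negative"]

# Single text; returns [{"id": label index, "score": value}] sorted by score
print(requests.post(f"{base}/label", json={"text": "Great day for a picnic", "labels": labels}, timeout=60).json())

# Batch of texts; returns one result list per input text
print(requests.post(f"{base}/batchlabel", json={"texts": ["Great day", "Terrible day"], "labels": labels}, timeout=60).json())
```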
txtai/api/routers/llm.py
ADDED
@@ -0,0 +1,61 @@
+"""
+Defines API paths for llm endpoints.
+"""
+
+from typing import List, Optional
+
+from fastapi import APIRouter, Body
+from fastapi.responses import StreamingResponse
+
+from .. import application
+from ..route import EncodingAPIRoute
+
+router = APIRouter(route_class=EncodingAPIRoute)
+
+
+@router.get("/llm")
+def llm(text: str, maxlength: Optional[int] = None, stream: Optional[bool] = False):
+    """
+    Runs an LLM pipeline for the input text.
+
+    Args:
+        text: input text
+        maxlength: optional response max length
+        stream: streams response if True
+
+    Returns:
+        response text
+    """
+
+    # Build keyword arguments
+    kwargs = {key: value for key, value in [("stream", stream), ("maxlength", maxlength)] if value}
+
+    # Run pipeline
+    result = application.get().pipeline("llm", text, **kwargs)
+
+    # Handle both standard and streaming responses
+    return StreamingResponse(result) if stream else result
+
+
+@router.post("/batchllm")
+def batchllm(texts: List[str] = Body(...), maxlength: Optional[int] = Body(default=None), stream: Optional[bool] = Body(default=False)):
+    """
+    Runs an LLM pipeline for the input texts.
+
+    Args:
+        texts: input texts
+        maxlength: optional response max length
+        stream: streams response if True
+
+    Returns:
+        response texts
+    """
+
+    # Build keyword arguments
+    kwargs = {key: value for key, value in [("stream", stream), ("maxlength", maxlength)] if value}
+
+    # Run pipeline
+    result = application.get().pipeline("llm", texts, **kwargs)
+
+    # Handle both standard and streaming responses
+    return StreamingResponse(result) if stream else result
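A minimal sketch of calling the /llm endpoint above. It assumes the server has an llm pipeline configured and runs at an assumed base URL; the prompt is illustrative.

```python
# Hedged usage sketch: GET /llm with a prompt and optional maxlength.
import requests

response = requests.get(
    "http://localhost:8000/llm",  # assumption
    params={"text": "Answer the following question. How many planets are in the solar system?", "maxlength": 256},
    timeout=120,
)
print(response.json())
```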
txtai/api/routers/objects.py
ADDED
@@ -0,0 +1,42 @@
+"""
+Defines API paths for objects endpoints.
+"""
+
+from typing import List
+
+from fastapi import APIRouter, Body
+
+from .. import application
+from ..route import EncodingAPIRoute
+
+router = APIRouter(route_class=EncodingAPIRoute)
+
+
+@router.get("/objects")
+def objects(file: str):
+    """
+    Applies object detection/image classification models to images.
+
+    Args:
+        file: file to process
+
+    Returns:
+        list of (label, score) elements
+    """
+
+    return application.get().pipeline("objects", (file,))
+
+
+@router.post("/batchobjects")
+def batchobjects(files: List[str] = Body(...)):
+    """
+    Applies object detection/image classification models to images.
+
+    Args:
+        files: list of files to process
+
+    Returns:
+        list of (label, score) elements
+    """
+
+    return application.get().pipeline("objects", (files,))
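A minimal sketch of the /objects endpoint above. The base URL and file path are assumptions, and the path must be accessible to the server process.

```python
# Hedged usage sketch: GET /objects runs object detection/classification on one image.
import requests

response = requests.get(
    "http://localhost:8000/objects",  # assumption
    params={"file": "images/cat.jpg"},
    timeout=60,
)
# Expected shape: list of (label, score) elements
print(response.json())
```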