mseep-txtai 9.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mseep_txtai-9.1.1.dist-info/METADATA +262 -0
- mseep_txtai-9.1.1.dist-info/RECORD +251 -0
- mseep_txtai-9.1.1.dist-info/WHEEL +5 -0
- mseep_txtai-9.1.1.dist-info/licenses/LICENSE +190 -0
- mseep_txtai-9.1.1.dist-info/top_level.txt +1 -0
- txtai/__init__.py +16 -0
- txtai/agent/__init__.py +12 -0
- txtai/agent/base.py +54 -0
- txtai/agent/factory.py +39 -0
- txtai/agent/model.py +107 -0
- txtai/agent/placeholder.py +16 -0
- txtai/agent/tool/__init__.py +7 -0
- txtai/agent/tool/embeddings.py +69 -0
- txtai/agent/tool/factory.py +130 -0
- txtai/agent/tool/function.py +49 -0
- txtai/ann/__init__.py +7 -0
- txtai/ann/base.py +153 -0
- txtai/ann/dense/__init__.py +11 -0
- txtai/ann/dense/annoy.py +72 -0
- txtai/ann/dense/factory.py +76 -0
- txtai/ann/dense/faiss.py +233 -0
- txtai/ann/dense/hnsw.py +104 -0
- txtai/ann/dense/numpy.py +164 -0
- txtai/ann/dense/pgvector.py +323 -0
- txtai/ann/dense/sqlite.py +303 -0
- txtai/ann/dense/torch.py +38 -0
- txtai/ann/sparse/__init__.py +7 -0
- txtai/ann/sparse/factory.py +61 -0
- txtai/ann/sparse/ivfsparse.py +377 -0
- txtai/ann/sparse/pgsparse.py +56 -0
- txtai/api/__init__.py +18 -0
- txtai/api/application.py +134 -0
- txtai/api/authorization.py +53 -0
- txtai/api/base.py +159 -0
- txtai/api/cluster.py +295 -0
- txtai/api/extension.py +19 -0
- txtai/api/factory.py +40 -0
- txtai/api/responses/__init__.py +7 -0
- txtai/api/responses/factory.py +30 -0
- txtai/api/responses/json.py +56 -0
- txtai/api/responses/messagepack.py +51 -0
- txtai/api/route.py +41 -0
- txtai/api/routers/__init__.py +25 -0
- txtai/api/routers/agent.py +38 -0
- txtai/api/routers/caption.py +42 -0
- txtai/api/routers/embeddings.py +280 -0
- txtai/api/routers/entity.py +42 -0
- txtai/api/routers/extractor.py +28 -0
- txtai/api/routers/labels.py +47 -0
- txtai/api/routers/llm.py +61 -0
- txtai/api/routers/objects.py +42 -0
- txtai/api/routers/openai.py +191 -0
- txtai/api/routers/rag.py +61 -0
- txtai/api/routers/reranker.py +46 -0
- txtai/api/routers/segmentation.py +42 -0
- txtai/api/routers/similarity.py +48 -0
- txtai/api/routers/summary.py +46 -0
- txtai/api/routers/tabular.py +42 -0
- txtai/api/routers/textractor.py +42 -0
- txtai/api/routers/texttospeech.py +33 -0
- txtai/api/routers/transcription.py +42 -0
- txtai/api/routers/translation.py +46 -0
- txtai/api/routers/upload.py +36 -0
- txtai/api/routers/workflow.py +28 -0
- txtai/app/__init__.py +5 -0
- txtai/app/base.py +821 -0
- txtai/archive/__init__.py +9 -0
- txtai/archive/base.py +104 -0
- txtai/archive/compress.py +51 -0
- txtai/archive/factory.py +25 -0
- txtai/archive/tar.py +49 -0
- txtai/archive/zip.py +35 -0
- txtai/cloud/__init__.py +8 -0
- txtai/cloud/base.py +106 -0
- txtai/cloud/factory.py +70 -0
- txtai/cloud/hub.py +101 -0
- txtai/cloud/storage.py +125 -0
- txtai/console/__init__.py +5 -0
- txtai/console/__main__.py +22 -0
- txtai/console/base.py +264 -0
- txtai/data/__init__.py +10 -0
- txtai/data/base.py +138 -0
- txtai/data/labels.py +42 -0
- txtai/data/questions.py +135 -0
- txtai/data/sequences.py +48 -0
- txtai/data/texts.py +68 -0
- txtai/data/tokens.py +28 -0
- txtai/database/__init__.py +14 -0
- txtai/database/base.py +342 -0
- txtai/database/client.py +227 -0
- txtai/database/duckdb.py +150 -0
- txtai/database/embedded.py +76 -0
- txtai/database/encoder/__init__.py +8 -0
- txtai/database/encoder/base.py +37 -0
- txtai/database/encoder/factory.py +56 -0
- txtai/database/encoder/image.py +43 -0
- txtai/database/encoder/serialize.py +28 -0
- txtai/database/factory.py +77 -0
- txtai/database/rdbms.py +569 -0
- txtai/database/schema/__init__.py +6 -0
- txtai/database/schema/orm.py +99 -0
- txtai/database/schema/statement.py +98 -0
- txtai/database/sql/__init__.py +8 -0
- txtai/database/sql/aggregate.py +178 -0
- txtai/database/sql/base.py +189 -0
- txtai/database/sql/expression.py +404 -0
- txtai/database/sql/token.py +342 -0
- txtai/database/sqlite.py +57 -0
- txtai/embeddings/__init__.py +7 -0
- txtai/embeddings/base.py +1107 -0
- txtai/embeddings/index/__init__.py +14 -0
- txtai/embeddings/index/action.py +15 -0
- txtai/embeddings/index/autoid.py +92 -0
- txtai/embeddings/index/configuration.py +71 -0
- txtai/embeddings/index/documents.py +86 -0
- txtai/embeddings/index/functions.py +155 -0
- txtai/embeddings/index/indexes.py +199 -0
- txtai/embeddings/index/indexids.py +60 -0
- txtai/embeddings/index/reducer.py +104 -0
- txtai/embeddings/index/stream.py +67 -0
- txtai/embeddings/index/transform.py +205 -0
- txtai/embeddings/search/__init__.py +11 -0
- txtai/embeddings/search/base.py +344 -0
- txtai/embeddings/search/errors.py +9 -0
- txtai/embeddings/search/explain.py +120 -0
- txtai/embeddings/search/ids.py +61 -0
- txtai/embeddings/search/query.py +69 -0
- txtai/embeddings/search/scan.py +196 -0
- txtai/embeddings/search/terms.py +46 -0
- txtai/graph/__init__.py +10 -0
- txtai/graph/base.py +769 -0
- txtai/graph/factory.py +61 -0
- txtai/graph/networkx.py +275 -0
- txtai/graph/query.py +181 -0
- txtai/graph/rdbms.py +113 -0
- txtai/graph/topics.py +166 -0
- txtai/models/__init__.py +9 -0
- txtai/models/models.py +268 -0
- txtai/models/onnx.py +133 -0
- txtai/models/pooling/__init__.py +9 -0
- txtai/models/pooling/base.py +141 -0
- txtai/models/pooling/cls.py +28 -0
- txtai/models/pooling/factory.py +144 -0
- txtai/models/pooling/late.py +173 -0
- txtai/models/pooling/mean.py +33 -0
- txtai/models/pooling/muvera.py +164 -0
- txtai/models/registry.py +37 -0
- txtai/models/tokendetection.py +122 -0
- txtai/pipeline/__init__.py +17 -0
- txtai/pipeline/audio/__init__.py +11 -0
- txtai/pipeline/audio/audiomixer.py +58 -0
- txtai/pipeline/audio/audiostream.py +94 -0
- txtai/pipeline/audio/microphone.py +244 -0
- txtai/pipeline/audio/signal.py +186 -0
- txtai/pipeline/audio/texttoaudio.py +60 -0
- txtai/pipeline/audio/texttospeech.py +553 -0
- txtai/pipeline/audio/transcription.py +212 -0
- txtai/pipeline/base.py +23 -0
- txtai/pipeline/data/__init__.py +10 -0
- txtai/pipeline/data/filetohtml.py +206 -0
- txtai/pipeline/data/htmltomd.py +414 -0
- txtai/pipeline/data/segmentation.py +178 -0
- txtai/pipeline/data/tabular.py +155 -0
- txtai/pipeline/data/textractor.py +139 -0
- txtai/pipeline/data/tokenizer.py +112 -0
- txtai/pipeline/factory.py +77 -0
- txtai/pipeline/hfmodel.py +111 -0
- txtai/pipeline/hfpipeline.py +96 -0
- txtai/pipeline/image/__init__.py +7 -0
- txtai/pipeline/image/caption.py +55 -0
- txtai/pipeline/image/imagehash.py +90 -0
- txtai/pipeline/image/objects.py +80 -0
- txtai/pipeline/llm/__init__.py +11 -0
- txtai/pipeline/llm/factory.py +86 -0
- txtai/pipeline/llm/generation.py +173 -0
- txtai/pipeline/llm/huggingface.py +218 -0
- txtai/pipeline/llm/litellm.py +90 -0
- txtai/pipeline/llm/llama.py +152 -0
- txtai/pipeline/llm/llm.py +75 -0
- txtai/pipeline/llm/rag.py +477 -0
- txtai/pipeline/nop.py +14 -0
- txtai/pipeline/tensors.py +52 -0
- txtai/pipeline/text/__init__.py +13 -0
- txtai/pipeline/text/crossencoder.py +70 -0
- txtai/pipeline/text/entity.py +140 -0
- txtai/pipeline/text/labels.py +137 -0
- txtai/pipeline/text/lateencoder.py +103 -0
- txtai/pipeline/text/questions.py +48 -0
- txtai/pipeline/text/reranker.py +57 -0
- txtai/pipeline/text/similarity.py +83 -0
- txtai/pipeline/text/summary.py +98 -0
- txtai/pipeline/text/translation.py +298 -0
- txtai/pipeline/train/__init__.py +7 -0
- txtai/pipeline/train/hfonnx.py +196 -0
- txtai/pipeline/train/hftrainer.py +398 -0
- txtai/pipeline/train/mlonnx.py +63 -0
- txtai/scoring/__init__.py +12 -0
- txtai/scoring/base.py +188 -0
- txtai/scoring/bm25.py +29 -0
- txtai/scoring/factory.py +95 -0
- txtai/scoring/pgtext.py +181 -0
- txtai/scoring/sif.py +32 -0
- txtai/scoring/sparse.py +218 -0
- txtai/scoring/terms.py +499 -0
- txtai/scoring/tfidf.py +358 -0
- txtai/serialize/__init__.py +10 -0
- txtai/serialize/base.py +85 -0
- txtai/serialize/errors.py +9 -0
- txtai/serialize/factory.py +29 -0
- txtai/serialize/messagepack.py +42 -0
- txtai/serialize/pickle.py +98 -0
- txtai/serialize/serializer.py +46 -0
- txtai/util/__init__.py +7 -0
- txtai/util/resolver.py +32 -0
- txtai/util/sparsearray.py +62 -0
- txtai/util/template.py +16 -0
- txtai/vectors/__init__.py +8 -0
- txtai/vectors/base.py +476 -0
- txtai/vectors/dense/__init__.py +12 -0
- txtai/vectors/dense/external.py +55 -0
- txtai/vectors/dense/factory.py +121 -0
- txtai/vectors/dense/huggingface.py +44 -0
- txtai/vectors/dense/litellm.py +86 -0
- txtai/vectors/dense/llama.py +84 -0
- txtai/vectors/dense/m2v.py +67 -0
- txtai/vectors/dense/sbert.py +92 -0
- txtai/vectors/dense/words.py +211 -0
- txtai/vectors/recovery.py +57 -0
- txtai/vectors/sparse/__init__.py +7 -0
- txtai/vectors/sparse/base.py +90 -0
- txtai/vectors/sparse/factory.py +55 -0
- txtai/vectors/sparse/sbert.py +34 -0
- txtai/version.py +6 -0
- txtai/workflow/__init__.py +8 -0
- txtai/workflow/base.py +184 -0
- txtai/workflow/execute.py +99 -0
- txtai/workflow/factory.py +42 -0
- txtai/workflow/task/__init__.py +18 -0
- txtai/workflow/task/base.py +490 -0
- txtai/workflow/task/console.py +24 -0
- txtai/workflow/task/export.py +64 -0
- txtai/workflow/task/factory.py +89 -0
- txtai/workflow/task/file.py +28 -0
- txtai/workflow/task/image.py +36 -0
- txtai/workflow/task/retrieve.py +61 -0
- txtai/workflow/task/service.py +102 -0
- txtai/workflow/task/storage.py +110 -0
- txtai/workflow/task/stream.py +33 -0
- txtai/workflow/task/template.py +116 -0
- txtai/workflow/task/url.py +20 -0
- txtai/workflow/task/workflow.py +14 -0
@@ -0,0 +1,191 @@
|
|
1
|
+
"""
|
2
|
+
Defines an OpenAI-compatible API endpoint for txtai.
|
3
|
+
|
4
|
+
See the following specification for more information:
|
5
|
+
https://github.com/openai/openai-openapi
|
6
|
+
"""
|
7
|
+
|
8
|
+
import uuid
|
9
|
+
import json
|
10
|
+
import time
|
11
|
+
|
12
|
+
from typing import List, Optional, Union
|
13
|
+
|
14
|
+
from fastapi import APIRouter, Body, Form, UploadFile
|
15
|
+
from fastapi.responses import Response, StreamingResponse
|
16
|
+
|
17
|
+
from .. import application
|
18
|
+
from ..route import EncodingAPIRoute
|
19
|
+
|
20
|
+
router = APIRouter(route_class=EncodingAPIRoute)
|
21
|
+
|
22
|
+
|
23
|
+
# pylint: disable=W0622
|
24
|
+
@router.post("/v1/chat/completions")
def chat(
    messages: List[dict] = Body(...),
    model: str = Body(...),
    max_completion_tokens: Optional[int] = Body(default=None),
    stream: Optional[bool] = Body(default=False),
):
    """
    Handles a chat completion request by routing it to the target named by model.

    Agents, embeddings search, pipelines and workflows receive the content of the first
    message only. Any other model name sends the full message list to the "llm" pipeline.

    Args:
        messages: list of messages [{"role": role, "content": content}]
        model: agent name, workflow name, pipeline name or embeddings
        max_completion_tokens: max length to generate, forwarded as maxlength
        stream: streams response if True

    Returns:
        chat completion
    """

    # Forward only the options that were set to a truthy value
    options = {}
    if stream:
        options["stream"] = stream
    if max_completion_tokens:
        options["maxlength"] = max_completion_tokens

    # Content of the first message
    prompt = messages[0]["content"]

    if model in application.get().agents:
        # Agent
        output = application.get().agent(model, prompt, **options)
    elif model == "embeddings":
        # Embeddings search, use the text of the top result
        matches = application.get().search(prompt, 1, **options)
        output = matches[0]["text"]
    elif model in application.get().pipelines and model != "llm":
        # Pipeline
        output = application.get().pipeline(model, prompt, **options)
    elif model in application.get().workflows:
        # Workflow, use the first output element
        outputs = list(application.get().workflow(model, [prompt], **options))
        output = outputs[0]
    else:
        # Default to running all messages through default LLM
        output = application.get().pipeline("llm", messages, **options)

    # Write response
    if stream:
        return StreamingResponse(StreamingChatResponse()(model, output))

    return ChatResponse()(model, output)
|
72
|
+
|
73
|
+
|
74
|
+
@router.post("/v1/embeddings")
def embeddings(input: Union[str, List[str]] = Body(...), model: str = Body(...)):
    """
    Builds embeddings vectors for the input text.

    Args:
        input: text|list
        model: model name, echoed back in the response

    Returns:
        list response with one embedding entry per input, in input order
    """

    # A single string is handled as a one element batch
    texts = input if not isinstance(input, str) else [input]
    vectors = application.get().batchtransform(texts)

    # One entry per vector, index is the position in the input
    rows = [{"object": "embedding", "embedding": vector, "index": position} for position, vector in enumerate(vectors)]

    return {"object": "list", "data": rows, "model": model}
|
96
|
+
|
97
|
+
|
98
|
+
@router.post("/v1/audio/speech")
def speech(input: str = Body(...), voice: str = Body(...), response_format: Optional[str] = Body(default="mp3")):
    """
    Runs the "texttospeech" pipeline for the input text.

    Args:
        input: input text
        voice: speaker name, passed to the pipeline as speaker
        response_format: audio encoding format, defaults to mp3

    Returns:
        audio data
    """

    # Map request fields to pipeline argument names
    options = {"speaker": voice, "encoding": response_format}
    audio = application.get().pipeline("texttospeech", input, **options)

    # Return the pipeline output as the response body
    return Response(audio)
|
117
|
+
|
118
|
+
|
119
|
+
@router.post("/v1/audio/transcriptions")
def transcribe(file: UploadFile, language: Optional[str] = Form(default=None), response_format: Optional[str] = Form(default="json")):
    """
    Runs the "transcription" pipeline on an uploaded audio file.

    Args:
        file: audio input file
        language: language of input audio
        response_format: output format (json or text)

    Returns:
        transcribed text, wrapped as {"text": text} unless response_format is "text"
    """

    # Transcribe
    options = {"language": language, "task": "transcribe"}
    text = application.get().pipeline("transcription", file.file, **options)

    if response_format == "text":
        return text

    return {"text": text}
|
136
|
+
|
137
|
+
|
138
|
+
@router.post("/v1/audio/translations")
def translate(
    file: UploadFile,
    response_format: Optional[str] = Form(default="json"),
):
    """
    Runs the "transcription" pipeline on an uploaded audio file with the translate task.

    Args:
        file: audio input file
        response_format: output format (json or text)

    Returns:
        translated text, wrapped as {"text": text} unless response_format is "text"
    """

    # Transcribe and translate to English
    options = {"language": "English", "task": "translate"}
    text = application.get().pipeline("transcription", file.file, **options)

    if response_format == "text":
        return text

    return {"text": text}
|
157
|
+
|
158
|
+
|
159
|
+
class ChatResponse:
    """
    Returns a chat response object.
    """

    def __call__(self, model, result):
        """
        Builds a chat completion response.

        Args:
            model: model name, echoed back in the response
            result: content for the assistant message

        Returns:
            chat completion dictionary with a single choice
        """

        return {
            "id": str(uuid.uuid4()),
            "object": "chat.completion",
            # The OpenAI specification defines created as a Unix timestamp in seconds
            "created": int(time.time()),
            "model": model,
            # "index" is the choice position field in the OpenAI specification, "id" is kept for existing clients
            "choices": [{"id": 0, "index": 0, "message": {"role": "assistant", "content": result}, "finish_reason": "stop"}],
        }
|
172
|
+
|
173
|
+
|
174
|
+
class StreamingChatResponse:
    """
    Returns a streaming chat response object.
    """

    def __call__(self, model, result):
        """
        Generates "data: " prefixed chat completion chunks, followed by a final [DONE] marker.

        Args:
            model: model name, echoed back in each chunk
            result: iterable of content chunks

        Returns:
            generator of encoded chunk strings
        """

        # The OpenAI specification states every chunk of a stream shares the same id and
        # created timestamp (Unix seconds), so both are generated once per stream
        uid, created = str(uuid.uuid4()), int(time.time())

        for chunk in result:
            payload = {
                "id": uid,
                "object": "chat.completion.chunk",
                "created": created,
                "model": model,
                # "index" is the choice position field in the OpenAI specification, "id" is kept for existing clients
                "choices": [{"id": 0, "index": 0, "delta": {"content": chunk}}],
            }

            yield "data: " + json.dumps(payload) + "\n\n"

        yield "data: [DONE]\n\n"
|
txtai/api/routers/rag.py
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
"""
|
2
|
+
Defines API paths for rag endpoints.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import List, Optional
|
6
|
+
|
7
|
+
from fastapi import APIRouter, Body
|
8
|
+
from fastapi.responses import StreamingResponse
|
9
|
+
|
10
|
+
from .. import application
|
11
|
+
from ..route import EncodingAPIRoute
|
12
|
+
|
13
|
+
router = APIRouter(route_class=EncodingAPIRoute)
|
14
|
+
|
15
|
+
|
16
|
+
@router.get("/rag")
def rag(query: str, maxlength: Optional[int] = None, stream: Optional[bool] = False):
    """
    Runs the "rag" pipeline for a single query.

    Args:
        query: input RAG query
        maxlength: optional response max length
        stream: streams response if True

    Returns:
        answer
    """

    # Forward only the options that were set to a truthy value
    options = {}
    if stream:
        options["stream"] = stream
    if maxlength:
        options["maxlength"] = maxlength

    # Run pipeline
    answer = application.get().pipeline("rag", query, **options)

    # Streaming requests wrap the pipeline output in a streaming response
    if stream:
        return StreamingResponse(answer)

    return answer
|
38
|
+
|
39
|
+
|
40
|
+
@router.post("/batchrag")
def batchrag(queries: List[str] = Body(...), maxlength: Optional[int] = Body(default=None), stream: Optional[bool] = Body(default=False)):
    """
    Runs the "rag" pipeline for a list of queries.

    Args:
        queries: input RAG queries
        maxlength: optional response max length
        stream: streams response if True

    Returns:
        answers
    """

    # Forward only the options that were set to a truthy value
    options = {}
    if stream:
        options["stream"] = stream
    if maxlength:
        options["maxlength"] = maxlength

    # Run pipeline
    answers = application.get().pipeline("rag", queries, **options)

    # Streaming requests wrap the pipeline output in a streaming response
    if stream:
        return StreamingResponse(answers)

    return answers
|
@@ -0,0 +1,46 @@
|
|
1
|
+
"""
|
2
|
+
Defines API paths for reranking endpoints.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import List, Optional
|
6
|
+
|
7
|
+
from fastapi import APIRouter, Body
|
8
|
+
|
9
|
+
from .. import application
|
10
|
+
from ..route import EncodingAPIRoute
|
11
|
+
|
12
|
+
router = APIRouter(route_class=EncodingAPIRoute)
|
13
|
+
|
14
|
+
|
15
|
+
@router.get("/rerank")
def rerank(query: str, limit: Optional[int] = 3, factor: Optional[int] = 10):
    """
    Runs the "reranker" pipeline for a query. This queries an embeddings database and
    reranks the results with a similarity pipeline.

    Args:
        query: query text
        limit: maximum results
        factor: factor to multiply limit by for the initial embeddings search

    Returns:
        query results
    """

    # Pipeline arguments are passed as a single tuple
    arguments = (query, limit, factor)
    return application.get().pipeline("reranker", arguments)
|
30
|
+
|
31
|
+
|
32
|
+
@router.post("/batchrerank")
def batchrerank(queries: List[str] = Body(...), limit: Optional[int] = Body(default=3), factor: Optional[int] = Body(default=10)):
    """
    Runs the "reranker" pipeline for a list of queries. This queries an embeddings database
    and reranks the results with a similarity pipeline.

    Args:
        queries: list of queries
        limit: maximum results
        factor: factor to multiply limit by for the initial embeddings search

    Returns:
        query results
    """

    # Pipeline arguments are passed as a single tuple
    arguments = (queries, limit, factor)
    return application.get().pipeline("reranker", arguments)
|
@@ -0,0 +1,42 @@
|
|
1
|
+
"""
|
2
|
+
Defines API paths for segmentation endpoints.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import List
|
6
|
+
|
7
|
+
from fastapi import APIRouter, Body
|
8
|
+
|
9
|
+
from .. import application
|
10
|
+
from ..route import EncodingAPIRoute
|
11
|
+
|
12
|
+
router = APIRouter(route_class=EncodingAPIRoute)
|
13
|
+
|
14
|
+
|
15
|
+
@router.get("/segment")
def segment(text: str):
    """
    Runs the "segmentation" pipeline to split text into semantic units.

    Args:
        text: input text

    Returns:
        segmented text
    """

    # Pipeline arguments are passed as a single element tuple
    arguments = (text,)
    return application.get().pipeline("segmentation", arguments)
|
28
|
+
|
29
|
+
|
30
|
+
@router.post("/batchsegment")
def batchsegment(texts: List[str] = Body(...)):
    """
    Runs the "segmentation" pipeline to split a list of texts into semantic units.

    Args:
        texts: list of texts to segment

    Returns:
        list of segmented text
    """

    # Pipeline arguments are passed as a single element tuple
    arguments = (texts,)
    return application.get().pipeline("segmentation", arguments)
|
@@ -0,0 +1,48 @@
|
|
1
|
+
"""
|
2
|
+
Defines API paths for similarity endpoints.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import List
|
6
|
+
|
7
|
+
from fastapi import APIRouter, Body
|
8
|
+
|
9
|
+
from .. import application
|
10
|
+
from ..route import EncodingAPIRoute
|
11
|
+
|
12
|
+
router = APIRouter(route_class=EncodingAPIRoute)
|
13
|
+
|
14
|
+
|
15
|
+
@router.post("/similarity")
def similarity(query: str = Body(...), texts: List[str] = Body(...)):
    """
    Scores a list of text against a query. Results are a list of {id: value, score: value}
    sorted by highest score, where id is the index in texts.

    Args:
        query: query text
        texts: list of text

    Returns:
        list of {id: value, score: value}
    """

    scores = application.get().similarity(query, texts)
    return scores
|
31
|
+
|
32
|
+
|
33
|
+
@router.post("/batchsimilarity")
def batchsimilarity(queries: List[str] = Body(...), texts: List[str] = Body(...)):
    """
    Scores a list of text against each query in a list of queries. Results are a list of
    {id: value, score: value} sorted by highest score per query, where id is the index in texts.

    Args:
        queries: queries text
        texts: list of text

    Returns:
        list of {id: value, score: value} per query
    """

    scores = application.get().batchsimilarity(queries, texts)
    return scores
|
@@ -0,0 +1,46 @@
|
|
1
|
+
"""
|
2
|
+
Defines API paths for summary endpoints.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import List, Optional
|
6
|
+
|
7
|
+
from fastapi import APIRouter, Body
|
8
|
+
|
9
|
+
from .. import application
|
10
|
+
from ..route import EncodingAPIRoute
|
11
|
+
|
12
|
+
router = APIRouter(route_class=EncodingAPIRoute)
|
13
|
+
|
14
|
+
|
15
|
+
@router.get("/summary")
def summary(text: str, minlength: Optional[int] = None, maxlength: Optional[int] = None):
    """
    Runs the "summary" pipeline against a block of text.

    Args:
        text: text to summarize
        minlength: minimum length for summary
        maxlength: maximum length for summary

    Returns:
        summary text
    """

    # Pipeline arguments are passed as a single tuple
    arguments = (text, minlength, maxlength)
    return application.get().pipeline("summary", arguments)
|
30
|
+
|
31
|
+
|
32
|
+
@router.post("/batchsummary")
def batchsummary(texts: List[str] = Body(...), minlength: Optional[int] = Body(default=None), maxlength: Optional[int] = Body(default=None)):
    """
    Runs the "summary" pipeline against a list of text blocks.

    Args:
        texts: list of text to summarize
        minlength: minimum length for summary
        maxlength: maximum length for summary

    Returns:
        list of summary text
    """

    # Pipeline arguments are passed as a single tuple
    arguments = (texts, minlength, maxlength)
    return application.get().pipeline("summary", arguments)
|
@@ -0,0 +1,42 @@
|
|
1
|
+
"""
|
2
|
+
Defines API paths for tabular endpoints.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import List
|
6
|
+
|
7
|
+
from fastapi import APIRouter, Body
|
8
|
+
|
9
|
+
from .. import application
|
10
|
+
from ..route import EncodingAPIRoute
|
11
|
+
|
12
|
+
router = APIRouter(route_class=EncodingAPIRoute)
|
13
|
+
|
14
|
+
|
15
|
+
@router.get("/tabular")
def tabular(file: str):
    """
    Runs the "tabular" pipeline to split tabular data into rows and columns.

    Args:
        file: file to process

    Returns:
        list of (id, text, tag) elements
    """

    # Pipeline arguments are passed as a single element tuple
    arguments = (file,)
    return application.get().pipeline("tabular", arguments)
|
28
|
+
|
29
|
+
|
30
|
+
@router.post("/batchtabular")
def batchtabular(files: List[str] = Body(...)):
    """
    Runs the "tabular" pipeline to split a list of tabular data files into rows and columns.

    Args:
        files: list of files to process

    Returns:
        list of (id, text, tag) elements
    """

    # Pipeline arguments are passed as a single element tuple
    arguments = (files,)
    return application.get().pipeline("tabular", arguments)
|
@@ -0,0 +1,42 @@
|
|
1
|
+
"""
|
2
|
+
Defines API paths for textractor endpoints.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import List
|
6
|
+
|
7
|
+
from fastapi import APIRouter, Body
|
8
|
+
|
9
|
+
from .. import application
|
10
|
+
from ..route import EncodingAPIRoute
|
11
|
+
|
12
|
+
router = APIRouter(route_class=EncodingAPIRoute)
|
13
|
+
|
14
|
+
|
15
|
+
@router.get("/textract")
def textract(file: str):
    """
    Runs the "textractor" pipeline to extract text from a file at path.

    Args:
        file: file to extract text

    Returns:
        extracted text
    """

    # Pipeline arguments are passed as a single element tuple
    arguments = (file,)
    return application.get().pipeline("textractor", arguments)
|
28
|
+
|
29
|
+
|
30
|
+
@router.post("/batchtextract")
def batchtextract(files: List[str] = Body(...)):
    """
    Runs the "textractor" pipeline to extract text from a list of files.

    Args:
        files: list of files to extract text

    Returns:
        list of extracted text
    """

    # Pipeline arguments are passed as a single element tuple
    arguments = (files,)
    return application.get().pipeline("textractor", arguments)
|
@@ -0,0 +1,33 @@
|
|
1
|
+
"""
|
2
|
+
Defines API paths for TTS endpoints
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import Optional
|
6
|
+
|
7
|
+
from fastapi import APIRouter, Response
|
8
|
+
|
9
|
+
from .. import application
|
10
|
+
from ..route import EncodingAPIRoute
|
11
|
+
|
12
|
+
router = APIRouter(route_class=EncodingAPIRoute)
|
13
|
+
|
14
|
+
|
15
|
+
@router.get("/texttospeech")
def texttospeech(text: str, speaker: Optional[str] = None, encoding: Optional[str] = "mp3"):
    """
    Runs the "texttospeech" pipeline and returns the audio as a file attachment.

    Args:
        text: text
        speaker: optional speaker id, passed through to the pipeline
        encoding: optional audio encoding format, also used as the attachment file extension

    Returns:
        Audio data
    """

    # Convert to audio
    options = {"speaker": speaker, "encoding": encoding}
    audio = application.get().pipeline("texttospeech", text, **options)

    # Attachment file name uses the lower cased encoding as the extension
    disposition = f"attachment;filename=speech.{encoding.lower()}"

    # Write audio
    return Response(audio, headers={"Content-Disposition": disposition})
|
@@ -0,0 +1,42 @@
|
|
1
|
+
"""
|
2
|
+
Defines API paths for transcription endpoints.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import List
|
6
|
+
|
7
|
+
from fastapi import APIRouter, Body
|
8
|
+
|
9
|
+
from .. import application
|
10
|
+
from ..route import EncodingAPIRoute
|
11
|
+
|
12
|
+
router = APIRouter(route_class=EncodingAPIRoute)
|
13
|
+
|
14
|
+
|
15
|
+
@router.get("/transcribe")
def transcribe(file: str):
    """
    Runs the "transcription" pipeline to transcribe an audio file to text.

    Args:
        file: file to transcribe

    Returns:
        transcribed text
    """

    # Pipeline arguments are passed as a single element tuple
    arguments = (file,)
    return application.get().pipeline("transcription", arguments)
|
28
|
+
|
29
|
+
|
30
|
+
@router.post("/batchtranscribe")
def batchtranscribe(files: List[str] = Body(...)):
    """
    Runs the "transcription" pipeline to transcribe a list of audio files to text.

    Args:
        files: list of files to transcribe

    Returns:
        list of transcribed text
    """

    # Pipeline arguments are passed as a single element tuple
    arguments = (files,)
    return application.get().pipeline("transcription", arguments)
|
@@ -0,0 +1,46 @@
|
|
1
|
+
"""
|
2
|
+
Defines API paths for translation endpoints.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import List, Optional
|
6
|
+
|
7
|
+
from fastapi import APIRouter, Body
|
8
|
+
|
9
|
+
from .. import application
|
10
|
+
from ..route import EncodingAPIRoute
|
11
|
+
|
12
|
+
router = APIRouter(route_class=EncodingAPIRoute)
|
13
|
+
|
14
|
+
|
15
|
+
@router.get("/translate")
def translate(text: str, target: Optional[str] = "en", source: Optional[str] = None):
    """
    Runs the "translation" pipeline to translate text from source language into target language.

    Args:
        text: text to translate
        target: target language code, defaults to "en"
        source: source language code, detects language if not provided

    Returns:
        translated text
    """

    # Pipeline arguments are passed as a single tuple
    arguments = (text, target, source)
    return application.get().pipeline("translation", arguments)
|
30
|
+
|
31
|
+
|
32
|
+
@router.post("/batchtranslate")
def batchtranslate(texts: List[str] = Body(...), target: Optional[str] = Body(default="en"), source: Optional[str] = Body(default=None)):
    """
    Runs the "translation" pipeline to translate a list of text from source language into target language.

    Args:
        texts: list of text to translate
        target: target language code, defaults to "en"
        source: source language code, detects language if not provided

    Returns:
        list of translated text
    """

    # Pipeline arguments are passed as a single tuple
    arguments = (texts, target, source)
    return application.get().pipeline("translation", arguments)
|
@@ -0,0 +1,36 @@
|
|
1
|
+
"""
|
2
|
+
Defines API paths for upload endpoints.
|
3
|
+
"""
|
4
|
+
|
5
|
+
import shutil
|
6
|
+
import tempfile
|
7
|
+
|
8
|
+
from typing import List
|
9
|
+
|
10
|
+
from fastapi import APIRouter, File, Form, UploadFile
|
11
|
+
|
12
|
+
from ..route import EncodingAPIRoute
|
13
|
+
|
14
|
+
|
15
|
+
router = APIRouter(route_class=EncodingAPIRoute)
|
16
|
+
|
17
|
+
|
18
|
+
@router.post("/upload")
def upload(files: List[UploadFile] = File(), suffix: str = Form(default=None)):
    """
    Uploads files for local server processing.

    Each uploaded file is copied into a new named temporary file, which is not deleted on close.

    Args:
        files: list of files to upload
        suffix: optional suffix for the temporary file names

    Returns:
        list of server paths
    """

    # The suffix is client-supplied and is appended to the temporary file path as-is.
    # Keep only the text after the last path separator so it can't reference another directory.
    if suffix:
        suffix = suffix.replace("\\", "/").rsplit("/", 1)[-1]

    paths = []
    for f in files:
        with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=suffix) as tmp:
            shutil.copyfileobj(f.file, tmp)
            paths.append(tmp.name)

    return paths
|