mseep-txtai 9.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251) hide show
  1. mseep_txtai-9.1.1.dist-info/METADATA +262 -0
  2. mseep_txtai-9.1.1.dist-info/RECORD +251 -0
  3. mseep_txtai-9.1.1.dist-info/WHEEL +5 -0
  4. mseep_txtai-9.1.1.dist-info/licenses/LICENSE +190 -0
  5. mseep_txtai-9.1.1.dist-info/top_level.txt +1 -0
  6. txtai/__init__.py +16 -0
  7. txtai/agent/__init__.py +12 -0
  8. txtai/agent/base.py +54 -0
  9. txtai/agent/factory.py +39 -0
  10. txtai/agent/model.py +107 -0
  11. txtai/agent/placeholder.py +16 -0
  12. txtai/agent/tool/__init__.py +7 -0
  13. txtai/agent/tool/embeddings.py +69 -0
  14. txtai/agent/tool/factory.py +130 -0
  15. txtai/agent/tool/function.py +49 -0
  16. txtai/ann/__init__.py +7 -0
  17. txtai/ann/base.py +153 -0
  18. txtai/ann/dense/__init__.py +11 -0
  19. txtai/ann/dense/annoy.py +72 -0
  20. txtai/ann/dense/factory.py +76 -0
  21. txtai/ann/dense/faiss.py +233 -0
  22. txtai/ann/dense/hnsw.py +104 -0
  23. txtai/ann/dense/numpy.py +164 -0
  24. txtai/ann/dense/pgvector.py +323 -0
  25. txtai/ann/dense/sqlite.py +303 -0
  26. txtai/ann/dense/torch.py +38 -0
  27. txtai/ann/sparse/__init__.py +7 -0
  28. txtai/ann/sparse/factory.py +61 -0
  29. txtai/ann/sparse/ivfsparse.py +377 -0
  30. txtai/ann/sparse/pgsparse.py +56 -0
  31. txtai/api/__init__.py +18 -0
  32. txtai/api/application.py +134 -0
  33. txtai/api/authorization.py +53 -0
  34. txtai/api/base.py +159 -0
  35. txtai/api/cluster.py +295 -0
  36. txtai/api/extension.py +19 -0
  37. txtai/api/factory.py +40 -0
  38. txtai/api/responses/__init__.py +7 -0
  39. txtai/api/responses/factory.py +30 -0
  40. txtai/api/responses/json.py +56 -0
  41. txtai/api/responses/messagepack.py +51 -0
  42. txtai/api/route.py +41 -0
  43. txtai/api/routers/__init__.py +25 -0
  44. txtai/api/routers/agent.py +38 -0
  45. txtai/api/routers/caption.py +42 -0
  46. txtai/api/routers/embeddings.py +280 -0
  47. txtai/api/routers/entity.py +42 -0
  48. txtai/api/routers/extractor.py +28 -0
  49. txtai/api/routers/labels.py +47 -0
  50. txtai/api/routers/llm.py +61 -0
  51. txtai/api/routers/objects.py +42 -0
  52. txtai/api/routers/openai.py +191 -0
  53. txtai/api/routers/rag.py +61 -0
  54. txtai/api/routers/reranker.py +46 -0
  55. txtai/api/routers/segmentation.py +42 -0
  56. txtai/api/routers/similarity.py +48 -0
  57. txtai/api/routers/summary.py +46 -0
  58. txtai/api/routers/tabular.py +42 -0
  59. txtai/api/routers/textractor.py +42 -0
  60. txtai/api/routers/texttospeech.py +33 -0
  61. txtai/api/routers/transcription.py +42 -0
  62. txtai/api/routers/translation.py +46 -0
  63. txtai/api/routers/upload.py +36 -0
  64. txtai/api/routers/workflow.py +28 -0
  65. txtai/app/__init__.py +5 -0
  66. txtai/app/base.py +821 -0
  67. txtai/archive/__init__.py +9 -0
  68. txtai/archive/base.py +104 -0
  69. txtai/archive/compress.py +51 -0
  70. txtai/archive/factory.py +25 -0
  71. txtai/archive/tar.py +49 -0
  72. txtai/archive/zip.py +35 -0
  73. txtai/cloud/__init__.py +8 -0
  74. txtai/cloud/base.py +106 -0
  75. txtai/cloud/factory.py +70 -0
  76. txtai/cloud/hub.py +101 -0
  77. txtai/cloud/storage.py +125 -0
  78. txtai/console/__init__.py +5 -0
  79. txtai/console/__main__.py +22 -0
  80. txtai/console/base.py +264 -0
  81. txtai/data/__init__.py +10 -0
  82. txtai/data/base.py +138 -0
  83. txtai/data/labels.py +42 -0
  84. txtai/data/questions.py +135 -0
  85. txtai/data/sequences.py +48 -0
  86. txtai/data/texts.py +68 -0
  87. txtai/data/tokens.py +28 -0
  88. txtai/database/__init__.py +14 -0
  89. txtai/database/base.py +342 -0
  90. txtai/database/client.py +227 -0
  91. txtai/database/duckdb.py +150 -0
  92. txtai/database/embedded.py +76 -0
  93. txtai/database/encoder/__init__.py +8 -0
  94. txtai/database/encoder/base.py +37 -0
  95. txtai/database/encoder/factory.py +56 -0
  96. txtai/database/encoder/image.py +43 -0
  97. txtai/database/encoder/serialize.py +28 -0
  98. txtai/database/factory.py +77 -0
  99. txtai/database/rdbms.py +569 -0
  100. txtai/database/schema/__init__.py +6 -0
  101. txtai/database/schema/orm.py +99 -0
  102. txtai/database/schema/statement.py +98 -0
  103. txtai/database/sql/__init__.py +8 -0
  104. txtai/database/sql/aggregate.py +178 -0
  105. txtai/database/sql/base.py +189 -0
  106. txtai/database/sql/expression.py +404 -0
  107. txtai/database/sql/token.py +342 -0
  108. txtai/database/sqlite.py +57 -0
  109. txtai/embeddings/__init__.py +7 -0
  110. txtai/embeddings/base.py +1107 -0
  111. txtai/embeddings/index/__init__.py +14 -0
  112. txtai/embeddings/index/action.py +15 -0
  113. txtai/embeddings/index/autoid.py +92 -0
  114. txtai/embeddings/index/configuration.py +71 -0
  115. txtai/embeddings/index/documents.py +86 -0
  116. txtai/embeddings/index/functions.py +155 -0
  117. txtai/embeddings/index/indexes.py +199 -0
  118. txtai/embeddings/index/indexids.py +60 -0
  119. txtai/embeddings/index/reducer.py +104 -0
  120. txtai/embeddings/index/stream.py +67 -0
  121. txtai/embeddings/index/transform.py +205 -0
  122. txtai/embeddings/search/__init__.py +11 -0
  123. txtai/embeddings/search/base.py +344 -0
  124. txtai/embeddings/search/errors.py +9 -0
  125. txtai/embeddings/search/explain.py +120 -0
  126. txtai/embeddings/search/ids.py +61 -0
  127. txtai/embeddings/search/query.py +69 -0
  128. txtai/embeddings/search/scan.py +196 -0
  129. txtai/embeddings/search/terms.py +46 -0
  130. txtai/graph/__init__.py +10 -0
  131. txtai/graph/base.py +769 -0
  132. txtai/graph/factory.py +61 -0
  133. txtai/graph/networkx.py +275 -0
  134. txtai/graph/query.py +181 -0
  135. txtai/graph/rdbms.py +113 -0
  136. txtai/graph/topics.py +166 -0
  137. txtai/models/__init__.py +9 -0
  138. txtai/models/models.py +268 -0
  139. txtai/models/onnx.py +133 -0
  140. txtai/models/pooling/__init__.py +9 -0
  141. txtai/models/pooling/base.py +141 -0
  142. txtai/models/pooling/cls.py +28 -0
  143. txtai/models/pooling/factory.py +144 -0
  144. txtai/models/pooling/late.py +173 -0
  145. txtai/models/pooling/mean.py +33 -0
  146. txtai/models/pooling/muvera.py +164 -0
  147. txtai/models/registry.py +37 -0
  148. txtai/models/tokendetection.py +122 -0
  149. txtai/pipeline/__init__.py +17 -0
  150. txtai/pipeline/audio/__init__.py +11 -0
  151. txtai/pipeline/audio/audiomixer.py +58 -0
  152. txtai/pipeline/audio/audiostream.py +94 -0
  153. txtai/pipeline/audio/microphone.py +244 -0
  154. txtai/pipeline/audio/signal.py +186 -0
  155. txtai/pipeline/audio/texttoaudio.py +60 -0
  156. txtai/pipeline/audio/texttospeech.py +553 -0
  157. txtai/pipeline/audio/transcription.py +212 -0
  158. txtai/pipeline/base.py +23 -0
  159. txtai/pipeline/data/__init__.py +10 -0
  160. txtai/pipeline/data/filetohtml.py +206 -0
  161. txtai/pipeline/data/htmltomd.py +414 -0
  162. txtai/pipeline/data/segmentation.py +178 -0
  163. txtai/pipeline/data/tabular.py +155 -0
  164. txtai/pipeline/data/textractor.py +139 -0
  165. txtai/pipeline/data/tokenizer.py +112 -0
  166. txtai/pipeline/factory.py +77 -0
  167. txtai/pipeline/hfmodel.py +111 -0
  168. txtai/pipeline/hfpipeline.py +96 -0
  169. txtai/pipeline/image/__init__.py +7 -0
  170. txtai/pipeline/image/caption.py +55 -0
  171. txtai/pipeline/image/imagehash.py +90 -0
  172. txtai/pipeline/image/objects.py +80 -0
  173. txtai/pipeline/llm/__init__.py +11 -0
  174. txtai/pipeline/llm/factory.py +86 -0
  175. txtai/pipeline/llm/generation.py +173 -0
  176. txtai/pipeline/llm/huggingface.py +218 -0
  177. txtai/pipeline/llm/litellm.py +90 -0
  178. txtai/pipeline/llm/llama.py +152 -0
  179. txtai/pipeline/llm/llm.py +75 -0
  180. txtai/pipeline/llm/rag.py +477 -0
  181. txtai/pipeline/nop.py +14 -0
  182. txtai/pipeline/tensors.py +52 -0
  183. txtai/pipeline/text/__init__.py +13 -0
  184. txtai/pipeline/text/crossencoder.py +70 -0
  185. txtai/pipeline/text/entity.py +140 -0
  186. txtai/pipeline/text/labels.py +137 -0
  187. txtai/pipeline/text/lateencoder.py +103 -0
  188. txtai/pipeline/text/questions.py +48 -0
  189. txtai/pipeline/text/reranker.py +57 -0
  190. txtai/pipeline/text/similarity.py +83 -0
  191. txtai/pipeline/text/summary.py +98 -0
  192. txtai/pipeline/text/translation.py +298 -0
  193. txtai/pipeline/train/__init__.py +7 -0
  194. txtai/pipeline/train/hfonnx.py +196 -0
  195. txtai/pipeline/train/hftrainer.py +398 -0
  196. txtai/pipeline/train/mlonnx.py +63 -0
  197. txtai/scoring/__init__.py +12 -0
  198. txtai/scoring/base.py +188 -0
  199. txtai/scoring/bm25.py +29 -0
  200. txtai/scoring/factory.py +95 -0
  201. txtai/scoring/pgtext.py +181 -0
  202. txtai/scoring/sif.py +32 -0
  203. txtai/scoring/sparse.py +218 -0
  204. txtai/scoring/terms.py +499 -0
  205. txtai/scoring/tfidf.py +358 -0
  206. txtai/serialize/__init__.py +10 -0
  207. txtai/serialize/base.py +85 -0
  208. txtai/serialize/errors.py +9 -0
  209. txtai/serialize/factory.py +29 -0
  210. txtai/serialize/messagepack.py +42 -0
  211. txtai/serialize/pickle.py +98 -0
  212. txtai/serialize/serializer.py +46 -0
  213. txtai/util/__init__.py +7 -0
  214. txtai/util/resolver.py +32 -0
  215. txtai/util/sparsearray.py +62 -0
  216. txtai/util/template.py +16 -0
  217. txtai/vectors/__init__.py +8 -0
  218. txtai/vectors/base.py +476 -0
  219. txtai/vectors/dense/__init__.py +12 -0
  220. txtai/vectors/dense/external.py +55 -0
  221. txtai/vectors/dense/factory.py +121 -0
  222. txtai/vectors/dense/huggingface.py +44 -0
  223. txtai/vectors/dense/litellm.py +86 -0
  224. txtai/vectors/dense/llama.py +84 -0
  225. txtai/vectors/dense/m2v.py +67 -0
  226. txtai/vectors/dense/sbert.py +92 -0
  227. txtai/vectors/dense/words.py +211 -0
  228. txtai/vectors/recovery.py +57 -0
  229. txtai/vectors/sparse/__init__.py +7 -0
  230. txtai/vectors/sparse/base.py +90 -0
  231. txtai/vectors/sparse/factory.py +55 -0
  232. txtai/vectors/sparse/sbert.py +34 -0
  233. txtai/version.py +6 -0
  234. txtai/workflow/__init__.py +8 -0
  235. txtai/workflow/base.py +184 -0
  236. txtai/workflow/execute.py +99 -0
  237. txtai/workflow/factory.py +42 -0
  238. txtai/workflow/task/__init__.py +18 -0
  239. txtai/workflow/task/base.py +490 -0
  240. txtai/workflow/task/console.py +24 -0
  241. txtai/workflow/task/export.py +64 -0
  242. txtai/workflow/task/factory.py +89 -0
  243. txtai/workflow/task/file.py +28 -0
  244. txtai/workflow/task/image.py +36 -0
  245. txtai/workflow/task/retrieve.py +61 -0
  246. txtai/workflow/task/service.py +102 -0
  247. txtai/workflow/task/storage.py +110 -0
  248. txtai/workflow/task/stream.py +33 -0
  249. txtai/workflow/task/template.py +116 -0
  250. txtai/workflow/task/url.py +20 -0
  251. txtai/workflow/task/workflow.py +14 -0
@@ -0,0 +1,191 @@
1
+ """
2
+ Defines an OpenAI-compatible API endpoint for txtai.
3
+
4
+ See the following specification for more information:
5
+ https://github.com/openai/openai-openapi
6
+ """
7
+
8
+ import uuid
9
+ import json
10
+ import time
11
+
12
+ from typing import List, Optional, Union
13
+
14
+ from fastapi import APIRouter, Body, Form, UploadFile
15
+ from fastapi.responses import Response, StreamingResponse
16
+
17
+ from .. import application
18
+ from ..route import EncodingAPIRoute
19
+
20
+ router = APIRouter(route_class=EncodingAPIRoute)
21
+
22
+
23
+ # pylint: disable=W0622
24
@router.post("/v1/chat/completions")
def chat(
    messages: List[dict] = Body(...),
    model: str = Body(...),
    max_completion_tokens: Optional[int] = Body(default=None),
    stream: Optional[bool] = Body(default=False),
):
    """
    Handles a chat completion request. The model name selects the target: an agent,
    an embeddings search, a pipeline, a workflow or the default LLM.

    Args:
        messages: list of messages [{"role": role, "content": content}]
        model: agent name, workflow name, pipeline name or "embeddings"
        max_completion_tokens: max length to generate, passed to the target as maxlength
        stream: streams response if True

    Returns:
        chat completion
    """

    # Only pass options that were set
    kwargs = {}
    if stream:
        kwargs["stream"] = stream
    if max_completion_tokens:
        kwargs["maxlength"] = max_completion_tokens

    # All targets except the default LLM only receive the content of the first message
    message = messages[0]["content"]

    app = application.get()

    if model in app.agents:
        # Agent
        result = app.agent(model, message, **kwargs)
    elif model == "embeddings":
        # Embeddings search, text of the top result
        result = app.search(message, 1, **kwargs)[0]["text"]
    elif model in app.pipelines and model != "llm":
        # Pipeline
        result = app.pipeline(model, message, **kwargs)
    elif model in app.workflows:
        # Workflow, first output element
        result = list(app.workflow(model, [message], **kwargs))[0]
    else:
        # Default to running all messages through default LLM
        result = app.pipeline("llm", messages, **kwargs)

    # Write response
    if stream:
        return StreamingResponse(StreamingChatResponse()(model, result))

    return ChatResponse()(model, result)
72
+
73
+
74
@router.post("/v1/embeddings")
def embeddings(input: Union[str, List[str]] = Body(...), model: str = Body(...)):
    """
    Builds embeddings vectors for the input text.

    Args:
        input: single text or list of text
        model: model name, echoed back in the response

    Returns:
        dict with a list of embedding entries, one per input in order
    """

    # A single string is treated as a batch of one
    texts = [input] if isinstance(input, str) else input

    # Convert to embeddings
    vectors = application.get().batchtransform(texts)

    # Build and return response
    data = [{"object": "embedding", "embedding": vector, "index": position} for position, vector in enumerate(vectors)]
    return {"object": "list", "data": data, "model": model}
96
+
97
+
98
@router.post("/v1/audio/speech")
def speech(input: str = Body(...), voice: str = Body(...), response_format: Optional[str] = Body(default="mp3")):
    """
    Runs the texttospeech pipeline for the input text.

    Args:
        input: input text
        voice: speaker name, passed to the pipeline as speaker
        response_format: audio encoding format, defaults to mp3

    Returns:
        audio data
    """

    # Convert to audio
    options = {"speaker": voice, "encoding": response_format}
    audio = application.get().pipeline("texttospeech", input, **options)

    # Write audio
    return Response(audio)
117
+
118
+
119
@router.post("/v1/audio/transcriptions")
def transcribe(file: UploadFile, language: Optional[str] = Form(default=None), response_format: Optional[str] = Form(default="json")):
    """
    Runs the transcription pipeline on an uploaded audio file.

    Args:
        file: audio input file
        language: language of input audio
        response_format: output format (json or text)

    Returns:
        transcribed text when response_format is "text", otherwise {"text": text}
    """

    # Transcribe
    text = application.get().pipeline("transcription", file.file, language=language, task="transcribe")

    if response_format == "text":
        return text

    return {"text": text}
136
+
137
+
138
@router.post("/v1/audio/translations")
def translate(
    file: UploadFile,
    response_format: Optional[str] = Form(default="json"),
):
    """
    Runs the transcription pipeline on an uploaded audio file with the translate task
    and English as the language.

    Args:
        file: audio input file
        response_format: output format (json or text)

    Returns:
        translated text when response_format is "text", otherwise {"text": text}
    """

    # Transcribe and translate to English
    text = application.get().pipeline("transcription", file.file, language="English", task="translate")

    if response_format == "text":
        return text

    return {"text": text}
157
+
158
+
159
class ChatResponse:
    """
    Builds a chat completion response object.
    """

    def __call__(self, model, result):
        """
        Wraps a result in a chat completion payload.

        Args:
            model: model name, echoed back in the response
            result: generated content, set as the assistant message content

        Returns:
            chat completion dict
        """

        return {
            "id": str(uuid.uuid4()),
            "object": "chat.completion",
            # The OpenAI specification defines created as a Unix timestamp in seconds
            "created": int(time.time()),
            "model": model,
            # "index" is the field name used by the OpenAI specification, "id" is kept for existing clients
            "choices": [{"index": 0, "id": 0, "message": {"role": "assistant", "content": result}, "finish_reason": "stop"}],
        }
172
+
173
+
174
class StreamingChatResponse:
    """
    Builds a streaming chat response as a sequence of server-sent event lines.
    """

    def __call__(self, model, result):
        """
        Yields a "data: ..." line for each chunk in result, followed by a final
        "data: [DONE]" line.

        Args:
            model: model name, echoed back in each chunk
            result: iterable of generated content chunks

        Returns:
            generator of server-sent event strings
        """

        # All chunks of a single completion share the same id and created timestamp.
        # The OpenAI specification defines created as a Unix timestamp in seconds.
        uid, created = str(uuid.uuid4()), int(time.time())

        for chunk in result:
            payload = {
                "id": uid,
                "object": "chat.completion.chunk",
                "created": created,
                "model": model,
                # "index" is the field name used by the OpenAI specification, "id" is kept for existing clients
                "choices": [{"index": 0, "id": 0, "delta": {"content": chunk}}],
            }

            yield "data: " + json.dumps(payload) + "\n\n"

        yield "data: [DONE]\n\n"
@@ -0,0 +1,61 @@
1
+ """
2
+ Defines API paths for rag endpoints.
3
+ """
4
+
5
+ from typing import List, Optional
6
+
7
+ from fastapi import APIRouter, Body
8
+ from fastapi.responses import StreamingResponse
9
+
10
+ from .. import application
11
+ from ..route import EncodingAPIRoute
12
+
13
+ router = APIRouter(route_class=EncodingAPIRoute)
14
+
15
+
16
@router.get("/rag")
def rag(query: str, maxlength: Optional[int] = None, stream: Optional[bool] = False):
    """
    Runs the rag pipeline for a single query.

    Args:
        query: input RAG query
        maxlength: optional response max length
        stream: streams response if True

    Returns:
        answer
    """

    # Only pass options that were set
    kwargs = {}
    if stream:
        kwargs["stream"] = stream
    if maxlength:
        kwargs["maxlength"] = maxlength

    # Run pipeline
    result = application.get().pipeline("rag", query, **kwargs)

    # Handle both standard and streaming responses
    if stream:
        return StreamingResponse(result)

    return result
38
+
39
+
40
@router.post("/batchrag")
def batchrag(queries: List[str] = Body(...), maxlength: Optional[int] = Body(default=None), stream: Optional[bool] = Body(default=False)):
    """
    Runs the rag pipeline for a list of queries.

    Args:
        queries: input RAG queries
        maxlength: optional response max length
        stream: streams response if True

    Returns:
        answers
    """

    # Only pass options that were set
    kwargs = {}
    if stream:
        kwargs["stream"] = stream
    if maxlength:
        kwargs["maxlength"] = maxlength

    # Run pipeline
    result = application.get().pipeline("rag", queries, **kwargs)

    # Handle both standard and streaming responses
    if stream:
        return StreamingResponse(result)

    return result
@@ -0,0 +1,46 @@
1
+ """
2
+ Defines API paths for reranking endpoints.
3
+ """
4
+
5
+ from typing import List, Optional
6
+
7
+ from fastapi import APIRouter, Body
8
+
9
+ from .. import application
10
+ from ..route import EncodingAPIRoute
11
+
12
+ router = APIRouter(route_class=EncodingAPIRoute)
13
+
14
+
15
@router.get("/rerank")
def rerank(query: str, limit: Optional[int] = 3, factor: Optional[int] = 10):
    """
    Runs the reranker pipeline for a single query.

    Args:
        query: query text
        limit: maximum results
        factor: factor to multiply limit by for the initial embeddings search

    Returns:
        query results
    """

    # Pipeline arguments are passed as a single tuple
    args = (query, limit, factor)
    return application.get().pipeline("reranker", args)
30
+
31
+
32
@router.post("/batchrerank")
def batchrerank(queries: List[str] = Body(...), limit: Optional[int] = Body(default=3), factor: Optional[int] = Body(default=10)):
    """
    Runs the reranker pipeline for a list of queries.

    Args:
        queries: list of queries
        limit: maximum results
        factor: factor to multiply limit by for the initial embeddings search

    Returns:
        query results
    """

    # Pipeline arguments are passed as a single tuple
    args = (queries, limit, factor)
    return application.get().pipeline("reranker", args)
@@ -0,0 +1,42 @@
1
+ """
2
+ Defines API paths for segmentation endpoints.
3
+ """
4
+
5
+ from typing import List
6
+
7
+ from fastapi import APIRouter, Body
8
+
9
+ from .. import application
10
+ from ..route import EncodingAPIRoute
11
+
12
+ router = APIRouter(route_class=EncodingAPIRoute)
13
+
14
+
15
@router.get("/segment")
def segment(text: str):
    """
    Runs the segmentation pipeline for a single text.

    Args:
        text: input text

    Returns:
        segmented text
    """

    # Pipeline arguments are passed as a single tuple
    args = (text,)
    return application.get().pipeline("segmentation", args)
28
+
29
+
30
@router.post("/batchsegment")
def batchsegment(texts: List[str] = Body(...)):
    """
    Runs the segmentation pipeline for a list of texts.

    Args:
        texts: list of texts to segment

    Returns:
        list of segmented text
    """

    # Pipeline arguments are passed as a single tuple
    args = (texts,)
    return application.get().pipeline("segmentation", args)
@@ -0,0 +1,48 @@
1
+ """
2
+ Defines API paths for similarity endpoints.
3
+ """
4
+
5
+ from typing import List
6
+
7
+ from fastapi import APIRouter, Body
8
+
9
+ from .. import application
10
+ from ..route import EncodingAPIRoute
11
+
12
+ router = APIRouter(route_class=EncodingAPIRoute)
13
+
14
+
15
@router.post("/similarity")
def similarity(query: str = Body(...), texts: List[str] = Body(...)):
    """
    Computes the similarity between a query and a list of text by delegating
    to the application's similarity method.

    Args:
        query: query text
        texts: list of text

    Returns:
        list of {id: value, score: value}
    """

    app = application.get()
    return app.similarity(query, texts)
31
+
32
+
33
@router.post("/batchsimilarity")
def batchsimilarity(queries: List[str] = Body(...), texts: List[str] = Body(...)):
    """
    Computes the similarity between a list of queries and a list of text by
    delegating to the application's batchsimilarity method.

    Args:
        queries: queries text
        texts: list of text

    Returns:
        list of {id: value, score: value} per query
    """

    app = application.get()
    return app.batchsimilarity(queries, texts)
@@ -0,0 +1,46 @@
1
+ """
2
+ Defines API paths for summary endpoints.
3
+ """
4
+
5
+ from typing import List, Optional
6
+
7
+ from fastapi import APIRouter, Body
8
+
9
+ from .. import application
10
+ from ..route import EncodingAPIRoute
11
+
12
+ router = APIRouter(route_class=EncodingAPIRoute)
13
+
14
+
15
@router.get("/summary")
def summary(text: str, minlength: Optional[int] = None, maxlength: Optional[int] = None):
    """
    Runs the summary pipeline for a block of text.

    Args:
        text: text to summarize
        minlength: minimum length for summary
        maxlength: maximum length for summary

    Returns:
        summary text
    """

    # Pipeline arguments are passed as a single tuple
    args = (text, minlength, maxlength)
    return application.get().pipeline("summary", args)
30
+
31
+
32
@router.post("/batchsummary")
def batchsummary(texts: List[str] = Body(...), minlength: Optional[int] = Body(default=None), maxlength: Optional[int] = Body(default=None)):
    """
    Runs the summary pipeline for a list of text.

    Args:
        texts: list of text to summarize
        minlength: minimum length for summary
        maxlength: maximum length for summary

    Returns:
        list of summary text
    """

    # Pipeline arguments are passed as a single tuple
    args = (texts, minlength, maxlength)
    return application.get().pipeline("summary", args)
@@ -0,0 +1,42 @@
1
+ """
2
+ Defines API paths for tabular endpoints.
3
+ """
4
+
5
+ from typing import List
6
+
7
+ from fastapi import APIRouter, Body
8
+
9
+ from .. import application
10
+ from ..route import EncodingAPIRoute
11
+
12
+ router = APIRouter(route_class=EncodingAPIRoute)
13
+
14
+
15
@router.get("/tabular")
def tabular(file: str):
    """
    Runs the tabular pipeline for a single file.

    Args:
        file: file to process

    Returns:
        list of (id, text, tag) elements
    """

    # Pipeline arguments are passed as a single tuple
    args = (file,)
    return application.get().pipeline("tabular", args)
28
+
29
+
30
@router.post("/batchtabular")
def batchtabular(files: List[str] = Body(...)):
    """
    Runs the tabular pipeline for a list of files.

    Args:
        files: list of files to process

    Returns:
        list of (id, text, tag) elements
    """

    # Pipeline arguments are passed as a single tuple
    args = (files,)
    return application.get().pipeline("tabular", args)
@@ -0,0 +1,42 @@
1
+ """
2
+ Defines API paths for textractor endpoints.
3
+ """
4
+
5
+ from typing import List
6
+
7
+ from fastapi import APIRouter, Body
8
+
9
+ from .. import application
10
+ from ..route import EncodingAPIRoute
11
+
12
+ router = APIRouter(route_class=EncodingAPIRoute)
13
+
14
+
15
@router.get("/textract")
def textract(file: str):
    """
    Runs the textractor pipeline for a single file path.

    Args:
        file: file to extract text

    Returns:
        extracted text
    """

    # Pipeline arguments are passed as a single tuple
    args = (file,)
    return application.get().pipeline("textractor", args)
28
+
29
+
30
@router.post("/batchtextract")
def batchtextract(files: List[str] = Body(...)):
    """
    Runs the textractor pipeline for a list of file paths.

    Args:
        files: list of files to extract text

    Returns:
        list of extracted text
    """

    # Pipeline arguments are passed as a single tuple
    args = (files,)
    return application.get().pipeline("textractor", args)
@@ -0,0 +1,33 @@
1
+ """
2
+ Defines API paths for TTS endpoints
3
+ """
4
+
5
+ from typing import Optional
6
+
7
+ from fastapi import APIRouter, Response
8
+
9
+ from .. import application
10
+ from ..route import EncodingAPIRoute
11
+
12
+ router = APIRouter(route_class=EncodingAPIRoute)
13
+
14
+
15
@router.get("/texttospeech")
def texttospeech(text: str, speaker: Optional[str] = None, encoding: Optional[str] = "mp3"):
    """
    Runs the texttospeech pipeline and returns the audio as a file attachment.

    Args:
        text: text
        speaker: optional speaker id
        encoding: optional audio encoding format, defaults to mp3

    Returns:
        Audio data
    """

    # Convert to audio
    options = {"speaker": speaker, "encoding": encoding}
    audio = application.get().pipeline("texttospeech", text, **options)

    # Attachment file name uses the lower-cased encoding as its extension
    extension = encoding.lower()
    headers = {"Content-Disposition": f"attachment;filename=speech.{extension}"}

    # Write audio
    return Response(audio, headers=headers)
@@ -0,0 +1,42 @@
1
+ """
2
+ Defines API paths for transcription endpoints.
3
+ """
4
+
5
+ from typing import List
6
+
7
+ from fastapi import APIRouter, Body
8
+
9
+ from .. import application
10
+ from ..route import EncodingAPIRoute
11
+
12
+ router = APIRouter(route_class=EncodingAPIRoute)
13
+
14
+
15
@router.get("/transcribe")
def transcribe(file: str):
    """
    Runs the transcription pipeline for a single audio file.

    Args:
        file: file to transcribe

    Returns:
        transcribed text
    """

    # Pipeline arguments are passed as a single tuple
    args = (file,)
    return application.get().pipeline("transcription", args)
28
+
29
+
30
@router.post("/batchtranscribe")
def batchtranscribe(files: List[str] = Body(...)):
    """
    Runs the transcription pipeline for a list of audio files.

    Args:
        files: list of files to transcribe

    Returns:
        list of transcribed text
    """

    # Pipeline arguments are passed as a single tuple
    args = (files,)
    return application.get().pipeline("transcription", args)
@@ -0,0 +1,46 @@
1
+ """
2
+ Defines API paths for translation endpoints.
3
+ """
4
+
5
+ from typing import List, Optional
6
+
7
+ from fastapi import APIRouter, Body
8
+
9
+ from .. import application
10
+ from ..route import EncodingAPIRoute
11
+
12
+ router = APIRouter(route_class=EncodingAPIRoute)
13
+
14
+
15
@router.get("/translate")
def translate(text: str, target: Optional[str] = "en", source: Optional[str] = None):
    """
    Runs the translation pipeline for a single text.

    Args:
        text: text to translate
        target: target language code, defaults to "en"
        source: source language code, optional

    Returns:
        translated text
    """

    # Pipeline arguments are passed as a single tuple
    args = (text, target, source)
    return application.get().pipeline("translation", args)
30
+
31
+
32
@router.post("/batchtranslate")
def batchtranslate(texts: List[str] = Body(...), target: Optional[str] = Body(default="en"), source: Optional[str] = Body(default=None)):
    """
    Runs the translation pipeline for a list of text.

    Args:
        texts: list of text to translate
        target: target language code, defaults to "en"
        source: source language code, optional

    Returns:
        list of translated text
    """

    # Pipeline arguments are passed as a single tuple
    args = (texts, target, source)
    return application.get().pipeline("translation", args)
@@ -0,0 +1,36 @@
1
+ """
2
+ Defines API paths for upload endpoints.
3
+ """
4
+
5
+ import shutil
6
+ import tempfile
7
+
8
+ from typing import List
9
+
10
+ from fastapi import APIRouter, File, Form, UploadFile
11
+
12
+ from ..route import EncodingAPIRoute
13
+
14
+
15
+ router = APIRouter(route_class=EncodingAPIRoute)
16
+
17
+
18
@router.post("/upload")
def upload(files: List[UploadFile] = File(), suffix: str = Form(default=None)):
    """
    Copies each uploaded file to a temporary file on the server.

    Args:
        files: list of files to upload
        suffix: optional suffix for the temporary file names

    Returns:
        list of server paths
    """

    def save(source):
        # delete=False keeps the temporary file on disk after the handle is closed
        with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=suffix) as output:
            shutil.copyfileobj(source.file, output)
            return output.name

    return [save(f) for f in files]