mseep-txtai 9.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251) hide show
  1. mseep_txtai-9.1.1.dist-info/METADATA +262 -0
  2. mseep_txtai-9.1.1.dist-info/RECORD +251 -0
  3. mseep_txtai-9.1.1.dist-info/WHEEL +5 -0
  4. mseep_txtai-9.1.1.dist-info/licenses/LICENSE +190 -0
  5. mseep_txtai-9.1.1.dist-info/top_level.txt +1 -0
  6. txtai/__init__.py +16 -0
  7. txtai/agent/__init__.py +12 -0
  8. txtai/agent/base.py +54 -0
  9. txtai/agent/factory.py +39 -0
  10. txtai/agent/model.py +107 -0
  11. txtai/agent/placeholder.py +16 -0
  12. txtai/agent/tool/__init__.py +7 -0
  13. txtai/agent/tool/embeddings.py +69 -0
  14. txtai/agent/tool/factory.py +130 -0
  15. txtai/agent/tool/function.py +49 -0
  16. txtai/ann/__init__.py +7 -0
  17. txtai/ann/base.py +153 -0
  18. txtai/ann/dense/__init__.py +11 -0
  19. txtai/ann/dense/annoy.py +72 -0
  20. txtai/ann/dense/factory.py +76 -0
  21. txtai/ann/dense/faiss.py +233 -0
  22. txtai/ann/dense/hnsw.py +104 -0
  23. txtai/ann/dense/numpy.py +164 -0
  24. txtai/ann/dense/pgvector.py +323 -0
  25. txtai/ann/dense/sqlite.py +303 -0
  26. txtai/ann/dense/torch.py +38 -0
  27. txtai/ann/sparse/__init__.py +7 -0
  28. txtai/ann/sparse/factory.py +61 -0
  29. txtai/ann/sparse/ivfsparse.py +377 -0
  30. txtai/ann/sparse/pgsparse.py +56 -0
  31. txtai/api/__init__.py +18 -0
  32. txtai/api/application.py +134 -0
  33. txtai/api/authorization.py +53 -0
  34. txtai/api/base.py +159 -0
  35. txtai/api/cluster.py +295 -0
  36. txtai/api/extension.py +19 -0
  37. txtai/api/factory.py +40 -0
  38. txtai/api/responses/__init__.py +7 -0
  39. txtai/api/responses/factory.py +30 -0
  40. txtai/api/responses/json.py +56 -0
  41. txtai/api/responses/messagepack.py +51 -0
  42. txtai/api/route.py +41 -0
  43. txtai/api/routers/__init__.py +25 -0
  44. txtai/api/routers/agent.py +38 -0
  45. txtai/api/routers/caption.py +42 -0
  46. txtai/api/routers/embeddings.py +280 -0
  47. txtai/api/routers/entity.py +42 -0
  48. txtai/api/routers/extractor.py +28 -0
  49. txtai/api/routers/labels.py +47 -0
  50. txtai/api/routers/llm.py +61 -0
  51. txtai/api/routers/objects.py +42 -0
  52. txtai/api/routers/openai.py +191 -0
  53. txtai/api/routers/rag.py +61 -0
  54. txtai/api/routers/reranker.py +46 -0
  55. txtai/api/routers/segmentation.py +42 -0
  56. txtai/api/routers/similarity.py +48 -0
  57. txtai/api/routers/summary.py +46 -0
  58. txtai/api/routers/tabular.py +42 -0
  59. txtai/api/routers/textractor.py +42 -0
  60. txtai/api/routers/texttospeech.py +33 -0
  61. txtai/api/routers/transcription.py +42 -0
  62. txtai/api/routers/translation.py +46 -0
  63. txtai/api/routers/upload.py +36 -0
  64. txtai/api/routers/workflow.py +28 -0
  65. txtai/app/__init__.py +5 -0
  66. txtai/app/base.py +821 -0
  67. txtai/archive/__init__.py +9 -0
  68. txtai/archive/base.py +104 -0
  69. txtai/archive/compress.py +51 -0
  70. txtai/archive/factory.py +25 -0
  71. txtai/archive/tar.py +49 -0
  72. txtai/archive/zip.py +35 -0
  73. txtai/cloud/__init__.py +8 -0
  74. txtai/cloud/base.py +106 -0
  75. txtai/cloud/factory.py +70 -0
  76. txtai/cloud/hub.py +101 -0
  77. txtai/cloud/storage.py +125 -0
  78. txtai/console/__init__.py +5 -0
  79. txtai/console/__main__.py +22 -0
  80. txtai/console/base.py +264 -0
  81. txtai/data/__init__.py +10 -0
  82. txtai/data/base.py +138 -0
  83. txtai/data/labels.py +42 -0
  84. txtai/data/questions.py +135 -0
  85. txtai/data/sequences.py +48 -0
  86. txtai/data/texts.py +68 -0
  87. txtai/data/tokens.py +28 -0
  88. txtai/database/__init__.py +14 -0
  89. txtai/database/base.py +342 -0
  90. txtai/database/client.py +227 -0
  91. txtai/database/duckdb.py +150 -0
  92. txtai/database/embedded.py +76 -0
  93. txtai/database/encoder/__init__.py +8 -0
  94. txtai/database/encoder/base.py +37 -0
  95. txtai/database/encoder/factory.py +56 -0
  96. txtai/database/encoder/image.py +43 -0
  97. txtai/database/encoder/serialize.py +28 -0
  98. txtai/database/factory.py +77 -0
  99. txtai/database/rdbms.py +569 -0
  100. txtai/database/schema/__init__.py +6 -0
  101. txtai/database/schema/orm.py +99 -0
  102. txtai/database/schema/statement.py +98 -0
  103. txtai/database/sql/__init__.py +8 -0
  104. txtai/database/sql/aggregate.py +178 -0
  105. txtai/database/sql/base.py +189 -0
  106. txtai/database/sql/expression.py +404 -0
  107. txtai/database/sql/token.py +342 -0
  108. txtai/database/sqlite.py +57 -0
  109. txtai/embeddings/__init__.py +7 -0
  110. txtai/embeddings/base.py +1107 -0
  111. txtai/embeddings/index/__init__.py +14 -0
  112. txtai/embeddings/index/action.py +15 -0
  113. txtai/embeddings/index/autoid.py +92 -0
  114. txtai/embeddings/index/configuration.py +71 -0
  115. txtai/embeddings/index/documents.py +86 -0
  116. txtai/embeddings/index/functions.py +155 -0
  117. txtai/embeddings/index/indexes.py +199 -0
  118. txtai/embeddings/index/indexids.py +60 -0
  119. txtai/embeddings/index/reducer.py +104 -0
  120. txtai/embeddings/index/stream.py +67 -0
  121. txtai/embeddings/index/transform.py +205 -0
  122. txtai/embeddings/search/__init__.py +11 -0
  123. txtai/embeddings/search/base.py +344 -0
  124. txtai/embeddings/search/errors.py +9 -0
  125. txtai/embeddings/search/explain.py +120 -0
  126. txtai/embeddings/search/ids.py +61 -0
  127. txtai/embeddings/search/query.py +69 -0
  128. txtai/embeddings/search/scan.py +196 -0
  129. txtai/embeddings/search/terms.py +46 -0
  130. txtai/graph/__init__.py +10 -0
  131. txtai/graph/base.py +769 -0
  132. txtai/graph/factory.py +61 -0
  133. txtai/graph/networkx.py +275 -0
  134. txtai/graph/query.py +181 -0
  135. txtai/graph/rdbms.py +113 -0
  136. txtai/graph/topics.py +166 -0
  137. txtai/models/__init__.py +9 -0
  138. txtai/models/models.py +268 -0
  139. txtai/models/onnx.py +133 -0
  140. txtai/models/pooling/__init__.py +9 -0
  141. txtai/models/pooling/base.py +141 -0
  142. txtai/models/pooling/cls.py +28 -0
  143. txtai/models/pooling/factory.py +144 -0
  144. txtai/models/pooling/late.py +173 -0
  145. txtai/models/pooling/mean.py +33 -0
  146. txtai/models/pooling/muvera.py +164 -0
  147. txtai/models/registry.py +37 -0
  148. txtai/models/tokendetection.py +122 -0
  149. txtai/pipeline/__init__.py +17 -0
  150. txtai/pipeline/audio/__init__.py +11 -0
  151. txtai/pipeline/audio/audiomixer.py +58 -0
  152. txtai/pipeline/audio/audiostream.py +94 -0
  153. txtai/pipeline/audio/microphone.py +244 -0
  154. txtai/pipeline/audio/signal.py +186 -0
  155. txtai/pipeline/audio/texttoaudio.py +60 -0
  156. txtai/pipeline/audio/texttospeech.py +553 -0
  157. txtai/pipeline/audio/transcription.py +212 -0
  158. txtai/pipeline/base.py +23 -0
  159. txtai/pipeline/data/__init__.py +10 -0
  160. txtai/pipeline/data/filetohtml.py +206 -0
  161. txtai/pipeline/data/htmltomd.py +414 -0
  162. txtai/pipeline/data/segmentation.py +178 -0
  163. txtai/pipeline/data/tabular.py +155 -0
  164. txtai/pipeline/data/textractor.py +139 -0
  165. txtai/pipeline/data/tokenizer.py +112 -0
  166. txtai/pipeline/factory.py +77 -0
  167. txtai/pipeline/hfmodel.py +111 -0
  168. txtai/pipeline/hfpipeline.py +96 -0
  169. txtai/pipeline/image/__init__.py +7 -0
  170. txtai/pipeline/image/caption.py +55 -0
  171. txtai/pipeline/image/imagehash.py +90 -0
  172. txtai/pipeline/image/objects.py +80 -0
  173. txtai/pipeline/llm/__init__.py +11 -0
  174. txtai/pipeline/llm/factory.py +86 -0
  175. txtai/pipeline/llm/generation.py +173 -0
  176. txtai/pipeline/llm/huggingface.py +218 -0
  177. txtai/pipeline/llm/litellm.py +90 -0
  178. txtai/pipeline/llm/llama.py +152 -0
  179. txtai/pipeline/llm/llm.py +75 -0
  180. txtai/pipeline/llm/rag.py +477 -0
  181. txtai/pipeline/nop.py +14 -0
  182. txtai/pipeline/tensors.py +52 -0
  183. txtai/pipeline/text/__init__.py +13 -0
  184. txtai/pipeline/text/crossencoder.py +70 -0
  185. txtai/pipeline/text/entity.py +140 -0
  186. txtai/pipeline/text/labels.py +137 -0
  187. txtai/pipeline/text/lateencoder.py +103 -0
  188. txtai/pipeline/text/questions.py +48 -0
  189. txtai/pipeline/text/reranker.py +57 -0
  190. txtai/pipeline/text/similarity.py +83 -0
  191. txtai/pipeline/text/summary.py +98 -0
  192. txtai/pipeline/text/translation.py +298 -0
  193. txtai/pipeline/train/__init__.py +7 -0
  194. txtai/pipeline/train/hfonnx.py +196 -0
  195. txtai/pipeline/train/hftrainer.py +398 -0
  196. txtai/pipeline/train/mlonnx.py +63 -0
  197. txtai/scoring/__init__.py +12 -0
  198. txtai/scoring/base.py +188 -0
  199. txtai/scoring/bm25.py +29 -0
  200. txtai/scoring/factory.py +95 -0
  201. txtai/scoring/pgtext.py +181 -0
  202. txtai/scoring/sif.py +32 -0
  203. txtai/scoring/sparse.py +218 -0
  204. txtai/scoring/terms.py +499 -0
  205. txtai/scoring/tfidf.py +358 -0
  206. txtai/serialize/__init__.py +10 -0
  207. txtai/serialize/base.py +85 -0
  208. txtai/serialize/errors.py +9 -0
  209. txtai/serialize/factory.py +29 -0
  210. txtai/serialize/messagepack.py +42 -0
  211. txtai/serialize/pickle.py +98 -0
  212. txtai/serialize/serializer.py +46 -0
  213. txtai/util/__init__.py +7 -0
  214. txtai/util/resolver.py +32 -0
  215. txtai/util/sparsearray.py +62 -0
  216. txtai/util/template.py +16 -0
  217. txtai/vectors/__init__.py +8 -0
  218. txtai/vectors/base.py +476 -0
  219. txtai/vectors/dense/__init__.py +12 -0
  220. txtai/vectors/dense/external.py +55 -0
  221. txtai/vectors/dense/factory.py +121 -0
  222. txtai/vectors/dense/huggingface.py +44 -0
  223. txtai/vectors/dense/litellm.py +86 -0
  224. txtai/vectors/dense/llama.py +84 -0
  225. txtai/vectors/dense/m2v.py +67 -0
  226. txtai/vectors/dense/sbert.py +92 -0
  227. txtai/vectors/dense/words.py +211 -0
  228. txtai/vectors/recovery.py +57 -0
  229. txtai/vectors/sparse/__init__.py +7 -0
  230. txtai/vectors/sparse/base.py +90 -0
  231. txtai/vectors/sparse/factory.py +55 -0
  232. txtai/vectors/sparse/sbert.py +34 -0
  233. txtai/version.py +6 -0
  234. txtai/workflow/__init__.py +8 -0
  235. txtai/workflow/base.py +184 -0
  236. txtai/workflow/execute.py +99 -0
  237. txtai/workflow/factory.py +42 -0
  238. txtai/workflow/task/__init__.py +18 -0
  239. txtai/workflow/task/base.py +490 -0
  240. txtai/workflow/task/console.py +24 -0
  241. txtai/workflow/task/export.py +64 -0
  242. txtai/workflow/task/factory.py +89 -0
  243. txtai/workflow/task/file.py +28 -0
  244. txtai/workflow/task/image.py +36 -0
  245. txtai/workflow/task/retrieve.py +61 -0
  246. txtai/workflow/task/service.py +102 -0
  247. txtai/workflow/task/storage.py +110 -0
  248. txtai/workflow/task/stream.py +33 -0
  249. txtai/workflow/task/template.py +116 -0
  250. txtai/workflow/task/url.py +20 -0
  251. txtai/workflow/task/workflow.py +14 -0
@@ -0,0 +1,196 @@
1
+ """
2
+ Scan module
3
+ """
4
+
5
+
6
class Scan:
    """
    Runs the index search portion of a batch of parsed queries.
    """

    def __init__(self, search, limit, weights, index):
        """
        Builds a scan action.

        Args:
            search: index search function
            limit: maximum results
            weights: default hybrid score weights
            index: default index name
        """

        # Callable that executes a batch search against an index
        self.search = search

        # Fallback values used when a clause does not set its own
        self.limit = limit
        self.weights = weights
        self.index = index

        # Default number of candidates
        self.candidates = None

    def __call__(self, queries, parameters):
        """
        Runs all similar clauses found in a list of queries, one batch search per target index.

        Args:
            queries: list of queries to run
            parameters: list of dicts of named parameters to bind to placeholders

        Returns:
            list of (query id, search result) ordered by query clause id
        """

        # Clause uid -> (query id, search result)
        matches = {}

        # Default candidate count, derived at most once and only when needed
        fallback = None

        for name, clauses in self.parse(queries, parameters).items():
            # Largest explicit candidate count in this group, otherwise the derived default
            sizes = [clause.candidates for clause in clauses if clause.candidates]
            if sizes:
                size = max(sizes)
            else:
                if not fallback:
                    fallback = self.default(queries)

                size = fallback

            # Largest explicit weight in this group, otherwise the default weights
            explicit = [clause.weights for clause in clauses if clause.weights is not None]
            weight = max(explicit) if explicit else self.weights

            # Clauses without a target index run against the default index
            target = name or self.index

            # Batch search, results are positionally aligned with the clause list
            texts = [clause.text for clause in clauses]
            for position, result in enumerate(self.search(texts, size, weight, target)):
                clause = clauses[position]
                matches[clause.uid] = (clause.qid, result)

        # Clause ids are unique, return results in clause id order
        return [matches[uid] for uid in sorted(matches)]

    def parse(self, queries, parameters):
        """
        Collects the similar clauses of each parsed query and groups them by target index.

        Args:
            queries: list of parsed queries
            parameters: list of dicts of named parameters to bind to placeholders

        Returns:
            dict of index name to list of query clauses
        """

        grouped, uid = {}, 0
        for qid, query in enumerate(queries):
            # Queries without similar clauses have nothing to scan
            if "similar" not in query:
                continue

            for args in query["similar"]:
                # Swap placeholders for bound values when parameters exist for this query
                if parameters and parameters[qid]:
                    args = self.bind(args, parameters[qid])

                # Parse the clause and file it under its target index
                clause = Clause(uid, qid, args)
                grouped.setdefault(clause.index, []).append(clause)

                # Clause ids increase across all queries
                uid += 1

        return grouped

    def bind(self, similar, parameters):
        """
        Replaces ":name" placeholders in similar function arguments with bound parameter values.

        Args:
            similar: similar function call arguments
            parameters: bind parameters

        Returns:
            similar function call arguments with resolved bind parameters
        """

        # Arguments that are not a known ":name" placeholder pass through unchanged
        return [
            parameters[arg[1:]] if isinstance(arg, str) and arg.startswith(":") and arg[1:] in parameters else arg
            for arg in similar
        ]

    def default(self, queries):
        """
        Derives the default number of candidates, which is the number of results to bring back from
        index queries when a similar() clause does not set it.

        When any query has a where clause with more than one whitespace-separated token, the default
        is 10x the query limit. Otherwise, it is the query limit.

        Args:
            queries: list of queries

        Returns:
            default candidate list size
        """

        for query in queries:
            where = query.get("where")
            if where and len(where.split()) > 1:
                # Additional filtering follows the index query, request a larger candidate pool
                return self.limit * 10

        return self.limit
152
+
153
+
154
class Clause:
    """
    Parses and stores query clause parameters.
    """

    def __init__(self, uid, qid, params):
        """
        Creates a new query clause.

        Args:
            uid: query clause id
            qid: query id clause is a part of
            params: query parameters to parse, the first element is the query text
        """

        self.uid, self.qid = uid, qid
        self.text, self.index = params[0], None
        self.candidates, self.weights = None, None

        # Parse additional similar clause parameters
        if len(params) > 1:
            self.parse(params[1:])

    def parse(self, params):
        """
        Parses clause parameters into this instance. Each parameter is classified by its value:
        integers set the number of candidates, decimal numbers set the hybrid score weights and
        anything else is stored as the target index. When a kind repeats, the last one wins.

        Args:
            params: query clause parameters
        """

        for param in params:
            text = isinstance(param, str)

            # isdecimal() only matches characters int() and float() can convert. isdigit() also
            # matches characters such as superscript digits, which raise a ValueError on conversion.
            if isinstance(param, int) or (text and param.isdecimal()):
                # Number of query candidates
                self.candidates = int(param)

            # Remove at most one decimal point. Values with multiple points such as "1.2.3" are not
            # numbers and fall through to the target index branch instead of failing in float().
            elif isinstance(param, float) or (text and param.replace(".", "", 1).isdecimal()):
                # Hybrid score weights
                self.weights = float(param)

            else:
                # Target index
                self.index = param
@@ -0,0 +1,46 @@
1
+ """
2
+ Terms module
3
+ """
4
+
5
+
6
class Terms:
    """
    Reduces a query statement down to keyword terms. This method extracts the query text from similar clauses if it's a SQL statement.
    Otherwise, the original query is returned.
    """

    def __init__(self, embeddings):
        """
        Create a new terms action.

        Args:
            embeddings: embeddings instance
        """

        self.database = embeddings.database

    def __call__(self, queries):
        """
        Extracts keyword terms from a list of queries.

        Args:
            queries: list of queries

        Returns:
            list of queries reduced down to keyword term strings
        """

        # Return original queries when database is None
        if not self.database:
            return queries

        # Parse queries and extract keyword terms for each query
        terms = []
        for query in queries:
            # Parse query
            parse = self.database.parse(query)

            # Join terms from similar clauses. A parsed query without similar clauses has no
            # keyword terms and reduces to an empty string instead of raising a KeyError.
            terms.append(" ".join(" ".join(s) for s in parse.get("similar", [])))

        return terms
@@ -0,0 +1,10 @@
1
+ """
2
+ Graph imports
3
+ """
4
+
5
+ from .base import Graph
6
+ from .factory import GraphFactory
7
+ from .networkx import NetworkX
8
+ from .query import Query
9
+ from .rdbms import RDBMS
10
+ from .topics import Topics