mseep-txtai 9.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251)
  1. mseep_txtai-9.1.1.dist-info/METADATA +262 -0
  2. mseep_txtai-9.1.1.dist-info/RECORD +251 -0
  3. mseep_txtai-9.1.1.dist-info/WHEEL +5 -0
  4. mseep_txtai-9.1.1.dist-info/licenses/LICENSE +190 -0
  5. mseep_txtai-9.1.1.dist-info/top_level.txt +1 -0
  6. txtai/__init__.py +16 -0
  7. txtai/agent/__init__.py +12 -0
  8. txtai/agent/base.py +54 -0
  9. txtai/agent/factory.py +39 -0
  10. txtai/agent/model.py +107 -0
  11. txtai/agent/placeholder.py +16 -0
  12. txtai/agent/tool/__init__.py +7 -0
  13. txtai/agent/tool/embeddings.py +69 -0
  14. txtai/agent/tool/factory.py +130 -0
  15. txtai/agent/tool/function.py +49 -0
  16. txtai/ann/__init__.py +7 -0
  17. txtai/ann/base.py +153 -0
  18. txtai/ann/dense/__init__.py +11 -0
  19. txtai/ann/dense/annoy.py +72 -0
  20. txtai/ann/dense/factory.py +76 -0
  21. txtai/ann/dense/faiss.py +233 -0
  22. txtai/ann/dense/hnsw.py +104 -0
  23. txtai/ann/dense/numpy.py +164 -0
  24. txtai/ann/dense/pgvector.py +323 -0
  25. txtai/ann/dense/sqlite.py +303 -0
  26. txtai/ann/dense/torch.py +38 -0
  27. txtai/ann/sparse/__init__.py +7 -0
  28. txtai/ann/sparse/factory.py +61 -0
  29. txtai/ann/sparse/ivfsparse.py +377 -0
  30. txtai/ann/sparse/pgsparse.py +56 -0
  31. txtai/api/__init__.py +18 -0
  32. txtai/api/application.py +134 -0
  33. txtai/api/authorization.py +53 -0
  34. txtai/api/base.py +159 -0
  35. txtai/api/cluster.py +295 -0
  36. txtai/api/extension.py +19 -0
  37. txtai/api/factory.py +40 -0
  38. txtai/api/responses/__init__.py +7 -0
  39. txtai/api/responses/factory.py +30 -0
  40. txtai/api/responses/json.py +56 -0
  41. txtai/api/responses/messagepack.py +51 -0
  42. txtai/api/route.py +41 -0
  43. txtai/api/routers/__init__.py +25 -0
  44. txtai/api/routers/agent.py +38 -0
  45. txtai/api/routers/caption.py +42 -0
  46. txtai/api/routers/embeddings.py +280 -0
  47. txtai/api/routers/entity.py +42 -0
  48. txtai/api/routers/extractor.py +28 -0
  49. txtai/api/routers/labels.py +47 -0
  50. txtai/api/routers/llm.py +61 -0
  51. txtai/api/routers/objects.py +42 -0
  52. txtai/api/routers/openai.py +191 -0
  53. txtai/api/routers/rag.py +61 -0
  54. txtai/api/routers/reranker.py +46 -0
  55. txtai/api/routers/segmentation.py +42 -0
  56. txtai/api/routers/similarity.py +48 -0
  57. txtai/api/routers/summary.py +46 -0
  58. txtai/api/routers/tabular.py +42 -0
  59. txtai/api/routers/textractor.py +42 -0
  60. txtai/api/routers/texttospeech.py +33 -0
  61. txtai/api/routers/transcription.py +42 -0
  62. txtai/api/routers/translation.py +46 -0
  63. txtai/api/routers/upload.py +36 -0
  64. txtai/api/routers/workflow.py +28 -0
  65. txtai/app/__init__.py +5 -0
  66. txtai/app/base.py +821 -0
  67. txtai/archive/__init__.py +9 -0
  68. txtai/archive/base.py +104 -0
  69. txtai/archive/compress.py +51 -0
  70. txtai/archive/factory.py +25 -0
  71. txtai/archive/tar.py +49 -0
  72. txtai/archive/zip.py +35 -0
  73. txtai/cloud/__init__.py +8 -0
  74. txtai/cloud/base.py +106 -0
  75. txtai/cloud/factory.py +70 -0
  76. txtai/cloud/hub.py +101 -0
  77. txtai/cloud/storage.py +125 -0
  78. txtai/console/__init__.py +5 -0
  79. txtai/console/__main__.py +22 -0
  80. txtai/console/base.py +264 -0
  81. txtai/data/__init__.py +10 -0
  82. txtai/data/base.py +138 -0
  83. txtai/data/labels.py +42 -0
  84. txtai/data/questions.py +135 -0
  85. txtai/data/sequences.py +48 -0
  86. txtai/data/texts.py +68 -0
  87. txtai/data/tokens.py +28 -0
  88. txtai/database/__init__.py +14 -0
  89. txtai/database/base.py +342 -0
  90. txtai/database/client.py +227 -0
  91. txtai/database/duckdb.py +150 -0
  92. txtai/database/embedded.py +76 -0
  93. txtai/database/encoder/__init__.py +8 -0
  94. txtai/database/encoder/base.py +37 -0
  95. txtai/database/encoder/factory.py +56 -0
  96. txtai/database/encoder/image.py +43 -0
  97. txtai/database/encoder/serialize.py +28 -0
  98. txtai/database/factory.py +77 -0
  99. txtai/database/rdbms.py +569 -0
  100. txtai/database/schema/__init__.py +6 -0
  101. txtai/database/schema/orm.py +99 -0
  102. txtai/database/schema/statement.py +98 -0
  103. txtai/database/sql/__init__.py +8 -0
  104. txtai/database/sql/aggregate.py +178 -0
  105. txtai/database/sql/base.py +189 -0
  106. txtai/database/sql/expression.py +404 -0
  107. txtai/database/sql/token.py +342 -0
  108. txtai/database/sqlite.py +57 -0
  109. txtai/embeddings/__init__.py +7 -0
  110. txtai/embeddings/base.py +1107 -0
  111. txtai/embeddings/index/__init__.py +14 -0
  112. txtai/embeddings/index/action.py +15 -0
  113. txtai/embeddings/index/autoid.py +92 -0
  114. txtai/embeddings/index/configuration.py +71 -0
  115. txtai/embeddings/index/documents.py +86 -0
  116. txtai/embeddings/index/functions.py +155 -0
  117. txtai/embeddings/index/indexes.py +199 -0
  118. txtai/embeddings/index/indexids.py +60 -0
  119. txtai/embeddings/index/reducer.py +104 -0
  120. txtai/embeddings/index/stream.py +67 -0
  121. txtai/embeddings/index/transform.py +205 -0
  122. txtai/embeddings/search/__init__.py +11 -0
  123. txtai/embeddings/search/base.py +344 -0
  124. txtai/embeddings/search/errors.py +9 -0
  125. txtai/embeddings/search/explain.py +120 -0
  126. txtai/embeddings/search/ids.py +61 -0
  127. txtai/embeddings/search/query.py +69 -0
  128. txtai/embeddings/search/scan.py +196 -0
  129. txtai/embeddings/search/terms.py +46 -0
  130. txtai/graph/__init__.py +10 -0
  131. txtai/graph/base.py +769 -0
  132. txtai/graph/factory.py +61 -0
  133. txtai/graph/networkx.py +275 -0
  134. txtai/graph/query.py +181 -0
  135. txtai/graph/rdbms.py +113 -0
  136. txtai/graph/topics.py +166 -0
  137. txtai/models/__init__.py +9 -0
  138. txtai/models/models.py +268 -0
  139. txtai/models/onnx.py +133 -0
  140. txtai/models/pooling/__init__.py +9 -0
  141. txtai/models/pooling/base.py +141 -0
  142. txtai/models/pooling/cls.py +28 -0
  143. txtai/models/pooling/factory.py +144 -0
  144. txtai/models/pooling/late.py +173 -0
  145. txtai/models/pooling/mean.py +33 -0
  146. txtai/models/pooling/muvera.py +164 -0
  147. txtai/models/registry.py +37 -0
  148. txtai/models/tokendetection.py +122 -0
  149. txtai/pipeline/__init__.py +17 -0
  150. txtai/pipeline/audio/__init__.py +11 -0
  151. txtai/pipeline/audio/audiomixer.py +58 -0
  152. txtai/pipeline/audio/audiostream.py +94 -0
  153. txtai/pipeline/audio/microphone.py +244 -0
  154. txtai/pipeline/audio/signal.py +186 -0
  155. txtai/pipeline/audio/texttoaudio.py +60 -0
  156. txtai/pipeline/audio/texttospeech.py +553 -0
  157. txtai/pipeline/audio/transcription.py +212 -0
  158. txtai/pipeline/base.py +23 -0
  159. txtai/pipeline/data/__init__.py +10 -0
  160. txtai/pipeline/data/filetohtml.py +206 -0
  161. txtai/pipeline/data/htmltomd.py +414 -0
  162. txtai/pipeline/data/segmentation.py +178 -0
  163. txtai/pipeline/data/tabular.py +155 -0
  164. txtai/pipeline/data/textractor.py +139 -0
  165. txtai/pipeline/data/tokenizer.py +112 -0
  166. txtai/pipeline/factory.py +77 -0
  167. txtai/pipeline/hfmodel.py +111 -0
  168. txtai/pipeline/hfpipeline.py +96 -0
  169. txtai/pipeline/image/__init__.py +7 -0
  170. txtai/pipeline/image/caption.py +55 -0
  171. txtai/pipeline/image/imagehash.py +90 -0
  172. txtai/pipeline/image/objects.py +80 -0
  173. txtai/pipeline/llm/__init__.py +11 -0
  174. txtai/pipeline/llm/factory.py +86 -0
  175. txtai/pipeline/llm/generation.py +173 -0
  176. txtai/pipeline/llm/huggingface.py +218 -0
  177. txtai/pipeline/llm/litellm.py +90 -0
  178. txtai/pipeline/llm/llama.py +152 -0
  179. txtai/pipeline/llm/llm.py +75 -0
  180. txtai/pipeline/llm/rag.py +477 -0
  181. txtai/pipeline/nop.py +14 -0
  182. txtai/pipeline/tensors.py +52 -0
  183. txtai/pipeline/text/__init__.py +13 -0
  184. txtai/pipeline/text/crossencoder.py +70 -0
  185. txtai/pipeline/text/entity.py +140 -0
  186. txtai/pipeline/text/labels.py +137 -0
  187. txtai/pipeline/text/lateencoder.py +103 -0
  188. txtai/pipeline/text/questions.py +48 -0
  189. txtai/pipeline/text/reranker.py +57 -0
  190. txtai/pipeline/text/similarity.py +83 -0
  191. txtai/pipeline/text/summary.py +98 -0
  192. txtai/pipeline/text/translation.py +298 -0
  193. txtai/pipeline/train/__init__.py +7 -0
  194. txtai/pipeline/train/hfonnx.py +196 -0
  195. txtai/pipeline/train/hftrainer.py +398 -0
  196. txtai/pipeline/train/mlonnx.py +63 -0
  197. txtai/scoring/__init__.py +12 -0
  198. txtai/scoring/base.py +188 -0
  199. txtai/scoring/bm25.py +29 -0
  200. txtai/scoring/factory.py +95 -0
  201. txtai/scoring/pgtext.py +181 -0
  202. txtai/scoring/sif.py +32 -0
  203. txtai/scoring/sparse.py +218 -0
  204. txtai/scoring/terms.py +499 -0
  205. txtai/scoring/tfidf.py +358 -0
  206. txtai/serialize/__init__.py +10 -0
  207. txtai/serialize/base.py +85 -0
  208. txtai/serialize/errors.py +9 -0
  209. txtai/serialize/factory.py +29 -0
  210. txtai/serialize/messagepack.py +42 -0
  211. txtai/serialize/pickle.py +98 -0
  212. txtai/serialize/serializer.py +46 -0
  213. txtai/util/__init__.py +7 -0
  214. txtai/util/resolver.py +32 -0
  215. txtai/util/sparsearray.py +62 -0
  216. txtai/util/template.py +16 -0
  217. txtai/vectors/__init__.py +8 -0
  218. txtai/vectors/base.py +476 -0
  219. txtai/vectors/dense/__init__.py +12 -0
  220. txtai/vectors/dense/external.py +55 -0
  221. txtai/vectors/dense/factory.py +121 -0
  222. txtai/vectors/dense/huggingface.py +44 -0
  223. txtai/vectors/dense/litellm.py +86 -0
  224. txtai/vectors/dense/llama.py +84 -0
  225. txtai/vectors/dense/m2v.py +67 -0
  226. txtai/vectors/dense/sbert.py +92 -0
  227. txtai/vectors/dense/words.py +211 -0
  228. txtai/vectors/recovery.py +57 -0
  229. txtai/vectors/sparse/__init__.py +7 -0
  230. txtai/vectors/sparse/base.py +90 -0
  231. txtai/vectors/sparse/factory.py +55 -0
  232. txtai/vectors/sparse/sbert.py +34 -0
  233. txtai/version.py +6 -0
  234. txtai/workflow/__init__.py +8 -0
  235. txtai/workflow/base.py +184 -0
  236. txtai/workflow/execute.py +99 -0
  237. txtai/workflow/factory.py +42 -0
  238. txtai/workflow/task/__init__.py +18 -0
  239. txtai/workflow/task/base.py +490 -0
  240. txtai/workflow/task/console.py +24 -0
  241. txtai/workflow/task/export.py +64 -0
  242. txtai/workflow/task/factory.py +89 -0
  243. txtai/workflow/task/file.py +28 -0
  244. txtai/workflow/task/image.py +36 -0
  245. txtai/workflow/task/retrieve.py +61 -0
  246. txtai/workflow/task/service.py +102 -0
  247. txtai/workflow/task/storage.py +110 -0
  248. txtai/workflow/task/stream.py +33 -0
  249. txtai/workflow/task/template.py +116 -0
  250. txtai/workflow/task/url.py +20 -0
  251. txtai/workflow/task/workflow.py +14 -0
@@ -0,0 +1,150 @@
1
+ """
2
+ DuckDB module
3
+ """
4
+
5
+ import os
6
+ import re
7
+
8
+ from tempfile import TemporaryDirectory
9
+
10
+ # Conditional import
11
+ try:
12
+ import duckdb
13
+
14
+ DUCKDB = True
15
+ except ImportError:
16
+ DUCKDB = False
17
+
18
+ from .embedded import Embedded
19
+ from .schema import Statement
20
+
21
+
22
class DuckDB(Embedded):
    """
    Database instance backed by DuckDB.
    """

    # Delete single document and object
    DELETE_DOCUMENT = "DELETE FROM documents WHERE id = ?"
    DELETE_OBJECT = "DELETE FROM objects WHERE id = ?"

    def __init__(self, config):
        """
        Creates a new DuckDB database.

        Args:
            config: database configuration parameters

        Raises:
            ImportError: if the duckdb package could not be imported
        """

        super().__init__(config)

        if not DUCKDB:
            raise ImportError('DuckDB is not available - install "database" extra to enable')

    def execute(self, function, *args):
        # Call parent method with DuckDB compatible arguments
        return super().execute(function, *self.formatargs(args))

    def insertdocument(self, uid, data, tags, entry):
        # Delete existing document
        self.cursor.execute(DuckDB.DELETE_DOCUMENT, [uid])

        # Call parent method
        super().insertdocument(uid, data, tags, entry)

    def insertobject(self, uid, data, tags, entry):
        # Delete existing object
        self.cursor.execute(DuckDB.DELETE_OBJECT, [uid])

        # Call parent method
        super().insertobject(uid, data, tags, entry)

    def connect(self, path=":memory:"):
        # Create connection and start a transaction
        # pylint: disable=I1101
        connection = duckdb.connect(path)
        connection.begin()

        return connection

    def getcursor(self):
        # The connection object is used directly as the cursor
        return self.connection

    def jsonprefix(self):
        # Return json column prefix
        return "json_extract_string(data"

    def jsoncolumn(self, name):
        # Generate json column using json_extract function
        return f"json_extract_string(data, '$.{name}')"

    def rows(self):
        # Iteratively retrieve and yield rows
        batch = 256
        rows = self.cursor.fetchmany(batch)
        while rows:
            yield from rows
            rows = self.cursor.fetchmany(batch)

    def addfunctions(self):
        # DuckDB doesn't currently support scalar functions
        return

    def copy(self, path):
        """
        Copies the current database into a new DuckDB database at path. Tables are exported
        to parquet files in a temporary directory and imported into the new database.

        Args:
            path: path to write database, an existing file at this path is removed first

        Returns:
            new connection with data copied over and a transaction started
        """

        # Delete existing file, if necessary
        if os.path.exists(path):
            os.remove(path)

        # Create database connection
        # pylint: disable=I1101
        connection = duckdb.connect(path)

        try:
            # List of tables
            tables = ["documents", "objects", "sections"]

            with TemporaryDirectory() as directory:
                # Export existing tables
                for table in tables:
                    self.connection.execute(f"COPY {table} TO '{directory}/{table}.parquet' (FORMAT parquet)")

                # Create initial schema
                for schema in [Statement.CREATE_DOCUMENTS, Statement.CREATE_OBJECTS, Statement.CREATE_SECTIONS % "sections"]:
                    connection.execute(schema)

                # Import tables into new schema
                for table in tables:
                    connection.execute(f"COPY {table} FROM '{directory}/{table}.parquet' (FORMAT parquet)")

                # Create indexes and sync data to database file
                connection.execute(Statement.CREATE_SECTIONS_INDEX)
                connection.execute("CHECKPOINT")

            # Start transaction
            connection.begin()
        except Exception:
            # Don't leak the new connection when the copy fails part way through
            connection.close()
            raise

        return connection

    def formatargs(self, args):
        """
        DuckDB doesn't support named parameters. This method replaces named parameters with question marks
        and makes parameters a list.

        The query is scanned once from left to right, so values are always emitted in the order their
        placeholders appear in the query. A named parameter that appears more than once is replaced at
        every occurrence and its value is repeated accordingly. Parameters not referenced in the query
        are dropped.

        Args:
            args: input arguments, either (query,) or (query, parameters) where parameters is a dict

        Returns:
            DuckDB compatible args
        """

        if args and len(args) > 1:
            # Unpack query args
            query, parameters = args

            # Lookup table keyed by the parameter name as it is written in the query
            lookup = {str(key): value for key, value in parameters.items()}

            # Positional values, in query order
            params = []

            if lookup:
                # Escape names so regex metacharacters in a key are matched literally
                names = "|".join(re.escape(name) for name in sorted(lookup, key=len, reverse=True))

                def replace(match):
                    # Called once per placeholder, in left to right order
                    params.append(lookup[match.group(1)])
                    return "?"

                # Same boundary rule as before: a parameter must be followed by whitespace or end of query
                query = re.sub(rf":({names})(?=\s|$)", replace, query)

            # Repack query and parameter list
            args = (query, params)

        return args
@@ -0,0 +1,76 @@
1
+ """
2
+ Embedded module
3
+ """
4
+
5
+ from .rdbms import RDBMS
6
+
7
+
8
class Embedded(RDBMS):
    """
    Base class for embedded relational databases. An embedded relational database stores all content in a local file.
    """

    def __init__(self, config):
        """
        Creates a new Database.

        Args:
            config: database configuration parameters
        """

        super().__init__(config)

        # Path to database file, None until the database is loaded from or saved to a file
        self.path = None

    def load(self, path):
        # Parent handles the actual load, this class only remembers where it came from
        super().load(path)
        self.path = path

    def save(self, path):
        current = self.path

        # Saving to a different file: write a copy there and keep working against the current connection
        if current and current != path:
            self.copy(path).close()
            return

        # Both remaining cases start by committing pending changes
        self.connection.commit()

        # Same file as the current one, the commit is all that's needed
        if current:
            return

        # No file yet (temporary database): copy content to path and switch over to the new connection
        replacement = self.copy(path)
        self.connection.close()

        self.session(connection=replacement)
        self.path = path

    def jsonprefix(self):
        # Prefix shared by all generated json column expressions
        return "json_extract(data"

    def jsoncolumn(self, name):
        # Expression that reads field `name` out of the json data column
        return f"json_extract(data, '$.{name}')"

    def copy(self, path):
        """
        Copies the current database into path. Must be implemented by subclasses.

        Args:
            path: path to write database

        Returns:
            new connection with data copied over
        """

        raise NotImplementedError
@@ -0,0 +1,8 @@
1
+ """
2
+ Encoder imports
3
+ """
4
+
5
+ from .base import Encoder
6
+ from .factory import EncoderFactory
7
+ from .image import ImageEncoder
8
+ from .serialize import SerializeEncoder
@@ -0,0 +1,37 @@
1
+ """
2
+ Encoder module
3
+ """
4
+
5
+ from io import BytesIO
6
+
7
+
8
class Encoder:
    """
    Encodes and decodes object content. The base encoder works only with byte arrays. It can be extended to encode different datatypes.
    """

    def encode(self, obj):
        """
        Encodes an object to a byte array. The base implementation returns the input unchanged.

        Args:
            obj: object to encode

        Returns:
            encoded object as a byte array
        """

        return obj

    def decode(self, data):
        """
        Decodes an input byte array into an object. The base implementation wraps the data in a BytesIO stream.

        Args:
            data: encoded data

        Returns:
            BytesIO over data, None when data is empty or None
        """

        # Nothing to decode
        if not data:
            return None

        return BytesIO(data)
@@ -0,0 +1,56 @@
1
+ """
2
+ Encoder factory module
3
+ """
4
+
5
+ from ...util import Resolver
6
+
7
+ from .base import Encoder
8
+ from .serialize import SerializeEncoder
9
+
10
+
11
class EncoderFactory:
    """
    Encoder factory. Creates new Encoder instances.
    """

    @staticmethod
    def get(encoder):
        """
        Resolves an encoder class from its name.

        Args:
            encoder: fully qualified class path, or a short name that is expanded to
                     <parent package>.<Name>Encoder

        Returns:
            Encoder class
        """

        if "." in encoder:
            # Already a qualified path
            target = encoder
        else:
            # Short name, expand relative to the parent package of this module
            package = __name__.rpartition(".")[0]
            target = f"{package}.{encoder.capitalize()}Encoder"

        resolver = Resolver()
        return resolver(target)

    @staticmethod
    def create(encoder):
        """
        Creates a new Encoder instance.

        Args:
            encoder: True for the default encoder, "messagepack" or "pickle" for a
                     serialization encoder, otherwise an encoder class name

        Returns:
            Encoder
        """

        # Default encoder
        if encoder is True:
            return Encoder()

        # Supported serialization methods
        if encoder in ("messagepack", "pickle"):
            return SerializeEncoder(encoder)

        # Resolve the class, then instantiate it
        encoderclass = EncoderFactory.get(encoder)
        return encoderclass()
@@ -0,0 +1,43 @@
1
+ """
2
+ ImageEncoder module
3
+ """
4
+
5
+ from io import BytesIO
6
+
7
+ # Conditional import
8
+ try:
9
+ from PIL import Image
10
+
11
+ PIL = True
12
+ except ImportError:
13
+ PIL = False
14
+
15
+ from .base import Encoder
16
+
17
+
18
class ImageEncoder(Encoder):
    """
    Encodes and decodes Image objects as compressed binary content, using the original image's algorithm.
    """

    def __init__(self):
        """
        Creates a new ImageEncoder.

        Raises:
            ImportError: if PIL could not be imported
        """

        if not PIL:
            raise ImportError('ImageEncoder is not available - install "database" extra to enable')

    def encode(self, obj):
        # Serialize the image into an in-memory buffer using the image's own format
        with BytesIO() as buffer:
            obj.save(buffer, format=obj.format, quality="keep")

            # Buffer contents as a byte array
            return buffer.getvalue()

    def decode(self, data):
        # Nothing to decode
        if not data:
            return None

        # Open the bytes as a PIL image
        return Image.open(BytesIO(data))
@@ -0,0 +1,28 @@
1
+ """
2
+ SerializeEncoder module
3
+ """
4
+
5
+ from ...serialize import SerializeFactory
6
+
7
+ from .base import Encoder
8
+
9
+
10
class SerializeEncoder(Encoder):
    """
    Encodes and decodes objects using the internal serialize package.
    """

    def __init__(self, method):
        """
        Creates a new SerializeEncoder.

        Args:
            method: serialization method, passed to SerializeFactory.create
        """

        super().__init__()

        # Serializer that performs the actual encoding and decoding
        self.serializer = SerializeFactory.create(method)

    def encode(self, obj):
        # Serialize object to bytes
        serializer = self.serializer
        return serializer.savebytes(obj)

    def decode(self, data):
        # Deserialize bytes back to an object
        serializer = self.serializer
        return serializer.loadbytes(data)
@@ -0,0 +1,77 @@
1
+ """
2
+ Factory module
3
+ """
4
+
5
+ from urllib.parse import urlparse
6
+
7
+ from ..util import Resolver
8
+
9
+ from .client import Client
10
+ from .duckdb import DuckDB
11
+ from .sqlite import SQLite
12
+
13
+
14
class DatabaseFactory:
    """
    Methods to create document databases.
    """

    @staticmethod
    def create(config):
        """
        Create a Database from the "content" configuration setting.

        Args:
            config: database configuration parameters, the standardized content value is written back to config["content"]

        Returns:
            Database, None when content is not enabled
        """

        # Standardize content name, True is shorthand for sqlite
        setting = config.get("content")
        content = "sqlite" if setting is True else setting

        if content == "duckdb":
            database = DuckDB(config)
        elif content == "sqlite":
            database = SQLite(config)
        elif not content:
            # Document database not enabled
            database = None
        elif content == "client" or urlparse(content).scheme:
            # Explicit client or a URL, connect to database server
            database = Client(config)
        else:
            # Anything else is treated as a custom database class
            database = DatabaseFactory.resolve(content, config)

        # Store config back
        config["content"] = content

        return database

    @staticmethod
    def resolve(backend, config):
        """
        Attempt to resolve a custom backend.

        Args:
            backend: backend class
            config: index configuration parameters

        Returns:
            Database

        Raises:
            ImportError: if the backend can't be resolved or created, chained to the original error
        """

        try:
            backendclass = Resolver()(backend)
            return backendclass(config)
        except Exception as e:
            raise ImportError(f"Unable to resolve database backend: '{backend}'") from e