mseep-txtai 9.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mseep_txtai-9.1.1.dist-info/METADATA +262 -0
- mseep_txtai-9.1.1.dist-info/RECORD +251 -0
- mseep_txtai-9.1.1.dist-info/WHEEL +5 -0
- mseep_txtai-9.1.1.dist-info/licenses/LICENSE +190 -0
- mseep_txtai-9.1.1.dist-info/top_level.txt +1 -0
- txtai/__init__.py +16 -0
- txtai/agent/__init__.py +12 -0
- txtai/agent/base.py +54 -0
- txtai/agent/factory.py +39 -0
- txtai/agent/model.py +107 -0
- txtai/agent/placeholder.py +16 -0
- txtai/agent/tool/__init__.py +7 -0
- txtai/agent/tool/embeddings.py +69 -0
- txtai/agent/tool/factory.py +130 -0
- txtai/agent/tool/function.py +49 -0
- txtai/ann/__init__.py +7 -0
- txtai/ann/base.py +153 -0
- txtai/ann/dense/__init__.py +11 -0
- txtai/ann/dense/annoy.py +72 -0
- txtai/ann/dense/factory.py +76 -0
- txtai/ann/dense/faiss.py +233 -0
- txtai/ann/dense/hnsw.py +104 -0
- txtai/ann/dense/numpy.py +164 -0
- txtai/ann/dense/pgvector.py +323 -0
- txtai/ann/dense/sqlite.py +303 -0
- txtai/ann/dense/torch.py +38 -0
- txtai/ann/sparse/__init__.py +7 -0
- txtai/ann/sparse/factory.py +61 -0
- txtai/ann/sparse/ivfsparse.py +377 -0
- txtai/ann/sparse/pgsparse.py +56 -0
- txtai/api/__init__.py +18 -0
- txtai/api/application.py +134 -0
- txtai/api/authorization.py +53 -0
- txtai/api/base.py +159 -0
- txtai/api/cluster.py +295 -0
- txtai/api/extension.py +19 -0
- txtai/api/factory.py +40 -0
- txtai/api/responses/__init__.py +7 -0
- txtai/api/responses/factory.py +30 -0
- txtai/api/responses/json.py +56 -0
- txtai/api/responses/messagepack.py +51 -0
- txtai/api/route.py +41 -0
- txtai/api/routers/__init__.py +25 -0
- txtai/api/routers/agent.py +38 -0
- txtai/api/routers/caption.py +42 -0
- txtai/api/routers/embeddings.py +280 -0
- txtai/api/routers/entity.py +42 -0
- txtai/api/routers/extractor.py +28 -0
- txtai/api/routers/labels.py +47 -0
- txtai/api/routers/llm.py +61 -0
- txtai/api/routers/objects.py +42 -0
- txtai/api/routers/openai.py +191 -0
- txtai/api/routers/rag.py +61 -0
- txtai/api/routers/reranker.py +46 -0
- txtai/api/routers/segmentation.py +42 -0
- txtai/api/routers/similarity.py +48 -0
- txtai/api/routers/summary.py +46 -0
- txtai/api/routers/tabular.py +42 -0
- txtai/api/routers/textractor.py +42 -0
- txtai/api/routers/texttospeech.py +33 -0
- txtai/api/routers/transcription.py +42 -0
- txtai/api/routers/translation.py +46 -0
- txtai/api/routers/upload.py +36 -0
- txtai/api/routers/workflow.py +28 -0
- txtai/app/__init__.py +5 -0
- txtai/app/base.py +821 -0
- txtai/archive/__init__.py +9 -0
- txtai/archive/base.py +104 -0
- txtai/archive/compress.py +51 -0
- txtai/archive/factory.py +25 -0
- txtai/archive/tar.py +49 -0
- txtai/archive/zip.py +35 -0
- txtai/cloud/__init__.py +8 -0
- txtai/cloud/base.py +106 -0
- txtai/cloud/factory.py +70 -0
- txtai/cloud/hub.py +101 -0
- txtai/cloud/storage.py +125 -0
- txtai/console/__init__.py +5 -0
- txtai/console/__main__.py +22 -0
- txtai/console/base.py +264 -0
- txtai/data/__init__.py +10 -0
- txtai/data/base.py +138 -0
- txtai/data/labels.py +42 -0
- txtai/data/questions.py +135 -0
- txtai/data/sequences.py +48 -0
- txtai/data/texts.py +68 -0
- txtai/data/tokens.py +28 -0
- txtai/database/__init__.py +14 -0
- txtai/database/base.py +342 -0
- txtai/database/client.py +227 -0
- txtai/database/duckdb.py +150 -0
- txtai/database/embedded.py +76 -0
- txtai/database/encoder/__init__.py +8 -0
- txtai/database/encoder/base.py +37 -0
- txtai/database/encoder/factory.py +56 -0
- txtai/database/encoder/image.py +43 -0
- txtai/database/encoder/serialize.py +28 -0
- txtai/database/factory.py +77 -0
- txtai/database/rdbms.py +569 -0
- txtai/database/schema/__init__.py +6 -0
- txtai/database/schema/orm.py +99 -0
- txtai/database/schema/statement.py +98 -0
- txtai/database/sql/__init__.py +8 -0
- txtai/database/sql/aggregate.py +178 -0
- txtai/database/sql/base.py +189 -0
- txtai/database/sql/expression.py +404 -0
- txtai/database/sql/token.py +342 -0
- txtai/database/sqlite.py +57 -0
- txtai/embeddings/__init__.py +7 -0
- txtai/embeddings/base.py +1107 -0
- txtai/embeddings/index/__init__.py +14 -0
- txtai/embeddings/index/action.py +15 -0
- txtai/embeddings/index/autoid.py +92 -0
- txtai/embeddings/index/configuration.py +71 -0
- txtai/embeddings/index/documents.py +86 -0
- txtai/embeddings/index/functions.py +155 -0
- txtai/embeddings/index/indexes.py +199 -0
- txtai/embeddings/index/indexids.py +60 -0
- txtai/embeddings/index/reducer.py +104 -0
- txtai/embeddings/index/stream.py +67 -0
- txtai/embeddings/index/transform.py +205 -0
- txtai/embeddings/search/__init__.py +11 -0
- txtai/embeddings/search/base.py +344 -0
- txtai/embeddings/search/errors.py +9 -0
- txtai/embeddings/search/explain.py +120 -0
- txtai/embeddings/search/ids.py +61 -0
- txtai/embeddings/search/query.py +69 -0
- txtai/embeddings/search/scan.py +196 -0
- txtai/embeddings/search/terms.py +46 -0
- txtai/graph/__init__.py +10 -0
- txtai/graph/base.py +769 -0
- txtai/graph/factory.py +61 -0
- txtai/graph/networkx.py +275 -0
- txtai/graph/query.py +181 -0
- txtai/graph/rdbms.py +113 -0
- txtai/graph/topics.py +166 -0
- txtai/models/__init__.py +9 -0
- txtai/models/models.py +268 -0
- txtai/models/onnx.py +133 -0
- txtai/models/pooling/__init__.py +9 -0
- txtai/models/pooling/base.py +141 -0
- txtai/models/pooling/cls.py +28 -0
- txtai/models/pooling/factory.py +144 -0
- txtai/models/pooling/late.py +173 -0
- txtai/models/pooling/mean.py +33 -0
- txtai/models/pooling/muvera.py +164 -0
- txtai/models/registry.py +37 -0
- txtai/models/tokendetection.py +122 -0
- txtai/pipeline/__init__.py +17 -0
- txtai/pipeline/audio/__init__.py +11 -0
- txtai/pipeline/audio/audiomixer.py +58 -0
- txtai/pipeline/audio/audiostream.py +94 -0
- txtai/pipeline/audio/microphone.py +244 -0
- txtai/pipeline/audio/signal.py +186 -0
- txtai/pipeline/audio/texttoaudio.py +60 -0
- txtai/pipeline/audio/texttospeech.py +553 -0
- txtai/pipeline/audio/transcription.py +212 -0
- txtai/pipeline/base.py +23 -0
- txtai/pipeline/data/__init__.py +10 -0
- txtai/pipeline/data/filetohtml.py +206 -0
- txtai/pipeline/data/htmltomd.py +414 -0
- txtai/pipeline/data/segmentation.py +178 -0
- txtai/pipeline/data/tabular.py +155 -0
- txtai/pipeline/data/textractor.py +139 -0
- txtai/pipeline/data/tokenizer.py +112 -0
- txtai/pipeline/factory.py +77 -0
- txtai/pipeline/hfmodel.py +111 -0
- txtai/pipeline/hfpipeline.py +96 -0
- txtai/pipeline/image/__init__.py +7 -0
- txtai/pipeline/image/caption.py +55 -0
- txtai/pipeline/image/imagehash.py +90 -0
- txtai/pipeline/image/objects.py +80 -0
- txtai/pipeline/llm/__init__.py +11 -0
- txtai/pipeline/llm/factory.py +86 -0
- txtai/pipeline/llm/generation.py +173 -0
- txtai/pipeline/llm/huggingface.py +218 -0
- txtai/pipeline/llm/litellm.py +90 -0
- txtai/pipeline/llm/llama.py +152 -0
- txtai/pipeline/llm/llm.py +75 -0
- txtai/pipeline/llm/rag.py +477 -0
- txtai/pipeline/nop.py +14 -0
- txtai/pipeline/tensors.py +52 -0
- txtai/pipeline/text/__init__.py +13 -0
- txtai/pipeline/text/crossencoder.py +70 -0
- txtai/pipeline/text/entity.py +140 -0
- txtai/pipeline/text/labels.py +137 -0
- txtai/pipeline/text/lateencoder.py +103 -0
- txtai/pipeline/text/questions.py +48 -0
- txtai/pipeline/text/reranker.py +57 -0
- txtai/pipeline/text/similarity.py +83 -0
- txtai/pipeline/text/summary.py +98 -0
- txtai/pipeline/text/translation.py +298 -0
- txtai/pipeline/train/__init__.py +7 -0
- txtai/pipeline/train/hfonnx.py +196 -0
- txtai/pipeline/train/hftrainer.py +398 -0
- txtai/pipeline/train/mlonnx.py +63 -0
- txtai/scoring/__init__.py +12 -0
- txtai/scoring/base.py +188 -0
- txtai/scoring/bm25.py +29 -0
- txtai/scoring/factory.py +95 -0
- txtai/scoring/pgtext.py +181 -0
- txtai/scoring/sif.py +32 -0
- txtai/scoring/sparse.py +218 -0
- txtai/scoring/terms.py +499 -0
- txtai/scoring/tfidf.py +358 -0
- txtai/serialize/__init__.py +10 -0
- txtai/serialize/base.py +85 -0
- txtai/serialize/errors.py +9 -0
- txtai/serialize/factory.py +29 -0
- txtai/serialize/messagepack.py +42 -0
- txtai/serialize/pickle.py +98 -0
- txtai/serialize/serializer.py +46 -0
- txtai/util/__init__.py +7 -0
- txtai/util/resolver.py +32 -0
- txtai/util/sparsearray.py +62 -0
- txtai/util/template.py +16 -0
- txtai/vectors/__init__.py +8 -0
- txtai/vectors/base.py +476 -0
- txtai/vectors/dense/__init__.py +12 -0
- txtai/vectors/dense/external.py +55 -0
- txtai/vectors/dense/factory.py +121 -0
- txtai/vectors/dense/huggingface.py +44 -0
- txtai/vectors/dense/litellm.py +86 -0
- txtai/vectors/dense/llama.py +84 -0
- txtai/vectors/dense/m2v.py +67 -0
- txtai/vectors/dense/sbert.py +92 -0
- txtai/vectors/dense/words.py +211 -0
- txtai/vectors/recovery.py +57 -0
- txtai/vectors/sparse/__init__.py +7 -0
- txtai/vectors/sparse/base.py +90 -0
- txtai/vectors/sparse/factory.py +55 -0
- txtai/vectors/sparse/sbert.py +34 -0
- txtai/version.py +6 -0
- txtai/workflow/__init__.py +8 -0
- txtai/workflow/base.py +184 -0
- txtai/workflow/execute.py +99 -0
- txtai/workflow/factory.py +42 -0
- txtai/workflow/task/__init__.py +18 -0
- txtai/workflow/task/base.py +490 -0
- txtai/workflow/task/console.py +24 -0
- txtai/workflow/task/export.py +64 -0
- txtai/workflow/task/factory.py +89 -0
- txtai/workflow/task/file.py +28 -0
- txtai/workflow/task/image.py +36 -0
- txtai/workflow/task/retrieve.py +61 -0
- txtai/workflow/task/service.py +102 -0
- txtai/workflow/task/storage.py +110 -0
- txtai/workflow/task/stream.py +33 -0
- txtai/workflow/task/template.py +116 -0
- txtai/workflow/task/url.py +20 -0
- txtai/workflow/task/workflow.py +14 -0
txtai/database/duckdb.py
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
"""
|
2
|
+
DuckDB module
|
3
|
+
"""
|
4
|
+
|
5
|
+
import os
|
6
|
+
import re
|
7
|
+
|
8
|
+
from tempfile import TemporaryDirectory
|
9
|
+
|
10
|
+
# Conditional import
|
11
|
+
try:
|
12
|
+
import duckdb
|
13
|
+
|
14
|
+
DUCKDB = True
|
15
|
+
except ImportError:
|
16
|
+
DUCKDB = False
|
17
|
+
|
18
|
+
from .embedded import Embedded
|
19
|
+
from .schema import Statement
|
20
|
+
|
21
|
+
|
22
|
+
class DuckDB(Embedded):
    """
    Database instance backed by DuckDB.
    """

    # Delete single document and object
    DELETE_DOCUMENT = "DELETE FROM documents WHERE id = ?"
    DELETE_OBJECT = "DELETE FROM objects WHERE id = ?"

    def __init__(self, config):
        """
        Creates a new DuckDB database.

        Args:
            config: database configuration parameters

        Raises:
            ImportError: if the duckdb package could not be imported
        """

        super().__init__(config)

        if not DUCKDB:
            raise ImportError('DuckDB is not available - install "database" extra to enable')

    def execute(self, function, *args):
        # Call parent method with DuckDB compatible arguments
        return super().execute(function, *self.formatargs(args))

    def insertdocument(self, uid, data, tags, entry):
        # Delete existing document
        self.cursor.execute(DuckDB.DELETE_DOCUMENT, [uid])

        # Call parent method
        super().insertdocument(uid, data, tags, entry)

    def insertobject(self, uid, data, tags, entry):
        # Delete existing object
        self.cursor.execute(DuckDB.DELETE_OBJECT, [uid])

        # Call parent method
        super().insertobject(uid, data, tags, entry)

    def connect(self, path=":memory:"):
        # Create connection and start a transaction
        # pylint: disable=I1101
        connection = duckdb.connect(path)
        connection.begin()

        return connection

    def getcursor(self):
        # The connection object itself is used as the cursor
        return self.connection

    def jsonprefix(self):
        # Return json column prefix
        return "json_extract_string(data"

    def jsoncolumn(self, name):
        # Generate json column using json_extract_string function
        return f"json_extract_string(data, '$.{name}')"

    def rows(self):
        # Iteratively retrieve and yield rows, fetching 256 at a time
        batch = 256
        rows = self.cursor.fetchmany(batch)
        while rows:
            yield from rows
            rows = self.cursor.fetchmany(batch)

    def addfunctions(self):
        # DuckDB doesn't currently support scalar functions
        return

    def copy(self, path):
        """
        Copies the current database into path.

        The documents, objects and sections tables are exported to parquet files in a
        temporary directory, then imported into a newly created database at path.

        Args:
            path: path to write database

        Returns:
            new connection with data copied over
        """

        # Delete existing file, if necessary
        if os.path.exists(path):
            os.remove(path)

        # Create database connection
        # pylint: disable=I1101
        connection = duckdb.connect(path)

        # List of tables
        tables = ["documents", "objects", "sections"]

        with TemporaryDirectory() as directory:
            # Export existing tables
            for table in tables:
                self.connection.execute(f"COPY {table} TO '{directory}/{table}.parquet' (FORMAT parquet)")

            # Create initial schema
            for schema in [Statement.CREATE_DOCUMENTS, Statement.CREATE_OBJECTS, Statement.CREATE_SECTIONS % "sections"]:
                connection.execute(schema)

            # Import tables into new schema
            for table in tables:
                connection.execute(f"COPY {table} FROM '{directory}/{table}.parquet' (FORMAT parquet)")

            # Create indexes and sync data to database file
            connection.execute(Statement.CREATE_SECTIONS_INDEX)
            connection.execute("CHECKPOINT")

        # Start transaction
        connection.begin()

        return connection

    def formatargs(self, args):
        """
        Converts named query parameters into positional parameters. Each ":name" placeholder that
        is followed by whitespace or the end of the query is replaced with a question mark and
        the matching values are collected into a list, in the order the placeholders appear.

        Placeholders are resolved in a single left-to-right pass. This keeps the value order
        correct regardless of the parameter dict's ordering and supports a name being used
        more than once in the same query.

        Args:
            args: input arguments, either (query,) or (query, parameters) where parameters is a dict

        Returns:
            DuckDB compatible args
        """

        if args and len(args) > 1:
            # Unpack query args
            query, parameters = args

            # Values in the order their placeholders appear in the query
            params = []

            if parameters:
                # Index values by the textual form of each name, as it appears in the query
                lookup = {str(key): value for key, value in parameters.items()}

                # One alternation over all names, escaped so they always match literally.
                # The lookahead requires the full name to be followed by whitespace or end of query.
                names = "|".join(re.escape(name) for name in lookup)

                def positional(match):
                    # Called once per placeholder, left to right
                    params.append(lookup[match.group(1)])
                    return "?"

                query = re.sub(rf":({names})(?=\s|$)", positional, query)

            # Repack query and parameter list
            args = (query, params)

        return args
|
@@ -0,0 +1,76 @@
|
|
1
|
+
"""
|
2
|
+
Embedded module
|
3
|
+
"""
|
4
|
+
|
5
|
+
from .rdbms import RDBMS
|
6
|
+
|
7
|
+
|
8
|
+
class Embedded(RDBMS):
    """
    Base class for embedded relational databases. An embedded relational database stores all content in a local file.
    """

    def __init__(self, config):
        """
        Creates a new embedded database.

        Args:
            config: database configuration parameters
        """

        super().__init__(config)

        # Location of the database file, None until the database has been loaded or saved
        self.path = None

    def load(self, path):
        # Delegate loading to the parent class
        super().load(path)

        # Remember where this database was loaded from
        self.path = path

    def save(self, path):
        # No path yet, this is a temporary database
        if not self.path:
            # Commit pending changes on the temporary database
            self.connection.commit()

            # Copy content into a new database at path
            target = self.copy(path)

            # Temporary database is no longer needed
            self.connection.close()

            # Continue the session using the new connection
            self.session(connection=target)
            self.path = path
            return

        # Saving in place, only a commit is needed
        if self.path == path:
            self.connection.commit()
            return

        # Saving to a different path, copy the data and keep using the current connection
        self.copy(path).close()

    def jsonprefix(self):
        # Prefix shared by all json column expressions
        return "json_extract(data"

    def jsoncolumn(self, name):
        # Build a json_extract expression for the named field
        return f"json_extract(data, '$.{name}')"

    def copy(self, path):
        """
        Copies the current database into path. Subclasses must implement this method.

        Args:
            path: path to write database

        Returns:
            new connection with data copied over
        """

        raise NotImplementedError
|
@@ -0,0 +1,37 @@
|
|
1
|
+
"""
|
2
|
+
Encoder module
|
3
|
+
"""
|
4
|
+
|
5
|
+
from io import BytesIO
|
6
|
+
|
7
|
+
|
8
|
+
class Encoder:
    """
    Encodes and decodes object content. This base implementation only handles byte arrays, subclasses
    can override encode and decode to support other datatypes.
    """

    def encode(self, obj):
        """
        Encodes an object to a byte array. The base encoder returns the input unchanged.

        Args:
            obj: object to encode

        Returns:
            encoded object as a byte array
        """

        return obj

    def decode(self, data):
        """
        Decodes an input byte array into an object. The base encoder wraps the bytes in a BytesIO stream.

        Args:
            data: encoded data

        Returns:
            BytesIO stream over data, None when data is empty or None
        """

        # Nothing to decode
        if not data:
            return None

        return BytesIO(data)
|
@@ -0,0 +1,56 @@
|
|
1
|
+
"""
|
2
|
+
Encoder factory module
|
3
|
+
"""
|
4
|
+
|
5
|
+
from ...util import Resolver
|
6
|
+
|
7
|
+
from .base import Encoder
|
8
|
+
from .serialize import SerializeEncoder
|
9
|
+
|
10
|
+
|
11
|
+
class EncoderFactory:
    """
    Encoder factory. Creates new Encoder instances.
    """

    @staticmethod
    def get(encoder):
        """
        Resolves an encoder class from its name.

        Args:
            encoder: Encoder class name, names without a package are looked up in this module's parent package

        Returns:
            Encoder class
        """

        if "." in encoder:
            # Fully qualified class path, use as is
            path = encoder
        else:
            # Short name, expand to <parent package>.<Name>Encoder
            package = __name__.rpartition(".")[0]
            path = f"{package}.{encoder.capitalize()}Encoder"

        return Resolver()(path)

    @staticmethod
    def create(encoder):
        """
        Creates a new Encoder instance.

        Args:
            encoder: True for the default encoder, a serialization method name or an Encoder class name

        Returns:
            Encoder
        """

        # True selects the default encoder
        if encoder is True:
            return Encoder()

        # Serialization methods handled by SerializeEncoder
        if encoder in ("messagepack", "pickle"):
            return SerializeEncoder(encoder)

        # Otherwise resolve the class and instantiate it
        cls = EncoderFactory.get(encoder)
        return cls()
|
@@ -0,0 +1,43 @@
|
|
1
|
+
"""
|
2
|
+
ImageEncoder module
|
3
|
+
"""
|
4
|
+
|
5
|
+
from io import BytesIO
|
6
|
+
|
7
|
+
# Conditional import
|
8
|
+
try:
|
9
|
+
from PIL import Image
|
10
|
+
|
11
|
+
PIL = True
|
12
|
+
except ImportError:
|
13
|
+
PIL = False
|
14
|
+
|
15
|
+
from .base import Encoder
|
16
|
+
|
17
|
+
|
18
|
+
class ImageEncoder(Encoder):
    """
    Encodes and decodes Image objects as compressed binary content, using the original image's algorithm.
    """

    def __init__(self):
        """
        Creates a new ImageEncoder.

        Raises:
            ImportError: if PIL could not be imported
        """

        if not PIL:
            raise ImportError('ImageEncoder is not available - install "database" extra to enable')

    def encode(self, obj):
        # Serialize the image into an in-memory buffer using the image's own format
        with BytesIO() as buffer:
            obj.save(buffer, format=obj.format, quality="keep")

            # Read the bytes out before the buffer is closed
            return buffer.getvalue()

    def decode(self, data):
        # No content, no image
        if not data:
            return None

        # Open the bytes as a PIL image
        return Image.open(BytesIO(data))
|
@@ -0,0 +1,28 @@
|
|
1
|
+
"""
|
2
|
+
SerializeEncoder module
|
3
|
+
"""
|
4
|
+
|
5
|
+
from ...serialize import SerializeFactory
|
6
|
+
|
7
|
+
from .base import Encoder
|
8
|
+
|
9
|
+
|
10
|
+
class SerializeEncoder(Encoder):
    """
    Encodes and decodes objects using the internal serialize package.
    """

    def __init__(self, method):
        """
        Creates a new SerializeEncoder.

        Args:
            method: serialization method passed to SerializeFactory.create
        """

        super().__init__()

        # Serializer instance for the requested method
        self.serializer = SerializeFactory.create(method)

    def encode(self, obj):
        # Serialize object to bytes
        encoded = self.serializer.savebytes(obj)
        return encoded

    def decode(self, data):
        # Deserialize bytes back into an object
        decoded = self.serializer.loadbytes(data)
        return decoded
|
@@ -0,0 +1,77 @@
|
|
1
|
+
"""
|
2
|
+
Factory module
|
3
|
+
"""
|
4
|
+
|
5
|
+
from urllib.parse import urlparse
|
6
|
+
|
7
|
+
from ..util import Resolver
|
8
|
+
|
9
|
+
from .client import Client
|
10
|
+
from .duckdb import DuckDB
|
11
|
+
from .sqlite import SQLite
|
12
|
+
|
13
|
+
|
14
|
+
class DatabaseFactory:
    """
    Methods to create document databases.
    """

    @staticmethod
    def create(config):
        """
        Creates a Database from the "content" configuration setting. The standardized content
        value is written back to config after the database is created.

        Args:
            config: database configuration parameters

        Returns:
            Database, None when content is not enabled
        """

        # Content setting enables the document database, True is shorthand for sqlite
        content = config.get("content")
        content = "sqlite" if content is True else content

        if content == "duckdb":
            database = DuckDB(config)
        elif content == "sqlite":
            database = SQLite(config)
        elif not content:
            # Content storage is disabled
            database = None
        elif content == "client" or urlparse(content).scheme:
            # Explicit client or a database server URL
            database = Client(config)
        else:
            # Anything else is treated as a custom database class
            database = DatabaseFactory.resolve(content, config)

        # Store standardized content name back
        config["content"] = content

        return database

    @staticmethod
    def resolve(backend, config):
        """
        Attempt to resolve a custom backend.

        Args:
            backend: backend class
            config: index configuration parameters

        Returns:
            Database

        Raises:
            ImportError: if the backend cannot be resolved or instantiated
        """

        try:
            factory = Resolver()(backend)
            return factory(config)
        except Exception as error:
            raise ImportError(f"Unable to resolve database backend: '{backend}'") from error
|