MindsDB 25.2.2.1__py3-none-any.whl → 25.2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of MindsDB might be problematic.
- {MindsDB-25.2.2.1.dist-info → MindsDB-25.2.3.0.dist-info}/METADATA +229 -229
- {MindsDB-25.2.2.1.dist-info → MindsDB-25.2.3.0.dist-info}/RECORD +16 -15
- mindsdb/__about__.py +1 -1
- mindsdb/api/http/namespaces/databases.py +69 -1
- mindsdb/integrations/handlers/file_handler/file_handler.py +28 -46
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +8 -11
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_tables.py +1 -1
- mindsdb/integrations/handlers/timegpt_handler/requirements.txt +1 -1
- mindsdb/integrations/utilities/files/file_reader.py +66 -14
- mindsdb/interfaces/file/file_controller.py +140 -11
- mindsdb/interfaces/storage/db.py +1 -0
- mindsdb/migrations/versions/2025-02-09_4943359e354a_file_metadata.py +31 -0
- mindsdb/utilities/render/sqlalchemy_render.py +11 -5
- {MindsDB-25.2.2.1.dist-info → MindsDB-25.2.3.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.2.2.1.dist-info → MindsDB-25.2.3.0.dist-info}/WHEEL +0 -0
- {MindsDB-25.2.2.1.dist-info → MindsDB-25.2.3.0.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/file/file_controller.py
CHANGED

@@ -3,12 +3,17 @@ import os
 import shutil
 from pathlib import Path
 
-
+import pandas as pd
+
 from mindsdb.interfaces.storage import db
 from mindsdb.interfaces.storage.fs import FsStore
 from mindsdb.utilities import log
 from mindsdb.utilities.config import Config
 from mindsdb.utilities.context import context as ctx
+from sqlalchemy.orm.attributes import flag_modified
+
+from mindsdb.integrations.utilities.files.file_reader import FileReader
+
 
 logger = log.getLogger(__name__)
 
@@ -82,31 +87,38 @@ class FileController:
 
         file_dir = None
         try:
-
-
-            ds_meta = {"row_count": len(df), "column_names": list(df.columns)}
+            pages_files, pages_index = self.get_file_pages(file_path)
 
+            metadata = {
+                'is_feather': True,
+                'pages': pages_index
+            }
+            df = pages_files[0]
             file_record = db.File(
                 name=name,
                 company_id=ctx.company_id,
                 source_file_path=file_name,
                 file_path="",
-                row_count=
-                columns=
+                row_count=len(df),
+                columns=list(df.columns),
+                metadata_=metadata
             )
             db.session.add(file_record)
-            db.session.
+            db.session.flush()
+
             store_file_path = f"file_{ctx.company_id}_{file_record.id}"
             file_record.file_path = store_file_path
-            db.session.commit()
 
             file_dir = Path(self.dir).joinpath(store_file_path)
             file_dir.mkdir(parents=True, exist_ok=True)
-
-
-
+
+            self.store_pages_as_feather(file_dir, pages_files)
+            # store original file
+            shutil.move(file_path, str(file_dir.joinpath(file_name)))
 
             self.fs_store.put(store_file_path, base_dir=self.dir)
+            db.session.commit()
+
         except Exception as e:
             logger.error(e)
             if file_dir is not None:
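Aside (not part of the diff): the hunk above swaps the early db.session.commit() for db.session.flush() and defers the commit until after the pages and the original file are stored. A minimal sketch, assuming a toy model and an in-memory SQLite engine, of the property this relies on — flush() executes the INSERT and assigns the autoincrement id without ending the transaction:

# toy model and engine are illustrative, not MindsDB code
import sqlalchemy as sa
from sqlalchemy.orm import declarative_base, Session

Base = declarative_base()


class File(Base):
    __tablename__ = "file"
    id = sa.Column(sa.Integer, primary_key=True)
    name = sa.Column(sa.String)


engine = sa.create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    rec = File(name="example")
    session.add(rec)
    session.flush()                      # INSERT sent, id assigned, transaction still open
    store_path = f"file_1_{rec.id}"      # mirrors f"file_{ctx.company_id}_{file_record.id}"
    print(store_path)                    # file_1_1
    session.commit()                     # everything becomes durable in one step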
@@ -115,6 +127,39 @@ class FileController:
 
         return file_record.id
 
+    def get_file_pages(self, source_path: str):
+        """
+        Reads file and extract pages from it
+        Returned structures:
+        - page_files: dict with content, {page_num: dataframe}
+        - pages_index: dict, link between page name and num: {page_name: page_num}
+        """
+        file_reader = FileReader(path=source_path)
+        tables = file_reader.get_contents()
+
+        pages_files = {}
+        pages_index = {}
+        if len(tables) == 1:
+            df = list(tables.values())[0]
+            pages_files[0] = df
+        else:
+            # file has several pages, create a new one with info
+            df = pd.DataFrame(tables.keys(), columns=["Tables"])
+            pages_files[0] = df
+            for i, page_name in enumerate(tables.keys(), 1):
+                pages_files[i] = tables[page_name]
+                pages_index[page_name] = i
+        return pages_files, pages_index
+
+    def store_pages_as_feather(self, dest_dir: Path, pages_files: dict):
+        """
+        Stores pages in file storage dir in feather format
+        """
+
+        for num, df in pages_files.items():
+            dest = dest_dir.joinpath(f'{num}.feather')
+            df.to_feather(str(dest))
+
     def delete_file(self, name):
         file_record = (
             db.session.query(db.File)
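Aside (not part of the diff): a standalone sketch of the paging scheme the new get_file_pages() and store_pages_as_feather() helpers implement — page 0 holds the single table, or an index of table names when there are several, and each page is written as <num>.feather. The sample tables and pages_dir are illustrative, and feather I/O assumes pyarrow is installed:

import tempfile
from pathlib import Path

import pandas as pd

tables = {
    "sales": pd.DataFrame({"amount": [10, 20]}),
    "customers": pd.DataFrame({"name": ["a", "b"]}),
}

# page 0 is the index of table names; pages 1..n are the tables themselves
pages_files = {0: pd.DataFrame(list(tables.keys()), columns=["Tables"])}
pages_index = {}
for i, page_name in enumerate(tables, 1):
    pages_files[i] = tables[page_name]
    pages_index[page_name] = i

pages_dir = Path(tempfile.mkdtemp())
for num, df in pages_files.items():
    df.to_feather(pages_dir / f"{num}.feather")   # same layout store_pages_as_feather() produces

# reading a named page back, the way get_file_data(name, page_name="sales") resolves it
print(pd.read_feather(pages_dir / f"{pages_index['sales']}.feather"))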
@@ -144,3 +189,87 @@ class FileController:
             .joinpath(file_dir)
             .joinpath(Path(file_record.source_file_path).name)
         )
+
+    def get_file_data(self, name: str, page_name: str = None) -> pd.DataFrame:
+        """
+        Returns file content as dataframe
+
+        :param name: name of file
+        :param page_name: page name, optional
+        :return: Page or file content
+        """
+        file_record = (
+            db.session.query(db.File)
+            .filter_by(company_id=ctx.company_id, name=name)
+            .first()
+        )
+        if file_record is None:
+            raise Exception(f"File '{name}' does not exists")
+
+        file_dir = f"file_{ctx.company_id}_{file_record.id}"
+        self.fs_store.get(file_dir, base_dir=self.dir)
+
+        metadata = file_record.metadata_ or {}
+        if metadata.get('is_feather') is not True:
+            # migrate file
+
+            file_path = (
+                Path(self.dir)
+                .joinpath(file_dir)
+                .joinpath(Path(file_record.source_file_path).name)
+            )
+
+            pages_files, pages_index = self.get_file_pages(str(file_path))
+
+            self.store_pages_as_feather(file_path.parent, pages_files)
+            metadata['is_feather'] = True
+            metadata['pages'] = pages_index
+
+            file_record.metadata_ = metadata
+            flag_modified(file_record, 'metadata_')
+            db.session.commit()
+
+        if page_name is None:
+            num = 0
+        else:
+            num = metadata.get('pages', {}).get(page_name)
+            if num is None:
+                raise KeyError(f'Page not found: {page_name}')
+
+        path = (
+            Path(self.dir)
+            .joinpath(file_dir)
+            .joinpath(f'{num}.feather')
+        )
+        return pd.read_feather(path)
+
+    def set_file_data(self, name: str, df: pd.DataFrame, page_name: str = None):
+        """
+        Save file content
+        :param name: name of file
+        :param df: content to store
+        :param page_name: name of page, optional
+        """
+
+        file_record = (
+            db.session.query(db.File)
+            .filter_by(company_id=ctx.company_id, name=name)
+            .first()
+        )
+        if file_record is None:
+            raise Exception(f"File '{name}' does not exists")
+
+        file_dir = f"file_{ctx.company_id}_{file_record.id}"
+        self.fs_store.get(file_dir, base_dir=self.dir)
+
+        num = 0
+        if page_name is not None and file_record.metadata_ is not None:
+            num = file_record.metadata_.get('pages', {}).get(page_name, 0)
+
+        path = (
+            Path(self.dir)
+            .joinpath(file_dir)
+            .joinpath(f'{num}.feather')
+        )
+        df.to_feather(path)
+        self.fs_store.put(file_dir, base_dir=self.dir)
mindsdb/interfaces/storage/db.py
CHANGED

@@ -258,6 +258,7 @@ class File(Base):
     row_count = Column(Integer, nullable=False)
     columns = Column(Json, nullable=False)
     created_at = Column(DateTime, default=datetime.datetime.now)
+    metadata_: dict = Column("metadata", JSON, nullable=True)
     updated_at = Column(
         DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
     )
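Aside (not part of the diff): the attribute is spelled metadata_ because `metadata` is reserved on SQLAlchemy declarative classes, while the underlying column keeps the name "metadata"; and because the file controller updates the JSON value in place, it calls flag_modified() before committing. A minimal sketch of both details, assuming a toy model and an in-memory SQLite engine:

# toy model and engine are illustrative, not MindsDB code
import sqlalchemy as sa
from sqlalchemy.orm import declarative_base, Session
from sqlalchemy.orm.attributes import flag_modified

Base = declarative_base()


class File(Base):
    __tablename__ = "file"
    id = sa.Column(sa.Integer, primary_key=True)
    metadata_ = sa.Column("metadata", sa.JSON, nullable=True)   # attribute vs. column name


engine = sa.create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    rec = File(metadata_={"is_feather": True, "pages": {}})
    session.add(rec)
    session.commit()

    rec.metadata_["pages"]["sales"] = 1   # mutating the dict in place is invisible to the ORM
    flag_modified(rec, "metadata_")       # so the change has to be flagged explicitly
    session.commit()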
mindsdb/migrations/versions/2025-02-09_4943359e354a_file_metadata.py
ADDED

@@ -0,0 +1,31 @@
+"""file_metadata
+
+Revision ID: 4943359e354a
+Revises: c06c35f7e8e1
+Create Date: 2025-02-09 10:10:55.577407
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import mindsdb.interfaces.storage.db  # noqa
+
+
+# revision identifiers, used by Alembic.
+revision = '4943359e354a'
+down_revision = 'c06c35f7e8e1'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('file', schema=None) as batch_op:
+        batch_op.add_column(sa.Column('metadata', sa.JSON(), nullable=True))
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('file', schema=None) as batch_op:
+        batch_op.drop_column('metadata')
+    # ### end Alembic commands ###
mindsdb/utilities/render/sqlalchemy_render.py
CHANGED

@@ -93,9 +93,6 @@ class SqlalchemyRender:
         if hasattr(dialect, 'preparer'):
             class Preparer(dialect.preparer):
 
-                def __init__(self, *args, **kwargs):
-                    super().__init__(*args, **kwargs)
-
                 def _requires_quotes(self, value: str) -> bool:
                     # check force-quote flag
                     if isinstance(value, AttributedStr):
@@ -242,6 +239,8 @@
 
         op = t.op.lower()
         if op in ('in', 'not in'):
+            if t.args[1].parentheses:
+                arg1 = [arg1]
             if isinstance(arg1, sa.sql.selectable.ColumnClause):
                 raise NotImplementedError(f'Required list argument for: {op}')
 
@@ -536,12 +535,19 @@
             query = query.select_from(table)
 
             # other tables
+            has_explicit_join = False
             for item in join_list[1:]:
                 table = self.to_table(item['table'])
                 if item['is_implicit']:
                     # add to from clause
-                    query = query.select_from(table)
+                    if has_explicit_join:
+                        # sqlalchemy doesn't support implicit join after explicit
+                        # convert it to explicit
+                        query = query.join(table, sa.text('1=1'))
+                    else:
+                        query = query.select_from(table)
                 else:
+                    has_explicit_join = True
                     if item['condition'] is None:
                         # otherwise, sqlalchemy raises "Don't know how to join to ..."
                         condition = sa.text('1=1')
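Aside (not part of the diff): a minimal SQLAlchemy sketch of the conversion in the hunk above — once an explicit JOIN is present, a later comma-style ("implicit") table reference is appended as another JOIN on the tautology 1=1 instead of a second select_from(). Table and column names are illustrative:

import sqlalchemy as sa

meta = sa.MetaData()
a = sa.Table("a", meta, sa.Column("id", sa.Integer))
b = sa.Table("b", meta, sa.Column("a_id", sa.Integer))
c = sa.Table("c", meta, sa.Column("id", sa.Integer))

# SELECT a.id FROM a JOIN b ON a.id = b.a_id, c   <- "c" is the implicit join
query = sa.select(a.c.id).select_from(a).join(b, a.c.id == b.c.a_id)

# the renderer now appends the implicit table as an explicit join on a tautology
query = query.join(c, sa.text("1=1"))

print(query)
# -> SELECT a.id FROM a JOIN b ON a.id = b.a_id JOIN c ON 1=1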
@@ -564,7 +570,7 @@
                 condition,
                 full=is_full
             )
-        elif isinstance(from_table, ast.Union):
+        elif isinstance(from_table, (ast.Union, ast.Intersect, ast.Except)):
             alias = None
             if from_table.alias:
                 alias = self.get_alias(from_table.alias)

LICENSE, WHEEL, top_level.txt: files without changes.