MindsDB 25.2.2.2__py3-none-any.whl → 25.2.4.0__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of MindsDB might be problematic.
- {MindsDB-25.2.2.2.dist-info → MindsDB-25.2.4.0.dist-info}/METADATA +209 -228
- {MindsDB-25.2.2.2.dist-info → MindsDB-25.2.4.0.dist-info}/RECORD +52 -50
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +1 -11
- mindsdb/api/executor/datahub/datanodes/system_tables.py +4 -1
- mindsdb/api/http/initialize.py +8 -5
- mindsdb/api/http/namespaces/agents.py +0 -7
- mindsdb/api/http/namespaces/config.py +0 -48
- mindsdb/api/http/namespaces/databases.py +69 -1
- mindsdb/api/http/namespaces/knowledge_bases.py +1 -1
- mindsdb/api/http/namespaces/util.py +0 -28
- mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/dspy_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/file_handler/file_handler.py +28 -46
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +8 -11
- mindsdb/integrations/handlers/langchain_embedding_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/langchain_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/llama_index_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_tables.py +1 -1
- mindsdb/integrations/handlers/openai_handler/constants.py +3 -1
- mindsdb/integrations/handlers/openai_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/rag_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py +33 -8
- mindsdb/integrations/handlers/timegpt_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +3 -2
- mindsdb/integrations/handlers/web_handler/web_handler.py +42 -33
- mindsdb/integrations/handlers/youtube_handler/__init__.py +2 -0
- mindsdb/integrations/handlers/youtube_handler/connection_args.py +32 -0
- mindsdb/integrations/libs/llm/utils.py +5 -0
- mindsdb/integrations/libs/process_cache.py +2 -2
- mindsdb/integrations/utilities/files/file_reader.py +66 -14
- mindsdb/integrations/utilities/rag/chains/local_context_summarizer_chain.py +227 -0
- mindsdb/interfaces/agents/agents_controller.py +3 -3
- mindsdb/interfaces/agents/callback_handlers.py +52 -5
- mindsdb/interfaces/agents/langchain_agent.py +5 -3
- mindsdb/interfaces/database/database.py +1 -1
- mindsdb/interfaces/database/integrations.py +1 -1
- mindsdb/interfaces/file/file_controller.py +140 -11
- mindsdb/interfaces/jobs/scheduler.py +1 -1
- mindsdb/interfaces/knowledge_base/preprocessing/constants.py +2 -2
- mindsdb/interfaces/skills/skills_controller.py +2 -2
- mindsdb/interfaces/skills/sql_agent.py +6 -1
- mindsdb/interfaces/storage/db.py +1 -12
- mindsdb/migrations/versions/2025-02-09_4943359e354a_file_metadata.py +31 -0
- mindsdb/migrations/versions/2025-02-10_6ab9903fc59a_del_log_table.py +33 -0
- mindsdb/utilities/config.py +1 -0
- mindsdb/utilities/log.py +17 -2
- mindsdb/utilities/ml_task_queue/consumer.py +4 -2
- mindsdb/utilities/render/sqlalchemy_render.py +15 -5
- mindsdb/utilities/log_controller.py +0 -39
- mindsdb/utilities/telemetry.py +0 -44
- {MindsDB-25.2.2.2.dist-info → MindsDB-25.2.4.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.2.2.2.dist-info → MindsDB-25.2.4.0.dist-info}/WHEEL +0 -0
- {MindsDB-25.2.2.2.dist-info → MindsDB-25.2.4.0.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/file/file_controller.py
CHANGED

@@ -3,12 +3,17 @@ import os
 import shutil
 from pathlib import Path
 
-
+import pandas as pd
+
 from mindsdb.interfaces.storage import db
 from mindsdb.interfaces.storage.fs import FsStore
 from mindsdb.utilities import log
 from mindsdb.utilities.config import Config
 from mindsdb.utilities.context import context as ctx
+from sqlalchemy.orm.attributes import flag_modified
+
+from mindsdb.integrations.utilities.files.file_reader import FileReader
+
 
 logger = log.getLogger(__name__)
 
@@ -82,31 +87,38 @@ class FileController:
 
         file_dir = None
         try:
-
-
-            ds_meta = {"row_count": len(df), "column_names": list(df.columns)}
+            pages_files, pages_index = self.get_file_pages(file_path)
 
+            metadata = {
+                'is_feather': True,
+                'pages': pages_index
+            }
+            df = pages_files[0]
             file_record = db.File(
                 name=name,
                 company_id=ctx.company_id,
                 source_file_path=file_name,
                 file_path="",
-                row_count=ds_meta["row_count"],
-                columns=ds_meta["column_names"],
+                row_count=len(df),
+                columns=list(df.columns),
+                metadata_=metadata
             )
             db.session.add(file_record)
-            db.session.commit()
+            db.session.flush()
+
             store_file_path = f"file_{ctx.company_id}_{file_record.id}"
             file_record.file_path = store_file_path
-            db.session.commit()
 
             file_dir = Path(self.dir).joinpath(store_file_path)
             file_dir.mkdir(parents=True, exist_ok=True)
-
-
-
+
+            self.store_pages_as_feather(file_dir, pages_files)
+            # store original file
+            shutil.move(file_path, str(file_dir.joinpath(file_name)))
 
             self.fs_store.put(store_file_path, base_dir=self.dir)
+            db.session.commit()
+
         except Exception as e:
             logger.error(e)
             if file_dir is not None:
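The reworked save path replaces the old commit-twice sequence with a single commit: flush() issues the INSERT early so the autoincrement id is available for building the storage path, while the transaction stays open until the feather pages, the original file, and the fs_store upload have all succeeded. A minimal standalone sketch of that pattern in plain SQLAlchemy (the model here is illustrative, not MindsDB's):

from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.orm import declarative_base, Session

Base = declarative_base()

class FileRow(Base):
    __tablename__ = 'file'
    id = Column(Integer, primary_key=True)
    file_path = Column(String)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)

with Session(engine) as session:
    rec = FileRow(file_path='')
    session.add(rec)
    session.flush()                    # INSERT is sent, id is assigned
    rec.file_path = f'file_{rec.id}'   # id is usable before commit
    session.commit()                   # one commit once all work is done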
@@ -115,6 +127,39 @@ class FileController:
 
         return file_record.id
 
+    def get_file_pages(self, source_path: str):
+        """
+        Reads file and extract pages from it
+        Returned structures:
+         - page_files: dict with content, {page_num: dataframe}
+         - pages_index: dict, link between page name and num: {page_name: page_num}
+        """
+        file_reader = FileReader(path=source_path)
+        tables = file_reader.get_contents()
+
+        pages_files = {}
+        pages_index = {}
+        if len(tables) == 1:
+            df = list(tables.values())[0]
+            pages_files[0] = df
+        else:
+            # file has several pages, create a new one with info
+            df = pd.DataFrame(tables.keys(), columns=["Tables"])
+            pages_files[0] = df
+            for i, page_name in enumerate(tables.keys(), 1):
+                pages_files[i] = tables[page_name]
+                pages_index[page_name] = i
+        return pages_files, pages_index
+
+    def store_pages_as_feather(self, dest_dir: Path, pages_files: dict):
+        """
+        Stores pages in file storage dir in feather format
+        """
+
+        for num, df in pages_files.items():
+            dest = dest_dir.joinpath(f'{num}.feather')
+            df.to_feather(str(dest))
+
     def delete_file(self, name):
         file_record = (
             db.session.query(db.File)
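For context, the page model these two methods implement: page 0 is either the lone table or a generated index of table names, and every page lands as <num>.feather in the file's storage directory. A self-contained sketch of the same logic (assuming pandas with pyarrow installed; split_into_pages is an illustrative name, not MindsDB API):

from pathlib import Path
import pandas as pd

def split_into_pages(tables: dict) -> tuple:
    # mirrors get_file_pages: page 0 is the single table, or an index
    # DataFrame listing page names when the file has several tables
    pages_files, pages_index = {}, {}
    if len(tables) == 1:
        pages_files[0] = list(tables.values())[0]
    else:
        pages_files[0] = pd.DataFrame(list(tables), columns=["Tables"])
        for i, name in enumerate(tables, 1):
            pages_files[i] = tables[name]
            pages_index[name] = i
    return pages_files, pages_index

tables = {
    "sales": pd.DataFrame({"amount": [1, 2]}),
    "costs": pd.DataFrame({"amount": [3, 4]}),
}
pages_files, pages_index = split_into_pages(tables)

dest_dir = Path("file_storage")
dest_dir.mkdir(exist_ok=True)
for num, df in pages_files.items():
    # feather requires a default index; reset to be safe
    df.reset_index(drop=True).to_feather(dest_dir / f"{num}.feather")
# pages_index == {'sales': 1, 'costs': 2}; 0.feather lists the page names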
@@ -144,3 +189,87 @@ class FileController:
             .joinpath(file_dir)
             .joinpath(Path(file_record.source_file_path).name)
         )
+
+    def get_file_data(self, name: str, page_name: str = None) -> pd.DataFrame:
+        """
+        Returns file content as dataframe
+
+        :param name: name of file
+        :param page_name: page name, optional
+        :return: Page or file content
+        """
+        file_record = (
+            db.session.query(db.File)
+            .filter_by(company_id=ctx.company_id, name=name)
+            .first()
+        )
+        if file_record is None:
+            raise Exception(f"File '{name}' does not exists")
+
+        file_dir = f"file_{ctx.company_id}_{file_record.id}"
+        self.fs_store.get(file_dir, base_dir=self.dir)
+
+        metadata = file_record.metadata_ or {}
+        if metadata.get('is_feather') is not True:
+            # migrate file
+
+            file_path = (
+                Path(self.dir)
+                .joinpath(file_dir)
+                .joinpath(Path(file_record.source_file_path).name)
+            )
+
+            pages_files, pages_index = self.get_file_pages(str(file_path))
+
+            self.store_pages_as_feather(file_path.parent, pages_files)
+            metadata['is_feather'] = True
+            metadata['pages'] = pages_index
+
+            file_record.metadata_ = metadata
+            flag_modified(file_record, 'metadata_')
+            db.session.commit()
+
+        if page_name is None:
+            num = 0
+        else:
+            num = metadata.get('pages', {}).get(page_name)
+            if num is None:
+                raise KeyError(f'Page not found: {page_name}')
+
+        path = (
+            Path(self.dir)
+            .joinpath(file_dir)
+            .joinpath(f'{num}.feather')
+        )
+        return pd.read_feather(path)
+
+    def set_file_data(self, name: str, df: pd.DataFrame, page_name: str = None):
+        """
+        Save file content
+        :param name: name of file
+        :param df: content to store
+        :param page_name: name of page, optional
+        """
+
+        file_record = (
+            db.session.query(db.File)
+            .filter_by(company_id=ctx.company_id, name=name)
+            .first()
+        )
+        if file_record is None:
+            raise Exception(f"File '{name}' does not exists")
+
+        file_dir = f"file_{ctx.company_id}_{file_record.id}"
+        self.fs_store.get(file_dir, base_dir=self.dir)
+
+        num = 0
+        if page_name is not None and file_record.metadata_ is not None:
+            num = file_record.metadata_.get('pages', {}).get(page_name, 0)
+
+        path = (
+            Path(self.dir)
+            .joinpath(file_dir)
+            .joinpath(f'{num}.feather')
+        )
+        df.to_feather(path)
+        self.fs_store.put(file_dir, base_dir=self.dir)
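Taken together, a sketch of how the new accessors might be used. This is hypothetical usage: it assumes a FileController wired to storage and a previously uploaded multi-sheet file named 'my_workbook' with a 'sales' page.

fc = FileController()
# reads file_<company>_<id>/1.feather; legacy files are migrated to
# feather on first access and the page index is written to metadata
df = fc.get_file_data('my_workbook', page_name='sales')
df['amount'] = df['amount'] * 2
fc.set_file_data('my_workbook', df, page_name='sales')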
mindsdb/interfaces/jobs/scheduler.py
CHANGED

@@ -44,7 +44,7 @@ class Scheduler
         self.q_in = queue.Queue()
         self.q_out = queue.Queue()
         self.work_thread = threading.Thread(
-            target=execute_async, args=(self.q_in, self.q_out)
+            target=execute_async, args=(self.q_in, self.q_out), name='Scheduler.execute_async'
         )
         self.work_thread.start()
 
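The only change here is an explicit thread name, which is what appears in thread dumps and in logging's %(threadName)s, making stuck workers easier to identify. A trimmed sketch (the no-argument execute_async is a stand-in):

import threading
import time

def execute_async():
    time.sleep(0.1)

t = threading.Thread(target=execute_async, name='Scheduler.execute_async')
t.start()
print([th.name for th in threading.enumerate()])  # the name shows up in diagnostics
t.join()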
mindsdb/interfaces/skills/skills_controller.py
CHANGED

@@ -1,7 +1,7 @@
 import datetime
 from typing import Dict, List, Optional
 
-from sqlalchemy import null
+from sqlalchemy import null, func
 from sqlalchemy.orm.attributes import flag_modified
 
 from mindsdb.interfaces.storage import db
@@ -33,7 +33,7 @@ class SkillsController:
 
         project = self.project_controller.get(name=project_name)
         return db.Skills.query.filter(
-            db.Skills.name == skill_name,
+            func.lower(db.Skills.name) == func.lower(skill_name),
            db.Skills.project_id == project.id,
            db.Skills.deleted_at == null()
        ).first()
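Wrapping both sides in func.lower makes the skill lookup case-insensitive at the SQL level (it renders as LOWER(...) = LOWER(...)). A standalone sketch with an illustrative model:

from sqlalchemy import create_engine, Column, Integer, String, func, select
from sqlalchemy.orm import declarative_base, Session

Base = declarative_base()

class Skill(Base):
    __tablename__ = 'skills'
    id = Column(Integer, primary_key=True)
    name = Column(String)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(Skill(name='MySkill'))
    session.commit()
    # matches regardless of the caller's casing
    skill = session.execute(
        select(Skill).where(func.lower(Skill.name) == func.lower('MYSKILL'))
    ).scalar_one()
    print(skill.name)  # MySkill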
mindsdb/interfaces/skills/sql_agent.py
CHANGED

@@ -287,6 +287,7 @@ class SQLAgent:
         return info
 
     def _get_sample_rows(self, table: str, fields: List[str]) -> str:
+        logger.info(f'_get_sample_rows: table={table} fields={fields}')
         command = f"select {', '.join(fields)} from {table} limit {self._sample_rows_in_table_info};"
         try:
             ret = self._call_engine(command)
@@ -300,7 +301,7 @@ class SQLAgent:
                 map(lambda row: [truncate_value(value) for value in row], sample_rows))
             sample_rows_str = "\n" + list_to_csv_str([fields] + sample_rows)
         except Exception as e:
-            logger.
+            logger.info(f'_get_sample_rows error: {e}')
             sample_rows_str = "\n" + "\t [error] Couldn't retrieve sample rows!"
 
         return sample_rows_str
@@ -347,14 +348,18 @@ class SQLAgent:
 
     def get_table_info_safe(self, table_names: Optional[List[str]] = None) -> str:
         try:
+            logger.info(f'get_table_info_safe: {table_names}')
             return self.get_table_info(table_names)
         except Exception as e:
+            logger.info(f'get_table_info_safe error: {e}')
             return f"Error: {e}"
 
     def query_safe(self, command: str, fetch: str = "all") -> str:
         try:
+            logger.info(f'query_safe (fetch={fetch}): {command}')
             return self.query(command, fetch)
         except Exception as e:
+            logger.info(f'query_safe error: {e}')
             msg = f"Error: {e}"
             if 'does not exist' in msg and ' relation ' in msg:
                 msg += '\nAvailable tables: ' + ', '.join(self.get_usable_table_names())
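These sql_agent changes only add logging around the "safe" wrappers, so failed agent tool calls leave a trace. The pattern, reduced to a runnable sketch (run_query is a stand-in executor, not MindsDB API):

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def run_query(command: str) -> str:
    # stand-in executor that always fails, to exercise the error path
    raise RuntimeError('relation "t" does not exist')

def query_safe(command: str) -> str:
    logger.info(f'query_safe: {command}')
    try:
        return run_query(command)
    except Exception as e:
        # log and return the error as text instead of raising,
        # so the calling agent can react to it
        logger.info(f'query_safe error: {e}')
        return f"Error: {e}"

print(query_safe('select 1 from t'))  # Error: relation "t" does not exist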
mindsdb/interfaces/storage/db.py
CHANGED

@@ -218,18 +218,6 @@ class Project(Base):
     )
 
 
-class Log(Base):
-    __tablename__ = "log"
-
-    id = Column(Integer, primary_key=True)
-    created_at = Column(DateTime, default=datetime.datetime.now)
-    log_type = Column(String)  # log, info, warning, traceback etc
-    source = Column(String)  # file + line
-    company_id = Column(Integer)
-    payload = Column(String)
-    created_at_index = Index("some_index", "created_at_index")
-
-
 class Integration(Base):
     __tablename__ = "integration"
     id = Column(Integer, primary_key=True)
@@ -258,6 +246,7 @@ class File(Base):
     row_count = Column(Integer, nullable=False)
     columns = Column(Json, nullable=False)
     created_at = Column(DateTime, default=datetime.datetime.now)
+    metadata_: dict = Column("metadata", JSON, nullable=True)
     updated_at = Column(
         DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
     )
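The new metadata_ column stores a JSON document, and the file_controller code above mutates it in place; SQLAlchemy does not track in-place changes to plain JSON columns, which is why flag_modified appears alongside it. A standalone sketch (illustrative model, not MindsDB's):

from sqlalchemy import create_engine, Column, Integer, JSON
from sqlalchemy.orm import declarative_base, Session
from sqlalchemy.orm.attributes import flag_modified

Base = declarative_base()

class FileRow(Base):
    __tablename__ = 'file'
    id = Column(Integer, primary_key=True)
    metadata_ = Column("metadata", JSON, nullable=True)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)

with Session(engine) as session:
    rec = FileRow(metadata_={'is_feather': False})
    session.add(rec)
    session.commit()

    rec.metadata_['is_feather'] = True   # in-place change: not auto-detected
    flag_modified(rec, 'metadata_')      # mark the attribute dirty explicitly
    session.commit()                     # UPDATE is now emitted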
mindsdb/migrations/versions/2025-02-09_4943359e354a_file_metadata.py
ADDED

@@ -0,0 +1,31 @@
+"""file_metadata
+
+Revision ID: 4943359e354a
+Revises: c06c35f7e8e1
+Create Date: 2025-02-09 10:10:55.577407
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import mindsdb.interfaces.storage.db  # noqa
+
+
+# revision identifiers, used by Alembic.
+revision = '4943359e354a'
+down_revision = 'c06c35f7e8e1'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('file', schema=None) as batch_op:
+        batch_op.add_column(sa.Column('metadata', sa.JSON(), nullable=True))
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('file', schema=None) as batch_op:
+        batch_op.drop_column('metadata')
+    # ### end Alembic commands ###
mindsdb/migrations/versions/2025-02-10_6ab9903fc59a_del_log_table.py
ADDED

@@ -0,0 +1,33 @@
+"""del_log_table
+
+Revision ID: 6ab9903fc59a
+Revises: 4943359e354a
+Create Date: 2025-02-10 16:50:27.186697
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import mindsdb.interfaces.storage.db  # noqa
+
+# revision identifiers, used by Alembic.
+revision = '6ab9903fc59a'
+down_revision = '4943359e354a'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    op.drop_table('log')
+
+
+def downgrade():
+    op.create_table(
+        'log',
+        sa.Column('id', sa.INTEGER(), nullable=False),
+        sa.Column('created_at', sa.DATETIME(), nullable=True),
+        sa.Column('log_type', sa.VARCHAR(), nullable=True),
+        sa.Column('source', sa.VARCHAR(), nullable=True),
+        sa.Column('company_id', sa.INTEGER(), nullable=True),
+        sa.Column('payload', sa.VARCHAR(), nullable=True),
+        sa.PrimaryKeyConstraint('id')
+    )
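The two migrations chain c06c35f7e8e1 → 4943359e354a → 6ab9903fc59a, so one upgrade applies the new file.metadata column and then the log table drop in order. MindsDB normally runs migrations itself on startup; applying them by hand would look roughly like this sketch (assumes an alembic.ini configured for the MindsDB database):

from alembic import command
from alembic.config import Config

cfg = Config('alembic.ini')           # path to the configured Alembic ini
command.upgrade(cfg, 'head')          # applies 4943359e354a, then 6ab9903fc59a
# command.downgrade(cfg, '-1')        # would recreate the dropped log table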
mindsdb/utilities/config.py
CHANGED
mindsdb/utilities/log.py
CHANGED

@@ -1,3 +1,4 @@
+import json
 import logging
 from logging.config import dictConfig
 
@@ -7,6 +8,19 @@ from mindsdb.utilities.config import config as app_config
 logging_initialized = False
 
 
+class JsonFormatter(logging.Formatter):
+    def format(self, record):
+        record_message = super().format(record)
+        log_record = {
+            'process_name': record.processName,
+            'name': record.name,
+            'message': record_message,
+            'level': record.levelname,
+            'time': record.created
+        }
+        return json.dumps(log_record)
+
+
 class ColorFormatter(logging.Formatter):
     green = "\x1b[32;20m"
     default = "\x1b[39;20m"
@@ -53,7 +67,7 @@ def configure_logging():
     if console_handler_config['enabled'] is True:
         handlers_config['console'] = {
             "class": "logging.StreamHandler",
-            "formatter":
+            "formatter": console_handler_config.get('formatter', 'default'),
             "level": console_handler_config_level
         }
 
@@ -74,7 +88,8 @@ def configure_logging():
     logging_config = dict(
         version=1,
         formatters={
-            "
+            "default": {"()": ColorFormatter},
+            "json": {"()": JsonFormatter},
             "file": {
                 "format": "%(asctime)s %(processName)15s %(levelname)-8s %(name)s: %(message)s"
             }
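The new formatter emits one JSON object per record and is selected when the console handler's config sets 'formatter' to 'json' (the default stays on the color formatter). A trimmed, runnable sketch of the same idea:

import json
import logging

class JsonFormatter(logging.Formatter):
    def format(self, record):
        return json.dumps({
            'process_name': record.processName,
            'name': record.name,
            'message': super().format(record),
            'level': record.levelname,
            'time': record.created,
        })

handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter())
logger = logging.getLogger('demo')
logger.addHandler(handler)
logger.warning('hello')
# -> {"process_name": "MainProcess", "name": "demo", "message": "hello", "level": "WARNING", "time": ...}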
mindsdb/utilities/ml_task_queue/consumer.py
CHANGED

@@ -74,7 +74,9 @@ class MLTaskConsumer(BaseRedisQueue):
 
         # region collect cpu usage statistic
         self.cpu_stat = [0] * 10
-        self._collect_cpu_stat_thread = threading.Thread(
+        self._collect_cpu_stat_thread = threading.Thread(
+            target=self._collect_cpu_stat, name='MLTaskConsumer._collect_cpu_stat'
+        )
         self._collect_cpu_stat_thread.start()
         # endregion
 
@@ -221,7 +223,7 @@ class MLTaskConsumer(BaseRedisQueue):
             if self._ready_event.is_set() is False:
                 continue
             self._ready_event.clear()
-            threading.Thread(target=self._listen).start()
+            threading.Thread(target=self._listen, name='MLTaskConsumer._listen').start()
         self.stop()
 
     def stop(self) -> None:
mindsdb/utilities/render/sqlalchemy_render.py
CHANGED

@@ -63,6 +63,10 @@ class AttributedStr(str):
         obj.is_quoted = is_quoted
         return obj
 
+    def replace(self, *args):
+        obj = super().replace(*args)
+        return AttributedStr(obj, self.is_quoted)
+
 
 def get_is_quoted(identifier: ast.Identifier):
     quoted = getattr(identifier, 'is_quoted', [])
@@ -93,9 +97,6 @@ class SqlalchemyRender:
         if hasattr(dialect, 'preparer'):
             class Preparer(dialect.preparer):
 
-                def __init__(self, *args, **kwargs):
-                    super().__init__(*args, **kwargs)
-
                 def _requires_quotes(self, value: str) -> bool:
                     # check force-quote flag
                     if isinstance(value, AttributedStr):
@@ -242,6 +243,8 @@ class SqlalchemyRender:
 
         op = t.op.lower()
         if op in ('in', 'not in'):
+            if t.args[1].parentheses:
+                arg1 = [arg1]
             if isinstance(arg1, sa.sql.selectable.ColumnClause):
                 raise NotImplementedError(f'Required list argument for: {op}')
 
@@ -536,12 +539,19 @@ class SqlalchemyRender:
             query = query.select_from(table)
 
         # other tables
+        has_explicit_join = False
         for item in join_list[1:]:
             table = self.to_table(item['table'])
             if item['is_implicit']:
                 # add to from clause
-                query = query.select_from(table)
+                if has_explicit_join:
+                    # sqlalchemy doesn't support implicit join after explicit
+                    # convert it to explicit
+                    query = query.join(table, sa.text('1=1'))
+                else:
+                    query = query.select_from(table)
             else:
+                has_explicit_join = True
                 if item['condition'] is None:
                     # otherwise, sqlalchemy raises "Don't know how to join to ..."
                     condition = sa.text('1=1')
@@ -564,7 +574,7 @@ class SqlalchemyRender:
                 condition,
                 full=is_full
             )
-        elif isinstance(from_table, ast.Union):
+        elif isinstance(from_table, (ast.Union, ast.Intersect, ast.Except)):
             alias = None
             if from_table.alias:
                 alias = self.get_alias(from_table.alias)
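The replace override fixes a subtle str-subclass bug: str.replace returns a plain str, silently dropping the is_quoted flag that the renderer's preparer checks. Reduced to a sketch:

class AttributedStr(str):
    def __new__(cls, value, is_quoted=False):
        obj = super().__new__(cls, value)
        obj.is_quoted = is_quoted
        return obj

    def replace(self, *args):
        # str.replace would return a plain str and lose is_quoted
        return AttributedStr(super().replace(*args), self.is_quoted)

s = AttributedStr('order', is_quoted=True)
t = s.replace('o', '0')
print(type(t).__name__, t.is_quoted)  # AttributedStr True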
mindsdb/utilities/log_controller.py
DELETED

@@ -1,39 +0,0 @@
-from mindsdb.interfaces.storage import db
-from mindsdb.utilities.context import context as ctx
-
-
-def fmt_log_record(log_record):
-    return {
-        'log_from': 'mindsdb',
-        'level': log_record.log_type,
-        'context': 'unknown',
-        'text': log_record.payload,
-        'created_at': str(log_record.created_at).split('.')[0]
-    }
-
-
-def get_logs(min_timestamp, max_timestamp, context, level, log_from, limit):
-    logs = db.session.query(db.Log).filter(
-        db.Log.company_id == ctx.company_id,
-        db.Log.created_at > min_timestamp
-    )
-
-    if max_timestamp is not None:
-        logs = logs.filter(db.Log.created_at < max_timestamp)
-
-    if context is not None:
-        # e.g. datasource/predictor and assoicated id
-        pass
-
-    if level is not None:
-        logs = logs.filter(db.Log.log_type == level)
-
-    if log_from is not None:
-        # mindsdb/native/lightwood/all
-        pass
-
-    if limit is not None:
-        logs = logs.limit(limit)
-
-    logs = [fmt_log_record(x) for x in logs]
-    return logs
mindsdb/utilities/telemetry.py
DELETED

@@ -1,44 +0,0 @@
-import os
-from pathlib import Path
-
-TELEMETRY_FILE = 'telemetry.lock'
-
-
-def enable_telemetry(storage_dir):
-    os.environ['CHECK_FOR_UPDATES'] = '1'
-    path = os.path.join(storage_dir, TELEMETRY_FILE)
-    if os.path.exists(path):
-        os.remove(path)
-
-
-def disable_telemetry(storage_dir):
-    os.environ['CHECK_FOR_UPDATES'] = '0'
-    path = os.path.join(storage_dir, TELEMETRY_FILE)
-    with open(path, 'w') as _:
-        pass
-
-
-def telemetry_file_exists(storage_dir):
-    path = os.path.join(storage_dir, TELEMETRY_FILE)
-    return os.path.exists(path)
-
-
-def inject_telemetry_to_static(static_folder):
-    TEXT = '<script>localStorage.isTestUser = true;</script>'
-    index = Path(static_folder).joinpath('index.html')
-    disable_telemetry = os.getenv('CHECK_FOR_UPDATES', '1').lower() in ['0', 'false', 'False']
-    if index.is_file():
-        with open(str(index), 'rt') as f:
-            content = f.read()
-        script_index = content.find('<script>')
-        need_update = True
-        if TEXT not in content and disable_telemetry:
-            content = content[:script_index] + TEXT + content[script_index:]
-        elif not disable_telemetry and TEXT in content:
-            content = content.replace(TEXT, '')
-        else:
-            need_update = False
-
-        if need_update:
-            with open(str(index), 'wt') as f:
-                f.write(content)