MindsDB 25.3.3.0__py3-none-any.whl → 25.3.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +2 -6
- mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +1 -1
- mindsdb/api/http/namespaces/agents.py +9 -5
- mindsdb/api/http/namespaces/chatbots.py +6 -5
- mindsdb/api/http/namespaces/databases.py +5 -6
- mindsdb/api/http/namespaces/skills.py +5 -4
- mindsdb/api/http/namespaces/views.py +6 -7
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +23 -2
- mindsdb/integrations/handlers/dummy_data_handler/dummy_data_handler.py +16 -6
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +64 -83
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +5 -4
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +5 -5
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_graph_api_one_drive_client.py +1 -1
- mindsdb/integrations/handlers/ms_teams_handler/ms_graph_api_teams_client.py +278 -0
- mindsdb/integrations/handlers/ms_teams_handler/ms_teams_handler.py +114 -70
- mindsdb/integrations/handlers/ms_teams_handler/ms_teams_tables.py +431 -0
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +18 -4
- mindsdb/integrations/libs/vectordatabase_handler.py +2 -2
- mindsdb/integrations/utilities/files/file_reader.py +3 -3
- mindsdb/integrations/utilities/handlers/api_utilities/microsoft/ms_graph_api_utilities.py +36 -2
- mindsdb/integrations/utilities/rag/settings.py +1 -0
- mindsdb/interfaces/chatbot/chatbot_controller.py +6 -4
- mindsdb/interfaces/jobs/jobs_controller.py +1 -4
- mindsdb/interfaces/knowledge_base/controller.py +9 -28
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +1 -1
- mindsdb/interfaces/skills/skills_controller.py +8 -7
- {mindsdb-25.3.3.0.dist-info → mindsdb-25.3.4.0.dist-info}/METADATA +221 -218
- {mindsdb-25.3.3.0.dist-info → mindsdb-25.3.4.0.dist-info}/RECORD +32 -30
- {mindsdb-25.3.3.0.dist-info → mindsdb-25.3.4.0.dist-info}/WHEEL +1 -1
- {mindsdb-25.3.3.0.dist-info → mindsdb-25.3.4.0.dist-info/licenses}/LICENSE +0 -0
- {mindsdb-25.3.3.0.dist-info → mindsdb-25.3.4.0.dist-info}/top_level.txt +0 -0
mindsdb/__about__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
__title__ = 'MindsDB'
|
|
2
2
|
__package_name__ = 'mindsdb'
|
|
3
|
-
__version__ = '25.3.3.0'
|
|
3
|
+
__version__ = '25.3.4.0'
|
|
4
4
|
__description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
|
|
5
5
|
__email__ = "jorge@mindsdb.com"
|
|
6
6
|
__author__ = 'MindsDB Inc'
|
|
@@ -3,12 +3,8 @@ import pandas as pd
|
|
|
3
3
|
from mindsdb_sql_parser.ast.base import ASTNode
|
|
4
4
|
|
|
5
5
|
from mindsdb.api.executor.datahub.datanodes.datanode import DataNode
|
|
6
|
-
from mindsdb.api.executor.datahub.datanodes.integration_datanode import (
|
|
7
|
-
IntegrationDataNode,
|
|
8
|
-
)
|
|
9
|
-
from mindsdb.api.executor.datahub.datanodes.project_datanode import (
|
|
10
|
-
ProjectDataNode,
|
|
11
|
-
)
|
|
6
|
+
from mindsdb.api.executor.datahub.datanodes.integration_datanode import IntegrationDataNode
|
|
7
|
+
from mindsdb.api.executor.datahub.datanodes.project_datanode import ProjectDataNode
|
|
12
8
|
from mindsdb.api.executor import exceptions as exc
|
|
13
9
|
from mindsdb.api.executor.utilities.sql import query_df
|
|
14
10
|
from mindsdb.api.executor.utilities.sql import get_query_tables
|
|
@@ -310,7 +310,7 @@ class ChatbotsTable(MdbTable):
|
|
|
310
310
|
):
|
|
311
311
|
project_name = query.where.args[1].value
|
|
312
312
|
|
|
313
|
-
chatbot_data = chatbot_controller.get_chatbots(project_name)
|
|
313
|
+
chatbot_data = chatbot_controller.get_chatbots(project_name=project_name)
|
|
314
314
|
|
|
315
315
|
columns = cls.columns
|
|
316
316
|
columns_lower = [col.lower() for col in columns]
|
|
@@ -47,7 +47,7 @@ def create_agent(project_name, name, agent):
|
|
|
47
47
|
|
|
48
48
|
try:
|
|
49
49
|
existing_agent = agents_controller.get_agent(name, project_name=project_name)
|
|
50
|
-
except ValueError:
|
|
50
|
+
except (ValueError, EntityNotExistsError):
|
|
51
51
|
# Project must exist.
|
|
52
52
|
return http_error(
|
|
53
53
|
HTTPStatus.NOT_FOUND,
|
|
@@ -141,7 +141,7 @@ class AgentResource(Resource):
|
|
|
141
141
|
f'Agent with name {agent_name} does not exist'
|
|
142
142
|
)
|
|
143
143
|
return existing_agent.as_dict()
|
|
144
|
-
except ValueError:
|
|
144
|
+
except (ValueError, EntityNotExistsError):
|
|
145
145
|
# Project needs to exist.
|
|
146
146
|
return http_error(
|
|
147
147
|
HTTPStatus.NOT_FOUND,
|
|
@@ -173,7 +173,11 @@ class AgentResource(Resource):
|
|
|
173
173
|
f'Project with name {project_name} does not exist'
|
|
174
174
|
)
|
|
175
175
|
if existing_agent_record is None:
|
|
176
|
-
|
|
176
|
+
return http_error(
|
|
177
|
+
HTTPStatus.BAD_REQUEST,
|
|
178
|
+
'Creation is not allowed',
|
|
179
|
+
'Creation of an agent using the PUT method is not allowed.'
|
|
180
|
+
)
|
|
177
181
|
|
|
178
182
|
agent = request.json['agent']
|
|
179
183
|
name = agent.get('name', None)
|
|
@@ -272,7 +276,7 @@ class AgentResource(Resource):
|
|
|
272
276
|
'Agent not found',
|
|
273
277
|
f'Agent with name {agent_name} does not exist'
|
|
274
278
|
)
|
|
275
|
-
except ValueError:
|
|
279
|
+
except (ValueError, EntityNotExistsError):
|
|
276
280
|
# Project needs to exist.
|
|
277
281
|
return http_error(
|
|
278
282
|
HTTPStatus.NOT_FOUND,
|
|
@@ -435,7 +439,7 @@ class AgentCompletions(Resource):
|
|
|
435
439
|
'Agent not found',
|
|
436
440
|
f'Agent with name {agent_name} does not exist'
|
|
437
441
|
)
|
|
438
|
-
except ValueError:
|
|
442
|
+
except (ValueError, EntityNotExistsError):
|
|
439
443
|
# Project needs to exist.
|
|
440
444
|
return http_error(
|
|
441
445
|
HTTPStatus.NOT_FOUND,
|
|
@@ -11,6 +11,7 @@ from mindsdb.metrics.metrics import api_endpoint_metrics
|
|
|
11
11
|
from mindsdb.interfaces.chatbot.chatbot_controller import ChatBotController
|
|
12
12
|
from mindsdb.interfaces.model.functions import PredictorRecordNotFound
|
|
13
13
|
from mindsdb.interfaces.storage.db import Predictor
|
|
14
|
+
from mindsdb.utilities.exception import EntityNotExistsError
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
def create_chatbot(project_name, name, chatbot):
|
|
@@ -59,7 +60,7 @@ def create_chatbot(project_name, name, chatbot):
|
|
|
59
60
|
|
|
60
61
|
try:
|
|
61
62
|
existing_chatbot = chatbot_controller.get_chatbot(name, project_name=project_name)
|
|
62
|
-
except
|
|
63
|
+
except EntityNotExistsError:
|
|
63
64
|
# Project must exist.
|
|
64
65
|
return http_error(
|
|
65
66
|
HTTPStatus.NOT_FOUND,
|
|
@@ -152,7 +153,7 @@ class ChatBotsResource(Resource):
|
|
|
152
153
|
chatbot_controller = ChatBotController()
|
|
153
154
|
try:
|
|
154
155
|
all_bots = chatbot_controller.get_chatbots(project_name)
|
|
155
|
-
except ValueError:
|
|
156
|
+
except (ValueError, EntityNotExistsError):
|
|
156
157
|
# Project needs to exist.
|
|
157
158
|
return http_error(
|
|
158
159
|
HTTPStatus.NOT_FOUND,
|
|
@@ -197,7 +198,7 @@ class ChatBotResource(Resource):
|
|
|
197
198
|
f'Chatbot with name {chatbot_name} does not exist'
|
|
198
199
|
)
|
|
199
200
|
return existing_chatbot
|
|
200
|
-
except ValueError:
|
|
201
|
+
except (ValueError, EntityNotExistsError):
|
|
201
202
|
# Project needs to exist.
|
|
202
203
|
return http_error(
|
|
203
204
|
HTTPStatus.NOT_FOUND,
|
|
@@ -221,7 +222,7 @@ class ChatBotResource(Resource):
|
|
|
221
222
|
|
|
222
223
|
try:
|
|
223
224
|
existing_chatbot = chatbot_controller.get_chatbot(chatbot_name, project_name=project_name)
|
|
224
|
-
except
|
|
225
|
+
except EntityNotExistsError:
|
|
225
226
|
# Project needs to exist.
|
|
226
227
|
return http_error(
|
|
227
228
|
HTTPStatus.NOT_FOUND,
|
|
@@ -306,7 +307,7 @@ class ChatBotResource(Resource):
|
|
|
306
307
|
'Chatbot not found',
|
|
307
308
|
f'Chatbot with name {chatbot_name} does not exist'
|
|
308
309
|
)
|
|
309
|
-
except
|
|
310
|
+
except EntityNotExistsError:
|
|
310
311
|
# Project needs to exist.
|
|
311
312
|
return http_error(
|
|
312
313
|
HTTPStatus.NOT_FOUND,
|
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
from http import HTTPStatus
|
|
2
|
-
import tempfile
|
|
3
1
|
import time
|
|
2
|
+
import shutil
|
|
3
|
+
import tempfile
|
|
4
|
+
from http import HTTPStatus
|
|
4
5
|
from typing import Dict
|
|
5
6
|
from pathlib import Path
|
|
6
|
-
import shutil
|
|
7
|
-
from sqlalchemy.exc import NoResultFound
|
|
8
7
|
|
|
9
8
|
from flask import request
|
|
10
9
|
from flask_restx import Resource
|
|
@@ -337,7 +336,7 @@ class TablesList(Resource):
|
|
|
337
336
|
HTTPStatus.BAD_REQUEST, 'Error',
|
|
338
337
|
error_message
|
|
339
338
|
)
|
|
340
|
-
except NoResultFound:
|
|
339
|
+
except EntityNotExistsError:
|
|
341
340
|
# Only support creating tables from integrations.
|
|
342
341
|
pass
|
|
343
342
|
|
|
@@ -419,7 +418,7 @@ class TableResource(Resource):
|
|
|
419
418
|
+ f'If you want to delete a model or view, use the projects/{database_name}/models/{table_name} or ' \
|
|
420
419
|
+ f'projects/{database_name}/views/{table_name} endpoints instead.'
|
|
421
420
|
return http_error(HTTPStatus.BAD_REQUEST, 'Error', error_message)
|
|
422
|
-
except NoResultFound:
|
|
421
|
+
except EntityNotExistsError:
|
|
423
422
|
# Only support dropping tables from integrations.
|
|
424
423
|
pass
|
|
425
424
|
|
|
@@ -7,6 +7,7 @@ from mindsdb.metrics.metrics import api_endpoint_metrics
|
|
|
7
7
|
from mindsdb.api.http.namespaces.configs.projects import ns_conf
|
|
8
8
|
from mindsdb.api.http.utils import http_error
|
|
9
9
|
from mindsdb.interfaces.skills.skills_controller import SkillsController
|
|
10
|
+
from mindsdb.utilities.exception import EntityNotExistsError
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
def create_skill(project_name, skill):
|
|
@@ -52,7 +53,7 @@ class SkillsResource(Resource):
|
|
|
52
53
|
skills_controller = SkillsController()
|
|
53
54
|
try:
|
|
54
55
|
all_skills = skills_controller.get_skills(project_name)
|
|
55
|
-
except
|
|
56
|
+
except EntityNotExistsError:
|
|
56
57
|
# Project needs to exist.
|
|
57
58
|
return http_error(
|
|
58
59
|
HTTPStatus.NOT_FOUND,
|
|
@@ -88,7 +89,7 @@ class SkillResource(Resource):
|
|
|
88
89
|
skills_controller = SkillsController()
|
|
89
90
|
try:
|
|
90
91
|
existing_skill = skills_controller.get_skill(skill_name, project_name)
|
|
91
|
-
except
|
|
92
|
+
except EntityNotExistsError:
|
|
92
93
|
# Project needs to exist
|
|
93
94
|
return http_error(
|
|
94
95
|
HTTPStatus.NOT_FOUND,
|
|
@@ -120,7 +121,7 @@ class SkillResource(Resource):
|
|
|
120
121
|
|
|
121
122
|
try:
|
|
122
123
|
existing_skill = skills_controller.get_skill(skill_name, project_name)
|
|
123
|
-
except
|
|
124
|
+
except EntityNotExistsError:
|
|
124
125
|
# Project needs to exist
|
|
125
126
|
return http_error(
|
|
126
127
|
HTTPStatus.NOT_FOUND,
|
|
@@ -152,7 +153,7 @@ class SkillResource(Resource):
|
|
|
152
153
|
skills_controller = SkillsController()
|
|
153
154
|
try:
|
|
154
155
|
existing_skill = skills_controller.get_skill(skill_name, project_name)
|
|
155
|
-
except
|
|
156
|
+
except EntityNotExistsError:
|
|
156
157
|
# Project needs to exist
|
|
157
158
|
return http_error(
|
|
158
159
|
HTTPStatus.NOT_FOUND,
|
|
@@ -2,13 +2,12 @@ from http import HTTPStatus
|
|
|
2
2
|
|
|
3
3
|
from flask import request
|
|
4
4
|
from flask_restx import Resource
|
|
5
|
-
from sqlalchemy.exc import NoResultFound
|
|
6
|
-
|
|
7
5
|
|
|
8
6
|
from mindsdb.api.http.utils import http_error
|
|
9
7
|
from mindsdb.api.http.namespaces.configs.projects import ns_conf
|
|
10
8
|
from mindsdb.api.executor.controllers.session_controller import SessionController
|
|
11
9
|
from mindsdb.metrics.metrics import api_endpoint_metrics
|
|
10
|
+
from mindsdb.utilities.exception import EntityNotExistsError
|
|
12
11
|
|
|
13
12
|
|
|
14
13
|
@ns_conf.route('/<project_name>/views')
|
|
@@ -20,7 +19,7 @@ class ViewsList(Resource):
|
|
|
20
19
|
session = SessionController()
|
|
21
20
|
try:
|
|
22
21
|
project = session.database_controller.get_project(project_name)
|
|
23
|
-
except NoResultFound:
|
|
22
|
+
except EntityNotExistsError:
|
|
24
23
|
return http_error(
|
|
25
24
|
HTTPStatus.NOT_FOUND,
|
|
26
25
|
'Project not found',
|
|
@@ -55,7 +54,7 @@ class ViewsList(Resource):
|
|
|
55
54
|
|
|
56
55
|
try:
|
|
57
56
|
project = session.database_controller.get_project(project_name)
|
|
58
|
-
except NoResultFound:
|
|
57
|
+
except EntityNotExistsError:
|
|
59
58
|
return http_error(HTTPStatus.NOT_FOUND, 'Not found', f'Project name {project_name} does not exist')
|
|
60
59
|
|
|
61
60
|
if project.get_view(name) is not None:
|
|
@@ -82,7 +81,7 @@ class ViewResource(Resource):
|
|
|
82
81
|
session = SessionController()
|
|
83
82
|
try:
|
|
84
83
|
project = session.database_controller.get_project(project_name)
|
|
85
|
-
except NoResultFound:
|
|
84
|
+
except EntityNotExistsError:
|
|
86
85
|
return http_error(HTTPStatus.NOT_FOUND, 'Project not found', f'Project name {project_name} does not exist')
|
|
87
86
|
|
|
88
87
|
view = project.get_view(view_name)
|
|
@@ -106,7 +105,7 @@ class ViewResource(Resource):
|
|
|
106
105
|
session = SessionController()
|
|
107
106
|
try:
|
|
108
107
|
project = session.database_controller.get_project(project_name)
|
|
109
|
-
except NoResultFound:
|
|
108
|
+
except EntityNotExistsError:
|
|
110
109
|
return http_error(HTTPStatus.NOT_FOUND, 'Project not found', f'Project name {project_name} does not exist')
|
|
111
110
|
|
|
112
111
|
existing_view = project.get_view(view_name)
|
|
@@ -143,7 +142,7 @@ class ViewResource(Resource):
|
|
|
143
142
|
session = SessionController()
|
|
144
143
|
try:
|
|
145
144
|
project = session.database_controller.get_project(project_name)
|
|
146
|
-
except NoResultFound:
|
|
145
|
+
except EntityNotExistsError:
|
|
147
146
|
return http_error(HTTPStatus.NOT_FOUND, 'Project not found', f'Project name {project_name} does not exist')
|
|
148
147
|
|
|
149
148
|
if project.get_view(view_name) is None:
|
|
@@ -210,6 +210,7 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
210
210
|
chroma_db_conditions = []
|
|
211
211
|
for condition in metadata_conditions:
|
|
212
212
|
metadata_key = condition.column.split(".")[-1]
|
|
213
|
+
|
|
213
214
|
chroma_db_conditions.append(
|
|
214
215
|
{
|
|
215
216
|
metadata_key: {
|
|
@@ -310,9 +311,29 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
310
311
|
payload = {column: payload[column] for column in columns}
|
|
311
312
|
|
|
312
313
|
# always include distance
|
|
314
|
+
distance_filter = None
|
|
315
|
+
distance_col = TableField.DISTANCE.value
|
|
313
316
|
if distances is not None:
|
|
314
|
-
payload[TableField.DISTANCE.value] = distances
|
|
315
|
-
|
|
317
|
+
payload[distance_col] = distances
|
|
318
|
+
|
|
319
|
+
for cond in conditions:
|
|
320
|
+
if cond.column == distance_col:
|
|
321
|
+
distance_filter = cond
|
|
322
|
+
break
|
|
323
|
+
|
|
324
|
+
df = pd.DataFrame(payload)
|
|
325
|
+
if distance_filter is not None:
|
|
326
|
+
op_map = {
|
|
327
|
+
'<': '__lt__',
|
|
328
|
+
'<=': '__le__',
|
|
329
|
+
'>': '__gt__',
|
|
330
|
+
'>=': '__ge__',
|
|
331
|
+
'=': '__eq__',
|
|
332
|
+
}
|
|
333
|
+
op = op_map.get(distance_filter.op.value)
|
|
334
|
+
if op:
|
|
335
|
+
df = df[getattr(df[distance_col], op)(distance_filter.value)]
|
|
336
|
+
return df
|
|
316
337
|
|
|
317
338
|
def _dataframe_metadata_to_chroma_metadata(self, metadata: Union[Dict[str, str], str]) -> Optional[Dict[str, str]]:
|
|
318
339
|
"""Convert DataFrame metadata to ChromaDB compatible metadata format"""
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import time
|
|
2
|
+
from typing import Optional, List
|
|
2
3
|
|
|
3
4
|
import duckdb
|
|
4
5
|
from typing import Any
|
|
@@ -36,18 +37,27 @@ class DummyHandler(DatabaseHandler):
|
|
|
36
37
|
"""
|
|
37
38
|
return HandlerStatusResponse(success=True)
|
|
38
39
|
|
|
39
|
-
def native_query(self, query: Any) -> HandlerResponse:
|
|
40
|
+
def native_query(self, query: Any, params: Optional[List] = None) -> HandlerResponse:
|
|
40
41
|
"""Receive raw query and act upon it somehow
|
|
41
42
|
|
|
42
43
|
Args:
|
|
43
|
-
query (Any): query in native format (str for sql databases,
|
|
44
|
-
|
|
44
|
+
query (Any): query in native format (str for sql databases, dict for mongo, etc)
|
|
45
|
+
params (Optional[List])
|
|
45
46
|
|
|
46
47
|
Returns:
|
|
47
48
|
HandlerResponse
|
|
48
49
|
"""
|
|
49
50
|
con = duckdb.connect(self.db_path)
|
|
50
|
-
|
|
51
|
+
if params is not None:
|
|
52
|
+
query = query.replace('%s', '?')
|
|
53
|
+
cur = con.executemany(query, params)
|
|
54
|
+
if cur.rowcount >= 0:
|
|
55
|
+
result_df = cur.fetchdf()
|
|
56
|
+
else:
|
|
57
|
+
con.close()
|
|
58
|
+
return HandlerResponse(RESPONSE_TYPE.OK)
|
|
59
|
+
else:
|
|
60
|
+
result_df = con.execute(query).fetchdf()
|
|
51
61
|
con.close()
|
|
52
62
|
return HandlerResponse(RESPONSE_TYPE.TABLE, result_df)
|
|
53
63
|
|
|
@@ -62,8 +72,8 @@ class DummyHandler(DatabaseHandler):
|
|
|
62
72
|
HandlerResponse
|
|
63
73
|
"""
|
|
64
74
|
renderer = SqlalchemyRender('postgres')
|
|
65
|
-
query_str = renderer.
|
|
66
|
-
return self.native_query(query_str)
|
|
75
|
+
query_str, params = renderer.get_exec_params(query, with_failback=True)
|
|
76
|
+
return self.native_query(query_str, params)
|
|
67
77
|
|
|
68
78
|
def get_tables(self) -> HandlerResponse:
|
|
69
79
|
"""Get a list of all the tables in the database
|
|
@@ -8,7 +8,6 @@ import pandas
|
|
|
8
8
|
import pytest
|
|
9
9
|
from mindsdb_sql_parser.exceptions import ParsingException
|
|
10
10
|
from mindsdb_sql_parser.ast import CreateTable, DropTables, Identifier, Insert, TableColumn, Update
|
|
11
|
-
from pytest_lazyfixture import lazy_fixture
|
|
12
11
|
|
|
13
12
|
from mindsdb.integrations.handlers.file_handler.file_handler import FileHandler
|
|
14
13
|
from mindsdb.integrations.libs.response import RESPONSE_TYPE
|
|
@@ -75,33 +74,26 @@ def curr_dir():
|
|
|
75
74
|
return os.path.dirname(os.path.realpath(__file__))
|
|
76
75
|
|
|
77
76
|
|
|
78
|
-
# Fixtures to get a path to a partiular type of file
|
|
79
|
-
@pytest.fixture
|
|
80
77
|
def csv_file() -> str:
|
|
81
78
|
return os.path.join(curr_dir(), "data", "test.csv")
|
|
82
79
|
|
|
83
80
|
|
|
84
|
-
@pytest.fixture
|
|
85
81
|
def xlsx_file() -> str:
|
|
86
82
|
return os.path.join(curr_dir(), "data", "test.xlsx")
|
|
87
83
|
|
|
88
84
|
|
|
89
|
-
@pytest.fixture
|
|
90
85
|
def json_file() -> str:
|
|
91
86
|
return os.path.join(curr_dir(), "data", "test.json")
|
|
92
87
|
|
|
93
88
|
|
|
94
|
-
@pytest.fixture
|
|
95
89
|
def parquet_file() -> str:
|
|
96
90
|
return os.path.join(curr_dir(), "data", "test.parquet")
|
|
97
91
|
|
|
98
92
|
|
|
99
|
-
@pytest.fixture
|
|
100
93
|
def pdf_file() -> str:
|
|
101
94
|
return os.path.join(curr_dir(), "data", "test.pdf")
|
|
102
95
|
|
|
103
96
|
|
|
104
|
-
@pytest.fixture
|
|
105
97
|
def txt_file() -> str:
|
|
106
98
|
return os.path.join(curr_dir(), "data", "test.txt")
|
|
107
99
|
|
|
@@ -109,56 +101,47 @@ def txt_file() -> str:
|
|
|
109
101
|
class TestIsItX:
|
|
110
102
|
"""Tests all of the 'is_it_x()' functions to determine a file's type"""
|
|
111
103
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
(
|
|
125
|
-
(
|
|
126
|
-
(
|
|
127
|
-
(
|
|
128
|
-
(
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
def
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
(
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
(
|
|
152
|
-
|
|
153
|
-
(lazy_fixture("json_file"), False),
|
|
154
|
-
(lazy_fixture("parquet_file"), True),
|
|
155
|
-
(lazy_fixture("txt_file"), False),
|
|
156
|
-
(lazy_fixture("pdf_file"), False),
|
|
157
|
-
],
|
|
158
|
-
)
|
|
159
|
-
def test_is_it_parquet(self, file_path, result):
|
|
160
|
-
with open(file_path, "rb") as fh:
|
|
161
|
-
assert FileReader.is_parquet(BytesIO(fh.read())) is result
|
|
104
|
+
def test_is_it_csv(self):
|
|
105
|
+
# We can't test xlsx or parquet here because they're binary files
|
|
106
|
+
for file_path, result in (
|
|
107
|
+
(csv_file(), True),
|
|
108
|
+
(json_file(), False)
|
|
109
|
+
):
|
|
110
|
+
with open(file_path, "r") as fh:
|
|
111
|
+
assert FileReader.is_csv(StringIO(fh.read())) is result
|
|
112
|
+
|
|
113
|
+
def test_format(self):
|
|
114
|
+
for file_path, result in (
|
|
115
|
+
(csv_file(), 'csv'),
|
|
116
|
+
(xlsx_file(), 'xlsx'),
|
|
117
|
+
(json_file(), 'json'),
|
|
118
|
+
(parquet_file(), 'parquet'),
|
|
119
|
+
(txt_file(), 'txt'),
|
|
120
|
+
(pdf_file(), 'pdf'),
|
|
121
|
+
):
|
|
122
|
+
assert FileReader(path=file_path).get_format() == result
|
|
123
|
+
|
|
124
|
+
def test_is_it_json(self):
|
|
125
|
+
# We can't test xlsx or parquet here because they're binary files
|
|
126
|
+
for file_path, result in (
|
|
127
|
+
(csv_file(), False),
|
|
128
|
+
(json_file(), True),
|
|
129
|
+
(txt_file(), False),
|
|
130
|
+
):
|
|
131
|
+
with open(file_path, "r") as fh:
|
|
132
|
+
assert FileReader.is_json(StringIO(fh.read())) is result
|
|
133
|
+
|
|
134
|
+
def test_is_it_parquet(self):
|
|
135
|
+
for file_path, result in (
|
|
136
|
+
(csv_file(), False),
|
|
137
|
+
(xlsx_file(), False),
|
|
138
|
+
(json_file(), False),
|
|
139
|
+
(parquet_file(), True),
|
|
140
|
+
(txt_file(), False),
|
|
141
|
+
(pdf_file(), False),
|
|
142
|
+
):
|
|
143
|
+
with open(file_path, "rb") as fh:
|
|
144
|
+
assert FileReader.is_parquet(BytesIO(fh.read())) is result
|
|
162
145
|
|
|
163
146
|
|
|
164
147
|
class TestQuery:
|
|
@@ -188,13 +171,14 @@ class TestQuery:
|
|
|
188
171
|
|
|
189
172
|
assert response.type == RESPONSE_TYPE.ERROR
|
|
190
173
|
|
|
191
|
-
def test_query_insert(self,
|
|
174
|
+
def test_query_insert(self, monkeypatch):
|
|
192
175
|
"""Test an invalid insert query"""
|
|
193
176
|
# Create a temporary file to save the csv file to.
|
|
177
|
+
csv_file_path = csv_file()
|
|
194
178
|
csv_tmp = os.path.join(tempfile.gettempdir(), "test.csv")
|
|
195
179
|
if os.path.exists(csv_tmp):
|
|
196
180
|
os.remove(csv_tmp)
|
|
197
|
-
shutil.copy(
|
|
181
|
+
shutil.copy(csv_file_path, csv_tmp)
|
|
198
182
|
|
|
199
183
|
def mock_get_file_path(self, name):
|
|
200
184
|
return csv_tmp
|
|
@@ -270,18 +254,7 @@ class TestQuery:
|
|
|
270
254
|
file_handler.native_query("INVALID QUERY")
|
|
271
255
|
|
|
272
256
|
|
|
273
|
-
|
|
274
|
-
"file_path,expected_columns",
|
|
275
|
-
[
|
|
276
|
-
(lazy_fixture("csv_file"), test_file_content[0]),
|
|
277
|
-
(lazy_fixture("xlsx_file"), test_file_content[0]),
|
|
278
|
-
(lazy_fixture("json_file"), test_file_content[0]),
|
|
279
|
-
(lazy_fixture("parquet_file"), test_file_content[0]),
|
|
280
|
-
(lazy_fixture("pdf_file"), ["content", "metadata"]),
|
|
281
|
-
(lazy_fixture("txt_file"), ["content", "metadata"]),
|
|
282
|
-
],
|
|
283
|
-
)
|
|
284
|
-
def test_handle_source(file_path, expected_columns):
|
|
257
|
+
def test_handle_source():
|
|
285
258
|
|
|
286
259
|
def get_reader(file_path):
|
|
287
260
|
# using path
|
|
@@ -300,17 +273,25 @@ def test_handle_source(file_path, expected_columns):
|
|
|
300
273
|
reader = FileReader(file=fd, name=Path(file_path).name)
|
|
301
274
|
yield reader
|
|
302
275
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
276
|
+
for file_path, expected_columns in (
|
|
277
|
+
(csv_file(), test_file_content[0]),
|
|
278
|
+
(xlsx_file(), test_file_content[0]),
|
|
279
|
+
(json_file(), test_file_content[0]),
|
|
280
|
+
(parquet_file(), test_file_content[0]),
|
|
281
|
+
(pdf_file(), ["content", "metadata"]),
|
|
282
|
+
(txt_file(), ["content", "metadata"]),
|
|
283
|
+
):
|
|
284
|
+
# using different methods to create reader
|
|
285
|
+
for reader in get_reader(file_path):
|
|
286
|
+
df = reader.get_page_content()
|
|
287
|
+
assert isinstance(df, pandas.DataFrame)
|
|
288
|
+
|
|
289
|
+
assert df.columns.tolist() == expected_columns
|
|
290
|
+
|
|
291
|
+
# The pdf and txt files have some different content
|
|
292
|
+
if reader.get_format() not in ("pdf", "txt"):
|
|
293
|
+
assert len(df) == len(test_file_content) - 1
|
|
294
|
+
assert df.values.tolist() == test_file_content[1:]
|
|
314
295
|
|
|
315
296
|
|
|
316
297
|
@pytest.mark.parametrize(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
datasets==2.16.1
|
|
2
|
-
evaluate
|
|
3
|
-
nltk>=3.9
|
|
4
|
-
huggingface-hub
|
|
5
1
|
# Needs to be installed with `pip install --extra-index-url https://download.pytorch.org/whl/ .[huggingface_cpu]`
|
|
6
|
-
|
|
2
|
+
datasets==2.16.1
|
|
3
|
+
evaluate==0.4.3
|
|
4
|
+
nltk==3.9.1
|
|
5
|
+
huggingface-hub==0.29.3
|
|
6
|
+
torch==2.6.0+cpu
|