MindsDB 25.3.2.0__py3-none-any.whl → 25.3.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (45)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +0 -1
  3. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +2 -6
  4. mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +1 -1
  5. mindsdb/api/http/namespaces/agents.py +9 -5
  6. mindsdb/api/http/namespaces/chatbots.py +6 -5
  7. mindsdb/api/http/namespaces/databases.py +5 -6
  8. mindsdb/api/http/namespaces/skills.py +5 -4
  9. mindsdb/api/http/namespaces/views.py +6 -7
  10. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +23 -2
  11. mindsdb/integrations/handlers/dummy_data_handler/dummy_data_handler.py +16 -6
  12. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +64 -83
  13. mindsdb/integrations/handlers/github_handler/generate_api.py +228 -0
  14. mindsdb/integrations/handlers/github_handler/github_handler.py +15 -8
  15. mindsdb/integrations/handlers/github_handler/requirements.txt +1 -1
  16. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +5 -4
  17. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +5 -5
  18. mindsdb/integrations/handlers/ms_one_drive_handler/ms_graph_api_one_drive_client.py +1 -1
  19. mindsdb/integrations/handlers/ms_teams_handler/ms_graph_api_teams_client.py +278 -0
  20. mindsdb/integrations/handlers/ms_teams_handler/ms_teams_handler.py +114 -70
  21. mindsdb/integrations/handlers/ms_teams_handler/ms_teams_tables.py +431 -0
  22. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +18 -4
  23. mindsdb/integrations/handlers/redshift_handler/redshift_handler.py +1 -0
  24. mindsdb/integrations/handlers/salesforce_handler/requirements.txt +1 -1
  25. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +20 -25
  26. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +2 -2
  27. mindsdb/integrations/handlers/timescaledb_handler/timescaledb_handler.py +11 -6
  28. mindsdb/integrations/libs/ml_handler_process/learn_process.py +9 -3
  29. mindsdb/integrations/libs/vectordatabase_handler.py +2 -2
  30. mindsdb/integrations/utilities/files/file_reader.py +3 -3
  31. mindsdb/integrations/utilities/handlers/api_utilities/microsoft/ms_graph_api_utilities.py +36 -2
  32. mindsdb/integrations/utilities/rag/settings.py +1 -0
  33. mindsdb/interfaces/chatbot/chatbot_controller.py +6 -4
  34. mindsdb/interfaces/jobs/jobs_controller.py +1 -4
  35. mindsdb/interfaces/knowledge_base/controller.py +9 -28
  36. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +1 -1
  37. mindsdb/interfaces/skills/skills_controller.py +8 -7
  38. mindsdb/utilities/render/sqlalchemy_render.py +11 -5
  39. {mindsdb-25.3.2.0.dist-info → mindsdb-25.3.4.0.dist-info}/METADATA +236 -233
  40. {mindsdb-25.3.2.0.dist-info → mindsdb-25.3.4.0.dist-info}/RECORD +43 -42
  41. {mindsdb-25.3.2.0.dist-info → mindsdb-25.3.4.0.dist-info}/WHEEL +1 -1
  42. mindsdb/integrations/handlers/timescaledb_handler/tests/__init__.py +0 -0
  43. mindsdb/integrations/handlers/timescaledb_handler/tests/test_timescaledb_handler.py +0 -47
  44. {mindsdb-25.3.2.0.dist-info → mindsdb-25.3.4.0.dist-info/licenses}/LICENSE +0 -0
  45. {mindsdb-25.3.2.0.dist-info → mindsdb-25.3.4.0.dist-info}/top_level.txt +0 -0
mindsdb/__about__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  __title__ = 'MindsDB'
2
2
  __package_name__ = 'mindsdb'
3
- __version__ = '25.3.2.0'
3
+ __version__ = '25.3.4.0'
4
4
  __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
5
5
  __email__ = "jorge@mindsdb.com"
6
6
  __author__ = 'MindsDB Inc'
mindsdb/__main__.py CHANGED
@@ -46,7 +46,6 @@ try:
46
46
  except RuntimeError:
47
47
  logger.info('Torch multiprocessing context already set, ignoring...')
48
48
 
49
-
50
49
  _stop_event = threading.Event()
51
50
 
52
51
 
mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py CHANGED
@@ -3,12 +3,8 @@ import pandas as pd
3
3
  from mindsdb_sql_parser.ast.base import ASTNode
4
4
 
5
5
  from mindsdb.api.executor.datahub.datanodes.datanode import DataNode
6
- from mindsdb.api.executor.datahub.datanodes.integration_datanode import (
7
- IntegrationDataNode,
8
- )
9
- from mindsdb.api.executor.datahub.datanodes.project_datanode import (
10
- ProjectDataNode,
11
- )
6
+ from mindsdb.api.executor.datahub.datanodes.integration_datanode import IntegrationDataNode
7
+ from mindsdb.api.executor.datahub.datanodes.project_datanode import ProjectDataNode
12
8
  from mindsdb.api.executor import exceptions as exc
13
9
  from mindsdb.api.executor.utilities.sql import query_df
14
10
  from mindsdb.api.executor.utilities.sql import get_query_tables
mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py CHANGED
@@ -310,7 +310,7 @@ class ChatbotsTable(MdbTable):
310
310
  ):
311
311
  project_name = query.where.args[1].value
312
312
 
313
- chatbot_data = chatbot_controller.get_chatbots(project_name)
313
+ chatbot_data = chatbot_controller.get_chatbots(project_name=project_name)
314
314
 
315
315
  columns = cls.columns
316
316
  columns_lower = [col.lower() for col in columns]
mindsdb/api/http/namespaces/agents.py CHANGED
@@ -47,7 +47,7 @@ def create_agent(project_name, name, agent):
47
47
 
48
48
  try:
49
49
  existing_agent = agents_controller.get_agent(name, project_name=project_name)
50
- except ValueError:
50
+ except (ValueError, EntityNotExistsError):
51
51
  # Project must exist.
52
52
  return http_error(
53
53
  HTTPStatus.NOT_FOUND,
@@ -141,7 +141,7 @@ class AgentResource(Resource):
141
141
  f'Agent with name {agent_name} does not exist'
142
142
  )
143
143
  return existing_agent.as_dict()
144
- except ValueError:
144
+ except (ValueError, EntityNotExistsError):
145
145
  # Project needs to exist.
146
146
  return http_error(
147
147
  HTTPStatus.NOT_FOUND,
@@ -173,7 +173,11 @@ class AgentResource(Resource):
173
173
  f'Project with name {project_name} does not exist'
174
174
  )
175
175
  if existing_agent_record is None:
176
- raise Exception
176
+ return http_error(
177
+ HTTPStatus.BAD_REQUEST,
178
+ 'Creation is not allowed',
179
+ 'Creation of an agent using the PUT method is not allowed.'
180
+ )
177
181
 
178
182
  agent = request.json['agent']
179
183
  name = agent.get('name', None)
@@ -272,7 +276,7 @@ class AgentResource(Resource):
272
276
  'Agent not found',
273
277
  f'Agent with name {agent_name} does not exist'
274
278
  )
275
- except ValueError:
279
+ except (ValueError, EntityNotExistsError):
276
280
  # Project needs to exist.
277
281
  return http_error(
278
282
  HTTPStatus.NOT_FOUND,
@@ -435,7 +439,7 @@ class AgentCompletions(Resource):
435
439
  'Agent not found',
436
440
  f'Agent with name {agent_name} does not exist'
437
441
  )
438
- except ValueError:
442
+ except (ValueError, EntityNotExistsError):
439
443
  # Project needs to exist.
440
444
  return http_error(
441
445
  HTTPStatus.NOT_FOUND,
mindsdb/api/http/namespaces/chatbots.py CHANGED
@@ -11,6 +11,7 @@ from mindsdb.metrics.metrics import api_endpoint_metrics
11
11
  from mindsdb.interfaces.chatbot.chatbot_controller import ChatBotController
12
12
  from mindsdb.interfaces.model.functions import PredictorRecordNotFound
13
13
  from mindsdb.interfaces.storage.db import Predictor
14
+ from mindsdb.utilities.exception import EntityNotExistsError
14
15
 
15
16
 
16
17
  def create_chatbot(project_name, name, chatbot):
@@ -59,7 +60,7 @@ def create_chatbot(project_name, name, chatbot):
59
60
 
60
61
  try:
61
62
  existing_chatbot = chatbot_controller.get_chatbot(name, project_name=project_name)
62
- except ValueError:
63
+ except EntityNotExistsError:
63
64
  # Project must exist.
64
65
  return http_error(
65
66
  HTTPStatus.NOT_FOUND,
@@ -152,7 +153,7 @@ class ChatBotsResource(Resource):
152
153
  chatbot_controller = ChatBotController()
153
154
  try:
154
155
  all_bots = chatbot_controller.get_chatbots(project_name)
155
- except ValueError:
156
+ except (ValueError, EntityNotExistsError):
156
157
  # Project needs to exist.
157
158
  return http_error(
158
159
  HTTPStatus.NOT_FOUND,
@@ -197,7 +198,7 @@ class ChatBotResource(Resource):
197
198
  f'Chatbot with name {chatbot_name} does not exist'
198
199
  )
199
200
  return existing_chatbot
200
- except ValueError:
201
+ except (ValueError, EntityNotExistsError):
201
202
  # Project needs to exist.
202
203
  return http_error(
203
204
  HTTPStatus.NOT_FOUND,
@@ -221,7 +222,7 @@ class ChatBotResource(Resource):
221
222
 
222
223
  try:
223
224
  existing_chatbot = chatbot_controller.get_chatbot(chatbot_name, project_name=project_name)
224
- except ValueError:
225
+ except EntityNotExistsError:
225
226
  # Project needs to exist.
226
227
  return http_error(
227
228
  HTTPStatus.NOT_FOUND,
@@ -306,7 +307,7 @@ class ChatBotResource(Resource):
306
307
  'Chatbot not found',
307
308
  f'Chatbot with name {chatbot_name} does not exist'
308
309
  )
309
- except ValueError:
310
+ except EntityNotExistsError:
310
311
  # Project needs to exist.
311
312
  return http_error(
312
313
  HTTPStatus.NOT_FOUND,
mindsdb/api/http/namespaces/databases.py CHANGED
@@ -1,10 +1,9 @@
1
- from http import HTTPStatus
2
- import tempfile
3
1
  import time
2
+ import shutil
3
+ import tempfile
4
+ from http import HTTPStatus
4
5
  from typing import Dict
5
6
  from pathlib import Path
6
- import shutil
7
- from sqlalchemy.exc import NoResultFound
8
7
 
9
8
  from flask import request
10
9
  from flask_restx import Resource
@@ -337,7 +336,7 @@ class TablesList(Resource):
337
336
  HTTPStatus.BAD_REQUEST, 'Error',
338
337
  error_message
339
338
  )
340
- except NoResultFound:
339
+ except EntityNotExistsError:
341
340
  # Only support creating tables from integrations.
342
341
  pass
343
342
 
@@ -419,7 +418,7 @@ class TableResource(Resource):
419
418
  + f'If you want to delete a model or view, use the projects/{database_name}/models/{table_name} or ' \
420
419
  + f'projects/{database_name}/views/{table_name} endpoints instead.'
421
420
  return http_error(HTTPStatus.BAD_REQUEST, 'Error', error_message)
422
- except NoResultFound:
421
+ except EntityNotExistsError:
423
422
  # Only support dropping tables from integrations.
424
423
  pass
425
424
 
mindsdb/api/http/namespaces/skills.py CHANGED
@@ -7,6 +7,7 @@ from mindsdb.metrics.metrics import api_endpoint_metrics
7
7
  from mindsdb.api.http.namespaces.configs.projects import ns_conf
8
8
  from mindsdb.api.http.utils import http_error
9
9
  from mindsdb.interfaces.skills.skills_controller import SkillsController
10
+ from mindsdb.utilities.exception import EntityNotExistsError
10
11
 
11
12
 
12
13
  def create_skill(project_name, skill):
@@ -52,7 +53,7 @@ class SkillsResource(Resource):
52
53
  skills_controller = SkillsController()
53
54
  try:
54
55
  all_skills = skills_controller.get_skills(project_name)
55
- except ValueError:
56
+ except EntityNotExistsError:
56
57
  # Project needs to exist.
57
58
  return http_error(
58
59
  HTTPStatus.NOT_FOUND,
@@ -88,7 +89,7 @@ class SkillResource(Resource):
88
89
  skills_controller = SkillsController()
89
90
  try:
90
91
  existing_skill = skills_controller.get_skill(skill_name, project_name)
91
- except ValueError:
92
+ except EntityNotExistsError:
92
93
  # Project needs to exist
93
94
  return http_error(
94
95
  HTTPStatus.NOT_FOUND,
@@ -120,7 +121,7 @@ class SkillResource(Resource):
120
121
 
121
122
  try:
122
123
  existing_skill = skills_controller.get_skill(skill_name, project_name)
123
- except ValueError:
124
+ except EntityNotExistsError:
124
125
  # Project needs to exist
125
126
  return http_error(
126
127
  HTTPStatus.NOT_FOUND,
@@ -152,7 +153,7 @@ class SkillResource(Resource):
152
153
  skills_controller = SkillsController()
153
154
  try:
154
155
  existing_skill = skills_controller.get_skill(skill_name, project_name)
155
- except ValueError:
156
+ except EntityNotExistsError:
156
157
  # Project needs to exist
157
158
  return http_error(
158
159
  HTTPStatus.NOT_FOUND,
mindsdb/api/http/namespaces/views.py CHANGED
@@ -2,13 +2,12 @@ from http import HTTPStatus
2
2
 
3
3
  from flask import request
4
4
  from flask_restx import Resource
5
- from sqlalchemy.exc import NoResultFound
6
-
7
5
 
8
6
  from mindsdb.api.http.utils import http_error
9
7
  from mindsdb.api.http.namespaces.configs.projects import ns_conf
10
8
  from mindsdb.api.executor.controllers.session_controller import SessionController
11
9
  from mindsdb.metrics.metrics import api_endpoint_metrics
10
+ from mindsdb.utilities.exception import EntityNotExistsError
12
11
 
13
12
 
14
13
  @ns_conf.route('/<project_name>/views')
@@ -20,7 +19,7 @@ class ViewsList(Resource):
20
19
  session = SessionController()
21
20
  try:
22
21
  project = session.database_controller.get_project(project_name)
23
- except NoResultFound:
22
+ except EntityNotExistsError:
24
23
  return http_error(
25
24
  HTTPStatus.NOT_FOUND,
26
25
  'Project not found',
@@ -55,7 +54,7 @@ class ViewsList(Resource):
55
54
 
56
55
  try:
57
56
  project = session.database_controller.get_project(project_name)
58
- except NoResultFound:
57
+ except EntityNotExistsError:
59
58
  return http_error(HTTPStatus.NOT_FOUND, 'Not found', f'Project name {project_name} does not exist')
60
59
 
61
60
  if project.get_view(name) is not None:
@@ -82,7 +81,7 @@ class ViewResource(Resource):
82
81
  session = SessionController()
83
82
  try:
84
83
  project = session.database_controller.get_project(project_name)
85
- except NoResultFound:
84
+ except EntityNotExistsError:
86
85
  return http_error(HTTPStatus.NOT_FOUND, 'Project not found', f'Project name {project_name} does not exist')
87
86
 
88
87
  view = project.get_view(view_name)
@@ -106,7 +105,7 @@ class ViewResource(Resource):
106
105
  session = SessionController()
107
106
  try:
108
107
  project = session.database_controller.get_project(project_name)
109
- except NoResultFound:
108
+ except EntityNotExistsError:
110
109
  return http_error(HTTPStatus.NOT_FOUND, 'Project not found', f'Project name {project_name} does not exist')
111
110
 
112
111
  existing_view = project.get_view(view_name)
@@ -143,7 +142,7 @@ class ViewResource(Resource):
143
142
  session = SessionController()
144
143
  try:
145
144
  project = session.database_controller.get_project(project_name)
146
- except NoResultFound:
145
+ except EntityNotExistsError:
147
146
  return http_error(HTTPStatus.NOT_FOUND, 'Project not found', f'Project name {project_name} does not exist')
148
147
 
149
148
  if project.get_view(view_name) is None:
mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py CHANGED
@@ -210,6 +210,7 @@ class ChromaDBHandler(VectorStoreHandler):
210
210
  chroma_db_conditions = []
211
211
  for condition in metadata_conditions:
212
212
  metadata_key = condition.column.split(".")[-1]
213
+
213
214
  chroma_db_conditions.append(
214
215
  {
215
216
  metadata_key: {
@@ -310,9 +311,29 @@ class ChromaDBHandler(VectorStoreHandler):
310
311
  payload = {column: payload[column] for column in columns}
311
312
 
312
313
  # always include distance
314
+ distance_filter = None
315
+ distance_col = TableField.DISTANCE.value
313
316
  if distances is not None:
314
- payload[TableField.DISTANCE.value] = distances
315
- return pd.DataFrame(payload)
317
+ payload[distance_col] = distances
318
+
319
+ for cond in conditions:
320
+ if cond.column == distance_col:
321
+ distance_filter = cond
322
+ break
323
+
324
+ df = pd.DataFrame(payload)
325
+ if distance_filter is not None:
326
+ op_map = {
327
+ '<': '__lt__',
328
+ '<=': '__le__',
329
+ '>': '__gt__',
330
+ '>=': '__ge__',
331
+ '=': '__eq__',
332
+ }
333
+ op = op_map.get(distance_filter.op.value)
334
+ if op:
335
+ df = df[getattr(df[distance_col], op)(distance_filter.value)]
336
+ return df
316
337
 
317
338
  def _dataframe_metadata_to_chroma_metadata(self, metadata: Union[Dict[str, str], str]) -> Optional[Dict[str, str]]:
318
339
  """Convert DataFrame metadata to ChromaDB compatible metadata format"""
mindsdb/integrations/handlers/dummy_data_handler/dummy_data_handler.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import time
2
+ from typing import Optional, List
2
3
 
3
4
  import duckdb
4
5
  from typing import Any
@@ -36,18 +37,27 @@ class DummyHandler(DatabaseHandler):
36
37
  """
37
38
  return HandlerStatusResponse(success=True)
38
39
 
39
- def native_query(self, query: Any) -> HandlerResponse:
40
+ def native_query(self, query: Any, params: Optional[List] = None) -> HandlerResponse:
40
41
  """Receive raw query and act upon it somehow
41
42
 
42
43
  Args:
43
- query (Any): query in native format (str for sql databases,
44
- dict for mongo, etc)
44
+ query (Any): query in native format (str for sql databases, dict for mongo, etc)
45
+ params (Optional[List])
45
46
 
46
47
  Returns:
47
48
  HandlerResponse
48
49
  """
49
50
  con = duckdb.connect(self.db_path)
50
- result_df = con.execute(query).fetchdf()
51
+ if params is not None:
52
+ query = query.replace('%s', '?')
53
+ cur = con.executemany(query, params)
54
+ if cur.rowcount >= 0:
55
+ result_df = cur.fetchdf()
56
+ else:
57
+ con.close()
58
+ return HandlerResponse(RESPONSE_TYPE.OK)
59
+ else:
60
+ result_df = con.execute(query).fetchdf()
51
61
  con.close()
52
62
  return HandlerResponse(RESPONSE_TYPE.TABLE, result_df)
53
63
 
@@ -62,8 +72,8 @@ class DummyHandler(DatabaseHandler):
62
72
  HandlerResponse
63
73
  """
64
74
  renderer = SqlalchemyRender('postgres')
65
- query_str = renderer.get_string(query, with_failback=True)
66
- return self.native_query(query_str)
75
+ query_str, params = renderer.get_exec_params(query, with_failback=True)
76
+ return self.native_query(query_str, params)
67
77
 
68
78
  def get_tables(self) -> HandlerResponse:
69
79
  """Get a list of all the tables in the database
mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py CHANGED
@@ -8,7 +8,6 @@ import pandas
8
8
  import pytest
9
9
  from mindsdb_sql_parser.exceptions import ParsingException
10
10
  from mindsdb_sql_parser.ast import CreateTable, DropTables, Identifier, Insert, TableColumn, Update
11
- from pytest_lazyfixture import lazy_fixture
12
11
 
13
12
  from mindsdb.integrations.handlers.file_handler.file_handler import FileHandler
14
13
  from mindsdb.integrations.libs.response import RESPONSE_TYPE
@@ -75,33 +74,26 @@ def curr_dir():
75
74
  return os.path.dirname(os.path.realpath(__file__))
76
75
 
77
76
 
78
- # Fixtures to get a path to a partiular type of file
79
- @pytest.fixture
80
77
  def csv_file() -> str:
81
78
  return os.path.join(curr_dir(), "data", "test.csv")
82
79
 
83
80
 
84
- @pytest.fixture
85
81
  def xlsx_file() -> str:
86
82
  return os.path.join(curr_dir(), "data", "test.xlsx")
87
83
 
88
84
 
89
- @pytest.fixture
90
85
  def json_file() -> str:
91
86
  return os.path.join(curr_dir(), "data", "test.json")
92
87
 
93
88
 
94
- @pytest.fixture
95
89
  def parquet_file() -> str:
96
90
  return os.path.join(curr_dir(), "data", "test.parquet")
97
91
 
98
92
 
99
- @pytest.fixture
100
93
  def pdf_file() -> str:
101
94
  return os.path.join(curr_dir(), "data", "test.pdf")
102
95
 
103
96
 
104
- @pytest.fixture
105
97
  def txt_file() -> str:
106
98
  return os.path.join(curr_dir(), "data", "test.txt")
107
99
 
@@ -109,56 +101,47 @@ def txt_file() -> str:
109
101
  class TestIsItX:
110
102
  """Tests all of the 'is_it_x()' functions to determine a file's type"""
111
103
 
112
- # We can't test xlsx or parquet here because they're binary files
113
- @pytest.mark.parametrize(
114
- "file_path,result",
115
- [(lazy_fixture("csv_file"), True), (lazy_fixture("json_file"), False)],
116
- )
117
- def test_is_it_csv(self, file_path, result):
118
- with open(file_path, "r") as fh:
119
- assert FileReader.is_csv(StringIO(fh.read())) is result
120
-
121
- @pytest.mark.parametrize(
122
- "file_path,result",
123
- [
124
- (lazy_fixture("csv_file"), 'csv'),
125
- (lazy_fixture("xlsx_file"), 'xlsx'),
126
- (lazy_fixture("json_file"), 'json'),
127
- (lazy_fixture("parquet_file"), 'parquet'),
128
- (lazy_fixture("txt_file"), 'txt'),
129
- (lazy_fixture("pdf_file"), 'pdf'),
130
- ],
131
- )
132
- def test_format(self, file_path, result):
133
- assert FileReader(path=file_path).get_format() == result
134
-
135
- # We can't test xlsx or parquet here because they're binary files
136
- @pytest.mark.parametrize(
137
- "file_path,result",
138
- [
139
- (lazy_fixture("csv_file"), False),
140
- (lazy_fixture("json_file"), True),
141
- (lazy_fixture("txt_file"), False),
142
- ],
143
- )
144
- def test_is_it_json(self, file_path, result):
145
- with open(file_path, "r") as fh:
146
- assert FileReader.is_json(StringIO(fh.read())) is result
147
-
148
- @pytest.mark.parametrize(
149
- "file_path,result",
150
- [
151
- (lazy_fixture("csv_file"), False),
152
- (lazy_fixture("xlsx_file"), False),
153
- (lazy_fixture("json_file"), False),
154
- (lazy_fixture("parquet_file"), True),
155
- (lazy_fixture("txt_file"), False),
156
- (lazy_fixture("pdf_file"), False),
157
- ],
158
- )
159
- def test_is_it_parquet(self, file_path, result):
160
- with open(file_path, "rb") as fh:
161
- assert FileReader.is_parquet(BytesIO(fh.read())) is result
104
+ def test_is_it_csv(self):
105
+ # We can't test xlsx or parquet here because they're binary files
106
+ for file_path, result in (
107
+ (csv_file(), True),
108
+ (json_file(), False)
109
+ ):
110
+ with open(file_path, "r") as fh:
111
+ assert FileReader.is_csv(StringIO(fh.read())) is result
112
+
113
+ def test_format(self):
114
+ for file_path, result in (
115
+ (csv_file(), 'csv'),
116
+ (xlsx_file(), 'xlsx'),
117
+ (json_file(), 'json'),
118
+ (parquet_file(), 'parquet'),
119
+ (txt_file(), 'txt'),
120
+ (pdf_file(), 'pdf'),
121
+ ):
122
+ assert FileReader(path=file_path).get_format() == result
123
+
124
+ def test_is_it_json(self):
125
+ # We can't test xlsx or parquet here because they're binary files
126
+ for file_path, result in (
127
+ (csv_file(), False),
128
+ (json_file(), True),
129
+ (txt_file(), False),
130
+ ):
131
+ with open(file_path, "r") as fh:
132
+ assert FileReader.is_json(StringIO(fh.read())) is result
133
+
134
+ def test_is_it_parquet(self):
135
+ for file_path, result in (
136
+ (csv_file(), False),
137
+ (xlsx_file(), False),
138
+ (json_file(), False),
139
+ (parquet_file(), True),
140
+ (txt_file(), False),
141
+ (pdf_file(), False),
142
+ ):
143
+ with open(file_path, "rb") as fh:
144
+ assert FileReader.is_parquet(BytesIO(fh.read())) is result
162
145
 
163
146
 
164
147
  class TestQuery:
@@ -188,13 +171,14 @@ class TestQuery:
188
171
 
189
172
  assert response.type == RESPONSE_TYPE.ERROR
190
173
 
191
- def test_query_insert(self, csv_file, monkeypatch):
174
+ def test_query_insert(self, monkeypatch):
192
175
  """Test an invalid insert query"""
193
176
  # Create a temporary file to save the csv file to.
177
+ csv_file_path = csv_file()
194
178
  csv_tmp = os.path.join(tempfile.gettempdir(), "test.csv")
195
179
  if os.path.exists(csv_tmp):
196
180
  os.remove(csv_tmp)
197
- shutil.copy(csv_file, csv_tmp)
181
+ shutil.copy(csv_file_path, csv_tmp)
198
182
 
199
183
  def mock_get_file_path(self, name):
200
184
  return csv_tmp
@@ -270,18 +254,7 @@ class TestQuery:
270
254
  file_handler.native_query("INVALID QUERY")
271
255
 
272
256
 
273
- @pytest.mark.parametrize(
274
- "file_path,expected_columns",
275
- [
276
- (lazy_fixture("csv_file"), test_file_content[0]),
277
- (lazy_fixture("xlsx_file"), test_file_content[0]),
278
- (lazy_fixture("json_file"), test_file_content[0]),
279
- (lazy_fixture("parquet_file"), test_file_content[0]),
280
- (lazy_fixture("pdf_file"), ["content", "metadata"]),
281
- (lazy_fixture("txt_file"), ["content", "metadata"]),
282
- ],
283
- )
284
- def test_handle_source(file_path, expected_columns):
257
+ def test_handle_source():
285
258
 
286
259
  def get_reader(file_path):
287
260
  # using path
@@ -300,17 +273,25 @@ def test_handle_source(file_path, expected_columns):
300
273
  reader = FileReader(file=fd, name=Path(file_path).name)
301
274
  yield reader
302
275
 
303
- # using different methods to create reader
304
- for reader in get_reader(file_path):
305
- df = reader.get_page_content()
306
- assert isinstance(df, pandas.DataFrame)
307
-
308
- assert df.columns.tolist() == expected_columns
309
-
310
- # The pdf and txt files have some different content
311
- if reader.get_format() not in ("pdf", "txt"):
312
- assert len(df) == len(test_file_content) - 1
313
- assert df.values.tolist() == test_file_content[1:]
276
+ for file_path, expected_columns in (
277
+ (csv_file(), test_file_content[0]),
278
+ (xlsx_file(), test_file_content[0]),
279
+ (json_file(), test_file_content[0]),
280
+ (parquet_file(), test_file_content[0]),
281
+ (pdf_file(), ["content", "metadata"]),
282
+ (txt_file(), ["content", "metadata"]),
283
+ ):
284
+ # using different methods to create reader
285
+ for reader in get_reader(file_path):
286
+ df = reader.get_page_content()
287
+ assert isinstance(df, pandas.DataFrame)
288
+
289
+ assert df.columns.tolist() == expected_columns
290
+
291
+ # The pdf and txt files have some different content
292
+ if reader.get_format() not in ("pdf", "txt"):
293
+ assert len(df) == len(test_file_content) - 1
294
+ assert df.values.tolist() == test_file_content[1:]
314
295
 
315
296
 
316
297
  @pytest.mark.parametrize(