MindsDB 25.3.2.0__py3-none-any.whl → 25.3.3.0__py3-none-any.whl

This diff represents the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

mindsdb/__about__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  __title__ = 'MindsDB'
2
2
  __package_name__ = 'mindsdb'
3
- __version__ = '25.3.2.0'
3
+ __version__ = '25.3.3.0'
4
4
  __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
5
5
  __email__ = "jorge@mindsdb.com"
6
6
  __author__ = 'MindsDB Inc'
mindsdb/__main__.py CHANGED
@@ -46,7 +46,6 @@ try:
46
46
  except RuntimeError:
47
47
  logger.info('Torch multiprocessing context already set, ignoring...')
48
48
 
49
-
50
49
  _stop_event = threading.Event()
51
50
 
52
51
 
@@ -0,0 +1,228 @@
1
+ import inspect
2
+ from typing import List
3
+ from dataclasses import dataclass
4
+
5
+ import pandas as pd
6
+ import github
7
+
8
+ from mindsdb.integrations.utilities.sql_utils import (FilterCondition, FilterOperator, SortColumn)
9
+ from mindsdb.integrations.libs.api_handler import APIResource
10
+
11
+
12
+ @dataclass
13
+ class Type:
14
+ name: str
15
+ sub_type: str = None
16
+ optional: bool = False
17
+
18
+
19
+ @dataclass
20
+ class GHMethod:
21
+ name: str
22
+ table_name: str
23
+ params: dict
24
+ output: Type
25
+
26
+
27
+ def parse_annotations(annotations):
28
+ '''
29
+ Parse string annotation, and extract type, input examples:
30
+ - Milestone | Opt[str]
31
+ - PaginatedList[Issue]
32
+ '''
33
+ type_name, sub_type = None, None
34
+ if not isinstance(annotations, str):
35
+
36
+ return Type(getattr(annotations, '__name__', None))
37
+ for item in annotations.split('|'):
38
+ item = item.strip()
39
+ if item is None:
40
+ continue
41
+ if '[' in item:
42
+ type_name = item[: item.find('[')]
43
+ item2 = item[item.find('[') + 1: item.rfind(']')]
44
+ if type_name == 'Opt':
45
+ inner_type = parse_annotations(item2)
46
+ inner_type.optional = Type
47
+ return inner_type
48
+ if type_name == 'dict':
49
+ item2 = item2[item2.find(',') + 1:]
50
+ sub_type = parse_annotations(item2).name
51
+ else:
52
+ type_name = item
53
+ # get only first type
54
+ break
55
+ return Type(type_name, sub_type)
56
+
57
+
58
+ def get_properties(cls):
59
+ # find properties of the class
60
+
61
+ properties = {}
62
+ for prop_name, prop in inspect.getmembers(cls):
63
+ if prop_name.startswith('_'):
64
+ continue
65
+ if not isinstance(prop, property):
66
+ continue
67
+ sig2 = inspect.signature(prop.fget)
68
+
69
+ properties[prop_name] = parse_annotations(sig2.return_annotation)
70
+ return properties
71
+
72
+
73
+ def get_github_types():
74
+ # get github types
75
+ types = {}
76
+
77
+ GithubObject = github.GithubObject.GithubObject
78
+ for module_name, module in inspect.getmembers(github, inspect.ismodule):
79
+ cls = getattr(module, module_name, None)
80
+ if cls is None:
81
+ continue
82
+ if issubclass(cls, GithubObject):
83
+
84
+ # remove inherited props
85
+ parent_props = []
86
+ for cls2 in cls.__bases__:
87
+ parent_props += get_properties(cls2).keys()
88
+
89
+ properties = {}
90
+ for k, v in get_properties(cls).items():
91
+ if k not in parent_props:
92
+ properties[k] = v
93
+
94
+ types[module_name] = properties
95
+ return types
96
+
97
+
98
+ def get_github_methods(cls):
99
+ '''
100
+ Analyse class in order to find methods which return list of objects.
101
+ '''
102
+ methods = []
103
+
104
+ for method_name, method in inspect.getmembers(cls, inspect.isfunction):
105
+ sig = inspect.signature(method)
106
+
107
+ return_type = parse_annotations(sig.return_annotation)
108
+ list_prefix = 'get_'
109
+ if not (method_name.startswith(list_prefix) and return_type.name == 'PaginatedList'):
110
+ continue
111
+
112
+ table_name = method_name[len(list_prefix):]
113
+
114
+ params = {}
115
+ for param_name, param in sig.parameters.items():
116
+ params[param_name] = parse_annotations(param.annotation)
117
+
118
+ methods.append(GHMethod(
119
+ name=method_name,
120
+ table_name=table_name,
121
+ params=params,
122
+ output=return_type
123
+ ))
124
+ return methods
125
+
126
+
127
+ class GHTable(APIResource):
128
+ def __init__(self, *args, method: GHMethod = None, github_types=None, **kwargs):
129
+ self.method = method
130
+ self.github_types = github_types
131
+
132
+ self.output_columns = {}
133
+ if method.output.sub_type in self.github_types:
134
+ self.output_columns = self.github_types[method.output.sub_type]
135
+
136
+ # check params:
137
+ self.params, self.list_params = [], []
138
+ for name, param_type in method.params.items():
139
+ self.params.append(name)
140
+ if param_type.name == 'list':
141
+ self.list_params.append(name)
142
+
143
+ self._allow_sort = 'sort' in method.params
144
+
145
+ super().__init__(*args, **kwargs)
146
+
147
+ def repr_value(self, value, type_name):
148
+ if value is None or type_name in ('bool', 'int', 'float'):
149
+ return value
150
+ if type_name in self.github_types:
151
+ properties = self.github_types[type_name]
152
+ if 'login' in properties:
153
+ value = getattr(value, 'login')
154
+ elif 'url' in properties:
155
+ value = getattr(value, 'url')
156
+ return str(value)
157
+
158
+ def get_columns(self) -> List[str]:
159
+ return list(self.output_columns.keys())
160
+
161
+ def list(
162
+ self,
163
+ conditions: List[FilterCondition] = None,
164
+ limit: int = None,
165
+ sort: List[SortColumn] = None,
166
+ targets: List[str] = None,
167
+ **kwargs
168
+ ) -> pd.DataFrame:
169
+
170
+ if limit is None:
171
+ limit = 20
172
+
173
+ method_kwargs = {}
174
+ if sort is not None and self._allow_sort:
175
+ for col in sort:
176
+ method_kwargs['sort'] = col.column
177
+ method_kwargs['direction'] = 'asc' if col.ascending else 'desc'
178
+ sort.applied = True
179
+ # supported only 1 column
180
+ break
181
+
182
+ if conditions:
183
+ for condition in conditions:
184
+ if condition.column not in self.params:
185
+ continue
186
+
187
+ if condition.column in self.list_params:
188
+ if condition.op == FilterOperator.IN:
189
+ method_kwargs[condition.column] = condition.value
190
+ elif condition.op == FilterOperator.EQUAL:
191
+ method_kwargs[condition.column] = [condition]
192
+ condition.applied = True
193
+ else:
194
+ method_kwargs[condition.column] = condition.value
195
+ condition.applied = True
196
+
197
+ connection = self.handler.connect()
198
+ method = getattr(connection.get_repo(self.handler.repository), self.method.name)
199
+
200
+ data = []
201
+ count = 0
202
+ for record in method(**method_kwargs):
203
+ item = {}
204
+ for name, output_type in self.output_columns.items():
205
+
206
+ # workaround to prevent making addition request per property.
207
+ if name in targets:
208
+ # request only if is required
209
+ value = getattr(record, name)
210
+ else:
211
+ value = getattr(record, '_' + name).value
212
+ if value is not None:
213
+ if output_type.name == 'list':
214
+ value = ",".join([
215
+ str(self.repr_value(i, output_type.sub_type))
216
+ for i in value
217
+ ])
218
+ else:
219
+ value = self.repr_value(value, output_type.name)
220
+ item[name] = value
221
+
222
+ data.append(item)
223
+
224
+ count += 1
225
+ if limit <= count:
226
+ break
227
+
228
+ return pd.DataFrame(data, columns=self.get_columns())
@@ -12,7 +12,7 @@ from mindsdb.integrations.handlers.github_handler.github_tables import (
12
12
  GithubMilestonesTable,
13
13
  GithubProjectsTable, GithubFilesTable
14
14
  )
15
-
15
+ from mindsdb.integrations.handlers.github_handler.generate_api import get_github_types, get_github_methods, GHTable
16
16
  from mindsdb.integrations.libs.api_handler import APIHandler
17
17
  from mindsdb.integrations.libs.response import (
18
18
  HandlerStatusResponse as StatusResponse,
@@ -22,6 +22,7 @@ from mindsdb.utilities import log
22
22
 
23
23
  logger = log.getLogger(__name__)
24
24
 
25
+
25
26
  class GithubHandler(APIHandler):
26
27
  """The GitHub handler implementation"""
27
28
 
@@ -43,16 +44,22 @@ class GithubHandler(APIHandler):
43
44
  self.connection = None
44
45
  self.is_connected = False
45
46
 
47
+ # custom tables
46
48
  self._register_table("issues", GithubIssuesTable(self))
47
- self._register_table("pull_requests", GithubPullRequestsTable(self))
48
- self._register_table("commits", GithubCommitsTable(self))
49
- self._register_table("releases", GithubReleasesTable(self))
50
- self._register_table("branches", GithubBranchesTable(self))
51
- self._register_table("contributors", GithubContributorsTable(self))
52
- self._register_table("milestones", GithubMilestonesTable(self))
53
- self._register_table("projects", GithubProjectsTable(self))
54
49
  self._register_table("files", GithubFilesTable(self))
55
50
 
51
+ # generated tables
52
+ github_types = get_github_types()
53
+
54
+ # generate tables from repository object
55
+ for method in get_github_methods(github.Repository.Repository):
56
+ if method.table_name in self._tables:
57
+ continue
58
+
59
+ table = GHTable(self, github_types=github_types, method=method)
60
+ self._register_table(method.table_name, table)
61
+
62
+
56
63
  def connect(self) -> StatusResponse:
57
64
  """Set up the connection required by the handler.
58
65
 
@@ -1 +1 @@
1
- pygithub
1
+ pygithub==2.6.1
@@ -57,6 +57,7 @@ class RedshiftHandler(PostgresHandler):
57
57
  connection.commit()
58
58
  except Exception as e:
59
59
  logger.error(f"Error inserting data into {table_name}, {e}!")
60
+ connection.rollback()
60
61
  response = Response(
61
62
  RESPONSE_TYPE.ERROR,
62
63
  error_code=0,
@@ -1 +1 @@
1
- salesforce_api
1
+ salesforce_api==0.1.45
@@ -39,31 +39,8 @@ class SalesforceHandler(APIHandler):
39
39
 
40
40
  self.connection = None
41
41
  self.is_connected = False
42
-
43
- # Register Salesforce tables.
44
- self.resource_names = {
45
- 'Account',
46
- 'Contact',
47
- 'Opportunity',
48
- 'Lead',
49
- 'Task',
50
- 'Event',
51
- 'User',
52
- 'Product2',
53
- 'Pricebook2',
54
- 'PricebookEntry',
55
- 'Order',
56
- 'OrderItem',
57
- 'Case',
58
- 'Campaign',
59
- 'CampaignMember',
60
- 'Contract',
61
- 'Asset'
62
- }
63
-
64
- for resource_name in self.resource_names:
65
- table_class = create_table_class(resource_name, resource_name)
66
- self._register_table(resource_name, table_class(self))
42
+ self.thread_safe = True
43
+ self.resource_names = []
67
44
 
68
45
  def connect(self) -> salesforce_api.client.Client:
69
46
  """
@@ -92,6 +69,12 @@ class SalesforceHandler(APIHandler):
92
69
  is_sandbox=self.connection_data.get('is_sandbox', False)
93
70
  )
94
71
  self.is_connected = True
72
+
73
+ # Register Salesforce tables.
74
+ for resource_name in self._get_resource_names():
75
+ table_class = create_table_class(resource_name)
76
+ self._register_table(resource_name.lower(), table_class(self))
77
+
95
78
  return self.connection
96
79
  except AuthenticationError as auth_error:
97
80
  logger.error(f"Authentication error connecting to Salesforce, {auth_error}!")
@@ -179,3 +162,15 @@ class SalesforceHandler(APIHandler):
179
162
  )
180
163
 
181
164
  return response
165
+
166
+ def _get_resource_names(self) -> None:
167
+ """
168
+ Retrieves the names of the Salesforce resources.
169
+
170
+ Returns:
171
+ None
172
+ """
173
+ if not self.resource_names:
174
+ self.resource_names = [resource['name'] for resource in self.connection.sobjects.describe()['sobjects']]
175
+
176
+ return self.resource_names
@@ -11,7 +11,7 @@ from mindsdb.utilities import log
11
11
  logger = log.getLogger(__name__)
12
12
 
13
13
 
14
- def create_table_class(table_name: Text, resource_name: Text) -> APIResource:
14
+ def create_table_class(resource_name: Text) -> APIResource:
15
15
  """
16
16
  Creates a table class for the given Salesforce resource.
17
17
  """
@@ -31,7 +31,7 @@ def create_table_class(table_name: Text, resource_name: Text) -> APIResource:
31
31
  Returns:
32
32
  pd.DataFrame: A DataFrame containing the data retrieved from the Salesforce resource.
33
33
  """
34
- query.from_table = table_name
34
+ query.from_table = resource_name
35
35
 
36
36
  # SOQL does not support * in SELECT queries. Replace * with column names.
37
37
  if isinstance(query.targets[0], Star):
@@ -10,8 +10,6 @@ class TimeScaleDBHandler(PostgresHandler):
10
10
  super().__init__(name, **kwargs)
11
11
 
12
12
 
13
-
14
-
15
13
  connection_args = OrderedDict(
16
14
  host={
17
15
  'type': ARG_TYPE.STR,
@@ -31,6 +29,12 @@ connection_args = OrderedDict(
31
29
  'type': ARG_TYPE.STR,
32
30
  'description': 'The password to authenticate the user with the TimeScaleDB server.'
33
31
  },
32
+ schema={
33
+ 'type': ARG_TYPE.STR,
34
+ 'description': 'The schema in which objects are searched first.',
35
+ 'required': False,
36
+ 'label': 'Schema'
37
+ },
34
38
  port={
35
39
  'type': ARG_TYPE.INT,
36
40
  'description': 'Specify port to connect TimeScaleDB '
@@ -39,8 +43,9 @@ connection_args = OrderedDict(
39
43
 
40
44
  connection_args_example = OrderedDict(
41
45
  host='127.0.0.1',
42
- port=36806,
43
- password='P455W0rD',
44
- user='tsdbadmin',
45
- database="tsdb"
46
+ port=5432,
47
+ password='password',
48
+ user='root',
49
+ database="timescaledb",
50
+ schema='public'
46
51
  )
@@ -111,10 +111,16 @@ def learn_process(data_integration_ref: dict, problem_definition: dict, fetch_da
111
111
  )
112
112
  handlers_cacher[predictor_record.id] = ml_handler
113
113
 
114
- if not ml_handler.generative:
114
+ if not ml_handler.generative and target is not None:
115
115
  if training_data_df is not None and target not in training_data_df.columns:
116
- raise Exception(
117
- f'Prediction target "{target}" not found in training dataframe: {list(training_data_df.columns)}')
116
+ # is the case different? convert column case in input dataframe
117
+ col_names = {c.lower(): c for c in training_data_df.columns}
118
+ target_found = col_names.get(target.lower())
119
+ if target_found:
120
+ training_data_df.rename(columns={target_found: target}, inplace=True)
121
+ else:
122
+ raise Exception(
123
+ f'Prediction target "{target}" not found in training dataframe: {list(training_data_df.columns)}')
118
124
 
119
125
  # create new model
120
126
  if base_model_id is None:
@@ -483,7 +483,7 @@ class SqlalchemyRender:
483
483
 
484
484
  return schema, table_name
485
485
 
486
- def to_table(self, node):
486
+ def to_table(self, node, is_lateral=False):
487
487
  if isinstance(node, ast.Identifier):
488
488
  schema, table_name = self.get_table_name(node)
489
489
 
@@ -497,7 +497,10 @@ class SqlalchemyRender:
497
497
  alias = None
498
498
  if node.alias:
499
499
  alias = self.get_alias(node.alias)
500
- table = sub_stmt.subquery(alias)
500
+ if is_lateral:
501
+ table = sub_stmt.lateral(alias)
502
+ else:
503
+ table = sub_stmt.subquery(alias)
501
504
 
502
505
  else:
503
506
  # TODO tests are failing
@@ -526,8 +529,11 @@ class SqlalchemyRender:
526
529
 
527
530
  query = query.add_cte(stmt.cte(self.get_alias(alias), nesting=True))
528
531
 
529
- if node.distinct:
532
+ if node.distinct is True:
530
533
  query = query.distinct()
534
+ elif isinstance(node.distinct, list):
535
+ columns = [self.to_expression(c) for c in node.distinct]
536
+ query = query.distinct(*columns)
531
537
 
532
538
  if node.from_table is not None:
533
539
  from_table = node.from_table
@@ -541,7 +547,8 @@ class SqlalchemyRender:
541
547
  # other tables
542
548
  has_explicit_join = False
543
549
  for item in join_list[1:]:
544
- table = self.to_table(item['table'])
550
+ join_type = item['join_type']
551
+ table = self.to_table(item['table'], is_lateral=('LATERAL' in join_type))
545
552
  if item['is_implicit']:
546
553
  # add to from clause
547
554
  if has_explicit_join:
@@ -558,7 +565,6 @@ class SqlalchemyRender:
558
565
  else:
559
566
  condition = self.to_expression(item['condition'])
560
567
 
561
- join_type = item['join_type']
562
568
  if 'ASOF' in join_type:
563
569
  raise NotImplementedError(f'Unsupported join type: {join_type}')
564
570
  method = 'join'