MindsDB 25.1.2.1__py3-none-any.whl → 25.1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (77)
  1. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/METADATA +244 -242
  2. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/RECORD +76 -67
  3. mindsdb/__about__.py +1 -1
  4. mindsdb/__main__.py +5 -3
  5. mindsdb/api/executor/__init__.py +0 -1
  6. mindsdb/api/executor/command_executor.py +2 -1
  7. mindsdb/api/executor/data_types/answer.py +1 -1
  8. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +7 -2
  9. mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -1
  10. mindsdb/api/executor/sql_query/__init__.py +1 -0
  11. mindsdb/api/executor/sql_query/result_set.py +36 -21
  12. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +1 -1
  13. mindsdb/api/executor/sql_query/steps/join_step.py +4 -4
  14. mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
  15. mindsdb/api/executor/utilities/sql.py +2 -10
  16. mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
  17. mindsdb/api/http/namespaces/sql.py +3 -1
  18. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
  19. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +7 -0
  20. mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
  21. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
  22. mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
  23. mindsdb/integrations/handlers/file_handler/file_handler.py +1 -1
  24. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
  25. mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
  26. mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
  27. mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py +82 -0
  28. mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +8 -1
  29. mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
  30. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +48 -16
  31. mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
  32. mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
  33. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +12 -6
  34. mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
  35. mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
  36. mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -1
  37. mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +76 -27
  38. mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py +18 -1
  39. mindsdb/integrations/utilities/rag/pipelines/rag.py +73 -18
  40. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
  41. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +36 -14
  42. mindsdb/integrations/utilities/rag/settings.py +8 -2
  43. mindsdb/integrations/utilities/sql_utils.py +1 -1
  44. mindsdb/interfaces/agents/agents_controller.py +3 -5
  45. mindsdb/interfaces/agents/langchain_agent.py +112 -150
  46. mindsdb/interfaces/agents/langfuse_callback_handler.py +0 -37
  47. mindsdb/interfaces/agents/mindsdb_database_agent.py +15 -13
  48. mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
  49. mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
  50. mindsdb/interfaces/chatbot/memory.py +58 -13
  51. mindsdb/interfaces/database/projects.py +17 -15
  52. mindsdb/interfaces/database/views.py +12 -25
  53. mindsdb/interfaces/knowledge_base/controller.py +39 -15
  54. mindsdb/interfaces/model/functions.py +15 -4
  55. mindsdb/interfaces/model/model_controller.py +4 -7
  56. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +47 -38
  57. mindsdb/interfaces/skills/retrieval_tool.py +10 -3
  58. mindsdb/interfaces/skills/skill_tool.py +97 -53
  59. mindsdb/interfaces/skills/sql_agent.py +77 -36
  60. mindsdb/interfaces/storage/db.py +1 -1
  61. mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
  62. mindsdb/utilities/cache.py +7 -4
  63. mindsdb/utilities/context.py +11 -1
  64. mindsdb/utilities/langfuse.py +264 -0
  65. mindsdb/utilities/log.py +20 -2
  66. mindsdb/utilities/otel/__init__.py +206 -0
  67. mindsdb/utilities/otel/logger.py +25 -0
  68. mindsdb/utilities/otel/meter.py +19 -0
  69. mindsdb/utilities/otel/metric_handlers/__init__.py +25 -0
  70. mindsdb/utilities/otel/tracer.py +16 -0
  71. mindsdb/utilities/partitioning.py +52 -0
  72. mindsdb/utilities/render/sqlalchemy_render.py +7 -1
  73. mindsdb/utilities/utils.py +34 -0
  74. mindsdb/utilities/otel.py +0 -72
  75. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/LICENSE +0 -0
  76. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/WHEEL +0 -0
  77. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,264 @@
1
+ import os
2
+ import typing
3
+
4
+ from mindsdb.utilities import log
5
+ from langfuse import Langfuse
6
+ from langfuse.client import StatefulSpanClient
7
+ from langfuse.callback import CallbackHandler
8
+ from langfuse.api.resources.commons.errors.not_found_error import NotFoundError as TraceNotFoundError
9
+
10
+ logger = log.getLogger(__name__)
11
+
12
# Langfuse public key; a placeholder value is used when the variable is unset.
LANGFUSE_PUBLIC_KEY = os.environ.get("LANGFUSE_PUBLIC_KEY", "langfuse_public_key")

# Langfuse secret key; a placeholder value is used when the variable is unset.
LANGFUSE_SECRET_KEY = os.environ.get("LANGFUSE_SECRET_KEY", "langfuse_secret_key")

# Base URL of the Langfuse server.
LANGFUSE_HOST = os.environ.get("LANGFUSE_HOST", "http://localhost:3000")

# Name of the environment this process runs in ("local" by default).
LANGFUSE_ENVIRONMENT = os.environ.get("LANGFUSE_ENVIRONMENT", "local")

# Release identifier reported to Langfuse.
LANGFUSE_RELEASE = os.environ.get("LANGFUSE_RELEASE", "local")

# Debug mode is on only when the variable is the string "true" (case-insensitive).
LANGFUSE_DEBUG = os.environ.get("LANGFUSE_DEBUG", "false").lower() == "true"

# Timeout handed to the Langfuse client, parsed as an integer.
LANGFUSE_TIMEOUT = int(os.environ.get("LANGFUSE_TIMEOUT", 10))

# Sample rate handed to the Langfuse client, parsed as a float.
LANGFUSE_SAMPLE_RATE = float(os.environ.get("LANGFUSE_SAMPLE_RATE", 1.0))

# Langfuse is disabled either explicitly or implicitly whenever the environment is "local".
LANGFUSE_DISABLED = (
    LANGFUSE_ENVIRONMENT == "local"
    or os.environ.get("LANGFUSE_DISABLED", "false").lower() == "true"
)

# Overrides LANGFUSE_DISABLED so Langfuse can be used in a "local" environment.
LANGFUSE_FORCE_RUN = os.environ.get("LANGFUSE_FORCE_RUN", "false").lower() == "true"
39
+
40
+
41
class LangfuseClientWrapper:
    """
    Langfuse client wrapper. Defines Langfuse client configuration and initializes Langfuse client.

    When Langfuse is disabled (and not force-run) no client is created and every
    public method returns without doing anything.
    """

    def __init__(self,
                 public_key: str = LANGFUSE_PUBLIC_KEY,
                 secret_key: str = LANGFUSE_SECRET_KEY,
                 host: str = LANGFUSE_HOST,
                 environment: str = LANGFUSE_ENVIRONMENT,
                 release: str = LANGFUSE_RELEASE,
                 debug: bool = LANGFUSE_DEBUG,
                 timeout: int = LANGFUSE_TIMEOUT,
                 sample_rate: float = LANGFUSE_SAMPLE_RATE,
                 disable: bool = LANGFUSE_DISABLED,
                 force_run: bool = LANGFUSE_FORCE_RUN) -> None:
        """
        Initialize Langfuse client.

        Args:
            public_key (str): Langfuse public key.
            secret_key (str): Langfuse secret key.
            host (str): Langfuse host.
            environment (str): Environment name, added to trace metadata and tags.
            release (str): Langfuse release.
            debug (bool): Langfuse debug mode.
            timeout (int): Langfuse timeout.
            sample_rate (float): Langfuse sample rate.
            disable (bool): If True, no client is created unless `force_run` is set.
            force_run (bool): If True, create the client even when `disable` is True.
        """

        self.public_key = public_key
        self.secret_key = secret_key
        self.host = host
        self.environment = environment
        self.release = release
        self.debug = debug
        self.timeout = timeout
        self.sample_rate = sample_rate
        self.disable = disable
        self.force_run = force_run

        self.client = None
        self.trace = None
        self.metadata = None
        self.tags = None

        # Decide from the constructor arguments (which default to the module
        # settings) so that explicitly passed values are honoured.
        if disable and not force_run:
            logger.info("Langfuse is disabled.")
            return

        logger.info("Langfuse enabled")
        logger.debug(f"LANGFUSE_PUBLIC_KEY: {public_key}")
        # Never log the secret itself, only a mask of the same length.
        logger.debug(f"LANGFUSE_SECRET_KEY: {'*' * len(secret_key)}")
        logger.debug(f"LANGFUSE_HOST: {host}")
        logger.debug(f"LANGFUSE_ENVIRONMENT: {environment}")
        logger.debug(f"LANGFUSE_RELEASE: {release}")
        logger.debug(f"LANGFUSE_DEBUG: {debug}")
        logger.debug(f"LANGFUSE_TIMEOUT: {timeout}")
        logger.debug(f"LANGFUSE_SAMPLE_RATE: {sample_rate * 100}%")

        self.client = Langfuse(
            public_key=public_key,
            secret_key=secret_key,
            host=host,
            release=release,
            debug=debug,
            timeout=timeout,
            sample_rate=sample_rate
        )

    def setup_trace(self,
                    name: str,
                    input: typing.Optional[typing.Any] = None,
                    tags: typing.Optional[typing.List] = None,
                    metadata: typing.Optional[typing.Dict] = None,
                    user_id: str = None,
                    session_id: str = None) -> None:
        """
        Setup trace. If Langfuse is disabled, nothing will be done.

        Args:
            name (str): Trace name.
            input (Any): Trace input.
            tags (list): Trace tags.
            metadata (dict): Trace metadata.
            user_id (str): User ID.
            session_id (str): Session ID.
        """

        if self.client is None:
            logger.debug("Langfuse is disabled.")
            return

        self.set_metadata(metadata)
        self.set_tags(tags)

        try:
            self.trace = self.client.trace(
                name=name,
                input=input,
                metadata=self.metadata,
                tags=self.tags,
                user_id=user_id,
                session_id=session_id
            )
        except Exception as e:
            # The trace was not created, so there is no trace ID to report;
            # identify the failure by the requested trace name instead.
            logger.error(f'Something went wrong while creating Langfuse trace {name}: {str(e)}')
            return

        logger.info(f"Langfuse trace configured with ID: {self.trace.id}")

    def start_span(self,
                   name: str,
                   input: typing.Optional[typing.Any] = None) -> typing.Optional[StatefulSpanClient]:
        """
        Create span. If Langfuse is disabled, nothing will be done.

        Args:
            name (str): Span name.
            input (Any): Span input.

        Returns:
            The created span, or None when Langfuse is disabled or no trace has been set up.
        """

        if self.client is None:
            logger.debug("Langfuse is disabled.")
            return None

        if self.trace is None:
            logger.debug("Langfuse trace is not created.")
            return None

        return self.trace.span(name=name, input=input)

    def end_span_stream(self,
                        span: typing.Optional[StatefulSpanClient] = None) -> None:
        """
        End span. If Langfuse is disabled, nothing will happen.

        Args:
            span (Any): Span object.
        """

        if self.client is None:
            logger.debug("Langfuse is disabled.")
            return

        # `span` defaults to None, so it must be checked before use.
        if span is None:
            logger.debug("Langfuse span is not created.")
            return

        span.end()

        if self.trace is not None:
            self.trace.update()

    def end_span(self,
                 span: typing.Optional[StatefulSpanClient] = None,
                 output: typing.Optional[typing.Any] = None) -> None:
        """
        End span and record its output on the trace. If Langfuse is disabled, nothing will be done.

        Args:
            span (Any): Span object.
            output (Any): Span output.
        """

        if self.client is None:
            logger.debug("Langfuse is disabled.")
            return

        if span is None:
            logger.debug("Langfuse span is not created.")
            return

        span.end(output=output)

        if self.trace is None:
            logger.debug("Langfuse trace is not created.")
            return

        self.trace.update(output=output)

        metadata = self.metadata or {}

        try:
            # Ensure all batched traces are sent before fetching.
            self.client.flush()
            metadata['tool_usage'] = self._get_tool_usage()
            self.trace.update(metadata=metadata)

        except Exception as e:
            logger.error(f'Something went wrong while processing Langfuse trace {self.trace.id}: {str(e)}')

    def get_langchain_handler(self) -> typing.Optional[CallbackHandler]:
        """
        Get Langchain handler. Returns None if Langfuse is disabled or no trace has been set up.
        """

        if self.client is None:
            logger.debug("Langfuse is disabled.")
            return None

        if self.trace is None:
            logger.debug("Langfuse trace is not created.")
            return None

        return self.trace.get_langchain_handler()

    def set_metadata(self, custom_metadata: dict = None) -> None:
        """
        Set trace metadata: the custom metadata plus the default `environment` and `release` entries.

        Args:
            custom_metadata (dict): Extra metadata. It is copied, the caller's dict is not modified.
        """
        self.metadata = dict(custom_metadata or {})

        self.metadata["environment"] = self.environment
        self.metadata["release"] = self.release

    def set_tags(self, custom_tags: typing.Optional[typing.List] = None) -> None:
        """
        Set trace tags: the custom tags followed by the default `environment` and `release` tags.

        Args:
            custom_tags (list): Extra tags. The list is copied so that a list reused
                by the caller does not grow on every call.
        """
        self.tags = list(custom_tags or [])

        self.tags.append(self.environment)
        self.tags.append(self.release)

    def _get_tool_usage(self) -> typing.Dict:
        """ Retrieves tool usage information from a langfuse trace.
        Note: assumes trace marks an action with string `AgentAction`

        Returns:
            dict: Mapping of tool name to the number of times it appears in the trace.
        """

        tool_usage = {}

        try:
            fetched_trace = self.client.get_trace(self.trace.id)
            for observation in fetched_trace.observations:
                step = observation.name
                # Skip unnamed observations and anything that is not an agent action.
                if not step or 'AgentAction' not in step:
                    continue
                parts = step.split('-')
                # The tool name is the second dash-separated part; skip names without one
                # instead of discarding the counts collected so far.
                if len(parts) < 2:
                    continue
                tool_name = parts[1]
                tool_usage[tool_name] = tool_usage.get(tool_name, 0) + 1
        except TraceNotFoundError:
            logger.warning(f'Langfuse trace {self.trace.id} not found')
        except Exception as e:
            logger.error(f'Something went wrong while processing Langfuse trace {self.trace.id}: {str(e)}')

        return tool_usage
mindsdb/utilities/log.py CHANGED
@@ -29,6 +29,23 @@ class ColorFormatter(logging.Formatter):
29
29
  return log_fmt.format(record)
30
30
 
31
31
 
32
def get_console_handler_config_level() -> int:
    """
    Return the console handler log level.

    The level name is read from the `logging.handlers.console.level` entry of the
    application config and resolved to the matching attribute of the `logging` module.
    """
    level_name = app_config['logging']['handlers']['console']["level"]
    return getattr(logging, level_name)
35
+
36
+
37
def get_file_handler_config_level() -> int:
    """
    Return the file handler log level.

    The level name is read from the `logging.handlers.file.level` entry of the
    application config and resolved to the matching attribute of the `logging` module.
    """
    level_name = app_config['logging']['handlers']['file']["level"]
    return getattr(logging, level_name)
40
+
41
+
42
def get_mindsdb_log_level() -> int:
    """
    Return the lower (more verbose) of the console and file handler log levels.
    """
    return min(
        get_console_handler_config_level(),
        get_file_handler_config_level(),
    )
47
+
48
+
32
49
  def configure_logging():
33
50
  handlers_config = {}
34
51
  console_handler_config = app_config['logging']['handlers']['console']
@@ -39,6 +56,7 @@ def configure_logging():
39
56
  "formatter": "f",
40
57
  "level": console_handler_config_level
41
58
  }
59
+
42
60
  file_handler_config = app_config['logging']['handlers']['file']
43
61
  file_handler_config_level = getattr(logging, file_handler_config["level"])
44
62
  if file_handler_config['enabled'] is True:
@@ -51,7 +69,7 @@ def configure_logging():
51
69
  "backupCount": file_handler_config["backupCount"]
52
70
  }
53
71
 
54
- mindsdb_log_level = min(console_handler_config_level, file_handler_config_level)
72
+ mindsdb_log_level = get_mindsdb_log_level()
55
73
 
56
74
  logging_config = dict(
57
75
  version=1,
@@ -65,7 +83,7 @@ def configure_logging():
65
83
  loggers={
66
84
  "": { # root logger
67
85
  "handlers": list(handlers_config.keys()),
68
- "level": logging.WARNING,
86
+ "level": mindsdb_log_level,
69
87
  },
70
88
  "__main__": {
71
89
  "level": mindsdb_log_level,
@@ -0,0 +1,206 @@
1
+ import os
2
+ import typing
3
+
4
+ from opentelemetry import trace # noqa: F401
5
+ from opentelemetry import metrics # noqa: F401
6
+ from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter as OTLPLogExporterGRPC
7
+ from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter as OTLPLogExporterHTTP
8
+ from opentelemetry.sdk._logs._internal.export import LogExporter
9
+ from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter as OTLPMetricExporterGRPC
10
+ from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as OTLPMetricExporterHTTP
11
+ from opentelemetry.sdk.metrics.export import MetricExporter, ConsoleMetricExporter
12
+ from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as OTLPSpanExporterGRPC
13
+ from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as OTLPSpanExporterHTTP
14
+ from opentelemetry.sdk.trace.export import SpanExporter, ConsoleSpanExporter
15
+ from opentelemetry.sdk.resources import Resource
16
+ from opentelemetry.sdk.trace.sampling import TraceIdRatioBased
17
+
18
+ from mindsdb.utilities.otel.logger import setup_logger
19
+ from mindsdb.utilities.otel.meter import setup_meter
20
+ from mindsdb.utilities.otel.tracer import setup_tracer
21
+ from mindsdb.utilities.utils import parse_csv_attributes
22
+ from mindsdb.utilities import log
23
+
24
+ logger = log.getLogger(__name__)
25
+
26
# OpenTelemetry exporter type
OTEL_EXPORTER_TYPE = os.environ.get("OTEL_EXPORTER_TYPE", "console")  # console or otlp

# OpenTelemetry exporter protocol
OTEL_EXPORTER_PROTOCOL = os.environ.get("OTEL_EXPORTER_PROTOCOL", "grpc")  # grpc or http

# Common OTLP endpoint, also used as the fallback for the signal-specific endpoints below
OTEL_OTLP_ENDPOINT = os.environ.get("OTEL_OTLP_ENDPOINT", "http://localhost:4317")

# OTLP logging endpoint. If not set, the common OTLP endpoint is used
OTEL_OTLP_LOGGING_ENDPOINT = os.environ.get("OTEL_OTLP_LOGGING_ENDPOINT", OTEL_OTLP_ENDPOINT)

# OTLP tracing endpoint. If not set, the common OTLP endpoint is used
OTEL_OTLP_TRACING_ENDPOINT = os.environ.get("OTEL_OTLP_TRACING_ENDPOINT", OTEL_OTLP_ENDPOINT)

# OTLP metrics endpoint. If not set, the common OTLP endpoint is used
OTEL_OTLP_METRICS_ENDPOINT = os.environ.get("OTEL_OTLP_METRICS_ENDPOINT", OTEL_OTLP_ENDPOINT)

# Service name
OTEL_SERVICE_NAME = os.environ.get("OTEL_SERVICE_NAME", "mindsdb")

# Service instance ID
OTEL_SERVICE_INSTANCE_ID = os.environ.get("OTEL_SERVICE_INSTANCE_ID", "mindsdb-instance")

# The name of the environment we're on, by default local for development, this is set differently per-env in our Helm
# chart values files
OTEL_SERVICE_ENVIRONMENT = os.environ.get("OTEL_SERVICE_ENVIRONMENT", "local").lower()

# Service release
OTEL_SERVICE_RELEASE = os.environ.get("OTEL_SERVICE_RELEASE", "local").lower()

# How often to capture traces
OTEL_TRACE_SAMPLE_RATE = float(os.environ.get("OTEL_TRACE_SAMPLE_RATE", "1.0"))

# Extra attributes, kept as the raw string read from the environment
OTEL_EXTRA_ATTRIBUTES = os.environ.get("OTEL_EXTRA_ATTRIBUTES", "")

# By default, we have Open Telemetry SDK enabled on all envs, except for local which is disabled by default.
OTEL_SDK_DISABLED = (
    os.environ.get("OTEL_SDK_DISABLED", "false").lower() == "true"
    or OTEL_SERVICE_ENVIRONMENT == "local"
)

# OpenTelemetry logging is disabled by default.
OTEL_LOGGING_DISABLED = os.environ.get("OTEL_LOGGING_DISABLED", "true").lower() == "true"

# OpenTelemetry tracing is enabled by default.
OTEL_TRACING_DISABLED = os.environ.get("OTEL_TRACING_DISABLED", "false").lower() == "true"

# OpenTelemetry metrics are disabled by default.
OTEL_METRICS_DISABLED = os.environ.get("OTEL_METRICS_DISABLED", "true").lower() == "true"

# If you want to enable Open Telemetry on local for some reason please set OTEL_SDK_FORCE_RUN to true
OTEL_SDK_FORCE_RUN = os.environ.get("OTEL_SDK_FORCE_RUN", "false").lower() == "true"
78
+
79
+
80
def get_otel_attributes() -> dict:
    """
    Build the OpenTelemetry attributes.

    Extra attributes parsed from OTEL_EXTRA_ATTRIBUTES are combined with the base
    service attributes. If parsing fails, the error is logged and only the base
    attributes are returned.

    Returns:
        dict: OpenTelemetry attributes
    """

    try:
        extra_attributes = parse_csv_attributes(OTEL_EXTRA_ATTRIBUTES)
    except Exception as e:
        logger.error(f"Failed to parse OTEL_EXTRA_ATTRIBUTES: {e}")
        extra_attributes = {}

    # Base attributes are listed last so they win over extra attributes with the same key
    return {
        **extra_attributes,
        "service.name": OTEL_SERVICE_NAME,
        "service.instance.id": OTEL_SERVICE_INSTANCE_ID,
        "environment": OTEL_SERVICE_ENVIRONMENT,
        "release": OTEL_SERVICE_RELEASE,
    }
104
+
105
+
106
def get_logging_exporter() -> typing.Optional[LogExporter]:
    """
    Get OpenTelemetry logging exporter.

    Returns:
        LogExporter: OTLP log exporter for the configured protocol, or None when
        the exporter type is not "otlp" or the protocol is neither "grpc" nor "http"
    """

    if OTEL_EXPORTER_TYPE != "otlp":
        return None

    if OTEL_EXPORTER_PROTOCOL == "grpc":
        return OTLPLogExporterGRPC(endpoint=OTEL_OTLP_LOGGING_ENDPOINT, insecure=True)

    if OTEL_EXPORTER_PROTOCOL == "http":
        return OTLPLogExporterHTTP(endpoint=OTEL_OTLP_LOGGING_ENDPOINT)

    return None
128
+
129
+
130
def get_span_exporter() -> SpanExporter:
    """
    Get OpenTelemetry span exporter

    Returns:
        SpanExporter: OTLP span exporter for the configured protocol when the
        exporter type is "otlp" and the protocol is "grpc" or "http";
        a console span exporter otherwise
    """

    use_otlp = OTEL_EXPORTER_TYPE == "otlp"

    if use_otlp and OTEL_EXPORTER_PROTOCOL == "grpc":
        return OTLPSpanExporterGRPC(endpoint=OTEL_OTLP_TRACING_ENDPOINT, insecure=True)

    if use_otlp and OTEL_EXPORTER_PROTOCOL == "http":
        return OTLPSpanExporterHTTP(endpoint=OTEL_OTLP_TRACING_ENDPOINT)

    return ConsoleSpanExporter()
152
+
153
+
154
def get_metrics_exporter() -> typing.Optional[MetricExporter]:
    """
    Get OpenTelemetry metrics exporter

    Returns:
        MetricExporter: OTLP metric exporter for the configured protocol when the
        exporter type is "otlp" and the protocol is "grpc" or "http";
        a console metric exporter otherwise
    """

    use_otlp = OTEL_EXPORTER_TYPE == "otlp"

    if use_otlp and OTEL_EXPORTER_PROTOCOL == "grpc":
        return OTLPMetricExporterGRPC(endpoint=OTEL_OTLP_METRICS_ENDPOINT, insecure=True)

    if use_otlp and OTEL_EXPORTER_PROTOCOL == "http":
        return OTLPMetricExporterHTTP(endpoint=OTEL_OTLP_METRICS_ENDPOINT)

    return ConsoleMetricExporter()
176
+
177
+
178
# Module-level bootstrap: runs once on import and wires up the enabled
# OpenTelemetry signals (logging, tracing, metrics).
if not OTEL_SDK_DISABLED or OTEL_SDK_FORCE_RUN:
    logger.info("OpenTelemetry enabled")
    logger.info(f"OpenTelemetry exporter type: {OTEL_EXPORTER_TYPE}")
    logger.info(f"OpenTelemetry service name: {OTEL_SERVICE_NAME}")
    logger.info(f"OpenTelemetry service environment: {OTEL_SERVICE_ENVIRONMENT}")
    logger.info(f"OpenTelemetry service release: {OTEL_SERVICE_RELEASE}")
    logger.info(f"OpenTelemetry trace sample rate: {OTEL_TRACE_SAMPLE_RATE}")
    logger.info(f"OpenTelemetry extra attributes: {OTEL_EXTRA_ATTRIBUTES}")

    # Collect the resource attributes (e.g., service name)
    attributes = get_otel_attributes()

    # Define OpenTelemetry sampler
    sampler = TraceIdRatioBased(OTEL_TRACE_SAMPLE_RATE)

    # Define OpenTelemetry resource from the attributes
    resource = Resource(attributes=attributes)

    if not OTEL_LOGGING_DISABLED:
        # get_logging_exporter() returns None unless an OTLP exporter is configured
        # (there is no console fallback for logs). Do not hand None to the batch
        # processor; skip logging setup instead.
        logging_exporter = get_logging_exporter()
        if logging_exporter is None:
            logger.warning(
                "OpenTelemetry Logging is enabled but no logging exporter is available "
                f"for exporter type '{OTEL_EXPORTER_TYPE}' and protocol '{OTEL_EXPORTER_PROTOCOL}'; "
                "skipping logging setup"
            )
        else:
            logger.info("OpenTelemetry Logging is enabled")
            setup_logger(resource, logging_exporter)

    if not OTEL_TRACING_DISABLED:
        logger.info("OpenTelemetry Tracing is enabled")
        setup_tracer(resource, sampler, get_span_exporter())

    if not OTEL_METRICS_DISABLED:
        logger.info("OpenTelemetry Metrics is enabled")
        setup_meter(resource, get_metrics_exporter())
@@ -0,0 +1,25 @@
1
+ import logging
2
+
3
+ from opentelemetry._logs import set_logger_provider
4
+ from opentelemetry.sdk._logs._internal.export import LogExporter
5
+ from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
6
+ from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
7
+ from opentelemetry.sdk.resources import Resource
8
+
9
+ from mindsdb.utilities.log import get_mindsdb_log_level
10
+
11
+
12
def setup_logger(resource: Resource, exporter: LogExporter) -> None:
    """
    Setup OpenTelemetry logging

    Registers a logger provider for the given resource, exports its records in
    batches through `exporter`, and attaches a handler at the MindsDB log level
    to the root logger.
    """
    level = get_mindsdb_log_level()

    provider = LoggerProvider(resource=resource)
    set_logger_provider(provider)

    batch_processor = BatchLogRecordProcessor(exporter)
    provider.add_log_record_processor(batch_processor)

    otel_handler = LoggingHandler(level=level, logger_provider=provider)

    # Attach OTLP handler to root logger
    root_logger = logging.getLogger()
    root_logger.addHandler(otel_handler)
@@ -0,0 +1,19 @@
1
+ from opentelemetry import metrics
2
+ from opentelemetry.sdk.metrics import MeterProvider
3
+ from opentelemetry.sdk.resources import Resource
4
+ from opentelemetry.sdk.metrics.export import (
5
+ MetricExporter,
6
+ PeriodicExportingMetricReader,
7
+ )
8
+
9
+
10
def setup_meter(resource: Resource, exporter: MetricExporter) -> None:
    """
    Setup OpenTelemetry metrics

    Wraps `exporter` in a periodic exporting reader and installs a meter provider
    for the given resource as the global default.
    """

    reader = PeriodicExportingMetricReader(exporter=exporter)
    meter_provider = MeterProvider(
        resource=resource,
        metric_readers=[reader],
    )

    # Sets the global default meter provider
    metrics.set_meter_provider(meter_provider)
@@ -0,0 +1,25 @@
1
+ from mindsdb.utilities.otel import metrics, OTEL_SERVICE_NAME
2
+
3
+ _query_request_counter = None
4
+
5
+
6
def get_query_request_counter():
    """
    Get the query request counter, creating it on first use
    """
    global _query_request_counter

    # Reuse the counter once it has been created
    if _query_request_counter is not None:
        return _query_request_counter

    # Get the meter from the main metrics object
    meter = metrics.get_meter(f"{OTEL_SERVICE_NAME}.query_service_meter")

    _query_request_counter = meter.create_counter(
        name="query_request_count",
        description="Counts the number of times the SQL query is called",
        unit="1",
    )
    return _query_request_counter
@@ -0,0 +1,16 @@
1
+ from opentelemetry import trace
2
+ from opentelemetry.sdk.resources import Resource
3
+ from opentelemetry.sdk.trace import TracerProvider
4
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor, SpanExporter
5
+ from opentelemetry.sdk.trace.sampling import Sampler
6
+
7
+
8
def setup_tracer(resource: Resource, sampler: Sampler, exporter: SpanExporter) -> None:
    """
    Setup OpenTelemetry tracing

    Installs a tracer provider built from `resource` and `sampler`, then adds a
    batch span processor that sends spans to `exporter`.
    """
    # Set the tracer provider with the custom resource
    tracer_provider = TracerProvider(resource=resource, sampler=sampler)
    trace.set_tracer_provider(tracer_provider)

    # Add a batch span processor to the currently registered tracer provider
    span_processor = BatchSpanProcessor(exporter)
    trace.get_tracer_provider().add_span_processor(span_processor)
@@ -0,0 +1,52 @@
1
+ import os
2
+ from typing import Iterable, Callable
3
+ import pandas as pd
4
+
5
+ from mindsdb.utilities.config import Config
6
+ from mindsdb.utilities.context_executor import execute_in_threads
7
+
8
+
9
def process_dataframe_in_partitions(df: pd.DataFrame, callback: Callable, partition_size: int) -> Iterable:
    """
    Splits dataframe into partitions and apply callback on each partition

    This is a generator: nothing is executed (and no error is raised) until the
    result is iterated.

    :param df: input dataframe
    :param callback: function to apply on each partition
    :param partition_size: size of each partition, must be at least 1
    :return: yield results
    :raises ValueError: if partition_size is less than 1
    """

    # A zero size would divide by zero below and a negative one would never
    # terminate, so reject both explicitly.
    if partition_size < 1:
        raise ValueError(f'partition_size must be at least 1, got {partition_size}')

    # tasks: each task is the argument list for one callback call
    def split_data_f():
        for start in range(0, len(df), partition_size):
            yield [df.iloc[start: start + partition_size]]

    tasks = split_data_f()

    # workers count
    is_cloud = Config().is_cloud
    if is_cloud:
        max_threads = int(os.getenv('MINDSDB_MAX_PARTITIONING_THREADS', 10))
    else:
        # os.cpu_count() may return None when the count cannot be determined
        max_threads = (os.cpu_count() or 1) - 2

    # don't exceed chunk_count
    chunk_count = len(df) // partition_size
    max_threads = min(max_threads, chunk_count)

    if max_threads < 1:
        max_threads = 1

    if max_threads == 1:
        # don't spawn threads
        for task in tasks:
            yield callback(*task)
    else:
        yield from execute_in_threads(callback, tasks, thread_count=max_threads)
@@ -14,6 +14,10 @@ from sqlalchemy.sql import functions as sa_fnc
14
14
  from mindsdb_sql_parser import ast
15
15
 
16
16
 
17
# Lower-cased identifier names that SqlalchemyRender checks with
# `i.lower() not in RESERVED_WORDS` before falling back to the original
# identifier string. NOTE(review): presumably these keep the dialect-compiled
# (quoted) form - confirm against the renderer.
RESERVED_WORDS = {"collation"}
20
+
17
21
  sa_type_names = [
18
22
  key for key, val in sa.types.__dict__.items() if hasattr(val, '__module__')
19
23
  and val.__module__ in ('sqlalchemy.sql.sqltypes', 'sqlalchemy.sql.type_api')
@@ -98,7 +102,7 @@ class SqlalchemyRender:
98
102
  # in that case use origin string
99
103
 
100
104
  part_lower = str(sa.column(i.lower()).compile(dialect=self.dialect))
101
- if part.lower() != part_lower:
105
+ if part.lower() != part_lower and i.lower() not in RESERVED_WORDS:
102
106
  part = i
103
107
 
104
108
  parts2.append(part)
@@ -506,6 +510,8 @@ class SqlalchemyRender:
506
510
  condition = self.to_expression(item['condition'])
507
511
 
508
512
  join_type = item['join_type']
513
+ if 'ASOF' in join_type:
514
+ raise NotImplementedError(f'Unsupported join type: {join_type}')
509
515
  method = 'join'
510
516
  is_full = False
511
517
  if join_type == 'LEFT JOIN':