MindsDB-25.1.2.1-py3-none-any.whl → MindsDB-25.1.3.0-py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of MindsDB might be problematic.
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.3.0.dist-info}/METADATA +251 -250
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.3.0.dist-info}/RECORD +33 -27
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +5 -3
- mindsdb/api/executor/sql_query/result_set.py +36 -21
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +1 -1
- mindsdb/api/executor/sql_query/steps/join_step.py +4 -4
- mindsdb/api/executor/utilities/sql.py +2 -10
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +7 -0
- mindsdb/integrations/handlers/file_handler/file_handler.py +1 -1
- mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py +82 -0
- mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +8 -1
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +48 -16
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +3 -3
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +76 -27
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py +18 -1
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +6 -2
- mindsdb/integrations/utilities/rag/settings.py +2 -0
- mindsdb/integrations/utilities/sql_utils.py +1 -1
- mindsdb/interfaces/knowledge_base/controller.py +33 -14
- mindsdb/interfaces/skills/retrieval_tool.py +10 -3
- mindsdb/utilities/cache.py +7 -4
- mindsdb/utilities/context.py +9 -0
- mindsdb/utilities/log.py +20 -2
- mindsdb/utilities/otel/__init__.py +206 -0
- mindsdb/utilities/otel/logger.py +25 -0
- mindsdb/utilities/otel/meter.py +19 -0
- mindsdb/utilities/otel/metric_handlers/__init__.py +25 -0
- mindsdb/utilities/otel/tracer.py +16 -0
- mindsdb/utilities/utils.py +34 -0
- mindsdb/utilities/otel.py +0 -72
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.3.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.3.0.dist-info}/WHEEL +0 -0
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.3.0.dist-info}/top_level.txt +0 -0
mindsdb/integrations/handlers/postgres_handler/postgres_handler.py
CHANGED

@@ -161,7 +161,7 @@ class PostgresHandler(DatabaseHandler):
             'float8': 'float64'
         }
         columns = df.columns
-        df =
+        df.columns = list(range(len(columns)))
         for column_index, column_name in enumerate(df.columns):
             col = df[column_name]
             if str(col.dtype) == 'object':
@@ -172,7 +172,7 @@ class PostgresHandler(DatabaseHandler):
                     df[column_name] = col.astype(types_map[pg_type.name])
                 except ValueError as e:
                     logger.error(f'Error casting column {col.name} to {types_map[pg_type.name]}: {e}')
-
+        df.columns = columns
 
     @profiler.profile()
     def native_query(self, query: str, params=None) -> Response:
@@ -202,7 +202,7 @@ class PostgresHandler(DatabaseHandler):
                     result,
                     columns=[x.name for x in cur.description]
                 )
-
+                self._cast_dtypes(df, cur.description)
                 response = Response(
                     RESPONSE_TYPE.TABLE,
                     df
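The `_cast_dtypes` change above swaps the column labels for positional indices before casting and restores them afterwards. A minimal sketch of why that matters, assuming only pandas: a SQL result set may legally contain duplicate column names, and label-based assignment then addresses every column sharing the name, while positional labels keep each column individually addressable.

    import pandas as pd

    # A result set can legally contain duplicate column names, e.g. SELECT 1 AS a, '2' AS a.
    df = pd.DataFrame([[1, '2']], columns=['a', 'a'])

    columns = df.columns
    df.columns = list(range(len(columns)))   # positional labels: every column is now unique
    df[1] = df[1].astype('int64')            # casts only the second column, not both 'a's
    df.columns = columns                     # restore the original labels
    print(df.dtypes)                         # both columns named 'a', now int64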
mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py
CHANGED

@@ -1,9 +1,9 @@
-from typing import Any, List, Optional, Dict
+from typing import Any, List, Union, Optional, Dict
 
 from langchain_community.vectorstores import PGVector
 from langchain_community.vectorstores.pgvector import Base
 
-from pgvector.sqlalchemy import Vector
+from pgvector.sqlalchemy import SPARSEVEC, Vector
 import sqlalchemy as sa
 from sqlalchemy.dialects.postgresql import JSON
 
@@ -15,9 +15,17 @@ _generated_sa_tables = {}
 
 class PGVectorMDB(PGVector):
     """
-
+    langchain_community.vectorstores.PGVector adapted for mindsdb vector store table structure
     """
 
+    def __init__(self, *args, is_sparse: bool = False, vector_size: Optional[int] = None, **kwargs):
+        # todo get is_sparse and vector_size from kb vector table
+        self.is_sparse = is_sparse
+        if is_sparse and vector_size is None:
+            raise ValueError("vector_size is required when is_sparse=True")
+        self.vector_size = vector_size
+        super().__init__(*args, **kwargs)
+
     def __post_init__(
         self,
     ) -> None:
@@ -32,53 +40,94 @@ class PGVectorMDB(PGVector):
             __tablename__ = collection_name
 
             id = sa.Column(sa.Integer, primary_key=True)
-            embedding
-
-
+            embedding = sa.Column(
+                "embeddings",
+                SPARSEVEC() if self.is_sparse else Vector() if self.vector_size is None else
+                SPARSEVEC(self.vector_size) if self.is_sparse else Vector(self.vector_size)
+            )
+            document = sa.Column("content", sa.String, nullable=True)
+            cmetadata = sa.Column("metadata", JSON, nullable=True)
 
         _generated_sa_tables[collection_name] = EmbeddingStore
 
         self.EmbeddingStore = _generated_sa_tables[collection_name]
 
     def __query_collection(
-
-
-
-
+        self,
+        embedding: Union[List[float], Dict[int, float], str],
+        k: int = 4,
+        filter: Optional[Dict[str, str]] = None,
     ) -> List[Any]:
         """Query the collection."""
         with Session(self._bind) as session:
-
-
-
-
-
-
-
-
-
+            if self.is_sparse:
+                # Sparse vectors: expect string in format "{key:value,...}/size" or dictionary
+                if isinstance(embedding, dict):
+                    from pgvector.utils import SparseVector
+                    embedding = SparseVector(embedding, self.vector_size)
+                    embedding_str = embedding.to_text()
+                elif isinstance(embedding, str):
+                    # Use string as is - it should already be in the correct format
+                    embedding_str = embedding
+                # Use inner product for sparse vectors
+                distance_op = "<#>"
+                # For inner product, larger values are better matches
+                order_direction = "DESC"
+            else:
+                # Dense vectors: expect string in JSON array format or list of floats
+                if isinstance(embedding, list):
+                    embedding_str = f"[{','.join(str(x) for x in embedding)}]"
+                elif isinstance(embedding, str):
+                    embedding_str = embedding
+                # Use cosine similarity for dense vectors
+                distance_op = "<=>"
+                # For cosine similarity, smaller values are better matches
+                order_direction = "ASC"
+
+            # Use SQL directly for vector comparison
+            query = sa.text(
+                f"""
+                SELECT t.*, t.embeddings {distance_op} '{embedding_str}' as distance
+                FROM {self.collection_name} t
+                ORDER BY distance {order_direction}
+                LIMIT {k}
+                """
             )
-
-
-
+            results = session.execute(query).all()
+
+            # Convert results to the expected format
+            formatted_results = []
+            for rec in results:
+                metadata = rec.metadata if bool(rec.metadata) else {0: 0}
+                embedding_store = self.EmbeddingStore()
+                embedding_store.document = rec.content
+                embedding_store.cmetadata = metadata
+                result = type(
+                    'Result', (), {
+                        'EmbeddingStore': embedding_store,
+                        'distance': rec.distance
+                    }
+                )
+                formatted_results.append(result)
 
-
+            return formatted_results
 
     # aliases for different langchain versions
     def _PGVector__query_collection(self, *args, **kwargs):
+
         return self.__query_collection(*args, **kwargs)
 
     def _query_collection(self, *args, **kwargs):
         return self.__query_collection(*args, **kwargs)
 
     def create_collection(self):
-        raise RuntimeError(
+        raise RuntimeError("Forbidden")
 
     def delete_collection(self):
-        raise RuntimeError(
+        raise RuntimeError("Forbidden")
 
     def delete(self, *args, **kwargs):
-        raise RuntimeError(
+        raise RuntimeError("Forbidden")
 
     def add_embeddings(self, *args, **kwargs):
-        raise RuntimeError(
+        raise RuntimeError("Forbidden")
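For context on the query above: pgvector's `<=>` operator computes cosine distance and `<#>` the negative inner product. The sparse branch leans on pgvector-python's `SparseVector`, whose text form is what gets interpolated into the SQL. A minimal sketch of that conversion, assuming the pgvector-python package (the exact output shown is illustrative):

    from pgvector.utils import SparseVector

    # A sparse embedding as an {index: weight} dict; the dict carries no
    # dimensionality of its own, so the total size must be passed explicitly.
    emb = SparseVector({0: 1.5, 3: 0.25}, 8)
    print(emb.to_text())   # e.g. "{1:1.5,4:0.25}/8" - sparsevec text is 1-indexed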
mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py
CHANGED

@@ -7,6 +7,7 @@ from pydantic import BaseModel
 
 from mindsdb.integrations.utilities.rag.settings import VectorStoreType, VectorStoreConfig
 from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.MDBVectorStore import MDBVectorStore
+from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.pgvector import PGVectorMDB
 from mindsdb.utilities import log
 
 
@@ -28,6 +29,20 @@ class VectorStoreLoader(BaseModel):
         Loads the vector store based on the provided config and embeddings model
         :return:
         """
+        if self.config.is_sparse is not None and self.config.vector_size is not None and self.config.kb_table is not None:
+            # Only use PGVector store for sparse vectors.
+            db_handler = self.config.kb_table.get_vector_db()
+            db_args = db_handler.connection_args
+            # Assume we are always using PGVector & psycopg2.
+            connection_str = f"postgresql+psycopg2://{db_args.get('user')}:{db_args.get('password')}@{db_args.get('host')}:{db_args.get('port')}/{db_args.get('dbname', db_args.get('database'))}"
+
+            return PGVectorMDB(
+                connection_string=connection_str,
+                collection_name=self.config.kb_table._kb.vector_database_table,
+                embedding_function=self.embedding_model,
+                is_sparse=self.config.is_sparse,
+                vector_size=self.config.vector_size
+            )
         return MDBVectorStore(kb_table=self.config.kb_table)
 
 
@@ -56,5 +71,7 @@ class VectorStoreFactory:
         return PGVectorMDB(
             connection_string=settings.connection_string,
             collection_name=settings.collection_name,
-            embedding_function=embedding_model
+            embedding_function=embedding_model,
+            is_sparse=settings.is_sparse,
+            vector_size=settings.vector_size
         )
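A quick illustration of the SQLAlchemy URL the loader assembles (the `dbname`/`database` fallback exists because handlers differ in which key they store); all values here are made up:

    db_args = {'user': 'mdb', 'password': 's3cret', 'host': 'localhost',
               'port': 5432, 'dbname': 'vectors'}

    connection_str = (
        f"postgresql+psycopg2://{db_args.get('user')}:{db_args.get('password')}"
        f"@{db_args.get('host')}:{db_args.get('port')}"
        f"/{db_args.get('dbname', db_args.get('database'))}"
    )
    print(connection_str)   # postgresql+psycopg2://mdb:s3cret@localhost:5432/vectors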
mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py
CHANGED

@@ -169,7 +169,7 @@ Output:
             logger.info(f'SQL Retriever query {checked_sql_query} failed with error {error_msg}')
             if num_retries >= self.num_retries:
                 logger.info('Using fallback retriever in SQL retriever.')
-                return self.fallback_retriever._get_relevant_documents(retrieval_query, run_manager)
+                return self.fallback_retriever._get_relevant_documents(retrieval_query, run_manager=run_manager)
             query_to_retry = self._prepare_retry_query(checked_sql_query, error_msg, run_manager)
             query_to_retry_with_embeddings = query_to_retry.format(embeddings=str(embedded_query))
             # Handle LLM output that has the ```sql delimiter possibly.
@@ -185,4 +185,8 @@ Output:
                 document_row.get('content', ''),
                 metadata=document_row.get('metadata', {})
             ))
-
+        if retrieved_documents:
+            return retrieved_documents
+        # If the SQL query constructed did not return any documents, fallback.
+        logger.info('No documents returned from SQL retriever. using fallback retriever.')
+        return self.fallback_retriever._get_relevant_documents(retrieval_query, run_manager=run_manager)
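The switch from positional `run_manager` to `run_manager=run_manager` matters because LangChain's `BaseRetriever._get_relevant_documents` declares `run_manager` as keyword-only in current releases, so the positional call raises `TypeError`. A self-contained illustration of that signature shape:

    def _get_relevant_documents(query, *, run_manager=None):
        # Parameters after the bare * can only be passed by name.
        return query, run_manager

    _get_relevant_documents('q', run_manager='rm')   # ok
    # _get_relevant_documents('q', 'rm')             # TypeError: too many positional arguments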
mindsdb/integrations/utilities/rag/settings.py
CHANGED

@@ -290,6 +290,8 @@ class VectorStoreConfig(BaseModel):
     collection_name: str = DEFAULT_COLLECTION_NAME
     connection_string: str = None
     kb_table: Any = None
+    is_sparse: bool = False
+    vector_size: Optional[int] = None
 
     class Config:
         arbitrary_types_allowed = True
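Since `VectorStoreConfig` is a pydantic model, the two new fields are optional and default to dense behaviour. A simplified stand-in (not the real class) showing how a sparse configuration would be constructed:

    from typing import Any, Optional
    from pydantic import BaseModel

    class VectorStoreConfigSketch(BaseModel):   # illustrative stand-in only
        kb_table: Any = None
        is_sparse: bool = False
        vector_size: Optional[int] = None

    cfg = VectorStoreConfigSketch(is_sparse=True, vector_size=30522)
    print(cfg.is_sparse, cfg.vector_size)       # True 30522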
mindsdb/interfaces/knowledge_base/controller.py
CHANGED

@@ -642,11 +642,13 @@ class KnowledgeBaseController:
         storage: Identifier,
         params: dict,
         preprocessing_config: Optional[dict] = None,
-        if_not_exists: bool = False
+        if_not_exists: bool = False
     ) -> db.KnowledgeBase:
         """
         Add a new knowledge base to the database
         :param preprocessing_config: Optional preprocessing configuration to validate and store
+        :param is_sparse: Whether to use sparse vectors for embeddings
+        :param vector_size: Optional size specification for vectors, required when is_sparse=True
         """
         # Validate preprocessing config first if provided
         if preprocessing_config is not None:
@@ -654,6 +656,12 @@ class KnowledgeBaseController:
             params = params or {}
             params['preprocessing'] = preprocessing_config
 
+        # Check if vector_size is provided when using sparse vectors
+        is_sparse = params.get('is_sparse')
+        vector_size = params.get('vector_size')
+        if is_sparse and vector_size is None:
+            raise ValueError("vector_size is required when is_sparse=True")
+
         # get project id
         project = self.session.database_controller.get_project(project_name)
         project_id = project.id
@@ -693,7 +701,20 @@ class KnowledgeBaseController:
         cloud_pg_vector = os.environ.get('KB_PGVECTOR_URL')
         if cloud_pg_vector:
             vector_table_name = name
-
+            # Add sparse vector support for pgvector
+            vector_db_params = {}
+            # Check both explicit parameter and model configuration
+            is_sparse = is_sparse or model_record.learn_args.get('using', {}).get('sparse')
+            if is_sparse:
+                vector_db_params['is_sparse'] = True
+                if vector_size is not None:
+                    vector_db_params['vector_size'] = vector_size
+            vector_db_name = self._create_persistent_pgvector(vector_db_params)
+
+            # create table in vectordb before creating KB
+            self.session.datahub.get(vector_db_name).integration_handler.create_table(
+                vector_table_name
+            )
         else:
             # create chroma db with same name
             vector_table_name = "default_collection"
@@ -707,15 +728,14 @@ class KnowledgeBaseController:
 
         vector_database_id = self.session.integration_controller.get(vector_db_name)['id']
 
-        #
-        if
-
-
-
-
-
-        )
+        # Store sparse vector settings in params if specified
+        if is_sparse:
+            params = params or {}
+            params['vector_config'] = {
+                'is_sparse': is_sparse
+            }
+            if vector_size is not None:
+                params['vector_config']['vector_size'] = vector_size
 
         kb = db.KnowledgeBase(
             name=name,
@@ -729,16 +749,15 @@ class KnowledgeBaseController:
         db.session.commit()
         return kb
 
-    def _create_persistent_pgvector(self):
+    def _create_persistent_pgvector(self, params=None):
         """Create default vector database for knowledge base, if not specified"""
-
         vector_store_name = "kb_pgvector_store"
 
         # check if exists
         if self.session.integration_controller.get(vector_store_name):
             return vector_store_name
 
-        self.session.integration_controller.add(vector_store_name, 'pgvector', {})
+        self.session.integration_controller.add(vector_store_name, 'pgvector', params or {})
        return vector_store_name
 
     def _create_persistent_chroma(self, kb_name, engine="chromadb"):
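The guard added to `add` is worth spelling out: an `{index: weight}` sparse embedding carries no dimensionality of its own, so the vector column cannot be sized from the data and `vector_size` must arrive via `params`. The same check in isolation:

    def validate_kb_params(params: dict) -> None:
        # Mirrors the controller's new guard for sparse knowledge bases.
        is_sparse = params.get('is_sparse')
        vector_size = params.get('vector_size')
        if is_sparse and vector_size is None:
            raise ValueError("vector_size is required when is_sparse=True")

    validate_kb_params({'is_sparse': True, 'vector_size': 30522})   # passes
    # validate_kb_params({'is_sparse': True})                       # raises ValueError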
mindsdb/interfaces/skills/retrieval_tool.py
CHANGED

@@ -43,10 +43,17 @@ def build_retrieval_tool(tool: dict, pred_args: dict, skill: db.Skills):
         raise ValueError(f"Knowledge base not found: {kb_name}")
 
     kb_table = executor.session.kb_controller.get_table(kb.name, kb.project_id)
+    vector_store_config = {
+        'kb_table': kb_table
+    }
+    is_sparse = tools_config.pop('is_sparse', None)
+    vector_size = tools_config.pop('vector_size', None)
+    if is_sparse is not None:
+        vector_store_config['is_sparse'] = is_sparse
+    if vector_size is not None:
+        vector_store_config['vector_size'] = vector_size
     kb_params = {
-        'vector_store_config':
-        'kb_table': kb_table
-        }
+        'vector_store_config': vector_store_config
     }
 
     # Get embedding model from knowledge base table
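Note that the new options are read with `pop` rather than `get`, so they are consumed out of `tools_config` and do not leak to downstream consumers of the remaining config; in isolation:

    tools_config = {'is_sparse': True, 'vector_size': 30522, 'other_option': 1}

    vector_store_config = {'kb_table': 'kb_table_ref'}   # stand-in for the real table object
    is_sparse = tools_config.pop('is_sparse', None)
    vector_size = tools_config.pop('vector_size', None)
    if is_sparse is not None:
        vector_store_config['is_sparse'] = is_sparse
    if vector_size is not None:
        vector_store_config['vector_size'] = vector_size

    print(vector_store_config)   # {'kb_table': 'kb_table_ref', 'is_sparse': True, 'vector_size': 30522}
    print(tools_config)          # {'other_option': 1}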
mindsdb/utilities/cache.py
CHANGED

@@ -71,10 +71,13 @@ _CACHE_MAX_SIZE = 500
 
 
 def dataframe_checksum(df: pd.DataFrame):
-
-
-
-
+    original_columns = df.columns
+    df.columns = list(range(len(df.columns)))
+    result = hashlib.sha256(
+        str(df.values).encode()
+    ).hexdigest()
+    df.columns = original_columns
+    return result
 
 
 def json_checksum(obj: t.Union[dict, list]):
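A practical consequence of the rewrite: the checksum is taken over `df.values` with neutralised column labels, so two frames that differ only in column names now hash identically. A quick check against a copy of the new implementation:

    import hashlib
    import pandas as pd

    def dataframe_checksum(df: pd.DataFrame):
        original_columns = df.columns
        df.columns = list(range(len(df.columns)))
        result = hashlib.sha256(str(df.values).encode()).hexdigest()
        df.columns = original_columns
        return result

    a = pd.DataFrame({'x': [1, 2]})
    b = a.rename(columns={'x': 'y'})
    assert dataframe_checksum(a) == dataframe_checksum(b)   # labels no longer affect the hash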
mindsdb/utilities/context.py
CHANGED

@@ -52,6 +52,15 @@ class Context:
     def load(self, storage: dict) -> None:
         self._storage.set(storage)
 
+    def metadata(self, **kwargs) -> dict:
+        return {
+            'user_id': self.user_id or "",
+            'company_id': self.company_id or "",
+            'session_id': self.session_id,
+            'user_class': self.user_class,
+            **kwargs
+        }
+
 
 _context_var = ContextVar('mindsdb.context')
 context = Context(_context_var)
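The new `Context.metadata` helper merges ad-hoc keys into the per-request identity fields, presumably for attaching to telemetry; because `**kwargs` is unpacked last, a caller can also override the base fields. An illustrative call (`query_id` is a made-up key):

    from mindsdb.utilities.context import context

    meta = context.metadata(query_id='abc123')
    # -> {'user_id': ..., 'company_id': ..., 'session_id': ..., 'user_class': ..., 'query_id': 'abc123'}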
mindsdb/utilities/log.py
CHANGED

@@ -29,6 +29,23 @@ class ColorFormatter(logging.Formatter):
         return log_fmt.format(record)
 
 
+def get_console_handler_config_level() -> int:
+    console_handler_config = app_config['logging']['handlers']['console']
+    return getattr(logging, console_handler_config["level"])
+
+
+def get_file_handler_config_level() -> int:
+    file_handler_config = app_config['logging']['handlers']['file']
+    return getattr(logging, file_handler_config["level"])
+
+
+def get_mindsdb_log_level() -> int:
+    console_handler_config_level = get_console_handler_config_level()
+    file_handler_config_level = get_file_handler_config_level()
+
+    return min(console_handler_config_level, file_handler_config_level)
+
+
 def configure_logging():
     handlers_config = {}
     console_handler_config = app_config['logging']['handlers']['console']
@@ -39,6 +56,7 @@ def configure_logging():
         "formatter": "f",
         "level": console_handler_config_level
     }
+
     file_handler_config = app_config['logging']['handlers']['file']
     file_handler_config_level = getattr(logging, file_handler_config["level"])
     if file_handler_config['enabled'] is True:
@@ -51,7 +69,7 @@ def configure_logging():
             "backupCount": file_handler_config["backupCount"]
         }
 
-    mindsdb_log_level =
+    mindsdb_log_level = get_mindsdb_log_level()
 
     logging_config = dict(
         version=1,
@@ -65,7 +83,7 @@ def configure_logging():
         loggers={
             "": {  # root logger
                 "handlers": list(handlers_config.keys()),
-                "level":
+                "level": mindsdb_log_level,
             },
             "__main__": {
                 "level": mindsdb_log_level,
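The `min()` in `get_mindsdb_log_level` is deliberate: Python log levels are ordered integers (DEBUG=10 < INFO=20 < WARNING=30 ...), and a record is dropped at the logger before any handler sees it, so the root logger must sit at the most verbose handler's level:

    import logging

    console_level = getattr(logging, "INFO")    # 20, e.g. from the console handler config
    file_level = getattr(logging, "DEBUG")      # 10, e.g. from the file handler config

    # The root logger must be at least as verbose as the most verbose handler,
    # otherwise DEBUG records never reach the file handler at all.
    root_level = min(console_level, file_level)
    print(logging.getLevelName(root_level))     # DEBUG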
mindsdb/utilities/otel/__init__.py
ADDED

@@ -0,0 +1,206 @@
+import os
+import typing
+
+from opentelemetry import trace  # noqa: F401
+from opentelemetry import metrics  # noqa: F401
+from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter as OTLPLogExporterGRPC
+from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter as OTLPLogExporterHTTP
+from opentelemetry.sdk._logs._internal.export import LogExporter
+from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter as OTLPMetricExporterGRPC
+from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as OTLPMetricExporterHTTP
+from opentelemetry.sdk.metrics.export import MetricExporter, ConsoleMetricExporter
+from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as OTLPSpanExporterGRPC
+from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as OTLPSpanExporterHTTP
+from opentelemetry.sdk.trace.export import SpanExporter, ConsoleSpanExporter
+from opentelemetry.sdk.resources import Resource
+from opentelemetry.sdk.trace.sampling import TraceIdRatioBased
+
+from mindsdb.utilities.otel.logger import setup_logger
+from mindsdb.utilities.otel.meter import setup_meter
+from mindsdb.utilities.otel.tracer import setup_tracer
+from mindsdb.utilities.utils import parse_csv_attributes
+from mindsdb.utilities import log
+
+logger = log.getLogger(__name__)
+
+# Check OpenTelemetry exporter type
+OTEL_EXPORTER_TYPE = os.getenv("OTEL_EXPORTER_TYPE", "console")  # console or otlp
+
+# Define OpenTelemetry exporter protocol
+OTEL_EXPORTER_PROTOCOL = os.getenv("OTEL_EXPORTER_PROTOCOL", "grpc")  # grpc or http
+
+# Define OTLP endpoint. If not set, the default OTLP endpoint will be used
+OTEL_OTLP_ENDPOINT = os.getenv("OTEL_OTLP_ENDPOINT", "http://localhost:4317")
+
+# Define OTLP logging endpoint. If not set, the default OTLP logging endpoint will be used
+OTEL_OTLP_LOGGING_ENDPOINT = os.getenv("OTEL_OTLP_LOGGING_ENDPOINT", OTEL_OTLP_ENDPOINT)
+
+# Define OTLP tracing endpoint. If not set, the default OTLP tracing endpoint will be used
+OTEL_OTLP_TRACING_ENDPOINT = os.getenv("OTEL_OTLP_TRACING_ENDPOINT", OTEL_OTLP_ENDPOINT)
+
+# Define OTLP metrics endpoint. If not set, the default OTLP metrics endpoint will be used
+OTEL_OTLP_METRICS_ENDPOINT = os.getenv("OTEL_OTLP_METRICS_ENDPOINT", OTEL_OTLP_ENDPOINT)
+
+# Define service name
+OTEL_SERVICE_NAME = os.getenv("OTEL_SERVICE_NAME", "mindsdb")
+
+# Define service instace ID
+OTEL_SERVICE_INSTANCE_ID = os.getenv("OTEL_SERVICE_INSTANCE_ID", "mindsdb-instance")
+
+# The name of the environment we're on, by default local for development, this is set differently per-env in our Helm
+# chart values files
+OTEL_SERVICE_ENVIRONMENT = os.getenv("OTEL_SERVICE_ENVIRONMENT", "local").lower()
+
+# Define service release
+OTEL_SERVICE_RELEASE = os.getenv("OTEL_SERVICE_RELEASE", "local").lower()
+
+# Define how often to capture traces
+OTEL_TRACE_SAMPLE_RATE = float(os.getenv("OTEL_TRACE_SAMPLE_RATE", "1.0"))
+
+# Define extra attributes
+OTEL_EXTRA_ATTRIBUTES = os.getenv("OTEL_EXTRA_ATTRIBUTES", "")
+
+# By default, we have Open Telemetry SDK enabled on all envs, except for local which is disabled by default.
+OTEL_SDK_DISABLED = (os.getenv("OTEL_SDK_DISABLED", "false").lower() == "true"
+                     or os.getenv("OTEL_SERVICE_ENVIRONMENT", "local").lower() == "local")
+
+# Define if OpenTelemetry logging is disabled. By default, it is disabled.
+OTEL_LOGGING_DISABLED = os.getenv("OTEL_LOGGING_DISABLED", "true").lower() == "true"
+
+# Define if OpenTelemetry tracing is disabled. By default, it is enabled.
+OTEL_TRACING_DISABLED = os.getenv("OTEL_TRACING_DISABLED", "false").lower() == "true"
+
+# Define if OpenTelemetry metrics is disabled. By default, it is disabled.
+OTEL_METRICS_DISABLED = os.getenv("OTEL_METRICS_DISABLED", "true").lower() == "true"
+
+# If you want to enable Open Telemetry on local for some reason please set OTEL_SDK_FORCE_RUN to true
+OTEL_SDK_FORCE_RUN = os.getenv("OTEL_SDK_FORCE_RUN", "false").lower() == "true"
+
+
+def get_otel_attributes() -> dict:
+    """
+    Get OpenTelemetry attributes
+
+    Returns:
+        dict: OpenTelemetry attributes
+    """
+
+    base_attributes = {
+        "service.name": OTEL_SERVICE_NAME,
+        "service.instance.id": OTEL_SERVICE_INSTANCE_ID,
+        "environment": OTEL_SERVICE_ENVIRONMENT,
+        "release": OTEL_SERVICE_RELEASE,
+    }
+
+    extra_attributes = {}
+    try:
+        extra_attributes = parse_csv_attributes(OTEL_EXTRA_ATTRIBUTES)
+    except Exception as e:
+        logger.error(f"Failed to parse OTEL_EXTRA_ATTRIBUTES: {e}")
+
+    attributes = {**extra_attributes, **base_attributes}  # Base attributes take precedence over extra attributes
+
+    return attributes
+
+
+def get_logging_exporter() -> typing.Optional[LogExporter]:
+    """
+    Get OpenTelemetry logging exporter.
+
+    Returns:
+        OTLPLogExporter: OpenTelemetry logging exporter
+    """
+
+    if OTEL_EXPORTER_TYPE == "otlp":
+
+        if OTEL_EXPORTER_PROTOCOL == "grpc":
+            return OTLPLogExporterGRPC(
+                endpoint=OTEL_OTLP_LOGGING_ENDPOINT,
+                insecure=True
+            )
+
+        elif OTEL_EXPORTER_PROTOCOL == "http":
+            return OTLPLogExporterHTTP(
+                endpoint=OTEL_OTLP_LOGGING_ENDPOINT
+            )
+
+    return None
+
+
+def get_span_exporter() -> SpanExporter:
+    """
+    Get OpenTelemetry span exporter
+
+    Returns:
+        OTLPSpanExporter: OpenTelemetry span exporter
+    """
+
+    if OTEL_EXPORTER_TYPE == "otlp":
+
+        if OTEL_EXPORTER_PROTOCOL == "grpc":
+            return OTLPSpanExporterGRPC(
+                endpoint=OTEL_OTLP_TRACING_ENDPOINT,
+                insecure=True
+            )
+
+        elif OTEL_EXPORTER_PROTOCOL == "http":
+            return OTLPSpanExporterHTTP(
+                endpoint=OTEL_OTLP_TRACING_ENDPOINT
+            )
+
+    return ConsoleSpanExporter()
+
+
+def get_metrics_exporter() -> typing.Optional[MetricExporter]:
+    """
+    Get OpenTelemetry metrics exporter
+
+    Returns:
+        OTLPLogExporter: OpenTelemetry metrics exporter
+    """
+
+    if OTEL_EXPORTER_TYPE == "otlp":
+
+        if OTEL_EXPORTER_PROTOCOL == "grpc":
+            return OTLPMetricExporterGRPC(
+                endpoint=OTEL_OTLP_METRICS_ENDPOINT,
+                insecure=True
+            )
+
+        elif OTEL_EXPORTER_PROTOCOL == "http":
+            return OTLPMetricExporterHTTP(
+                endpoint=OTEL_OTLP_METRICS_ENDPOINT
+            )
+
+    return ConsoleMetricExporter()
+
+
+if not OTEL_SDK_DISABLED or OTEL_SDK_FORCE_RUN:
+    logger.info("OpenTelemetry enabled")
+    logger.info(f"OpenTelemetry exporter type: {OTEL_EXPORTER_TYPE}")
+    logger.info(f"OpenTelemetry service name: {OTEL_SERVICE_NAME}")
+    logger.info(f"OpenTelemetry service environment: {OTEL_SERVICE_ENVIRONMENT}")
+    logger.info(f"OpenTelemetry service release: {OTEL_SERVICE_RELEASE}")
+    logger.info(f"OpenTelemetry trace sample rate: {OTEL_TRACE_SAMPLE_RATE}")
+    logger.info(f"OpenTelemetry extra attributes: {OTEL_EXTRA_ATTRIBUTES}")
+
+    # Define OpenTelemetry resources (e.g., service name)
+    attributes = get_otel_attributes()
+
+    # Define OpenTelemetry sampler
+    sampler = TraceIdRatioBased(OTEL_TRACE_SAMPLE_RATE)
+
+    # Define OpenTelemetry resources (e.g., service name)
+    resource = Resource(attributes=attributes)
+
+    if not OTEL_LOGGING_DISABLED:
+        logger.info("OpenTelemetry Logging is enabled")
+        setup_logger(resource, get_logging_exporter())
+
+    if not OTEL_TRACING_DISABLED:
+        logger.info("OpenTelemetry Tracing is enabled")
+        setup_tracer(resource, sampler, get_span_exporter())
+
+    if not OTEL_METRICS_DISABLED:
+        logger.info("OpenTelemetry Metrics is enabled")
+        setup_meter(resource, get_metrics_exporter())