kweaver-dolphin 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- DolphinLanguageSDK/__init__.py +58 -0
- dolphin/__init__.py +62 -0
- dolphin/cli/__init__.py +20 -0
- dolphin/cli/args/__init__.py +9 -0
- dolphin/cli/args/parser.py +567 -0
- dolphin/cli/builtin_agents/__init__.py +22 -0
- dolphin/cli/commands/__init__.py +4 -0
- dolphin/cli/interrupt/__init__.py +8 -0
- dolphin/cli/interrupt/handler.py +205 -0
- dolphin/cli/interrupt/keyboard.py +82 -0
- dolphin/cli/main.py +49 -0
- dolphin/cli/multimodal/__init__.py +34 -0
- dolphin/cli/multimodal/clipboard.py +327 -0
- dolphin/cli/multimodal/handler.py +249 -0
- dolphin/cli/multimodal/image_processor.py +214 -0
- dolphin/cli/multimodal/input_parser.py +149 -0
- dolphin/cli/runner/__init__.py +8 -0
- dolphin/cli/runner/runner.py +989 -0
- dolphin/cli/ui/__init__.py +10 -0
- dolphin/cli/ui/console.py +2795 -0
- dolphin/cli/ui/input.py +340 -0
- dolphin/cli/ui/layout.py +425 -0
- dolphin/cli/ui/stream_renderer.py +302 -0
- dolphin/cli/utils/__init__.py +8 -0
- dolphin/cli/utils/helpers.py +135 -0
- dolphin/cli/utils/version.py +49 -0
- dolphin/core/__init__.py +107 -0
- dolphin/core/agent/__init__.py +10 -0
- dolphin/core/agent/agent_state.py +69 -0
- dolphin/core/agent/base_agent.py +970 -0
- dolphin/core/code_block/__init__.py +0 -0
- dolphin/core/code_block/agent_init_block.py +0 -0
- dolphin/core/code_block/assign_block.py +98 -0
- dolphin/core/code_block/basic_code_block.py +1865 -0
- dolphin/core/code_block/explore_block.py +1327 -0
- dolphin/core/code_block/explore_block_v2.py +712 -0
- dolphin/core/code_block/explore_strategy.py +672 -0
- dolphin/core/code_block/judge_block.py +220 -0
- dolphin/core/code_block/prompt_block.py +32 -0
- dolphin/core/code_block/skill_call_deduplicator.py +291 -0
- dolphin/core/code_block/tool_block.py +129 -0
- dolphin/core/common/__init__.py +17 -0
- dolphin/core/common/constants.py +176 -0
- dolphin/core/common/enums.py +1173 -0
- dolphin/core/common/exceptions.py +133 -0
- dolphin/core/common/multimodal.py +539 -0
- dolphin/core/common/object_type.py +165 -0
- dolphin/core/common/output_format.py +432 -0
- dolphin/core/common/types.py +36 -0
- dolphin/core/config/__init__.py +16 -0
- dolphin/core/config/global_config.py +1289 -0
- dolphin/core/config/ontology_config.py +133 -0
- dolphin/core/context/__init__.py +12 -0
- dolphin/core/context/context.py +1580 -0
- dolphin/core/context/context_manager.py +161 -0
- dolphin/core/context/var_output.py +82 -0
- dolphin/core/context/variable_pool.py +356 -0
- dolphin/core/context_engineer/__init__.py +41 -0
- dolphin/core/context_engineer/config/__init__.py +5 -0
- dolphin/core/context_engineer/config/settings.py +402 -0
- dolphin/core/context_engineer/core/__init__.py +7 -0
- dolphin/core/context_engineer/core/budget_manager.py +327 -0
- dolphin/core/context_engineer/core/context_assembler.py +583 -0
- dolphin/core/context_engineer/core/context_manager.py +637 -0
- dolphin/core/context_engineer/core/tokenizer_service.py +260 -0
- dolphin/core/context_engineer/example/incremental_example.py +267 -0
- dolphin/core/context_engineer/example/traditional_example.py +334 -0
- dolphin/core/context_engineer/services/__init__.py +5 -0
- dolphin/core/context_engineer/services/compressor.py +399 -0
- dolphin/core/context_engineer/utils/__init__.py +6 -0
- dolphin/core/context_engineer/utils/context_utils.py +441 -0
- dolphin/core/context_engineer/utils/message_formatter.py +270 -0
- dolphin/core/context_engineer/utils/token_utils.py +139 -0
- dolphin/core/coroutine/__init__.py +15 -0
- dolphin/core/coroutine/context_snapshot.py +154 -0
- dolphin/core/coroutine/context_snapshot_profile.py +922 -0
- dolphin/core/coroutine/context_snapshot_store.py +268 -0
- dolphin/core/coroutine/execution_frame.py +145 -0
- dolphin/core/coroutine/execution_state_registry.py +161 -0
- dolphin/core/coroutine/resume_handle.py +101 -0
- dolphin/core/coroutine/step_result.py +101 -0
- dolphin/core/executor/__init__.py +18 -0
- dolphin/core/executor/debug_controller.py +630 -0
- dolphin/core/executor/dolphin_executor.py +1063 -0
- dolphin/core/executor/executor.py +624 -0
- dolphin/core/flags/__init__.py +27 -0
- dolphin/core/flags/definitions.py +49 -0
- dolphin/core/flags/manager.py +113 -0
- dolphin/core/hook/__init__.py +95 -0
- dolphin/core/hook/expression_evaluator.py +499 -0
- dolphin/core/hook/hook_dispatcher.py +380 -0
- dolphin/core/hook/hook_types.py +248 -0
- dolphin/core/hook/isolated_variable_pool.py +284 -0
- dolphin/core/interfaces.py +53 -0
- dolphin/core/llm/__init__.py +0 -0
- dolphin/core/llm/llm.py +495 -0
- dolphin/core/llm/llm_call.py +100 -0
- dolphin/core/llm/llm_client.py +1285 -0
- dolphin/core/llm/message_sanitizer.py +120 -0
- dolphin/core/logging/__init__.py +20 -0
- dolphin/core/logging/logger.py +526 -0
- dolphin/core/message/__init__.py +8 -0
- dolphin/core/message/compressor.py +749 -0
- dolphin/core/parser/__init__.py +8 -0
- dolphin/core/parser/parser.py +405 -0
- dolphin/core/runtime/__init__.py +10 -0
- dolphin/core/runtime/runtime_graph.py +926 -0
- dolphin/core/runtime/runtime_instance.py +446 -0
- dolphin/core/skill/__init__.py +14 -0
- dolphin/core/skill/context_retention.py +157 -0
- dolphin/core/skill/skill_function.py +686 -0
- dolphin/core/skill/skill_matcher.py +282 -0
- dolphin/core/skill/skillkit.py +700 -0
- dolphin/core/skill/skillset.py +72 -0
- dolphin/core/trajectory/__init__.py +10 -0
- dolphin/core/trajectory/recorder.py +189 -0
- dolphin/core/trajectory/trajectory.py +522 -0
- dolphin/core/utils/__init__.py +9 -0
- dolphin/core/utils/cache_kv.py +212 -0
- dolphin/core/utils/tools.py +340 -0
- dolphin/lib/__init__.py +93 -0
- dolphin/lib/debug/__init__.py +8 -0
- dolphin/lib/debug/visualizer.py +409 -0
- dolphin/lib/memory/__init__.py +28 -0
- dolphin/lib/memory/async_processor.py +220 -0
- dolphin/lib/memory/llm_calls.py +195 -0
- dolphin/lib/memory/manager.py +78 -0
- dolphin/lib/memory/sandbox.py +46 -0
- dolphin/lib/memory/storage.py +245 -0
- dolphin/lib/memory/utils.py +51 -0
- dolphin/lib/ontology/__init__.py +12 -0
- dolphin/lib/ontology/basic/__init__.py +0 -0
- dolphin/lib/ontology/basic/base.py +102 -0
- dolphin/lib/ontology/basic/concept.py +130 -0
- dolphin/lib/ontology/basic/object.py +11 -0
- dolphin/lib/ontology/basic/relation.py +63 -0
- dolphin/lib/ontology/datasource/__init__.py +27 -0
- dolphin/lib/ontology/datasource/datasource.py +66 -0
- dolphin/lib/ontology/datasource/oracle_datasource.py +338 -0
- dolphin/lib/ontology/datasource/sql.py +845 -0
- dolphin/lib/ontology/mapping.py +177 -0
- dolphin/lib/ontology/ontology.py +733 -0
- dolphin/lib/ontology/ontology_context.py +16 -0
- dolphin/lib/ontology/ontology_manager.py +107 -0
- dolphin/lib/skill_results/__init__.py +31 -0
- dolphin/lib/skill_results/cache_backend.py +559 -0
- dolphin/lib/skill_results/result_processor.py +181 -0
- dolphin/lib/skill_results/result_reference.py +179 -0
- dolphin/lib/skill_results/skillkit_hook.py +324 -0
- dolphin/lib/skill_results/strategies.py +328 -0
- dolphin/lib/skill_results/strategy_registry.py +150 -0
- dolphin/lib/skillkits/__init__.py +44 -0
- dolphin/lib/skillkits/agent_skillkit.py +155 -0
- dolphin/lib/skillkits/cognitive_skillkit.py +82 -0
- dolphin/lib/skillkits/env_skillkit.py +250 -0
- dolphin/lib/skillkits/mcp_adapter.py +616 -0
- dolphin/lib/skillkits/mcp_skillkit.py +771 -0
- dolphin/lib/skillkits/memory_skillkit.py +650 -0
- dolphin/lib/skillkits/noop_skillkit.py +31 -0
- dolphin/lib/skillkits/ontology_skillkit.py +89 -0
- dolphin/lib/skillkits/plan_act_skillkit.py +452 -0
- dolphin/lib/skillkits/resource/__init__.py +52 -0
- dolphin/lib/skillkits/resource/models/__init__.py +6 -0
- dolphin/lib/skillkits/resource/models/skill_config.py +109 -0
- dolphin/lib/skillkits/resource/models/skill_meta.py +127 -0
- dolphin/lib/skillkits/resource/resource_skillkit.py +393 -0
- dolphin/lib/skillkits/resource/skill_cache.py +215 -0
- dolphin/lib/skillkits/resource/skill_loader.py +395 -0
- dolphin/lib/skillkits/resource/skill_validator.py +406 -0
- dolphin/lib/skillkits/resource_skillkit.py +11 -0
- dolphin/lib/skillkits/search_skillkit.py +163 -0
- dolphin/lib/skillkits/sql_skillkit.py +274 -0
- dolphin/lib/skillkits/system_skillkit.py +509 -0
- dolphin/lib/skillkits/vm_skillkit.py +65 -0
- dolphin/lib/utils/__init__.py +9 -0
- dolphin/lib/utils/data_process.py +207 -0
- dolphin/lib/utils/handle_progress.py +178 -0
- dolphin/lib/utils/security.py +139 -0
- dolphin/lib/utils/text_retrieval.py +462 -0
- dolphin/lib/vm/__init__.py +11 -0
- dolphin/lib/vm/env_executor.py +895 -0
- dolphin/lib/vm/python_session_manager.py +453 -0
- dolphin/lib/vm/vm.py +610 -0
- dolphin/sdk/__init__.py +60 -0
- dolphin/sdk/agent/__init__.py +12 -0
- dolphin/sdk/agent/agent_factory.py +236 -0
- dolphin/sdk/agent/dolphin_agent.py +1106 -0
- dolphin/sdk/api/__init__.py +4 -0
- dolphin/sdk/runtime/__init__.py +8 -0
- dolphin/sdk/runtime/env.py +363 -0
- dolphin/sdk/skill/__init__.py +10 -0
- dolphin/sdk/skill/global_skills.py +706 -0
- dolphin/sdk/skill/traditional_toolkit.py +260 -0
- kweaver_dolphin-0.1.0.dist-info/METADATA +521 -0
- kweaver_dolphin-0.1.0.dist-info/RECORD +199 -0
- kweaver_dolphin-0.1.0.dist-info/WHEEL +5 -0
- kweaver_dolphin-0.1.0.dist-info/entry_points.txt +27 -0
- kweaver_dolphin-0.1.0.dist-info/licenses/LICENSE.txt +201 -0
- kweaver_dolphin-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,845 @@
from abc import abstractmethod
from typing import Any, Dict, List, Optional
import re
import datetime

from sqlalchemy import create_engine, inspect, text
from sqlalchemy.engine import Engine

# Add NullPool import
from sqlalchemy.pool import NullPool

from dolphin.core.common.enums import Messages
from dolphin.lib.ontology.basic.concept import ConceptMemberType, Concept
from dolphin.lib.ontology.mapping import Mapping
from dolphin.lib.ontology.datasource.datasource import DataSource
from dolphin.lib.ontology.datasource.datasource import DataSourceType

from dolphin.core.logging.logger import get_logger

logger = get_logger("ontology")


def _camelCase(s: str) -> str:
    """Convert a string separated by underscores or spaces to camel case.

    Args:
        s: The input string to convert.

    Returns:
        The camel case version of the input string.
    """
    s = re.sub(r"[_\-]+", " ", s).title().replace(" ", "")
    return s[0].lower() + s[1:] if s else ""

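# Illustration, not part of the packaged source: what _camelCase is expected to
# produce, derived only from the regex/title() logic above.
#
#     _camelCase("user_profile")  -> "userProfile"
#     _camelCase("order-items")   -> "orderItems"
#     _camelCase("ALREADY")       -> "already"   (title() lowercases the tail)
#
# The scan() methods below additionally call .capitalize() on this result to
# build Concept names, e.g. "user_profile" -> "Userprofile".
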
class DataSourceSql(DataSource):
    """Base class for SQL type data sources, using SQLAlchemy"""

    def __init__(self, name: str, type: DataSourceType, config: Dict[str, Any]):
        super().__init__(name, type, config)
        self._engine: Optional[Engine] = None
        self._inspector = None
        self.host = config.get("host", "localhost")
        self.port = config.get("port", 3306)  # Default MySQL port
        self.username = config.get("username")
        self.password = config.get("password")
        self.database = config.get("database")

    @property
    def type(self) -> DataSourceType:
        return self._type  # Return the stored type

    @abstractmethod
    def connect(self) -> Engine:
        """Establish the database connection"""
        pass

    @abstractmethod
    def get_schema(self) -> Dict[str, List[Dict[str, str]]]:
        """Get the database schema (table name -> list of column info, where each column info is {'name': column name, 'type': type string})"""
        pass

    def close(self) -> None:
        """Close the database connection"""
        if self._engine:
            try:
                self._engine.dispose()
                logger.info(f"Database connection closed: {self.name}")
                self._engine = None
            except Exception as e:
                logger.error(f"Error closing database connection {self.name}: {e}")
        else:
            logger.warning(
                f"Attempted to close a connection that was never established or is already closed: {self.name}"
            )

    def test_connection(self) -> bool:
        """Test whether the database connection works"""
        originalConnectionState = self._engine
        connToClose = None
        try:
            if not self._engine:
                connToClose = self.connect()
                if not self._engine:
                    logger.warning(
                        f"Connection test for {self.name} failed: unable to establish a connection"
                    )
                    return False
            # Simple test query
            self.executeQuery(
                "SELECT 1", fetchColumns=False
            )  # test_connection does not care about the result
            logger.info(f"Connection test for {self.name} succeeded")
            return True
        except Exception as e:
            logger.error(f"Connection test failed for {self.name}: {e}")
            return False
        finally:
            # If the connection was established temporarily for the test, close it.
            if connToClose and connToClose == self._engine:
                self.close()
            # Restore the original connection state (if a connection already existed before the test)
            elif originalConnectionState and not self._engine:
                self._engine = originalConnectionState  # Avoid affecting subsequent operations

    # SQL-specific methods can be added here, such as executing SQL statements
    def executeQuery(self, query: str, fetchColumns: bool = True) -> Dict[str, Any]:
        """Execute an SQL query and return the results.

        Args:
            query (str): The SQL query statement to execute
            fetchColumns (bool): Whether to retrieve column name information, defaults to True

        Returns:
            Dict[str, Any]: A dictionary containing the query results and column names (if fetchColumns is True)
        """
        conn = self._engine
        shouldCloseConn = False
        if not conn:
            conn = self.connect()
            if not conn:
                raise ConnectionError(f"Unable to connect to database: {self.name}")
            shouldCloseConn = True  # If it is a temporary connection, close it after use.

        cursor = None
        try:
            cursor = conn.connect().execute(text(query))
            results = cursor.fetchall()
            if fetchColumns:
                # Use cursor.keys() to get column names from SQLAlchemy's Result object
                # The Result object (cursor) itself doesn't have a 'description' attribute directly
                columns = list(cursor.keys())
                return {"columns": columns, "data": results}
            return {
                "columns": [],
                "data": results,
            }  # Return empty columns if not fetching
        except Exception as e:
            logger.error(f"Error executing query on {self.name}: {e}")
            raise
        finally:
            if cursor:
                cursor.close()
            if shouldCloseConn and conn:
                conn.dispose()
                if conn == self._engine:
                    self._engine = None  # Ensure internal state consistency

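    # Illustration, not part of the packaged source: the shape executeQuery()
    # returns, assuming a hypothetical `users` table with two columns. The row
    # objects are whatever SQLAlchemy's fetchall() yields (Row tuples).
    #
    #     result = ds.executeQuery("SELECT id, name FROM users LIMIT 2")
    #     # result == {
    #     #     "columns": ["id", "name"],
    #     #     "data": [(1, "Ada"), (2, "Grace")],
    #     # }
    #     rows = [dict(zip(result["columns"], row)) for row in result["data"]]
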
    def _map_db_type_to_concept_type(self, db_type_full: str) -> ConceptMemberType:
        """Map database-specific column type strings to ConceptMemberType.

        Args:
            db_type_full (str): The database-specific column type string.

        Returns:
            ConceptMemberType: The corresponding ConceptMemberType."""
        logger.debug(
            f"Mapping DB type: Original='{db_type_full}', Type='{type(db_type_full)}'"
        )
        try:
            if not db_type_full or not isinstance(
                db_type_full, str
            ):  # Ensure it's a non-empty string
                logger.warning(
                    f"Invalid db_type_full: '{db_type_full}'. Defaulting to ANY."
                )
                return ConceptMemberType.ANY

            # Extract the main type part before the parentheses and convert it to lowercase
            db_type_main = db_type_full.split("(")[0].strip().lower()

            result_type = ConceptMemberType.ANY  # Default

            if (
                "char" in db_type_main
                or "varchar" in db_type_main
                or "varchar2" in db_type_main  # Oracle VARCHAR2
                or "text" in db_type_main
                or "string" in db_type_main
                or "enum" in db_type_main
                or "set" in db_type_main
                or "clob" in db_type_main  # Oracle CLOB
            ):
                result_type = ConceptMemberType.STRING
            elif (
                "int" in db_type_main
                or "integer" in db_type_main
                or "tinyint" in db_type_main
                or "smallint" in db_type_main
                or "mediumint" in db_type_main
                or "bigint" in db_type_main
            ):
                result_type = ConceptMemberType.NUMBER
            elif (
                "float" in db_type_main
                or "double" in db_type_main
                or "decimal" in db_type_main
                or "numeric" in db_type_main
                or "real" in db_type_main
                or "number" in db_type_main  # Oracle NUMBER type
            ):
                result_type = ConceptMemberType.NUMBER
            elif "bool" in db_type_main or "boolean" in db_type_main:
                result_type = ConceptMemberType.BOOLEAN
            elif (
                "date" in db_type_main
                or "datetime" in db_type_main
                or "timestamp" in db_type_main
                or "time" in db_type_main
                or "year" in db_type_main
            ):
                result_type = (
                    ConceptMemberType.STRING
                )  # Or a more specific date/time type if available
            elif "json" in db_type_main:
                result_type = (
                    ConceptMemberType.STRING
                )  # Or OBJECT if handling structured JSON

            if (
                result_type == ConceptMemberType.ANY
                and db_type_main
                not in [
                    "unknown",
                    "",
                ]
            ):  # Log if no specific mapping was found, unless it was already 'unknown' or empty
                logger.warning(
                    f"Unknown DB type: '{db_type_full}' (main: '{db_type_main}'), defaulted to ANY."
                )

            logger.debug(
                f"Mapped DB type '{db_type_full}' to ConceptMemberType '{result_type.name if result_type else 'None'}' (Python type: {type(result_type)})"
            )
            return result_type
        except Exception as e:
            logger.error(
                f"Error mapping DB type '{db_type_full}': {e}. Defaulting to ANY.",
                exc_info=True,
            )
            return ConceptMemberType.ANY

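    # Illustration, not part of the packaged source: a few inputs and the
    # ConceptMemberType the branches above would select.
    #
    #     "VARCHAR(255)"    -> ConceptMemberType.STRING
    #     "DECIMAL(10, 2)"  -> ConceptMemberType.NUMBER
    #     "TINYINT(1)"      -> ConceptMemberType.NUMBER  (the int branch matches before any bool check)
    #     "DATETIME"        -> ConceptMemberType.STRING  (no dedicated date/time member type)
    #     "GEOMETRY"        -> ConceptMemberType.ANY     (logged as an unknown type)
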
    def scan(self) -> List[Mapping]:
        """Scan the SQL database schema and generate a Concept and Mapping for each table"""
        logger.info(f"Starting to scan data source: {self.name}")
        mappings = []
        try:
            schema = self.get_schema()
            if not schema:
                logger.warning(f"Unable to get schema information for data source {self.name}")
                return []

            for table_name, columns_details in schema.items():
                # 1. Create Concept
                concept_name = _camelCase(table_name).capitalize()

                members = {}
                valid_columns_for_mapping = []
                if not columns_details:
                    logger.warning(
                        f"Table '{table_name}' in {self.name} has no column information, skipping"
                    )
                    continue

                for col_detail in columns_details:
                    col_name = col_detail.get("name")
                    col_type_str = col_detail.get("type")

                    if not col_name:  # Skip entries without column names
                        logger.debug(
                            f"Skipping column with no name in table '{table_name}'. Detail: {col_detail}"
                        )
                        continue

                    member_name = _camelCase(col_name)
                    member_type = self._map_db_type_to_concept_type(col_type_str)
                    members[member_name] = member_type
                    valid_columns_for_mapping.append(col_name)

                if not members:
                    logger.warning(
                        f"Table '{table_name}' in {self.name} has no valid columns that can be converted to members, skipping Concept creation"
                    )
                    continue

                try:
                    concept = Concept(name=concept_name, members=members)
                    logger.debug(
                        f"Created Concept for table '{table_name}': {concept_name} with members: {members}"
                    )
                except ValueError as e:
                    logger.error(
                        f"Failed to create Concept '{concept_name}' for table '{table_name}': {e}"
                    )
                    continue

                # 2. Create Mapping
                # Field-to-member mapping: column name -> camelCase member name
                # Use valid_columns_for_mapping to ensure only existing columns are mapped
                fieldToMemberMap = {
                    col: _camelCase(col) for col in valid_columns_for_mapping
                }
                if not fieldToMemberMap:  # In theory, if members has content, this does too
                    logger.warning(
                        f"Table '{table_name}' in {self.name} has no valid column names to map, skipping Mapping creation"
                    )
                    continue

                try:
                    mapping = Mapping(
                        dataSource=self,
                        space=table_name,
                        concept=concept,
                        fieldToMemberMap=fieldToMemberMap,
                    )
                    mappings.append(mapping)
                    logger.debug(f"Created Mapping for Concept '{concept_name}'")
                except (ValueError, TypeError) as e:
                    logger.error(f"Failed to create Mapping for Concept '{concept_name}': {e}")

            logger.info(
                f"Finished scanning data source {self.name}; generated {len(mappings)} Mappings"
            )
            return mappings

        except ConnectionError as e:
            logger.error(f"Failed to scan data source {self.name}: connection error {e}")
            return []
        except Exception as e:
            logger.error(f"Unexpected error while scanning data source {self.name}: {e}")
            # Consider whether to raise an exception here instead of returning an empty list
            return []

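# Illustration, not part of the packaged source: how scan() ties a table to a
# Concept and Mapping, for a hypothetical `order_items` table.
#
#     table "order_items" with columns (order_id INT, unit_price DECIMAL(10, 2))
#       -> Concept name "Orderitems"  (via _camelCase("order_items").capitalize())
#       -> members {"orderId": NUMBER, "unitPrice": NUMBER}
#       -> Mapping(space="order_items",
#                  fieldToMemberMap={"order_id": "orderId", "unit_price": "unitPrice"})
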
class DataSourceMysql(DataSourceSql):
    """MySQL data source implementation"""

    def __init__(self, name: str, config: Dict[str, Any]):
        # Pass the correct type DataSourceType.MYSQL directly
        super().__init__(name, DataSourceType.MYSQL, config)
        self._type = DataSourceType.MYSQL  # Store the specific type

    def connect(self) -> Engine:
        """Connect to the MySQL database"""
        if self._engine:
            logger.debug(f"Already connected to {self.name}, reconnecting")
            self.close()

        try:
            # Use pymysql to connect to the MySQL database
            # Make sure it is installed: pip install pymysql
            import pymysql  # Imported here, only when needed

            # The connection is configured to disable the connection pool for the test environment
            connection_url = f"mysql+pymysql://{self.username}:{self.password}@{self.host}:{self.port}/{self.database}"

            # Use NullPool to disable connection pooling, or use a more appropriate pool configuration
            # In testing environments, disabling connection pooling is usually simpler and more reliable.
            self._engine = create_engine(
                connection_url,
                poolclass=NullPool,  # Disable connection pooling
            )

            self._inspector = inspect(self._engine)
            logger.info(f"Successfully connected to MySQL database: {self.name}")
            return self._engine
        except ImportError:
            logger.error(
                f"Failed to connect to MySQL database {self.name}: missing 'pymysql' library. Please run 'pip install pymysql'"
            )
            self._engine = None
            raise ConnectionError(f"Missing MySQL driver: {self.name}")
        except pymysql.Error as err:  # Catch specific database connection errors
            logger.error(f"Failed to connect to MySQL database {self.name}: {err}")
            self._engine = None
            raise ConnectionError(f"Unable to connect to MySQL database: {self.name}, {err}")
        except Exception as e:  # Other unexpected errors
            logger.error(f"Unknown error while connecting to MySQL {self.name}: {e}")
            self._engine = None
            raise ConnectionError(f"Unknown error while connecting to MySQL: {self.name}, {e}")

    def get_schema(self) -> Dict[str, List[Dict[str, str]]]:
        """Get MySQL database schema information (table name -> list of column information)"""
        if self.schema:
            return self.schema

        conn = self._engine
        should_close_conn = False
        if not conn:
            conn = self.connect()  # Try to connect
            if not conn:
                raise ConnectionError(f"Unable to get schema, database is not connected: {self.name}")
            should_close_conn = True

        schema: Dict[str, List[Dict[str, str]]] = {}
        cursor = None
        try:
            # Use self._inspector to get table names, which is more in line with SQLAlchemy's approach
            if not self._inspector:  # Ensure the inspector exists
                if not conn:  # If conn was not previously established successfully
                    conn = self.connect()
                    if not conn:
                        raise ConnectionError(
                            f"Unable to get schema, database is not connected: {self.name}"
                        )
                self._inspector = inspect(conn)

            tables = self._inspector.get_table_names()

            # Get column names and types for each table
            for table_name in tables:
                # Use self._inspector to get column information
                columns_info = self._inspector.get_columns(table_name)

                current_table_cols = []
                for column_data in columns_info:
                    # column_data is a dictionary containing keys such as 'name', 'type', 'nullable', 'default'
                    # 'type' is usually a SQLAlchemy type object, which needs to be converted to a string.
                    col_name = column_data.get("name")
                    col_type_obj = column_data.get("type")

                    if col_name and col_type_obj is not None:  # Ensure that the column name and type exist
                        # Convert SQLAlchemy type objects to their string representations
                        # For example: VARCHAR(length=50), INTEGER(), NUMERIC(precision=10, scale=2)
                        col_type_str = str(col_type_obj).replace(
                            ' COLLATE "utf8mb4_unicode_ci"', ""
                        )
                        current_table_cols.append(
                            {"name": col_name, "type": col_type_str}
                        )
                    elif col_name:  # Unknown type, but the column name exists
                        current_table_cols.append(
                            {"name": col_name, "type": "UNKNOWN"}
                        )  # Or record a default value
                        logger.warning(
                            f"Column '{col_name}' in table '{table_name}' has an unknown type."
                        )

                if current_table_cols:  # Only add to the schema when the table has columns
                    schema[table_name] = current_table_cols
                else:
                    logger.info(
                        f"Table '{table_name}' has no columns or columns could not be retrieved."
                    )

            logger.debug(f"Retrieved schema for {self.name}: {len(schema)} tables")
            self.schema = schema
            return schema
        except Exception as err:  # Catch all database-related errors
            logger.error(f"Failed to get MySQL schema for {self.name}: {err}")
            # Ensure that a temporarily opened connection is closed when an error occurs,
            # instead of relying on the caller (such as scan) to handle it
            if (
                should_close_conn and conn and conn == self._engine
            ):  # If this connection was opened specifically for this method
                conn.dispose()
                self._engine = None  # Reset engine state
                self._inspector = None  # Reset inspector
            raise RuntimeError(f"Failed to get MySQL schema: {err}") from err
        finally:
            # The cursor is no longer managed directly at this level; methods such as inspector.get_columns handle it internally.
            # if cursor:
            #     cursor.close()
            if should_close_conn and conn:
                if conn == self._engine:  # Only close when this method created self._engine
                    conn.dispose()
                    self._engine = None
                    self._inspector = None  # Also clear the inspector
                elif conn != self._engine:  # If it's a temporarily created conn and not self._engine
                    conn.dispose()

    def sampleData(self, conceptName: str, count: int = 10) -> Messages:
        """Retrieve sample data from a MySQL data source for the specified Concept name.

        It converts the Concept name back to a probable table name (assuming the naming convention used in the scan method),
        then queries that table to retrieve the specified number of sample rows.

        Args:
            conceptName (str): The name of the Concept for which to retrieve sample data.
            count (int): The number of sample rows to retrieve, defaults to 10.

        Returns:
            Messages: A list of dictionaries, each representing a row of data,
                where keys are column names and values are the corresponding data.
                Returns an empty list if the Concept is not found or an error occurs.
        """
        if count <= 0:
            logger.info(
                f"Sample count is {count}, returning empty list for concept '{conceptName}' in {self.name}."
            )
            return []

        target_table_name: Optional[str] = None
        actual_column_names: List[str] = []

        try:
            db_schema = self.get_schema()  # This might connect if not connected.
            if not db_schema:
                logger.warning(
                    f"Could not retrieve schema for {self.name} to find concept '{conceptName}'."
                )
                return []

            for table_name_from_schema, columns_details in db_schema.items():
                # Ensure table_name_from_schema is a string for _camelCase
                if not isinstance(table_name_from_schema, str):
                    logger.warning(
                        f"Skipping non-string table name in schema: {table_name_from_schema}"
                    )
                    continue

                generated_concept_name = _camelCase(table_name_from_schema).capitalize()
                if generated_concept_name == conceptName:
                    target_table_name = table_name_from_schema
                    actual_column_names = [
                        col_info["name"]
                        for col_info in columns_details
                        if col_info.get("name")
                    ]
                    if not actual_column_names:
                        logger.warning(
                            f"Concept '{conceptName}' (Table '{target_table_name}') found in {self.name} but has no columns. Cannot sample data."
                        )
                        return []  # Cannot select data if there are no columns
                    break  # Found the table

            if not target_table_name:
                logger.warning(
                    f"Concept '{conceptName}' not found as a discoverable table in datasource '{self.name}'."
                )
                return []

            # Quote the column names and table name for the SQL query
            quoted_column_names_str = ", ".join(
                [f"`{col}`" for col in actual_column_names]
            )
            # Ensure target_table_name is just the name, not schema.name, etc.
            # get_schema() returns table names as keys, so this should be fine.
            sql_query = f"SELECT {quoted_column_names_str} FROM `{target_table_name}` LIMIT {count}"

            logger.debug(
                f"Executing sample data query for concept '{conceptName}' on {self.name}: {sql_query}"
            )
            query_result = self.executeQuery(
                sql_query
            )  # fetchColumns is True by default

            result_columns = query_result.get("columns", [])
            result_data_rows = query_result.get("data", [])

            if not result_columns and result_data_rows:
                logger.warning(
                    f"Query for concept '{conceptName}' in {self.name} returned data but no column names. This might indicate an issue with executeQuery or the underlying table structure."
                )
                # Attempting to use actual_column_names if order and count match would be risky.
                # Sticking to result_columns from executeQuery is safer.

            formatted_samples: Messages = []
            for row_tuple in result_data_rows:
                processed_row = []
                for item in row_tuple:
                    if isinstance(
                        item, (datetime.datetime, datetime.date, datetime.time)
                    ):
                        processed_row.append(item.isoformat())
                    else:
                        processed_row.append(item)

                if len(processed_row) == len(result_columns):
                    formatted_samples.append(dict(zip(result_columns, processed_row)))
                else:
                    logger.warning(
                        f"Row data length mismatch for concept '{conceptName}' in {self.name}. "
                        f"Expected {len(result_columns)} columns based on query result, got {len(processed_row)}. Row: {processed_row}"
                    )
            return formatted_samples

        except ConnectionError as ce:
            logger.error(
                f"Connection error while fetching sample data for concept '{conceptName}' from {self.name}: {ce}"
            )
            return []
        except Exception as e:
            logger.error(
                f"Error fetching sample data for concept '{conceptName}' (table: {target_table_name or 'unknown'}) from {self.name}: {e}",
                exc_info=True,
            )
            return []

    # MySQL-specific methods can be added here, such as executing MySQL-specific queries
    def execute_mysql_specific_query(self, query: str) -> Dict[str, Any]:
        """Execute a MySQL-specific query and return the results"""
        if not self._engine:
            self.connect()
            if not self._engine:
                raise ConnectionError(f"Unable to connect to database: {self.name}")

        cursor = None
        try:
            cursor = self._engine.connect().execute(text(query))
            results = cursor.fetchall()
            # Use keys() for column names, consistent with executeQuery above
            # (SQLAlchemy's Result object does not expose a 'description' attribute)
            columns = list(cursor.keys())
            return {"columns": columns, "data": results}
        except Exception as e:
            logger.error(f"Error executing MySQL-specific query on {self.name}: {e}")
            raise
        finally:
            if cursor:
                cursor.close()

|
+
class DataSourceSqlite(DataSourceSql):
|
|
608
|
+
"""SQLite Data Source Implementation"""
|
|
609
|
+
|
|
610
|
+
def __init__(self, name: str, config: Dict[str, Any]):
|
|
611
|
+
# Pass the correct type DataSourceType.SQLITE directly
|
|
612
|
+
super().__init__(name, DataSourceType.SQLITE, config)
|
|
613
|
+
self._type = DataSourceType.SQLITE # Store specific types
|
|
614
|
+
# SQLite only needs the database file path
|
|
615
|
+
self.database_path = config.get(
|
|
616
|
+
"database",
|
|
617
|
+
config.get("path", config.get("file_path", config.get("database_path"))),
|
|
618
|
+
)
|
|
619
|
+
if not self.database_path:
|
|
620
|
+
raise ValueError("SQLite 数据源配置缺少database文件路径")
|
|
621
|
+
|
|
622
|
+
def connect(self) -> Engine:
|
|
623
|
+
"""Connect to SQLite database"""
|
|
624
|
+
if self._engine:
|
|
625
|
+
logger.debug(f"Already connected to {self.name} , reconnecting")
|
|
626
|
+
self.close()
|
|
627
|
+
|
|
628
|
+
try:
|
|
629
|
+
# Using SQLite connection strings
|
|
630
|
+
connection_url = f"sqlite:///{self.database_path}"
|
|
631
|
+
|
|
632
|
+
# Use NullPool to disable connection pooling, which is simpler and more reliable in testing environments.
|
|
633
|
+
self._engine = create_engine(
|
|
634
|
+
connection_url,
|
|
635
|
+
poolclass=NullPool, # Disable connection pooling
|
|
636
|
+
)
|
|
637
|
+
|
|
638
|
+
self._inspector = inspect(self._engine)
|
|
639
|
+
logger.info(
|
|
640
|
+
f"Successfully connected to SQLite database: {self.name} at {self.database_path}"
|
|
641
|
+
)
|
|
642
|
+
return self._engine
|
|
643
|
+
except Exception as e:
|
|
644
|
+
logger.error(f"connect to SQLite databasefailed {self.name}: {e}")
|
|
645
|
+
self._engine = None
|
|
646
|
+
raise ConnectionError(f"无法connect to到 SQLite database: {self.name}, {e}")
|
|
647
|
+
|
|
648
|
+
def get_schema(self) -> Dict[str, List[Dict[str, str]]]:
|
|
649
|
+
"""Get schema information of SQLite database (table name -> list of column information)"""
|
|
650
|
+
if self.schema:
|
|
651
|
+
return self.schema
|
|
652
|
+
|
|
653
|
+
conn = self._engine
|
|
654
|
+
should_close_conn = False
|
|
655
|
+
if not conn:
|
|
656
|
+
conn = self.connect() # Try to connect
|
|
657
|
+
if not conn:
|
|
658
|
+
raise ConnectionError(f"无法Get模式,database未connect to: {self.name}")
|
|
659
|
+
should_close_conn = True
|
|
660
|
+
|
|
661
|
+
schema: Dict[str, List[Dict[str, str]]] = {}
|
|
662
|
+
try:
|
|
663
|
+
# Use self._inspector to get the table name, which is more in line with SQLAlchemy's approach
|
|
664
|
+
if not self._inspector: # Ensure inspector exists
|
|
665
|
+
if not conn: # If conn was not previously established successfully
|
|
666
|
+
conn = self.connect()
|
|
667
|
+
if not conn:
|
|
668
|
+
raise ConnectionError(
|
|
669
|
+
f"无法Get模式,database未connect to: {self.name}"
|
|
670
|
+
)
|
|
671
|
+
self._inspector = inspect(conn)
|
|
672
|
+
|
|
673
|
+
tables = self._inspector.get_table_names()
|
|
674
|
+
|
|
675
|
+
# Get column names and types for each table
|
|
676
|
+
for table_name in tables:
|
|
677
|
+
# Use self._inspector to get column information
|
|
678
|
+
columns_info = self._inspector.get_columns(table_name)
|
|
679
|
+
|
|
680
|
+
current_table_cols = []
|
|
681
|
+
for column_data in columns_info:
|
|
682
|
+
# column_data is a dictionary containing keys such as 'name', 'type', 'nullable', 'default', etc.
|
|
683
|
+
# 'type' is usually a SQLAlchemy type object, which needs to be converted to a string
|
|
684
|
+
col_name = column_data.get("name")
|
|
685
|
+
col_type_obj = column_data.get("type")
|
|
686
|
+
|
|
687
|
+
if col_name and col_type_obj is not None: # Ensure that column names and types exist
|
|
688
|
+
# Convert SQLAlchemy type objects to their string representation
|
|
689
|
+
col_type_str = str(col_type_obj)
|
|
690
|
+
current_table_cols.append(
|
|
691
|
+
{"name": col_name, "type": col_type_str}
|
|
692
|
+
)
|
|
693
|
+
elif col_name: # Unknown type, but column name exists
|
|
694
|
+
current_table_cols.append(
|
|
695
|
+
{"name": col_name, "type": "UNKNOWN"}
|
|
696
|
+
) # Or record a default value
|
|
697
|
+
logger.warning(
|
|
698
|
+
f"Column '{col_name}' in table '{table_name}' has an unknown type."
|
|
699
|
+
)
|
|
700
|
+
|
|
701
|
+
if current_table_cols: # Only add to schema when the table has columns
|
|
702
|
+
schema[table_name] = current_table_cols
|
|
703
|
+
else:
|
|
704
|
+
logger.info(
|
|
705
|
+
f"Table '{table_name}' has no columns or columns could not be retrieved."
|
|
706
|
+
)
|
|
707
|
+
|
|
708
|
+
logger.debug(f"Get到 {self.name} schema: {len(schema)} tables")
|
|
709
|
+
self.schema = schema
|
|
710
|
+
return schema
|
|
711
|
+
except Exception as err: # Catch all database-related errors
|
|
712
|
+
logger.error(f"Get SQLite 模式failed {self.name}: {err}")
|
|
713
|
+
# Ensure that if the connection is temporarily open, it will be closed when an error occurs.
|
|
714
|
+
if (
|
|
715
|
+
should_close_conn and conn and conn == self._engine
|
|
716
|
+
): # If this connection was specifically opened for this method
|
|
717
|
+
conn.dispose()
|
|
718
|
+
self._engine = None # Reset engine state
|
|
719
|
+
self._inspector = None # Reset inspector
|
|
720
|
+
raise RuntimeError(f"Get SQLite 模式failed: {err}") from err
|
|
721
|
+
finally:
|
|
722
|
+
if should_close_conn and conn:
|
|
723
|
+
if conn == self._engine: # Only close when this method creates self._engine
|
|
724
|
+
conn.dispose()
|
|
725
|
+
self._engine = None
|
|
726
|
+
self._inspector = None # Also clear the inspector
|
|
727
|
+
elif conn != self._engine: # If it's a temporarily created conn and not self._engine
|
|
728
|
+
conn.dispose()
|
|
729
|
+
|
|
730
|
+
def sampleData(self, conceptName: str, count: int = 10) -> Messages:
|
|
731
|
+
"""Retrieve sample data from an SQLite data source for a specified Concept name.
|
|
732
|
+
|
|
733
|
+
It converts the Concept name back to a possible table name (assuming the naming convention used in the scan method),
|
|
734
|
+
then queries that table to retrieve the specified number of sample rows.
|
|
735
|
+
|
|
736
|
+
Args:
|
|
737
|
+
conceptName (str): The name of the Concept for which to retrieve sample data.
|
|
738
|
+
count (int): The number of sample rows to retrieve, defaults to 10.
|
|
739
|
+
|
|
740
|
+
Returns:
|
|
741
|
+
Messages: A list of dictionaries, each representing a row of data,
|
|
742
|
+
where keys are column names and values are corresponding data.
|
|
743
|
+
Returns an empty list if the Concept is not found or an error occurs.
|
|
744
|
+
"""
|
|
745
|
+
if count <= 0:
|
|
746
|
+
logger.info(
|
|
747
|
+
f"Sample count is {count}, returning empty list for concept '{conceptName}' in {self.name}."
|
|
748
|
+
)
|
|
749
|
+
return []
|
|
750
|
+
|
|
751
|
+
target_table_name: Optional[str] = None
|
|
752
|
+
actual_column_names: List[str] = []
|
|
753
|
+
|
|
754
|
+
try:
|
|
755
|
+
db_schema = self.get_schema() # This might connect if not connected.
|
|
756
|
+
if not db_schema:
|
|
757
|
+
logger.warning(
|
|
758
|
+
f"Could not retrieve schema for {self.name} to find concept '{conceptName}'."
|
|
759
|
+
)
|
|
760
|
+
return []
|
|
761
|
+
|
|
762
|
+
for table_name_from_schema, columns_details in db_schema.items():
|
|
763
|
+
# Ensure table_name_from_schema is a string for _camelCase
|
|
764
|
+
if not isinstance(table_name_from_schema, str):
|
|
765
|
+
logger.warning(
|
|
766
|
+
f"Skipping non-string table name in schema: {table_name_from_schema}"
|
|
767
|
+
)
|
|
768
|
+
continue
|
|
769
|
+
|
|
770
|
+
generated_concept_name = _camelCase(table_name_from_schema).capitalize()
|
|
771
|
+
if generated_concept_name == conceptName:
|
|
772
|
+
target_table_name = table_name_from_schema
|
|
773
|
+
actual_column_names = [
|
|
774
|
+
col_info["name"]
|
|
775
|
+
for col_info in columns_details
|
|
776
|
+
if col_info.get("name")
|
|
777
|
+
]
|
|
778
|
+
if not actual_column_names:
|
|
779
|
+
logger.warning(
|
|
780
|
+
f"Concept '{conceptName}' (Table '{target_table_name}') found in {self.name} but has no columns. Cannot sample data."
|
|
781
|
+
)
|
|
782
|
+
return [] # Cannot select data if no columns
|
|
783
|
+
break # Found the table
|
|
784
|
+
|
|
785
|
+
if not target_table_name:
|
|
786
|
+
logger.warning(
|
|
787
|
+
f"Concept '{conceptName}' not found as a discoverable table in datasource '{self.name}'."
|
|
788
|
+
)
|
|
789
|
+
return []
|
|
790
|
+
|
|
791
|
+
# SQLite uses double quotes or square brackets to quote identifiers, but special quoting is usually not needed.
|
|
792
|
+
quoted_column_names_str = ", ".join(
|
|
793
|
+
[f'"{col}"' for col in actual_column_names]
|
|
794
|
+
)
|
|
795
|
+
sql_query = f'SELECT {quoted_column_names_str} FROM "{target_table_name}" LIMIT {count}'
|
|
796
|
+
|
|
797
|
+
logger.debug(
|
|
798
|
+
f"Executing sample data query for concept '{conceptName}' on {self.name}: {sql_query}"
|
|
799
|
+
)
|
|
800
|
+
query_result = self.executeQuery(
|
|
801
|
+
sql_query
|
|
802
|
+
) # fetchColumns is True by default
|
|
803
|
+
|
|
804
|
+
result_columns = query_result.get("columns", [])
|
|
805
|
+
result_data_rows = query_result.get("data", [])
|
|
806
|
+
|
|
807
|
+
if not result_columns and result_data_rows:
|
|
808
|
+
logger.warning(
|
|
809
|
+
f"Query for concept '{conceptName}' in {self.name} returned data but no column names. This might indicate an issue with executeQuery or the underlying table structure."
|
|
810
|
+
)
|
|
811
|
+
|
|
812
|
+
formatted_samples: Messages = []
|
|
813
|
+
for row_tuple in result_data_rows:
|
|
814
|
+
processed_row = []
|
|
815
|
+
for item in row_tuple:
|
|
816
|
+
if isinstance(
|
|
817
|
+
item, (datetime.datetime, datetime.date, datetime.time)
|
|
818
|
+
):
|
|
819
|
+
processed_row.append(item.isoformat())
|
|
820
|
+
else:
|
|
821
|
+
processed_row.append(item)
|
|
822
|
+
|
|
823
|
+
if len(processed_row) == len(result_columns):
|
|
824
|
+
formatted_samples.append(dict(zip(result_columns, processed_row)))
|
|
825
|
+
else:
|
|
826
|
+
logger.warning(
|
|
827
|
+
f"Row data length mismatch for concept '{conceptName}' in {self.name}. "
|
|
828
|
+
f"Expected {len(result_columns)} columns based on query result, got {len(processed_row)}. Row: {processed_row}"
|
|
829
|
+
)
|
|
830
|
+
return formatted_samples
|
|
831
|
+
|
|
832
|
+
except ConnectionError as ce:
|
|
833
|
+
logger.error(
|
|
834
|
+
f"Connection error while fetching sample data for concept '{conceptName}' from {self.name}: {ce}"
|
|
835
|
+
)
|
|
836
|
+
return []
|
|
837
|
+
except Exception as e:
|
|
838
|
+
logger.error(
|
|
839
|
+
f"Error fetching sample data for concept '{conceptName}' (table: {target_table_name or 'unknown'}) from {self.name}: {e}",
|
|
840
|
+
exc_info=True,
|
|
841
|
+
)
|
|
842
|
+
return []
|
|
843
|
+
|
|
844
|
+
|
|
845
|
+
# Import Oracle datasource
|
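A minimal, hedged end-to-end sketch of how the SQLite datasource above might be exercised against a throwaway database file. The file path, table name, and row contents are illustrative assumptions, not values from the package, and it presumes the DataSource base class initializes the `schema` attribute that get_schema() checks.

import sqlite3

from dolphin.lib.ontology.datasource.sql import DataSourceSqlite

# Create a throwaway SQLite file with one table (illustrative data only).
conn = sqlite3.connect("/tmp/dolphin_demo.db")
conn.execute("CREATE TABLE IF NOT EXISTS user_profile (id INTEGER, full_name TEXT)")
conn.execute("INSERT INTO user_profile VALUES (1, 'Ada Lovelace')")
conn.commit()
conn.close()

ds = DataSourceSqlite("demo-sqlite", {"database": "/tmp/dolphin_demo.db"})
mappings = ds.scan()                          # one Mapping per table; Concept name "Userprofile"
rows = ds.sampleData("Userprofile", count=1)  # [{"id": 1, "full_name": "Ada Lovelace"}]
ds.close()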