kweaver-dolphin 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. DolphinLanguageSDK/__init__.py +58 -0
  2. dolphin/__init__.py +62 -0
  3. dolphin/cli/__init__.py +20 -0
  4. dolphin/cli/args/__init__.py +9 -0
  5. dolphin/cli/args/parser.py +567 -0
  6. dolphin/cli/builtin_agents/__init__.py +22 -0
  7. dolphin/cli/commands/__init__.py +4 -0
  8. dolphin/cli/interrupt/__init__.py +8 -0
  9. dolphin/cli/interrupt/handler.py +205 -0
  10. dolphin/cli/interrupt/keyboard.py +82 -0
  11. dolphin/cli/main.py +49 -0
  12. dolphin/cli/multimodal/__init__.py +34 -0
  13. dolphin/cli/multimodal/clipboard.py +327 -0
  14. dolphin/cli/multimodal/handler.py +249 -0
  15. dolphin/cli/multimodal/image_processor.py +214 -0
  16. dolphin/cli/multimodal/input_parser.py +149 -0
  17. dolphin/cli/runner/__init__.py +8 -0
  18. dolphin/cli/runner/runner.py +989 -0
  19. dolphin/cli/ui/__init__.py +10 -0
  20. dolphin/cli/ui/console.py +2795 -0
  21. dolphin/cli/ui/input.py +340 -0
  22. dolphin/cli/ui/layout.py +425 -0
  23. dolphin/cli/ui/stream_renderer.py +302 -0
  24. dolphin/cli/utils/__init__.py +8 -0
  25. dolphin/cli/utils/helpers.py +135 -0
  26. dolphin/cli/utils/version.py +49 -0
  27. dolphin/core/__init__.py +107 -0
  28. dolphin/core/agent/__init__.py +10 -0
  29. dolphin/core/agent/agent_state.py +69 -0
  30. dolphin/core/agent/base_agent.py +970 -0
  31. dolphin/core/code_block/__init__.py +0 -0
  32. dolphin/core/code_block/agent_init_block.py +0 -0
  33. dolphin/core/code_block/assign_block.py +98 -0
  34. dolphin/core/code_block/basic_code_block.py +1865 -0
  35. dolphin/core/code_block/explore_block.py +1327 -0
  36. dolphin/core/code_block/explore_block_v2.py +712 -0
  37. dolphin/core/code_block/explore_strategy.py +672 -0
  38. dolphin/core/code_block/judge_block.py +220 -0
  39. dolphin/core/code_block/prompt_block.py +32 -0
  40. dolphin/core/code_block/skill_call_deduplicator.py +291 -0
  41. dolphin/core/code_block/tool_block.py +129 -0
  42. dolphin/core/common/__init__.py +17 -0
  43. dolphin/core/common/constants.py +176 -0
  44. dolphin/core/common/enums.py +1173 -0
  45. dolphin/core/common/exceptions.py +133 -0
  46. dolphin/core/common/multimodal.py +539 -0
  47. dolphin/core/common/object_type.py +165 -0
  48. dolphin/core/common/output_format.py +432 -0
  49. dolphin/core/common/types.py +36 -0
  50. dolphin/core/config/__init__.py +16 -0
  51. dolphin/core/config/global_config.py +1289 -0
  52. dolphin/core/config/ontology_config.py +133 -0
  53. dolphin/core/context/__init__.py +12 -0
  54. dolphin/core/context/context.py +1580 -0
  55. dolphin/core/context/context_manager.py +161 -0
  56. dolphin/core/context/var_output.py +82 -0
  57. dolphin/core/context/variable_pool.py +356 -0
  58. dolphin/core/context_engineer/__init__.py +41 -0
  59. dolphin/core/context_engineer/config/__init__.py +5 -0
  60. dolphin/core/context_engineer/config/settings.py +402 -0
  61. dolphin/core/context_engineer/core/__init__.py +7 -0
  62. dolphin/core/context_engineer/core/budget_manager.py +327 -0
  63. dolphin/core/context_engineer/core/context_assembler.py +583 -0
  64. dolphin/core/context_engineer/core/context_manager.py +637 -0
  65. dolphin/core/context_engineer/core/tokenizer_service.py +260 -0
  66. dolphin/core/context_engineer/example/incremental_example.py +267 -0
  67. dolphin/core/context_engineer/example/traditional_example.py +334 -0
  68. dolphin/core/context_engineer/services/__init__.py +5 -0
  69. dolphin/core/context_engineer/services/compressor.py +399 -0
  70. dolphin/core/context_engineer/utils/__init__.py +6 -0
  71. dolphin/core/context_engineer/utils/context_utils.py +441 -0
  72. dolphin/core/context_engineer/utils/message_formatter.py +270 -0
  73. dolphin/core/context_engineer/utils/token_utils.py +139 -0
  74. dolphin/core/coroutine/__init__.py +15 -0
  75. dolphin/core/coroutine/context_snapshot.py +154 -0
  76. dolphin/core/coroutine/context_snapshot_profile.py +922 -0
  77. dolphin/core/coroutine/context_snapshot_store.py +268 -0
  78. dolphin/core/coroutine/execution_frame.py +145 -0
  79. dolphin/core/coroutine/execution_state_registry.py +161 -0
  80. dolphin/core/coroutine/resume_handle.py +101 -0
  81. dolphin/core/coroutine/step_result.py +101 -0
  82. dolphin/core/executor/__init__.py +18 -0
  83. dolphin/core/executor/debug_controller.py +630 -0
  84. dolphin/core/executor/dolphin_executor.py +1063 -0
  85. dolphin/core/executor/executor.py +624 -0
  86. dolphin/core/flags/__init__.py +27 -0
  87. dolphin/core/flags/definitions.py +49 -0
  88. dolphin/core/flags/manager.py +113 -0
  89. dolphin/core/hook/__init__.py +95 -0
  90. dolphin/core/hook/expression_evaluator.py +499 -0
  91. dolphin/core/hook/hook_dispatcher.py +380 -0
  92. dolphin/core/hook/hook_types.py +248 -0
  93. dolphin/core/hook/isolated_variable_pool.py +284 -0
  94. dolphin/core/interfaces.py +53 -0
  95. dolphin/core/llm/__init__.py +0 -0
  96. dolphin/core/llm/llm.py +495 -0
  97. dolphin/core/llm/llm_call.py +100 -0
  98. dolphin/core/llm/llm_client.py +1285 -0
  99. dolphin/core/llm/message_sanitizer.py +120 -0
  100. dolphin/core/logging/__init__.py +20 -0
  101. dolphin/core/logging/logger.py +526 -0
  102. dolphin/core/message/__init__.py +8 -0
  103. dolphin/core/message/compressor.py +749 -0
  104. dolphin/core/parser/__init__.py +8 -0
  105. dolphin/core/parser/parser.py +405 -0
  106. dolphin/core/runtime/__init__.py +10 -0
  107. dolphin/core/runtime/runtime_graph.py +926 -0
  108. dolphin/core/runtime/runtime_instance.py +446 -0
  109. dolphin/core/skill/__init__.py +14 -0
  110. dolphin/core/skill/context_retention.py +157 -0
  111. dolphin/core/skill/skill_function.py +686 -0
  112. dolphin/core/skill/skill_matcher.py +282 -0
  113. dolphin/core/skill/skillkit.py +700 -0
  114. dolphin/core/skill/skillset.py +72 -0
  115. dolphin/core/trajectory/__init__.py +10 -0
  116. dolphin/core/trajectory/recorder.py +189 -0
  117. dolphin/core/trajectory/trajectory.py +522 -0
  118. dolphin/core/utils/__init__.py +9 -0
  119. dolphin/core/utils/cache_kv.py +212 -0
  120. dolphin/core/utils/tools.py +340 -0
  121. dolphin/lib/__init__.py +93 -0
  122. dolphin/lib/debug/__init__.py +8 -0
  123. dolphin/lib/debug/visualizer.py +409 -0
  124. dolphin/lib/memory/__init__.py +28 -0
  125. dolphin/lib/memory/async_processor.py +220 -0
  126. dolphin/lib/memory/llm_calls.py +195 -0
  127. dolphin/lib/memory/manager.py +78 -0
  128. dolphin/lib/memory/sandbox.py +46 -0
  129. dolphin/lib/memory/storage.py +245 -0
  130. dolphin/lib/memory/utils.py +51 -0
  131. dolphin/lib/ontology/__init__.py +12 -0
  132. dolphin/lib/ontology/basic/__init__.py +0 -0
  133. dolphin/lib/ontology/basic/base.py +102 -0
  134. dolphin/lib/ontology/basic/concept.py +130 -0
  135. dolphin/lib/ontology/basic/object.py +11 -0
  136. dolphin/lib/ontology/basic/relation.py +63 -0
  137. dolphin/lib/ontology/datasource/__init__.py +27 -0
  138. dolphin/lib/ontology/datasource/datasource.py +66 -0
  139. dolphin/lib/ontology/datasource/oracle_datasource.py +338 -0
  140. dolphin/lib/ontology/datasource/sql.py +845 -0
  141. dolphin/lib/ontology/mapping.py +177 -0
  142. dolphin/lib/ontology/ontology.py +733 -0
  143. dolphin/lib/ontology/ontology_context.py +16 -0
  144. dolphin/lib/ontology/ontology_manager.py +107 -0
  145. dolphin/lib/skill_results/__init__.py +31 -0
  146. dolphin/lib/skill_results/cache_backend.py +559 -0
  147. dolphin/lib/skill_results/result_processor.py +181 -0
  148. dolphin/lib/skill_results/result_reference.py +179 -0
  149. dolphin/lib/skill_results/skillkit_hook.py +324 -0
  150. dolphin/lib/skill_results/strategies.py +328 -0
  151. dolphin/lib/skill_results/strategy_registry.py +150 -0
  152. dolphin/lib/skillkits/__init__.py +44 -0
  153. dolphin/lib/skillkits/agent_skillkit.py +155 -0
  154. dolphin/lib/skillkits/cognitive_skillkit.py +82 -0
  155. dolphin/lib/skillkits/env_skillkit.py +250 -0
  156. dolphin/lib/skillkits/mcp_adapter.py +616 -0
  157. dolphin/lib/skillkits/mcp_skillkit.py +771 -0
  158. dolphin/lib/skillkits/memory_skillkit.py +650 -0
  159. dolphin/lib/skillkits/noop_skillkit.py +31 -0
  160. dolphin/lib/skillkits/ontology_skillkit.py +89 -0
  161. dolphin/lib/skillkits/plan_act_skillkit.py +452 -0
  162. dolphin/lib/skillkits/resource/__init__.py +52 -0
  163. dolphin/lib/skillkits/resource/models/__init__.py +6 -0
  164. dolphin/lib/skillkits/resource/models/skill_config.py +109 -0
  165. dolphin/lib/skillkits/resource/models/skill_meta.py +127 -0
  166. dolphin/lib/skillkits/resource/resource_skillkit.py +393 -0
  167. dolphin/lib/skillkits/resource/skill_cache.py +215 -0
  168. dolphin/lib/skillkits/resource/skill_loader.py +395 -0
  169. dolphin/lib/skillkits/resource/skill_validator.py +406 -0
  170. dolphin/lib/skillkits/resource_skillkit.py +11 -0
  171. dolphin/lib/skillkits/search_skillkit.py +163 -0
  172. dolphin/lib/skillkits/sql_skillkit.py +274 -0
  173. dolphin/lib/skillkits/system_skillkit.py +509 -0
  174. dolphin/lib/skillkits/vm_skillkit.py +65 -0
  175. dolphin/lib/utils/__init__.py +9 -0
  176. dolphin/lib/utils/data_process.py +207 -0
  177. dolphin/lib/utils/handle_progress.py +178 -0
  178. dolphin/lib/utils/security.py +139 -0
  179. dolphin/lib/utils/text_retrieval.py +462 -0
  180. dolphin/lib/vm/__init__.py +11 -0
  181. dolphin/lib/vm/env_executor.py +895 -0
  182. dolphin/lib/vm/python_session_manager.py +453 -0
  183. dolphin/lib/vm/vm.py +610 -0
  184. dolphin/sdk/__init__.py +60 -0
  185. dolphin/sdk/agent/__init__.py +12 -0
  186. dolphin/sdk/agent/agent_factory.py +236 -0
  187. dolphin/sdk/agent/dolphin_agent.py +1106 -0
  188. dolphin/sdk/api/__init__.py +4 -0
  189. dolphin/sdk/runtime/__init__.py +8 -0
  190. dolphin/sdk/runtime/env.py +363 -0
  191. dolphin/sdk/skill/__init__.py +10 -0
  192. dolphin/sdk/skill/global_skills.py +706 -0
  193. dolphin/sdk/skill/traditional_toolkit.py +260 -0
  194. kweaver_dolphin-0.1.0.dist-info/METADATA +521 -0
  195. kweaver_dolphin-0.1.0.dist-info/RECORD +199 -0
  196. kweaver_dolphin-0.1.0.dist-info/WHEEL +5 -0
  197. kweaver_dolphin-0.1.0.dist-info/entry_points.txt +27 -0
  198. kweaver_dolphin-0.1.0.dist-info/licenses/LICENSE.txt +201 -0
  199. kweaver_dolphin-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,845 @@
1
+ from abc import abstractmethod
2
+ from typing import Any, Dict, List, Optional
3
+ import re
4
+ import datetime
5
+
6
+ from sqlalchemy import create_engine, inspect, text
7
+ from sqlalchemy.engine import Engine
8
+
9
+ # Add NullPool import
10
+ from sqlalchemy.pool import NullPool
11
+
12
+ from dolphin.core.common.enums import Messages
13
+ from dolphin.lib.ontology.basic.concept import ConceptMemberType, Concept
14
+ from dolphin.lib.ontology.mapping import Mapping
15
+ from dolphin.lib.ontology.datasource.datasource import DataSource
16
+ from dolphin.lib.ontology.datasource.datasource import DataSourceType
17
+
18
+ from dolphin.core.logging.logger import get_logger
19
+
20
+ logger = get_logger("ontology")
21
+
22
+
23
+ def _camelCase(s: str) -> str:
24
+ """Convert a string separated by underscores or spaces to camel case.
25
+
26
+ Args:
27
+ s: The input string to convert.
28
+
29
+ Returns:
30
+ The camel case version of the input string.
31
+ """
32
+ s = re.sub(r"[_\-]+", " ", s).title().replace(" ", "")
33
+ return s[0].lower() + s[1:] if s else ""
34
+
35
+
36
class DataSourceSql(DataSource):
    """Base class for SQL type data sources, using SQLAlchemy.

    Subclasses provide ``connect`` (which must create the SQLAlchemy engine,
    store it in ``self._engine`` and return it) and ``get_schema``. This base
    class supplies connection life-cycle helpers, generic query execution,
    DB-type -> ``ConceptMemberType`` mapping and schema scanning.
    """

    def __init__(self, name: str, type: DataSourceType, config: Dict[str, Any]):
        """Store the connection settings found in ``config``.

        Args:
            name: Data source name (used in log and error messages).
            type: The data source type, forwarded to the base class.
            config: Settings dictionary; the keys ``host``, ``port``,
                ``username``, ``password`` and ``database`` are read here.
        """
        super().__init__(name, type, config)
        # No engine exists until connect() is called, hence Optional.
        self._engine: Optional[Engine] = None
        self._inspector = None
        self.host = config.get("host", "localhost")
        self.port = config.get("port", 3306)  # Default MySQL port
        self.username = config.get("username")
        self.password = config.get("password")
        self.database = config.get("database")

    @property
    def type(self) -> DataSourceType:
        """The stored data source type."""
        # NOTE(review): ``_type`` is presumably set by the DataSource base
        # class; the subclasses visible in this file also assign it.
        return self._type

    @abstractmethod
    def connect(self) -> Engine:
        """Establish database connection"""
        pass

    @abstractmethod
    def get_schema(self) -> Dict[str, List[Dict[str, str]]]:
        """Get database schema (table name -> list of column info, each column info is {'name': column name, 'type': type string})"""
        pass

    def close(self) -> None:
        """Dispose the engine, if one exists, and forget it.

        Errors raised while disposing are logged and swallowed; in that case
        ``self._engine`` is left untouched.
        """
        if self._engine:
            try:
                self._engine.dispose()
                logger.info(f"databaseconnect to已关闭: {self.name}")
                self._engine = None
            except Exception as e:
                logger.error(f"关闭databaseconnect to时出错 {self.name}: {e}")
        else:
            logger.warning(f"尝试关闭一个未建立或已关闭的connect to: {self.name}")

    def test_connection(self) -> bool:
        """Test whether the database connection is successful.

        Connects first when there is no engine yet, runs ``SELECT 1`` and
        reports the outcome. Never raises: every failure is logged and
        reported as ``False``.

        Returns:
            ``True`` when the test query ran, ``False`` otherwise.
        """
        originalConnectionState = self._engine
        connToClose = None
        try:
            if not self._engine:
                connToClose = self.connect()
            if not self._engine:
                logger.warning(f"测试connect to {self.name} failed:无法建立connect to")
                return False
            # Simple test query; the result itself is irrelevant.
            self.executeQuery("SELECT 1", fetchColumns=False)
            logger.info(f"测试connect to {self.name} successful")
            return True
        except Exception as e:
            logger.error(f"测试connect tofailed {self.name}: {e}")
            return False
        finally:
            # If the connection was established only for this test, close it.
            if connToClose and connToClose == self._engine:
                self.close()
            # Otherwise restore a pre-existing engine that got cleared
            # during the test so later operations are not affected.
            elif originalConnectionState and not self._engine:
                self._engine = originalConnectionState

    # SQL-specific methods can be added, such as executing SQL statements
    def executeQuery(self, query: str, fetchColumns: bool = True) -> Dict[str, Any]:
        """Execute an SQL query and return the results.

        When no engine exists yet, one is created through ``connect()`` for
        this call only and disposed again afterwards.

        Args:
            query (str): The SQL query statement to execute
            fetchColumns (bool): Whether to retrieve column name information, default is True

        Returns:
            Dict[str, Any]: ``{"columns": [...], "data": [...]}``; ``columns``
            is an empty list when ``fetchColumns`` is False.

        Raises:
            ConnectionError: If no engine could be obtained.
            Exception: Any error raised while executing the query is logged
                and re-raised unchanged.
        """
        engine = self._engine
        shouldCloseConn = False
        if not engine:
            engine = self.connect()
            if not engine:
                raise ConnectionError(f"无法connect to到database: {self.name}")
            shouldCloseConn = True  # Temporary engine: dispose it after use.

        try:
            # The ``with`` block guarantees that the Connection (and with it
            # the result cursor) is closed; previously the Connection object
            # returned by ``engine.connect()`` was never closed.
            with engine.connect() as connection:
                result = connection.execute(text(query))
                rows = result.fetchall()
                # Result.keys() yields the column names; the Result object
                # has no 'description' attribute of its own.
                columns = list(result.keys()) if fetchColumns else []
            return {"columns": columns, "data": rows}
        except Exception as e:
            logger.error(f"Error executing query on {self.name}: {e}")
            raise
        finally:
            if shouldCloseConn and engine:
                engine.dispose()
                if engine == self._engine:
                    self._engine = None  # Ensure internal state consistency

    def _map_db_type_to_concept_type(self, db_type_full: str) -> ConceptMemberType:
        """Map database-specific column type strings to ConceptMemberType.

        Only the part before the first ``(`` is inspected, case-insensitively,
        and matching is done by substring, in rule order (first hit wins).

        Args:
            db_type_full (str): The database-specific column type string,
                e.g. ``"VARCHAR(50)"``.

        Returns:
            ConceptMemberType: The corresponding ConceptMemberType;
            ``ConceptMemberType.ANY`` for empty, non-string or unrecognised
            input, or when mapping fails unexpectedly.
        """
        logger.debug(
            f"Mapping DB type: Original='{db_type_full}', Type='{type(db_type_full)}'"
        )
        # Ordered substring rules. Longer spellings are covered by the shorter
        # token they contain ("varchar"/"varchar2" by "char", "bigint" etc. by
        # "int", "boolean" by "bool", "datetime" by "date", "timestamp" by
        # "time"), so the outcome equals checking each spelling individually.
        rules = (
            # Character data, incl. Oracle VARCHAR2 / CLOB.
            (("char", "text", "string", "enum", "set", "clob"), ConceptMemberType.STRING),
            # Integer family.
            (("int",), ConceptMemberType.NUMBER),
            # Floating / fixed point, incl. Oracle NUMBER.
            (
                ("float", "double", "decimal", "numeric", "real", "number"),
                ConceptMemberType.NUMBER,
            ),
            (("bool",), ConceptMemberType.BOOLEAN),
            # Date/time values are exposed as strings (no dedicated member type is used here).
            (("date", "time", "year"), ConceptMemberType.STRING),
            # JSON is exposed as its string form.
            (("json",), ConceptMemberType.STRING),
        )
        try:
            # Ensure it's a non-empty string
            if not db_type_full or not isinstance(db_type_full, str):
                logger.warning(
                    f"Invalid db_type_full: '{db_type_full}'. Defaulting to ANY."
                )
                return ConceptMemberType.ANY

            # Extract the main type part before the parentheses, lower-cased.
            db_type_main = db_type_full.split("(")[0].strip().lower()

            result_type = ConceptMemberType.ANY  # Default
            for tokens, mapped_type in rules:
                if any(token in db_type_main for token in tokens):
                    result_type = mapped_type
                    break

            # Log if no specific mapping was found, unless the type was
            # already reported as 'unknown' or is empty.
            if result_type == ConceptMemberType.ANY and db_type_main not in [
                "unknown",
                "",
            ]:
                logger.warning(
                    f"Unknown DB type: '{db_type_full}' (main: '{db_type_main}'), defaulted to ANY."
                )

            logger.debug(
                f"Mapped DB type '{db_type_full}' to ConceptMemberType '{result_type.name if result_type else 'None'}' (Python type: {type(result_type)})"
            )
            return result_type
        except Exception as e:
            logger.error(
                f"Error mapping DB type '{db_type_full}': {e}. Defaulting to ANY.",
                exc_info=True,
            )
            return ConceptMemberType.ANY

    def scan(self) -> List[Mapping]:
        """Scan SQL database schema and generate Concept and Mapping for each table.

        For every table returned by ``get_schema()`` a ``Concept`` is created
        (name: camel-cased table name passed through ``str.capitalize``;
        members: camel-cased column names with their mapped types) together
        with a ``Mapping`` from column names to member names. Tables without
        usable columns, or for which Concept/Mapping creation fails, are
        logged and skipped.

        Returns:
            The list of created mappings; an empty list when the schema is
            unavailable or any unexpected error occurs (errors are logged,
            not raised).
        """
        logger.info(f"Starting to scan data source: {self.name}")
        mappings = []
        try:
            schema = self.get_schema()
            if not schema:
                logger.warning(f"无法Get数据源 {self.name} schema 信息")
                return []

            for table_name, columns_details in schema.items():
                # 1. Create Concept
                concept_name = _camelCase(table_name).capitalize()

                if not columns_details:
                    logger.warning(
                        f"表 '{table_name}' 在 {self.name} 中没有列信息,跳过"
                    )
                    continue

                members = {}
                # column name -> camelCase member name, filled alongside
                # ``members`` so only named columns are mapped and the
                # conversion is computed once per column.
                fieldToMemberMap = {}
                for col_detail in columns_details:
                    col_name = col_detail.get("name")
                    col_type_str = col_detail.get("type")

                    if not col_name:  # Skip entries without column names
                        logger.debug(
                            f"Skipping column with no name in table '{table_name}'. Detail: {col_detail}"
                        )
                        continue

                    member_name = _camelCase(col_name)
                    members[member_name] = self._map_db_type_to_concept_type(
                        col_type_str
                    )
                    fieldToMemberMap[col_name] = member_name

                if not members:
                    logger.warning(
                        f"表 '{table_name}' 在 {self.name} 中没有可转换为成员的有效列,跳过创建 Concept"
                    )
                    continue

                try:
                    concept = Concept(name=concept_name, members=members)
                    logger.debug(
                        f"为表 '{table_name}' 创建了 Concept: {concept_name} with members: {members}"
                    )
                except ValueError as e:
                    logger.error(
                        f"为表 '{table_name}' 创建 Concept '{concept_name}' failed: {e}"
                    )
                    continue

                # 2. Create Mapping
                # Defensive guard: if members has content, the map has too.
                if not fieldToMemberMap:
                    logger.warning(
                        f"表 '{table_name}' 在 {self.name} 中没有有效列名可映射,跳过创建 Mapping"
                    )
                    continue

                try:
                    mapping = Mapping(
                        dataSource=self,
                        space=table_name,
                        concept=concept,
                        fieldToMemberMap=fieldToMemberMap,
                    )
                    mappings.append(mapping)
                    logger.debug(f"为 Concept '{concept_name}' 创建了 Mapping")
                except (ValueError, TypeError) as e:
                    logger.error(f"为 Concept '{concept_name}' 创建 Mapping failed: {e}")

            logger.info(
                f"数据源 {self.name} 扫描完成,生成了 {len(mappings)} 个 Mappings"
            )
            return mappings

        except ConnectionError as e:
            logger.error(f"扫描数据源 {self.name} failed:connect to错误 {e}")
            return []
        except Exception as e:
            logger.error(f"扫描数据源 {self.name} 时发生意外错误: {e}")
            # Deliberately best-effort: callers receive an empty list.
            return []
328
+
329
+
330
+ class DataSourceMysql(DataSourceSql):
331
+ """MySQL Data Source Implementation"""
332
+
333
+ def __init__(self, name: str, config: Dict[str, Any]):
334
+ # Pass the correct type directly DataSourceType.MYSQL
335
+ super().__init__(name, DataSourceType.MYSQL, config)
336
+ self._type = DataSourceType.MYSQL # Store specific types
337
+
338
+ def connect(self) -> Engine:
339
+ """Connect to MySQL database"""
340
+ if self._engine:
341
+ logger.debug(f"Already connected to {self.name} , reconnecting")
342
+ self.close()
343
+
344
+ try:
345
+ # Use pymysql to connect to MySQL database
346
+ # Make sure to install: pip install pymysql
347
+ import pymysql # Move here, import only when needed
348
+
349
+ # Modify the connection method to disable the connection pool for the test environment
350
+ connection_url = f"mysql+pymysql://{self.username}:{self.password}@{self.host}:{self.port}/{self.database}"
351
+
352
+ # Use NullPool to disable connection pooling, or use a more appropriate connection pool configuration
353
+ # In testing environments, it is usually simpler and more reliable to disable connection pooling.
354
+ self._engine = create_engine(
355
+ connection_url,
356
+ poolclass=NullPool, # Disable connection pooling
357
+ )
358
+
359
+ self._inspector = inspect(self._engine)
360
+ logger.info(f"Successfully connected to MySQL database: {self.name}")
361
+ return self._engine
362
+ except ImportError:
363
+ logger.error(
364
+ f"connect to MySQL database {self.name} failed: missing 'mysql-connector-python' library. Please run 'pip install mysql-connector-python'"
365
+ )
366
+ self._engine = None
367
+ raise ConnectionError(f"缺少 MySQL 驱动: {self.name}")
368
+ except pymysql.Error as err: # Catch specific database connection errors
369
+ logger.error(f"connect to MySQL databasefailed {self.name}: {err}")
370
+ self._engine = None
371
+ raise ConnectionError(f"无法connect to到 MySQL database: {self.name}, {err}")
372
+ except Exception as e: # Other unexpected errors
373
+ logger.error(f"connect to MySQL unknown error occurred {self.name}: {e}")
374
+ self._engine = None
375
+ raise ConnectionError(f"connect to MySQL unknown error occurred: {self.name}, {e}")
376
+
377
+ def get_schema(self) -> Dict[str, List[Dict[str, str]]]:
378
+ """Get MySQL database schema information (table name -> list of column information)"""
379
+ if self.schema:
380
+ return self.schema
381
+
382
+ conn = self._engine
383
+ should_close_conn = False
384
+ if not conn:
385
+ conn = self.connect() # Try to connect
386
+ if not conn:
387
+ raise ConnectionError(f"无法Get模式,database未connect to: {self.name}")
388
+ should_close_conn = True
389
+
390
+ schema: Dict[str, List[Dict[str, str]]] = {}
391
+ cursor = None
392
+ try:
393
+ # Use self._inspector to get the table name, which is more in line with SQLAlchemy's approach
394
+ if not self._inspector: # Ensure inspector exists
395
+ if not conn: # If conn was not previously established successfully
396
+ conn = self.connect()
397
+ if not conn:
398
+ raise ConnectionError(
399
+ f"无法Get模式,database未connect to: {self.name}"
400
+ )
401
+ self._inspector = inspect(conn)
402
+
403
+ tables = self._inspector.get_table_names()
404
+
405
+ # Get column names and types for each table
406
+ for table_name in tables:
407
+ # Use self._inspector to get column information
408
+ columns_info = self._inspector.get_columns(table_name)
409
+
410
+ current_table_cols = []
411
+ for column_data in columns_info:
412
+ # column_data is a dictionary containing keys such as 'name', 'type', 'nullable', 'default'
413
+ # 'type' is usually a SQLAlchemy type object, which needs to be converted to a string.
414
+ col_name = column_data.get("name")
415
+ col_type_obj = column_data.get("type")
416
+
417
+ if col_name and col_type_obj is not None: # Ensure that column names and types exist
418
+ # Convert SQLAlchemy type objects to their string representations
419
+ # For example: VARCHAR(length=50), INTEGER(), NUMERIC(precision=10, scale=2)
420
+ col_type_str = str(col_type_obj).replace(
421
+ ' COLLATE "utf8mb4_unicode_ci"', ""
422
+ )
423
+ current_table_cols.append(
424
+ {"name": col_name, "type": col_type_str}
425
+ )
426
+ elif col_name: # Unknown type, but column name exists
427
+ current_table_cols.append(
428
+ {"name": col_name, "type": "UNKNOWN"}
429
+ ) # Or record a default value
430
+ logger.warning(
431
+ f"Column '{col_name}' in table '{table_name}' has an unknown type."
432
+ )
433
+
434
+ if current_table_cols: # Only add to schema when the table has columns
435
+ schema[table_name] = current_table_cols
436
+ else:
437
+ logger.info(
438
+ f"Table '{table_name}' has no columns or columns could not be retrieved."
439
+ )
440
+
441
+ logger.debug(f"Get到 {self.name} schema: {len(schema)} tables")
442
+ self.schema = schema
443
+ return schema
444
+ except Exception as err: # Catch all database-related errors
445
+ logger.error(f"Get MySQL 模式failed {self.name}: {err}")
446
+ # Ensure that if the connection is temporarily open, it will be closed when an error occurs.
447
+ # Instead of relying on the caller (such as scan) to handle it
448
+ if (
449
+ should_close_conn and conn and conn == self._engine
450
+ ): # If this connection was specifically opened for this method
451
+ conn.dispose()
452
+ self._engine = None # Reset engine state
453
+ self._inspector = None # Reset inspector
454
+ raise RuntimeError(f"Get MySQL 模式failed: {err}") from err
455
+ finally:
456
+ # The cursor is no longer directly managed at this method level, as methods such as inspector.get_columns handle the cursor internally.
457
+ # if cursor:
458
+ # cursor.close()
459
+ if should_close_conn and conn:
460
+ if conn == self._engine: # Only close when this method creates self._engine
461
+ conn.dispose()
462
+ self._engine = None
463
+ self._inspector = None # Also clear the inspector
464
+ elif conn != self._engine: # If it's a temporarily created conn and not self._engine
465
+ conn.dispose()
466
+
467
+ def sampleData(self, conceptName: str, count: int = 10) -> Messages:
468
+ """Retrieve sample data from a MySQL data source for the specified Concept name.
469
+
470
+ It converts the Concept name back to a possible table name (assuming the naming convention used in the scan method),
471
+ then queries that table to retrieve the specified number of sample rows.
472
+
473
+ Args:
474
+ conceptName (str): The name of the Concept for which to retrieve sample data.
475
+ count (int): The number of sample rows to retrieve, defaults to 10.
476
+
477
+ Returns:
478
+ Messages: A list of dictionaries, each representing a row of data,
479
+ where keys are column names and values are corresponding data.
480
+ Returns an empty list if the Concept is not found or an error occurs.
481
+ """
482
+ if count <= 0:
483
+ logger.info(
484
+ f"Sample count is {count}, returning empty list for concept '{conceptName}' in {self.name}."
485
+ )
486
+ return []
487
+
488
+ target_table_name: Optional[str] = None
489
+ actual_column_names: List[str] = []
490
+
491
+ try:
492
+ db_schema = self.get_schema() # This might connect if not connected.
493
+ if not db_schema:
494
+ logger.warning(
495
+ f"Could not retrieve schema for {self.name} to find concept '{conceptName}'."
496
+ )
497
+ return []
498
+
499
+ for table_name_from_schema, columns_details in db_schema.items():
500
+ # Ensure table_name_from_schema is a string for _camelCase
501
+ if not isinstance(table_name_from_schema, str):
502
+ logger.warning(
503
+ f"Skipping non-string table name in schema: {table_name_from_schema}"
504
+ )
505
+ continue
506
+
507
+ generated_concept_name = _camelCase(table_name_from_schema).capitalize()
508
+ if generated_concept_name == conceptName:
509
+ target_table_name = table_name_from_schema
510
+ actual_column_names = [
511
+ col_info["name"]
512
+ for col_info in columns_details
513
+ if col_info.get("name")
514
+ ]
515
+ if not actual_column_names:
516
+ logger.warning(
517
+ f"Concept '{conceptName}' (Table '{target_table_name}') found in {self.name} but has no columns. Cannot sample data."
518
+ )
519
+ return [] # Cannot select data if no columns
520
+ break # Found the table
521
+
522
+ if not target_table_name:
523
+ logger.warning(
524
+ f"Concept '{conceptName}' not found as a discoverable table in datasource '{self.name}'."
525
+ )
526
+ return []
527
+
528
+ # Quoting column names and table name for the SQL query
529
+ quoted_column_names_str = ", ".join(
530
+ [f"`{col}`" for col in actual_column_names]
531
+ )
532
+ # Ensure target_table_name is just the name, not schema.name, etc.
533
+ # get_schema() returns table names as keys, so this should be fine.
534
+ sql_query = f"SELECT {quoted_column_names_str} FROM `{target_table_name}` LIMIT {count}"
535
+
536
+ logger.debug(
537
+ f"Executing sample data query for concept '{conceptName}' on {self.name}: {sql_query}"
538
+ )
539
+ query_result = self.executeQuery(
540
+ sql_query
541
+ ) # fetchColumns is True by default
542
+
543
+ result_columns = query_result.get("columns", [])
544
+ result_data_rows = query_result.get("data", [])
545
+
546
+ if not result_columns and result_data_rows:
547
+ logger.warning(
548
+ f"Query for concept '{conceptName}' in {self.name} returned data but no column names. This might indicate an issue with executeQuery or the underlying table structure."
549
+ )
550
+ # Attempt to use actual_column_names if order and count match, but this is risky.
551
+ # Sticking to result_columns from executeQuery is safer.
552
+
553
+ formatted_samples: Messages = []
554
+ for row_tuple in result_data_rows:
555
+ processed_row = []
556
+ for item in row_tuple:
557
+ if isinstance(
558
+ item, (datetime.datetime, datetime.date, datetime.time)
559
+ ):
560
+ processed_row.append(item.isoformat())
561
+ else:
562
+ processed_row.append(item)
563
+
564
+ if len(processed_row) == len(result_columns):
565
+ formatted_samples.append(dict(zip(result_columns, processed_row)))
566
+ else:
567
+ logger.warning(
568
+ f"Row data length mismatch for concept '{conceptName}' in {self.name}. "
569
+ f"Expected {len(result_columns)} columns based on query result, got {len(processed_row)}. Row: {processed_row}"
570
+ )
571
+ return formatted_samples
572
+
573
+ except ConnectionError as ce:
574
+ logger.error(
575
+ f"Connection error while fetching sample data for concept '{conceptName}' from {self.name}: {ce}"
576
+ )
577
+ return []
578
+ except Exception as e:
579
+ logger.error(
580
+ f"Error fetching sample data for concept '{conceptName}' (table: {target_table_name or 'unknown'}) from {self.name}: {e}",
581
+ exc_info=True,
582
+ )
583
+ return []
584
+
585
+ # Can add MySQL-specific methods, such as executing MySQL-specific queries
586
def execute_mysql_specific_query(self, query: str) -> Dict[str, Any]:
    """Execute a MySQL-specific SQL statement and return its rows.

    Connects lazily through ``self.connect()`` when no engine is set yet.

    Args:
        query: Raw SQL text; it is wrapped with ``text()`` before execution.

    Returns:
        A dict with two keys: ``"columns"`` (list of column names reported
        by the result) and ``"data"`` (the rows returned by ``fetchall()``).

    Raises:
        ConnectionError: If no engine is available even after ``connect()``.
        Exception: Any error raised while executing or fetching is logged
            and re-raised unchanged.
    """
    if not self._engine:
        self.connect()
    if not self._engine:
        raise ConnectionError(f"无法connect to到database: {self.name}")

    try:
        # The ``with`` block releases the connection itself on every exit
        # path; closing only the result object would leave it checked out.
        with self._engine.connect() as connection:
            result = connection.execute(text(query))
            try:
                # Read the column names before fetching: keys() is the
                # public accessor and must be called while the result is
                # still open.
                columns = list(result.keys())
                rows = result.fetchall()
            finally:
                result.close()
        return {"columns": columns, "data": rows}
    except Exception as e:
        logger.error(f"执行 MySQL 特定查询时出错 on {self.name}: {e}")
        raise
605
+
606
+
607
class DataSourceSqlite(DataSourceSql):
    """SQLite data source implementation built on a SQLAlchemy engine.

    The only configuration this class reads itself is the path of the
    database file; everything else is handled by ``DataSourceSql``.
    """

    # Config keys that may carry the database file path, in priority order.
    _PATH_CONFIG_KEYS = ("database", "path", "file_path", "database_path")

    def __init__(self, name: str, config: Dict[str, Any]):
        """Create the data source and resolve the database file path.

        Args:
            name: Name of this data source (used in log and error messages).
            config: Configuration mapping. The file path is taken from the
                first of ``database``, ``path``, ``file_path`` or
                ``database_path`` that holds a non-empty value.

        Raises:
            ValueError: If none of the path keys holds a non-empty value.
        """
        # Pass the concrete type DataSourceType.SQLITE straight to the base class.
        super().__init__(name, DataSourceType.SQLITE, config)
        self._type = DataSourceType.SQLITE  # Store the specific type
        # Take the first non-empty candidate so that an empty "database"
        # entry does not hide a usable fallback key.
        self.database_path = next(
            (config[key] for key in self._PATH_CONFIG_KEYS if config.get(key)),
            None,
        )
        if not self.database_path:
            raise ValueError("SQLite 数据源配置缺少database文件路径")

    def connect(self) -> Engine:
        """Create a fresh engine and inspector for the SQLite file.

        An existing engine is closed first via ``self.close()``.

        Returns:
            The newly created engine, also stored on ``self._engine``.

        Raises:
            ConnectionError: If the engine or inspector cannot be created;
                the underlying exception is attached as the cause.
        """
        if self._engine:
            logger.debug(f"Already connected to {self.name} , reconnecting")
            self.close()

        engine = None
        try:
            connection_url = f"sqlite:///{self.database_path}"

            # NullPool disables connection pooling, which is simpler and
            # more reliable in testing environments.
            engine = create_engine(
                connection_url,
                poolclass=NullPool,
            )
            self._engine = engine
            self._inspector = inspect(engine)
            logger.info(
                f"Successfully connected to SQLite database: {self.name} at {self.database_path}"
            )
            return self._engine
        except Exception as e:
            logger.error(f"connect to SQLite databasefailed {self.name}: {e}")
            # Do not leave a half-initialised engine or a stale inspector behind.
            if engine is not None:
                engine.dispose()
            self._engine = None
            self._inspector = None
            raise ConnectionError(
                f"无法connect to到 SQLite database: {self.name}, {e}"
            ) from e

    def get_schema(self) -> Dict[str, List[Dict[str, str]]]:
        """Return the database schema as ``table name -> list of columns``.

        Each column is described by a dict with the keys ``"name"`` and
        ``"type"`` (the string form of the reported type object, or
        ``"UNKNOWN"`` when no type is reported). A non-empty result is
        cached on ``self.schema`` and returned directly on later calls.
        Tables for which no columns could be collected are left out.

        If this method had to open the engine itself, the engine is disposed
        again before returning, whether or not the lookup succeeded.

        Raises:
            ConnectionError: If no engine could be obtained.
            RuntimeError: If reading tables or columns fails; the original
                exception is attached as the cause.
        """
        if self.schema:
            return self.schema

        engine = self._engine
        opened_here = False
        if not engine:
            engine = self.connect()  # Try to connect
            if not engine:
                raise ConnectionError(f"无法Get模式,database未connect to: {self.name}")
            opened_here = True

        schema: Dict[str, List[Dict[str, str]]] = {}
        try:
            if not self._inspector:  # Ensure an inspector exists
                self._inspector = inspect(engine)

            for table_name in self._inspector.get_table_names():
                current_table_cols = []
                for column_data in self._inspector.get_columns(table_name):
                    col_name = column_data.get("name")
                    if not col_name:
                        # A column without a name cannot be queried; skip it.
                        continue

                    col_type_obj = column_data.get("type")
                    if col_type_obj is not None:
                        # The type is an object; store its string form.
                        current_table_cols.append(
                            {"name": col_name, "type": str(col_type_obj)}
                        )
                    else:
                        current_table_cols.append(
                            {"name": col_name, "type": "UNKNOWN"}
                        )
                        logger.warning(
                            f"Column '{col_name}' in table '{table_name}' has an unknown type."
                        )

                if current_table_cols:  # Only record tables that have columns
                    schema[table_name] = current_table_cols
                else:
                    logger.info(
                        f"Table '{table_name}' has no columns or columns could not be retrieved."
                    )

            logger.debug(f"Get到 {self.name} schema: {len(schema)} tables")
            self.schema = schema
            return schema
        except Exception as err:  # Catch all database-related errors
            logger.error(f"Get SQLite 模式failed {self.name}: {err}")
            raise RuntimeError(f"Get SQLite 模式failed: {err}") from err
        finally:
            # Single cleanup point for both the success and the error path:
            # release an engine that was opened only for this call.
            if opened_here:
                engine.dispose()
                if engine is self._engine:
                    self._engine = None
                    self._inspector = None  # Also clear the inspector

    @staticmethod
    def _quote_identifier(identifier: str) -> str:
        """Wrap *identifier* in double quotes, doubling any embedded quote."""
        return '"' + str(identifier).replace('"', '""') + '"'

    def sampleData(self, conceptName: str, count: int = 10) -> Messages:
        """Retrieve sample rows from SQLite for the given Concept name.

        Every table name in the schema is converted with
        ``_camelCase(...).capitalize()`` and compared with ``conceptName``;
        the first match is queried with ``SELECT ... LIMIT count``.

        Args:
            conceptName (str): Name of the Concept to sample.
            count (int): Number of rows to retrieve, defaults to 10.

        Returns:
            Messages: A list of dicts, one per row, mapping column name to
            value. Date, time and datetime values are converted with
            ``isoformat()``. An empty list is returned when ``count`` is not
            positive, the Concept is not found, the table has no columns,
            or any error occurs (errors are logged, not raised).
        """
        if count <= 0:
            logger.info(
                f"Sample count is {count}, returning empty list for concept '{conceptName}' in {self.name}."
            )
            return []

        target_table_name: Optional[str] = None
        actual_column_names: List[str] = []

        try:
            db_schema = self.get_schema()  # This might connect if not connected.
            if not db_schema:
                logger.warning(
                    f"Could not retrieve schema for {self.name} to find concept '{conceptName}'."
                )
                return []

            for table_name_from_schema, columns_details in db_schema.items():
                # _camelCase needs a string table name.
                if not isinstance(table_name_from_schema, str):
                    logger.warning(
                        f"Skipping non-string table name in schema: {table_name_from_schema}"
                    )
                    continue

                generated_concept_name = _camelCase(table_name_from_schema).capitalize()
                if generated_concept_name != conceptName:
                    continue

                target_table_name = table_name_from_schema
                actual_column_names = [
                    col_info["name"]
                    for col_info in columns_details
                    if col_info.get("name")
                ]
                if not actual_column_names:
                    logger.warning(
                        f"Concept '{conceptName}' (Table '{target_table_name}') found in {self.name} but has no columns. Cannot sample data."
                    )
                    return []  # Cannot select data if there are no columns
                break  # Found the table

            if not target_table_name:
                logger.warning(
                    f"Concept '{conceptName}' not found as a discoverable table in datasource '{self.name}'."
                )
                return []

            # Double-quote identifiers and escape embedded quotes so that
            # unusual table or column names cannot break the statement.
            quoted_column_names_str = ", ".join(
                self._quote_identifier(col) for col in actual_column_names
            )
            quoted_table_name = self._quote_identifier(target_table_name)
            sql_query = f"SELECT {quoted_column_names_str} FROM {quoted_table_name} LIMIT {count}"

            logger.debug(
                f"Executing sample data query for concept '{conceptName}' on {self.name}: {sql_query}"
            )
            query_result = self.executeQuery(sql_query)

            result_columns = query_result.get("columns", [])
            result_data_rows = query_result.get("data", [])

            if not result_columns and result_data_rows:
                logger.warning(
                    f"Query for concept '{conceptName}' in {self.name} returned data but no column names. This might indicate an issue with executeQuery or the underlying table structure."
                )

            formatted_samples: Messages = []
            for row_tuple in result_data_rows:
                # Temporal values become ISO-8601 strings; others pass through.
                processed_row = [
                    item.isoformat()
                    if isinstance(
                        item, (datetime.datetime, datetime.date, datetime.time)
                    )
                    else item
                    for item in row_tuple
                ]

                if len(processed_row) == len(result_columns):
                    formatted_samples.append(dict(zip(result_columns, processed_row)))
                else:
                    logger.warning(
                        f"Row data length mismatch for concept '{conceptName}' in {self.name}. "
                        f"Expected {len(result_columns)} columns based on query result, got {len(processed_row)}. Row: {processed_row}"
                    )
            return formatted_samples

        except ConnectionError as ce:
            logger.error(
                f"Connection error while fetching sample data for concept '{conceptName}' from {self.name}: {ce}"
            )
            return []
        except Exception as e:
            logger.error(
                f"Error fetching sample data for concept '{conceptName}' (table: {target_table_name or 'unknown'}) from {self.name}: {e}",
                exc_info=True,
            )
            return []
843
+
844
+
845
+ # Import Oracle datasource