kweaver-dolphin 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- DolphinLanguageSDK/__init__.py +58 -0
- dolphin/__init__.py +62 -0
- dolphin/cli/__init__.py +20 -0
- dolphin/cli/args/__init__.py +9 -0
- dolphin/cli/args/parser.py +567 -0
- dolphin/cli/builtin_agents/__init__.py +22 -0
- dolphin/cli/commands/__init__.py +4 -0
- dolphin/cli/interrupt/__init__.py +8 -0
- dolphin/cli/interrupt/handler.py +205 -0
- dolphin/cli/interrupt/keyboard.py +82 -0
- dolphin/cli/main.py +49 -0
- dolphin/cli/multimodal/__init__.py +34 -0
- dolphin/cli/multimodal/clipboard.py +327 -0
- dolphin/cli/multimodal/handler.py +249 -0
- dolphin/cli/multimodal/image_processor.py +214 -0
- dolphin/cli/multimodal/input_parser.py +149 -0
- dolphin/cli/runner/__init__.py +8 -0
- dolphin/cli/runner/runner.py +989 -0
- dolphin/cli/ui/__init__.py +10 -0
- dolphin/cli/ui/console.py +2795 -0
- dolphin/cli/ui/input.py +340 -0
- dolphin/cli/ui/layout.py +425 -0
- dolphin/cli/ui/stream_renderer.py +302 -0
- dolphin/cli/utils/__init__.py +8 -0
- dolphin/cli/utils/helpers.py +135 -0
- dolphin/cli/utils/version.py +49 -0
- dolphin/core/__init__.py +107 -0
- dolphin/core/agent/__init__.py +10 -0
- dolphin/core/agent/agent_state.py +69 -0
- dolphin/core/agent/base_agent.py +970 -0
- dolphin/core/code_block/__init__.py +0 -0
- dolphin/core/code_block/agent_init_block.py +0 -0
- dolphin/core/code_block/assign_block.py +98 -0
- dolphin/core/code_block/basic_code_block.py +1865 -0
- dolphin/core/code_block/explore_block.py +1327 -0
- dolphin/core/code_block/explore_block_v2.py +712 -0
- dolphin/core/code_block/explore_strategy.py +672 -0
- dolphin/core/code_block/judge_block.py +220 -0
- dolphin/core/code_block/prompt_block.py +32 -0
- dolphin/core/code_block/skill_call_deduplicator.py +291 -0
- dolphin/core/code_block/tool_block.py +129 -0
- dolphin/core/common/__init__.py +17 -0
- dolphin/core/common/constants.py +176 -0
- dolphin/core/common/enums.py +1173 -0
- dolphin/core/common/exceptions.py +133 -0
- dolphin/core/common/multimodal.py +539 -0
- dolphin/core/common/object_type.py +165 -0
- dolphin/core/common/output_format.py +432 -0
- dolphin/core/common/types.py +36 -0
- dolphin/core/config/__init__.py +16 -0
- dolphin/core/config/global_config.py +1289 -0
- dolphin/core/config/ontology_config.py +133 -0
- dolphin/core/context/__init__.py +12 -0
- dolphin/core/context/context.py +1580 -0
- dolphin/core/context/context_manager.py +161 -0
- dolphin/core/context/var_output.py +82 -0
- dolphin/core/context/variable_pool.py +356 -0
- dolphin/core/context_engineer/__init__.py +41 -0
- dolphin/core/context_engineer/config/__init__.py +5 -0
- dolphin/core/context_engineer/config/settings.py +402 -0
- dolphin/core/context_engineer/core/__init__.py +7 -0
- dolphin/core/context_engineer/core/budget_manager.py +327 -0
- dolphin/core/context_engineer/core/context_assembler.py +583 -0
- dolphin/core/context_engineer/core/context_manager.py +637 -0
- dolphin/core/context_engineer/core/tokenizer_service.py +260 -0
- dolphin/core/context_engineer/example/incremental_example.py +267 -0
- dolphin/core/context_engineer/example/traditional_example.py +334 -0
- dolphin/core/context_engineer/services/__init__.py +5 -0
- dolphin/core/context_engineer/services/compressor.py +399 -0
- dolphin/core/context_engineer/utils/__init__.py +6 -0
- dolphin/core/context_engineer/utils/context_utils.py +441 -0
- dolphin/core/context_engineer/utils/message_formatter.py +270 -0
- dolphin/core/context_engineer/utils/token_utils.py +139 -0
- dolphin/core/coroutine/__init__.py +15 -0
- dolphin/core/coroutine/context_snapshot.py +154 -0
- dolphin/core/coroutine/context_snapshot_profile.py +922 -0
- dolphin/core/coroutine/context_snapshot_store.py +268 -0
- dolphin/core/coroutine/execution_frame.py +145 -0
- dolphin/core/coroutine/execution_state_registry.py +161 -0
- dolphin/core/coroutine/resume_handle.py +101 -0
- dolphin/core/coroutine/step_result.py +101 -0
- dolphin/core/executor/__init__.py +18 -0
- dolphin/core/executor/debug_controller.py +630 -0
- dolphin/core/executor/dolphin_executor.py +1063 -0
- dolphin/core/executor/executor.py +624 -0
- dolphin/core/flags/__init__.py +27 -0
- dolphin/core/flags/definitions.py +49 -0
- dolphin/core/flags/manager.py +113 -0
- dolphin/core/hook/__init__.py +95 -0
- dolphin/core/hook/expression_evaluator.py +499 -0
- dolphin/core/hook/hook_dispatcher.py +380 -0
- dolphin/core/hook/hook_types.py +248 -0
- dolphin/core/hook/isolated_variable_pool.py +284 -0
- dolphin/core/interfaces.py +53 -0
- dolphin/core/llm/__init__.py +0 -0
- dolphin/core/llm/llm.py +495 -0
- dolphin/core/llm/llm_call.py +100 -0
- dolphin/core/llm/llm_client.py +1285 -0
- dolphin/core/llm/message_sanitizer.py +120 -0
- dolphin/core/logging/__init__.py +20 -0
- dolphin/core/logging/logger.py +526 -0
- dolphin/core/message/__init__.py +8 -0
- dolphin/core/message/compressor.py +749 -0
- dolphin/core/parser/__init__.py +8 -0
- dolphin/core/parser/parser.py +405 -0
- dolphin/core/runtime/__init__.py +10 -0
- dolphin/core/runtime/runtime_graph.py +926 -0
- dolphin/core/runtime/runtime_instance.py +446 -0
- dolphin/core/skill/__init__.py +14 -0
- dolphin/core/skill/context_retention.py +157 -0
- dolphin/core/skill/skill_function.py +686 -0
- dolphin/core/skill/skill_matcher.py +282 -0
- dolphin/core/skill/skillkit.py +700 -0
- dolphin/core/skill/skillset.py +72 -0
- dolphin/core/trajectory/__init__.py +10 -0
- dolphin/core/trajectory/recorder.py +189 -0
- dolphin/core/trajectory/trajectory.py +522 -0
- dolphin/core/utils/__init__.py +9 -0
- dolphin/core/utils/cache_kv.py +212 -0
- dolphin/core/utils/tools.py +340 -0
- dolphin/lib/__init__.py +93 -0
- dolphin/lib/debug/__init__.py +8 -0
- dolphin/lib/debug/visualizer.py +409 -0
- dolphin/lib/memory/__init__.py +28 -0
- dolphin/lib/memory/async_processor.py +220 -0
- dolphin/lib/memory/llm_calls.py +195 -0
- dolphin/lib/memory/manager.py +78 -0
- dolphin/lib/memory/sandbox.py +46 -0
- dolphin/lib/memory/storage.py +245 -0
- dolphin/lib/memory/utils.py +51 -0
- dolphin/lib/ontology/__init__.py +12 -0
- dolphin/lib/ontology/basic/__init__.py +0 -0
- dolphin/lib/ontology/basic/base.py +102 -0
- dolphin/lib/ontology/basic/concept.py +130 -0
- dolphin/lib/ontology/basic/object.py +11 -0
- dolphin/lib/ontology/basic/relation.py +63 -0
- dolphin/lib/ontology/datasource/__init__.py +27 -0
- dolphin/lib/ontology/datasource/datasource.py +66 -0
- dolphin/lib/ontology/datasource/oracle_datasource.py +338 -0
- dolphin/lib/ontology/datasource/sql.py +845 -0
- dolphin/lib/ontology/mapping.py +177 -0
- dolphin/lib/ontology/ontology.py +733 -0
- dolphin/lib/ontology/ontology_context.py +16 -0
- dolphin/lib/ontology/ontology_manager.py +107 -0
- dolphin/lib/skill_results/__init__.py +31 -0
- dolphin/lib/skill_results/cache_backend.py +559 -0
- dolphin/lib/skill_results/result_processor.py +181 -0
- dolphin/lib/skill_results/result_reference.py +179 -0
- dolphin/lib/skill_results/skillkit_hook.py +324 -0
- dolphin/lib/skill_results/strategies.py +328 -0
- dolphin/lib/skill_results/strategy_registry.py +150 -0
- dolphin/lib/skillkits/__init__.py +44 -0
- dolphin/lib/skillkits/agent_skillkit.py +155 -0
- dolphin/lib/skillkits/cognitive_skillkit.py +82 -0
- dolphin/lib/skillkits/env_skillkit.py +250 -0
- dolphin/lib/skillkits/mcp_adapter.py +616 -0
- dolphin/lib/skillkits/mcp_skillkit.py +771 -0
- dolphin/lib/skillkits/memory_skillkit.py +650 -0
- dolphin/lib/skillkits/noop_skillkit.py +31 -0
- dolphin/lib/skillkits/ontology_skillkit.py +89 -0
- dolphin/lib/skillkits/plan_act_skillkit.py +452 -0
- dolphin/lib/skillkits/resource/__init__.py +52 -0
- dolphin/lib/skillkits/resource/models/__init__.py +6 -0
- dolphin/lib/skillkits/resource/models/skill_config.py +109 -0
- dolphin/lib/skillkits/resource/models/skill_meta.py +127 -0
- dolphin/lib/skillkits/resource/resource_skillkit.py +393 -0
- dolphin/lib/skillkits/resource/skill_cache.py +215 -0
- dolphin/lib/skillkits/resource/skill_loader.py +395 -0
- dolphin/lib/skillkits/resource/skill_validator.py +406 -0
- dolphin/lib/skillkits/resource_skillkit.py +11 -0
- dolphin/lib/skillkits/search_skillkit.py +163 -0
- dolphin/lib/skillkits/sql_skillkit.py +274 -0
- dolphin/lib/skillkits/system_skillkit.py +509 -0
- dolphin/lib/skillkits/vm_skillkit.py +65 -0
- dolphin/lib/utils/__init__.py +9 -0
- dolphin/lib/utils/data_process.py +207 -0
- dolphin/lib/utils/handle_progress.py +178 -0
- dolphin/lib/utils/security.py +139 -0
- dolphin/lib/utils/text_retrieval.py +462 -0
- dolphin/lib/vm/__init__.py +11 -0
- dolphin/lib/vm/env_executor.py +895 -0
- dolphin/lib/vm/python_session_manager.py +453 -0
- dolphin/lib/vm/vm.py +610 -0
- dolphin/sdk/__init__.py +60 -0
- dolphin/sdk/agent/__init__.py +12 -0
- dolphin/sdk/agent/agent_factory.py +236 -0
- dolphin/sdk/agent/dolphin_agent.py +1106 -0
- dolphin/sdk/api/__init__.py +4 -0
- dolphin/sdk/runtime/__init__.py +8 -0
- dolphin/sdk/runtime/env.py +363 -0
- dolphin/sdk/skill/__init__.py +10 -0
- dolphin/sdk/skill/global_skills.py +706 -0
- dolphin/sdk/skill/traditional_toolkit.py +260 -0
- kweaver_dolphin-0.1.0.dist-info/METADATA +521 -0
- kweaver_dolphin-0.1.0.dist-info/RECORD +199 -0
- kweaver_dolphin-0.1.0.dist-info/WHEEL +5 -0
- kweaver_dolphin-0.1.0.dist-info/entry_points.txt +27 -0
- kweaver_dolphin-0.1.0.dist-info/licenses/LICENSE.txt +201 -0
- kweaver_dolphin-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,733 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base class for all ontology objects
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import List, Dict, Optional, TypeVar, Type, Any
|
|
7
|
+
from dolphin.core.common.enums import Messages
|
|
8
|
+
from dolphin.core.config import DataSourceType, OntologyConfig, DataSourceConfig
|
|
9
|
+
from dolphin.lib.ontology.basic.concept import Concept
|
|
10
|
+
from dolphin.lib.ontology.mapping import Mapping
|
|
11
|
+
import concurrent.futures
|
|
12
|
+
from dataclasses import dataclass, asdict
|
|
13
|
+
from enum import Enum, auto
|
|
14
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
15
|
+
|
|
16
|
+
from dolphin.lib.ontology.datasource.datasource import DataSource
|
|
17
|
+
|
|
18
|
+
# Optional import of the SQL-related data source classes (requires sqlalchemy)
|
|
19
|
+
try:
|
|
20
|
+
from dolphin.lib.ontology.datasource.sql import (
|
|
21
|
+
DataSourceMysql,
|
|
22
|
+
DataSourceSqlite,
|
|
23
|
+
)
|
|
24
|
+
from dolphin.lib.ontology.datasource.oracle_datasource import DataSourceOracle
|
|
25
|
+
_SQL_AVAILABLE = True
|
|
26
|
+
except ImportError:
|
|
27
|
+
# sqlalchemy is not installed, so these classes are unavailable
|
|
28
|
+
_SQL_AVAILABLE = False
|
|
29
|
+
DataSourceMysql = None
|
|
30
|
+
DataSourceSqlite = None
|
|
31
|
+
DataSourceOracle = None
|
|
32
|
+
|
|
33
|
+
# Add import of Dolphin SDK log
|
|
34
|
+
from dolphin.core.logging.logger import get_logger
|
|
35
|
+
|
|
36
|
+
logger = get_logger("ontology")
|
|
37
|
+
|
|
38
|
+
# Custom Types
|
|
39
|
+
T = TypeVar("T")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class MergeStrategy(Enum):
    """How ``Ontology.addConcept`` resolves a name clash with an existing concept."""

    # Drop the existing concept and store the incoming one.
    REPLACE = 1
    # Keep the existing members and add only the members that are new.
    EXTEND = 2
    # Keep the existing concept untouched and ignore the incoming one.
    KEEP_EXISTING = 3
    # Store the incoming concept under a new, non-conflicting name.
    RENAME_NEW = 4
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class OntologyStatus(Enum):
    """Lifecycle state of an ``Ontology`` instance."""

    # Instance constructed.
    INITIALIZED = 1
    # Loading in progress.
    LOADING = 2
    # Building concepts/mappings from data sources.
    BUILDING = 3
    # Ontology is usable.
    READY = 4
    # An error occurred.
    ERROR = 5
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass
class OntologyStats:
    """Counters and timestamps describing an ontology.

    All fields have defaults, so ``OntologyStats()`` yields an empty record.
    """

    # Number of registered data sources.
    dataSourcesCount: int = 0
    # Number of known concepts.
    conceptsCount: int = 0
    # Number of data-source-to-concept mappings.
    mappingsCount: int = 0
    # Timestamp string of the last build; empty until a build completes.
    lastBuildTime: str = ""
    # Timestamp string of the last configuration load; empty by default.
    lastConfigLoadTime: str = ""
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class Ontology:
|
|
73
|
+
"""Ontology Management Class
|
|
74
|
+
|
|
75
|
+
Responsible for managing data sources (DataSource), concepts (Concept), and their mappings (Mapping).
|
|
76
|
+
Can load data sources from configuration files and trigger scans to automatically generate concepts and mappings.
|
|
77
|
+
Supports ontology serialization, validation, and state management.
|
|
78
|
+
"""
|
|
79
|
+
|
|
80
|
+
# Mapping of data source types to implementation classes
|
|
81
|
+
# The registry is built dynamically; SQL data source classes are added only when they are available
|
|
82
|
+
_DATA_SOURCE_REGISTRY: Dict[DataSourceType, Type[DataSource]] = {}
|
|
83
|
+
|
|
84
|
+
@classmethod
def _build_data_source_registry(cls):
    """Populate the data source registry on first use and return it.

    The registry is filled only while it is still empty. SQL-backed
    data source classes are registered only when their optional import
    succeeded (``_SQL_AVAILABLE``).

    Returns:
        The class-level registry mapping data source types to classes.
    """
    registry = cls._DATA_SOURCE_REGISTRY
    # Nothing to do when already populated or when SQL support is missing.
    if registry or not _SQL_AVAILABLE:
        return registry

    sql_sources = (
        (DataSourceType.MYSQL, DataSourceMysql),
        (DataSourceType.SQLITE, DataSourceSqlite),
        (DataSourceType.ORACLE, DataSourceOracle),
    )
    for source_type, source_cls in sql_sources:
        if source_cls is not None:
            registry[source_type] = source_cls
    return registry
|
|
97
|
+
|
|
98
|
+
def __init__(self, ontologyConfig: OntologyConfig):
    """Create an empty ontology and load data sources from the config.

    Args:
        ontologyConfig: Ontology configuration. When it is falsy, no
            data sources are loaded.
    """
    self._ontologyConfig = ontologyConfig

    # Containers start empty; mappings are keyed by
    # (data source name, concept name).
    self._dataSources: Dict[str, DataSource] = dict()
    self._concepts: Dict[str, Concept] = dict()
    self._mappings: Dict[tuple[str, str], Mapping] = dict()

    self._status: OntologyStatus = OntologyStatus.INITIALIZED
    self._stats: OntologyStats = OntologyStats()
    logger.debug("Ontology manager initialized")

    if not ontologyConfig:
        return
    self._loadDataSourcesFromConfig()
|
|
109
|
+
|
|
110
|
+
@property
def status(self) -> OntologyStatus:
    """Return the ontology's current lifecycle status."""
    current = self._status
    return current
|
|
114
|
+
|
|
115
|
+
@property
def stats(self) -> OntologyStats:
    """Return the statistics record with its counts refreshed.

    The three counters are recomputed from the current containers on
    every access; the same record object is updated in place and returned.
    """
    record = self._stats
    record.dataSourcesCount = len(self._dataSources)
    record.conceptsCount = len(self._concepts)
    record.mappingsCount = len(self._mappings)
    return record
|
|
122
|
+
|
|
123
|
+
def registerDataSourceType(
    self, dataType: DataSourceType, cls: Type[DataSource]
) -> None:
    """Register (or override) the class that handles a data source type.

    The registration is stored in the class-level registry of
    ``Ontology``, not on this instance.

    Args:
        dataType (DataSourceType): Data source type to register.
        cls (Type[DataSource]): DataSource subclass that handles this type.
    """
    # Make sure the built-in entries exist before adding the new one.
    registry = Ontology._build_data_source_registry()
    registry[dataType] = cls
    logger.debug(f"Registered data source type {dataType.name} -> {cls.__name__}")
|
|
136
|
+
|
|
137
|
+
def addDataSource(self, dataSource: DataSource) -> None:
    """Store a data source under its name, replacing any previous entry.

    A warning is logged when an entry with the same name already exists.
    """
    already_known = dataSource.name in self._dataSources
    if already_known:
        logger.warning(f"Data source '{dataSource.name}' already exists,will be overwritten")

    self._dataSources[dataSource.name] = dataSource
    logger.debug(f"Data source added: {dataSource.name}")
|
|
143
|
+
|
|
144
|
+
def getDataSource(self, name: str) -> Optional[DataSource]:
    """Look up a data source by name; return ``None`` when it is unknown."""
    sources = self._dataSources
    return sources.get(name)
|
|
147
|
+
|
|
148
|
+
def getAllDataSources(self) -> List[DataSource]:
    """Return a new list holding every registered data source."""
    return [*self._dataSources.values()]
|
|
151
|
+
|
|
152
|
+
def addConcept(
    self, concept: Concept, strategy: MergeStrategy = MergeStrategy.REPLACE
) -> Concept:
    """Add a concept, resolving a name clash with the given strategy.

    Args:
        concept (Concept): The concept to add.
        strategy (MergeStrategy): What to do when a concept with the
            same name is already stored.

    Returns:
        Concept: The concept that ends up stored for this call: the new
        one, the merged one, the untouched existing one, or the renamed
        copy, depending on the strategy.
    """
    current = self._concepts.get(concept.name)

    # No clash: store as-is.
    if current is None:
        self._concepts[concept.name] = concept
        logger.debug(f"Concept added: {concept.name}")
        return concept

    if strategy == MergeStrategy.KEEP_EXISTING:
        logger.debug(f"Concept '{concept.name}' already exists,keeping original definition, ignoring new")
        return current

    if strategy == MergeStrategy.REPLACE:
        self._concepts[concept.name] = concept
        logger.warning(f"Concept '{concept.name}' already exists,has been replaced")
        return concept

    if strategy == MergeStrategy.EXTEND:
        # Existing members win; only unseen member names are added.
        mergedMembers = dict(current.members)
        for name, type_ in concept.members.items():
            if name not in mergedMembers:
                mergedMembers[name] = type_
                logger.debug(f"Concept '{concept.name}' added new member '{name}'")
                continue
            logger.debug(
                f"Concept '{concept.name}' member '{name}' already exists,keeping original type"
            )

        mergedConcept = Concept(concept.name, mergedMembers)
        self._concepts[concept.name] = mergedConcept
        logger.debug(
            f"Concept '{concept.name}' merged with {len(mergedMembers)} members"
        )
        return mergedConcept

    if strategy == MergeStrategy.RENAME_NEW:
        # Find the first free "<name>_<n>" starting from n = 1.
        suffix = 1
        while f"{concept.name}_{suffix}" in self._concepts:
            suffix += 1
        newName = f"{concept.name}_{suffix}"

        renamedConcept = Concept(newName, concept.members)
        self._concepts[newName] = renamedConcept
        logger.warning(f"Concept '{concept.name}' already exists,renamed to '{newName}'")
        return renamedConcept

    # Any other strategy value falls through without storing anything.
    return None
|
|
216
|
+
|
|
217
|
+
def getConcept(self, name: str) -> Optional[Concept]:
    """Look up a concept by name; return ``None`` when it is unknown."""
    known = self._concepts
    return known.get(name)
|
|
220
|
+
|
|
221
|
+
def getAllConcepts(self, **kwargs) -> List[Concept]:
    """Return a new list holding every stored concept.

    Extra keyword arguments are accepted and ignored.
    """
    return [*self._concepts.values()]
|
|
224
|
+
|
|
225
|
+
def getConceptDescription(self, name: str, **kwargs) -> str:
    """Describe one concept as an indented JSON string.

    Extra keyword arguments are accepted and ignored.

    Returns:
        The JSON dump of the concept's ``toDict()`` result, or a plain
        "does not exist" message when no concept has that name.
    """
    found = self._concepts.get(name)
    if found is not None:
        return json.dumps(found.toDict(), ensure_ascii=False, indent=2)
    return f"Concept '{name}' does not exist"
|
|
232
|
+
|
|
233
|
+
def getAllConceptsDescription(self) -> str:
    """Describe every concept as a single indented JSON string.

    Returns:
        str: JSON object keyed by each concept's ``name`` whose values
        are the concept's ``toDict()`` result.
    """
    described = {
        concept.name: concept.toDict() for concept in self._concepts.values()
    }
    return json.dumps(described, ensure_ascii=False, indent=2)
|
|
246
|
+
|
|
247
|
+
def addMapping(self, mapping: Mapping) -> None:
    """Store a mapping, replacing any previous one for the same pair.

    Mappings are keyed by (data source name, concept name). A warning
    is logged when the key is already present.
    """
    key = (mapping.dataSource.name, mapping.concept.name)
    is_duplicate = key in self._mappings
    if is_duplicate:
        logger.warning(
            f"Data source '{mapping.dataSource.name}' toConcept '{mapping.concept.name}' 的Mappingalready exists,will be overwritten"
        )

    self._mappings[key] = mapping
    logger.debug(f"Mapping added: {mapping.dataSource.name} -> {mapping.concept.name}")
|
|
256
|
+
|
|
257
|
+
def getMapping(self, dataSourceName: str, conceptName: str) -> Optional[Mapping]:
    """Return the mapping for a (data source, concept) pair, or ``None``."""
    key = (dataSourceName, conceptName)
    return self._mappings.get(key)
|
|
260
|
+
|
|
261
|
+
def getMappingsForDataSource(self, dataSourceName: str) -> List[Mapping]:
    """Return every mapping whose key names the given data source."""
    return [
        mapping
        for key, mapping in self._mappings.items()
        if key[0] == dataSourceName
    ]
|
|
264
|
+
|
|
265
|
+
def getMappingsForConcept(self, conceptName: str) -> List[Mapping]:
    """Return every mapping whose key names the given concept."""
    return [
        mapping
        for key, mapping in self._mappings.items()
        if key[1] == conceptName
    ]
|
|
268
|
+
|
|
269
|
+
def getAllMappings(self) -> List[Mapping]:
    """Return a new list holding every stored mapping."""
    return [*self._mappings.values()]
|
|
272
|
+
|
|
273
|
+
def getDataSourceFromConcept(self, conceptName: str) -> Optional[DataSource]:
    """Return the data source of the first mapping for a concept.

    Mappings are examined in insertion order and matched on the
    mapping's own ``concept.name``. ``None`` is returned when no mapping
    matches.
    """
    matches = (
        mapping.dataSource
        for mapping in self._mappings.values()
        if mapping.concept.name == conceptName
    )
    return next(matches, None)
|
|
279
|
+
|
|
280
|
+
def getDataSourcesFromConcepts(self, concepts: list) -> list:
    """Return the configs of the data sources mapped to the given concepts.

    Args:
        concepts: Concept names to look for.

    Returns:
        One ``config`` per distinct data source name, in the order the
        data sources are first encountered among the mappings.
    """
    # Keying by data source name removes duplicates when several
    # requested concepts map to the same data source.
    config_by_source = {
        mapping.dataSource.name: mapping.dataSource.config
        for mapping in self._mappings.values()
        if mapping.concept.name in concepts
    }
    return [*config_by_source.values()]
|
|
289
|
+
|
|
290
|
+
def getDataSourceSchemasFromConcepts(self, concepts: List[str]) -> Dict[str, Any]:
    """Collect the data source schema for each given concept.

    Concepts with no mapped data source are left out of the result.

    Returns:
        Dict keyed by concept name whose values are the result of the
        mapped data source's ``get_schema()``.
    """
    schemas = {}
    for conceptName in concepts:
        source = self.getDataSourceFromConcept(conceptName)
        if source is not None:
            schemas[conceptName] = source.get_schema()
    return schemas
|
|
299
|
+
|
|
300
|
+
def sampleData(self, conceptNames: List[str], count: int = 1) -> Dict[str, Any]:
    """Fetch sample data for each concept from its mapped data source.

    Args:
        conceptNames: Concepts to sample.
        count: Value forwarded to the data source's ``sampleData``.

    Returns:
        Dict keyed by concept name. Concepts with no mapped data source,
        or whose sample is empty/falsy, are left out.
    """
    samples = {}
    for conceptName in conceptNames:
        source = self.getDataSourceFromConcept(conceptName)
        if source is None:
            continue

        rows = source.sampleData(conceptName, count)
        if not rows:
            continue
        samples[conceptName] = rows
    return samples
|
|
312
|
+
|
|
313
|
+
def executeSql(self, sql: str, dataSourceName: Optional[str] = None) -> Messages:
    """Run an SQL statement on a data source and return its result.

    Args:
        sql (str): The SQL statement to execute.
        dataSourceName (Optional[str]): Data source to use. When omitted,
            the first registered data source is used.

    Returns:
        Messages: Whatever the data source's ``executeSql`` returns.

    Raises:
        RuntimeError: If no data source is registered.
        KeyError: If ``dataSourceName`` is given but unknown.
    """
    if not self._dataSources:
        raise RuntimeError("No data sources available to execute SQL")

    if dataSourceName is not None:
        target = self._dataSources.get(dataSourceName)
        if target is None:
            raise KeyError(f"Data source '{dataSourceName}' does not exist")
    else:
        # Dicts keep insertion order, so this is the first one registered.
        target = next(iter(self._dataSources.values()))

    return target.executeSql(sql)
|
|
333
|
+
|
|
334
|
+
def buildOntologyFromSources(
    self,
    runScan: bool = True,
    concurrent: bool = True,
    maxWorkers: Optional[int] = None,
    conceptStrategy: MergeStrategy = MergeStrategy.EXTEND,
) -> None:
    """Build the ontology (concepts and mappings) from the added data sources.

    Does nothing except log a warning when no data source is registered.
    Otherwise the status moves to BUILDING, the data sources are scanned,
    and the status ends at READY. If scanning raises, the status is set
    to ERROR before the exception propagates, so the ontology is never
    left reporting BUILDING after a failed build.

    Args:
        runScan (bool): Whether to scan the data sources. When False the
            status is set to READY without scanning. Default is True.
        concurrent (bool): Whether to scan concurrently when more than
            one data source is registered. Default is True.
        maxWorkers (Optional[int]): Worker limit forwarded to the
            concurrent scan; None leaves the choice to the scanner.
        conceptStrategy (MergeStrategy): Concept merging strategy
            forwarded to the scan. Default is EXTEND.

    Raises:
        Exception: Whatever the scan raises is re-raised unchanged.
    """
    import datetime

    if not self._dataSources:
        logger.warning("No data sources available for building ontology")
        return

    self._status = OntologyStatus.BUILDING
    logger.debug("Starting to build ontology from data sources...")

    if not runScan:
        logger.debug("Skipping all data source scanning (runScan=False)")
        self._status = OntologyStatus.READY
        return

    try:
        # NOTE: the `concurrent` parameter shadows the `concurrent`
        # package inside this method; only the flag is used here.
        if concurrent and len(self._dataSources) > 1:
            self._scanDataSourcesConcurrently(maxWorkers, conceptStrategy)
        else:
            self._scanDataSourcesSequentially(conceptStrategy)
    except Exception:
        # Record the failure so callers polling `status` do not see a
        # build that appears to be running forever.
        self._status = OntologyStatus.ERROR
        raise

    self._stats.lastBuildTime = datetime.datetime.now().isoformat()
    self._status = OntologyStatus.READY
    logger.debug("Ontology building process completed")
|
|
373
|
+
|
|
374
|
+
def validate(self) -> List[str]:
    """Validate that data sources, concepts and mappings agree.

    For every stored mapping this checks that:

    1. the key's data source name matches the mapping's own data source
       and that data source is registered;
    2. the key's concept name matches the mapping's own concept and
       that concept is stored;
    3. every member named in the mapping's field-to-member map exists
       on the stored concept (skipped when the concept is missing).

    Every issue found is also logged as a warning.

    Returns:
        List[str]: Validation error messages; empty when nothing is wrong.
    """
    errors = []

    for (dsName, conceptName), mapping in self._mappings.items():
        # 1. Data source referenced by the mapping.
        # The mapping exposes its data source as `dataSource`, the same
        # attribute every other method of this class reads.
        if mapping.dataSource.name != dsName:
            errors.append(
                f"Mapping键 ({dsName}, {conceptName}) 与Mapping对象中的Data sourcename {mapping.dataSource.name} inconsistent"
            )

        if dsName not in self._dataSources:
            errors.append(
                f"Mapping ({dsName}, {conceptName}) 引用了does not exist的Data source '{dsName}'"
            )

        # 2. Concept referenced by the mapping.
        if mapping.concept.name != conceptName:
            errors.append(
                f"Mapping键 ({dsName}, {conceptName}) 与Mapping对象中的Conceptname {mapping.concept.name} inconsistent"
            )

        if conceptName not in self._concepts:
            errors.append(
                f"Mapping ({dsName}, {conceptName}) 引用了does not exist的Concept '{conceptName}'"
            )

        # 3. Members referenced by the field-to-member map. The concept
        # lookup does not depend on the member, so do it once.
        concept = self._concepts.get(conceptName)
        for memberName in mapping.fieldToMemberMap.values():
            if concept and memberName not in concept.members:
                errors.append(
                    f"Mapping ({dsName}, {conceptName}) 引用了Concept中does not exist的member '{memberName}'"
                )

    if not errors:
        logger.debug("Ontology validation passed, no issues found")
    else:
        logger.warning(f"Ontology validation found {len(errors)} issues")
        for i, error in enumerate(errors, 1):
            logger.warning(f"Issue {i}: {error}")

    return errors
|
|
423
|
+
|
|
424
|
+
def saveToFile(self, filePath: str) -> bool:
    """Save the ontology to a JSON file.

    The file contains the concept definitions, a name/type reference
    for each data source, the mappings, and the statistics. Only the
    name and type of a data source are written; its configuration is
    not, so connection details are not stored.

    The statistics are taken from the ``stats`` property so that the
    saved counts reflect the current containers, and the document is
    serialised before the file is opened so that a serialisation error
    does not truncate an existing file.

    Args:
        filePath (str): Destination path.

    Returns:
        bool: True on success; False if anything failed (the error is
        logged with its traceback).
    """
    try:
        # 1. Concept definitions.
        conceptsData = {
            name: concept.toDict() for name, concept in self._concepts.items()
        }

        # 2. Data source references (name and type only).
        datasourcesRef = {
            name: {"name": name, "type": ds.type.name}
            for name, ds in self._dataSources.items()
        }

        # 3. Mappings.
        mappingsData = [
            {
                "data_source": dsName,
                "concept": conceptName,
                "field_to_member_map": mapping.fieldToMemberMap,
            }
            for (dsName, conceptName), mapping in self._mappings.items()
        ]

        # 4. Assemble and serialise the full document up front.
        ontologyData = {
            "concepts": conceptsData,
            "datasources_ref": datasourcesRef,
            "mappings": mappingsData,
            "stats": asdict(self.stats),
        }
        payload = json.dumps(ontologyData, indent=2, ensure_ascii=False)

        # 5. Write the file.
        with open(filePath, "w", encoding="utf-8") as f:
            f.write(payload)

        logger.debug(f"Ontology saved to file: {filePath}")
        return True

    except Exception as e:
        logger.exception(f"保存本体toFile {filePath} 时error: {e}")
        return False
|
|
476
|
+
|
|
477
|
+
def loadFromFile(self, filePath: str) -> bool:
    """Load concepts and mappings from a JSON ontology file.

    The stored concepts and mappings are replaced only after the whole
    file has been processed; on any failure the current state is kept.
    Mappings that reference a data source not registered on this
    instance, or a concept not present in the file, are skipped with a
    warning. Unknown member type names fall back to
    ``ConceptMemberType.ANY``.

    Args:
        filePath (str): Path to the ontology file.

    Returns:
        bool: True when the file was loaded; False when it is missing,
        is not valid JSON, lacks the "concepts"/"mappings" keys, or any
        other error occurred (all cases are logged).
    """
    try:
        with open(filePath, "r", encoding="utf-8") as f:
            ontologyData = json.load(f)

        # Both top-level sections are required.
        if "concepts" not in ontologyData or "mappings" not in ontologyData:
            logger.error(f"File {filePath} has incorrect format, missing required keys")
            return False

        from dolphin.lib.ontology.basic.concept import (
            Concept,
            ConceptMemberType,
        )
        from dolphin.lib.ontology.mapping import Mapping

        # Rebuild the concepts.
        concepts = {}
        for name, conceptData in ontologyData["concepts"].items():
            members = {}
            for memberName, typeName in conceptData["members"].items():
                try:
                    resolved = ConceptMemberType[typeName]
                except KeyError:
                    logger.warning(f"Unknown member type: {typeName},using ANY instead")
                    resolved = ConceptMemberType.ANY
                members[memberName] = resolved
            concepts[name] = Concept(name=name, members=members)

        # Rebuild the mappings; each needs a live data source instance.
        mappings = {}
        for mappingData in ontologyData["mappings"]:
            dsName = mappingData["data_source"]
            conceptName = mappingData["concept"]
            fieldMap = mappingData["field_to_member_map"]

            dataSource = self._dataSources.get(dsName)
            concept = concepts.get(conceptName)

            if not dataSource:
                logger.warning(f"Mapping中引用的Data source '{dsName}' does not exist,跳过")
                continue
            if not concept:
                logger.warning(f"Mapping中引用的Concept '{conceptName}' does not exist,跳过")
                continue

            try:
                mappings[(dsName, conceptName)] = Mapping(
                    dataSource=dataSource,
                    concept=concept,
                    fieldToMemberMap=fieldMap,
                )
            except ValueError as e:
                logger.warning(f"CreatingMapping ({dsName}, {conceptName}) failed: {e}")

        # Commit the new state only now that everything was processed.
        self._concepts = concepts
        self._mappings = mappings

        logger.debug(
            f"Loaded from file {filePath} ontology with {len(concepts)} 个Concept和 {len(mappings)} 个Mapping"
        )
        self._status = OntologyStatus.READY
        return True

    except FileNotFoundError:
        logger.error(f"本体File未找to: {filePath}")
        return False
    except json.JSONDecodeError as e:
        logger.error(f"Error parsing ontology file {filePath} error: {e}")
        return False
    except Exception:
        logger.exception(f"Loading ontology file {filePath} unexpected error occurred")
        return False
|
|
565
|
+
|
|
566
|
+
def reset(self) -> None:
    """Return the ontology to its initial, empty state.

    Empties the data source, concept and mapping containers in place (in
    that order) and sets the status back to ``OntologyStatus.INITIALIZED``.
    """
    for attrName in ("_dataSources", "_concepts", "_mappings"):
        getattr(self, attrName).clear()

    self._status = OntologyStatus.INITIALIZED
    logger.debug("Ontology has been reset")
|
|
573
|
+
|
|
574
|
+
def _loadDataSourcesFromConfig(self) -> None:
    """Create and register the data sources listed in the ontology configuration.

    The status is set to ``LOADING`` while the method runs and ends as:

    * ``INITIALIZED`` when the configuration lists no data sources, or none
      of them could be instantiated;
    * ``READY`` when at least one data source was added;
    * ``ERROR`` when an unexpected exception occurred (it is logged, not
      raised).

    After the configured entries have been processed, the current local
    time is stored in ``self._stats.lastConfigLoadTime`` as an ISO string.
    """
    import datetime

    self._status = OntologyStatus.LOADING
    logger.debug("Loading data sources from global configuration")
    try:
        # List of DataSourceConfig objects taken from the global configuration
        configs = self._ontologyConfig.dataSourcesConfig.getAllSourceConfigs()
        if not configs:
            logger.warning("No data sources in global configuration")
            self._status = OntologyStatus.INITIALIZED
            return

        loadedCount = 0
        for dsConfig in configs:
            name = dsConfig.name
            # The type enumeration is used exactly as the configuration provides it
            dataSourceType = dsConfig.type

            if not (name and dataSourceType):
                logger.warning(
                    f"跳过无效的Data source配置(missing name or type):{dsConfig}"
                )
                continue

            # The config object's attribute dict doubles as the data source settings
            created = self._createDataSource(name, dataSourceType, dsConfig.__dict__)
            if not created:
                # _createDataSource has already logged why it produced nothing
                continue

            self.addDataSource(created)
            logger.debug(
                f"successful加载并添加Data source: {name} ({dataSourceType.name})"
            )
            loadedCount += 1

        self._stats.lastConfigLoadTime = datetime.datetime.now().isoformat()
        logger.debug(f"Successfully loaded from global config {loadedCount} 个Data source")
        if loadedCount > 0:
            self._status = OntologyStatus.READY
        else:
            self._status = OntologyStatus.INITIALIZED

    except Exception as e:
        logger.exception(f"加载Data source配置 unexpected error occurred: {e}")
        self._status = OntologyStatus.ERROR
|
|
624
|
+
|
|
625
|
+
def _createDataSource(
    self, name: str, dataType: DataSourceType, config: Dict[str, Any]
) -> Optional[DataSource]:
    """Instantiate the data source class registered for ``dataType``.

    Args:
        name (str): Data source name
        dataType (DataSourceType): Data source type enumeration (config.DataSourceType)
        config (Dict[str, Any]): Specific configuration for the data source

    Returns:
        Optional[DataSource]: The new instance, or None when no class is
            registered for the type or the constructor raised (both cases
            are logged)
    """
    # Make sure the registry has been initialized
    registry = self._build_data_source_registry()

    datasourceCls = registry.get(dataType)
    if datasourceCls is None:
        # Fallback: compare by string form. This handles cases where
        # different module loading paths create different enum instances
        # that do not compare equal.
        wanted = str(dataType)  # e.g., "DataSourceType.MYSQL"
        datasourceCls = next(
            (candidate for key, candidate in registry.items() if str(key) == wanted),
            None,
        )

    if not datasourceCls:
        logger.warning(f"Data source类型 '{dataType.name}' not registered, skipping '{name}'")
        return None

    try:
        return datasourceCls(name=name, config=config)
    except Exception as e:
        logger.error(f"Creating {dataType.name} Data source '{name}' 实例failed: {e}")
        return None
|
|
661
|
+
|
|
662
|
+
def _scanDataSourcesSequentially(self, conceptStrategy: MergeStrategy) -> None:
    """Scan every registered data source, one after another.

    Args:
        conceptStrategy (MergeStrategy): Passed through unchanged to
            ``_scanSingleDataSource`` for each data source
    """
    for sourceName, source in self._dataSources.items():
        self._scanSingleDataSource(sourceName, source, conceptStrategy)
|
|
666
|
+
|
|
667
|
+
def _scanDataSourcesConcurrently(
    self, maxWorkers: int, conceptStrategy: MergeStrategy
) -> None:
    """Scan every registered data source on a thread pool.

    One ``_scanSingleDataSource`` task is submitted per data source and the
    method blocks until all of them have finished. An exception escaping a
    task is logged and does not stop the remaining scans.

    Args:
        maxWorkers (int): Passed to ``ThreadPoolExecutor`` as ``max_workers``
        conceptStrategy (MergeStrategy): Passed through unchanged to
            ``_scanSingleDataSource`` for each data source
    """
    with ThreadPoolExecutor(max_workers=maxWorkers) as executor:
        # Remember which data source each future belongs to for error reporting
        pending = {}
        for sourceName, source in self._dataSources.items():
            task = executor.submit(
                self._scanSingleDataSource, sourceName, source, conceptStrategy
            )
            pending[task] = sourceName

        # Collect results in completion order
        for finished in concurrent.futures.as_completed(pending):
            dsName = pending[finished]
            try:
                # result() re-raises whatever the task raised
                finished.result()
            except Exception as e:
                logger.exception(f"并发扫描Data source {dsName} unhandled exception: {e}")
|
|
687
|
+
|
|
688
|
+
def _scanSingleDataSource(
    self, dsName: str, dataSource: DataSource, conceptStrategy: MergeStrategy
) -> None:
    """Scan one data source and register the concepts and mappings it yields.

    The scan is skipped when the connection test fails or when ``scan()``
    returns nothing. Every exception is logged here rather than raised.

    Args:
        dsName (str): Name of the data source, used in log messages
        dataSource (DataSource): Data source to test and scan
        conceptStrategy (MergeStrategy): Strategy passed to ``addConcept``
            for each scanned concept
    """
    logger.debug(f"Scanning data source: {dsName}...")
    try:
        # Test connection, skip scanning if failed
        if not dataSource.test_connection():
            logger.warning(f"Data source {dsName} connection test failed, skipping scan")
            return

        # Perform scanning to retrieve Mappings (each carries its Concept)
        mappings = dataSource.scan()
        if not mappings:
            logger.debug(f"Data source {dsName} scan returned no mappings")
            return

        for mapping in mappings:
            finalConcept = self.addConcept(mapping.concept, strategy=conceptStrategy)

            if finalConcept is mapping.concept:
                mappingToAdd = mapping
            else:
                # addConcept handed back a different object than the scanned
                # concept, so rebuild the mapping around the returned one.
                from dolphin.lib.ontology.mapping import Mapping

                mappingToAdd = Mapping(
                    dataSource=mapping.dataSource,
                    space=mapping.space,
                    concept=finalConcept,
                    fieldToMemberMap=mapping.fieldToMemberMap,
                )

            self.addMapping(mappingToAdd)

        logger.debug(f"Data source {dsName} scan completed, added {len(mappings)} 个Mapping")

    except NotImplementedError:
        logger.error(f"Data source {dsName} ({dataSource.type.name}) scan method not implemented")
    except ConnectionError as e:
        logger.error(f"扫描Data source {dsName} 时connect tofailed: {e}")
    except Exception:
        logger.exception(f"扫描Data source {dsName} unexpected error occurred")
|