govio 0.2.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- govio/__init__.py +10 -0
- govio/cli/__init__.py +4 -0
- govio/cli/config.py +136 -0
- govio/cli/main.py +106 -0
- govio/cli/meta_export.py +263 -0
- govio/cli/observe.py +241 -0
- govio/cli/onboard.py +610 -0
- govio/cli/query.py +129 -0
- govio/cli/std_recommend.py +70 -0
- govio/core/__init__.py +4 -0
- govio/core/assets_generator.py +235 -0
- govio/core/graph_factory.py +54 -0
- govio/graph/__init__.py +4 -0
- govio/graph/falkordb_graph.py +100 -0
- govio/graph/networkx_graph.py +73 -0
- govio/metadata/__init__.py +25 -0
- govio/metadata/application.py +51 -0
- govio/metadata/database.py +160 -0
- govio/metadata/duckdb_loader.py +69 -0
- govio/metadata/gen_networkx.py +136 -0
- govio/metadata/metric.py +329 -0
- govio/metadata/metric_schema.json +194 -0
- govio/metadata/recommender.py +471 -0
- govio/metadata/relationship.py +305 -0
- govio/metadata/standard.py +125 -0
- govio/metadata/utility.py +235 -0
- govio/observe_data/__init__.py +1 -0
- govio/observe_data/config.py +39 -0
- govio/observe_data/core/__init__.py +1 -0
- govio/observe_data/core/comparator.py +52 -0
- govio/observe_data/core/database.py +64 -0
- govio/observe_data/core/dataframe_store.py +66 -0
- govio/observe_data/core/explorer.py +106 -0
- govio/observe_data/core/observe_store.py +179 -0
- govio/observe_data/core/visualizer.py +50 -0
- govio/observe_data/tools/__init__.py +1 -0
- govio/observe_data/tools/list_dataframes.py +29 -0
- govio/observe_data/tools/list_datasources.py +30 -0
- govio/observe_data/tools/load_dataframe.py +95 -0
- govio/observe_data/tools/release_dataframe.py +44 -0
- govio/observe_data/tools/visualize_relations.py +42 -0
- govio-0.2.11.dist-info/METADATA +473 -0
- govio-0.2.11.dist-info/RECORD +46 -0
- govio-0.2.11.dist-info/WHEEL +4 -0
- govio-0.2.11.dist-info/entry_points.txt +2 -0
- govio-0.2.11.dist-info/licenses/LICENSE +21 -0
govio/__init__.py
ADDED
govio/cli/__init__.py
ADDED
govio/cli/config.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import shutil
|
|
2
|
+
import yaml
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
# Top-level keys of the legacy flat format that belong to the "metadata" section
|
|
7
|
+
_METADATA_KEYS = {"kundb", "workspace_uuid", "app_list", "app_map", "relationship", "metric", "csv_dir"}
|
|
8
|
+
# Top-level keys of the legacy flat format that belong to the "graph" section
|
|
9
|
+
_GRAPH_KEYS = {"backend", "networkx", "falkordb"}
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ConfigManager:
|
|
13
|
+
"""管理 govio 配置文件"""
|
|
14
|
+
|
|
15
|
+
def __init__(self, config_path: Path | None = None) -> None:
|
|
16
|
+
if config_path is None:
|
|
17
|
+
self.config_path = Path.home() / ".govio" / "config.yaml"
|
|
18
|
+
else:
|
|
19
|
+
self.config_path = config_path
|
|
20
|
+
|
|
21
|
+
self.config_path.parent.mkdir(parents=True, exist_ok=True)
|
|
22
|
+
|
|
23
|
+
def exists(self) -> bool:
|
|
24
|
+
"""检查配置文件是否存在"""
|
|
25
|
+
return self.config_path.exists()
|
|
26
|
+
|
|
27
|
+
def load(self) -> dict[str, Any]:
|
|
28
|
+
"""加载配置文件,自动迁移旧格式"""
|
|
29
|
+
if not self.exists():
|
|
30
|
+
raise FileNotFoundError(f"配置文件不存在: {self.config_path}")
|
|
31
|
+
|
|
32
|
+
with open(self.config_path, "r", encoding="utf-8") as f:
|
|
33
|
+
config = yaml.safe_load(f) or {}
|
|
34
|
+
|
|
35
|
+
if self._is_old_format(config):
|
|
36
|
+
config = self._migrate(config)
|
|
37
|
+
|
|
38
|
+
return config
|
|
39
|
+
|
|
40
|
+
def save(self, config: dict[str, Any]) -> None:
|
|
41
|
+
"""保存配置文件"""
|
|
42
|
+
with open(self.config_path, "w", encoding="utf-8") as f:
|
|
43
|
+
yaml.dump(config, f, allow_unicode=True, default_flow_style=False)
|
|
44
|
+
|
|
45
|
+
def _is_old_format(self, config: dict[str, Any]) -> bool:
|
|
46
|
+
"""检测是否为旧的扁平格式"""
|
|
47
|
+
return "kundb" in config or ("backend" in config and "graph" not in config)
|
|
48
|
+
|
|
49
|
+
def _migrate(self, config: dict[str, Any]) -> dict[str, Any]:
|
|
50
|
+
"""将旧扁平格式迁移为新的嵌套格式"""
|
|
51
|
+
backup_path = self.config_path.with_suffix(".yaml.bak")
|
|
52
|
+
shutil.copy2(self.config_path, backup_path)
|
|
53
|
+
|
|
54
|
+
new_config: dict[str, Any] = {}
|
|
55
|
+
known_keys = _METADATA_KEYS | _GRAPH_KEYS | {"datasources"}
|
|
56
|
+
|
|
57
|
+
metadata = {}
|
|
58
|
+
for key in _METADATA_KEYS:
|
|
59
|
+
if key in config:
|
|
60
|
+
metadata[key] = config[key]
|
|
61
|
+
if metadata:
|
|
62
|
+
new_config["metadata"] = metadata
|
|
63
|
+
|
|
64
|
+
graph = {}
|
|
65
|
+
for key in _GRAPH_KEYS:
|
|
66
|
+
if key in config:
|
|
67
|
+
graph[key] = config[key]
|
|
68
|
+
if graph:
|
|
69
|
+
new_config["graph"] = graph
|
|
70
|
+
|
|
71
|
+
if "datasources" in config:
|
|
72
|
+
new_config["datasources"] = config["datasources"]
|
|
73
|
+
|
|
74
|
+
# 保留未知字段
|
|
75
|
+
for key, value in config.items():
|
|
76
|
+
if key not in known_keys and key not in new_config:
|
|
77
|
+
new_config[key] = value
|
|
78
|
+
|
|
79
|
+
self.save(new_config)
|
|
80
|
+
|
|
81
|
+
return new_config
|
|
82
|
+
|
|
83
|
+
@staticmethod
|
|
84
|
+
def _validate_backend(scope: dict[str, Any]) -> None:
|
|
85
|
+
"""验证 backend 相关配置(networkx/falkordb)"""
|
|
86
|
+
if "backend" not in scope:
|
|
87
|
+
raise ValueError("配置缺少 'backend' 字段")
|
|
88
|
+
backend = scope["backend"]
|
|
89
|
+
if backend not in ["networkx", "falkordb"]:
|
|
90
|
+
raise ValueError(f"不支持的 backend: {backend}")
|
|
91
|
+
if backend == "networkx":
|
|
92
|
+
if "networkx" not in scope:
|
|
93
|
+
raise ValueError("NetworkX backend 需要 'networkx' 配置")
|
|
94
|
+
if "gml_path" not in scope["networkx"]:
|
|
95
|
+
raise ValueError("NetworkX 配置缺少 'gml_path' 字段")
|
|
96
|
+
elif backend == "falkordb":
|
|
97
|
+
if "falkordb" not in scope:
|
|
98
|
+
raise ValueError("FalkorDB backend 需要 'falkordb' 配置")
|
|
99
|
+
for field in ["host", "port", "graph"]:
|
|
100
|
+
if field not in scope["falkordb"]:
|
|
101
|
+
raise ValueError(f"FalkorDB 配置缺少 '{field}' 字段")
|
|
102
|
+
|
|
103
|
+
def validate(self, config: dict[str, Any]) -> bool:
|
|
104
|
+
"""验证配置的有效性
|
|
105
|
+
|
|
106
|
+
支持新格式(嵌套)和旧格式(扁平)的验证。
|
|
107
|
+
"""
|
|
108
|
+
if "graph" in config:
|
|
109
|
+
self._validate_backend(config["graph"])
|
|
110
|
+
elif "backend" in config:
|
|
111
|
+
self._validate_backend(config)
|
|
112
|
+
else:
|
|
113
|
+
raise ValueError("配置缺少 'backend' 字段")
|
|
114
|
+
|
|
115
|
+
csv_dir = config.get("metadata", {}).get("csv_dir") or config.get("csv_dir")
|
|
116
|
+
if csv_dir:
|
|
117
|
+
csv_path = Path(csv_dir)
|
|
118
|
+
if not csv_path.exists():
|
|
119
|
+
raise ValueError(f"csv_dir 不存在: {csv_path}")
|
|
120
|
+
|
|
121
|
+
if "graph_dir" in config:
|
|
122
|
+
graph_path = Path(config["graph_dir"])
|
|
123
|
+
if not graph_path.exists():
|
|
124
|
+
raise ValueError(f"graph_dir 不存在: {graph_path}")
|
|
125
|
+
|
|
126
|
+
datasources = config.get("datasources")
|
|
127
|
+
if datasources:
|
|
128
|
+
if not isinstance(datasources, dict):
|
|
129
|
+
raise ValueError("datasources 必须为字典类型")
|
|
130
|
+
for name, ds_data in datasources.items():
|
|
131
|
+
if not isinstance(ds_data, dict):
|
|
132
|
+
raise ValueError(f"数据源 '{name}' 配置必须为字典类型")
|
|
133
|
+
if "url" not in ds_data:
|
|
134
|
+
raise ValueError(f"数据源 '{name}' 缺少 'url' 字段")
|
|
135
|
+
|
|
136
|
+
return True
|
govio/cli/main.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
from govio.cli.config import ConfigManager
|
|
6
|
+
|
|
7
|
+
from .meta_export import meta_export
|
|
8
|
+
from .onboard import onboard
|
|
9
|
+
from .std_recommend import std_recommend
|
|
10
|
+
from .observe import observe
|
|
11
|
+
from .query import query
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def main():
    """Entry point of the ``govio-cli`` command line tool.

    Builds the argument parser with the ``onboard``, ``backend``,
    ``std-recommend``, ``query``, ``meta-export`` and ``observe``
    sub-commands, parses ``sys.argv`` and dispatches to the matching handler.
    Without a sub-command the help text is printed and the process exits
    with status 1.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="数据治理知识图谱项目,提供元数据查询、表字段比较、SQL 生成、数据标准推荐等数据治理支持功能。",
    )
    sub = parser.add_subparsers(dest="action")

    p_onboard = sub.add_parser("onboard", help="初始化配置向导")
    p_onboard.add_argument(
        "--new-falkordb",
        type=Path,
        metavar="CSV_DIR",
        help="跳过 CSV 生成,直接将指定目录的 CSV 导入 FalkorDB",
    )
    p_onboard.add_argument(
        "--new-networkx",
        type=Path,
        metavar="CSV_DIR",
        help="跳过 CSV 生成,直接从指定目录的 CSV 生成 GML 文件",
    )
    sub.add_parser("backend", help="显示当前后端类型")

    p_std = sub.add_parser("std-recommend", help="数据标准推荐")
    p_std.add_argument("--output-dir", type=Path, help="推荐数据标准的输出目录")

    # "query" sub-command: the help text names the query language of the
    # configured backend when that can be determined.
    p_query = sub.add_parser("query", help="知识图谱查询")
    code_type = "NetworkX 用 Python 代码,FalkorDB 用 Cypher"
    config_manager = ConfigManager()
    if config_manager.exists():
        try:
            backend = (config_manager.load().get("graph") or {}).get("backend")
        except Exception:
            # The config is only consulted here to refine a help string. A
            # broken file must not prevent the parser from being built --
            # otherwise even "onboard" and "--help" would crash.
            backend = None
        if backend == "falkordb":
            code_type = "Cypher"
        elif backend == "networkx":
            code_type = "Python 代码"

    p_query.add_argument(
        "-c",
        "--code",
        help=f"查询语句({code_type})",
    )

    # "meta-export" sub-command: export metadata CSVs from DuckDB.
    p_meta = sub.add_parser("meta-export", help="从 DuckDB + TDS 合并导出全量元数据 CSV")
    p_meta.add_argument("--db", type=str, required=True, help="DuckDB 数据库文件路径")
    p_meta.add_argument(
        "--schemas",
        type=str,
        required=True,
        help="要导出的 schema 列表,逗号分隔(如 dm,dwd,dws)",
    )
    p_meta.add_argument("--output", type=Path, required=True, help="CSV 输出目录")
    p_meta.add_argument(
        "--dry-run",
        action="store_true",
        help="仅生成 CSV 并输出状态,不更新图数据和生成 assets",
    )

    # "observe" sub-command: everything after it is forwarded to observe().
    p_observe = sub.add_parser("observe", help="数据表探查")
    p_observe.add_argument(
        "observe_args", nargs=argparse.REMAINDER, help="observe 子命令参数"
    )

    args = parser.parse_args()

    if args.action == "onboard":
        onboard(new_falkordb=args.new_falkordb, new_networkx=args.new_networkx)
    elif args.action == "backend":
        if not config_manager.exists():
            print("错误: 未找到配置文件,请先运行 govio-cli onboard", file=sys.stderr)
            sys.exit(1)
        config = config_manager.load()
        backend = (config.get("graph") or {}).get("backend")
        if not backend:
            print("错误: 配置文件中未设置后端类型", file=sys.stderr)
            sys.exit(1)
        print(backend)
    elif args.action == "std-recommend":
        std_recommend(args.output_dir)
    elif args.action == "query":
        query(args.code)
    elif args.action == "meta-export":
        # Tolerate "dm, dwd" and trailing commas: strip each item and drop
        # empty ones so no blank schema name reaches the loader.
        schemas = [name.strip() for name in args.schemas.split(",") if name.strip()]
        meta_export(args.db, schemas, args.output, dry_run=args.dry_run)
    elif args.action == "observe":
        # observe() parses sys.argv itself, so hand it the remaining arguments.
        sys.argv = ["govio-cli"] + args.observe_args
        observe()
    else:
        parser.print_help()
        sys.exit(1)
|
govio/cli/meta_export.py
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from govio.cli.config import ConfigManager
|
|
7
|
+
from govio.core.graph_factory import GraphFactory
|
|
8
|
+
from govio.core.assets_generator import AssetsGenerator
|
|
9
|
+
from govio.metadata.database import TDSLoader
|
|
10
|
+
from govio.metadata.application import AppInfoLoader
|
|
11
|
+
from govio.metadata.standard import StandardLoader
|
|
12
|
+
from govio.metadata.duckdb_loader import DuckDBLoader
|
|
13
|
+
from govio.metadata.utility import reorder_index
|
|
14
|
+
from govio.metadata.relationship import load_relationships
|
|
15
|
+
from govio.metadata.metric import MetricLoader
|
|
16
|
+
|
|
17
|
+
# Directory handed to AssetsGenerator for generated assets; meta_export also
# uses <this dir>/ontology.gml as the fallback GML path when the networkx
# config has no "gml_path". The path is relative, so it is resolved against
# the current working directory of the process.
SKILLS_ASSETS_DIR = Path("skills/govio/assets")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def merge_metadata(
|
|
21
|
+
df_tds: pd.DataFrame, df_duck: pd.DataFrame, key: str
|
|
22
|
+
) -> pd.DataFrame:
|
|
23
|
+
"""TDS full + DuckDB incremental. DuckDB wins on conflict."""
|
|
24
|
+
combined = pd.concat([df_tds, df_duck], ignore_index=True)
|
|
25
|
+
return combined.drop_duplicates(subset=[key], keep="last").reset_index(drop=True)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _write_graph_csv(
    df: pd.DataFrame,
    output: Path,
    filename: str,
    flag: str,
    files: list[str],
    **to_csv_kwargs: object,
) -> None:
    """Write *df* to ``output / filename`` and record it for the bulk-insert hint.

    Args:
        df: Frame to serialize.
        output: Target directory.
        filename: CSV file name inside *output*.
        flag: ``"-n"`` for a node file, ``"-r"`` for a relationship file.
        files: Accumulator of ``"<flag> <path>"`` command-line fragments.
        **to_csv_kwargs: Forwarded unchanged to ``DataFrame.to_csv``.
    """
    path = output / filename
    df.to_csv(path, **to_csv_kwargs)
    files.append(f"{flag} {path}")


def meta_export(db_path: str, schemas: list[str], output: Path, dry_run: bool = True):
    """Export merged TDS + DuckDB metadata as graph CSV files.

    Steps, in order:

    1. Read the ``metadata`` section of the govio config (exits with status 1
       when ``kundb``, ``app_list`` or ``app_map`` is missing).
    2. Load tables/columns from TDS and from DuckDB and merge them (DuckDB
       wins on conflicts); load applications and standards.
    3. Write node CSVs (PhysicalTable, Col, Application, Standard) and edge
       CSVs (HAS_COLUMN, USE) into *output*.
    4. Optionally write RELATES_TO and the metric/dimension nodes and edges
       when the config names a relationship / metric file. A failure in
       either optional step is reported as a warning and does not abort.
    5. Print a summary and a ``falkordb-bulk-insert`` command line.
    6. Unless *dry_run*, update the configured graph backend from the CSVs
       and regenerate assets under ``SKILLS_ASSETS_DIR``.

    Args:
        db_path: Path of the DuckDB database file.
        schemas: Schema names to read from DuckDB.
        output: Directory receiving the CSV files; created if missing.
        dry_run: When True (the default), stop after step 5.
    """
    output.mkdir(parents=True, exist_ok=True)

    # --- Load config for TDS ---
    config = ConfigManager().load()
    metadata = config.get("metadata") or {}
    kundb = metadata.get("kundb", "")
    # NOTE(review): the fallback workspace UUID is hard-coded; presumably it
    # identifies one specific deployment -- confirm it is a safe default.
    workspace_uuid = metadata.get("workspace_uuid", "82ee37374b314a938bf28170ab4db7cf")
    app_list_file = metadata.get("app_list", "")
    app_map_file = metadata.get("app_map", "")
    relationship_file = metadata.get("relationship")
    metric_file = metadata.get("metric")

    if not all([kundb, app_list_file, app_map_file]):
        # Fatal errors go to stderr, like the other CLI error paths.
        print(
            "❌ 配置缺少必要字段,请检查 metadata 中的 kundb, app_list, app_map",
            file=sys.stderr,
        )
        sys.exit(1)

    df_app_db_map = pd.read_json(app_map_file, orient="records")

    # --- Load TDS metadata ---
    tds_loader = TDSLoader(kundb, workspace_uuid, df_app_db_map["schema"].to_list())
    tds_tables = tds_loader.PhysicalTable
    tds_columns = tds_loader.Col

    # --- Load DuckDB metadata ---
    duck_loader = DuckDBLoader(db_path, schemas)
    duck_tables = duck_loader.PhysicalTable
    duck_columns = duck_loader.Col

    # --- Merge ---
    df_tables = merge_metadata(tds_tables, duck_tables, "full_table_name")
    # NOTE(review): columns are de-duplicated on "column" alone. If that field
    # is not table-qualified, same-named columns of different tables collapse
    # into one row -- confirm against the loaders.
    df_columns = merge_metadata(tds_columns, duck_columns, "column")

    # --- Load apps and standards ---
    app_loader = AppInfoLoader(app_list_file, df_app_db_map["name"].to_list())
    df_apps = app_loader.Application
    std_loader = StandardLoader(kundb, workspace_uuid)
    df_stds = std_loader.Standard

    # --- Assign IDs ---
    reorder_index([df_tables, df_columns, df_apps, df_stds], start=1)

    # "-n <path>" / "-r <path>" fragments for the bulk-insert command line.
    files = []

    # --- Node CSVs (the frame index is written as the node ID column) ---
    _write_graph_csv(
        df_tables, output, "PhysicalTable.csv", "-n", files,
        index_label=":ID(PhysicalTable)",
    )
    _write_graph_csv(df_columns, output, "Col.csv", "-n", files, index_label=":ID(Col)")
    _write_graph_csv(
        df_apps, output, "Application.csv", "-n", files,
        index_label=":ID(Application)",
    )
    _write_graph_csv(
        df_stds, output, "Standard.csv", "-n", files, index_label=":ID(Standard)"
    )

    # --- HAS_COLUMN edge: table -> column, joined on full_table_name ---
    table_ids = (
        df_tables[["full_table_name"]]
        .reset_index()
        .rename(columns={"index": ":START_ID(PhysicalTable)"})
    )
    column_ids = (
        df_columns[["full_table_name"]]
        .reset_index()
        .rename(columns={"index": ":END_ID(Col)"})
    )
    df_has_column = pd.merge(
        table_ids, column_ids, on="full_table_name", how="inner"
    )[[":START_ID(PhysicalTable)", ":END_ID(Col)"]]
    _write_graph_csv(df_has_column, output, "HAS_COLUMN.csv", "-r", files, index=False)

    # --- USE edge: application -> table, via the app/schema map ---
    df_app_table = pd.merge(
        df_app_db_map,
        df_tables[["schema"]]
        .reset_index()
        .rename(columns={"index": ":END_ID(PhysicalTable)"}),
        on="schema",
        how="inner",
    )
    df_use = pd.merge(
        df_apps[["name"]]
        .reset_index()
        .rename(columns={"index": ":START_ID(Application)"}),
        df_app_table,
        on="name",
        how="inner",
    )[[":START_ID(Application)", ":END_ID(PhysicalTable)"]]
    _write_graph_csv(df_use, output, "USE.csv", "-r", files, index=False)

    # --- Optional: RELATES_TO ---
    relations_count = 0
    if relationship_file:
        try:
            df_relates_to = load_relationships(relationship_file, df_tables, df_columns)
            relations_count = len(df_relates_to)
            _write_graph_csv(
                df_relates_to,
                output,
                "RELATES_TO.csv",
                "-r",
                files,
                index=False,
                header=[
                    ":START_ID(PhysicalTable)",
                    ":END_ID(PhysicalTable)",
                    "relationship_type",
                    "description",
                    "source_columns",
                    "target_columns",
                ],
            )
            print(f"成功生成 RELATES_TO.csv,包含 {len(df_relates_to)} 个关系 来自[{relationship_file}]")
        except Exception as e:
            # Best effort: relationships are optional, keep exporting.
            print(f"警告: 无法加载关系文件: {e}")

    # --- Optional: metrics ---
    metric_count = 0
    if metric_file:
        try:
            metric_loader = MetricLoader(metric_file, df_tables, df_columns)
            df_metrics = metric_loader.Metric
            df_dimensions = metric_loader.Dimension

            # Metric/Dimension IDs continue after the nodes numbered above.
            metric_offset = (
                len(df_tables) + len(df_columns) + len(df_apps) + len(df_stds) + 1
            )
            dim_offset = metric_offset + len(df_metrics)
            reorder_index([df_metrics, df_dimensions], start=metric_offset)

            _write_graph_csv(
                df_metrics, output, "Metric.csv", "-n", files, index_label=":ID(Metric)"
            )
            _write_graph_csv(
                df_dimensions, output, "Dimension.csv", "-n", files,
                index_label=":ID(Dimension)",
            )

            # The edge frames are copied before shifting their ID columns so
            # the loader's own frames stay untouched.

            # USES_TABLE edge
            uses_table = metric_loader.uses_table_edges.copy()
            if not uses_table.empty:
                uses_table[":START_ID(Metric)"] += metric_offset
                _write_graph_csv(
                    uses_table, output, "USES_TABLE.csv", "-r", files, index=False
                )

            # REFERS_COLUMN edge
            refers_col = metric_loader.refers_column_edges.copy()
            if not refers_col.empty:
                refers_col[":START_ID(Metric)"] += metric_offset
                _write_graph_csv(
                    refers_col, output, "REFERS_COLUMN.csv", "-r", files, index=False
                )

            # DERIVED_FROM edge (metric -> metric, both ends shifted)
            derived_from = metric_loader.derived_from_edges.copy()
            if not derived_from.empty:
                derived_from[":START_ID(Metric)"] += metric_offset
                derived_from[":END_ID(Metric)"] += metric_offset
                _write_graph_csv(
                    derived_from, output, "DERIVED_FROM.csv", "-r", files, index=False
                )

            # DIMENSION_USED edge (metric -> dimension)
            dim_used = metric_loader.dimension_used_edges.copy()
            if not dim_used.empty:
                dim_used[":START_ID(Metric)"] += metric_offset
                dim_used[":END_ID(Dimension)"] += dim_offset
                _write_graph_csv(
                    dim_used, output, "DIMENSION_USED.csv", "-r", files, index=False
                )

            # SUPERSEDES edge
            # NOTE(review): unlike the other metric edges, no ID offset is
            # applied here -- confirm supersedes_edges already holds final IDs.
            supersedes = metric_loader.supersedes_edges
            if not supersedes.empty:
                _write_graph_csv(
                    supersedes, output, "SUPERSEDES.csv", "-r", files, index=False
                )

            print(
                f"成功生成指标数据:{len(df_metrics)} 个指标, "
                f"{len(df_dimensions)} 个维度"
            )
            metric_count = len(df_metrics)
        except Exception as e:
            # Best effort: metrics are optional, keep exporting.
            print(f"警告: 无法加载指标定义文件: {e}")

    # --- Summary ---
    print(f"成功导出: {len(df_tables)} 张表, {len(df_columns)} 个字段, "
          f"{len(df_apps)} 个应用, {len(df_stds)} 个标准, {relations_count}个数据关系, {metric_count}个指标")
    print(f"\nfalkordb-bulk-insert {{GRAPH}} {' '.join(files)}")

    if dry_run:
        return

    # --- Update graph and generate assets ---
    # Imported here rather than at module level, as in the original code.
    from govio.cli.onboard import import_csv_to_falkordb
    from govio.metadata.gen_networkx import build_graph

    graph = config.get("graph") or {}
    backend = graph.get("backend")
    if not backend:
        print("警告: 配置中未指定 backend,跳过图数据更新和 assets 生成")
        return

    # Update graph
    if backend == "falkordb":
        # "or {}" also covers a "falkordb:" key with a null value.
        falkordb_cfg = graph.get("falkordb") or {}
        host = falkordb_cfg.get("host", "localhost")
        port = falkordb_cfg.get("port", 6379)
        graph_name = falkordb_cfg.get("graph", "ontology")
        print(f"\n正在导入 CSV 到 FalkorDB ({host}:{port}/{graph_name})...")
        try:
            import_csv_to_falkordb(output, host, port, graph_name)
            print("✓ FalkorDB 数据已更新")
        except Exception as e:
            print(f"❌ 导入 FalkorDB 失败: {e}")
            return
    elif backend == "networkx":
        networkx_cfg = graph.get("networkx") or {}
        gml_path = networkx_cfg.get("gml_path", str(SKILLS_ASSETS_DIR / "ontology.gml"))
        print(f"\n正在从 CSV 生成 GML 文件 ({gml_path})...")
        try:
            build_graph(str(output), gml_path)
            print("✓ GML 文件已更新")
        except Exception as e:
            print(f"❌ 生成 GML 失败: {e}")
            return

    # Generate assets
    print("\n正在生成 assets...")
    try:
        graph_obj = GraphFactory.create(graph)
        generator = AssetsGenerator(graph_obj, SKILLS_ASSETS_DIR)
        generator.generate_all()
        print(f"✓ Assets 已生成到: {SKILLS_ASSETS_DIR}")
    except Exception as e:
        print(f"❌ 生成 assets 失败: {e}")
        return

    print("\n✅ meta-export 完成!")
|