dtflow 0.5.5__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtflow/SKILL.md +225 -0
- dtflow/__init__.py +1 -1
- dtflow/__main__.py +33 -52
- dtflow/cli/commands.py +16 -10
- dtflow/cli/sample.py +159 -11
- dtflow/cli/skill.py +72 -0
- {dtflow-0.5.5.dist-info → dtflow-0.5.7.dist-info}/METADATA +43 -4
- {dtflow-0.5.5.dist-info → dtflow-0.5.7.dist-info}/RECORD +10 -13
- dtflow/mcp/__init__.py +0 -29
- dtflow/mcp/__main__.py +0 -18
- dtflow/mcp/cli.py +0 -388
- dtflow/mcp/docs.py +0 -416
- dtflow/mcp/server.py +0 -153
- {dtflow-0.5.5.dist-info → dtflow-0.5.7.dist-info}/WHEEL +0 -0
- {dtflow-0.5.5.dist-info → dtflow-0.5.7.dist-info}/entry_points.txt +0 -0
dtflow/cli/skill.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Claude Code Skill 安装命令
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import shutil
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from rich.console import Console
|
|
9
|
+
|
|
10
|
+
console = Console()
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_skill_source_path() -> Path:
    """Return the path of the SKILL.md file shipped with the package.

    The file is looked up one directory above the directory that contains
    this module.
    """
    package_root = Path(__file__).parent.parent
    return package_root / "SKILL.md"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_skill_target_dir() -> Path:
    """Return the skill installation directory: ``~/.claude/skills/dtflow``."""
    return Path.home().joinpath(".claude", "skills", "dtflow")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def install_skill() -> None:
    """Install the bundled dtflow skill into Claude Code.

    Copies the packaged SKILL.md into the skill target directory (creating
    the directory when needed) and prints where it was installed.

    Raises:
        SystemExit: With exit code 1 when the bundled SKILL.md does not
            exist, or when the target directory/file cannot be written.
    """
    source = get_skill_source_path()
    target_dir = get_skill_target_dir()
    target = target_dir / "SKILL.md"

    if not source.exists():
        console.print("[red]错误: SKILL.md 源文件不存在[/red]")
        raise SystemExit(1)

    try:
        # Create the target directory, then copy the file into it.
        target_dir.mkdir(parents=True, exist_ok=True)
        shutil.copy2(source, target)
    except OSError as exc:
        # Report filesystem failures (permissions, read-only home, ...) the
        # same way as the missing-source case instead of a raw traceback.
        # markup=False: the OS error text embeds arbitrary file paths that
        # must not be interpreted as console markup.
        console.print(f"错误: 无法安装 skill: {exc}", style="red", markup=False)
        raise SystemExit(1) from exc

    console.print("[green]✓[/green] 已安装 dtflow skill 到 Claude Code")
    console.print(f" [dim]{target}[/dim]")
    console.print()
    console.print("[dim]在 Claude Code 中使用 /dtflow 调用此 skill[/dim]")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def uninstall_skill() -> None:
    """Remove the installed dtflow skill file.

    The skill directory is removed as well when it is left empty. When no
    skill file is present, only a notice is printed.
    """
    skill_dir = get_skill_target_dir()
    skill_file = skill_dir / "SKILL.md"

    if skill_file.exists():
        skill_file.unlink()

        # Drop the directory too, but only when nothing else lives in it.
        if skill_dir.exists() and next(skill_dir.iterdir(), None) is None:
            skill_dir.rmdir()

        console.print("[green]✓[/green] 已卸载 dtflow skill")
    else:
        console.print("[yellow]dtflow skill 未安装[/yellow]")
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def skill_status() -> None:
    """Print whether the dtflow skill file exists in the target directory."""
    installed_file = get_skill_target_dir() / "SKILL.md"

    if not installed_file.exists():
        console.print("[yellow]✗[/yellow] dtflow skill 未安装")
        console.print(" [dim]运行 dt install-skill 安装[/dim]")
        return

    console.print("[green]✓[/green] dtflow skill 已安装")
    console.print(f" [dim]{installed_file}[/dim]")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dtflow
|
|
3
|
-
Version: 0.5.5
|
|
3
|
+
Version: 0.5.7
|
|
4
4
|
Summary: A flexible data transformation tool for ML training formats (SFT, RLHF, Pretrain)
|
|
5
5
|
Project-URL: Homepage, https://github.com/yourusername/DataTransformer
|
|
6
6
|
Project-URL: Documentation, https://github.com/yourusername/DataTransformer#readme
|
|
@@ -69,8 +69,6 @@ Requires-Dist: tokenizers>=0.15.0; extra == 'full'
|
|
|
69
69
|
Requires-Dist: toolong>=1.5.0; extra == 'full'
|
|
70
70
|
Provides-Extra: logs
|
|
71
71
|
Requires-Dist: toolong>=1.5.0; extra == 'logs'
|
|
72
|
-
Provides-Extra: mcp
|
|
73
|
-
Requires-Dist: mcp>=1.0.0; extra == 'mcp'
|
|
74
72
|
Provides-Extra: similarity
|
|
75
73
|
Requires-Dist: datasketch>=1.5.0; extra == 'similarity'
|
|
76
74
|
Requires-Dist: scikit-learn>=0.24.0; extra == 'similarity'
|
|
@@ -99,6 +97,17 @@ pip install transformers # Token 统计(HuggingFace 模型)
|
|
|
99
97
|
pip install datasets # HuggingFace Dataset 转换
|
|
100
98
|
```
|
|
101
99
|
|
|
100
|
+
## 🤖 Claude Code 集成
|
|
101
|
+
|
|
102
|
+
dtflow 内置了 [Claude Code](https://docs.anthropic.com/en/docs/claude-code) skill:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
dt install-skill # 安装 skill
|
|
106
|
+
dt skill-status # 查看状态
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
安装后在 Claude Code 中输入 `/dtflow`,Claude 将掌握 dtflow 的完整用法,可直接协助你完成数据处理任务。
|
|
110
|
+
|
|
102
111
|
## 快速开始
|
|
103
112
|
|
|
104
113
|
```python
|
|
@@ -423,6 +432,8 @@ dt sample data.csv --num=100 --sample_type=head
|
|
|
423
432
|
dt sample data.jsonl 1000 --by=category # 分层采样
|
|
424
433
|
dt sample data.jsonl 1000 --by=meta.source # 按嵌套字段分层采样
|
|
425
434
|
dt sample data.jsonl 1000 --by=messages.# # 按消息数量分层采样
|
|
435
|
+
dt sample data.jsonl --where="category=tech" # 筛选后采样
|
|
436
|
+
dt sample data.jsonl --where="messages.#>=2" # 多条件筛选
|
|
426
437
|
|
|
427
438
|
# 数据转换 - 预设模式
|
|
428
439
|
dt transform data.jsonl --preset=openai_chat
|
|
@@ -471,6 +482,10 @@ dt concat a.jsonl b.jsonl -o merged.jsonl
|
|
|
471
482
|
# 数据统计
|
|
472
483
|
dt stats data.jsonl
|
|
473
484
|
|
|
485
|
+
# Claude Code Skill 安装
|
|
486
|
+
dt install-skill # 安装到 ~/.claude/skills/
|
|
487
|
+
dt skill-status # 查看安装状态
|
|
488
|
+
|
|
474
489
|
# 数据验证
|
|
475
490
|
dt validate data.jsonl --preset=openai_chat # 使用预设 schema 验证
|
|
476
491
|
dt validate data.jsonl --preset=alpaca --verbose # 详细输出
|
|
@@ -496,7 +511,7 @@ CLI 命令中的字段参数支持嵌套路径语法,可访问深层嵌套的
|
|
|
496
511
|
|
|
497
512
|
| 命令 | 参数 | 示例 |
|
|
498
513
|
|------|------|------|
|
|
499
|
-
| `sample` | `--by=` | `--by=meta.source`、`--
|
|
514
|
+
| `sample` | `--by=`, `--where=` | `--by=meta.source`、`--where=messages.#>=2` |
|
|
500
515
|
| `dedupe` | `--key=` | `--key=meta.id`、`--key=messages[0].content` |
|
|
501
516
|
| `clean` | `--drop-empty=` | `--drop-empty=meta.source` |
|
|
502
517
|
| `clean` | `--min-len=` | `--min-len=messages.#:2` |
|
|
@@ -504,6 +519,18 @@ CLI 命令中的字段参数支持嵌套路径语法,可访问深层嵌套的
|
|
|
504
519
|
| `token-stats` | `--field=` | `--field=messages[-1].content` |
|
|
505
520
|
| `diff` | `--key=` | `--key=meta.uuid` |
|
|
506
521
|
|
|
522
|
+
`--where` 支持的操作符:
|
|
523
|
+
|
|
524
|
+
| 操作符 | 含义 | 示例 |
|
|
525
|
+
|--------|------|------|
|
|
526
|
+
| `=` | 等于 | `--where="category=tech"` |
|
|
527
|
+
| `!=` | 不等于 | `--where="source!=wiki"` |
|
|
528
|
+
| `~=` | 包含 | `--where="content~=机器学习"` |
|
|
529
|
+
| `>` | 大于 | `--where="score>0.8"` |
|
|
530
|
+
| `>=` | 大于等于 | `--where="messages.#>=2"` |
|
|
531
|
+
| `<` | 小于 | `--where="length<1000"` |
|
|
532
|
+
| `<=` | 小于等于 | `--where="turns<=10"` |
|
|
533
|
+
|
|
507
534
|
示例数据:
|
|
508
535
|
```json
|
|
509
536
|
{"meta": {"source": "wiki"}, "messages": [{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hello"}]}
|
|
@@ -601,6 +628,18 @@ dt history processed.jsonl
|
|
|
601
628
|
dt history processed.jsonl --json # JSON 格式输出
|
|
602
629
|
```
|
|
603
630
|
|
|
631
|
+
### 日志查看
|
|
632
|
+
|
|
633
|
+
dtflow 内置了 [toolong](https://github.com/Textualize/toolong) 日志查看器:
|
|
634
|
+
|
|
635
|
+
```bash
|
|
636
|
+
pip install dtflow[logs] # 安装日志工具
|
|
637
|
+
|
|
638
|
+
tl app.log # 交互式 TUI 查看
|
|
639
|
+
tl --tail app.log # 实时跟踪(类似 tail -f)
|
|
640
|
+
dt logs # 查看使用说明
|
|
641
|
+
```
|
|
642
|
+
|
|
604
643
|
### 大文件流式处理
|
|
605
644
|
|
|
606
645
|
专为超大文件设计的流式处理接口,内存占用 O(1),支持 JSONL、CSV、Parquet、Arrow 格式:
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
dtflow/
|
|
2
|
-
dtflow/
|
|
1
|
+
dtflow/SKILL.md,sha256=sHf6i6DKUCca5zvSJ67VHu05tFlST4mYgnoURXVe1g0,7836
|
|
2
|
+
dtflow/__init__.py,sha256=Ee7CDDxzki69MEGeXB5bczuMts5OwZZ-jVsKjH_rD_0,3031
|
|
3
|
+
dtflow/__main__.py,sha256=3LXTku09Fw1dsgTUtX1UJCmE20qKeZpNga3UqmI3UiY,12145
|
|
3
4
|
dtflow/converters.py,sha256=X3qeFD7FCOMnfiP3MicL5MXimOm4XUYBs5pczIkudU0,22331
|
|
4
5
|
dtflow/core.py,sha256=qMo6B3LK--TWRK7ZBKObGcs3pKFnd0NPoaM0T8JC7Jw,38135
|
|
5
6
|
dtflow/framework.py,sha256=jyICi_RWHjX7WfsXdSbWmP1SL7y1OWSPyd5G5Y-lvg4,17578
|
|
@@ -11,27 +12,23 @@ dtflow/streaming.py,sha256=dxpNd1-Wz_PTLTdvM5qn06_2TJr5NRlIIuw0LOSS2Iw,24755
|
|
|
11
12
|
dtflow/tokenizers.py,sha256=7ZAelSmcDxLWH5kICgH9Q1ULH3_BfDZb9suHMjJJRZU,20589
|
|
12
13
|
dtflow/cli/__init__.py,sha256=QhZ-thgx9IBTFII7T_hdoWFUl0CCsdGQHN5ZEZw2XB0,423
|
|
13
14
|
dtflow/cli/clean.py,sha256=y9VCRibgK1j8WIY3h0XZX0m93EdELQC7TdnseMWwS-0,17799
|
|
14
|
-
dtflow/cli/commands.py,sha256=
|
|
15
|
+
dtflow/cli/commands.py,sha256=zKUG-B9Az-spqyqM00cR8Sgc2UgeOPQDThJFHWDNO_w,1336
|
|
15
16
|
dtflow/cli/common.py,sha256=gCwnF5Sw2ploqfZJO_z3Ms9mR1HNT7Lj6ydHn0uVaIw,13817
|
|
16
17
|
dtflow/cli/io_ops.py,sha256=BMDisP6dxzzmSjYwmeFwaHmpHHPqirmXAWeNTD-9MQM,13254
|
|
17
18
|
dtflow/cli/lineage.py,sha256=_lNh35nF9AA0Zy6FyZ4g8IzrXH2ZQnp3inF-o2Hs1pw,1383
|
|
18
19
|
dtflow/cli/pipeline.py,sha256=QNEo-BJlaC1CVnVeRZr7TwfuZYloJ4TebIzJ5ALzry0,1426
|
|
19
|
-
dtflow/cli/sample.py,sha256=
|
|
20
|
+
dtflow/cli/sample.py,sha256=pubpx4AIzsarBEalD150MC2apYQSt4bal70IZkTfFO0,15475
|
|
21
|
+
dtflow/cli/skill.py,sha256=opiTEBejA7JHKrEMftMOPDQlOgZ4n59rwaHXGU1Nukk,2022
|
|
20
22
|
dtflow/cli/stats.py,sha256=u4ehCfgw1X8WuOyAjrApMRgcIO3BVmINbsTjxEscQro,24086
|
|
21
23
|
dtflow/cli/transform.py,sha256=w6xqMOxPxQvL2u_BPCfpDHuPSC9gmcqMPVN8s-B6bbY,15052
|
|
22
24
|
dtflow/cli/validate.py,sha256=65aGVlMS_Rq0Ch0YQ-TclVJ03RQP4CnG137wthzb8Ao,4384
|
|
23
|
-
dtflow/mcp/__init__.py,sha256=huEJ3rXDbxDRjsLPEvjNT2u3tWs6Poiv6fokPIrByjw,897
|
|
24
|
-
dtflow/mcp/__main__.py,sha256=PoT2ZZmJq9xDZxDACJfqDW9Ld_ukHrGNK-0XUd7WGnY,448
|
|
25
|
-
dtflow/mcp/cli.py,sha256=ck0oOS_642cNktxULaMRE7BJfMxsBCwotmCj3PSPwVk,13110
|
|
26
|
-
dtflow/mcp/docs.py,sha256=DI2Vf-eFo4chRP_bDLsv4Uc3kJt8_1emz8N-NBSVirM,8834
|
|
27
|
-
dtflow/mcp/server.py,sha256=Nf0UlqDGhV55ndGuEglfr7VRjDWAC_9rRsNhdr0-ssM,4275
|
|
28
25
|
dtflow/storage/__init__.py,sha256=C0jpWNQU808Ezz7lWneddABal3wILy8ijFUNiSKbHV4,362
|
|
29
26
|
dtflow/storage/io.py,sha256=ZH2aSE-S89gpy3z4oTqhcqWf4u10OdkDoyul7o_YBDI,23374
|
|
30
27
|
dtflow/utils/__init__.py,sha256=Pn-ltwV04fBQmeZG7FxInDQmzH29LYOi90LgeLMEuQk,506
|
|
31
28
|
dtflow/utils/display.py,sha256=OeOdTh6mbDwSkDWlmkjfpTjy2QG8ZUaYU0NpHUWkpEQ,5881
|
|
32
29
|
dtflow/utils/field_path.py,sha256=K8nU196RxTSJ1OoieTWGcYOWl9KjGq2iSxCAkfjECuM,7621
|
|
33
30
|
dtflow/utils/helpers.py,sha256=JXN176_B2pm53GLVyZ1wj3wrmBJG52Tkw6AMQSdj7M8,791
|
|
34
|
-
dtflow-0.5.
|
|
35
|
-
dtflow-0.5.
|
|
36
|
-
dtflow-0.5.
|
|
37
|
-
dtflow-0.5.
|
|
31
|
+
dtflow-0.5.7.dist-info/METADATA,sha256=mlWaRHSM1ZucQrAa8PGcHzjHj2RQPBynnmdA_JoNSNI,23899
|
|
32
|
+
dtflow-0.5.7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
33
|
+
dtflow-0.5.7.dist-info/entry_points.txt,sha256=dadIDOK7Iu9pMxnMPBfpb4aAPe4hQbBOshpQYjVYpGc,44
|
|
34
|
+
dtflow-0.5.7.dist-info/RECORD,,
|
dtflow/mcp/__init__.py
DELETED
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
"""DataTransformer MCP (Model Context Protocol) 服务
|
|
2
|
-
|
|
3
|
-
提供 DataTransformer 的用法查询功能,供 AI 模型调用。
|
|
4
|
-
|
|
5
|
-
使用方式:
|
|
6
|
-
# 安装 MCP 服务到 Claude Code
|
|
7
|
-
dt mcp install
|
|
8
|
-
|
|
9
|
-
# 运行 MCP 服务(通常由 Claude 自动调用)
|
|
10
|
-
dt-mcp
|
|
11
|
-
|
|
12
|
-
注意: MCP 功能需要安装 mcp 依赖: pip install dtflow[mcp]
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
__all__ = ["main", "mcp"]
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def __getattr__(name):
    """Lazily resolve ``main`` and ``mcp`` from the ``server`` submodule.

    The import is deferred so that importing this package does not fail
    when the optional mcp dependency is not installed.

    Raises:
        ImportError: When ``main``/``mcp`` is requested but the server
            module cannot be imported; the original error is chained.
        AttributeError: For any other attribute name.
    """
    if name not in ("main", "mcp"):
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    try:
        from .server import main, mcp
    except ImportError as e:
        raise ImportError(
            f"MCP 功能需要安装 mcp 依赖: pip install dtflow[mcp]\n原始错误: {e}"
        ) from e

    if name == "main":
        return main
    return mcp
|
dtflow/mcp/__main__.py
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
"""Datatron MCP 服务入口
|
|
2
|
-
|
|
3
|
-
使用方式:
|
|
4
|
-
python -m dtflow.mcp
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
if __name__ == "__main__":
    try:
        from .server import main

        main()
    except ImportError as err:
        # An ImportError raised while importing the server module or while
        # running main() ends up here: print the install hint and exit 1.
        import sys

        for message in (
            "错误: MCP 功能需要安装 mcp 依赖",
            "请运行: pip install dtflow[mcp]",
            f"\n原始错误: {err}",
        ):
            print(message, file=sys.stderr)
        sys.exit(1)
|
dtflow/mcp/cli.py
DELETED
|
@@ -1,388 +0,0 @@
|
|
|
1
|
-
"""Datatron MCP CLI 命令
|
|
2
|
-
|
|
3
|
-
提供 MCP 服务的安装和管理命令。
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
import os
|
|
7
|
-
import platform
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
from typing import Literal
|
|
10
|
-
|
|
11
|
-
import orjson
|
|
12
|
-
|
|
13
|
-
try:
    from rich import print
    from rich.console import Console

    console = Console()
except ImportError:
    # rich is not available: there is no console object, and the module-level
    # print simply forwards to the builtin print.
    console = None

    def print(*values, **options):
        from builtins import print as builtin_print

        builtin_print(*values, **options)
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
# 支持的目标类型
|
|
28
|
-
TargetType = Literal["desktop", "code", "all"]
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def get_claude_desktop_config_path() -> Path:
    """Return the Claude Desktop config file path for the current OS.

    Raises:
        RuntimeError: When ``platform.system()`` is not Darwin, Windows or
            Linux.
    """
    config_name = "claude_desktop_config.json"
    os_name = platform.system()

    if os_name == "Darwin":  # macOS
        return Path.home() / "Library" / "Application Support" / "Claude" / config_name
    if os_name == "Windows":
        # With APPDATA unset this yields a path relative to the current directory.
        return Path(os.environ.get("APPDATA", "")) / "Claude" / config_name
    if os_name == "Linux":
        return Path.home() / ".config" / "Claude" / config_name

    raise RuntimeError(f"不支持的操作系统: {os_name}")
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def get_claude_code_config_path() -> Path:
    """Return the Claude Code config file path: ``~/.claude.json``."""
    return Path.home().joinpath(".claude.json")
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
def get_dt_mcp_command() -> list[str]:
    """Return the command used to launch the MCP server.

    The ``python -m`` form is used, as a more general alternative to a
    dedicated executable path.
    """
    module_name = "dtflow.mcp"
    return ["python", "-m", module_name]
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def _install_to_config(config_path: Path, name: str, target_name: str) -> bool:
    """Register the MCP server entry in the given JSON config file.

    An existing config is read first; if it cannot be decoded as JSON a
    warning is printed and a fresh config is written instead. The entry
    ``mcpServers[name]`` is set to a stdio server built from
    ``get_dt_mcp_command()``.

    Args:
        config_path: Config file to update; its parent directory is created
            when missing.
        name: Key of the server entry inside ``mcpServers``.
        target_name: Human-readable target name used in messages.

    Returns:
        True when the config was written, False when writing failed.
    """
    # Make sure the directory that holds the config exists.
    config_path.parent.mkdir(parents=True, exist_ok=True)

    # Load the current config, if any.
    settings = {}
    if config_path.exists():
        try:
            settings = orjson.loads(config_path.read_bytes())
        except orjson.JSONDecodeError:
            notice = f"{target_name} 配置文件格式错误,将创建新配置"
            if console:
                console.print(f"[yellow]警告:[/yellow] {notice}")
            else:
                print(f"警告: {notice}")

    # Make sure the mcpServers section exists.
    if "mcpServers" not in settings:
        settings["mcpServers"] = {}

    launch = get_dt_mcp_command()
    entry = {
        "type": "stdio",
        "command": launch[0],
        "args": launch[1:],
    }
    settings["mcpServers"][name] = entry

    # Persist the updated config.
    try:
        with open(config_path, "wb") as out:
            out.write(orjson.dumps(settings, option=orjson.OPT_INDENT_2))
    except Exception as e:
        if console:
            console.print(f"[bold red]错误:[/bold red] 无法写入 {target_name} 配置文件: {e}")
        else:
            print(f"错误: 无法写入 {target_name} 配置文件: {e}")
        return False
    return True
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
def _uninstall_from_config(config_path: Path, name: str, target_name: str) -> bool:
    """Remove the MCP server entry from the given JSON config file.

    Args:
        config_path: Config file to update.
        name: Key of the server entry inside ``mcpServers``.
        target_name: Human-readable target name (not used by this function).

    Returns:
        True when the entry was removed and the file rewritten; False when
        the file is missing, is not valid JSON, has no such entry, or
        cannot be written.
    """
    if not config_path.exists():
        return False

    try:
        settings = orjson.loads(config_path.read_bytes())
    except orjson.JSONDecodeError:
        return False

    registered = "mcpServers" in settings and name in settings["mcpServers"]
    if not registered:
        return False

    del settings["mcpServers"][name]

    try:
        with open(config_path, "wb") as out:
            out.write(orjson.dumps(settings, option=orjson.OPT_INDENT_2))
    except Exception:
        return False
    return True
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
def _show_config_status(config_path: Path, target_name: str):
    """Print the status of a single config file.

    Shows the file path and whether it exists; for an existing, decodable
    file it also lists the names under ``mcpServers``, marking entries whose
    name contains "data" or "dt" or whose command contains "transformer".

    Args:
        config_path: Config file to inspect.
        target_name: Human-readable target name used in the heading.
    """
    if console:
        present = "[green]是[/green]" if config_path.exists() else "[yellow]否[/yellow]"
        console.print(f"\n[bold]{target_name} 配置:[/bold]")
        console.print(f" 路径: [bold blue]{config_path}[/bold blue]")
        console.print(f" 存在: {present}")
    else:
        present = "是" if config_path.exists() else "否"
        print(f"\n{target_name} 配置:")
        print(f" 路径: {config_path}")
        print(f" 存在: {present}")

    if not config_path.exists():
        return

    try:
        settings = orjson.loads(config_path.read_bytes())
    except orjson.JSONDecodeError:
        if console:
            console.print(" [red]配置文件格式错误[/red]")
        else:
            print(" 配置文件格式错误")
        return

    servers = settings.get("mcpServers", {})
    if not servers:
        return

    if console:
        console.print(" 已安装的 MCP 服务:")
    else:
        print(" 已安装的 MCP 服务:")

    for server_name, entry in servers.items():
        launcher = entry.get("command", "N/A")
        lowered = server_name.lower()
        looks_like_dt = "data" in lowered or "dt" in lowered or "transformer" in str(launcher)
        if console:
            flag = "[green]*[/green]" if looks_like_dt else " "
            console.print(f" {flag} [cyan]{server_name}[/cyan]")
        else:
            flag = "*" if looks_like_dt else " "
            print(f" {flag} {server_name}")
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
class MCPCommands:
    """MCP service management commands: install, uninstall, status, test."""

    def install(self, name: str = "datatron", target: str = "code"):
        """
        Install the Datatron MCP service.

        Args:
            name: MCP service name (default: datatron)
            target: Install target - 'desktop' (Claude Desktop),
                'code' (Claude Code), 'all' (both)

        Examples:
            # Install into Claude Code (recommended)
            dt mcp install

            # Install into every target
            dt mcp install --target all

            # Install into Claude Desktop only
            dt mcp install --target desktop

            # Custom service name
            dt mcp install --name my-dt
        """
        command = get_dt_mcp_command()
        installed_targets = []

        # Claude Desktop
        if target in ("desktop", "all"):
            try:
                desktop_path = get_claude_desktop_config_path()
                if _install_to_config(desktop_path, name, "Claude Desktop"):
                    installed_targets.append(("Claude Desktop", desktop_path))
            except RuntimeError:
                # Unsupported OS: fatal only when Claude Desktop was the sole
                # requested target; with 'all' the Claude Code install below
                # still runs.
                if target == "desktop":
                    if console:
                        console.print("[bold red]错误:[/bold red] 不支持的操作系统")
                    else:
                        print("错误: 不支持的操作系统")
                    return

        # Claude Code
        if target in ("code", "all"):
            code_path = get_claude_code_config_path()
            if _install_to_config(code_path, name, "Claude Code"):
                installed_targets.append(("Claude Code", code_path))

        if not installed_targets:
            if console:
                console.print("[bold red]错误:[/bold red] 安装失败")
            else:
                print("错误: 安装失败")
            return

        if console:
            console.print("\n[bold green]Datatron MCP 服务安装成功[/bold green]\n")
            console.print(f"服务名称: [bold blue]{name}[/bold blue]")
            console.print(f"命令: [bold blue]{' '.join(command)}[/bold blue]")
            console.print("\n已安装到:")
            for target_name, config_path in installed_targets:
                console.print(f" - {target_name}: [dim]{config_path}[/dim]")
            console.print("\n[dim]请重启 Claude Desktop/Code 以使配置生效[/dim]")
        else:
            print("\nDatatron MCP 服务安装成功\n")
            print(f"服务名称: {name}")
            print(f"命令: {' '.join(command)}")
            print("\n已安装到:")
            for target_name, config_path in installed_targets:
                print(f" - {target_name}: {config_path}")
            print("\n请重启 Claude Desktop/Code 以使配置生效")

    def uninstall(self, name: str = "datatron", target: str = "all"):
        """
        Remove the Datatron MCP service.

        Args:
            name: MCP service name (default: datatron)
            target: Removal target - 'desktop', 'code', 'all'

        Examples:
            dt mcp uninstall
            dt mcp uninstall --target code
        """
        removed_targets = []

        # Claude Desktop
        if target in ("desktop", "all"):
            try:
                desktop_path = get_claude_desktop_config_path()
                if _uninstall_from_config(desktop_path, name, "Claude Desktop"):
                    removed_targets.append("Claude Desktop")
            except RuntimeError:
                # Unsupported OS: there is no Claude Desktop config to clean.
                pass

        # Claude Code
        if target in ("code", "all"):
            code_path = get_claude_code_config_path()
            if _uninstall_from_config(code_path, name, "Claude Code"):
                removed_targets.append("Claude Code")

        if removed_targets:
            if console:
                console.print("\n[bold green]Datatron MCP 服务已移除[/bold green]")
                console.print(f"从以下位置移除: {', '.join(removed_targets)}")
                console.print("\n[dim]请重启 Claude Desktop/Code 以使配置生效[/dim]")
            else:
                print("\nDatatron MCP 服务已移除")
                print(f"从以下位置移除: {', '.join(removed_targets)}")
                print("\n请重启 Claude Desktop/Code 以使配置生效")
        else:
            if console:
                console.print(f"[yellow]未找到名为 '{name}' 的 MCP 服务[/yellow]")
            else:
                print(f"未找到名为 '{name}' 的 MCP 服务")

    def status(self):
        """
        Show the installation status of the Datatron MCP service.

        Examples:
            dt mcp status
        """
        # Claude Desktop
        try:
            desktop_path = get_claude_desktop_config_path()
            _show_config_status(desktop_path, "Claude Desktop")
        except RuntimeError:
            # Unsupported OS: skip the Claude Desktop section.
            pass

        # Claude Code
        code_path = get_claude_code_config_path()
        _show_config_status(code_path, "Claude Code")

        # Check whether the mcp dependency is installed.
        if console:
            console.print("\n[bold]依赖状态:[/bold]")
        else:
            print("\n依赖状态:")

        try:
            import mcp  # noqa: F401  (imported only to probe availability)

            if console:
                console.print(" mcp: [green]已安装[/green]")
            else:
                print(" mcp: 已安装")
        except ImportError:
            if console:
                # "\[" escapes the bracket so the console markup parser does
                # not treat "[mcp]" as a style tag and drop it from the hint.
                console.print(" mcp: [red]未安装[/red] (运行 'pip install dtflow\\[mcp]')")
            else:
                print(" mcp: 未安装 (运行 'pip install dtflow[mcp]')")

    def test(self):
        """
        Check that the Datatron MCP service works.

        Examples:
            dt mcp test
        """
        if console:
            console.print("\n[bold]测试 Datatron MCP 服务...[/bold]\n")
        else:
            print("\n测试 Datatron MCP 服务...\n")

        # Check the dependency.
        try:
            from dtflow.mcp import mcp  # noqa: F401

            if console:
                console.print("[green]OK[/green] MCP 模块导入成功")
            else:
                print("OK MCP 模块导入成功")
        except ImportError as e:
            # The install hint names the dtflow package (matching the other
            # hints in this class); "\[" keeps "[mcp]" visible in markup output.
            if console:
                console.print(f"[red]FAIL[/red] MCP 模块导入失败: {e}")
                console.print("\n请安装 mcp 依赖: pip install dtflow\\[mcp]")
            else:
                print(f"FAIL MCP 模块导入失败: {e}")
                print("\n请安装 mcp 依赖: pip install dtflow[mcp]")
            return

        # Check the documentation.
        try:
            from dtflow.mcp.docs import DOCS, TOPICS  # noqa: F401

            if console:
                console.print(f"[green]OK[/green] 文档加载成功 ({len(TOPICS)} 个主题)")
            else:
                print(f"OK 文档加载成功 ({len(TOPICS)} 个主题)")
        except ImportError as e:
            if console:
                console.print(f"[red]FAIL[/red] 文档加载失败: {e}")
            else:
                print(f"FAIL 文档加载失败: {e}")
            return

        # Check the command.
        command = get_dt_mcp_command()
        if console:
            console.print(f"[green]OK[/green] MCP 命令: {' '.join(command)}")
            console.print("\n[bold green]所有测试通过[/bold green]")
        else:
            print(f"OK MCP 命令: {' '.join(command)}")
            print("\n所有测试通过")
|