dtflow 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtflow/__init__.py +70 -43
- dtflow/__main__.py +301 -239
- dtflow/cli/__init__.py +29 -2
- dtflow/cli/commands.py +1112 -113
- dtflow/converters.py +39 -23
- dtflow/core.py +140 -72
- dtflow/lineage.py +410 -0
- dtflow/mcp/__init__.py +1 -0
- dtflow/mcp/__main__.py +2 -0
- dtflow/mcp/cli.py +35 -17
- dtflow/mcp/docs.py +0 -5
- dtflow/pipeline.py +460 -0
- dtflow/presets.py +24 -22
- dtflow/storage/__init__.py +11 -10
- dtflow/storage/io.py +384 -369
- dtflow/streaming.py +656 -0
- dtflow/tokenizers.py +212 -57
- dtflow/utils/__init__.py +2 -1
- dtflow/utils/display.py +28 -27
- {dtflow-0.3.0.dist-info → dtflow-0.3.2.dist-info}/METADATA +153 -7
- dtflow-0.3.2.dist-info/RECORD +24 -0
- dtflow-0.3.0.dist-info/RECORD +0 -21
- {dtflow-0.3.0.dist-info → dtflow-0.3.2.dist-info}/WHEEL +0 -0
- {dtflow-0.3.0.dist-info → dtflow-0.3.2.dist-info}/entry_points.txt +0 -0
dtflow/lineage.py
ADDED
|
@@ -0,0 +1,410 @@
|
|
|
1
|
+
"""
|
|
2
|
+
数据血缘模块
|
|
3
|
+
|
|
4
|
+
记录数据处理的完整历史,支持数据溯源和版本对比。
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import hashlib
|
|
8
|
+
import os
|
|
9
|
+
import platform
|
|
10
|
+
import time
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any, Dict, List, Optional, Union
|
|
14
|
+
|
|
15
|
+
import orjson
|
|
16
|
+
|
|
17
|
+
# Version tag written into every lineage record
LINEAGE_VERSION = "1.0"

# Suffix appended to a data file path to obtain its lineage metadata file path
LINEAGE_SUFFIX = ".lineage.json"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _get_file_hash(filepath: str, sample_size: int = 10000) -> str:
|
|
25
|
+
"""
|
|
26
|
+
计算文件内容哈希(采样方式,避免大文件性能问题)。
|
|
27
|
+
|
|
28
|
+
Args:
|
|
29
|
+
filepath: 文件路径
|
|
30
|
+
sample_size: 采样字节数
|
|
31
|
+
|
|
32
|
+
Returns:
|
|
33
|
+
文件哈希值(前16位)
|
|
34
|
+
"""
|
|
35
|
+
hasher = hashlib.sha256()
|
|
36
|
+
file_size = os.path.getsize(filepath)
|
|
37
|
+
|
|
38
|
+
with open(filepath, "rb") as f:
|
|
39
|
+
# 读取文件头
|
|
40
|
+
hasher.update(f.read(sample_size))
|
|
41
|
+
|
|
42
|
+
# 如果文件较大,还要读取中间和尾部
|
|
43
|
+
if file_size > sample_size * 3:
|
|
44
|
+
f.seek(file_size // 2)
|
|
45
|
+
hasher.update(f.read(sample_size))
|
|
46
|
+
f.seek(-sample_size, 2)
|
|
47
|
+
hasher.update(f.read(sample_size))
|
|
48
|
+
|
|
49
|
+
return hasher.hexdigest()[:16]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _get_lineage_path(data_path: str) -> str:
    """Return the lineage metadata file path that belongs to ``data_path``."""
    base = str(data_path)
    return "".join((base, LINEAGE_SUFFIX))
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _get_environment_info() -> Dict[str, str]:
|
|
58
|
+
"""获取运行环境信息"""
|
|
59
|
+
return {
|
|
60
|
+
"python_version": platform.python_version(),
|
|
61
|
+
"platform": platform.system(),
|
|
62
|
+
"hostname": platform.node(),
|
|
63
|
+
"user": os.environ.get("USER", os.environ.get("USERNAME", "unknown")),
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class LineageRecord:
    """
    One lineage record: where a dataset came from and which operations produced it.

    Attributes are plain data so the record can be converted to and from a
    dictionary with ``to_dict`` / ``from_dict``.
    """

    def __init__(
        self,
        source: Optional[Union[str, Dict[str, Any]]] = None,
        operations: Optional[List[Dict[str, Any]]] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ):
        """
        Create a record stamped with the current time and environment.

        Args:
            source: Description of the source data; either a plain string or a
                dictionary (for example with ``path`` / ``hash`` entries).
            operations: Operation entries already performed; defaults to an empty list.
            metadata: Additional key/value information; defaults to an empty dict.
        """
        self.version = LINEAGE_VERSION
        self.created_at = datetime.now().isoformat()
        self.source = source
        self.operations = operations or []
        self.metadata = metadata or {}
        self.environment = _get_environment_info()

    def add_operation(
        self,
        op_type: str,
        params: Optional[Dict[str, Any]] = None,
        input_count: Optional[int] = None,
        output_count: Optional[int] = None,
    ) -> "LineageRecord":
        """
        Append an operation entry to the record.

        Args:
            op_type: Name of the operation.
            params: Operation parameters; omitted from the entry when empty.
            input_count: Number of input items; omitted when None.
            output_count: Number of output items; omitted when None.

        Returns:
            self, so calls can be chained.
        """
        op = {
            "type": op_type,
            "timestamp": datetime.now().isoformat(),
        }
        if params:
            op["params"] = params
        # Counts are compared against None so that a count of 0 is still recorded
        if input_count is not None:
            op["input_count"] = input_count
        if output_count is not None:
            op["output_count"] = output_count

        self.operations.append(op)
        return self

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a dictionary."""
        return {
            "version": self.version,
            "created_at": self.created_at,
            "source": self.source,
            "operations": self.operations,
            "metadata": self.metadata,
            "environment": self.environment,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "LineageRecord":
        """
        Build a record from a dictionary such as the one produced by ``to_dict``.

        Missing entries, and entries that are explicitly ``None`` (JSON ``null``),
        fall back to defaults: the current lineage version, the current time and
        an empty environment.
        """
        record = cls(
            source=data.get("source"),
            operations=data.get("operations", []),
            metadata=data.get("metadata", {}),
        )
        # `or` also covers explicit null values, not only absent keys
        record.version = data.get("version") or LINEAGE_VERSION
        record.created_at = data.get("created_at") or datetime.now().isoformat()
        record.environment = data.get("environment") or {}
        return record
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class LineageTracker:
    """
    Lineage tracker.

    Accumulates the operations applied to a dataset and turns them into a
    ``LineageRecord`` that can be saved next to the output file.
    """

    def __init__(self, source_path: Optional[str] = None):
        """
        Initialise the tracker.

        Args:
            source_path: Path of the source data file. When given, an existing
                lineage record of that file is loaded via ``load_lineage``.
        """
        self.source_path = source_path
        self.source_lineage = None
        self.operations: List[Dict[str, Any]] = []

        # Load the lineage of the source file, if it has one
        if source_path:
            self.source_lineage = load_lineage(source_path)

    def record(
        self,
        op_type: str,
        params: Optional[Dict[str, Any]] = None,
        input_count: Optional[int] = None,
        output_count: Optional[int] = None,
    ) -> "LineageTracker":
        """
        Record one operation.

        Args:
            op_type: Operation type (filter, transform, dedupe, sample, etc.).
            params: Operation parameters; sanitised before being stored and
                omitted when empty.
            input_count: Number of input items; omitted when None.
            output_count: Number of output items; omitted when None.

        Returns:
            self, so calls can be chained.
        """
        op = {
            "type": op_type,
            "timestamp": datetime.now().isoformat(),
        }
        if params:
            # Replace values that cannot be serialised (e.g. callables)
            op["params"] = _sanitize_params(params)
        if input_count is not None:
            op["input_count"] = input_count
        if output_count is not None:
            op["output_count"] = output_count

        self.operations.append(op)
        return self

    def build_record(self, output_path: str, output_count: int) -> "LineageRecord":
        """
        Build the final lineage record.

        Args:
            output_path: Path of the output file.
            output_count: Number of output items.

        Returns:
            A ``LineageRecord`` holding a snapshot of the operations recorded so far.
        """
        # Describe the source
        source_info = None
        if self.source_path:
            source_info = {
                "path": str(self.source_path),
                # Only regular files can be hashed; a missing path or a
                # directory yields None instead of failing in open().
                "hash": (
                    _get_file_hash(self.source_path) if os.path.isfile(self.source_path) else None
                ),
            }
            # Link to the source's own lineage file so the chain can be followed
            if self.source_lineage:
                source_info["lineage_ref"] = _get_lineage_path(self.source_path)

        record = LineageRecord(
            source=source_info,
            # Copy the list so that later record() calls on this tracker do not
            # silently modify a record that was already built.
            operations=list(self.operations),
            metadata={
                "output_path": str(output_path),
                "output_count": output_count,
            },
        )

        return record

    def save(self, output_path: str, output_count: int) -> str:
        """
        Save the lineage record to a file.

        Args:
            output_path: Path of the output data file.
            output_count: Number of output items.

        Returns:
            Path of the lineage file that was written.
        """
        record = self.build_record(output_path, output_count)
        lineage_path = _get_lineage_path(output_path)

        with open(lineage_path, "wb") as f:
            f.write(orjson.dumps(record.to_dict(), option=orjson.OPT_INDENT_2))

        return lineage_path
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def _sanitize_params(params: Dict[str, Any]) -> Dict[str, Any]:
|
|
242
|
+
"""
|
|
243
|
+
清理参数,移除不可序列化的内容。
|
|
244
|
+
"""
|
|
245
|
+
result = {}
|
|
246
|
+
for key, value in params.items():
|
|
247
|
+
if callable(value):
|
|
248
|
+
# 函数:只记录名称
|
|
249
|
+
result[key] = f"<function:{getattr(value, '__name__', 'anonymous')}>"
|
|
250
|
+
elif isinstance(value, (str, int, float, bool, type(None))):
|
|
251
|
+
result[key] = value
|
|
252
|
+
elif isinstance(value, (list, tuple)):
|
|
253
|
+
result[key] = [_sanitize_value(v) for v in value]
|
|
254
|
+
elif isinstance(value, dict):
|
|
255
|
+
result[key] = _sanitize_params(value)
|
|
256
|
+
else:
|
|
257
|
+
result[key] = str(value)
|
|
258
|
+
return result
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _sanitize_value(value: Any) -> Any:
|
|
262
|
+
"""清理单个值"""
|
|
263
|
+
if callable(value):
|
|
264
|
+
return f"<function:{getattr(value, '__name__', 'anonymous')}>"
|
|
265
|
+
elif isinstance(value, (str, int, float, bool, type(None))):
|
|
266
|
+
return value
|
|
267
|
+
elif isinstance(value, dict):
|
|
268
|
+
return _sanitize_params(value)
|
|
269
|
+
else:
|
|
270
|
+
return str(value)
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
# ============ 公共 API ============
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def load_lineage(data_path: str) -> Optional["LineageRecord"]:
    """
    Load the lineage record of a data file.

    Args:
        data_path: Path of the data file.

    Returns:
        The ``LineageRecord``, or None when the lineage file is missing,
        unreadable, not valid JSON, or does not contain a JSON object.
    """
    lineage_path = _get_lineage_path(data_path)

    # Open directly instead of checking for existence first: a missing file
    # surfaces as OSError and is handled like any other unreadable file.
    try:
        with open(lineage_path, "rb") as f:
            data = orjson.loads(f.read())
    except (orjson.JSONDecodeError, OSError):
        return None

    # Valid JSON that is not an object (e.g. a list) cannot describe a record
    if not isinstance(data, dict):
        return None

    return LineageRecord.from_dict(data)
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def get_lineage_chain(data_path: str, max_depth: int = 10) -> List["LineageRecord"]:
    """
    Collect the lineage records of a file and of its recorded sources.

    Starting at ``data_path``, the record of each file is loaded and the walk
    continues with the ``path`` entry of that record's source, as long as the
    source is a dictionary and the path exists on disk.

    Args:
        data_path: Path of the data file.
        max_depth: Maximum number of records to follow.

    Returns:
        Lineage records ordered from newest to oldest.
    """
    chain = []
    current_path = data_path
    visited = set()

    for _ in range(max_depth):
        # Compare normalised absolute paths so that different spellings of the
        # same file ("a.jsonl", "./a.jsonl", Path("a.jsonl")) are recognised
        # as one entry when guarding against circular references.
        visit_key = os.path.abspath(str(current_path))
        if visit_key in visited:
            break
        visited.add(visit_key)

        record = load_lineage(current_path)
        if not record:
            break

        chain.append(record)

        # Step back to the source file
        source = record.source
        if not (source and isinstance(source, dict)):
            break
        source_path = source.get("path")
        if not source_path or not os.path.exists(source_path):
            break
        current_path = source_path

    return chain
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def format_lineage_report(data_path: str) -> str:
    """
    Format a human-readable lineage report.

    Args:
        data_path: Path of the data file.

    Returns:
        The report text, or a one-line notice when the file has no lineage record.
    """
    chain = get_lineage_chain(data_path)

    if not chain:
        return f"文件 {data_path} 没有血缘记录"

    lines = []
    lines.append(f"📊 数据血缘报告: {data_path}")
    lines.append("=" * 60)

    for i, record in enumerate(chain):
        prefix = "└─" if i == len(chain) - 1 else "├─"
        indent = " " * i

        # Basic information
        lines.append(f"{indent}{prefix} 版本 {i + 1}")
        lines.append(f"{indent} 创建时间: {record.created_at}")

        # Source information
        if record.source:
            if isinstance(record.source, dict):
                lines.append(f"{indent} 来源: {record.source.get('path', 'unknown')}")
                if record.source.get("hash"):
                    lines.append(f"{indent} 哈希: {record.source['hash']}")
            else:
                lines.append(f"{indent} 来源: {record.source}")

        # Operation list
        if record.operations:
            lines.append(f"{indent} 操作链:")
            for j, op in enumerate(record.operations):
                op_prefix = "└─" if j == len(record.operations) - 1 else "├─"
                op_type = op.get("type", "unknown")
                input_count = op.get("input_count", "?")
                output_count = op.get("output_count", "?")
                lines.append(f"{indent} {op_prefix} {op_type}: {input_count} → {output_count}")

                # Show the parameters
                if op.get("params"):
                    for key, value in op["params"].items():
                        lines.append(f"{indent} {key}: {value}")

        # Metadata
        if record.metadata:
            total_output = record.metadata.get("output_count")
            # Compare against None: an output count of 0 is a valid result
            # and must still be shown.
            if total_output is not None:
                lines.append(f"{indent} 输出数量: {total_output}")

        lines.append("")

    return "\n".join(lines)
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def has_lineage(data_path: str) -> bool:
    """Tell whether a lineage file exists for ``data_path``."""
    lineage_file = _get_lineage_path(data_path)
    found = os.path.exists(lineage_file)
    return found
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
def delete_lineage(data_path: str) -> bool:
    """
    Delete the lineage file of a data file.

    Args:
        data_path: Path of the data file.

    Returns:
        True when a lineage file was removed, False when there was none.
    """
    lineage_path = _get_lineage_path(data_path)
    # Remove directly rather than checking for existence first, so a file that
    # disappears between the check and the removal cannot raise.
    try:
        os.remove(lineage_path)
    except (FileNotFoundError, NotADirectoryError):
        return False
    return True
|
dtflow/mcp/__init__.py
CHANGED
dtflow/mcp/__main__.py
CHANGED
|
@@ -7,9 +7,11 @@
|
|
|
7
7
|
if __name__ == "__main__":
|
|
8
8
|
try:
|
|
9
9
|
from .server import main
|
|
10
|
+
|
|
10
11
|
main()
|
|
11
12
|
except ImportError as e:
|
|
12
13
|
import sys
|
|
14
|
+
|
|
13
15
|
print(f"错误: MCP 功能需要安装 mcp 依赖", file=sys.stderr)
|
|
14
16
|
print(f"请运行: pip install dtflow[mcp]", file=sys.stderr)
|
|
15
17
|
print(f"\n原始错误: {e}", file=sys.stderr)
|
dtflow/mcp/cli.py
CHANGED
|
@@ -3,22 +3,27 @@
|
|
|
3
3
|
提供 MCP 服务的安装和管理命令。
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
-
import json
|
|
7
6
|
import os
|
|
8
7
|
import platform
|
|
9
8
|
from pathlib import Path
|
|
10
9
|
from typing import Literal
|
|
11
10
|
|
|
11
|
+
import orjson
|
|
12
|
+
|
|
12
13
|
try:
|
|
13
14
|
from rich import print
|
|
14
15
|
from rich.console import Console
|
|
16
|
+
|
|
15
17
|
console = Console()
|
|
16
18
|
except ImportError:
|
|
17
19
|
console = None
|
|
20
|
+
|
|
18
21
|
def print(*args, **kwargs):
|
|
19
22
|
import builtins
|
|
23
|
+
|
|
20
24
|
builtins.print(*args, **kwargs)
|
|
21
25
|
|
|
26
|
+
|
|
22
27
|
# 支持的目标类型
|
|
23
28
|
TargetType = Literal["desktop", "code", "all"]
|
|
24
29
|
|
|
@@ -28,7 +33,13 @@ def get_claude_desktop_config_path() -> Path:
|
|
|
28
33
|
system = platform.system()
|
|
29
34
|
|
|
30
35
|
if system == "Darwin": # macOS
|
|
31
|
-
return
|
|
36
|
+
return (
|
|
37
|
+
Path.home()
|
|
38
|
+
/ "Library"
|
|
39
|
+
/ "Application Support"
|
|
40
|
+
/ "Claude"
|
|
41
|
+
/ "claude_desktop_config.json"
|
|
42
|
+
)
|
|
32
43
|
elif system == "Windows":
|
|
33
44
|
return Path(os.environ.get("APPDATA", "")) / "Claude" / "claude_desktop_config.json"
|
|
34
45
|
elif system == "Linux":
|
|
@@ -63,11 +74,13 @@ def _install_to_config(config_path: Path, name: str, target_name: str) -> bool:
|
|
|
63
74
|
config = {}
|
|
64
75
|
if config_path.exists():
|
|
65
76
|
try:
|
|
66
|
-
with open(config_path, "
|
|
67
|
-
config =
|
|
68
|
-
except
|
|
77
|
+
with open(config_path, "rb") as f:
|
|
78
|
+
config = orjson.loads(f.read())
|
|
79
|
+
except orjson.JSONDecodeError:
|
|
69
80
|
if console:
|
|
70
|
-
console.print(
|
|
81
|
+
console.print(
|
|
82
|
+
f"[yellow]警告:[/yellow] {target_name} 配置文件格式错误,将创建新配置"
|
|
83
|
+
)
|
|
71
84
|
else:
|
|
72
85
|
print(f"警告: {target_name} 配置文件格式错误,将创建新配置")
|
|
73
86
|
|
|
@@ -87,8 +100,8 @@ def _install_to_config(config_path: Path, name: str, target_name: str) -> bool:
|
|
|
87
100
|
|
|
88
101
|
# 写入配置
|
|
89
102
|
try:
|
|
90
|
-
with open(config_path, "
|
|
91
|
-
|
|
103
|
+
with open(config_path, "wb") as f:
|
|
104
|
+
f.write(orjson.dumps(config, option=orjson.OPT_INDENT_2))
|
|
92
105
|
return True
|
|
93
106
|
except Exception as e:
|
|
94
107
|
if console:
|
|
@@ -108,9 +121,9 @@ def _uninstall_from_config(config_path: Path, name: str, target_name: str) -> bo
|
|
|
108
121
|
return False
|
|
109
122
|
|
|
110
123
|
try:
|
|
111
|
-
with open(config_path, "
|
|
112
|
-
config =
|
|
113
|
-
except
|
|
124
|
+
with open(config_path, "rb") as f:
|
|
125
|
+
config = orjson.loads(f.read())
|
|
126
|
+
except orjson.JSONDecodeError:
|
|
114
127
|
return False
|
|
115
128
|
|
|
116
129
|
if "mcpServers" not in config or name not in config["mcpServers"]:
|
|
@@ -119,8 +132,8 @@ def _uninstall_from_config(config_path: Path, name: str, target_name: str) -> bo
|
|
|
119
132
|
del config["mcpServers"][name]
|
|
120
133
|
|
|
121
134
|
try:
|
|
122
|
-
with open(config_path, "
|
|
123
|
-
|
|
135
|
+
with open(config_path, "wb") as f:
|
|
136
|
+
f.write(orjson.dumps(config, option=orjson.OPT_INDENT_2))
|
|
124
137
|
return True
|
|
125
138
|
except Exception:
|
|
126
139
|
return False
|
|
@@ -131,7 +144,9 @@ def _show_config_status(config_path: Path, target_name: str):
|
|
|
131
144
|
if console:
|
|
132
145
|
console.print(f"\n[bold]{target_name} 配置:[/bold]")
|
|
133
146
|
console.print(f" 路径: [bold blue]{config_path}[/bold blue]")
|
|
134
|
-
console.print(
|
|
147
|
+
console.print(
|
|
148
|
+
f" 存在: {'[green]是[/green]' if config_path.exists() else '[yellow]否[/yellow]'}"
|
|
149
|
+
)
|
|
135
150
|
else:
|
|
136
151
|
print(f"\n{target_name} 配置:")
|
|
137
152
|
print(f" 路径: {config_path}")
|
|
@@ -141,9 +156,9 @@ def _show_config_status(config_path: Path, target_name: str):
|
|
|
141
156
|
return
|
|
142
157
|
|
|
143
158
|
try:
|
|
144
|
-
with open(config_path, "
|
|
145
|
-
config =
|
|
146
|
-
except
|
|
159
|
+
with open(config_path, "rb") as f:
|
|
160
|
+
config = orjson.loads(f.read())
|
|
161
|
+
except orjson.JSONDecodeError:
|
|
147
162
|
if console:
|
|
148
163
|
console.print(" [red]配置文件格式错误[/red]")
|
|
149
164
|
else:
|
|
@@ -308,6 +323,7 @@ class MCPCommands:
|
|
|
308
323
|
|
|
309
324
|
try:
|
|
310
325
|
import mcp
|
|
326
|
+
|
|
311
327
|
if console:
|
|
312
328
|
console.print(f" mcp: [green]已安装[/green]")
|
|
313
329
|
else:
|
|
@@ -333,6 +349,7 @@ class MCPCommands:
|
|
|
333
349
|
# 检查依赖
|
|
334
350
|
try:
|
|
335
351
|
from dtflow.mcp import mcp
|
|
352
|
+
|
|
336
353
|
if console:
|
|
337
354
|
console.print("[green]OK[/green] MCP 模块导入成功")
|
|
338
355
|
else:
|
|
@@ -349,6 +366,7 @@ class MCPCommands:
|
|
|
349
366
|
# 检查文档
|
|
350
367
|
try:
|
|
351
368
|
from dtflow.mcp.docs import DOCS, TOPICS
|
|
369
|
+
|
|
352
370
|
if console:
|
|
353
371
|
console.print(f"[green]OK[/green] 文档加载成功 ({len(TOPICS)} 个主题)")
|
|
354
372
|
else:
|
dtflow/mcp/docs.py
CHANGED
|
@@ -74,7 +74,6 @@ dt transform data.jsonl
|
|
|
74
74
|
dt sample data.jsonl --num=10
|
|
75
75
|
```
|
|
76
76
|
""",
|
|
77
|
-
|
|
78
77
|
"basic_usage": """# Datatron 基本用法
|
|
79
78
|
|
|
80
79
|
## 加载与保存
|
|
@@ -150,7 +149,6 @@ dt.shuffle(seed=42)
|
|
|
150
149
|
train, test = dt.split(ratio=0.8)
|
|
151
150
|
```
|
|
152
151
|
""",
|
|
153
|
-
|
|
154
152
|
"presets": """# 预设转换模板
|
|
155
153
|
|
|
156
154
|
Datatron 提供常用的格式转换预设,可直接用于 CLI 或 Python API。
|
|
@@ -233,7 +231,6 @@ result = dt.to(transform_fn)
|
|
|
233
231
|
}
|
|
234
232
|
```
|
|
235
233
|
""",
|
|
236
|
-
|
|
237
234
|
"cli": """# CLI 命令行工具
|
|
238
235
|
|
|
239
236
|
Datatron 提供 `dt` 命令行工具。
|
|
@@ -295,7 +292,6 @@ dt sample data.jsonl --num=50 --seed=42
|
|
|
295
292
|
- Excel (.xlsx, .xls) - 需要安装 pandas
|
|
296
293
|
- Parquet (.parquet) - 需要安装 pyarrow
|
|
297
294
|
""",
|
|
298
|
-
|
|
299
295
|
"storage": """# 存储格式支持
|
|
300
296
|
|
|
301
297
|
Datatron 支持多种常用数据格式。
|
|
@@ -348,7 +344,6 @@ pip install dtflow[full]
|
|
|
348
344
|
pip install dtflow[storage]
|
|
349
345
|
```
|
|
350
346
|
""",
|
|
351
|
-
|
|
352
347
|
"chain_api": """# 链式 API 设计
|
|
353
348
|
|
|
354
349
|
Datatron 支持流畅的链式调用。
|