dtflow 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtflow/__init__.py +17 -1
- dtflow/__main__.py +292 -239
- dtflow/cli/__init__.py +8 -2
- dtflow/cli/commands.py +1030 -92
- dtflow/core.py +96 -31
- dtflow/lineage.py +407 -0
- dtflow/mcp/cli.py +14 -14
- dtflow/pipeline.py +450 -0
- dtflow/storage/io.py +376 -370
- dtflow/streaming.py +661 -0
- dtflow/tokenizers.py +188 -51
- dtflow/utils/display.py +5 -4
- {dtflow-0.3.0.dist-info → dtflow-0.3.1.dist-info}/METADATA +153 -7
- dtflow-0.3.1.dist-info/RECORD +24 -0
- dtflow-0.3.0.dist-info/RECORD +0 -21
- {dtflow-0.3.0.dist-info → dtflow-0.3.1.dist-info}/WHEEL +0 -0
- {dtflow-0.3.0.dist-info → dtflow-0.3.1.dist-info}/entry_points.txt +0 -0
dtflow/__init__.py
CHANGED
|
@@ -13,6 +13,7 @@ from .storage import save_data, load_data, sample_file
|
|
|
13
13
|
from .tokenizers import (
|
|
14
14
|
count_tokens, token_counter, token_filter, token_stats,
|
|
15
15
|
messages_token_counter, messages_token_filter, messages_token_stats,
|
|
16
|
+
DEFAULT_MODEL, MODEL_ALIASES, OPENAI_MODELS, resolve_model,
|
|
16
17
|
)
|
|
17
18
|
from .converters import (
|
|
18
19
|
to_hf_dataset, from_hf_dataset, to_hf_chat_format,
|
|
@@ -23,8 +24,14 @@ from .converters import (
|
|
|
23
24
|
# ms-swift
|
|
24
25
|
to_swift_messages, to_swift_query_response, to_swift_vlm,
|
|
25
26
|
)
|
|
27
|
+
from .streaming import (
|
|
28
|
+
StreamingTransformer,
|
|
29
|
+
load_stream,
|
|
30
|
+
load_sharded,
|
|
31
|
+
process_shards,
|
|
32
|
+
)
|
|
26
33
|
|
|
27
|
-
__version__ = '0.3.0'
|
|
34
|
+
__version__ = '0.3.1'
|
|
28
35
|
|
|
29
36
|
__all__ = [
|
|
30
37
|
# core
|
|
@@ -47,6 +54,10 @@ __all__ = [
|
|
|
47
54
|
'messages_token_counter',
|
|
48
55
|
'messages_token_filter',
|
|
49
56
|
'messages_token_stats',
|
|
57
|
+
'DEFAULT_MODEL',
|
|
58
|
+
'MODEL_ALIASES',
|
|
59
|
+
'OPENAI_MODELS',
|
|
60
|
+
'resolve_model',
|
|
50
61
|
# converters
|
|
51
62
|
'to_hf_dataset',
|
|
52
63
|
'from_hf_dataset',
|
|
@@ -64,4 +75,9 @@ __all__ = [
|
|
|
64
75
|
'to_swift_messages',
|
|
65
76
|
'to_swift_query_response',
|
|
66
77
|
'to_swift_vlm',
|
|
78
|
+
# streaming
|
|
79
|
+
'StreamingTransformer',
|
|
80
|
+
'load_stream',
|
|
81
|
+
'load_sharded',
|
|
82
|
+
'process_shards',
|
|
67
83
|
]
|
dtflow/__main__.py
CHANGED
|
@@ -2,252 +2,305 @@
|
|
|
2
2
|
Datatron CLI entry point.
|
|
3
3
|
|
|
4
4
|
Usage:
|
|
5
|
-
python -m datatron <command> [options]
|
|
6
5
|
dt <command> [options]
|
|
6
|
+
dt --install-completion # 安装 shell 自动补全
|
|
7
7
|
|
|
8
8
|
Commands:
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
stats
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
9
|
+
sample 从数据文件中采样
|
|
10
|
+
head 显示文件的前 N 条数据
|
|
11
|
+
tail 显示文件的后 N 条数据
|
|
12
|
+
transform 转换数据格式(核心命令)
|
|
13
|
+
stats 显示数据文件的统计信息
|
|
14
|
+
token-stats Token 统计
|
|
15
|
+
diff 数据集对比
|
|
16
|
+
dedupe 数据去重
|
|
17
|
+
concat 拼接多个数据文件
|
|
18
|
+
clean 数据清洗
|
|
19
|
+
run 执行 Pipeline 配置文件
|
|
20
|
+
history 显示数据血缘历史
|
|
21
|
+
mcp MCP 服务管理(install/uninstall/status)
|
|
22
|
+
logs 日志查看工具使用说明
|
|
18
23
|
"""
|
|
19
|
-
import
|
|
20
|
-
|
|
21
|
-
from
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
)
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
)
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
)
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
24
|
+
import os
|
|
25
|
+
import sys
|
|
26
|
+
from typing import List, Optional
|
|
27
|
+
|
|
28
|
+
import typer
|
|
29
|
+
|
|
30
|
+
from .cli.commands import (
|
|
31
|
+
sample as _sample,
|
|
32
|
+
head as _head,
|
|
33
|
+
tail as _tail,
|
|
34
|
+
transform as _transform,
|
|
35
|
+
dedupe as _dedupe,
|
|
36
|
+
concat as _concat,
|
|
37
|
+
stats as _stats,
|
|
38
|
+
clean as _clean,
|
|
39
|
+
run as _run,
|
|
40
|
+
token_stats as _token_stats,
|
|
41
|
+
diff as _diff,
|
|
42
|
+
history as _history,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
# 创建主应用
|
|
46
|
+
app = typer.Typer(
|
|
47
|
+
name="dt",
|
|
48
|
+
help="Datatron CLI - 数据转换工具",
|
|
49
|
+
add_completion=True,
|
|
50
|
+
no_args_is_help=True,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# ============ 数据预览命令 ============
|
|
55
|
+
|
|
56
|
+
@app.command()
|
|
57
|
+
def sample(
|
|
58
|
+
filename: str = typer.Argument(..., help="输入文件路径"),
|
|
59
|
+
num: int = typer.Argument(10, help="采样数量"),
|
|
60
|
+
type: str = typer.Option("head", "--type", "-t", help="采样方式: random/head/tail"),
|
|
61
|
+
output: Optional[str] = typer.Option(None, "--output", "-o", help="输出文件路径"),
|
|
62
|
+
seed: Optional[int] = typer.Option(None, "--seed", help="随机种子"),
|
|
63
|
+
by: Optional[str] = typer.Option(None, "--by", help="分层采样字段"),
|
|
64
|
+
uniform: bool = typer.Option(False, "--uniform", help="均匀采样模式"),
|
|
65
|
+
fields: Optional[str] = typer.Option(None, "--fields", "-f", help="只显示指定字段(逗号分隔)"),
|
|
66
|
+
):
|
|
67
|
+
"""从数据文件中采样指定数量的数据"""
|
|
68
|
+
_sample(filename, num, type, output, seed, by, uniform, fields)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@app.command()
|
|
72
|
+
def head(
|
|
73
|
+
filename: str = typer.Argument(..., help="输入文件路径"),
|
|
74
|
+
num: int = typer.Argument(10, help="显示数量"),
|
|
75
|
+
output: Optional[str] = typer.Option(None, "--output", "-o", help="输出文件路径"),
|
|
76
|
+
fields: Optional[str] = typer.Option(None, "--fields", "-f", help="只显示指定字段"),
|
|
77
|
+
):
|
|
78
|
+
"""显示文件的前 N 条数据"""
|
|
79
|
+
_head(filename, num, output, fields)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@app.command()
|
|
83
|
+
def tail(
|
|
84
|
+
filename: str = typer.Argument(..., help="输入文件路径"),
|
|
85
|
+
num: int = typer.Argument(10, help="显示数量"),
|
|
86
|
+
output: Optional[str] = typer.Option(None, "--output", "-o", help="输出文件路径"),
|
|
87
|
+
fields: Optional[str] = typer.Option(None, "--fields", "-f", help="只显示指定字段"),
|
|
88
|
+
):
|
|
89
|
+
"""显示文件的后 N 条数据"""
|
|
90
|
+
_tail(filename, num, output, fields)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# ============ 数据转换命令 ============
|
|
94
|
+
|
|
95
|
+
@app.command()
|
|
96
|
+
def transform(
|
|
97
|
+
filename: str = typer.Argument(..., help="输入文件路径"),
|
|
98
|
+
num: Optional[int] = typer.Argument(None, help="只转换前 N 条数据"),
|
|
99
|
+
preset: Optional[str] = typer.Option(None, "--preset", "-p", help="使用预设模板"),
|
|
100
|
+
config: Optional[str] = typer.Option(None, "--config", "-c", help="配置文件路径"),
|
|
101
|
+
output: Optional[str] = typer.Option(None, "--output", "-o", help="输出文件路径"),
|
|
102
|
+
):
|
|
103
|
+
"""转换数据格式"""
|
|
104
|
+
_transform(filename, num, preset, config, output)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@app.command()
|
|
108
|
+
def run(
|
|
109
|
+
config: str = typer.Argument(..., help="Pipeline YAML 配置文件"),
|
|
110
|
+
input: Optional[str] = typer.Option(None, "--input", "-i", help="输入文件路径"),
|
|
111
|
+
output: Optional[str] = typer.Option(None, "--output", "-o", help="输出文件路径"),
|
|
112
|
+
):
|
|
113
|
+
"""执行 Pipeline 配置文件"""
|
|
114
|
+
_run(config, input, output)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# ============ 数据处理命令 ============
|
|
118
|
+
|
|
119
|
+
@app.command()
|
|
120
|
+
def dedupe(
|
|
121
|
+
filename: str = typer.Argument(..., help="输入文件路径"),
|
|
122
|
+
key: Optional[str] = typer.Option(None, "--key", "-k", help="去重依据字段"),
|
|
123
|
+
similar: Optional[float] = typer.Option(None, "--similar", "-s", help="相似度阈值 (0-1)"),
|
|
124
|
+
output: Optional[str] = typer.Option(None, "--output", "-o", help="输出文件路径"),
|
|
125
|
+
):
|
|
126
|
+
"""数据去重"""
|
|
127
|
+
_dedupe(filename, key, similar, output)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@app.command()
|
|
131
|
+
def concat(
|
|
132
|
+
files: List[str] = typer.Argument(..., help="输入文件列表"),
|
|
133
|
+
output: Optional[str] = typer.Option(None, "--output", "-o", help="输出文件路径(必须)"),
|
|
134
|
+
strict: bool = typer.Option(False, "--strict", help="严格模式,字段必须一致"),
|
|
135
|
+
):
|
|
136
|
+
"""拼接多个数据文件"""
|
|
137
|
+
_concat(*files, output=output, strict=strict)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
@app.command()
|
|
141
|
+
def clean(
|
|
142
|
+
filename: str = typer.Argument(..., help="输入文件路径"),
|
|
143
|
+
drop_empty: Optional[str] = typer.Option(None, "--drop-empty", help="删除空值记录"),
|
|
144
|
+
min_len: Optional[str] = typer.Option(None, "--min-len", help="最小长度过滤 (字段:长度)"),
|
|
145
|
+
max_len: Optional[str] = typer.Option(None, "--max-len", help="最大长度过滤 (字段:长度)"),
|
|
146
|
+
keep: Optional[str] = typer.Option(None, "--keep", help="只保留指定字段"),
|
|
147
|
+
drop: Optional[str] = typer.Option(None, "--drop", help="删除指定字段"),
|
|
148
|
+
strip: bool = typer.Option(False, "--strip", help="去除字符串首尾空白"),
|
|
149
|
+
output: Optional[str] = typer.Option(None, "--output", "-o", help="输出文件路径"),
|
|
150
|
+
):
|
|
151
|
+
"""数据清洗"""
|
|
152
|
+
_clean(filename, drop_empty, min_len, max_len, keep, drop, strip, output)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
# ============ 数据统计命令 ============
|
|
156
|
+
|
|
157
|
+
@app.command()
|
|
158
|
+
def stats(
|
|
159
|
+
filename: str = typer.Argument(..., help="输入文件路径"),
|
|
160
|
+
top: int = typer.Option(10, "--top", "-n", help="显示 Top N 值"),
|
|
161
|
+
):
|
|
162
|
+
"""显示数据文件的统计信息"""
|
|
163
|
+
_stats(filename, top)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
@app.command("token-stats")
|
|
167
|
+
def token_stats(
|
|
168
|
+
filename: str = typer.Argument(..., help="输入文件路径"),
|
|
169
|
+
field: str = typer.Option("messages", "--field", "-f", help="统计字段"),
|
|
170
|
+
model: str = typer.Option("cl100k_base", "--model", "-m", help="分词器: cl100k_base (默认), qwen2.5, llama3, gpt-4 等"),
|
|
171
|
+
detailed: bool = typer.Option(False, "--detailed", "-d", help="显示详细统计"),
|
|
172
|
+
):
|
|
173
|
+
"""统计数据集的 Token 信息"""
|
|
174
|
+
_token_stats(filename, field, model, detailed)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
@app.command()
|
|
178
|
+
def diff(
|
|
179
|
+
file1: str = typer.Argument(..., help="第一个文件"),
|
|
180
|
+
file2: str = typer.Argument(..., help="第二个文件"),
|
|
181
|
+
key: Optional[str] = typer.Option(None, "--key", "-k", help="匹配键字段"),
|
|
182
|
+
output: Optional[str] = typer.Option(None, "--output", "-o", help="报告输出路径"),
|
|
183
|
+
):
|
|
184
|
+
"""对比两个数据集的差异"""
|
|
185
|
+
_diff(file1, file2, key, output)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
@app.command()
|
|
189
|
+
def history(
|
|
190
|
+
filename: str = typer.Argument(..., help="数据文件路径"),
|
|
191
|
+
json: bool = typer.Option(False, "--json", "-j", help="JSON 格式输出"),
|
|
192
|
+
):
|
|
193
|
+
"""显示数据文件的血缘历史"""
|
|
194
|
+
_history(filename, json)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
# ============ 工具命令 ============
|
|
198
|
+
|
|
199
|
+
@app.command()
|
|
200
|
+
def logs():
|
|
201
|
+
"""日志查看工具使用说明"""
|
|
202
|
+
help_text = """
|
|
203
|
+
日志查看工具 (tl)
|
|
204
|
+
|
|
205
|
+
dtflow 内置了 toolong 日志查看器,安装后可直接使用 tl 命令:
|
|
206
|
+
|
|
207
|
+
基本用法:
|
|
208
|
+
tl app.log 查看日志文件(交互式 TUI)
|
|
209
|
+
tl app.log error.log 同时查看多个日志
|
|
210
|
+
tl --tail app.log 实时跟踪模式(类似 tail -f)
|
|
211
|
+
tl *.log 通配符匹配多个文件
|
|
212
|
+
|
|
213
|
+
快捷键:
|
|
214
|
+
/ 搜索
|
|
215
|
+
n/N 下一个/上一个匹配
|
|
216
|
+
g/G 跳到开头/结尾
|
|
217
|
+
f 过滤显示
|
|
218
|
+
q 退出
|
|
219
|
+
|
|
220
|
+
安装:
|
|
221
|
+
pip install dtflow[logs] # 仅安装日志工具
|
|
222
|
+
pip install dtflow[full] # 安装全部可选依赖
|
|
223
|
+
"""
|
|
224
|
+
print(help_text)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
# ============ MCP 子命令 ============
|
|
228
|
+
|
|
229
|
+
mcp_app = typer.Typer(help="MCP 服务管理")
|
|
230
|
+
app.add_typer(mcp_app, name="mcp")
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
@mcp_app.command()
|
|
234
|
+
def install(
|
|
235
|
+
name: str = typer.Option("datatron", "--name", "-n", help="MCP 服务名称"),
|
|
236
|
+
target: str = typer.Option("code", "--target", "-t", help="安装目标: desktop/code/all"),
|
|
237
|
+
):
|
|
238
|
+
"""安装 Datatron MCP 服务"""
|
|
239
|
+
from .mcp.cli import MCPCommands
|
|
240
|
+
MCPCommands().install(name, target)
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
@mcp_app.command()
|
|
244
|
+
def uninstall(
|
|
245
|
+
name: str = typer.Option("datatron", "--name", "-n", help="MCP 服务名称"),
|
|
246
|
+
target: str = typer.Option("all", "--target", "-t", help="移除目标: desktop/code/all"),
|
|
247
|
+
):
|
|
248
|
+
"""移除 Datatron MCP 服务"""
|
|
249
|
+
from .mcp.cli import MCPCommands
|
|
250
|
+
MCPCommands().uninstall(name, target)
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
@mcp_app.command()
|
|
254
|
+
def status():
|
|
255
|
+
"""查看 MCP 服务安装状态"""
|
|
256
|
+
from .mcp.cli import MCPCommands
|
|
257
|
+
MCPCommands().status()
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
@mcp_app.command()
|
|
261
|
+
def test():
|
|
262
|
+
"""测试 MCP 服务是否正常"""
|
|
263
|
+
from .mcp.cli import MCPCommands
|
|
264
|
+
MCPCommands().test()
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def _show_completion_hint():
|
|
268
|
+
"""首次运行时提示用户可以安装补全"""
|
|
269
|
+
from pathlib import Path
|
|
270
|
+
|
|
271
|
+
# 标记文件
|
|
272
|
+
marker = Path.home() / ".config" / "dtflow" / ".completion_hinted"
|
|
273
|
+
|
|
274
|
+
# 已提示过则跳过
|
|
275
|
+
if marker.exists():
|
|
276
|
+
return
|
|
277
|
+
|
|
278
|
+
# 检测是否在交互式终端中(检查 stderr,因为 stdout 可能被管道)
|
|
279
|
+
if not (sys.stderr.isatty() or sys.stdout.isatty()):
|
|
280
|
+
return
|
|
281
|
+
|
|
282
|
+
# 显示提示(使用 stderr 避免干扰管道输出)
|
|
283
|
+
from rich.console import Console
|
|
284
|
+
console = Console(stderr=True)
|
|
285
|
+
console.print(
|
|
286
|
+
"[dim]💡 提示: 运行 [green]dt --install-completion[/green] 启用命令补全[/dim]"
|
|
287
|
+
)
|
|
288
|
+
|
|
289
|
+
# 记录已提示
|
|
290
|
+
try:
|
|
291
|
+
marker.parent.mkdir(parents=True, exist_ok=True)
|
|
292
|
+
marker.touch()
|
|
293
|
+
except Exception:
|
|
294
|
+
pass
|
|
247
295
|
|
|
248
296
|
|
|
249
297
|
def main():
|
|
250
|
-
|
|
298
|
+
# less 分页器配置(仅 Unix-like 系统)
|
|
299
|
+
if sys.platform != 'win32':
|
|
300
|
+
os.environ['PAGER'] = 'less -RXF'
|
|
301
|
+
|
|
302
|
+
# _show_completion_hint()
|
|
303
|
+
app()
|
|
251
304
|
|
|
252
305
|
|
|
253
306
|
if __name__ == "__main__":
|
dtflow/cli/__init__.py
CHANGED
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
"""
|
|
2
2
|
CLI module for DataTransformer.
|
|
3
3
|
"""
|
|
4
|
-
from .commands import
|
|
4
|
+
from .commands import (
|
|
5
|
+
clean, concat, dedupe, diff, head, history, run,
|
|
6
|
+
sample, stats, tail, token_stats, transform
|
|
7
|
+
)
|
|
5
8
|
|
|
6
|
-
__all__ = [
|
|
9
|
+
__all__ = [
|
|
10
|
+
"sample", "head", "tail", "transform", "dedupe", "concat",
|
|
11
|
+
"stats", "clean", "run", "token_stats", "diff", "history"
|
|
12
|
+
]
|