dtflow 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtflow/__init__.py +1 -1
- dtflow/__main__.py +2 -4
- dtflow/cli/sample.py +2 -2
- {dtflow-0.5.4.dist-info → dtflow-0.5.5.dist-info}/METADATA +1 -1
- {dtflow-0.5.4.dist-info → dtflow-0.5.5.dist-info}/RECORD +7 -7
- {dtflow-0.5.4.dist-info → dtflow-0.5.5.dist-info}/WHEEL +0 -0
- {dtflow-0.5.4.dist-info → dtflow-0.5.5.dist-info}/entry_points.txt +0 -0
dtflow/__init__.py
CHANGED
dtflow/__main__.py
CHANGED
|
@@ -60,7 +60,7 @@ def sample(
|
|
|
60
60
|
filename: str = typer.Argument(..., help="输入文件路径"),
|
|
61
61
|
num_arg: Optional[int] = typer.Argument(None, help="采样数量", metavar="NUM"),
|
|
62
62
|
num: int = typer.Option(10, "--num", "-n", help="采样数量", show_default=True),
|
|
63
|
-
type: str = typer.Option("
|
|
63
|
+
type: str = typer.Option("random", "--type", "-t", help="采样方式: random/head/tail"),
|
|
64
64
|
output: Optional[str] = typer.Option(None, "--output", "-o", help="输出文件路径"),
|
|
65
65
|
seed: Optional[int] = typer.Option(None, "--seed", help="随机种子"),
|
|
66
66
|
by: Optional[str] = typer.Option(None, "--by", help="分层采样字段"),
|
|
@@ -223,9 +223,7 @@ def validate(
|
|
|
223
223
|
None, "--preset", "-p", help="预设 Schema: openai_chat, alpaca, dpo, sharegpt"
|
|
224
224
|
),
|
|
225
225
|
output: Optional[str] = typer.Option(None, "--output", "-o", help="输出有效数据的文件路径"),
|
|
226
|
-
filter: bool = typer.Option(
|
|
227
|
-
False, "--filter", "-f", help="过滤无效数据并保存"
|
|
228
|
-
),
|
|
226
|
+
filter: bool = typer.Option(False, "--filter", "-f", help="过滤无效数据并保存"),
|
|
229
227
|
max_errors: int = typer.Option(20, "--max-errors", help="最多显示的错误数量"),
|
|
230
228
|
verbose: bool = typer.Option(False, "--verbose", "-v", help="显示详细信息"),
|
|
231
229
|
):
|
dtflow/cli/sample.py
CHANGED
|
@@ -20,7 +20,7 @@ from .common import (
|
|
|
20
20
|
def sample(
|
|
21
21
|
filename: str,
|
|
22
22
|
num: int = 10,
|
|
23
|
-
type: Literal["random", "head", "tail"] = "
|
|
23
|
+
type: Literal["random", "head", "tail"] = "random",
|
|
24
24
|
output: Optional[str] = None,
|
|
25
25
|
seed: Optional[int] = None,
|
|
26
26
|
by: Optional[str] = None,
|
|
@@ -37,7 +37,7 @@ def sample(
|
|
|
37
37
|
- num > 0: 采样指定数量
|
|
38
38
|
- num = 0: 采样所有数据
|
|
39
39
|
- num < 0: Python 切片风格(如 -1 表示最后 1 条,-10 表示最后 10 条)
|
|
40
|
-
type: 采样方式,可选 random/head/tail,默认
|
|
40
|
+
type: 采样方式,可选 random/head/tail,默认 random
|
|
41
41
|
output: 输出文件路径,不指定则打印到控制台
|
|
42
42
|
seed: 随机种子(仅在 type=random 时有效)
|
|
43
43
|
by: 分层采样字段名,按该字段的值分组采样
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dtflow
|
|
3
|
-
Version: 0.5.4
|
|
3
|
+
Version: 0.5.5
|
|
4
4
|
Summary: A flexible data transformation tool for ML training formats (SFT, RLHF, Pretrain)
|
|
5
5
|
Project-URL: Homepage, https://github.com/yourusername/DataTransformer
|
|
6
6
|
Project-URL: Documentation, https://github.com/yourusername/DataTransformer#readme
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
dtflow/__init__.py,sha256=
|
|
2
|
-
dtflow/__main__.py,sha256=
|
|
1
|
+
dtflow/__init__.py,sha256=6vE7u7NxILhzgsAoQzel0huISJhLLGgqVW2obUMz1xM,3031
|
|
2
|
+
dtflow/__main__.py,sha256=htJk04NOEZbDoZILnwCMKjG--NPWuwv-2ZbFXlk7jjM,12558
|
|
3
3
|
dtflow/converters.py,sha256=X3qeFD7FCOMnfiP3MicL5MXimOm4XUYBs5pczIkudU0,22331
|
|
4
4
|
dtflow/core.py,sha256=qMo6B3LK--TWRK7ZBKObGcs3pKFnd0NPoaM0T8JC7Jw,38135
|
|
5
5
|
dtflow/framework.py,sha256=jyICi_RWHjX7WfsXdSbWmP1SL7y1OWSPyd5G5Y-lvg4,17578
|
|
@@ -16,7 +16,7 @@ dtflow/cli/common.py,sha256=gCwnF5Sw2ploqfZJO_z3Ms9mR1HNT7Lj6ydHn0uVaIw,13817
|
|
|
16
16
|
dtflow/cli/io_ops.py,sha256=BMDisP6dxzzmSjYwmeFwaHmpHHPqirmXAWeNTD-9MQM,13254
|
|
17
17
|
dtflow/cli/lineage.py,sha256=_lNh35nF9AA0Zy6FyZ4g8IzrXH2ZQnp3inF-o2Hs1pw,1383
|
|
18
18
|
dtflow/cli/pipeline.py,sha256=QNEo-BJlaC1CVnVeRZr7TwfuZYloJ4TebIzJ5ALzry0,1426
|
|
19
|
-
dtflow/cli/sample.py,sha256=
|
|
19
|
+
dtflow/cli/sample.py,sha256=y_yRG2vBr300HZD4MosHZ4uAYhiTSqDxVN-ZMWTURog,10501
|
|
20
20
|
dtflow/cli/stats.py,sha256=u4ehCfgw1X8WuOyAjrApMRgcIO3BVmINbsTjxEscQro,24086
|
|
21
21
|
dtflow/cli/transform.py,sha256=w6xqMOxPxQvL2u_BPCfpDHuPSC9gmcqMPVN8s-B6bbY,15052
|
|
22
22
|
dtflow/cli/validate.py,sha256=65aGVlMS_Rq0Ch0YQ-TclVJ03RQP4CnG137wthzb8Ao,4384
|
|
@@ -31,7 +31,7 @@ dtflow/utils/__init__.py,sha256=Pn-ltwV04fBQmeZG7FxInDQmzH29LYOi90LgeLMEuQk,506
|
|
|
31
31
|
dtflow/utils/display.py,sha256=OeOdTh6mbDwSkDWlmkjfpTjy2QG8ZUaYU0NpHUWkpEQ,5881
|
|
32
32
|
dtflow/utils/field_path.py,sha256=K8nU196RxTSJ1OoieTWGcYOWl9KjGq2iSxCAkfjECuM,7621
|
|
33
33
|
dtflow/utils/helpers.py,sha256=JXN176_B2pm53GLVyZ1wj3wrmBJG52Tkw6AMQSdj7M8,791
|
|
34
|
-
dtflow-0.5.4.dist-info/METADATA,sha256=
|
|
35
|
-
dtflow-0.5.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
36
|
-
dtflow-0.5.4.dist-info/entry_points.txt,sha256=dadIDOK7Iu9pMxnMPBfpb4aAPe4hQbBOshpQYjVYpGc,44
|
|
37
|
-
dtflow-0.5.4.dist-info/RECORD,,
|
|
34
|
+
dtflow-0.5.5.dist-info/METADATA,sha256=SzaIuZqLFK-Ncnim1NmBUs7bdnqIJiGtPikbnrM-gsA,22544
|
|
35
|
+
dtflow-0.5.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
36
|
+
dtflow-0.5.5.dist-info/entry_points.txt,sha256=dadIDOK7Iu9pMxnMPBfpb4aAPe4hQbBOshpQYjVYpGc,44
|
|
37
|
+
dtflow-0.5.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|