dtflow-0.5.8-py3-none-any.whl → dtflow-0.5.10-py3-none-any.whl

dtflow/eval.py ADDED
@@ -0,0 +1,276 @@
+ """
+ Evaluation metrics module
+ 
+ Metric computation and evaluation-report export for classification tasks:
+ - MetricsCalculator: computes accuracy/precision/recall/F1/confusion matrix
+ - export_eval_report: generates metrics.md + result.jsonl + bad_case.jsonl
+ 
+ Dependencies: scikit-learn, pandas
+ Install: pip install dtflow[eval]
+ """
+ 
+ import os
+ from datetime import datetime
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Optional
+ 
+ if TYPE_CHECKING:
+     from pandas import DataFrame
+ 
+ 
+ def _check_eval_deps():
+     """Check that the eval dependencies are installed."""
+     try:
+         import pandas  # noqa: F401
+         import sklearn  # noqa: F401
+     except ImportError as e:
+         missing = str(e).split("'")[1] if "'" in str(e) else str(e)
+         raise ImportError(
+             f"The eval feature requires extra dependencies: {missing}\n" f"Please run: pip install dtflow[eval]"
+         ) from e
+ 
+ 
+ class MetricsCalculator:
+     """Classification metrics calculator.
+ 
+     Uses sklearn to compute accuracy/precision/recall/F1/confusion matrix/classification report.
+ 
+     Args:
+         df: DataFrame containing the prediction and label columns
+         pred_col: name of the prediction column
+         label_col: name of the label column
+         include_macro_micro_avg: whether to include macro/micro averages in the report
+         remove_matrix_zero_row: whether to drop rows with support=0 from the confusion matrix
+     """
+ 
+     def __init__(
+         self,
+         df: "DataFrame",
+         pred_col: str = "predict",
+         label_col: str = "label",
+         include_macro_micro_avg: bool = False,
+         remove_matrix_zero_row: bool = False,
+     ):
+         _check_eval_deps()
+         self.df = df
+         self.y_pred = df[pred_col]
+         self.y_true = df[label_col]
+         self.all_labels = sorted(set(self.y_true.unique()).union(set(self.y_pred.unique())))
+         self.needed_labels = None
+         self.remove_matrix_zero_row = remove_matrix_zero_row
+         self.include_macro_micro_avg = include_macro_micro_avg
+         self.metrics = self._calculate_metrics()
+ 
+     def _calculate_metrics(self):
+         from sklearn.metrics import (
+             accuracy_score,
+             classification_report,
+             confusion_matrix,
+             precision_score,
+             recall_score,
+         )
+ 
+         accuracy = accuracy_score(self.y_true, self.y_pred)
+         precision = precision_score(
+             self.y_true, self.y_pred, labels=self.all_labels, average="weighted", zero_division=0
+         )
+         recall = recall_score(
+             self.y_true, self.y_pred, labels=self.all_labels, average="weighted", zero_division=0
+         )
+         conf_matrix = confusion_matrix(self.y_true, self.y_pred, labels=self.all_labels)
+         report = classification_report(
+             self.y_true, self.y_pred, labels=self.all_labels, output_dict=True, zero_division=0
+         )
+ 
+         # By default keep only the weighted average
+         if not self.include_macro_micro_avg:
+             report = {
+                 label: metrics
+                 for label, metrics in report.items()
+                 if label in self.all_labels or label == "weighted avg"
+             }
+ 
+         # Drop classes with support=0 (note: accuracy is a float, not a dict)
+         report = {
+             label: metrics
+             for label, metrics in report.items()
+             if isinstance(metrics, dict) and metrics.get("support", 0) > 0
+         }
+ 
+         self.needed_labels = [label for label in report.keys() if label in self.all_labels]
+ 
+         # Optionally drop unneeded rows from the confusion matrix
+         needed_idx_list = [self.all_labels.index(label) for label in self.needed_labels]
+         if self.remove_matrix_zero_row:
+             conf_matrix = conf_matrix[needed_idx_list]
+ 
+         return {
+             "accuracy": accuracy,
+             "precision": precision,
+             "recall": recall,
+             "confusion_matrix": conf_matrix,
+             "classification_report": report,
+         }
+ 
+     def get_metrics(self):
+         return self.metrics
+ 
+     def format_classification_report_as_markdown(self):
+         """Format the classification report as a Markdown table."""
+         report = self.metrics["classification_report"]
+         header = "| Label | Precision | Recall | F1-score | Support |\n"
+         separator = "|-------|-----------|--------|----------|---------|\n"
+         rows = []
+         for label, metrics in report.items():
+             if isinstance(metrics, dict):
+                 rows.append(
+                     f"| {label} | {metrics['precision']:.2f} | {metrics['recall']:.2f} "
+                     f"| {metrics['f1-score']:.2f} | {metrics['support']:.0f} |"
+                 )
+         return header + separator + "\n".join(rows)
+ 
+     def _clean_label_for_markdown(self, label, max_length=20):
+         """Sanitize a label so it displays safely in a Markdown table."""
+         label = str(label).replace("\n", " ")
+         label = label.replace("|", "\\|")
+         label = label.replace("-", "\\-")
+         label = label.replace("<", "&lt;")
+         label = label.replace(">", "&gt;")
+         if len(label) > max_length:
+             label = label[:max_length] + "..."
+         label = label.strip()
+         if not label:
+             label = "(empty)"
+         return label
+ 
+     def format_confusion_matrix_as_markdown(self, max_label_length=20):
+         """Format the confusion matrix as a Markdown table."""
+         matrix = self.metrics["confusion_matrix"]
+ 
+         if self.remove_matrix_zero_row:
+             labels = self.needed_labels
+         else:
+             labels = self.all_labels
+ 
+         processed_labels = [self._clean_label_for_markdown(lb, max_label_length) for lb in self.all_labels]  # columns always span all labels; only rows are filtered
+ 
+         header = "| True/Predicted | " + " | ".join(processed_labels) + " |\n"
+         separator_parts = [":---:"] * (len(processed_labels) + 1)
+         separator = "| " + " | ".join(separator_parts) + " |\n"
+ 
+         rows = []
+         for i, row in enumerate(matrix):
+             row_label = self._clean_label_for_markdown(labels[i], max_label_length)
+             formatted_row = [f"{num:,}" for num in row]
+             rows.append(f"| {row_label} | " + " | ".join(formatted_row) + " |")
+ 
+         return header + separator + "\n".join(rows)
+ 
+ 
+ def export_eval_report(
+     df: "DataFrame",
+     pred_col: str,
+     label_col: str,
+     record_folder: str = "record",
+     input_name: Optional[str] = None,
+ ):
+     """Generate an evaluation report and save it to the target directory.
+ 
+     Output files:
+     - metrics.md: metrics overview + classification report + confusion matrix
+     - result.jsonl: full prediction results
+     - bad_case.jsonl: misclassified samples
+ 
+     Args:
+         df: DataFrame containing predictions and labels
+         pred_col: name of the prediction column
+         label_col: name of the label column
+         record_folder: root output directory
+         input_name: input file name (used to name the subdirectory)
+     """
+     from rich.console import Console
+     from rich.markdown import Markdown
+ 
+     calculator = MetricsCalculator(df, pred_col=pred_col, label_col=label_col)
+     metrics = calculator.get_metrics()
+ 
+     # Build the metrics overview with a Rich Table (replaces tabulate)
+     from rich.table import Table
+ 
+     overview_table = Table(title="Metrics Overview", show_header=True)
+     overview_table.add_column("Accuracy", justify="center")
+     overview_table.add_column("Precision", justify="center")
+     overview_table.add_column("Recall", justify="center")
+     overview_table.add_row(
+         f"{metrics['accuracy']:.4f}",
+         f"{metrics['precision']:.4f}",
+         f"{metrics['recall']:.4f}",
+     )
+ 
+     # Assemble the Markdown report body
+     md = (
+         f"\n\n### Metrics Overview\n\n"
+         f"| Accuracy | Precision | Recall |\n"
+         f"|----------|-----------|--------|\n"
+         f"| {metrics['accuracy']:.4f} | {metrics['precision']:.4f} | {metrics['recall']:.4f} |"
+     )
+     metrics_md = calculator.format_classification_report_as_markdown()
+     confusion_md = calculator.format_confusion_matrix_as_markdown()
+     md += f"\n\n### Classification Report\n{metrics_md}\n" f"\n### Confusion Matrix\n{confusion_md}"
+ 
+     # Create the output directory (numbered, with a timestamp)
+     now = datetime.now().strftime("%Y%m%d-%H-%M-%S")
+     record_path = Path(record_folder)
+     if input_name:
+         record_path = record_path / input_name
+ 
+     if record_path.exists():
+         existing = [d.name for d in record_path.iterdir() if d.is_dir()]
+         max_idx = 0
+         for name in existing:
+             parts = name.split("-", 1)
+             if parts[0].isdigit():
+                 max_idx = max(max_idx, int(parts[0]))
+         idx = max_idx + 1
+     else:
+         idx = 1
+ 
+     record_path = record_path / f"{idx}-{now}"
+     record_path.mkdir(parents=True, exist_ok=True)
+ 
+     # Terminal output
+     console = Console()
+     console.print(overview_table)
+     console.print(Markdown(md))
+ 
+     # Save files
+     with open(os.path.join(record_path, "metrics.md"), "w", encoding="utf-8") as f:
+         f.write(md)
+ 
+     bad_case_df = df[df[pred_col] != df[label_col]]
+ 
+     # Save JSONL
+     df.to_json(
+         os.path.join(record_path, "result.jsonl"),
+         orient="records",
+         lines=True,
+         force_ascii=False,
+     )
+     bad_case_df.to_json(
+         os.path.join(record_path, "bad_case.jsonl"),
+         orient="records",
+         lines=True,
+         force_ascii=False,
+     )
+ 
+     # Also try to save CSV
+     try:
+         df.to_csv(os.path.join(record_path, "result.csv"), index=False)
+         bad_case_df.to_csv(os.path.join(record_path, "bad_case.csv"), index=False)
+     except Exception:
+         pass
+ 
+     console.print(f"\n[green]Report saved to: {record_path}[/green]")
+     console.print(f"[dim] - metrics.md ({len(df)} records, {len(bad_case_df)} bad cases)[/dim]")
+ 
+     return record_path
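
A minimal usage sketch for the module above, assuming `pip install dtflow[eval]` (plus `rich`, which the report writer imports); the toy DataFrame and the `demo` directory name are illustrative, not part of the package:

```python
# Illustrative only: exercises MetricsCalculator and export_eval_report
# from the dtflow/eval.py added above.
import pandas as pd

from dtflow.eval import MetricsCalculator, export_eval_report

df = pd.DataFrame(
    {
        "predict": ["pos", "neg", "pos", "neg"],
        "label": ["pos", "pos", "pos", "neg"],
    }
)

calc = MetricsCalculator(df, pred_col="predict", label_col="label")
print(calc.get_metrics()["accuracy"])  # 0.75 (3 of 4 correct)
print(calc.format_classification_report_as_markdown())

# Writes record/demo/1-<timestamp>/metrics.md, result.jsonl, bad_case.jsonl
# (and result.csv/bad_case.csv when the DataFrame is CSV-serializable).
export_eval_report(df, pred_col="predict", label_col="label", input_name="demo")
```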
dtflow/utils/text_parser.py ADDED
@@ -0,0 +1,124 @@
+ """
+ Text cleaning utilities
+ 
+ Common cleaning helpers for LLM output:
+ - strip_think_tags: remove <think>...</think> chain-of-thought content
+ - extract_code_snippets: extract ``` code blocks
+ - parse_generic_tags: parse <tag>content</tag> style tags
+ """
+ 
+ import re
+ from typing import Dict, List
+ 
+ 
+ def strip_think_tags(text: str) -> str:
+     """Remove content wrapped in <think>...</think>.
+ 
+     Args:
+         text: input text
+ 
+     Returns:
+         The text with chain-of-thought content removed
+ 
+     Examples:
+         >>> strip_think_tags("<think>Let me think...</think>The answer is 42")
+         'The answer is 42'
+     """
+     if not text:
+         return text
+     return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
+ 
+ 
+ def extract_code_snippets(text: str, strict: bool = True) -> List[Dict[str, str]]:
+     """Extract ``` code blocks.
+ 
+     Args:
+         text: input text
+         strict: if True, match only ```lang...``` blocks; if False, also match {...} spans
+ 
+     Returns:
+         A list of snippets, each {"language": ..., "code": ...}
+ 
+     Examples:
+         >>> extract_code_snippets("```json\\n{\"a\": 1}\\n```")
+         [{'language': 'json', 'code': '{"a": 1}'}]
+     """
+     pattern = r"```(\w+)?\s*([\s\S]*?)```"
+     matches = re.findall(pattern, text)
+ 
+     code_snippets = []
+     for lang, code in matches:
+         code_snippets.append(
+             {
+                 "language": lang.strip() if lang else "unknown",
+                 "code": code.strip(),
+             }
+         )
+ 
+     if not strict:
+         # Remove the matched ``` blocks, then look for { ... } in the remaining text
+         text = re.sub(pattern, "", text)
+         brace_matches = re.findall(r"\{[\s\S]*?\}", text)
+         for code in brace_matches:
+             code_snippets.append(
+                 {
+                     "language": "unknown",
+                     "code": code.strip(),
+                 }
+             )
+ 
+     return code_snippets
+ 
+ 
+ def parse_generic_tags(text: str, strict: bool = False) -> Dict[str, str]:
+     """Parse XML-style tags.
+ 
+     Two modes are supported:
+     - strict=True: match closed tags only, <label>content</label>
+     - strict=False: also match open-ended tags <label>content, closed tags take priority
+ 
+     Args:
+         text: input text
+         strict: whether to use strict mode
+ 
+     Returns:
+         A {tag name: content} dict
+ 
+     Examples:
+         >>> parse_generic_tags("<tag>content</tag>")
+         {'tag': 'content'}
+         >>> parse_generic_tags("<a>hello<b>world", strict=False)
+         {'a': 'hello', 'b': 'world'}
+     """
+     if not text:
+         return {}
+ 
+     result = {}
+ 
+     if strict:
+         pattern_closed = r"<([^>]+)>\s*(.*?)\s*</\1>"
+         matches = re.findall(pattern_closed, text, re.DOTALL)
+         for label, content in matches:
+             result[label.strip()] = content.strip()
+     else:
+         remaining_text = str(text)
+ 
+         # 1. Handle closed tags first
+         def process_closed_tag(match_obj):
+             label = match_obj.group(1).strip()
+             content = match_obj.group(2).strip()
+             result[label] = content
+             return ""
+ 
+         pattern_closed = r"<([^>]+)>\s*(.*?)\s*</\1>"
+         remaining_text = re.sub(pattern_closed, process_closed_tag, remaining_text, flags=re.DOTALL)
+ 
+         # 2. Handle open-ended tags in the remaining text
+         pattern_open = r"<([^>]+)>\s*(.*?)(?=<[^>]+>|$)"
+         matches_open = re.findall(pattern_open, remaining_text, re.DOTALL)
+         for label, content in matches_open:
+             label_stripped = label.strip()
+             if label_stripped not in result:
+                 result[label_stripped] = content.strip()
+ 
+     return result
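
The helpers above are pure functions over strings; a quick sanity check mirroring the docstring examples (the inputs are made up for illustration):

```python
# Illustrative inputs; expected values follow the regex logic above.
from dtflow.utils.text_parser import (
    extract_code_snippets,
    parse_generic_tags,
    strip_think_tags,
)

assert strip_think_tags("<think>scratch work</think>The answer is 42") == "The answer is 42"

# strict=False also sweeps up bare {...} spans left outside ``` fences
snippets = extract_code_snippets('```json\n{"a": 1}\n```\n{"b": 2}', strict=False)
assert snippets == [
    {"language": "json", "code": '{"a": 1}'},
    {"language": "unknown", "code": '{"b": 2}'},
]

# Closed tags are consumed first; open-ended tags fill in from what remains
assert parse_generic_tags("<a>hello<b>world") == {"a": "hello", "b": "world"}
```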
dtflow-0.5.8.dist-info/METADATA → dtflow-0.5.10.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: dtflow
- Version: 0.5.8
+ Version: 0.5.10
  Summary: A flexible data transformation tool for ML training formats (SFT, RLHF, Pretrain)
  Project-URL: Homepage, https://github.com/yourusername/DataTransformer
  Project-URL: Documentation, https://github.com/yourusername/DataTransformer#readme
@@ -44,6 +44,7 @@ Requires-Dist: flake8>=3.9.0; extra == 'dev'
  Requires-Dist: huggingface-hub>=0.20.0; extra == 'dev'
  Requires-Dist: isort>=5.9.0; extra == 'dev'
  Requires-Dist: mypy>=0.910; extra == 'dev'
+ Requires-Dist: pandas>=1.3.0; extra == 'dev'
  Requires-Dist: pyarrow; extra == 'dev'
  Requires-Dist: pytest-cov>=2.12.0; extra == 'dev'
  Requires-Dist: pytest>=6.0.0; extra == 'dev'
@@ -57,10 +58,14 @@ Provides-Extra: docs
  Requires-Dist: myst-parser>=0.15.0; extra == 'docs'
  Requires-Dist: sphinx-rtd-theme>=0.5.0; extra == 'docs'
  Requires-Dist: sphinx>=4.0.0; extra == 'docs'
+ Provides-Extra: eval
+ Requires-Dist: pandas>=1.3.0; extra == 'eval'
+ Requires-Dist: scikit-learn>=0.24.0; extra == 'eval'
  Provides-Extra: full
  Requires-Dist: datasets>=2.0.0; extra == 'full'
  Requires-Dist: datasketch>=1.5.0; extra == 'full'
  Requires-Dist: huggingface-hub>=0.20.0; extra == 'full'
+ Requires-Dist: pandas>=1.3.0; extra == 'full'
  Requires-Dist: pyarrow; extra == 'full'
  Requires-Dist: rich>=10.0.0; extra == 'full'
  Requires-Dist: scikit-learn>=0.24.0; extra == 'full'
@@ -435,6 +440,13 @@ dt sample data.jsonl 1000 --by=messages.#   # Stratified sampling by message count
  dt sample data.jsonl --where="category=tech"      # Sample after filtering
  dt sample data.jsonl --where="messages.#>=2"      # Multiple filter conditions
 
+ # View by row range (Python slice syntax)
+ dt slice data.jsonl 10:20                         # Rows 10-19 (0-based, half-open)
+ dt slice data.jsonl :100                          # First 100 rows
+ dt slice data.jsonl 100:                          # Row 100 to the end
+ dt slice data.jsonl 10:20 -o sliced.jsonl         # Save to file
+ dt slice data.jsonl 10:20 -f question,answer      # Show only the given fields
+ 
  # Data transformation - preset modes
  dt transform data.jsonl --preset=openai_chat
  dt transform data.jsonl --preset=alpaca
@@ -469,6 +481,9 @@ dt clean data.jsonl --max-len=messages[-1].content:500   # Last message at most 500 chars
  dt clean data.jsonl --keep=question,answer        # Keep only these fields
  dt clean data.jsonl --drop=metadata               # Drop the given fields
  dt clean data.jsonl --strip                       # Strip leading/trailing whitespace from strings
+ dt clean data.jsonl --min-tokens=content:10       # At least 10 tokens
+ dt clean data.jsonl --max-tokens=content:1000     # At most 1000 tokens
+ dt clean data.jsonl --min-tokens=text:50 -m gpt-4 # Specify the tokenizer
 
  # Deduplication
  dt dedupe data.jsonl                              # Exact dedupe over full records
@@ -477,6 +492,17 @@ dt dedupe data.jsonl --key=meta.id   # Dedupe by nested field
  dt dedupe data.jsonl --key=messages[0].content    # Dedupe by first message content
  dt dedupe data.jsonl --key=text --similar=0.8     # Similarity dedupe
 
+ # Dataset splitting
+ dt split data.jsonl --ratio=0.8 --seed=42         # Two-way: train/test
+ dt split data.jsonl --ratio=0.7,0.15,0.15         # Three-way: train/val/test
+ dt split data.jsonl --ratio=0.8 -o /tmp/output    # Specify the output directory
+ 
+ # Export for training frameworks
+ dt export data.jsonl --framework=llama-factory    # Export to LLaMA-Factory
+ dt export data.jsonl -f swift -o ./swift_out      # Export to ms-swift
+ dt export data.jsonl -f axolotl                   # Export to Axolotl
+ dt export data.jsonl -f llama-factory --check     # Check compatibility only
+ 
  # File concatenation
  dt concat a.jsonl b.jsonl -o merged.jsonl
 
@@ -522,6 +548,8 @@ Field arguments in CLI commands support a nested-path syntax for accessing deeply nested
  | `clean` | `--drop-empty=` | `--drop-empty=meta.source` |
  | `clean` | `--min-len=` | `--min-len=messages.#:2` |
  | `clean` | `--max-len=` | `--max-len=messages[-1].content:500` |
+ | `clean` | `--min-tokens=` | `--min-tokens=content:10` |
+ | `clean` | `--max-tokens=` | `--max-tokens=content:1000` |
  | `token-stats` | `--field=` | `--field=messages[-1].content` |
  | `diff` | `--key=` | `--key=meta.uuid` |
 
dtflow-0.5.8.dist-info/RECORD → dtflow-0.5.10.dist-info/RECORD
@@ -1,8 +1,9 @@
- dtflow/SKILL.md,sha256=nh12TTq_eRzl5O2CTgsiS809BBVR49kmpZ8n7UprMHI,9552
- dtflow/__init__.py,sha256=tofhUr_PMnsONnB3Hu-mwUrD4Q3bV7Kw_0S6dQw6ig8,3031
- dtflow/__main__.py,sha256=p8oZKQhwq04shCB3y_pkXjf-SZ4PZvg5PXdyUP-5rYA,13497
+ dtflow/SKILL.md,sha256=Oq8Kb5JghZMJ1WoP8OWhX3qAWaUY9Sip_iWAv8S2eMg,10567
+ dtflow/__init__.py,sha256=2A-P6k9VBIWZXRgXwYPFOwHMCmgkfKZVYuGuBziqqhc,3032
+ dtflow/__main__.py,sha256=_wrpYfOog6G83I17yuBe-hryBsaCrIwbXSEnzT-r28g,18008
  dtflow/converters.py,sha256=X3qeFD7FCOMnfiP3MicL5MXimOm4XUYBs5pczIkudU0,22331
  dtflow/core.py,sha256=qMo6B3LK--TWRK7ZBKObGcs3pKFnd0NPoaM0T8JC7Jw,38135
+ dtflow/eval.py,sha256=_c-XP2zsOBznYltSyKEScOqvmPVX2orqepg5cNhXXB0,9836
  dtflow/framework.py,sha256=jyICi_RWHjX7WfsXdSbWmP1SL7y1OWSPyd5G5Y-lvg4,17578
  dtflow/lineage.py,sha256=jie3OL1qK90-_cOOqqLbhSJ1oGUktDM1x5HRpQ5Qiyc,12800
  dtflow/parallel.py,sha256=EnIdGEGMrZUNT2-CBIV93UFfpqr_jU_heqqvdGXcP-Y,3046
@@ -12,14 +13,17 @@ dtflow/schema.py,sha256=zCZNEAqTMT1BS_p2t0CYczR5S9rqyDREa7ZsYI5pFGA,19885
  dtflow/streaming.py,sha256=dxpNd1-Wz_PTLTdvM5qn06_2TJr5NRlIIuw0LOSS2Iw,24755
  dtflow/tokenizers.py,sha256=GFQsuLSLn2GHn2kaXhJkP8G85lgsdLzYtJNbppQhYPE,23408
  dtflow/cli/__init__.py,sha256=QhZ-thgx9IBTFII7T_hdoWFUl0CCsdGQHN5ZEZw2XB0,423
- dtflow/cli/clean.py,sha256=KuE9ODjD9gSZUIHaD2mQLTDO-1PDwN7EqUpj8EQfVCs,25663
- dtflow/cli/commands.py,sha256=zKUG-B9Az-spqyqM00cR8Sgc2UgeOPQDThJFHWDNO_w,1336
+ dtflow/cli/clean.py,sha256=BEQQlH2q6luCbx51M3oxxOwcnwlOA8vo9WX3Fp7I6AY,29498
+ dtflow/cli/commands.py,sha256=LvyDQ_nWUM7UlPDEFQadRdw5O2ZKDLgF41_xAJRhYxI,1583
  dtflow/cli/common.py,sha256=gCwnF5Sw2ploqfZJO_z3Ms9mR1HNT7Lj6ydHn0uVaIw,13817
+ dtflow/cli/eval.py,sha256=c53kCRH86k2Q_6vESKFlcepcNnTpO9O68agWK4_oJj8,9582
+ dtflow/cli/export.py,sha256=loRfVPwEVsDw3ZMKEYGp0Hy38kYZG2QT8JCMbz1dRzU,2156
  dtflow/cli/io_ops.py,sha256=BMDisP6dxzzmSjYwmeFwaHmpHHPqirmXAWeNTD-9MQM,13254
  dtflow/cli/lineage.py,sha256=_lNh35nF9AA0Zy6FyZ4g8IzrXH2ZQnp3inF-o2Hs1pw,1383
  dtflow/cli/pipeline.py,sha256=QNEo-BJlaC1CVnVeRZr7TwfuZYloJ4TebIzJ5ALzry0,1426
- dtflow/cli/sample.py,sha256=pubpx4AIzsarBEalD150MC2apYQSt4bal70IZkTfFO0,15475
+ dtflow/cli/sample.py,sha256=etbro5I0pyNgn0Qfhp1M6Bh-95JN-AntDa5AwVe_oKY,18269
  dtflow/cli/skill.py,sha256=opiTEBejA7JHKrEMftMOPDQlOgZ4n59rwaHXGU1Nukk,2022
+ dtflow/cli/split.py,sha256=96bhWnxHnjIqifoliLgciApkLbwQU8bWHovK8bcMk9g,3667
  dtflow/cli/stats.py,sha256=HkTZD80h4tzYXTtMnfpjLUMP6kl_es6ifcmExxzGdMU,31813
  dtflow/cli/transform.py,sha256=w6xqMOxPxQvL2u_BPCfpDHuPSC9gmcqMPVN8s-B6bbY,15052
  dtflow/cli/validate.py,sha256=Frs-jKcDHmYozpmIYZueDSX5o2i1Xn-WW81FGUyUrng,5796
@@ -29,7 +33,8 @@ dtflow/utils/__init__.py,sha256=Pn-ltwV04fBQmeZG7FxInDQmzH29LYOi90LgeLMEuQk,506
  dtflow/utils/display.py,sha256=OeOdTh6mbDwSkDWlmkjfpTjy2QG8ZUaYU0NpHUWkpEQ,5881
  dtflow/utils/field_path.py,sha256=K8nU196RxTSJ1OoieTWGcYOWl9KjGq2iSxCAkfjECuM,7621
  dtflow/utils/helpers.py,sha256=JXN176_B2pm53GLVyZ1wj3wrmBJG52Tkw6AMQSdj7M8,791
- dtflow-0.5.8.dist-info/METADATA,sha256=Tm_dfdQfGlShyDt95fNQ87JXiBRnf6mfDgx827h3Rnc,24487
- dtflow-0.5.8.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- dtflow-0.5.8.dist-info/entry_points.txt,sha256=dadIDOK7Iu9pMxnMPBfpb4aAPe4hQbBOshpQYjVYpGc,44
- dtflow-0.5.8.dist-info/RECORD,,
+ dtflow/utils/text_parser.py,sha256=0t2TMOSha4dTiDu9H4ygdb67cI20zhtBH1XavDspL_g,3727
+ dtflow-0.5.10.dist-info/METADATA,sha256=OGefMoe17by5IbxdxZgqoJ1Y6OWPt_iGEFM4KgltRZw,26023
+ dtflow-0.5.10.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ dtflow-0.5.10.dist-info/entry_points.txt,sha256=dadIDOK7Iu9pMxnMPBfpb4aAPe4hQbBOshpQYjVYpGc,44
+ dtflow-0.5.10.dist-info/RECORD,,