dtflow 0.4.2__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,486 @@
1
+ """
2
+ CLI 数据转换相关命令
3
+ """
4
+
5
+ import os
6
+ import shutil
7
+ import tempfile
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Any, Dict, List, Optional
11
+
12
+ import orjson
13
+
14
+ from ..core import DataTransformer, DictWrapper
15
+ from ..presets import get_preset, list_presets
16
+ from ..storage.io import load_data, save_data
17
+ from ..streaming import load_stream
18
+ from .common import _check_file_format, _is_streaming_supported
19
+
20
+ CONFIG_DIR = ".dt"
21
+
22
+
23
def _get_config_path(input_path: Path, config_override: Optional[str] = None) -> Path:
    """Resolve the location of the transform config file.

    Args:
        input_path: Path of the data file being transformed.
        config_override: Explicit config path; when given (and non-empty) it
            is returned as-is, wrapped in ``Path``.

    Returns:
        ``config_override`` if provided, otherwise
        ``<input dir>/<CONFIG_DIR>/<input stem>.py``.
    """
    if not config_override:
        # Default: name the config after the input file (extension dropped).
        return input_path.parent / CONFIG_DIR / (input_path.stem + ".py")
    return Path(config_override)
31
+
32
+
33
def transform(
    filename: str,
    num: Optional[int] = None,
    preset: Optional[str] = None,
    config: Optional[str] = None,
    output: Optional[str] = None,
) -> None:
    """
    Convert a data file to another format.

    Two ways to use it:
        1. Config-file mode (default): a config file is generated on the
           first run; edit it, then run the command again to convert.
        2. Preset mode: pass --preset to convert directly.

    Args:
        filename: Input file path; csv/excel/jsonl/json/parquet/arrow/feather
            formats are supported.
        num: Convert only the first N records (optional).
        preset: Preset template to use (openai_chat, alpaca, sharegpt,
            dpo_pair, simple_qa).
        config: Config file path (optional, default .dt/<filename>.py).
        output: Output file path.

    Examples:
        dt transform data.jsonl                        # first run: generate config
        dt transform data.jsonl 10                     # convert the first 10 records only
        dt transform data.jsonl --preset=openai_chat   # use a preset
        dt transform data.jsonl 100 --preset=alpaca    # preset + record limit
    """
    source = Path(filename)
    if not source.exists():
        print(f"错误: 文件不存在 - {filename}")
        return

    if not _check_file_format(source):
        return

    # Preset mode: convert straight away with the named preset.
    if preset:
        _execute_preset_transform(source, preset, output, num)
        return

    # Config-file mode: run the config if it exists, otherwise scaffold one.
    cfg_file = _get_config_path(source, config)
    if cfg_file.exists():
        _execute_transform(source, cfg_file, output, num)
    else:
        _generate_config(source, cfg_file)
80
+
81
+
82
def _generate_config(input_path: Path, config_path: Path) -> None:
    """Inspect the input data and write a starter config file.

    Loads the whole input with ``load_data``, uses the first record as the
    sample for the generated ``Item`` class, writes the config to
    ``config_path`` (creating parent directories) and prints next steps.
    Problems are reported with ``print`` and the function returns early.
    """
    print(f"📊 分析输入数据: {input_path}")

    try:
        records = load_data(str(input_path))
    except Exception as e:
        print(f"错误: 无法读取文件 - {e}")
        return

    if not records:
        print("错误: 文件为空")
        return

    record_count = len(records)
    first_record = records[0]
    print(f" 检测到 {record_count} 条数据")

    rendered = _build_config_content(first_record, input_path.name, record_count)

    # Make sure the config directory exists before writing the file.
    config_path.parent.mkdir(parents=True, exist_ok=True)
    config_path.write_text(rendered, encoding="utf-8")

    for message in (
        f"\n📝 已生成配置文件: {config_path}",
        "\n👉 下一步:",
        f" 1. 编辑 {config_path},定义 transform 函数",
        f" 2. 再次执行 dt transform {input_path.name} 完成转换",
    ):
        print(message)
115
+
116
+
117
def _build_config_content(sample: Dict[str, Any], filename: str, total: int) -> str:
    """Build the source text of a starter config module.

    Args:
        sample: One input record; its keys and values drive the generated
            ``Item`` class and the default ``transform`` body.
        filename: Name of the input file (shown in the header and used to
            derive the default output file name).
        total: Number of records in the input (shown in the header).

    Returns:
        Python source for the config file: a module docstring, an ``Item``
        class, a default ``transform`` function, an ``output`` path and
        commented examples.
    """
    import json

    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Field definitions for the Item class.
    fields_def = _generate_fields_definition(sample)

    # Default transform body (a plain field-by-field copy).
    field_names = list(sample.keys())
    default_body = _generate_default_transform(field_names)

    # Sanitized attribute names used in the commented examples.
    safe_field1 = _sanitize_field_name(field_names[0])[0] if field_names else "field1"
    safe_field2 = _sanitize_field_name(field_names[1])[0] if len(field_names) > 1 else "field2"

    # Default output file name, emitted as a properly escaped string literal
    # so that quotes or backslashes in the name cannot break the config.
    output_filename = f"{Path(filename).stem}_output.jsonl"
    output_literal = json.dumps(output_filename, ensure_ascii=False)

    # The file name lands inside a triple-quoted docstring: neutralise
    # backslashes (invalid escapes) and any embedded closing delimiter.
    doc_filename = filename.replace("\\", "\\\\").replace('"""', '\\"\\"\\"')

    return f'''"""
DataTransformer 配置文件
生成时间: {now}
输入文件: {doc_filename} ({total} 条)
"""


# ===== 输入数据结构(自动生成,IDE 可补全)=====

class Item:
{fields_def}


# ===== 定义转换逻辑 =====
# 提示:输入 item. 后 IDE 会自动补全可用字段

def transform(item: Item):
    return {{
{default_body}
    }}


# 输出文件路径
output = {output_literal}


# ===== 示例 =====
#
# 示例1: 构建 OpenAI Chat 格式
# def transform(item: Item):
#     return {{
#         "messages": [
#             {{"role": "user", "content": item.{safe_field1}}},
#             {{"role": "assistant", "content": item.{safe_field2}}},
#         ]
#     }}
#
# 示例2: Alpaca 格式
# def transform(item: Item):
#     return {{
#         "instruction": item.{safe_field1},
#         "input": "",
#         "output": item.{safe_field2},
#     }}
'''
181
+
182
+
183
def _generate_fields_definition(sample: Dict[str, Any], indent: int = 4) -> str:
    """Render the attribute lines of the generated ``Item`` class.

    Each key of ``sample`` becomes ``<name>: <type> = <example>``, indented
    by ``indent`` spaces. When the key had to be sanitized, a trailing
    comment records the original field name. An empty sample yields an
    indented ``pass``.
    """
    pad = " " * indent
    rendered = []

    for original_key, value in sample.items():
        type_label = _get_type_name(value)
        example_text = _format_example_value(value)
        attr_name, was_renamed = _sanitize_field_name(original_key)
        note = f" # 原字段名: {original_key}" if was_renamed else ""
        rendered.append(f"{pad}{attr_name}: {type_label} = {example_text}{note}")

    if not rendered:
        return f"{pad}pass"
    return "\n".join(rendered)
196
+
197
+
198
def _get_type_name(value: Any) -> str:
    """Return the annotation name used for ``value`` in the generated class.

    Recognises str, bool, int, float, list and dict; everything else
    (including ``None``) is reported as ``"str"``.
    """
    # Order matters: bool must be tested before int (bool subclasses int).
    known_types = (
        (str, "str"),
        (bool, "bool"),
        (int, "int"),
        (float, "float"),
        (list, "list"),
        (dict, "dict"),
    )
    for py_type, label in known_types:
        if isinstance(value, py_type):
            return label
    return "str"
215
+
216
+
217
def _format_example_value(value: Any, max_len: int = 50) -> str:
    """Format ``value`` as Python source for use as a default in the config.

    The result is pasted after ``=`` in the generated ``Item`` class, so it
    must always be a valid Python expression.

    Args:
        value: Sample value taken from the first input record.
        max_len: Maximum length of the rendered text before it is truncated
            and shown as a quoted preview ending in ``...``.

    Returns:
        Source text: a quoted string for str (truncated when long), the
        literal for bool/int/float, a Python literal for list/dict (or a
        quoted preview when too long or not expressible as a literal), and
        ``""`` for ``None`` and unsupported types.
    """
    import ast
    import math

    if value is None:
        return '""'
    if isinstance(value, str):
        # Truncate long strings; repr() takes care of all escaping.
        if len(value) > max_len:
            value = value[:max_len] + "..."
        return repr(value)
    if isinstance(value, bool):
        return str(value)
    if isinstance(value, float) and not math.isfinite(value):
        # str(nan) / str(inf) are bare names, not valid source text.
        return f'float("{value}")'
    if isinstance(value, (int, float)):
        return str(value)
    if isinstance(value, (list, dict)):
        # repr() gives Python literals (True/False/None); JSON text would
        # contain true/false/null and fail when the config is imported.
        literal = repr(value)
        if len(literal) > max_len:
            return repr(literal[:max_len] + "...")
        try:
            ast.literal_eval(literal)
        except (ValueError, SyntaxError):
            # e.g. nested nan/inf or exotic objects: fall back to a preview.
            return repr(literal)
        return literal
    return '""'
237
+
238
+
239
def _sanitize_field_name(name: str) -> tuple:
    """
    Normalize a field name into a legal Python attribute name.

    Names that are already identifiers (and not reserved keywords) are
    returned unchanged. Otherwise every character that cannot appear in an
    identifier is replaced by ``_``, a name that cannot start an identifier
    (e.g. leading digit) gets an ``f_`` prefix, an empty name becomes
    ``field`` and a reserved keyword gets a trailing ``_``.

    Returns:
        tuple: (normalized name, whether the name was modified)
    """
    import keyword

    # Keywords such as "class" or "None" satisfy isidentifier() but cannot
    # be used as attribute names in generated source.
    if name.isidentifier() and not keyword.iskeyword(name):
        return name, False

    # Replace anything that is not a valid identifier-continuation character.
    cleaned = "".join(ch if ("_" + ch).isidentifier() else "_" for ch in name)

    if not cleaned:
        cleaned = "field"
    elif not cleaned[0].isidentifier():
        # Digits (and similar) may continue an identifier but not start one.
        cleaned = "f_" + cleaned

    if keyword.iskeyword(cleaned):
        cleaned += "_"

    return cleaned, True
264
+
265
+
266
def _generate_default_transform(field_names: List[str]) -> str:
    """Generate the body lines of the default ``transform`` dict literal.

    At most the first five fields are listed, each as
    ``"<original name>": item.<sanitized name>,``. The key is emitted as an
    escaped, double-quoted string literal so names containing quotes or
    backslashes still produce valid source. With no fields, a placeholder
    comment is returned instead.
    """
    import json

    entries = []
    for name in field_names[:5]:  # show at most 5 fields
        safe_name, _ = _sanitize_field_name(name)
        key_literal = json.dumps(name, ensure_ascii=False)
        entries.append(f"        {key_literal}: item.{safe_name},")
    return "\n".join(entries) if entries else "        # 在这里定义输出字段"
273
+
274
+
275
def _unwrap(obj: Any) -> Any:
    """Recursively replace wrapper objects by plain containers.

    Any object exposing ``to_dict`` is replaced by the result of calling it
    (repeatedly, until the result no longer has ``to_dict``); dicts and
    lists are rebuilt with their values/elements unwrapped. Everything else
    is returned unchanged.
    """
    # Peel wrapper layers first.
    while hasattr(obj, "to_dict"):
        obj = obj.to_dict()

    if isinstance(obj, dict):
        return {key: _unwrap(val) for key, val in obj.items()}
    if isinstance(obj, list):
        return list(map(_unwrap, obj))
    return obj
284
+
285
+
286
def _execute_transform(
    input_path: Path,
    config_path: Path,
    output_override: Optional[str],
    num: Optional[int],
) -> None:
    """Run a conversion driven by a config file (streaming when supported).

    Args:
        input_path: Data file to convert.
        config_path: Python config file that must define ``transform`` and
            may define ``output``.
        output_override: Output path; takes precedence over the config's
            ``output`` (which defaults to ``output.jsonl``).
        num: When truthy, only the first ``num`` records are converted.

    Errors are reported with ``print`` (plus a traceback for conversion
    failures) and the function returns without raising.
    """
    import traceback

    print(f"📂 加载配置: {config_path}")

    # Load the config module dynamically.
    try:
        config_ns = _load_config(config_path)
    except Exception as e:
        print(f"错误: 无法加载配置文件 - {e}")
        return

    if "transform" not in config_ns:
        print("错误: 配置文件中未定义 transform 函数")
        return

    transform_func = config_ns["transform"]
    output_path = output_override or config_ns.get("output", "output.jsonl")

    # Streaming path for formats that support it.
    if _is_streaming_supported(input_path):
        print(f"📊 流式加载: {input_path}")
        print("🔄 执行转换...")

        # Streaming straight into the file being read would destroy the
        # input, so write to a temp file in the same directory and swap it
        # in afterwards (same safeguard as the preset path).
        output_resolved = Path(output_path).resolve()
        temp_path = None
        if input_path.resolve() == output_resolved:
            print("⚠ 检测到输出文件与输入文件相同,将使用临时文件")
            temp_fd, temp_path = tempfile.mkstemp(
                suffix=output_resolved.suffix,
                prefix=".tmp_",
                dir=output_resolved.parent,
            )
            os.close(temp_fd)

        # Wrap the user function so records support attribute access and
        # results are converted back to plain containers.
        def wrapped_transform(item):
            result = transform_func(DictWrapper(item))
            return _unwrap(result)

        try:
            st = load_stream(str(input_path))
            if num:
                st = st.head(num)
            count = st.transform(wrapped_transform).save(temp_path or output_path)

            if temp_path is not None:
                os.replace(temp_path, output_resolved)
                temp_path = None  # moved into place; nothing left to clean up

            print(f"💾 保存结果: {output_path}")
            print(f"\n✅ 完成! 已转换 {count} 条数据到 {output_path}")
        except Exception as e:
            print(f"错误: 转换失败 - {e}")
            traceback.print_exc()
        finally:
            # Also runs on KeyboardInterrupt, so no temp file is left behind.
            if temp_path is not None and os.path.exists(temp_path):
                os.unlink(temp_path)
        return

    # Non-streaming formats: load everything, transform, then save.
    print(f"📊 加载数据: {input_path}")
    try:
        dt = DataTransformer.load(str(input_path))
    except Exception as e:
        print(f"错误: 无法读取文件 - {e}")
        return

    total = len(dt)
    if num:
        dt = DataTransformer(dt.data[:num])
        print(f" 处理前 {len(dt)}/{total} 条数据")
    else:
        print(f" 共 {total} 条数据")

    print("🔄 执行转换...")
    try:
        results = dt.to(transform_func)
    except Exception as e:
        print(f"错误: 转换失败 - {e}")
        traceback.print_exc()
        return

    print(f"💾 保存结果: {output_path}")
    try:
        save_data(results, output_path)
    except Exception as e:
        print(f"错误: 无法保存文件 - {e}")
        return

    print(f"\n✅ 完成! 已转换 {len(results)} 条数据到 {output_path}")
370
+
371
+
372
def _execute_preset_transform(
    input_path: Path,
    preset_name: str,
    output_override: Optional[str],
    num: Optional[int],
) -> None:
    """Run a conversion using a named preset (streaming when supported).

    Args:
        input_path: Data file to convert.
        preset_name: Name passed to ``get_preset``; on ``ValueError`` the
            available presets are listed and the function returns.
        output_override: Output path; defaults to
            ``<input stem>_<preset_name>.jsonl``.
        num: When truthy, only the first ``num`` records are converted.

    Errors are reported with ``print`` (plus a traceback for conversion
    failures) and the function returns without raising.
    """
    import traceback

    print(f"📂 使用预设: {preset_name}")

    try:
        transform_func = get_preset(preset_name)
    except ValueError as e:
        print(f"错误: {e}")
        print(f"可用预设: {', '.join(list_presets())}")
        return

    output_path = output_override or f"{input_path.stem}_{preset_name}.jsonl"

    # Streaming path for formats that support it.
    if _is_streaming_supported(input_path):
        print(f"📊 流式加载: {input_path}")
        print("🔄 执行转换...")

        # When output and input are the same file, write to a temp file in
        # the same directory and swap it in once the conversion succeeded.
        output_resolved = Path(output_path).resolve()
        temp_path = None
        if input_path.resolve() == output_resolved:
            print("⚠ 检测到输出文件与输入文件相同,将使用临时文件")
            temp_fd, temp_path = tempfile.mkstemp(
                suffix=output_resolved.suffix,
                prefix=".tmp_",
                dir=output_resolved.parent,
            )
            os.close(temp_fd)

        # Wrap the preset so records support attribute access and results
        # are converted back to plain containers.
        def wrapped_transform(item):
            result = transform_func(DictWrapper(item))
            return _unwrap(result)

        try:
            st = load_stream(str(input_path))
            if num:
                st = st.head(num)
            count = st.transform(wrapped_transform).save(temp_path or output_path)

            if temp_path is not None:
                # Atomic overwrite on the same filesystem.
                os.replace(temp_path, output_resolved)
                temp_path = None  # moved into place; nothing left to clean up

            print(f"💾 保存结果: {output_path}")
            print(f"\n✅ 完成! 已转换 {count} 条数据到 {output_path}")
        except Exception as e:
            print(f"错误: 转换失败 - {e}")
            traceback.print_exc()
        finally:
            # Also runs on KeyboardInterrupt, so no temp file is left behind.
            if temp_path is not None and os.path.exists(temp_path):
                os.unlink(temp_path)
        return

    # Non-streaming formats: load everything, transform, then save.
    print(f"📊 加载数据: {input_path}")
    try:
        dt = DataTransformer.load(str(input_path))
    except Exception as e:
        print(f"错误: 无法读取文件 - {e}")
        return

    total = len(dt)
    if num:
        dt = DataTransformer(dt.data[:num])
        print(f" 处理前 {len(dt)}/{total} 条数据")
    else:
        print(f" 共 {total} 条数据")

    print("🔄 执行转换...")
    try:
        results = dt.to(transform_func)
    except Exception as e:
        print(f"错误: 转换失败 - {e}")
        traceback.print_exc()
        return

    print(f"💾 保存结果: {output_path}")
    try:
        save_data(results, output_path)
    except Exception as e:
        print(f"错误: 无法保存文件 - {e}")
        return

    print(f"\n✅ 完成! 已转换 {len(results)} 条数据到 {output_path}")
476
+
477
+
478
def _load_config(config_path: Path) -> Dict[str, Any]:
    """Import a Python config file and return its public names.

    Args:
        config_path: Path of the config file; it is executed as a module
            named ``dt_config``.

    Returns:
        Mapping of every module attribute whose name does not start with
        ``_`` to its value.

    Raises:
        ImportError: If no import spec/loader can be created for the path
            (for example, the file does not have a ``.py`` suffix).
        Exception: Whatever executing the config file raises.
    """
    import importlib.util

    spec = importlib.util.spec_from_file_location("dt_config", config_path)
    # spec_from_file_location returns None when no loader matches the file
    # suffix; fail with a clear message instead of an AttributeError.
    if spec is None or spec.loader is None:
        raise ImportError(f"不支持的配置文件类型(需要 .py 文件): {config_path}")

    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    return {name: getattr(module, name) for name in dir(module) if not name.startswith("_")}
dtflow/core.py CHANGED
@@ -351,6 +351,41 @@ class DataTransformer:
351
351
  tracker.record("tail", {"n": n}, len(self._data), len(data))
352
352
  return DataTransformer(data, _lineage_tracker=tracker)
353
353
 
354
def validate(
    self,
    func: Callable[[Any], bool],
    raw: bool = False,
) -> List[TransformError]:
    """
    Validate the data and return the records that do not pass.

    Args:
        func: Validation function; a truthy result means the record passes,
            a falsy result means it fails. An exception raised by ``func``
            also counts as a failure and is stored as the error.
        raw: Raw mode; pass each record to ``func`` as-is instead of
            wrapping it in ``DictWrapper``.

    Returns:
        List of ``TransformError`` entries (index, item, error), one per
        failing record, in input order.

    Examples:
        >>> dt = DataTransformer([{"a": 1}, {"a": -1}])
        >>> errors = dt.validate(lambda x: x.a > 0)
        >>> len(errors)  # 1
        >>> errors[0].index  # 1
    """
    prepare = (lambda x: x) if raw else DictWrapper
    failures = []

    for index, record in enumerate(self._data):
        try:
            # Truthiness is evaluated inside the try so that errors raised
            # while interpreting the result are recorded as well.
            problem = None if func(prepare(record)) else ValueError("验证未通过")
        except Exception as exc:
            problem = exc
        if problem is not None:
            failures.append(TransformError(index=index, item=record, error=problem))

    return failures
388
+
354
389
  def dedupe(
355
390
  self,
356
391
  key: Union[None, str, List[str], Callable[[Any], Any]] = None,
dtflow/storage/io.py CHANGED
@@ -115,13 +115,40 @@ def _save_jsonl(data: List[Dict[str, Any]], filepath: Path) -> None:
115
115
 
116
116
 
117
117
def _load_jsonl(filepath: Path) -> List[Dict[str, Any]]:
    """Load data from JSONL format.

    Each non-blank line is parsed with orjson. The first line orjson rejects
    (e.g. non-standard JSON such as NaN) is retried with the standard
    ``json`` module; if that succeeds a warning is written to stderr and
    all remaining lines are parsed with ``json``. If ``json`` rejects the
    line too, its ``JSONDecodeError`` propagates.
    """
    import json
    import sys

    records = []
    stdlib_only = False

    with open(filepath, "rb") as handle:
        for line_no, raw_line in enumerate(handle, start=1):
            payload = raw_line.strip()
            if not payload:
                continue

            if stdlib_only:
                # Fallback already confirmed: go straight to standard json.
                records.append(json.loads(payload))
                continue

            try:
                records.append(orjson.loads(payload))
            except orjson.JSONDecodeError:
                # Retry with standard json (accepts NaN/Infinity); a failure
                # here is left to propagate.
                records.append(json.loads(payload))
                stdlib_only = True
                print(
                    f"[Warning] 第 {line_no} 行包含非标准 JSON(如 NaN),已切换到标准 json 解析",
                    file=sys.stderr,
                )

    return records
126
153
 
127
154
 
@@ -135,9 +162,25 @@ def _save_json(data: List[Dict[str, Any]], filepath: Path) -> None:
135
162
 
136
163
 
137
164
  def _load_json(filepath: Path) -> List[Dict[str, Any]]:
138
- """Load data from JSON format."""
165
+ """Load data from JSON format.
166
+
167
+ 使用 orjson 解析,如果失败(如遇到 NaN 等非标准 JSON)则回退到标准 json。
168
+ """
169
+ import json
170
+ import sys
171
+
139
172
  with open(filepath, "rb") as f:
140
- data = orjson.loads(f.read())
173
+ content = f.read()
174
+
175
+ try:
176
+ data = orjson.loads(content)
177
+ except orjson.JSONDecodeError:
178
+ # orjson 解析失败,回退到标准 json
179
+ print(
180
+ f"[Warning] 文件包含非标准 JSON(如 NaN),使用标准 json 解析",
181
+ file=sys.stderr,
182
+ )
183
+ data = json.loads(content)
141
184
 
142
185
  if not isinstance(data, list):
143
186
  data = [data]
dtflow/streaming.py CHANGED
@@ -622,12 +622,33 @@ def process_shards(
622
622
 
623
623
 
624
624
def _stream_jsonl(filepath: str) -> Generator[Dict[str, Any], None, None]:
    """Stream records from a JSONL file (orjson, falling back to json).

    Blank lines are skipped. The first line orjson rejects is retried with
    the standard ``json`` module; once the consumer asks for the next
    record, a warning is written to stderr and every later line is parsed
    with ``json``. If ``json`` rejects the line too, its error propagates.
    """
    import json
    import sys

    stdlib_only = False

    with open(filepath, "rb") as handle:
        for line_no, raw_line in enumerate(handle, start=1):
            payload = raw_line.strip()
            if not payload:
                continue

            if stdlib_only:
                yield json.loads(payload)
                continue

            try:
                yield orjson.loads(payload)
            except orjson.JSONDecodeError:
                # Yield first, then switch mode and warn (original order).
                yield json.loads(payload)
                stdlib_only = True
                print(
                    f"[Warning] 第 {line_no} 行包含非标准 JSON(如 NaN),已切换到标准 json 解析",
                    file=sys.stderr,
                )
631
652
 
632
653
 
633
654
  def _stream_csv(filepath: str, batch_size: int = 10000) -> Generator[Dict[str, Any], None, None]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dtflow
3
- Version: 0.4.2
3
+ Version: 0.4.3
4
4
  Summary: A flexible data transformation tool for ML training formats (SFT, RLHF, Pretrain)
5
5
  Project-URL: Homepage, https://github.com/yourusername/DataTransformer
6
6
  Project-URL: Documentation, https://github.com/yourusername/DataTransformer#readme
@@ -126,6 +126,17 @@ dt.filter(lambda x: x.score > 0.8)
126
126
  dt.filter(lambda x: x.language == "zh")
127
127
  ```
128
128
 
129
+ ### 数据验证
130
+
131
+ ```python
132
+ # 验证数据,返回不通过的记录列表
133
+ errors = dt.validate(lambda x: len(x.messages) >= 2)
134
+
135
+ if errors:
136
+ for e in errors[:5]:
137
+ print(f"第 {e.index} 行: {e.error}")
138
+ ```
139
+
129
140
  ### 数据转换
130
141
 
131
142
  ```python
@@ -0,0 +1,33 @@
1
+ dtflow/__init__.py,sha256=F5fBna3PxmrOK34SnZxvolzmyi_pjIxNeDkNzGg8wvA,2347
2
+ dtflow/__main__.py,sha256=vuX2--_gXI25vy-Xb1uqhURFzu05VeMUYxIJ2q-XE7M,11656
3
+ dtflow/converters.py,sha256=gyy-K15zjzGBawFnZa8D9JX37JZ47rey2GhjKa2pxFo,22081
4
+ dtflow/core.py,sha256=5XivbEdcKMrj1wSfju6MDkqdCqkO_mRS-ALWJ3DOcKo,29937
5
+ dtflow/lineage.py,sha256=vQ06lxBHftu-Ma5HlISp3F2eiIvwagQSnUGaLeABDZY,12190
6
+ dtflow/pipeline.py,sha256=zZaC4fg5vsp_30Fhbg75vu0yggsdvf28bWBiVDWzZ6Y,13901
7
+ dtflow/presets.py,sha256=OP1nnM5NFk5Kli9FsXK0xAot48E5OQ6-VOIJT9ffXPg,5023
8
+ dtflow/streaming.py,sha256=jtWQjkhhZqfyzIaFskXNvooGAYDQBn1b6X8FHgaCZYk,22704
9
+ dtflow/tokenizers.py,sha256=zxE6XZGjZ_DOGCjRSClI9xaAbFVf8FS6jwwssGoi_9U,18111
10
+ dtflow/cli/__init__.py,sha256=QhZ-thgx9IBTFII7T_hdoWFUl0CCsdGQHN5ZEZw2XB0,423
11
+ dtflow/cli/clean.py,sha256=y9VCRibgK1j8WIY3h0XZX0m93EdELQC7TdnseMWwS-0,17799
12
+ dtflow/cli/commands.py,sha256=ExcD8Z_uXQhcewvgcPtIlPzsQG4QF93K8Bg6C3uUJHk,1094
13
+ dtflow/cli/common.py,sha256=FsDFVNcLj_874qSg2dGef4V7mqPU9THLchT8PxJpBt8,12955
14
+ dtflow/cli/io_ops.py,sha256=BMDisP6dxzzmSjYwmeFwaHmpHHPqirmXAWeNTD-9MQM,13254
15
+ dtflow/cli/lineage.py,sha256=_lNh35nF9AA0Zy6FyZ4g8IzrXH2ZQnp3inF-o2Hs1pw,1383
16
+ dtflow/cli/pipeline.py,sha256=QNEo-BJlaC1CVnVeRZr7TwfuZYloJ4TebIzJ5ALzry0,1426
17
+ dtflow/cli/sample.py,sha256=vPTQlF0OXEry4QjO8uaD9vOae4AQbX9zDwVYOxg59ZI,10339
18
+ dtflow/cli/stats.py,sha256=HByF0sFMqY1kM75dnjTcJbMKDdQNdOt4iDba4au_-pI,20495
19
+ dtflow/cli/transform.py,sha256=w6xqMOxPxQvL2u_BPCfpDHuPSC9gmcqMPVN8s-B6bbY,15052
20
+ dtflow/mcp/__init__.py,sha256=huEJ3rXDbxDRjsLPEvjNT2u3tWs6Poiv6fokPIrByjw,897
21
+ dtflow/mcp/__main__.py,sha256=PoT2ZZmJq9xDZxDACJfqDW9Ld_ukHrGNK-0XUd7WGnY,448
22
+ dtflow/mcp/cli.py,sha256=ck0oOS_642cNktxULaMRE7BJfMxsBCwotmCj3PSPwVk,13110
23
+ dtflow/mcp/docs.py,sha256=DI2Vf-eFo4chRP_bDLsv4Uc3kJt8_1emz8N-NBSVirM,8834
24
+ dtflow/mcp/server.py,sha256=Nf0UlqDGhV55ndGuEglfr7VRjDWAC_9rRsNhdr0-ssM,4275
25
+ dtflow/storage/__init__.py,sha256=C0jpWNQU808Ezz7lWneddABal3wILy8ijFUNiSKbHV4,362
26
+ dtflow/storage/io.py,sha256=ZH2aSE-S89gpy3z4oTqhcqWf4u10OdkDoyul7o_YBDI,23374
27
+ dtflow/utils/__init__.py,sha256=f8v9HJZMWRI5AL64Vjr76Pf2Na_whOF9nJBKgPbXXYg,429
28
+ dtflow/utils/display.py,sha256=OeOdTh6mbDwSkDWlmkjfpTjy2QG8ZUaYU0NpHUWkpEQ,5881
29
+ dtflow/utils/field_path.py,sha256=WcNA-LZh3H61a77FEzB_R7YAyyZl3M8ofdq05ytQGmI,7459
30
+ dtflow-0.4.3.dist-info/METADATA,sha256=LdjtTbPEuHlqxthUuMX2lqrj2l2kBIfVf-u6jDZmCeI,18524
31
+ dtflow-0.4.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
32
+ dtflow-0.4.3.dist-info/entry_points.txt,sha256=dadIDOK7Iu9pMxnMPBfpb4aAPe4hQbBOshpQYjVYpGc,44
33
+ dtflow-0.4.3.dist-info/RECORD,,