dtflow 0.4.3__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dtflow/tokenizers.py CHANGED
@@ -210,7 +210,10 @@ def token_counter(
     Create a token-counting transform function.
 
     Args:
-        fields: Field(s) to count (single or multiple)
+        fields: Field(s) to count (single or multiple); supports nested path syntax
+            - Simple field: "text"
+            - Nested field: "meta.content", "data.text"
+            - Indexed field: "messages[0].content", "messages[-1].content"
         model: Model name or alias, e.g. "qwen2.5", "gpt-4", "llama3"
         backend: Backend to use; None auto-detects
         output_field: Name of the output field
@@ -221,6 +224,7 @@ def token_counter(
     Examples:
         >>> dt.transform(token_counter("text"))
         >>> dt.transform(token_counter(["question", "answer"], model="qwen3"))
+        >>> dt.transform(token_counter("messages[-1].content"))  # the last message
     """
     if isinstance(fields, str):
         fields = [fields]
@@ -229,7 +233,7 @@ def token_counter(
         result = item.to_dict() if hasattr(item, "to_dict") else dict(item)
         total = 0
         for field in fields:
-            value = item.get(field, "") if hasattr(item, "get") else item[field]
+            value = get_field_with_spec(item, field, default="")
            if value:
                total += count_tokens(str(value), model=model, backend=backend)
        result[output_field] = total
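
Concretely, the rewritten transform can now resolve a nested path before counting. A minimal sketch of the behavior (the record and output field name are hypothetical; the import path follows this diff):

```python
# Sketch: token_counter with a nested path, per the docstring above.
from dtflow.tokenizers import token_counter

count_fn = token_counter("messages[-1].content", output_field="n_tokens")
record = {"messages": [{"role": "user", "content": "hello world"}]}

# The transform copies the record and attaches the token count.
result = count_fn(record)
print(result["n_tokens"])  # token count of "hello world"
```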
@@ -249,7 +253,10 @@ def token_filter(
     Create a filter function based on token length.
 
     Args:
-        fields: Field(s) to count (single or multiple)
+        fields: Field(s) to count (single or multiple); supports nested path syntax
+            - Simple field: "text"
+            - Nested field: "meta.content", "data.text"
+            - Indexed field: "messages[0].content", "messages[-1].content"
         min_tokens: Minimum token count (inclusive)
         max_tokens: Maximum token count (inclusive)
         model: Model name
@@ -261,6 +268,7 @@ def token_filter(
     Examples:
         >>> dt.filter(token_filter("text", min_tokens=10, max_tokens=512))
         >>> dt.filter(token_filter(["q", "a"], max_tokens=2048))
+        >>> dt.filter(token_filter("messages[-1].content", max_tokens=1024))
     """
     if isinstance(fields, str):
         fields = [fields]
@@ -268,7 +276,7 @@ def token_filter(
     def filter_func(item) -> bool:
         total = 0
         for field in fields:
-            value = item.get(field, "") if hasattr(item, "get") else item[field]
+            value = get_field_with_spec(item, field, default="")
            if value:
                total += count_tokens(str(value), model=model, backend=backend)
 
@@ -281,11 +289,32 @@ def token_filter(
     return filter_func
 
 
+def _percentile(sorted_data: List[int], p: float) -> int:
+    """Compute a percentile."""
+    n = len(sorted_data)
+    if n == 0:
+        return 0
+    idx = (n - 1) * p / 100
+    lower = int(idx)
+    upper = min(lower + 1, n - 1)
+    weight = idx - lower
+    return int(sorted_data[lower] * (1 - weight) + sorted_data[upper] * weight)
+
+
+def _std(counts: List[int], avg: float) -> float:
+    """Compute the standard deviation."""
+    if len(counts) < 2:
+        return 0.0
+    variance = sum((x - avg) ** 2 for x in counts) / len(counts)
+    return variance**0.5
+
+
 def token_stats(
     data: List[Dict[str, Any]],
     fields: Union[str, List[str]],
     model: str = DEFAULT_MODEL,
     backend: Optional[str] = None,
+    progress_callback: Optional[Callable[[int, int], None]] = None,
 ) -> Dict[str, Any]:
     """
     Compute token statistics for a dataset.
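
The two new helpers interpolate linearly between adjacent ranks and use the population variance (dividing by n, not n - 1). A self-contained check of the same logic on toy numbers:

```python
# Standalone copies of the helpers above, exercised on toy data.
from typing import List


def _percentile(sorted_data: List[int], p: float) -> int:
    n = len(sorted_data)
    if n == 0:
        return 0
    idx = (n - 1) * p / 100  # fractional rank
    lower = int(idx)
    upper = min(lower + 1, n - 1)
    weight = idx - lower
    return int(sorted_data[lower] * (1 - weight) + sorted_data[upper] * weight)


def _std(counts: List[int], avg: float) -> float:
    if len(counts) < 2:
        return 0.0
    variance = sum((x - avg) ** 2 for x in counts) / len(counts)  # population variance
    return variance**0.5


counts = [10, 20, 30, 40]
print(_percentile(sorted(counts), 90))          # rank 2.7 -> 30*0.3 + 40*0.7 = 37
print(round(_std(counts, sum(counts) / 4), 2))  # sqrt(125) -> 11.18
```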
@@ -295,9 +324,17 @@ def token_stats(
         fields: Fields to count; supports nested path syntax (e.g. meta.text, messages[-1].content)
         model: Model name or alias, e.g. "qwen2.5", "gpt-4"
         backend: Backend to use; None auto-detects
+        progress_callback: Progress callback invoked with (current, total)
 
     Returns:
-        Dict of statistics
+        Dict of statistics containing:
+        - total_tokens: total token count
+        - count: number of samples
+        - avg_tokens: mean token count
+        - std_tokens: standard deviation
+        - min_tokens, max_tokens: minimum/maximum
+        - median_tokens: median (p50)
+        - p25, p75, p90, p95, p99: percentiles
     """
     if isinstance(fields, str):
         fields = [fields]
@@ -306,21 +343,33 @@ def token_stats(
         return {"total_tokens": 0, "count": 0}
 
     counts = []
-    for item in data:
+    total_items = len(data)
+    for i, item in enumerate(data):
         total = 0
         for field in fields:
             value = get_field_with_spec(item, field, default="")
             if value:
                 total += count_tokens(str(value), model=model, backend=backend)
         counts.append(total)
+        if progress_callback:
+            progress_callback(i + 1, total_items)
+
+    sorted_counts = sorted(counts)
+    avg = sum(counts) / len(counts)
 
     return {
         "total_tokens": sum(counts),
         "count": len(counts),
-        "avg_tokens": sum(counts) / len(counts),
+        "avg_tokens": avg,
+        "std_tokens": _std(counts, avg),
         "min_tokens": min(counts),
         "max_tokens": max(counts),
-        "median_tokens": sorted(counts)[len(counts) // 2],
+        "median_tokens": _percentile(sorted_counts, 50),
+        "p25": _percentile(sorted_counts, 25),
+        "p75": _percentile(sorted_counts, 75),
+        "p90": _percentile(sorted_counts, 90),
+        "p95": _percentile(sorted_counts, 95),
+        "p99": _percentile(sorted_counts, 99),
     }
 
 
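A sketch of driving the new `progress_callback` hook; the dataset is synthetic and the import path is assumed from this diff:

```python
# Hypothetical usage of the new progress_callback parameter.
from dtflow.tokenizers import token_stats

data = [{"messages": [{"role": "user", "content": "hi " * n}]} for n in range(1, 501)]


def report(current: int, total: int) -> None:
    # Invoked once per record with (1-based index, total record count).
    if current % 100 == 0 or current == total:
        print(f"counted {current}/{total}")


stats = token_stats(data, "messages[-1].content", progress_callback=report)
print(stats["median_tokens"], stats["p95"], stats["std_tokens"])
```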
@@ -504,6 +553,7 @@ def messages_token_stats(
     messages_field: str = "messages",
     model: str = DEFAULT_MODEL,
     backend: Optional[str] = None,
+    progress_callback: Optional[Callable[[int, int], None]] = None,
 ) -> Dict[str, Any]:
     """
     Compute token statistics over a dataset's messages.
@@ -513,25 +563,18 @@ def messages_token_stats(
         messages_field: Name of the messages field; supports nested path syntax (e.g. conversation.messages)
         model: Model name or alias
         backend: Backend; None auto-detects
+        progress_callback: Progress callback invoked with (current, total)
 
     Returns:
-        Dict of statistics
-
-    Examples:
-        >>> stats = messages_token_stats(dt.data)  # uses the default qwen2.5
-        >>> stats = messages_token_stats(dt.data, model="qwen3")
-        >>> print(stats)
-        {
-            "count": 1000,
-            "total_tokens": 500000,
-            "user_tokens": 200000,
-            "assistant_tokens": 290000,
-            "system_tokens": 10000,
-            "avg_tokens": 500,
-            "max_tokens": 2048,
-            "min_tokens": 50,
-            "avg_turns": 4,
-        }
+        Dict of statistics containing:
+        - count: number of samples
+        - total_tokens: total token count
+        - user_tokens, assistant_tokens, system_tokens: token counts per role
+        - avg_tokens, std_tokens: mean and standard deviation
+        - min_tokens, max_tokens: minimum/maximum
+        - median_tokens: median
+        - p25, p75, p90, p95, p99: percentiles
+        - avg_turns: average number of conversation turns
     """
     _backend = backend or _auto_backend(model)
 
@@ -539,24 +582,36 @@ def messages_token_stats(
         return {"count": 0, "total_tokens": 0}
 
     all_stats = []
-    for item in data:
+    total_items = len(data)
+    for i, item in enumerate(data):
         messages = get_field_with_spec(item, messages_field, default=[])
         if messages:
             all_stats.append(_count_messages_tokens(messages, model=model, backend=_backend))
+        if progress_callback:
+            progress_callback(i + 1, total_items)
 
     if not all_stats:
         return {"count": 0, "total_tokens": 0}
 
     totals = [s["total"] for s in all_stats]
+    sorted_totals = sorted(totals)
+    avg = sum(totals) / len(totals)
+
     return {
         "count": len(all_stats),
         "total_tokens": sum(totals),
         "user_tokens": sum(s["user"] for s in all_stats),
         "assistant_tokens": sum(s["assistant"] for s in all_stats),
         "system_tokens": sum(s["system"] for s in all_stats),
-        "avg_tokens": sum(totals) // len(totals),
-        "max_tokens": max(totals),
+        "avg_tokens": int(avg),
+        "std_tokens": _std(totals, avg),
         "min_tokens": min(totals),
-        "median_tokens": sorted(totals)[len(totals) // 2],
+        "max_tokens": max(totals),
+        "median_tokens": _percentile(sorted_totals, 50),
+        "p25": _percentile(sorted_totals, 25),
+        "p75": _percentile(sorted_totals, 75),
+        "p90": _percentile(sorted_totals, 90),
+        "p95": _percentile(sorted_totals, 95),
+        "p99": _percentile(sorted_totals, 99),
         "avg_turns": sum(s["turns"] for s in all_stats) // len(all_stats),
     }
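
The docstring example that was removed showed the old return shape; a sketch of reading the keys added in the new shape (toy dataset, import path assumed):

```python
from dtflow.tokenizers import messages_token_stats

data = [
    {"messages": [{"role": "user", "content": "hi"},
                  {"role": "assistant", "content": "hello there"}]}
] * 10

# A no-op callback is also valid; pass a real one to surface progress.
stats = messages_token_stats(data, progress_callback=lambda cur, tot: None)
print(stats["median_tokens"], stats["p95"], stats["std_tokens"], stats["avg_turns"])
```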
dtflow/utils/field_path.py CHANGED
@@ -96,7 +96,9 @@ def _parse_path(path: str) -> List[Union[str, int, Literal["*", "#"]]]:
             continue
 
         # Parse the field[index] form
-        match = re.match(r"([a-zA-Z_\u4e00-\u9fff][a-zA-Z0-9_\u4e00-\u9fff]*)?(?:\[(-?\d+|\*)\])?", part)
+        match = re.match(
+            r"([a-zA-Z_\u4e00-\u9fff][a-zA-Z0-9_\u4e00-\u9fff]*)?(?:\[(-?\d+|\*)\])?", part
+        )
         if match:
             field_name, index = match.groups()
 
@@ -175,10 +177,12 @@ def _get_value_by_segments(
 
         return values
 
-    # Dict field access
+    # Dict field access (supports dict and dict-like objects such as DictWrapper)
     if isinstance(seg, str):
         if isinstance(current, dict):
             current = current.get(seg)
+        elif hasattr(current, "get"):
+            current = current.get(seg)
         else:
             return None
 
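A minimal sketch of what the duck-typed branch enables: a non-dict object that exposes `.get` can now sit in the middle of a path. The wrapper class here is a stand-in for the `DictWrapper` named in the comment, and the import path is assumed:

```python
from dtflow.utils.field_path import get_field_with_spec  # assumed import path


class DictWrapper:  # stand-in for the dict-like object named in the comment
    def __init__(self, data: dict):
        self._data = data

    def get(self, key, default=None):
        return self._data.get(key, default)


item = {"meta": DictWrapper({"content": "hello"})}
# Previously the traversal bailed out at the non-dict hop and returned None;
# the new elif branch resolves it via .get.
print(get_field_with_spec(item, "meta.content", default=""))  # -> hello
```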
dtflow-0.4.3.dist-info/METADATA → dtflow-0.5.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dtflow
-Version: 0.4.3
+Version: 0.5.2
 Summary: A flexible data transformation tool for ML training formats (SFT, RLHF, Pretrain)
 Project-URL: Homepage, https://github.com/yourusername/DataTransformer
 Project-URL: Documentation, https://github.com/yourusername/DataTransformer#readme
@@ -32,16 +32,26 @@ Requires-Dist: orjson>=3.9.0
 Requires-Dist: polars>=0.20.0
 Requires-Dist: pyyaml>=5.4.0
 Requires-Dist: rich>=10.0.0
+Requires-Dist: tiktoken>=0.5.0
 Requires-Dist: typer>=0.9.0
 Provides-Extra: converters
 Requires-Dist: datasets>=2.0.0; extra == 'converters'
 Provides-Extra: dev
 Requires-Dist: black>=21.0; extra == 'dev'
+Requires-Dist: datasets>=2.0.0; extra == 'dev'
+Requires-Dist: datasketch>=1.5.0; extra == 'dev'
 Requires-Dist: flake8>=3.9.0; extra == 'dev'
+Requires-Dist: huggingface-hub>=0.20.0; extra == 'dev'
 Requires-Dist: isort>=5.9.0; extra == 'dev'
 Requires-Dist: mypy>=0.910; extra == 'dev'
+Requires-Dist: pyarrow; extra == 'dev'
 Requires-Dist: pytest-cov>=2.12.0; extra == 'dev'
 Requires-Dist: pytest>=6.0.0; extra == 'dev'
+Requires-Dist: rich>=10.0.0; extra == 'dev'
+Requires-Dist: scikit-learn>=0.24.0; extra == 'dev'
+Requires-Dist: tiktoken>=0.5.0; extra == 'dev'
+Requires-Dist: tokenizers>=0.15.0; extra == 'dev'
+Requires-Dist: toolong>=1.5.0; extra == 'dev'
 Provides-Extra: display
 Provides-Extra: docs
 Requires-Dist: myst-parser>=0.15.0; extra == 'docs'
@@ -129,7 +139,7 @@ dt.filter(lambda x: x.language == "zh")
 ### Data Validation
 
 ```python
-# Validate data; returns a list of failing records
+# Simple validation; returns a list of failing records
 errors = dt.validate(lambda x: len(x.messages) >= 2)
 
 if errors:
@@ -137,6 +147,53 @@ if errors:
     print(f"Row {e.index}: {e.error}")
 ```
 
+### Schema Validation
+
+Use a Schema for structured data validation:
+
+```python
+from dtflow import Schema, Field, openai_chat_schema
+
+# Use a preset Schema
+result = dt.validate_schema(openai_chat_schema)
+print(result)  # ValidationResult(valid=950, invalid=50, errors=[...])
+
+# Custom Schema
+schema = Schema({
+    "messages": Field(type="list", required=True, min_length=1),
+    "messages[*].role": Field(type="str", choices=["user", "assistant", "system"]),
+    "messages[*].content": Field(type="str", min_length=1),
+    "score": Field(type="float", min=0, max=1),
+})
+
+result = dt.validate_schema(schema)
+
+# Keep only the valid records
+valid_dt = dt.validate_schema(schema, filter_invalid=True)
+valid_dt.save("valid.jsonl")
+```
+
+**Preset Schemas**:
+
+| Schema name | Purpose |
+|------------|------|
+| `openai_chat_schema` | OpenAI messages format validation |
+| `alpaca_schema` | Alpaca instruction/output format |
+| `sharegpt_schema` | ShareGPT conversations format |
+| `dpo_schema` | DPO prompt/chosen/rejected format |
+
+**Field parameters**:
+
+| Parameter | Description | Example |
+|------|------|------|
+| `type` | Type check | `"str"`, `"int"`, `"float"`, `"bool"`, `"list"`, `"dict"` |
+| `required` | Whether the field is required | `True` / `False` |
+| `min` / `max` | Numeric range | `min=0, max=1` |
+| `min_length` / `max_length` | Length range | `min_length=1` |
+| `choices` | Enumerated values | `choices=["user", "assistant"]` |
+| `pattern` | Regex match | `pattern=r"^\d{4}-\d{2}-\d{2}$"` |
+| `custom` | Custom validator | `custom=lambda x: x > 0` |
+
 ### Data Transformation
 
 ```python
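
The `pattern` and `custom` rows in the Field table above have no counterpart in the code sample; a hedged sketch combining them (field names invented, `dt` as loaded in the earlier examples):

```python
# Sketch only: exercises the pattern/custom validators from the table above.
from dtflow import Schema, Field

audit_schema = Schema({
    "date": Field(type="str", pattern=r"^\d{4}-\d{2}-\d{2}$"),           # ISO-like date
    "score": Field(type="float", min=0, max=1, custom=lambda x: x > 0),  # extra predicate
})
result = dt.validate_schema(audit_schema)
```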
@@ -286,6 +343,58 @@ dt.transform(to_swift_vlm(images_field="images")).save("swift_vlm.jsonl")
 # Output: {"messages": [...], "images": ["/path/to/img.jpg"]}
 ```
 
+### One-Click Export for Training Frameworks
+
+Export data in a format the target training framework can consume directly, with config files generated automatically:
+
+```python
+from dtflow import DataTransformer
+
+dt = DataTransformer.load("data.jsonl")
+
+# 1. Check framework compatibility
+result = dt.check_compatibility("llama-factory")
+print(result)
+# ✅ Compatible - LLaMA-Factory (openai_chat)
+# or
+# ❌ Incompatible - error: xxx
+
+# 2. One-click export to LLaMA-Factory
+files = dt.export_for("llama-factory", "./llama_ready/")
+# Generated files:
+# - ./llama_ready/custom_dataset.json   # data file
+# - ./llama_ready/dataset_info.json     # dataset config
+# - ./llama_ready/train_args.yaml       # training-args template
+
+# 3. Export to ms-swift
+files = dt.export_for("swift", "./swift_ready/")
+# Generates: data.jsonl + train_swift.sh
+
+# 4. Export to Axolotl
+files = dt.export_for("axolotl", "./axolotl_ready/")
+# Generates: data.jsonl + config.yaml
+
+# Specify a dataset name
+dt.export_for("llama-factory", "./output/", dataset_name="my_sft_data")
+```
+
+**Supported frameworks**:
+
+| Framework | Exported files | Usage |
+|------|---------|---------|
+| `llama-factory` | data.json + dataset_info.json + train_args.yaml | `llamafactory-cli train train_args.yaml` |
+| `swift` | data.jsonl + train_swift.sh | `bash train_swift.sh` |
+| `axolotl` | data.jsonl + config.yaml | `accelerate launch -m axolotl.cli.train config.yaml` |
+
+**Automatic format detection**:
+
+| Detected format | Data shape |
+|------------|---------|
+| `openai_chat` | `{"messages": [{"role": "user", ...}]}` |
+| `alpaca` | `{"instruction": ..., "output": ...}` |
+| `sharegpt` | `{"conversations": [{"from": "human", ...}]}` |
+| `dpo` | `{"prompt": ..., "chosen": ..., "rejected": ...}` |
+
 ### Other Operations
 
 ```python
@@ -361,6 +470,12 @@ dt concat a.jsonl b.jsonl -o merged.jsonl
 
 # Dataset statistics
 dt stats data.jsonl
+
+# Data validation
+dt validate data.jsonl --preset=openai_chat                               # validate against a preset schema
+dt validate data.jsonl --preset=alpaca --verbose                          # verbose output
+dt validate data.jsonl --preset=sharegpt --filter-invalid -o valid.jsonl  # keep only the valid records
+dt validate data.jsonl --preset=dpo --max-errors=100                      # cap the number of reported errors
 ```
 
 ### Field Path Syntax
dtflow-0.4.3.dist-info/RECORD → dtflow-0.5.2.dist-info/RECORD CHANGED
@@ -1,22 +1,25 @@
-dtflow/__init__.py,sha256=F5fBna3PxmrOK34SnZxvolzmyi_pjIxNeDkNzGg8wvA,2347
-dtflow/__main__.py,sha256=vuX2--_gXI25vy-Xb1uqhURFzu05VeMUYxIJ2q-XE7M,11656
+dtflow/__init__.py,sha256=PTqh_6-F6eEwg1RxQ0ueP6CYnZauMuqYhlZe2BJphr0,3031
+dtflow/__main__.py,sha256=ySpqvEn7k-vsrYFPx-8O6p-yx_24KccgnOSPd2XybhM,12572
 dtflow/converters.py,sha256=gyy-K15zjzGBawFnZa8D9JX37JZ47rey2GhjKa2pxFo,22081
-dtflow/core.py,sha256=5XivbEdcKMrj1wSfju6MDkqdCqkO_mRS-ALWJ3DOcKo,29937
-dtflow/lineage.py,sha256=vQ06lxBHftu-Ma5HlISp3F2eiIvwagQSnUGaLeABDZY,12190
+dtflow/core.py,sha256=qMo6B3LK--TWRK7ZBKObGcs3pKFnd0NPoaM0T8JC7Jw,38135
+dtflow/framework.py,sha256=jyICi_RWHjX7WfsXdSbWmP1SL7y1OWSPyd5G5Y-lvg4,17578
+dtflow/lineage.py,sha256=jie3OL1qK90-_cOOqqLbhSJ1oGUktDM1x5HRpQ5Qiyc,12800
 dtflow/pipeline.py,sha256=zZaC4fg5vsp_30Fhbg75vu0yggsdvf28bWBiVDWzZ6Y,13901
 dtflow/presets.py,sha256=OP1nnM5NFk5Kli9FsXK0xAot48E5OQ6-VOIJT9ffXPg,5023
-dtflow/streaming.py,sha256=jtWQjkhhZqfyzIaFskXNvooGAYDQBn1b6X8FHgaCZYk,22704
-dtflow/tokenizers.py,sha256=zxE6XZGjZ_DOGCjRSClI9xaAbFVf8FS6jwwssGoi_9U,18111
+dtflow/schema.py,sha256=IFcij22_UFKcgKT1YWwRg2QJO0vcAvCb1arZmsGByts,16824
+dtflow/streaming.py,sha256=dxpNd1-Wz_PTLTdvM5qn06_2TJr5NRlIIuw0LOSS2Iw,24755
+dtflow/tokenizers.py,sha256=7ZAelSmcDxLWH5kICgH9Q1ULH3_BfDZb9suHMjJJRZU,20589
 dtflow/cli/__init__.py,sha256=QhZ-thgx9IBTFII7T_hdoWFUl0CCsdGQHN5ZEZw2XB0,423
 dtflow/cli/clean.py,sha256=y9VCRibgK1j8WIY3h0XZX0m93EdELQC7TdnseMWwS-0,17799
-dtflow/cli/commands.py,sha256=ExcD8Z_uXQhcewvgcPtIlPzsQG4QF93K8Bg6C3uUJHk,1094
-dtflow/cli/common.py,sha256=FsDFVNcLj_874qSg2dGef4V7mqPU9THLchT8PxJpBt8,12955
+dtflow/cli/commands.py,sha256=ST65Ox_MKu-CKAtPVaxECAPXYOJiF7BhL32A4nsZZl0,1175
+dtflow/cli/common.py,sha256=nIPc9GBK61r6kmaI9OS3IyhcfPqShpDEHx1ddjFPnlM,13131
 dtflow/cli/io_ops.py,sha256=BMDisP6dxzzmSjYwmeFwaHmpHHPqirmXAWeNTD-9MQM,13254
 dtflow/cli/lineage.py,sha256=_lNh35nF9AA0Zy6FyZ4g8IzrXH2ZQnp3inF-o2Hs1pw,1383
 dtflow/cli/pipeline.py,sha256=QNEo-BJlaC1CVnVeRZr7TwfuZYloJ4TebIzJ5ALzry0,1426
 dtflow/cli/sample.py,sha256=vPTQlF0OXEry4QjO8uaD9vOae4AQbX9zDwVYOxg59ZI,10339
-dtflow/cli/stats.py,sha256=HByF0sFMqY1kM75dnjTcJbMKDdQNdOt4iDba4au_-pI,20495
+dtflow/cli/stats.py,sha256=u4ehCfgw1X8WuOyAjrApMRgcIO3BVmINbsTjxEscQro,24086
 dtflow/cli/transform.py,sha256=w6xqMOxPxQvL2u_BPCfpDHuPSC9gmcqMPVN8s-B6bbY,15052
+dtflow/cli/validate.py,sha256=65aGVlMS_Rq0Ch0YQ-TclVJ03RQP4CnG137wthzb8Ao,4384
 dtflow/mcp/__init__.py,sha256=huEJ3rXDbxDRjsLPEvjNT2u3tWs6Poiv6fokPIrByjw,897
 dtflow/mcp/__main__.py,sha256=PoT2ZZmJq9xDZxDACJfqDW9Ld_ukHrGNK-0XUd7WGnY,448
 dtflow/mcp/cli.py,sha256=ck0oOS_642cNktxULaMRE7BJfMxsBCwotmCj3PSPwVk,13110
@@ -26,8 +29,8 @@ dtflow/storage/__init__.py,sha256=C0jpWNQU808Ezz7lWneddABal3wILy8ijFUNiSKbHV4,36
 dtflow/storage/io.py,sha256=ZH2aSE-S89gpy3z4oTqhcqWf4u10OdkDoyul7o_YBDI,23374
 dtflow/utils/__init__.py,sha256=f8v9HJZMWRI5AL64Vjr76Pf2Na_whOF9nJBKgPbXXYg,429
 dtflow/utils/display.py,sha256=OeOdTh6mbDwSkDWlmkjfpTjy2QG8ZUaYU0NpHUWkpEQ,5881
-dtflow/utils/field_path.py,sha256=WcNA-LZh3H61a77FEzB_R7YAyyZl3M8ofdq05ytQGmI,7459
-dtflow-0.4.3.dist-info/METADATA,sha256=LdjtTbPEuHlqxthUuMX2lqrj2l2kBIfVf-u6jDZmCeI,18524
-dtflow-0.4.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-dtflow-0.4.3.dist-info/entry_points.txt,sha256=dadIDOK7Iu9pMxnMPBfpb4aAPe4hQbBOshpQYjVYpGc,44
-dtflow-0.4.3.dist-info/RECORD,,
+dtflow/utils/field_path.py,sha256=K8nU196RxTSJ1OoieTWGcYOWl9KjGq2iSxCAkfjECuM,7621
+dtflow-0.5.2.dist-info/METADATA,sha256=RlpGaySrAIgTviom_Wyn6o2LWzQQVihff12Jpazy10o,22544
+dtflow-0.5.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+dtflow-0.5.2.dist-info/entry_points.txt,sha256=dadIDOK7Iu9pMxnMPBfpb4aAPe4hQbBOshpQYjVYpGc,44
+dtflow-0.5.2.dist-info/RECORD,,