dtflow 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

dtflow/__init__.py CHANGED
@@ -7,61 +7,88 @@ DataTransformer: 简洁的数据格式转换工具
  - tokenizers: Token 统计和过滤
  - converters: HuggingFace/OpenAI 等格式转换
  """
+
+ from .converters import (  # LLaMA-Factory 扩展; ms-swift
+     from_hf_dataset,
+     from_openai_batch,
+     messages_to_text,
+     to_axolotl,
+     to_hf_chat_format,
+     to_hf_dataset,
+     to_llama_factory,
+     to_llama_factory_sharegpt,
+     to_llama_factory_vlm,
+     to_llama_factory_vlm_sharegpt,
+     to_openai_batch,
+     to_swift_messages,
+     to_swift_query_response,
+     to_swift_vlm,
+ )
  from .core import DataTransformer, DictWrapper, TransformError, TransformErrors
  from .presets import get_preset, list_presets
- from .storage import save_data, load_data, sample_file
+ from .storage import load_data, sample_file, save_data
+ from .streaming import StreamingTransformer, load_sharded, load_stream, process_shards
  from .tokenizers import (
-     count_tokens, token_counter, token_filter, token_stats,
-     messages_token_counter, messages_token_filter, messages_token_stats,
- )
- from .converters import (
-     to_hf_dataset, from_hf_dataset, to_hf_chat_format,
-     from_openai_batch, to_openai_batch,
-     to_llama_factory, to_axolotl, messages_to_text,
-     # LLaMA-Factory 扩展
-     to_llama_factory_sharegpt, to_llama_factory_vlm, to_llama_factory_vlm_sharegpt,
-     # ms-swift
-     to_swift_messages, to_swift_query_response, to_swift_vlm,
+     DEFAULT_MODEL,
+     MODEL_ALIASES,
+     OPENAI_MODELS,
+     count_tokens,
+     messages_token_counter,
+     messages_token_filter,
+     messages_token_stats,
+     resolve_model,
+     token_counter,
+     token_filter,
+     token_stats,
  )

- __version__ = '0.3.0'
+ __version__ = "0.3.2"

  __all__ = [
      # core
-     'DataTransformer',
-     'DictWrapper',
-     'TransformError',
-     'TransformErrors',
+     "DataTransformer",
+     "DictWrapper",
+     "TransformError",
+     "TransformErrors",
      # presets
-     'get_preset',
-     'list_presets',
+     "get_preset",
+     "list_presets",
      # storage
-     'save_data',
-     'load_data',
-     'sample_file',
+     "save_data",
+     "load_data",
+     "sample_file",
      # tokenizers
-     'count_tokens',
-     'token_counter',
-     'token_filter',
-     'token_stats',
-     'messages_token_counter',
-     'messages_token_filter',
-     'messages_token_stats',
+     "count_tokens",
+     "token_counter",
+     "token_filter",
+     "token_stats",
+     "messages_token_counter",
+     "messages_token_filter",
+     "messages_token_stats",
+     "DEFAULT_MODEL",
+     "MODEL_ALIASES",
+     "OPENAI_MODELS",
+     "resolve_model",
      # converters
-     'to_hf_dataset',
-     'from_hf_dataset',
-     'to_hf_chat_format',
-     'from_openai_batch',
-     'to_openai_batch',
-     'to_llama_factory',
-     'to_axolotl',
-     'messages_to_text',
+     "to_hf_dataset",
+     "from_hf_dataset",
+     "to_hf_chat_format",
+     "from_openai_batch",
+     "to_openai_batch",
+     "to_llama_factory",
+     "to_axolotl",
+     "messages_to_text",
      # LLaMA-Factory 扩展
-     'to_llama_factory_sharegpt',
-     'to_llama_factory_vlm',
-     'to_llama_factory_vlm_sharegpt',
+     "to_llama_factory_sharegpt",
+     "to_llama_factory_vlm",
+     "to_llama_factory_vlm_sharegpt",
      # ms-swift
-     'to_swift_messages',
-     'to_swift_query_response',
-     'to_swift_vlm',
+     "to_swift_messages",
+     "to_swift_query_response",
+     "to_swift_vlm",
+     # streaming
+     "StreamingTransformer",
+     "load_stream",
+     "load_sharded",
+     "process_shards",
  ]
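
Most of the churn in this hunk is cosmetic (sorted imports and a switch from single to double quotes); the functional change in 0.3.2 is the new public surface: the `.streaming` helpers and the tokenizer model constants re-exported from the package root. As a minimal sketch of what that surface looks like from the importing side, the example below uses only names taken from the `__all__` additions above. The diff does not show these objects' signatures or values, so nothing beyond importing and printing the constants is attempted.

```python
# Minimal sketch: importing the names newly exported by dtflow 0.3.2.
# Import targets come straight from the __all__ additions in the diff above;
# helper signatures are not shown in the diff, so no helper is actually called.
from dtflow import (
    # streaming exports added between 0.3.0 and 0.3.2
    StreamingTransformer,
    load_stream,
    load_sharded,
    process_shards,
    # tokenizer model metadata now re-exported at package level
    DEFAULT_MODEL,
    MODEL_ALIASES,
    OPENAI_MODELS,
    resolve_model,
)

# The constants exist as of 0.3.2; their concrete values live in
# dtflow.tokenizers and are not part of this diff.
print(DEFAULT_MODEL)
print(OPENAI_MODELS)
```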