robo-lib 0.0.11__tar.gz → 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- robo_lib-1.0.1/.gitignore +1 -0
- {robo_lib-0.0.11 → robo_lib-1.0.1}/PKG-INFO +8 -17
- {robo_lib-0.0.11 → robo_lib-1.0.1}/README.md +6 -16
- {robo_lib-0.0.11 → robo_lib-1.0.1}/pyproject.toml +2 -2
- {robo_lib-0.0.11 → robo_lib-1.0.1}/robo_lib/__init__.py +2 -3
- {robo_lib-0.0.11 → robo_lib-1.0.1}/robo_lib/components.py +246 -269
- robo_lib-1.0.1/tests/test_data_processor.py +82 -0
- robo_lib-1.0.1/tests/test_functions.py +176 -0
- robo_lib-1.0.1/tests/test_robo_constructor.py +130 -0
- robo_lib-1.0.1/tests/test_tokenizer_constructor.py +89 -0
- {robo_lib-0.0.11 → robo_lib-1.0.1}/LICENSE +0 -0
- {robo_lib-0.0.11 → robo_lib-1.0.1}/tests/__init__.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
__pycache__/
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: robo_lib
|
3
|
-
Version: 0.0.11
|
3
|
+
Version: 1.0.1
|
4
4
|
Summary: A package to create, configure, and train transformer models.
|
5
5
|
Project-URL: Homepage, https://github.com/hamburgerfish/robo_pack
|
6
6
|
Project-URL: Issues, https://github.com/hamburgerfish/robo_pack/issues
|
@@ -13,6 +13,7 @@ Requires-Python: >=3.8
|
|
13
13
|
Requires-Dist: numpy
|
14
14
|
Requires-Dist: tokenizers
|
15
15
|
Requires-Dist: torch
|
16
|
+
Requires-Dist: typing
|
16
17
|
Description-Content-Type: text/markdown
|
17
18
|
|
18
19
|
# robo-lib
|
@@ -83,10 +84,8 @@ proc.process_list(
|
|
83
84
|
save_path="data/training",
|
84
85
|
dec_data=french_train,
|
85
86
|
dec_max_block_size=100,
|
86
|
-
dec_block_size_exceeded_policy="skip",
|
87
87
|
enc_data=english_train,
|
88
|
-
enc_max_block_size=100
|
89
|
-
enc_block_size_exceeded_policy="skip"
|
88
|
+
enc_max_block_size=100
|
90
89
|
)
|
91
90
|
|
92
91
|
# process and save validation data as data/validation*.pt
|
@@ -94,10 +93,8 @@ proc.process_list(
|
|
94
93
|
save_path="data/validation",
|
95
94
|
dec_data=french_val,
|
96
95
|
dec_max_block_size=100,
|
97
|
-
dec_block_size_exceeded_policy="skip",
|
98
96
|
enc_data=english_val,
|
99
|
-
enc_max_block_size=100
|
100
|
-
enc_block_size_exceeded_policy="skip"
|
97
|
+
enc_max_block_size=100
|
101
98
|
)
|
102
99
|
```
|
103
100
|
- The `RoboConstructor` class is used to create and configure transformer models before training.
|
@@ -128,14 +125,8 @@ robo.train_robo(
|
|
128
125
|
max_iters=20000,
|
129
126
|
eval_interval=200,
|
130
127
|
batch_size=128,
|
131
|
-
|
132
|
-
|
133
|
-
dec_training_masks_path="data/training_decoder_mask_data.pt",
|
134
|
-
dec_eval_masks_path="data/validation_decoder_mask_data.pt",
|
135
|
-
enc_training_path="data/training_encoder_data.pt",
|
136
|
-
enc_eval_path="data/validation_encoder_data.pt",
|
137
|
-
enc_training_masks_path="data/training_encoder_mask_data.pt",
|
138
|
-
enc_eval_masks_path="data/validation_encoder_mask_data.pt",
|
128
|
+
training_dir_path="data/training",
|
129
|
+
eval_dir_path="data/validation",
|
139
130
|
dec_tokenizer=decoder_tok,
|
140
131
|
save_path="models/eng_to_fr_robo.pkl"
|
141
132
|
)
|
@@ -223,8 +214,8 @@ robo.train(
|
|
223
214
|
max_iters=20000,
|
224
215
|
eval_interval=200,
|
225
216
|
batch_size=64,
|
226
|
-
|
227
|
-
|
217
|
+
training_dir_path="data/shakespeare_train",
|
218
|
+
eval_dir_path="data/shakespeare_valid",
|
228
219
|
dec_tokenizer=tok,
|
229
220
|
save_path="models/shakespeare_robo.pkl"
|
230
221
|
)
|
@@ -66,10 +66,8 @@ proc.process_list(
|
|
66
66
|
save_path="data/training",
|
67
67
|
dec_data=french_train,
|
68
68
|
dec_max_block_size=100,
|
69
|
-
dec_block_size_exceeded_policy="skip",
|
70
69
|
enc_data=english_train,
|
71
|
-
enc_max_block_size=100
|
72
|
-
enc_block_size_exceeded_policy="skip"
|
70
|
+
enc_max_block_size=100
|
73
71
|
)
|
74
72
|
|
75
73
|
# process and save validation data as data/validation*.pt
|
@@ -77,10 +75,8 @@ proc.process_list(
|
|
77
75
|
save_path="data/validation",
|
78
76
|
dec_data=french_val,
|
79
77
|
dec_max_block_size=100,
|
80
|
-
dec_block_size_exceeded_policy="skip",
|
81
78
|
enc_data=english_val,
|
82
|
-
enc_max_block_size=100
|
83
|
-
enc_block_size_exceeded_policy="skip"
|
79
|
+
enc_max_block_size=100
|
84
80
|
)
|
85
81
|
```
|
86
82
|
- The `RoboConstructor` class is used to create and configure transformer models before training.
|
@@ -111,14 +107,8 @@ robo.train_robo(
|
|
111
107
|
max_iters=20000,
|
112
108
|
eval_interval=200,
|
113
109
|
batch_size=128,
|
114
|
-
|
115
|
-
|
116
|
-
dec_training_masks_path="data/training_decoder_mask_data.pt",
|
117
|
-
dec_eval_masks_path="data/validation_decoder_mask_data.pt",
|
118
|
-
enc_training_path="data/training_encoder_data.pt",
|
119
|
-
enc_eval_path="data/validation_encoder_data.pt",
|
120
|
-
enc_training_masks_path="data/training_encoder_mask_data.pt",
|
121
|
-
enc_eval_masks_path="data/validation_encoder_mask_data.pt",
|
110
|
+
training_dir_path="data/training",
|
111
|
+
eval_dir_path="data/validation",
|
122
112
|
dec_tokenizer=decoder_tok,
|
123
113
|
save_path="models/eng_to_fr_robo.pkl"
|
124
114
|
)
|
@@ -206,8 +196,8 @@ robo.train(
|
|
206
196
|
max_iters=20000,
|
207
197
|
eval_interval=200,
|
208
198
|
batch_size=64,
|
209
|
-
|
210
|
-
|
199
|
+
training_dir_path="data/shakespeare_train",
|
200
|
+
eval_dir_path="data/shakespeare_valid",
|
211
201
|
dec_tokenizer=tok,
|
212
202
|
save_path="models/shakespeare_robo.pkl"
|
213
203
|
)
|
@@ -4,14 +4,14 @@ build-backend = "hatchling.build"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "robo_lib"
|
7
|
-
version = "0.0.11"
|
7
|
+
version = "1.0.1"
|
8
8
|
authors = [
|
9
9
|
{ name="Erik Papp", email="erik3papp@gmail.com" },
|
10
10
|
]
|
11
11
|
description = "A package to create, configure, and train transformer models."
|
12
12
|
readme = "README.md"
|
13
13
|
requires-python = ">=3.8"
|
14
|
-
dependencies = ["torch", "tokenizers", "numpy"]
|
14
|
+
dependencies = ["torch", "tokenizers", "numpy", "typing"]
|
15
15
|
classifiers = [
|
16
16
|
"Programming Language :: Python :: 3",
|
17
17
|
"License :: OSI Approved :: MIT License",
|
@@ -1,8 +1,7 @@
|
|
1
1
|
from .components import TokenizerConstructor as TokenizerConstructor
|
2
2
|
from .components import create_mask as create_mask
|
3
|
-
from .components import
|
4
|
-
from .components import
|
5
|
-
from .components import scan_max_block_size as scan_max_block_size
|
3
|
+
from .components import pre_process_data as pre_process_data
|
4
|
+
from .components import safe_stack as safe_stack
|
6
5
|
from .components import DataProcessor as DataProcessor
|
7
6
|
from .components import get_valid_samples as get_valid_samples
|
8
7
|
from .components import get_batch as get_batch
|