robo-lib 0.0.11__tar.gz → 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ __pycache__/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: robo_lib
3
- Version: 0.0.11
3
+ Version: 1.0.1
4
4
  Summary: A package to create, configure, and train transformer models.
5
5
  Project-URL: Homepage, https://github.com/hamburgerfish/robo_pack
6
6
  Project-URL: Issues, https://github.com/hamburgerfish/robo_pack/issues
@@ -13,6 +13,7 @@ Requires-Python: >=3.8
13
13
  Requires-Dist: numpy
14
14
  Requires-Dist: tokenizers
15
15
  Requires-Dist: torch
16
+ Requires-Dist: typing
16
17
  Description-Content-Type: text/markdown
17
18
 
18
19
  # robo-lib
@@ -83,10 +84,8 @@ proc.process_list(
83
84
  save_path="data/training",
84
85
  dec_data=french_train,
85
86
  dec_max_block_size=100,
86
- dec_block_size_exceeded_policy="skip",
87
87
  enc_data=english_train,
88
- enc_max_block_size=100,
89
- enc_block_size_exceeded_policy="skip"
88
+ enc_max_block_size=100
90
89
  )
91
90
 
92
91
  # process and save validation data as data/validation*.pt
@@ -94,10 +93,8 @@ proc.process_list(
94
93
  save_path="data/validation",
95
94
  dec_data=french_val,
96
95
  dec_max_block_size=100,
97
- dec_block_size_exceeded_policy="skip",
98
96
  enc_data=english_val,
99
- enc_max_block_size=100,
100
- enc_block_size_exceeded_policy="skip"
97
+ enc_max_block_size=100
101
98
  )
102
99
  ```
103
100
  - The `RoboConstructor` class is used to create and configure transformer models before training.
@@ -128,14 +125,8 @@ robo.train_robo(
128
125
  max_iters=20000,
129
126
  eval_interval=200,
130
127
  batch_size=128,
131
- dec_training_path="data/training_decoder_data.pt",
132
- dec_eval_path="data/validation_decoder_data.pt",
133
- dec_training_masks_path="data/training_decoder_mask_data.pt",
134
- dec_eval_masks_path="data/validation_decoder_mask_data.pt",
135
- enc_training_path="data/training_encoder_data.pt",
136
- enc_eval_path="data/validation_encoder_data.pt",
137
- enc_training_masks_path="data/training_encoder_mask_data.pt",
138
- enc_eval_masks_path="data/validation_encoder_mask_data.pt",
128
+ training_dir_path="data/training",
129
+ eval_dir_path="data/validation",
139
130
  dec_tokenizer=decoder_tok,
140
131
  save_path="models/eng_to_fr_robo.pkl"
141
132
  )
@@ -223,8 +214,8 @@ robo.train(
223
214
  max_iters=20000,
224
215
  eval_interval=200,
225
216
  batch_size=64,
226
- dec_training_path="data/shakespeare_train_decoder_data.pt",
227
- dec_eval_path="data/shakespeare_valid_decoder_data.pt",
217
+ training_dir_path="data/shakespeare_train",
218
+ eval_dir_path="data/shakespeare_valid",
228
219
  dec_tokenizer=tok,
229
220
  save_path="models/shakespeare_robo.pkl"
230
221
  )
@@ -66,10 +66,8 @@ proc.process_list(
66
66
  save_path="data/training",
67
67
  dec_data=french_train,
68
68
  dec_max_block_size=100,
69
- dec_block_size_exceeded_policy="skip",
70
69
  enc_data=english_train,
71
- enc_max_block_size=100,
72
- enc_block_size_exceeded_policy="skip"
70
+ enc_max_block_size=100
73
71
  )
74
72
 
75
73
  # process and save validation data as data/validation*.pt
@@ -77,10 +75,8 @@ proc.process_list(
77
75
  save_path="data/validation",
78
76
  dec_data=french_val,
79
77
  dec_max_block_size=100,
80
- dec_block_size_exceeded_policy="skip",
81
78
  enc_data=english_val,
82
- enc_max_block_size=100,
83
- enc_block_size_exceeded_policy="skip"
79
+ enc_max_block_size=100
84
80
  )
85
81
  ```
86
82
  - The `RoboConstructor` class is used to create and configure transformer models before training.
@@ -111,14 +107,8 @@ robo.train_robo(
111
107
  max_iters=20000,
112
108
  eval_interval=200,
113
109
  batch_size=128,
114
- dec_training_path="data/training_decoder_data.pt",
115
- dec_eval_path="data/validation_decoder_data.pt",
116
- dec_training_masks_path="data/training_decoder_mask_data.pt",
117
- dec_eval_masks_path="data/validation_decoder_mask_data.pt",
118
- enc_training_path="data/training_encoder_data.pt",
119
- enc_eval_path="data/validation_encoder_data.pt",
120
- enc_training_masks_path="data/training_encoder_mask_data.pt",
121
- enc_eval_masks_path="data/validation_encoder_mask_data.pt",
110
+ training_dir_path="data/training",
111
+ eval_dir_path="data/validation",
122
112
  dec_tokenizer=decoder_tok,
123
113
  save_path="models/eng_to_fr_robo.pkl"
124
114
  )
@@ -206,8 +196,8 @@ robo.train(
206
196
  max_iters=20000,
207
197
  eval_interval=200,
208
198
  batch_size=64,
209
- dec_training_path="data/shakespeare_train_decoder_data.pt",
210
- dec_eval_path="data/shakespeare_valid_decoder_data.pt",
199
+ training_dir_path="data/shakespeare_train",
200
+ eval_dir_path="data/shakespeare_valid",
211
201
  dec_tokenizer=tok,
212
202
  save_path="models/shakespeare_robo.pkl"
213
203
  )
@@ -4,14 +4,14 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "robo_lib"
7
- version = "0.0.11"
7
+ version = "1.0.1"
8
8
  authors = [
9
9
  { name="Erik Papp", email="erik3papp@gmail.com" },
10
10
  ]
11
11
  description = "A package to create, configure, and train transformer models."
12
12
  readme = "README.md"
13
13
  requires-python = ">=3.8"
14
- dependencies = ["torch", "tokenizers", "numpy"]
14
+ dependencies = ["torch", "tokenizers", "numpy", "typing"]
15
15
  classifiers = [
16
16
  "Programming Language :: Python :: 3",
17
17
  "License :: OSI Approved :: MIT License",
@@ -1,8 +1,7 @@
1
1
  from .components import TokenizerConstructor as TokenizerConstructor
2
2
  from .components import create_mask as create_mask
3
- from .components import pad as pad
4
- from .components import process_row as process_row
5
- from .components import scan_max_block_size as scan_max_block_size
3
+ from .components import pre_process_data as pre_process_data
4
+ from .components import safe_stack as safe_stack
6
5
  from .components import DataProcessor as DataProcessor
7
6
  from .components import get_valid_samples as get_valid_samples
8
7
  from .components import get_batch as get_batch