gptmodel-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gptmodel/__init__.py ADDED
@@ -0,0 +1 @@
+ from .gptmodel import *
gptmodel/gptmodel.py ADDED
@@ -0,0 +1,476 @@
+ # This is a standard implementation of a GPT (Generative Pre-trained Transformer) model, developed by Sapiens Technology®️,
+ # which faithfully follows the mathematical structure of the paper “Attention Is All You Need” for the construction of the Transformer architecture
+ # used in the pattern recognition of the model that is saved. Some optimizations that do not influence the Transformer architecture
+ # were applied only to facilitate adjustment of the parameters and variables used for training, saving, loading, fine-tuning and inference of the pre-trained model.
+ # --------------------------> A SAPIENS TECHNOLOGY®️ PRODUCTION <--------------------------
+ class GPTModel:
+     def __init__(self, embedding_dim=384, block_size=500, batch_size=32, number_heads=6, number_layers=6, dropout=0.1, learning_rate=3e-4, eval_interval=500, epochs=2000):
+         self.__embedding_dim = max((1, int(embedding_dim))) if type(embedding_dim) in (bool, int, float) else 384
+         self.__block_size = max((1, int(block_size))) if type(block_size) in (bool, int, float) else 500
+         self.__batch_size = max((1, int(batch_size))) if type(batch_size) in (bool, int, float) else 32
+         self.__number_heads = max((1, int(number_heads))) if type(number_heads) in (bool, int, float) else 6
+         self.__number_layers = max((1, int(number_layers))) if type(number_layers) in (bool, int, float) else 6
+         self.dropout = max((0, float(dropout))) if type(dropout) in (bool, int, float) else 0.1
+         self.__learning_rate = max((0, float(learning_rate))) if type(learning_rate) in (bool, int, float) else 3e-4
+         self.__eval_interval = max((1, int(eval_interval))) if type(eval_interval) in (bool, int, float) else 500
+         self.__epochs = max((1, int(epochs))) if type(epochs) in (bool, int, float) else 2000
+         from torch import cuda, device, backends
+         from torch.utils.data import Dataset, DataLoader
+         from torch.nn import Module, functional as Function, utils
+         from torch import nn as artificial_neural_network, triu, ones
+         from torch import tensor, no_grad, int64, multinomial, cat, topk, where, sort, cumsum, zeros_like, bool as torch_bool, save, load
+         from tiktoken import get_encoding
+         from json import load as json_load
+         from torch import optim
+         from tqdm import tqdm
+         from os import path as os_path, makedirs as os_makedirs
+         if cuda.is_available(): local_device = device('cuda')
+         elif backends.mps.is_available(): local_device = device('mps')
+         else: local_device = device('cpu')
+         self.__Dataset = Dataset
+         self.__Module = Module
+         self.__neural_network = artificial_neural_network
+         self.__tensor = tensor
+         self.__triu = triu
+         self.__ones = ones
+         self.__no_grad = no_grad
+         self.__device = local_device
+         self.__Function = Function
+         self.__int64 = int64
+         self.__multinomial = multinomial
+         self.__cat = cat
+         self.__topk = topk
+         self.__where = where
+         self.__sort = sort
+         self.__cumsum = cumsum
+         self.__zeros_like = zeros_like
+         self.__bool = torch_bool
+         self.__get_encoding = get_encoding
+         self.__json_load = json_load
+         self.__DataLoader = DataLoader
+         self.__optim = optim
+         self.__utils = utils
+         self.__tqdm = tqdm
+         self.__os_path = os_path
+         self.__os_makedirs = os_makedirs
+         self.__save = save
+         self.__load = load
+         self.__model = None
+         self.__encode = None
+         self.__decode = None
+         self.__end_tag = None
+         self.__string = ''
+         self.__vocab_size = 0
+         self.__char_to_idx = {}
+         self.__idx_to_char = {}
+         self.__tokenizer = 'gpt'
+         self.__optimizer = None
+         self.__train = False
+         self.parameters_number = 0
+         class TextDataset(self.__Dataset):
+             def __init__(self, data={}, block_size=0): self.data, self.block_size = data, block_size
+             def __len__(self): return len(self.data) - self.block_size
+             def __getitem__(self, index=0):
+                 input_sequence = self.data[index:index + self.block_size]
+                 target_sequence = self.data[index + 1:index + self.block_size + 1]
+                 return input_sequence, target_sequence
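+         # Note: each __getitem__ call returns an (input_sequence, target_sequence) pair in which
+         # the target is the same block_size-token window shifted one position to the right, so every
+         # position in the input is trained to predict the token that follows it.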
+         class Transformer(self.__Module):
+             def __init__(self, outer=None, vocab_size=0, embedding_dim=0, number_heads=0, number_layers=0, dropout=None, block_size=0):
+                 super().__init__()
+                 self.outer = outer
+                 self.positional_encoding = outer._GPTModel__neural_network.Parameter(outer._GPTModel__tensor([]).new_zeros(1, block_size, embedding_dim))
+                 self.dropout = outer._GPTModel__neural_network.Dropout(dropout)
+                 self.input_embedding = outer._GPTModel__neural_network.Embedding(vocab_size, embedding_dim)
+                 self.multi_head_attention = outer._GPTModel__neural_network.TransformerDecoder(outer._GPTModel__neural_network.TransformerDecoderLayer(d_model=embedding_dim, nhead=number_heads, dropout=dropout), num_layers=number_layers)
+                 self.output_function = outer._GPTModel__neural_network.Linear(embedding_dim, vocab_size)
+                 self.block_size = block_size
+             def forward(self, input_tensor=[]):
+                 outer = self.outer
+                 batch_size, sequence_length = input_tensor.size()
+                 positions = self.positional_encoding[:, :sequence_length, :].to(input_tensor.device)
+                 output_embedding = self.dropout(self.input_embedding(input_tensor) + positions)
+                 transposed = output_embedding.transpose(0, 1)
+                 masked_multi_head_attention = outer._GPTModel__triu(outer._GPTModel__ones(sequence_length, sequence_length, device=input_tensor.device) * float('-inf'), diagonal=1)
+                 add_and_norm = self.multi_head_attention(transposed, transposed, tgt_mask=masked_multi_head_attention)
+                 add_and_norm = add_and_norm.transpose(0, 1)
+                 return self.output_function(add_and_norm)
+         self.__TextDatasets = TextDataset
+         self.__Transformers = Transformer
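+         # The Transformer above is a decoder-only stack: learned positional encodings are added to
+         # the token embeddings, and the upper-triangular matrix of '-inf' values passed as tgt_mask
+         # prevents each position from attending to later positions in the self-attention sublayers.
+         # The same embedded sequence is also supplied as the decoder 'memory' argument, to which
+         # tgt_mask does not apply.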
+     def __compute_loss(self, loader=[]):
+         self.__model.eval()
+         total_loss = 0
+         with self.__no_grad():
+             for input_batch, target_batch in loader:
+                 input_batch, target_batch = input_batch.to(self.__device), target_batch.to(self.__device)
+                 logits = self.__model(input_batch)
+                 loss = self.__Function.cross_entropy(logits.view(-1, logits.size(-1)), target_batch.view(-1))
+                 total_loss += loss.item()
+         return total_loss / len(loader)
+     def __format_params(self, number_params=0):
+         if number_params < 1_000: return f'{number_params}U'
+         elif number_params < 1_000_000: return f'{number_params // 1_000}K'
+         elif number_params < 1_000_000_000: return f'{number_params // 1_000_000}M'
+         elif number_params < 1_000_000_000_000: return f'{number_params // 1_000_000_000}B'
+         else: return f'{number_params // 1_000_000_000_000}T'
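+     # Examples: 950 -> '950U', 1_500 -> '1K', 2_300_000 -> '2M', 7_000_000_000 -> '7B'
+     # (integer division truncates rather than rounds).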
+     def __get_found_end_tag(self, decoded_token='', decoded_tokens='', limits=[]):
+         if self.__end_tag is None: return False
+         decoded_token, decoded_tokens, limits = str(decoded_token).strip(), str(decoded_tokens).strip(), list(limits)
+         for limit in ['']+limits+[' ']:
+             if decoded_token.endswith(limit+self.__end_tag) or decoded_tokens.endswith(limit+self.__end_tag): return True
+             elif decoded_token.endswith(limit+self.__end_tag[0]) or decoded_tokens.endswith(limit+self.__end_tag[0]): return True
+         return False
+     def __generate_tokens_x(self, prompt='', max_tokens=500, temperature=1.0):
+         self.__model.eval()
+         encoded_prompt = self.__encode(prompt)
+         input_tensor = self.__tensor(encoded_prompt, dtype=self.__int64).unsqueeze(0).to(self.__device)
+         limits = ('.', '\n', '!', '?', ';')
+         with self.__no_grad():
+             tokens_generated, decoded_tokens = 0, ''
+             while True:
+                 conditioned_input = input_tensor[:, -self.__block_size:] if input_tensor.size(1) > self.__block_size else input_tensor
+                 logits = self.__model(conditioned_input)
+                 logits = logits[:, -1, :] / temperature
+                 output_probabilities = self.__Function.softmax(logits, dim=-1)
+                 shifted_right = self.__multinomial(output_probabilities, num_samples=1)
+                 input_tensor = self.__cat((input_tensor, shifted_right), dim=1)
+                 token = shifted_right.item()
+                 decoded_token, found_end_tag = self.__decode([token]), False
+                 if tokens_generated == 0 and '\n' in decoded_token: continue
+                 tokens_generated += 1
+                 decoded_tokens += decoded_token
+                 found_end_tag = self.__get_found_end_tag(decoded_token=decoded_token, decoded_tokens=decoded_tokens, limits=limits)
+                 if found_end_tag and decoded_token.endswith(self.__end_tag[0]): decoded_token = decoded_token[:-1]
+                 yield decoded_token
+                 if found_end_tag or ((tokens_generated >= max_tokens) and (decoded_token[-1] in limits)) or (tokens_generated >= (max_tokens*2)): break
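+     # __generate_tokens_x is plain temperature sampling: the logits of the last position are
+     # divided by `temperature`, converted to probabilities with softmax, and one token is drawn
+     # with multinomial. Generation stops at the end tag, after max_tokens once a sentence
+     # delimiter is produced, or unconditionally at 2*max_tokens.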
+     def __generate_tokens_y(self, prompt='', max_tokens=500, temperature=1.0, top_k=50, top_p=0.9):
+         self.__model.eval()
+         encoded_prompt = self.__encode(prompt)
+         input_tensor = self.__tensor(encoded_prompt, dtype=self.__int64).unsqueeze(0).to(self.__device)
+         limits = ('.', '\n', '!', '?', ';')
+         with self.__no_grad():
+             tokens_generated, decoded_tokens = 0, ''
+             while True:
+                 conditioned_input = (input_tensor[:, -self.__block_size:] if input_tensor.size(1) > self.__block_size else input_tensor)
+                 logits = self.__model(conditioned_input)
+                 logits = logits[:, -1, :] / temperature
+                 if top_k > 0:
+                     top_k = min(top_k, logits.size(-1))
+                     value, _ = self.__topk(logits, top_k)
+                     thresh = value[:, -1].unsqueeze(-1)
+                     logits = self.__where(logits < thresh, self.__tensor(float('-inf')).to(logits), logits)
+                 if top_p < 1.0:
+                     sorted_logits, sorted_index = self.__sort(logits, dim=-1, descending=True)
+                     sorted_probabilities = self.__Function.softmax(sorted_logits, dim=-1)
+                     cumulative_probabilities = self.__cumsum(sorted_probabilities, dim=-1)
+                     sorted_mask = cumulative_probabilities > top_p
+                     sorted_mask[:, 0] = False
+                     mask = self.__zeros_like(logits, dtype=self.__bool)
+                     mask.scatter_(-1, sorted_index, sorted_mask)
+                     logits = logits.masked_fill(mask, float('-inf'))
+                 output_probabilities = self.__Function.softmax(logits, dim=-1)
+                 shifted_right = self.__multinomial(output_probabilities, num_samples=1)
+                 input_tensor = self.__cat((input_tensor, shifted_right), dim=1)
+                 token = shifted_right.item()
+                 decoded_token, found_end_tag = self.__decode([token]), False
+                 if tokens_generated == 0 and '\n' in decoded_token: continue
+                 tokens_generated += 1
+                 decoded_tokens += decoded_token
+                 found_end_tag = self.__get_found_end_tag(decoded_token=decoded_token, decoded_tokens=decoded_tokens, limits=limits)
+                 if found_end_tag and decoded_token.endswith(self.__end_tag[0]): decoded_token = decoded_token[:-1]
+                 yield decoded_token
+                 if found_end_tag or ((tokens_generated >= max_tokens) and (decoded_token[-1] in limits)) or (tokens_generated >= (max_tokens*2)): break
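+     # __generate_tokens_y adds top-k and nucleus (top-p) filtering before sampling: top-k keeps
+     # only the k highest logits and sets the rest to '-inf'; top-p then sorts the surviving
+     # probabilities in descending order and masks every token whose cumulative probability
+     # exceeds top_p, always keeping at least the single most likely token.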
+     def __generate_tokens(self, prompt='', max_tokens=500, temperature=1.0, top_k=0, top_p=1.0):
+         prompt = '?' if len(str(prompt).strip()) < 1 else str(prompt).strip()
+         def get_last_n_tokens(text='', n=0):
+             if self.__tokenizer == 'sapi': return text[-n:]
+             else:
+                 encoding = self.__get_encoding('gpt2')
+                 tokens = encoding.encode(text)
+                 last_n_tokens = tokens[-n:]
+                 truncated_text = encoding.decode(last_n_tokens)
+                 return truncated_text
+         prompt = get_last_n_tokens(text=prompt, n=self.__block_size)
+         if top_k > 0 or top_p < 1.0: return self.__generate_tokens_y(prompt=prompt, max_tokens=max_tokens, temperature=temperature, top_k=top_k, top_p=top_p)
+         else: return self.__generate_tokens_x(prompt=prompt, max_tokens=max_tokens, temperature=temperature)
+     def train(self, dataset_path='', string='', precision=0.9, tokenizer='gpt', context_window=500, end_tag=None, validate=0.0, progress=True):
+         try:
+             training_metrics = {'val_loss': 0.0, 'loss': 0.0, 'generalization_rate': 0.0, 'precision': 0.0}
+             dataset_path = str(dataset_path).strip()
+             string = str(string).strip()
+             precision = min((1.0, max((0.0, float(precision))))) if type(precision) in (bool, int, float) else 0.9
+             tokenizer = str(tokenizer).lower().strip()
+             self.__block_size = max((1, int(context_window))) if type(context_window) in (bool, int, float) else 500
+             if end_tag is not None and self.__end_tag is None: self.__end_tag = str(end_tag)
+             validate = min((1.0, max((0.0, float(validate))))) if type(validate) in (bool, int, float) else 0.0
+             progress = bool(progress) if type(progress) in (bool, int, float) else True
+             if tokenizer not in ('sapi', 'gpt'): tokenizer = 'gpt'
+             self.__string = str(self.__string+'\n\n'+string).strip()
+             loss_limit = min(1.0, max(0.0, 1.0 - precision))
+             is_txt, is_json, text_data = dataset_path.endswith('.txt'), dataset_path.endswith('.json'), ''
+             def prepare_json(json_data={}):
+                 if type(json_data) == dict: pairs = json_data[list(json_data.keys())[0]]
+                 else: pairs = json_data
+                 if self.__end_tag is None: self.__end_tag = '<|end|>'
+                 return '\n\n'.join([str(pair[list(pair.keys())[0]]+'\n'+pair[list(pair.keys())[1]]).replace(self.__end_tag, '').strip()+self.__end_tag for pair in pairs])
+             def is_web_address(url_path=''):
+                 url_path = str(url_path).lower().strip()
+                 return url_path.startswith('https://') or url_path.startswith('http://') or url_path.startswith('www.')
+             _is_web_address = is_web_address(url_path=dataset_path)
+             if _is_web_address:
+                 is_json = True if '.json' in dataset_path.lower() else False
+                 def read_remote_file(url_path=''):
+                     from urllib.request import urlopen
+                     with urlopen(url_path) as response: return str(response.read().decode('utf-8', errors='replace').replace('\r\n', '\n').replace('\r', '\n')).strip()
+                 text_data = read_remote_file(url_path=dataset_path)
+                 if is_json:
+                     def load_json(string_content=''):
+                         json_content = {}
+                         string_content = str(string_content)
+                         try:
+                             from json import loads
+                             json_content = loads(string_content)
+                         except:
+                             from ast import literal_eval
+                             json_content = literal_eval(string_content)
+                         return json_content
+                     json_data = load_json(string_content=text_data)
+                     text_data = prepare_json(json_data=json_data)
+             else:
+                 if not is_txt and not is_json and len(self.__string) < 1: raise ValueError('Unsupported file format. Use .txt or .json.')
+                 if is_txt:
+                     with open(dataset_path, 'r', encoding='utf-8') as file: text_data = str(file.read()).strip()
+                 elif is_json:
+                     with open(dataset_path, 'r', encoding='utf-8') as file: json_data = self.__json_load(file)
+                     text_data = prepare_json(json_data=json_data)
+             if len(self.__string) > 0: text_data += '\n\n' + self.__string
+             text_data = text_data.strip()
+             if tokenizer == 'sapi':
+                 chars = sorted(list(set(text_data)))
+                 self.__vocab_size = len(chars)
+                 self.__char_to_idx = {char: index for index, char in enumerate(chars)}
+                 self.__idx_to_char = {index: char for index, char in enumerate(chars)}
+                 self.__encode = lambda string: [self.__char_to_idx[char] for char in string]
+                 self.__decode = lambda indices: ''.join([self.__idx_to_char[index] for index in indices])
+             else:
+                 encode = self.__get_encoding('gpt2')
+                 self.__vocab_size = encode.n_vocab
+                 self.__encode = encode.encode
+                 self.__decode = encode.decode
+             data = self.__tensor(self.__encode(text_data), dtype=self.__int64)
+             if validate > 0:
+                 split_point = int((1-validate) * len(data))
+                 train_data, validation_data = data[:split_point], data[split_point:]
+                 minimum_length = min(len(train_data), len(validation_data))
+                 if minimum_length >= 2:
+                     desired_block_size = int(context_window) if context_window else 500
+                     self.__block_size = max(1, min(desired_block_size, minimum_length - 1))
+                 else: self.__block_size = 1
+             else:
+                 train_data = data
+                 data_length = len(train_data)
+                 self.__block_size = max(1, min(self.__block_size, data_length - 1))
+             self.__tokenizer = tokenizer
+             train_dataset = self.__TextDatasets(train_data, self.__block_size)
+             if validate > 0: validation_dataset = self.__TextDatasets(validation_data, self.__block_size)
+             train_loader = self.__DataLoader(train_dataset, batch_size=self.__batch_size, shuffle=True)
+             if validate > 0: validation_loader = self.__DataLoader(validation_dataset, batch_size=self.__batch_size, shuffle=False)
+             self.__model = self.__Transformers(self, self.__vocab_size, self.__embedding_dim, self.__number_heads, self.__number_layers, self.dropout, self.__block_size).to(self.__device)
+             self.__optimizer = self.__optim.AdamW(self.__model.parameters(), lr=self.__learning_rate)
+             scheduler, feed_forward = self.__optim.lr_scheduler.ReduceLROnPlateau(self.__optimizer, mode='min', factor=0.5, patience=3), True
+             Nx, last_validation_loss, step, best_val_loss = 0, 1.0, 0, float('inf')
+             string_precision = f'{precision:.4f}'.ljust(5, '0')
+             formatted_string = '{desc}: {percentage:3.0f}%|{bar:10}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt:>9}]'
+             while feed_forward:
+                 self.__model.train()
+                 loss_item, total_train_loss = 1.0, 1.0
+                 epoch = str(Nx+1).rjust(10, '0')
+                 for input_batch, target_batch in train_loader:
+                     input_batch, target_batch = input_batch.to(self.__device), target_batch.to(self.__device)
+                     logits = self.__model(input_batch)
+                     loss = self.__Function.cross_entropy(logits.view(-1, logits.size(-1)), target_batch.view(-1))
+                     self.__optimizer.zero_grad()
+                     loss.backward()
+                     self.__utils.clip_grad_norm_(self.__model.parameters(), 1.0)
+                     self.__optimizer.step()
+                     loss_item = loss.item()
+                     total_train_loss += loss_item
+                     last_validation_loss = validation_loss = self.__compute_loss(validation_loader) if validate > 0 else 1.0
+                     training_metrics['generalization_rate'] = min((1.0, max((0.0, 1.0-validation_loss))))
+                     if step > 0 and step % self.__eval_interval == 0:
+                         scheduler.step(validation_loss)
+                         if validation_loss < best_val_loss: best_val_loss = validation_loss
+                     step += 1
+                 current_precision = min(1.0, max(0.0, 1.0 - loss_item))
+                 average_train_loss = total_train_loss / max((1, len(train_loader)))
+                 if current_precision >= precision or average_train_loss <= loss_limit or Nx >= self.__epochs:
+                     training_metrics['loss'] = loss_item if current_precision >= precision else average_train_loss
+                     training_metrics['precision'] = current_precision
+                     if progress:
+                         description = f'Backpropagation epoch: {epoch} - current precision is '+f'{current_precision:.4f}'.ljust(5, '0')+f'; aiming for precision >= {string_precision} in training'
+                         self.__tqdm(train_loader, desc=description, unit='it', unit_scale=True, unit_divisor=1000, smoothing=0.1, bar_format=formatted_string).update(len(train_loader))
+                         print()
+                     break
+                 elif progress:
+                     description = f'Backpropagation epoch: {epoch} - current precision is '+f'{current_precision:.4f}'.ljust(5, '0')+f'; aiming for precision >= {string_precision} in training'
+                     train_loader = self.__tqdm(train_loader, desc=description, unit='it', unit_scale=True, unit_divisor=1000, smoothing=0.1, bar_format=formatted_string)
+                 Nx += 1
+             training_metrics['val_loss'] = best_val_loss if best_val_loss < 1.0 else min((1.0, max((0.0, last_validation_loss))))
+             self.__train = True
+             return training_metrics
+         except Exception as error:
+             print('ERROR in train: ' + str(error))
+             try: return training_metrics
+             except: return {'val_loss': 1.0, 'loss': 1.0, 'generalization_rate': 0.0, 'precision': 0.0}
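+     # The training loop above stops when any of three conditions is met: the last batch's
+     # precision (1 - loss) reaches the `precision` target, the average epoch loss falls to
+     # 1 - precision or below, or `epochs` passes have completed. When `validate` > 0, a
+     # validation loss is computed each step and passed to the ReduceLROnPlateau scheduler
+     # every `eval_interval` steps.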
+     def saveModel(self, model_path='', progress=True):
+         try:
+             model_path = str(model_path).strip()
+             progress = bool(progress) if type(progress) in (bool, int, float) else True
+             if self.__model is None: raise ValueError('Model is not initialized. Call train or loadModel first.')
+             self.parameters_number = sum(parameters.numel() for parameters in self.__model.parameters())
+             formatted_params = self.__format_params(self.parameters_number)
+             if len(model_path) > 0:
+                 directory, file_name = self.__os_path.split(model_path)
+                 if not file_name: file_name = 'model.gpt'
+                 elif not file_name.endswith('.gpt'): file_name += '.gpt'
+             else: directory, file_name = str(model_path), 'model.gpt'
+             if directory and not self.__os_path.exists(directory): self.__os_makedirs(directory)
+             save_path = self.__os_path.join(directory, file_name)
+             save_dict = {
+                 'tokenizer': str(self.__tokenizer).lower().strip(),
+                 'embedding_dim': max((1, int(self.__embedding_dim))) if type(self.__embedding_dim) in (bool, int, float) else -1,
+                 'vocab_size': max((0, int(self.__vocab_size))) if type(self.__vocab_size) in (bool, int, float) else 0,
+                 'block_size': max((1, int(self.__block_size))) if type(self.__block_size) in (bool, int, float) else -1,
+                 'end_tag': str(self.__end_tag) if self.__end_tag is not None else '',
+                 'number_heads': max((1, int(self.__number_heads))) if type(self.__number_heads) in (bool, int, float) else -1,
+                 'number_layers': max((1, int(self.__number_layers))) if type(self.__number_layers) in (bool, int, float) else -1,
+                 'dropout': max((0, float(self.dropout))) if type(self.dropout) in (bool, int, float) else 0.1,
+                 'parameters_number': max((0, int(self.parameters_number))) if type(self.parameters_number) in (bool, int, float) else 0,
+                 'architecture_type': 'gpt_model',
+                 'model_state_dict': self.__model.state_dict(),
+                 'fine_tuning': [],
+                 'precision': 1.0
+             }
+             if self.__tokenizer == 'sapi':
+                 save_dict['char_to_idx'] = self.__char_to_idx if type(self.__char_to_idx) == dict else {}
+                 save_dict['idx_to_char'] = self.__idx_to_char if type(self.__idx_to_char) == dict else {}
+             if progress:
+                 for _ in self.__tqdm(range(10), desc=f'Saving model with {formatted_params} parameters', leave=False): self.__save(save_dict, save_path)
+             else: self.__save(save_dict, save_path)
+             return True
+         except Exception as error:
+             print('ERROR in saveModel: ' + str(error))
+             return False
+     def loadModel(self, model_path='', progress=True):
+         try:
+             model_path = str(model_path).strip()
+             progress = bool(progress) if type(progress) in (bool, int, float) else True
+             if len(model_path) > 0:
+                 directory, file_name = self.__os_path.split(model_path)
+                 if not file_name: file_name = 'model.gpt'
+                 elif not file_name.endswith('.gpt'): file_name += '.gpt'
+             else: directory, file_name = str(model_path), 'model.gpt'
+             model_file = self.__os_path.join(directory, file_name)
+             if progress:
+                 for _ in self.__tqdm(range(10), desc='Loading model', leave=False): checkpoint = self.__load(model_file, map_location=self.__device)
+             else: checkpoint = self.__load(model_file, map_location=self.__device)
+             try: self.__tokenizer = str(checkpoint['tokenizer']).lower().strip()
+             except: self.__tokenizer = 'gpt'
+             try: self.__embedding_dim = max((1, int(checkpoint['embedding_dim']))) if checkpoint['embedding_dim'] != -1 else None
+             except: self.__embedding_dim = None
+             try: self.__vocab_size = max((0, int(checkpoint['vocab_size']))) if type(checkpoint['vocab_size']) in (bool, int, float) else 0
+             except: self.__vocab_size = 0
+             try: self.__block_size = max((1, int(checkpoint['block_size']))) if checkpoint['block_size'] != -1 else None
+             except: self.__block_size = None
+             try: self.__end_tag = str(checkpoint['end_tag'])
+             except: self.__end_tag = ''
+             try: self.__number_heads = max((1, int(checkpoint['number_heads']))) if checkpoint['number_heads'] != -1 else None
+             except: self.__number_heads = None
+             try: self.__number_layers = max((1, int(checkpoint['number_layers']))) if checkpoint['number_layers'] != -1 else None
+             except: self.__number_layers = None
+             try: self.dropout = max((0, float(checkpoint['dropout']))) if type(checkpoint['dropout']) in (bool, int, float) else 0.1
+             except: self.dropout = 0.1
+             try: self.parameters_number = max((0, int(checkpoint['parameters_number']))) if type(checkpoint['parameters_number']) in (bool, int, float) else 0
+             except: self.parameters_number = 0
+             if self.__tokenizer == 'sapi':
+                 try: self.__char_to_idx = dict(checkpoint['char_to_idx'])
+                 except: self.__char_to_idx = {}
+                 try: self.__idx_to_char = dict(checkpoint['idx_to_char'])
+                 except: self.__idx_to_char = {}
+                 self.__encode = lambda string: [self.__char_to_idx[char] for char in string]
+                 self.__decode = lambda indexes: ''.join([self.__idx_to_char[index] for index in indexes])
+             else:
+                 encode = self.__get_encoding('gpt2')
+                 self.__encode = encode.encode
+                 self.__decode = encode.decode
+             if len(self.__end_tag) < 1: self.__end_tag = None
+             self.__model = self.__Transformers(outer=self, vocab_size=self.__vocab_size, embedding_dim=self.__embedding_dim, number_heads=self.__number_heads, number_layers=self.__number_layers, dropout=self.dropout, block_size=self.__block_size).to(self.__device)
+             state_dict = checkpoint['model_state_dict']
+             self.__model.load_state_dict(state_dict)
+             self.__optimizer, self.__train = None, True
+             return True
+         except Exception as error:
+             print('ERROR in loadModel: ' + str(error))
+             return False
+     def addFit(self, prompt='', answer=''):
+         try:
+             prompt = str(prompt).strip()
+             answer = str(answer).strip()
+             if not self.__train:
+                 if self.__end_tag is None: self.__end_tag = '<|end|>'
+                 self.__string += prompt+'\n'+answer+self.__end_tag+'\n\n'
+             else:
+                 if self.__model is None: raise ValueError('Model is not initialized. Call train or loadModel first.')
+                 if self.__optimizer is None: self.__optimizer = self.__optim.AdamW(self.__model.parameters(), lr=self.__learning_rate)
+                 if self.__end_tag is None: formatted = prompt+'\n'+answer+'\n\n'
+                 else: formatted = prompt+'\n'+answer+self.__end_tag+'\n\n'
+                 encoded = self.__encode(formatted)
+                 if len(encoded) > self.__block_size: encoded = encoded[:self.__block_size]
+                 input_tensor = self.__tensor(encoded[:-1], dtype=self.__int64).unsqueeze(0).to(self.__device)
+                 target_tensor = self.__tensor(encoded[1:], dtype=self.__int64).unsqueeze(0).to(self.__device)
+                 self.__model.train()
+                 logits = self.__model(input_tensor)
+                 loss = self.__Function.cross_entropy(logits.view(-1, logits.size(-1)), target_tensor.view(-1))
+                 self.__optimizer.zero_grad()
+                 loss.backward()
+                 self.__utils.clip_grad_norm_(self.__model.parameters(), 1.0)
+                 self.__optimizer.step()
+             return True
+         except Exception as error:
+             print('ERROR in addFit: ' + str(error))
+             return False
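+     # addFit has two modes: before any training it simply appends the prompt/answer pair
+     # (terminated by the end tag) to the in-memory corpus consumed by the next train() call;
+     # after training or loading, it performs a single gradient step on the encoded pair,
+     # fine-tuning the current weights in place.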
+     def predict(self, prompt='', max_tokens=500, temperature=0.5, top_k=0, top_p=1.0, stream=False):
+         try:
+             prompt = str(prompt).strip()
+             max_tokens = max((1, int(max_tokens))) if type(max_tokens) in (bool, int, float) else 500
+             temperature = max((0, float(temperature))) if type(temperature) in (bool, int, float) else 0.5
+             top_k = max((0, int(top_k))) if type(top_k) in (bool, int, float) else 0
+             top_p = min((1.0, max((0.0, float(top_p))))) if type(top_p) in (bool, int, float) else 1.0
+             stream = bool(stream) if type(stream) in (bool, int, float) else False
+             if self.__model is None: raise ValueError('Model is not initialized. Call train or loadModel first.')
+             if stream: return self.__generate_tokens(prompt=prompt, max_tokens=max_tokens, temperature=temperature, top_k=top_k, top_p=top_p)
+             tokens = list(self.__generate_tokens(prompt=prompt, max_tokens=max_tokens, temperature=temperature, top_k=top_k, top_p=top_p))
+             return ''.join(tokens)
+         except Exception as error:
+             print('ERROR in predict: ' + str(error))
+             return ''
+     def print_predict(self, prompt='', max_tokens=500, temperature=0.5, top_k=0, top_p=1.0, stream=False):
+         try:
+             prompt = str(prompt).strip()
+             max_tokens = max((1, int(max_tokens))) if type(max_tokens) in (bool, int, float) else 500
+             temperature = max((0, float(temperature))) if type(temperature) in (bool, int, float) else 0.5
+             top_k = max((0, int(top_k))) if type(top_k) in (bool, int, float) else 0
+             top_p = min((1.0, max((0.0, float(top_p))))) if type(top_p) in (bool, int, float) else 1.0
+             stream = bool(stream) if type(stream) in (bool, int, float) else False
+             if self.__model is None: raise ValueError('Model is not initialized. Call train or loadModel first.')
+             if stream:
+                 for token in self.__generate_tokens(prompt=prompt, max_tokens=max_tokens, temperature=temperature, top_k=top_k, top_p=top_p): print(token, end='', flush=True)
+                 print()
+             else: print(self.predict(prompt=prompt, max_tokens=max_tokens, temperature=temperature, top_k=top_k, top_p=top_p, stream=stream))
+         except Exception as error:
+             print('ERROR in print_predict: ' + str(error))
+ # This is a standard implementation of a GPT (Generative Pre-trained Transformer) model, developed by Sapiens Technology®️,
+ # which faithfully follows the mathematical structure of the paper “Attention Is All You Need” for the construction of the Transformer architecture
+ # used in the pattern recognition of the model that is saved. Some optimizations that do not influence the Transformer architecture
+ # were applied only to facilitate adjustment of the parameters and variables used for training, saving, loading, fine-tuning and inference of the pre-trained model.
+ # --------------------------> A SAPIENS TECHNOLOGY®️ PRODUCTION <--------------------------
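The class above exposes a small public API: train, saveModel, loadModel, addFit, predict and print_predict. The sketch below is an illustrative usage example assembled from those signatures; it is not part of the package. The dataset path, directory names and hyperparameter values are hypothetical, and the JSON layout (one top-level key holding a list of two-key prompt/answer pairs) is assumed from the prepare_json helper inside train.

    # usage_sketch.py -- illustrative only; assumes a local file ./dataset.json such as:
    # {"data": [{"input": "Hello", "output": "Hi! How can I help you?"}, ...]}
    from gptmodel import GPTModel
    model = GPTModel(embedding_dim=64, block_size=64, batch_size=8, number_heads=4, number_layers=2, epochs=100)
    metrics = model.train(dataset_path='./dataset.json', precision=0.9, tokenizer='gpt', context_window=64, validate=0.1)
    print(metrics)  # {'val_loss': ..., 'loss': ..., 'generalization_rate': ..., 'precision': ...}
    model.saveModel('./models/sketch.gpt')  # serializes hyperparameters plus model_state_dict
    loaded = GPTModel()
    loaded.loadModel('./models/sketch.gpt')
    loaded.addFit('What is your name?', 'I am a small GPT model.')  # one fine-tuning step
    loaded.print_predict('What is your name?', max_tokens=50, temperature=0.5, top_k=50, top_p=0.9, stream=True)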
gptmodel-1.0.0.dist-info/LICENSE.txt ADDED
@@ -0,0 +1,3 @@
+ This is proprietary code.
+
+ Its copying, alteration and distribution outside official media is strictly prohibited.
gptmodel-1.0.0.dist-info/METADATA ADDED
@@ -0,0 +1,14 @@
+ Metadata-Version: 2.2
+ Name: gptmodel
+ Version: 1.0.0
+ Home-page: https://github.com/
+ Author: SAPIENS TECHNOLOGY
+ License: Proprietary Software
+ License-File: LICENSE.txt
+ Requires-Dist: torch==2.4.1
+ Requires-Dist: tiktoken==0.4.0
+ Requires-Dist: tqdm==4.67.1
+ Dynamic: author
+ Dynamic: home-page
+ Dynamic: license
+ Dynamic: requires-dist
gptmodel-1.0.0.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+ gptmodel/__init__.py,sha256=UCXG7pgytIN6ODBWxcfF5tQJkE6uMdzbvvVRDuRsSYs,24
+ gptmodel/gptmodel.py,sha256=GBPnOgY547dZiI3LJfOpCHzbCSq1vzEzoQ_oeMWUxhw,33411
+ gptmodel-1.0.0.dist-info/LICENSE.txt,sha256=WqB2vIA5tH5lqLTr53yT_oy1m0wYfuvCPQKxdDHWimg,115
+ gptmodel-1.0.0.dist-info/METADATA,sha256=OXJF4uVDZbuVatpnD0_RJJJgR1VxS0RdT9EcO_WRPmo,328
+ gptmodel-1.0.0.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
+ gptmodel-1.0.0.dist-info/top_level.txt,sha256=585C0QclguIkVPKKPpoeD2FxYAc5n5EAuvJNK4vMeQk,9
+ gptmodel-1.0.0.dist-info/RECORD,,
gptmodel-1.0.0.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (76.0.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
gptmodel-1.0.0.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ gptmodel