lecrapaud-0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lecrapaud might be problematic.
- lecrapaud/__init__.py +0 -0
- lecrapaud/config.py +16 -0
- lecrapaud/db/__init__.py +0 -0
- lecrapaud/db/alembic/README +1 -0
- lecrapaud/db/alembic/env.py +78 -0
- lecrapaud/db/alembic/script.py.mako +26 -0
- lecrapaud/db/alembic/versions/2025_04_06_1738-7390745388e4_initial_setup.py +295 -0
- lecrapaud/db/alembic/versions/2025_04_06_1755-40cd8d3e798e_unique_constraint_for_data.py +30 -0
- lecrapaud/db/alembic/versions/2025_05_23_1724-2360941fa0bd_longer_string.py +52 -0
- lecrapaud/db/alembic/versions/2025_05_27_1159-b96396dcfaff_add_env_to_trading_tables.py +34 -0
- lecrapaud/db/alembic/versions/2025_05_27_1337-40cbfc215f7c_fix_nb_character_on_portfolio.py +39 -0
- lecrapaud/db/alembic/versions/2025_05_27_1526-3de994115317_to_datetime.py +36 -0
- lecrapaud/db/alembic/versions/2025_05_27_2003-25c227c684f8_add_fees_to_transactions.py +30 -0
- lecrapaud/db/alembic/versions/2025_05_27_2047-6b6f2d38e9bc_double_instead_of_float.py +132 -0
- lecrapaud/db/alembic/versions/2025_05_31_1111-c175e4a36d68_generalise_stock_to_group.py +36 -0
- lecrapaud/db/alembic/versions/2025_05_31_1256-5681095bfc27_create_investment_run_and_portfolio_.py +62 -0
- lecrapaud/db/alembic/versions/2025_05_31_1806-339927587383_add_investment_run_id.py +107 -0
- lecrapaud/db/alembic/versions/2025_05_31_1834-52b809a34371_make_nullablee.py +38 -0
- lecrapaud/db/alembic/versions/2025_05_31_1849-3b8550297e8e_change_date_to_datetime.py +44 -0
- lecrapaud/db/alembic/versions/2025_05_31_1852-e6b8c95d8243_add_date_to_portfolio_history.py +30 -0
- lecrapaud/db/alembic/versions/2025_06_10_1136-db8cdd83563a_addnewsandoptiontodata.py +32 -0
- lecrapaud/db/crud.py +179 -0
- lecrapaud/db/models/__init__.py +11 -0
- lecrapaud/db/models/base.py +6 -0
- lecrapaud/db/models/dataset.py +124 -0
- lecrapaud/db/models/feature.py +46 -0
- lecrapaud/db/models/feature_selection.py +126 -0
- lecrapaud/db/models/feature_selection_rank.py +80 -0
- lecrapaud/db/models/model.py +41 -0
- lecrapaud/db/models/model_selection.py +56 -0
- lecrapaud/db/models/model_training.py +54 -0
- lecrapaud/db/models/score.py +62 -0
- lecrapaud/db/models/target.py +59 -0
- lecrapaud/db/services.py +0 -0
- lecrapaud/db/setup.py +58 -0
- lecrapaud/directory_management.py +28 -0
- lecrapaud/feature_engineering.py +1119 -0
- lecrapaud/feature_selection.py +1229 -0
- lecrapaud/jobs/__init__.py +13 -0
- lecrapaud/jobs/config.py +17 -0
- lecrapaud/jobs/scheduler.py +36 -0
- lecrapaud/jobs/tasks.py +57 -0
- lecrapaud/model_selection.py +1571 -0
- lecrapaud/predictions.py +292 -0
- lecrapaud/search_space.py +844 -0
- lecrapaud/services/__init__.py +0 -0
- lecrapaud/services/embedding_categorical.py +71 -0
- lecrapaud/services/indicators.py +309 -0
- lecrapaud/speed_tests/experiments.py +139 -0
- lecrapaud/speed_tests/test-gpu-bilstm.ipynb +261 -0
- lecrapaud/speed_tests/test-gpu-resnet.ipynb +166 -0
- lecrapaud/speed_tests/test-gpu-transformers.ipynb +254 -0
- lecrapaud/speed_tests/tests.ipynb +145 -0
- lecrapaud/speed_tests/trash.py +37 -0
- lecrapaud/training.py +151 -0
- lecrapaud/utils.py +246 -0
- lecrapaud-0.4.0.dist-info/LICENSE +201 -0
- lecrapaud-0.4.0.dist-info/METADATA +103 -0
- lecrapaud-0.4.0.dist-info/RECORD +60 -0
- lecrapaud-0.4.0.dist-info/WHEEL +4 -0
lecrapaud/speed_tests/test-gpu-bilstm.ipynb
@@ -0,0 +1,261 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Speed Test\n",
+    "\n",
+    "### **Conclusion**\n",
+    "- **~1.8x faster on GPU than on CPU on Apple Silicon. On Intel processors, the CPU is ~2x faster (Metal is not supported on Intel).** \n",
+    "- **CuDNN** (NVIDIA’s deep learning library) significantly improves performance for LSTM-based models compared to vanilla RNNs (see the `LSTM` vs. `RNN(LSTMCell)` comparison in the Keras documentation).\n",
+    "\n",
+    "---\n",
+    "\n",
+    "### **Model Details**\n",
+    "- **Model Type:** BiLSTM \n",
+    "- **Total Parameters:** 2,658,945 (~10.14 MB) \n",
+    "- **Training Data:** 25,000 sequences \n",
+    "- **Features:** 1 (number of occurrences in the corpus)\n",
+    "\n",
+    "---\n",
+    "\n",
+    "### **Rules of Thumb**\n",
+    "1. **Sample Size:** \n",
+    "   `nb_samples > 10 × nb_features × nb_classes` \n",
+    "   (with `nb_classes = 5` for regression).\n",
+    "\n",
+    "2. **Parameter-Sample Ratio:** \n",
+    "   `nb_parameters < nb_samples / 10` \n",
+    "   (or even `nb_samples / 50` for deep learning).\n",
+    "\n",
+    "---\n",
+    "\n",
+    "### **GPU Performance**\n",
+    "\n",
+    "#### **tf.keras** \n",
+    "- Run 1: **146s** (~179ms/step) — loss: `0.4118`, accuracy: `0.8075`, val_loss: `0.3565`, val_accuracy: `0.8464` \n",
+    "- Run 2 (Apple Silicon): **29s** (~32ms/step) — loss: `0.4167`, accuracy: `0.8018`, val_loss: `0.3612`, val_accuracy: `0.8474`\n",
+    "\n",
+    "#### **Keras** \n",
+    "- Run 1: **119s** (~146ms/step) — loss: `0.4102`, accuracy: `0.8142`, val_loss: `0.3496`, val_accuracy: `0.8467` \n",
+    "- Run 2 (Apple Silicon): **25s** (~30ms/step) — loss: `0.4167`, accuracy: `0.8018`, val_loss: `0.3611`, val_accuracy: `0.8474`\n",
+    "\n",
+    "---\n",
+    "\n",
+    "### **CPU Performance**\n",
+    "\n",
+    "#### **tf.keras** \n",
+    "- Run 1: **73s** (~89ms/step) — loss: `0.4184`, accuracy: `0.8078`, val_loss: `0.3444`, val_accuracy: `0.8507` \n",
+    "- Run 2 (Apple Silicon): **47s** (~59ms/step) — loss: `0.5113`, accuracy: `0.7238`, val_loss: `0.3405`, val_accuracy: `0.8538`\n",
+    "\n",
+    "#### **Keras** \n",
+    "- Run 1: **81s** (~97ms/step) — loss: `0.4027`, accuracy: `0.8147`, val_loss: `0.3395`, val_accuracy: `0.8508` \n",
+    "- Run 2 (Apple Silicon): **47s** (~58ms/step) — loss: `0.5113`, accuracy: `0.7238`, val_loss: `0.3405`, val_accuracy: `0.853`\n",
+    "\n",
+    "---\n",
+    "\n",
+    "### **Key Observations**\n",
+    "1. **CPU vs GPU:** \n",
+    "   - On Apple Silicon, GPU training is approximately **2x faster** than CPU for this BiLSTM model. \n",
+    "\n",
+    "2. **Accuracy and Loss:** \n",
+    "   - Both CPU and GPU reach similar final accuracy and loss values, demonstrating functional equivalence. \n",
+    "\n",
+    "3. **Recommendation:** \n",
+    "   - Use CPU for smaller datasets or lighter models such as this BiLSTM. \n",
+    "   - For larger datasets or deeper architectures, a GPU with CuDNN can provide significant speedups.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "\n",
+    "devices = tf.config.list_physical_devices()\n",
+    "print(\"\\nDevices: \", devices)\n",
+    "\n",
+    "gpus = tf.config.list_physical_devices(\"GPU\")\n",
+    "if gpus:\n",
+    "    details = tf.config.experimental.get_device_details(gpus[0])\n",
+    "    print(\"GPU details: \", details)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# get data\n",
+    "import tensorflow as tf\n",
+    "from tensorflow import keras\n",
+    "import numpy as np\n",
+    "\n",
+    "np.random.seed(42) # for reproducibility\n",
+    "\n",
+    "max_features = 20000\n",
+    "maxlen = 100 # cut texts after this number of words (among top max_features most common words)\n",
+    "batch_size = 32\n",
+    "\n",
+    "(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(\n",
+    "    num_words=max_features\n",
+    ")\n",
+    "\n",
+    "x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)\n",
+    "x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=maxlen)\n",
+    "print(x_train.shape, x_train[:2], y_train.shape, y_train[:2])\n",
+    "print(\n",
+    "    f\"{x_train.shape[0]} train samples and {y_test.shape[0]} test samples, for a total of {x_train.shape[0] + x_test.shape[0]} samples.\"\n",
+    ")\n",
+    "y_train = np.array(y_train)\n",
+    "y_test = np.array(y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from tensorflow import keras\n",
+    "\n",
+    "print(f\"Using tf.keras version {keras.__version__}\")\n",
+    "\n",
+    "# Define the input layer\n",
+    "inputs = keras.Input(shape=(maxlen,)) # maxlen is the input length for each sequence\n",
+    "\n",
+    "# Embedding layer\n",
+    "x = keras.layers.Embedding(max_features, 128)(\n",
+    "    inputs\n",
+    ") # max_features is the vocabulary size\n",
+    "\n",
+    "# Bidirectional LSTM layer\n",
+    "x = keras.layers.Bidirectional(keras.layers.LSTM(64))(x)\n",
+    "\n",
+    "# Dropout layer for regularization\n",
+    "x = keras.layers.Dropout(0.5)(x)\n",
+    "\n",
+    "# Output layer (for binary classification)\n",
+    "outputs = keras.layers.Dense(1, activation=\"sigmoid\")(x)\n",
+    "\n",
+    "# Define the model with inputs and outputs\n",
+    "model = keras.Model(inputs=inputs, outputs=outputs, name=\"BiLSTM\")\n",
+    "\n",
+    "# Print model summary\n",
+    "model.summary()\n",
+    "\n",
+    "# Compile the model\n",
+    "model.compile(optimizer=\"adam\", loss=\"binary_crossentropy\", metrics=[\"accuracy\"])\n",
+    "\n",
+    "# Train the model\n",
+    "print(\"Train...\")\n",
+    "model.fit(\n",
+    "    x_train, y_train, batch_size=batch_size, epochs=1, validation_data=(x_test, y_test)\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "\n",
+    "print(f\"Using keras version {keras.__version__}\")\n",
+    "\n",
+    "model = keras.models.Sequential(name=\"BiLSTM\")\n",
+    "model.add(keras.layers.Embedding(max_features, 128, input_length=maxlen))\n",
+    "model.add(keras.layers.Bidirectional(keras.layers.LSTM(64)))\n",
+    "model.add(keras.layers.Dropout(0.5))\n",
+    "model.add(keras.layers.Dense(1, activation=\"sigmoid\"))\n",
+    "model.summary()\n",
+    "# try using different optimizers and different optimizer configs\n",
+    "model.compile(\"adam\", \"binary_crossentropy\", metrics=[\"accuracy\"])\n",
+    "\n",
+    "print(\"Train...\")\n",
+    "model.fit(\n",
+    "    x_train, y_train, batch_size=batch_size, epochs=1, validation_data=(x_test, y_test)\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # Comparing GPU performance of LSTM using CuDNN (keras.layers.LSTM) and not using CuDNN (keras.layers.LSTMCell)\n",
+    "\n",
+    "# from tensorflow import keras\n",
+    "# from keras.models import Sequential\n",
+    "# from keras.layers import Dense, Dropout, Embedding, LSTM, Input, Bidirectional\n",
+    "\n",
+    "# keras.utils.set_random_seed(42)\n",
+    "\n",
+    "\n",
+    "# def build_model(allow_cudnn_kernel=True):\n",
+    "#     # CuDNN is only available at the layer level, and not at the cell level.\n",
+    "#     # This means `LSTM(units)` will use the CuDNN kernel,\n",
+    "#     # while RNN(LSTMCell(units)) will run on the non-CuDNN kernel.\n",
+    "#     if allow_cudnn_kernel:\n",
+    "#         # The LSTM layer with default options uses CuDNN.\n",
+    "#         model = Sequential(name=\"BiLSTM_with_CuDNN\")\n",
+    "#         lstm_layer = keras.layers.LSTM(64)\n",
+    "#     else:\n",
+    "#         # Wrapping an LSTMCell in an RNN layer will not use CuDNN.\n",
+    "#         model = Sequential(name=\"BiLSTM_without_CuDNN\")\n",
+    "#         lstm_layer = keras.layers.RNN(keras.layers.LSTMCell(64))\n",
+    "#     model.add(Embedding(max_features, 128, input_length=maxlen))\n",
+    "#     model.add(Bidirectional(lstm_layer))\n",
+    "#     model.add(Dropout(0.5))\n",
+    "#     model.add(Dense(1, activation=\"sigmoid\"))\n",
+    "#     model.summary()\n",
+    "#     return model\n",
+    "\n",
+    "\n",
+    "# model = build_model(allow_cudnn_kernel=True)\n",
+    "# model.compile(\"adam\", \"binary_crossentropy\", metrics=[\"accuracy\"])\n",
+    "\n",
+    "# print(\"Train model using CuDNN kernel...\")\n",
+    "# model.fit(\n",
+    "#     x_train, y_train, batch_size=batch_size, epochs=1, validation_data=(x_test, y_test)\n",
+    "# )\n",
+    "\n",
+    "\n",
+    "# model_noncudnn = build_model(allow_cudnn_kernel=False)\n",
+    "# model_noncudnn.compile(\"adam\", \"binary_crossentropy\", metrics=[\"accuracy\"])\n",
+    "\n",
+    "# print(\"Train model not using CuDNN kernel...\")\n",
+    "# model_noncudnn.fit(\n",
+    "#     x_train, y_train, batch_size=batch_size, epochs=1, validation_data=(x_test, y_test)\n",
+    "# )"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
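The markdown cell above leans on two sizing heuristics. As a minimal sketch (not part of the package), here is how they evaluate for this BiLSTM experiment, using the numbers reported in that cell; `nb_classes = 2` is an assumption for the binary IMDB sentiment labels:

```python
# Sketch: evaluating the notebook's two rules of thumb for the BiLSTM run.
# All numbers come from the markdown cell above; nb_classes = 2 is assumed
# (binary sentiment labels), since the "nb_classes = 5" note applies to regression.
nb_samples = 25_000        # training sequences
nb_features = 1            # one feature, per the notebook
nb_classes = 2             # binary classification
nb_parameters = 2_658_945  # from model.summary()

rule_1 = nb_samples > 10 * nb_features * nb_classes  # sample-size rule
rule_2 = nb_parameters < nb_samples / 10             # parameter-sample ratio
rule_2_deep = nb_parameters < nb_samples / 50        # stricter deep-learning variant

print(f"rule 1 (sample size):   {rule_1}")       # True: 25,000 > 20
print(f"rule 2 (params < n/10): {rule_2}")       # False: 2,658,945 >> 2,500
print(f"rule 2 (params < n/50): {rule_2_deep}")  # False
```

By its own heuristic, the BiLSTM is heavily over-parameterized for 25,000 samples, which is worth keeping in mind when reading the accuracy figures.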
lecrapaud/speed_tests/test-gpu-resnet.ipynb
@@ -0,0 +1,166 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Speed Test\n",
+    "\n",
+    "### **Conclusion**\n",
+    "- **~12x faster on GPU than on CPU (~53ms/step vs ~679ms/step) on Apple Silicon (tested Jan 2024)**\n",
+    "\n",
+    "---\n",
+    "\n",
+    "### **Model Details**\n",
+    "- **Model Type:** Very Deep CNN (ResNet, 50 layers)\n",
+    "- **Total Parameters:** 23,792,612 (~90.76 MB)\n",
+    "- **Training Data:** 50,000 samples\n",
+    "- **Input Features:** `32 × 32 × 3 = 3,072`\n",
+    "\n",
+    "---\n",
+    "\n",
+    "### **Rules of Thumb**\n",
+    "1. **Sample Size:** \n",
+    "   `nb_samples > 10 × nb_features × nb_classes` \n",
+    "   (with `nb_classes = 5` for regression).\n",
+    "\n",
+    "2. **Parameter-Sample Ratio:** \n",
+    "   `nb_parameters < nb_samples / 10` \n",
+    "   (or even `nb_samples / 50` for deep learning).\n",
+    "\n",
+    "---\n",
+    "\n",
+    "### **GPU Performance**\n",
+    "\n",
+    "#### **tf.keras** \n",
+    "- Run 1: **221s** (~256ms/step) — loss: `4.6651`, accuracy: `0.0858`\n",
+    "- Run 2 (Apple Silicon M2 GPU): **47s** (~53ms/step) — loss: `4.8064`, accuracy: `0.0620`\n",
+    "\n",
+    "#### **Keras** \n",
+    "- Run 1: **225s** (~263ms/step) — loss: `4.7491`, accuracy: `0.0738` \n",
+    "- Run 2 (Apple Silicon M2 GPU): **47s** (~53ms/step) — loss: `4.6743`, accuracy: `0.0761`\n",
+    "\n",
+    "---\n",
+    "\n",
+    "### **CPU Performance**\n",
+    "\n",
+    "#### **tf.keras** \n",
+    "- Run 1: **1440s** (~1841ms/step) — accuracy: `0.0346`, loss: `5.2095` \n",
+    "- Run 2 (Apple Silicon M2 CPU): **541s** (~679ms/step) — accuracy: `0.0534`, loss: `4.9699` \n",
+    "\n",
+    "#### **Keras** \n",
+    "- Run 1: **1440s** (~1841ms/step) — accuracy: `0.0210`, loss: `5.6996` \n",
+    "- Run 2 (Apple Silicon M2 CPU): **535s** (~671ms/step) — accuracy: `0.0534`, loss: `5.0862`\n",
+    "\n",
+    "\n",
+    "---\n",
+    "\n",
+    "### **Key Takeaways**\n",
+    "- GPUs offered a **6-12x speedup** over CPUs for deep CNN training in these runs.\n",
+    "- CPU training becomes impractically slow for large models and datasets.\n",
+    "- A GPU is highly recommended for deep learning tasks, especially for very deep architectures like ResNet.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "\n",
+    "devices = tf.config.list_physical_devices()\n",
+    "print(\"\\nDevices: \", devices)\n",
+    "\n",
+    "gpus = tf.config.list_physical_devices(\"GPU\")\n",
+    "if gpus:\n",
+    "    details = tf.config.experimental.get_device_details(gpus[0])\n",
+    "    print(\"GPU details: \", details)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# get data\n",
+    "from tensorflow import keras\n",
+    "\n",
+    "cifar = keras.datasets.cifar100\n",
+    "(x_train, y_train), (x_test, y_test) = cifar.load_data()\n",
+    "print(x_train.shape, x_train[:2], y_train.shape, y_train[:2])\n",
+    "print(\n",
+    "    f\"{x_train.shape[0]} train samples and {y_test.shape[0]} test samples, for a total of {x_train.shape[0] + x_test.shape[0]} samples.\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "\n",
+    "model = tf.keras.applications.ResNet50(\n",
+    "    include_top=True,\n",
+    "    weights=None,\n",
+    "    input_shape=(32, 32, 3),\n",
+    "    classes=100,\n",
+    ")\n",
+    "model.summary()\n",
+    "\n",
+    "loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)\n",
+    "\n",
+    "model.compile(optimizer=\"adam\", loss=loss_fn, metrics=[\"accuracy\"])\n",
+    "\n",
+    "model.fit(x_train, y_train, epochs=1, batch_size=64)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "\n",
+    "model = keras.applications.ResNet50(\n",
+    "    include_top=True,\n",
+    "    weights=None,\n",
+    "    input_shape=(32, 32, 3),\n",
+    "    classes=100,\n",
+    ")\n",
+    "model.summary()\n",
+    "\n",
+    "loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=False)\n",
+    "\n",
+    "model.compile(optimizer=\"adam\", loss=loss_fn, metrics=[\"accuracy\"])\n",
+    "\n",
+    "model.fit(x_train, y_train, epochs=1, batch_size=64)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
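The ResNet notebook reports separate CPU and GPU timings but does not show how the CPU runs were forced. A minimal sketch of one way to do this in TensorFlow (an assumption about methodology, not code from the package):

```python
# Sketch: hide all GPUs so that model building and model.fit() fall back to CPU.
# This must run before TensorFlow initializes the GPU (i.e. at the top of a
# fresh kernel); calling it after GPU initialization raises a RuntimeError.
import tensorflow as tf

tf.config.set_visible_devices([], "GPU")
print(tf.config.get_visible_devices())  # GPUs no longer listed

# The training cell above can then be timed once with and once without this
# snippet to reproduce the CPU-vs-GPU comparison.
```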
lecrapaud/speed_tests/test-gpu-transformers.ipynb
@@ -0,0 +1,254 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Speed Test\n",
+    "\n",
+    "### **Conclusion**\n",
+    "- **Roughly the same speed on CPU and GPU** for this model and setup. \n",
+    "\n",
+    "---\n",
+    "\n",
+    "### **Model Details**\n",
+    "- **Model Type:** Transformer (encoder only) \n",
+    "- **Total Parameters:** 4,934,529 (18.82 MB)\n",
+    "- **Training Data:** 25,000 sequences \n",
+    "- **Features:** 1 (number of occurrences in the corpus)\n",
+    "\n",
+    "---\n",
+    "\n",
+    "### **Rules of Thumb**\n",
+    "1. **Sample Size:** \n",
+    "   `nb_samples > 10 × nb_features × nb_classes` \n",
+    "   (with `nb_classes = 5` for regression).\n",
+    "\n",
+    "2. **Parameter-Sample Ratio:** \n",
+    "   `nb_parameters < nb_samples / 10` \n",
+    "   (or even `nb_samples / 50` for deep learning).\n",
+    "\n",
+    "---\n",
+    "\n",
+    "### **GPU Performance**\n",
+    "\n",
+    "#### **tf.keras** \n",
+    "- Run 1 (Apple Silicon): **412s** (~525ms/step) — loss: `7.6209`, accuracy: `0.4999`, val_loss: `7.6246`, val_accuracy: `0.5000`\n",
+    "\n",
+    "#### **Keras** \n",
+    "- Run 1 (Apple Silicon): **419s** (~534ms/step) — loss: `7.6191`, accuracy: `0.5002`, val_loss: `7.6246`, val_accuracy: `0.5000`\n",
+    "\n",
+    "\n",
+    "---\n",
+    "\n",
+    "### **CPU Performance**\n",
+    "\n",
+    "#### **tf.keras** \n",
+    "- Run 1 (Apple Silicon): **439s** (~560ms/step) — accuracy: `0.4992`, loss: `8.0630`, val_accuracy: `0.5000`, val_loss: `8.0590`\n",
+    "\n",
+    "#### **Keras** \n",
+    "- Run 1 (Apple Silicon): **417s** (~531ms/step) — accuracy: `0.5006`, loss: `7.9368`, val_accuracy: `0.5000`, val_loss: `7.9712`\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "\n",
+    "devices = tf.config.list_physical_devices()\n",
+    "print(\"\\nDevices: \", devices)\n",
+    "\n",
+    "gpus = tf.config.list_physical_devices(\"GPU\")\n",
+    "if gpus:\n",
+    "    details = tf.config.experimental.get_device_details(gpus[0])\n",
+    "    print(\"GPU details: \", details)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# get data\n",
+    "import tensorflow as tf\n",
+    "from tensorflow import keras\n",
+    "import numpy as np\n",
+    "\n",
+    "np.random.seed(42) # for reproducibility\n",
+    "\n",
+    "max_features = 20000\n",
+    "maxlen = 100 # cut texts after this number of words (among top max_features most common words)\n",
+    "batch_size = 32\n",
+    "\n",
+    "(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(\n",
+    "    num_words=max_features\n",
+    ")\n",
+    "\n",
+    "x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)\n",
+    "x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=maxlen)\n",
+    "print(x_train.shape, x_train[:2], y_train.shape, y_train[:2])\n",
+    "print(\n",
+    "    f\"{x_train.shape[0]} train samples and {y_test.shape[0]} test samples, for a total of {x_train.shape[0] + x_test.shape[0]} samples.\"\n",
+    ")\n",
+    "y_train = np.array(y_train)\n",
+    "y_test = np.array(y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from tensorflow import keras\n",
+    "\n",
+    "print(f\"Using tf.keras version {keras.__version__}\")\n",
+    "\n",
+    "ff_dim = 512\n",
+    "head_size = 128\n",
+    "num_heads = 16\n",
+    "num_layers = 2\n",
+    "\n",
+    "\n",
+    "def transformer_encoder(inputs, num_layers, head_size, num_heads, ff_dim, dropout=0):\n",
+    "    for _ in range(num_layers):\n",
+    "        # Attention and Normalization\n",
+    "        x = keras.layers.LayerNormalization(epsilon=1e-6)(inputs)\n",
+    "        x = keras.layers.MultiHeadAttention(\n",
+    "            key_dim=head_size, num_heads=num_heads, dropout=dropout\n",
+    "        )(x, x)\n",
+    "        x = keras.layers.Add()([x, inputs])\n",
+    "\n",
+    "        # Feed Forward Part\n",
+    "        y = keras.layers.LayerNormalization(epsilon=1e-6)(x)\n",
+    "        y = keras.layers.Dense(ff_dim, activation=\"relu\")(y)\n",
+    "        y = keras.layers.Dropout(dropout)(y)\n",
+    "        y = keras.layers.Dense(inputs.shape[-1])(y)\n",
+    "        inputs = keras.layers.Add()([y, x])\n",
+    "\n",
+    "    return inputs\n",
+    "\n",
+    "\n",
+    "# Define the input layer\n",
+    "inputs = keras.layers.Input(\n",
+    "    shape=(maxlen,)\n",
+    ") # maxlen is the input length for each sequence\n",
+    "\n",
+    "# Embedding layer\n",
+    "x = keras.layers.Embedding(max_features, 128)(\n",
+    "    inputs\n",
+    ") # max_features is the vocabulary size\n",
+    "x = transformer_encoder(x, num_layers, head_size, num_heads, ff_dim)\n",
+    "x = keras.layers.GlobalAveragePooling1D()(x)\n",
+    "x = keras.layers.LayerNormalization(epsilon=1e-6)(x)\n",
+    "# Define output layer based on target type\n",
+    "outputs = keras.layers.Dense(1, activation=\"linear\")(x)\n",
+    "\n",
+    "# Build the model\n",
+    "model = keras.Model(inputs=inputs, outputs=outputs, name=\"transformer\")\n",
+    "\n",
+    "model.summary()\n",
+    "# try using different optimizers and different optimizer configs\n",
+    "model.compile(\"adam\", \"binary_crossentropy\", metrics=[\"accuracy\"])\n",
+    "\n",
+    "print(\"Train...\")\n",
+    "batch_size = 32\n",
+    "epoch = 100 # note: unused; fit() below trains a single epoch\n",
+    "model.fit(\n",
+    "    x_train, y_train, batch_size=batch_size, epochs=1, validation_data=(x_test, y_test)\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "\n",
+    "print(f\"Using keras version {keras.__version__}\")\n",
+    "\n",
+    "ff_dim = 512\n",
+    "head_size = 128\n",
+    "num_heads = 16\n",
+    "num_layers = 2\n",
+    "\n",
+    "\n",
+    "def transformer_encoder(inputs, num_layers, head_size, num_heads, ff_dim, dropout=0):\n",
+    "    for _ in range(num_layers):\n",
+    "        # Attention and Normalization\n",
+    "        x = keras.layers.LayerNormalization(epsilon=1e-6)(inputs)\n",
+    "        x = keras.layers.MultiHeadAttention(\n",
+    "            key_dim=head_size, num_heads=num_heads, dropout=dropout\n",
+    "        )(x, x)\n",
+    "        x = keras.layers.Add()([x, inputs])\n",
+    "\n",
+    "        # Feed Forward Part\n",
+    "        y = keras.layers.LayerNormalization(epsilon=1e-6)(x)\n",
+    "        y = keras.layers.Dense(ff_dim, activation=\"relu\")(y)\n",
+    "        y = keras.layers.Dropout(dropout)(y)\n",
+    "        y = keras.layers.Dense(inputs.shape[-1])(y)\n",
+    "        inputs = keras.layers.Add()([y, x])\n",
+    "\n",
+    "    return inputs\n",
+    "\n",
+    "\n",
+    "# Define the input layer\n",
+    "inputs = keras.layers.Input(\n",
+    "    shape=(maxlen,)\n",
+    ") # maxlen is the input length for each sequence\n",
+    "\n",
+    "# Embedding layer\n",
+    "x = keras.layers.Embedding(max_features, 128)(\n",
+    "    inputs\n",
+    ") # max_features is the vocabulary size\n",
+    "x = transformer_encoder(x, num_layers, head_size, num_heads, ff_dim)\n",
+    "x = keras.layers.GlobalAveragePooling1D()(x)\n",
+    "x = keras.layers.LayerNormalization(epsilon=1e-6)(x)\n",
+    "# Define output layer based on target type\n",
+    "outputs = keras.layers.Dense(1, activation=\"linear\")(x)\n",
+    "\n",
+    "# Build the model\n",
+    "model = keras.Model(inputs=inputs, outputs=outputs, name=\"transformer\")\n",
+    "\n",
+    "model.summary()\n",
+    "# try using different optimizers and different optimizer configs\n",
+    "model.compile(\"adam\", \"binary_crossentropy\", metrics=[\"accuracy\"])\n",
+    "\n",
+    "print(\"Train...\")\n",
+    "batch_size = 32\n",
+    "epoch = 100 # note: unused; fit() below trains a single epoch\n",
+    "model.fit(\n",
+    "    x_train, y_train, batch_size=batch_size, epochs=1, validation_data=(x_test, y_test)\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
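One note on the transformer notebook above: every run reports accuracy pinned at ~0.5000 with a loss around 7.6-8.1, which is consistent with `binary_crossentropy` being applied to the unbounded outputs of the `linear` head. The timing conclusion is unaffected, but for reference, here is a sketch of the conventional binary head (a suggested fix, not the code shipped in the package; `x` and `inputs` are as built in the cells above):

```python
# Suggested fix (not in the published notebook): squash the single output unit
# to a probability so it matches binary_crossentropy and the 0/1 IMDB labels.
outputs = keras.layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs=inputs, outputs=outputs, name="transformer")
model.compile("adam", "binary_crossentropy", metrics=["accuracy"])
```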