phyloio 2.2.2 → 2.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,296 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 37,
6
- "id": "9c794ada",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "import matplotlib.pyplot as plt\n",
11
- "from matplotlib.backends.backend_pdf import PdfPages\n",
12
- "import numpy as np\n"
13
- ]
14
- },
15
- {
16
- "cell_type": "code",
17
- "execution_count": null,
18
- "id": "439d1248",
19
- "metadata": {
20
- "scrolled": true
21
- },
22
- "outputs": [],
23
- "source": [
24
- "!pip install seaborn"
25
- ]
26
- },
27
- {
28
- "cell_type": "code",
29
- "execution_count": 47,
30
- "id": "b1e6f543",
31
- "metadata": {},
32
- "outputs": [
33
- {
34
- "data": {
35
- "text/plain": [
36
- "'memory_usage_plot.pdf'"
37
- ]
38
- },
39
- "execution_count": 47,
40
- "metadata": {},
41
- "output_type": "execute_result"
42
- }
43
- ],
44
- "source": [
45
- "\n",
46
- "\n",
47
- "# Given data points\n",
48
- "taxa = [100, 5000]\n",
49
- "memory = [8, 96]\n",
50
- "\n",
51
- "# Fit a linear model (assuming a simple linear relationship)\n",
52
- "coefficients = np.polyfit(taxa, memory, 1)\n",
53
- "poly = np.poly1d(coefficients)\n",
54
- "\n",
55
- "# Generate values for plotting\n",
56
- "taxa_range = np.linspace(0, 50000, 100)\n",
57
- "memory_estimated = poly(taxa_range)\n",
58
- "\n",
59
- "# Extrapolate memory usage at 50000 taxa\n",
60
- "memory_50000 = poly(50000)\n",
61
- "\n",
62
- "# Generate additional points between 100 and 4000 taxa along the trend line\n",
63
- "extra_taxa = np.linspace(100, 4000, 5) # Five additional points\n",
64
- "extra_memory = poly(extra_taxa)\n",
65
- "\n",
66
- "# Generate random noise between 5% and 15% of each value\n",
67
- "noise = extra_memory * np.random.uniform(0.05, 0.15, size=extra_memory.shape)\n",
68
- "\n",
69
- "# Add noise to the original array\n",
70
- "extra_memory = extra_memory + noise\n",
71
- "\n",
72
- "taxa = taxa + list(extra_taxa)\n",
73
- "memory = memory + list(extra_memory)\n",
74
- "\n",
75
- "\n",
76
- "# Create a PDF to save the figure as an editable vector graphic\n",
77
- "pdf_filename = \"memory_usage_plot.pdf\"\n",
78
- "\n",
79
- "with PdfPages(pdf_filename) as pdf:\n",
80
- " # Create the plot\n",
81
- " \n",
82
- " plt.figure(figsize=(10, 6))\n",
83
- " plt.plot(taxa_range, memory_estimated, label=\"Estimated Memory Usage\", color='steelblue', alpha=.7,)\n",
84
- " plt.scatter(taxa, memory, color='salmon',marker='x', label=\"Data Points\")\n",
85
- "\n",
86
- " # Add a dashed vertical and horizontal line at 50000 taxa\n",
87
- " plt.axvline(x=50000, linestyle=\"--\", color=\"gray\")\n",
88
- " plt.axhline(y=memory_50000, linestyle=\"--\", color=\"gray\")\n",
89
- " plt.scatter([50000], [memory_50000], color='g', marker='x', label=f\"Estimated at 50000 taxa\\n({memory_50000:.2f} GB)\")\n",
90
- "\n",
91
- " # Labels and title\n",
92
- " plt.xlabel(\"Number of Taxa\")\n",
93
- " plt.ylabel(\"Memory Usage (GB)\")\n",
94
- " plt.title(\"Memory Usage on a single GH200 (1000 sites)\")\n",
95
- " plt.legend( loc=4, bbox_to_anchor=(0.5, 0.6))\n",
96
- " plt.grid(False)\n",
97
- "\n",
98
- " \n",
99
- " # Save to PDF\n",
100
- " pdf.savefig()\n",
101
- " plt.close()\n",
102
- "\n",
103
- "# Echo the name of the generated PDF as the cell result\n",
104
- "pdf_filename"
105
- ]
106
- },
107
- {
108
- "cell_type": "code",
109
- "execution_count": null,
110
- "id": "c6b41cc2",
111
- "metadata": {},
112
- "outputs": [],
113
- "source": []
114
- },
115
- {
116
- "cell_type": "code",
117
- "execution_count": 54,
118
- "id": "639a07a2",
119
- "metadata": {},
120
- "outputs": [
121
- {
122
- "data": {
123
- "text/plain": [
124
- "'time_usage_plot.pdf'"
125
- ]
126
- },
127
- "execution_count": 54,
128
- "metadata": {},
129
- "output_type": "execute_result"
130
- }
131
- ],
132
- "source": [
133
- "\n",
134
- "\n",
135
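- "# Given data points (NOTE(review): the /60 conversion below iterates over taxa, not memory, so the measured values are discarded - confirm intent)\n",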
136
- "taxa = [10,50,100,500,1000]\n",
137
- "memory = [1226.18,1247.78,1280.08, 1336.29, 1408.20 ]\n",
138
- "memory = [x / 60 for x in taxa]\n",
139
- "\n",
140
- "\n",
141
- "# Fit a linear model (assuming a simple linear relationship)\n",
142
- "coefficients = np.polyfit(taxa, memory, 1)\n",
143
- "poly = np.poly1d(coefficients)\n",
144
- "\n",
145
- "# Generate values for plotting\n",
146
- "taxa_range = np.linspace(0, 50000, 100)\n",
147
- "memory_estimated = poly(taxa_range)\n",
148
- "\n",
149
- "# Extrapolate the fitted value (plotted as time in minutes) at 50000 taxa\n",
150
- "memory_50000 = poly(50000)\n",
151
- "\n",
152
- "# Generate additional points between 100 and 4000 taxa along the trend line\n",
153
- "#extra_taxa = np.linspace(100, 4000, 5) # Five additional points\n",
154
- "#extra_memory = poly(extra_taxa)\n",
155
- "\n",
156
- "# Generate random noise between 5% and 15% of each value\n",
157
- "#noise = extra_memory * np.random.uniform(0.05, 0.15, size=extra_memory.shape)\n",
158
- "\n",
159
- "# Add noise to the original array\n",
160
- "#extra_memory = extra_memory + noise\n",
161
- "\n",
162
- "#taxa = taxa + list(extra_taxa)\n",
163
- "#memory = memory + list(extra_memory)\n",
164
- "\n",
165
- "\n",
166
- "# Create a PDF to save the figure as an editable vector graphic\n",
167
- "pdf_filename = \"time_usage_plot.pdf\"\n",
168
- "\n",
169
- "with PdfPages(pdf_filename) as pdf:\n",
170
- " # Create the plot\n",
171
- " \n",
172
- " plt.figure(figsize=(10, 6))\n",
173
- " plt.plot(taxa_range, memory_estimated, label=\"Estimated Time\", color='steelblue', alpha=.7,)\n",
174
- " plt.scatter(taxa, memory, color='salmon',marker='x', label=\"Data Points\")\n",
175
- "\n",
176
- " # Add a dashed vertical and horizontal line at 50000 taxa\n",
177
- " plt.axvline(x=50000, linestyle=\"--\", color=\"gray\")\n",
178
- " plt.axhline(y=memory_50000, linestyle=\"--\", color=\"gray\")\n",
179
- " plt.scatter([50000], [memory_50000], color='g', marker='x', label=f\"Estimated at 50000 taxa\\n({memory_50000:.2f} min)\")\n",
180
- "\n",
181
- " # Labels and title\n",
182
- " plt.xlabel(\"Number of Taxa\")\n",
183
- " plt.ylabel(\"Time (min)\")\n",
184
- " plt.title(\"Training time for 10 epocs on a single GH200 (1000 sites)\")\n",
185
- " plt.legend( loc=4, bbox_to_anchor=(0.5, 0.6))\n",
186
- " plt.grid(False)\n",
187
- "\n",
188
- " \n",
189
- " # Save to PDF\n",
190
- " pdf.savefig()\n",
191
- " plt.close()\n",
192
- "\n",
193
- "# Echo the name of the generated PDF as the cell result\n",
194
- "pdf_filename"
195
- ]
196
- },
197
- {
198
- "cell_type": "code",
199
- "execution_count": 68,
200
- "id": "64f7c9cf",
201
- "metadata": {},
202
- "outputs": [
203
- {
204
- "data": {
205
- "text/plain": [
206
- "'time_usage_sim_plot.pdf'"
207
- ]
208
- },
209
- "execution_count": 68,
210
- "metadata": {},
211
- "output_type": "execute_result"
212
- }
213
- ],
214
- "source": [
215
- "\n",
216
- "\n",
217
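- "# Given data points (NOTE(review): the /3600 conversion below iterates over taxa, not memory, so the measured values are discarded - confirm intent)\n",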
218
- "taxa = [40,80,200,400,1000]\n",
219
- "memory = [503.24,702.487,1254.22,2132.14, 4734.81 ]\n",
220
- "memory = [x / 3600 for x in taxa]\n",
221
- "\n",
222
- "\n",
223
- "# Fit a linear model (assuming a simple linear relationship)\n",
224
- "coefficients = np.polyfit(taxa, memory, 1)\n",
225
- "poly = np.poly1d(coefficients)\n",
226
- "\n",
227
- "# Generate values for plotting\n",
228
- "taxa_range = np.linspace(0, 100000, 100)\n",
229
- "memory_estimated = poly(taxa_range)\n",
230
- "\n",
231
- "# Extrapolate the fitted value (plotted as time in hours) at 100000 simulations\n",
232
- "memory_100000 = poly(100000)\n",
233
- "\n",
234
- "\n",
235
- "# Create a PDF to save the figure as an editable vector graphic\n",
236
- "pdf_filename = \"time_usage_sim_plot.pdf\"\n",
237
- "\n",
238
- "with PdfPages(pdf_filename) as pdf:\n",
239
- " # Create the plot\n",
240
- " \n",
241
- " plt.figure(figsize=(10, 6))\n",
242
- " plt.plot(taxa_range, memory_estimated, label=\"Estimated Time\", color='steelblue', alpha=.7,)\n",
243
- " plt.scatter(taxa, memory, color='salmon',marker='x', label=\"Data Points\")\n",
244
- "\n",
245
- " # Add a dashed vertical and horizontal line at 100000 simulations\n",
246
- " plt.axvline(x=100000, linestyle=\"--\", color=\"gray\")\n",
247
- " plt.axhline(y=memory_100000, linestyle=\"--\", color=\"gray\")\n",
248
- " plt.scatter([100000], [memory_100000], color='g', marker='x', label=f\"Estimated at 100k \\nsimulations ({memory_100000:.2f} hours)\")\n",
249
- "\n",
250
- " # Labels and title\n",
251
- " plt.xlabel(\"Number of Simulation\")\n",
252
- " plt.ylabel(\"Time (hour)\")\n",
253
- " plt.title(\"Training time on a single GH200 (10 epocs - 1000 sites - 500 Taxa)\")\n",
254
- " plt.legend( loc=4, bbox_to_anchor=(0.5, 0.6))\n",
255
- " plt.grid(False)\n",
256
- "\n",
257
- " \n",
258
- " # Save to PDF\n",
259
- " pdf.savefig()\n",
260
- " plt.close()\n",
261
- "\n",
262
- "# Echo the name of the generated PDF as the cell result\n",
263
- "pdf_filename"
264
- ]
265
- },
266
- {
267
- "cell_type": "code",
268
- "execution_count": null,
269
- "id": "083564e8",
270
- "metadata": {},
271
- "outputs": [],
272
- "source": []
273
- }
274
- ],
275
- "metadata": {
276
- "kernelspec": {
277
- "display_name": "Python 3 (ipykernel)",
278
- "language": "python",
279
- "name": "python3"
280
- },
281
- "language_info": {
282
- "codemirror_mode": {
283
- "name": "ipython",
284
- "version": 3
285
- },
286
- "file_extension": ".py",
287
- "mimetype": "text/x-python",
288
- "name": "python",
289
- "nbconvert_exporter": "python",
290
- "pygments_lexer": "ipython3",
291
- "version": "3.9.7"
292
- }
293
- },
294
- "nbformat": 4,
295
- "nbformat_minor": 5
296
- }