deepboard 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepboard/__init__.py +1 -0
- deepboard/__version__.py +4 -0
- deepboard/gui/THEME.yml +28 -0
- deepboard/gui/__init__.py +0 -0
- deepboard/gui/assets/artefacts.css +108 -0
- deepboard/gui/assets/base.css +208 -0
- deepboard/gui/assets/base.js +77 -0
- deepboard/gui/assets/charts.css +188 -0
- deepboard/gui/assets/compare.css +90 -0
- deepboard/gui/assets/datagrid.css +120 -0
- deepboard/gui/assets/fileview.css +13 -0
- deepboard/gui/assets/right_panel.css +227 -0
- deepboard/gui/assets/theme.css +85 -0
- deepboard/gui/components/__init__.py +8 -0
- deepboard/gui/components/artefact_group.py +12 -0
- deepboard/gui/components/chart_type.py +22 -0
- deepboard/gui/components/legend.py +34 -0
- deepboard/gui/components/log_selector.py +22 -0
- deepboard/gui/components/modal.py +20 -0
- deepboard/gui/components/smoother.py +21 -0
- deepboard/gui/components/split_selector.py +21 -0
- deepboard/gui/components/stat_line.py +8 -0
- deepboard/gui/entry.py +21 -0
- deepboard/gui/main.py +93 -0
- deepboard/gui/pages/__init__.py +1 -0
- deepboard/gui/pages/compare_page/__init__.py +6 -0
- deepboard/gui/pages/compare_page/compare_page.py +22 -0
- deepboard/gui/pages/compare_page/components/__init__.py +4 -0
- deepboard/gui/pages/compare_page/components/card_list.py +19 -0
- deepboard/gui/pages/compare_page/components/chart.py +54 -0
- deepboard/gui/pages/compare_page/components/compare_setup.py +30 -0
- deepboard/gui/pages/compare_page/components/split_card.py +51 -0
- deepboard/gui/pages/compare_page/components/utils.py +20 -0
- deepboard/gui/pages/compare_page/routes.py +58 -0
- deepboard/gui/pages/main_page/__init__.py +4 -0
- deepboard/gui/pages/main_page/datagrid/__init__.py +5 -0
- deepboard/gui/pages/main_page/datagrid/compare_button.py +21 -0
- deepboard/gui/pages/main_page/datagrid/datagrid.py +67 -0
- deepboard/gui/pages/main_page/datagrid/handlers.py +54 -0
- deepboard/gui/pages/main_page/datagrid/header.py +43 -0
- deepboard/gui/pages/main_page/datagrid/routes.py +112 -0
- deepboard/gui/pages/main_page/datagrid/row.py +20 -0
- deepboard/gui/pages/main_page/datagrid/sortable_column_js.py +45 -0
- deepboard/gui/pages/main_page/datagrid/utils.py +9 -0
- deepboard/gui/pages/main_page/handlers.py +16 -0
- deepboard/gui/pages/main_page/main_page.py +21 -0
- deepboard/gui/pages/main_page/right_panel/__init__.py +12 -0
- deepboard/gui/pages/main_page/right_panel/config.py +57 -0
- deepboard/gui/pages/main_page/right_panel/fragments.py +133 -0
- deepboard/gui/pages/main_page/right_panel/hparams.py +25 -0
- deepboard/gui/pages/main_page/right_panel/images.py +358 -0
- deepboard/gui/pages/main_page/right_panel/run_info.py +86 -0
- deepboard/gui/pages/main_page/right_panel/scalars.py +251 -0
- deepboard/gui/pages/main_page/right_panel/template.py +151 -0
- deepboard/gui/pages/main_page/routes.py +25 -0
- deepboard/gui/pages/not_found.py +3 -0
- deepboard/gui/requirements.txt +5 -0
- deepboard/gui/utils.py +267 -0
- deepboard/resultTable/__init__.py +2 -0
- deepboard/resultTable/cursor.py +20 -0
- deepboard/resultTable/logwritter.py +667 -0
- deepboard/resultTable/resultTable.py +529 -0
- deepboard/resultTable/scalar.py +29 -0
- deepboard/resultTable/table_schema.py +135 -0
- deepboard/resultTable/utils.py +50 -0
- deepboard-0.2.0.dist-info/METADATA +164 -0
- deepboard-0.2.0.dist-info/RECORD +69 -0
- deepboard-0.2.0.dist-info/WHEEL +4 -0
- deepboard-0.2.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,529 @@
|
|
1
|
+
import sys
|
2
|
+
from typing import *
|
3
|
+
from enum import Enum
|
4
|
+
from pathlib import PurePath
|
5
|
+
import os
|
6
|
+
from glob import glob
|
7
|
+
from datetime import datetime
|
8
|
+
import warnings
|
9
|
+
import shutil
|
10
|
+
import sqlite3
|
11
|
+
import hashlib
|
12
|
+
import pandas as pd
|
13
|
+
import shlex
|
14
|
+
from PIL import Image
|
15
|
+
from io import BytesIO
|
16
|
+
|
17
|
+
from .logwritter import LogWriter
|
18
|
+
from .cursor import Cursor
|
19
|
+
from .utils import get_last_commit, get_diff
|
20
|
+
from .table_schema import create_database
|
21
|
+
|
22
|
+
class NoCommitAction(Enum):
    """
    Policy applied when a new (non-debug) run is started while there are uncommitted changes.

    - `NOP`: Stay silent and carry on
    - `WARN`: Emit a warning and carry on
    - `RAISE`: Abort by raising an exception
    """
    NOP = "NOP"
    WARN = "WARN"
    RAISE = "RAISE"
|
32
|
+
|
33
|
+
|
34
|
+
class ResultTable:
    """
    This class represents all the results. It exposes many methods to interact with the result table (the database).

    All actions performed by the GUI (DeepBoard) are available as public methods to give programmatic access.

    How to use:

    - First, specify the path to the result table. If the db was not created, it will be created automatically.

    - Then, create a run with all the specific parameters describing the run. A unique run_id will be generated.
      Note that each run must be unique. This safeguard allows more reproducible runs. If one run performs better
      than the others, you can run the code again with all the parameters in the result table and you should get
      the same results.

    - If you simply want to test your code, you can create a debug run. It won't create a permanent entry in the
      table. You will still be able to see the logged scalars in the GUI with the run_id -1 that is reserved for
      debug runs. This run will be overwritten by the next debug run.

    - Finally, you can interact with the table with the different available methods.
    """

    def __init__(self, db_path: Union[str, PurePath] = "results/result_table.db",
                 nocommit_action: NoCommitAction = NoCommitAction.WARN):
        """
        :param db_path: The path to the database file. It is created if it does not exist.
        :param nocommit_action: What to do if changes are not committed when a new run is started
        """
        if not os.path.exists(db_path):
            self._create_database(db_path)
        db_path = db_path if isinstance(db_path, PurePath) else PurePath(db_path)

        # The configuration files are backed up next to the database.
        # exist_ok avoids the race between an "exists" check and the directory creation.
        self.configs_path = db_path.parent / "configs"
        os.makedirs(self.configs_path, exist_ok=True)

        self.db_path = db_path
        self.nocommit_action = nocommit_action

    def new_run(self, experiment_name: str,
                config_path: Union[str, PurePath],
                cli: dict,
                comment: Optional[str] = None,
                flush_each: int = 10,
                keep_each: int = 1,
                auto_log_plt: bool = True,
                disable: bool = False
                ) -> LogWriter:
        """
        Create a new LogWriter object bound to a run entry in the table. Think of it as a socket.

        :param experiment_name: The name of the current experiment
        :param config_path: The path to the configuration file
        :param cli: The cli arguments
        :param comment: The comment, if any
        :param flush_each: Every how many logs does the logger save them to the database?
        :param keep_each: If the training has a lot of steps, it might be preferable to not log every step to save
            space and speed up the process. This parameter controls every how many steps we store the log. 1 means
            we save at every step. 10 would mean that we drop 9 steps to save 1.
        :param auto_log_plt: If True, automatically detect if matplotlib figures were generated and log them. Note
            that it checks only when a method on the socket is called. It is better to log them manually because
            you can set the appropriate step, epoch and split.
        :param disable: If true, disable the logwriter, meaning that nothing will be written to the database.
        :return: The log writer
        :raises RuntimeError: If there are uncommitted changes and the policy is ``NoCommitAction.RAISE``, or if an
            identical run (same experiment, config, config hash, cli and comment) already exists and did not fail.
        """
        diff = get_diff()
        if diff is not None and len(diff) > 0:
            if self.nocommit_action == NoCommitAction.RAISE:
                raise RuntimeError("You have uncommitted changes. Please commit your changes before running the experiment in prod mode.")
            elif self.nocommit_action == NoCommitAction.WARN:
                warnings.warn("You have uncommitted changes. Please commit your changes before running the experiment in prod mode.", RuntimeWarning)

        commit = get_last_commit()
        start = datetime.now()
        config_str = str(config_path)
        config_hash = self.get_file_hash(config_path)
        comment = "" if comment is None else comment
        cli = " ".join(f'{key}={value}' for key, value in cli.items())
        command = " ".join(shlex.quote(arg) for arg in sys.argv)

        if disable:
            # If disabled and not debug, we use -2 to indicate that it is a disabled run
            run_id = -2
        else:
            existing = self._find_existing_run(experiment_name, config_str, config_hash, cli, comment)
            if existing is None:
                # No matching run (or only a debug run): create a brand new entry
                run_id = self._create_run(experiment_name, config_str, config_hash, cli, command, comment, start,
                                          commit, diff)
            else:
                run_id, status = existing
                if status == "failed":
                    # The run exists, but failed. So we retry it under the same id
                    self._create_run_with_id(run_id, experiment_name, config_str, config_hash, cli, command,
                                             comment, start, commit, diff)
                elif status is not None:
                    # The run exists and did not fail. Duplicates are refused
                    raise RuntimeError(f"Configuration has already been run with runID {run_id}. Consider changing "
                                       f"parameter to avoid duplicate runs or add a comment.")
                # A matching run with a NULL status is reused as is (historical behaviour).

        self._backup_config(config_path, run_id)

        return LogWriter(self.db_path, run_id, datetime.now(), flush_each=flush_each, keep_each=keep_each,
                         disable=disable, auto_log_plt=auto_log_plt)

    def new_debug_run(self, experiment_name: str,
                      config_path: Union[str, PurePath],
                      cli: dict,
                      comment: Optional[str] = None,
                      flush_each: int = 10,
                      keep_each: int = 1,
                      auto_log_plt: bool = True,
                      disable: bool = False
                      ) -> LogWriter:
        """
        Create a new DEBUG socket to log the results. The results will be entered in the result table, but as the
        runID -1. This means that every time you run the same code, it will overwrite the previous one. This is
        useful to avoid adding too many rows to the table when testing the code or debugging.

        Note:
            It will not log the git diff or git hash

        :param experiment_name: The name of the current experiment
        :param config_path: The path to the configuration file
        :param cli: The cli arguments
        :param comment: The comment, if any
        :param flush_each: Every how many logs does the logger save them to the database?
        :param keep_each: If the training has a lot of steps, it might be preferable to not log every step to save
            space and speed up the process. This parameter controls every how many steps we store the log. 1 means
            we save at every step. 10 would mean that we drop 9 steps to save 1.
        :param auto_log_plt: If True, automatically detect if matplotlib figures were generated and log them. Note
            that it checks only when a method on the socket is called. It is better to log them manually because
            you can set the appropriate step, epoch and split.
        :param disable: If true, disable the logwriter, meaning that nothing will be written to the database.
        :return: The log writer
        """
        start = datetime.now()
        config_str = str(config_path)
        config_hash = self.get_file_hash(config_path)
        comment = "" if comment is None else comment
        cli = " ".join(f'{key}={value}' for key, value in cli.items())
        command = " ".join(shlex.quote(arg) for arg in sys.argv)
        if not disable:
            # The previous debug run (id -1) is deleted and replaced by this one
            self._create_run_with_id(-1, experiment_name, config_str, config_hash, cli, command, comment, start,
                                     None, None)

        self._backup_config(config_path, -1)

        return LogWriter(self.db_path, -1, datetime.now(), flush_each=flush_each, keep_each=keep_each,
                         disable=disable, auto_log_plt=auto_log_plt)

    def load_config(self, run_id: int) -> str:
        """
        Load the configuration file that was backed up for a given run id.

        :param run_id: The run id
        :return: The content of the configuration file
        :raises IndexError: If no configuration file was backed up for this run id
        """
        valid_files = glob(str(self.configs_path / f"{run_id}.*"))
        if len(valid_files) > 1:
            print(f"Warning: More than one configuration file found for run {run_id}. ")
        with open(valid_files[0], 'r') as f:
            content = f.read()
        return content

    def load_run(self, run_id) -> LogWriter:
        """
        Load a specific run's LogWriter in read-only mode.

        :param run_id: The run id
        :return: The logWriter bound to the run
        """
        logwriter = LogWriter(self.db_path, run_id, datetime.now())
        logwriter.enabled = False  # We cannot log with a used writer
        return logwriter

    def hide_run(self, run_id: int):
        """
        Instead of deleting runs and losing information, you can hide them. A hidden run is not visible in the
        default view of the result table; however, it can be unhidden if it was a mistake.

        :param run_id: The run id to hide
        :return: None
        """
        with self.cursor as cursor:
            cursor.execute("UPDATE Experiments SET hidden=1 WHERE run_id=?", (run_id,))

    def show_run(self, run_id: int):
        """
        Unhide a run that has been hidden. It undoes the operation performed by `hide_run`.

        :param run_id: The run id to show
        :return: None
        """
        with self.cursor as cursor:
            cursor.execute("UPDATE Experiments SET hidden=0 WHERE run_id=?", (run_id,))

    def get_hidden_runs(self) -> List[int]:
        """
        Get the list of all hidden run ids.

        :return: A list of run ids associated to hidden runs.
        """
        with self.cursor as cursor:
            cursor.execute("SELECT run_id FROM Experiments WHERE hidden>0")
            runs = cursor.fetchall()
        return [r[0] for r in runs]

    def fetch_experiment(self, run_id: int) -> Dict[str, Any]:
        """
        Load the row of an experiment. It will return a dictionary with the keys being the column names and the
        values the actual values. Note that this does not perform any other operation than a fetch in the database.
        This means that it will also show columns that were hidden.

        :param run_id: The run id to fetch
        :return: The raw row of an experiment
        """
        with self.dict_cursor as cursor:
            cursor.execute("SELECT * FROM Experiments WHERE run_id=?", (run_id,))
            row = cursor.fetchone()
        return row

    def set_column_order(self, columns: Dict[str, Optional[int]]):
        """
        Set the order of the columns in the result table. If an order is None, it will be set to NULL.

        :param columns: A dict of column names and their order. The order is the index of the column in the table.
            If the order is None, it will be set to NULL and the column will be hidden
        :return: None
        """
        with self.cursor as cursor:
            # Batch update
            for column, order in columns.items():
                cursor.execute("UPDATE ResultDisplay SET display_order=? WHERE Name=?", (order, column))

    def set_column_alias(self, columns: Dict[str, str]):
        """
        Set the alias of the columns in the result table.

        :param columns: A dict of column names and their alias. The alias is the name displayed in the table.
        :return: None
        """
        with self.cursor as cursor:
            # Batch update
            for column, alias in columns.items():
                cursor.execute("UPDATE ResultDisplay SET alias=? WHERE Name=?", (alias, column))

    def hide_column(self, column: str):
        """
        Hide a column in the result table.

        :param column: The column name to hide.
        :return: None
        """
        with self.cursor as cursor:
            cursor.execute("UPDATE ResultDisplay SET display_order=NULL WHERE Name=?", (column,))
            # Renumber every remaining visible column so that the orders stay contiguous
            cursor.execute("""
                UPDATE ResultDisplay
                SET display_order=(
                    SELECT COUNT(*) FROM ResultDisplay AS R2
                    WHERE R2.display_order < ResultDisplay.display_order
                ) + 1
                WHERE display_order IS NOT NULL;""")

    def show_column(self, column: str, order: int = -1):
        """
        Show a column in the result table.

        :param column: The column name (id) to show.
        :param order: The position to give to the column. If order is -1, it will be set to the last column.
        :return: None
        :raises KeyError: If the column is not registered in the display table
        """
        # If the column is already at this order, do nothing
        current = self.result_columns[column][0]
        if current == order:
            return
        with self.cursor as cursor:
            # Position right after the current last column (0 when no column is displayed)
            cursor.execute("SELECT MAX(display_order) FROM ResultDisplay")
            max_order = cursor.fetchone()[0]
            next_order = 0 if max_order is None else max_order + 1

            if order == -1:
                order = next_order

            # Shift every column at or after the target position to make room
            cursor.execute("""
                UPDATE ResultDisplay
                SET display_order = display_order + 1
                WHERE display_order >= ?
            """, (order,))
            # Insert the column
            cursor.execute("UPDATE ResultDisplay SET display_order=? WHERE Name=?", (order, column))

    def get_results(self, run_id: Optional[int] = None, show_hidden: bool = False) -> Tuple[List[str], List[str], List[List[Any]]]:
        """
        Build the result table and return it as a list of rows. It also returns the column names and their unique
        id. It does not return the columns that were hidden and formats the table to respect the column order. By
        default, it does not include hidden runs, but they can be included by setting show_hidden.
        You can also get a single row by passing a run_id to the method.

        :param run_id: the run id. If none is specified, it fetches all results
        :param show_hidden: Show hidden runs.
        :return: A list of column names, a list of column ids and a list of rows
        """
        query = "SELECT E.run_id, E.experiment, E.config, E.config_hash, E.cli, E.command, E.comment, E.start, E.status, E.commit_hash, E.diff, E.hidden, R.metric, R.value " \
                "FROM Experiments E LEFT JOIN Results R ON E.run_id = R.run_id"
        # Both filters may apply at once, so they are combined in a single WHERE clause
        conditions = []
        params = []
        if run_id is not None:
            conditions.append("E.run_id = ?")
            params.append(run_id)
        if not show_hidden:
            conditions.append("E.hidden = 0")
        if conditions:
            query += " WHERE " + " AND ".join(conditions)

        with self.cursor as cursor:
            cursor.execute(query, params)
            rows = cursor.fetchall()

        metrics_by_run = {}
        exp_info = {}
        for row in rows:
            row_run_id, metric, value = row[0], row[-2], row[-1]
            if row_run_id not in exp_info:
                # First time this run is seen: store the experiment-level information
                metrics_by_run[row_run_id] = {}
                exp_info[row_run_id] = dict(
                    run_id=row_run_id,
                    experiment=row[1],
                    config=row[2],
                    config_hash=row[3],
                    cli=row[4],
                    command=row[5],
                    comment=row[6],
                    start=datetime.fromisoformat(row[7]),
                    status=row[8],
                    commit_hash=row[9],
                    diff=row[10],
                    hidden=row[11]
                )
            if metric is not None:
                # The LEFT JOIN yields a NULL metric for runs without any result; there is nothing to store then
                metrics_by_run[row_run_id][metric] = value

        # Merge them together:
        for row_run_id, metrics in metrics_by_run.items():
            exp_info[row_run_id].update(metrics)

        # Sort the visible columns of the result table
        columns = [(col_id, col_order, col_alias)
                   for col_id, (col_order, col_alias) in self.result_columns.items()
                   if col_order is not None]
        columns.sort(key=lambda x: x[1])

        table = [[row.get(col[0]) for col in columns] for row in exp_info.values()]
        return [col[2] for col in columns], [col[0] for col in columns], table

    def get_image_by_id(self, image_id: int) -> Optional[Image.Image]:
        """
        If the image_id is valid, it will return the image as a PIL Image object.

        :param image_id: The id of the image to fetch.
        :return: The image as a PIL Image object or None if the image_id is not valid.
        """
        with self.cursor as cursor:
            cursor.execute("SELECT image FROM Images WHERE id_=?", (image_id,))
            row = cursor.fetchone()
        if row is None:
            return None

        # Raw bytes stored in the database, decoded as a PIL Image
        image_data = row[0]
        return Image.open(BytesIO(image_data))

    def to_pd(self, get_hidden: bool = False) -> pd.DataFrame:
        """
        Export the table to a pandas dataframe.

        :param get_hidden: If True, it will include the hidden runs.
        :return: The table as a pandas dataframe.
        """
        columns, col_ids, data = self.get_results(show_hidden=get_hidden)
        df = pd.DataFrame(data, columns=columns)
        if "run_id" in col_ids:
            # Index the dataframe by the run id column, using the alias it is displayed under
            idx = col_ids.index("run_id")
            colname = columns[idx]
            df.set_index(colname, inplace=True)
        return df

    @property
    def runs(self) -> List[int]:
        """
        Get all the runs in the result table. It will return a list of run ids.

        :return: A list of run ids.
        """
        with self.cursor as cursor:
            cursor.execute("SELECT run_id FROM Experiments")
            rows = cursor.fetchall()
        return [row[0] for row in rows]

    @property
    def result_columns(self) -> Dict[str, Tuple[Optional[int], str]]:
        """
        Get all the columns in the result table that can be shown. It will return a dictionary where the keys are
        the column ids and the values a tuple containing the column position (order) and its name (alias).
        {col_id: (order, alias), ...}

        :return: The available columns.
        """
        with self.cursor as cursor:
            cursor.execute("SELECT Name, display_order, alias FROM ResultDisplay")
            rows = cursor.fetchall()
        return {row[0]: (row[1], row[2]) for row in rows}

    @property
    def cursor(self):
        """
        Get access to the cursor to interact with the db. It is usable in a with statement.
        """
        return Cursor(self.db_path)

    @property
    def dict_cursor(self):
        """
        Get access to the cursor to interact with the db, but it returns the data as a dict. It is usable in a with
        statement.
        """
        return Cursor(self.db_path, format_as_dict=True)

    def _find_existing_run(self, experiment_name: str, config_str: str, config_hash: str, cli: str,
                           comment: str) -> Optional[Tuple[int, Optional[str]]]:
        """
        Look for a non-debug run with the same experiment, config, config hash, cli and comment.

        :return: ``(run_id, status)`` of the first matching run, or None if there is none.
        """
        with self.cursor as cursor:
            # Columns are selected by name so that the lookup does not depend on the table's column order
            cursor.execute("""
                SELECT run_id, status FROM Experiments
                WHERE experiment = ?
                AND config = ?
                AND config_hash = ?
                AND cli = ?
                AND comment = ?
            """, (experiment_name, config_str, config_hash, cli, comment))
            rows = cursor.fetchall()

        for row in rows:
            # We ignore debug runs
            if row[0] != -1:
                return row[0], row[1]
        return None

    def _backup_config(self, config_path: Union[str, PurePath], run_id: int) -> None:
        """
        Copy the configuration file into ``configs_path`` as ``<run_id>.<extension>``.
        """
        config_path = config_path if isinstance(config_path, PurePath) else PurePath(config_path)
        extension = config_path.name.split(".")[-1]
        shutil.copy(config_path, self.configs_path / f'{run_id}.{extension}')

    def _create_run_with_id(self, run_id: int, experiment_name: str, config_str: str, config_hash: str, cli: str, command: str,
                            comment: str, start: datetime, commit: Optional[str], diff: Optional[str]):
        """
        Replace the run stored under ``run_id`` (and everything logged for it) with a fresh entry.
        """
        self._delete_run(run_id)

        with self.cursor as cursor:
            cursor.execute("""
                INSERT INTO Experiments (run_id, experiment, config, config_hash, cli, command, comment, start, commit_hash, diff)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
            """,
                           (run_id, experiment_name, config_str, config_hash, cli, command, comment, start, commit, diff))

    def _create_run(self, experiment_name: str, config_str: str, config_hash: str, cli: str, command: str,
                    comment: str, start: datetime, commit: str, diff: str):
        """
        Insert a new run and let the database assign its id.

        :return: The id of the inserted row.
        """
        with self.cursor as cursor:
            cursor.execute("""
                INSERT INTO Experiments (experiment, config, config_hash, cli, command, comment, start, commit_hash, diff)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);
            """,
                           (experiment_name, config_str, config_hash, cli, command, comment, start, commit, diff))
            # Read the id while the cursor is still open
            new_run_id = cursor.lastrowid
        return new_run_id

    def _delete_run(self, run_id: int):
        """
        Delete the run with the given run_id from the database.

        IMPORTANT: You should not call this method directly.
        The result Table is supposed to be immutable.
        """
        with self.cursor as cursor:
            # Delete the run entry itself (it is replaced by the caller)
            cursor.execute("DELETE FROM Experiments WHERE run_id=?", (run_id,))
            # Delete logs
            cursor.execute("DELETE FROM Logs WHERE run_id=?", (run_id,))
            cursor.execute("DELETE FROM Images WHERE run_id=?", (run_id,))
            cursor.execute("DELETE FROM Fragments WHERE run_id=?", (run_id,))
            # Delete results
            cursor.execute("DELETE FROM Results WHERE run_id=?", (run_id,))

    @staticmethod
    def get_file_hash(file_path: Union[str, PurePath], hash_algo: str = 'sha256') -> str:
        """Returns the hex digest of the file at file_path using the specified hashing algorithm."""
        hash_func = hashlib.new(hash_algo)  # Create a new hash object for the specified algorithm

        with open(file_path, 'rb') as file:
            while chunk := file.read(8192):  # Read the file in chunks to avoid loading it whole in memory
                hash_func.update(chunk)

        return hash_func.hexdigest()

    @staticmethod
    def _create_database(db_path):
        """Create the database file at db_path by delegating to ``create_database``."""
        create_database(db_path)
|
@@ -0,0 +1,29 @@
|
|
1
|
+
from typing import *
|
2
|
+
|
3
|
+
class Scalar:
    """
    Plain container holding one scalar datapoint. The datapoints fetched from the DB are formatted into this class.
    """
    def __init__(self, run_id: int, epoch: Optional[int], step: int, split: Optional[str], label: str, value: float, wall_time: int, run_rep: int):
        """
        :param run_id: The run identifier.
        :param epoch: The epoch if available else None
        :param step: The training step of the datapoint
        :param split: The split (Train, Val, Test, or other) of the datapoint if given else None.
        :param label: The label, usually the metric name (ex: Accuracy).
        :param value: The value of the datapoint
        :param wall_time: The time since the start in seconds
        :param run_rep: The run repetition (When multiple runs are done of the same experiment) else 0.
        """
        # Where the datapoint sits in the run
        self.run_id, self.epoch, self.step, self.split = run_id, epoch, step, split
        # What was measured
        self.label, self.value = label, value
        # When it was measured and for which repetition
        self.wall_time, self.run_rep = wall_time, run_rep

    def __str__(self):
        field_names = ("run_id", "epoch", "step", "split", "label", "value", "wall_time", "run_rep")
        rendered = ", ".join(f"{name}={getattr(self, name)}" for name in field_names)
        return f"Scalar({rendered})"
|