labdata-0.0.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
labdata/__init__.py ADDED
@@ -0,0 +1,17 @@
+ VERSION = "0.0.3"
+
+ from .utils import *
+ from .copy import copy_to_upload_server
+ from .s3 import copy_to_s3
+
+
+ plugins = {} # to have all plugins in the same place
+ if 'plugins' in prefs.keys():
+     for modkey in prefs['plugins'].keys():
+         try:
+             modpath = str(Path(prefs['plugins'][modkey])/"__init__.py")
+             exec(f"{modkey} = plugin_lazy_import('{modkey}')")
+             exec(f'plugins["{modkey}"] = {modkey}')
+         except Exception as err:
+             print(err)
+             print(f'Failed to load plugin: {modkey}')
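
The loop above uses exec() so that each plugin can be bound as a top-level attribute under its configured name. A minimal sketch of the same registration pattern written with importlib instead, assuming prefs['plugins'] maps a module name to the directory holding its __init__.py (load_plugins is an illustrative name, and the eager exec_module call is an assumption; labdata's plugin_lazy_import defers the actual import):

    import importlib.util
    from pathlib import Path

    def load_plugins(prefs):
        # Register every configured plugin under its module name, mirroring
        # the exec()-based loop in labdata/__init__.py above.
        plugins = {}
        for modkey, moddir in prefs.get('plugins', {}).items():
            try:
                modpath = Path(moddir) / "__init__.py"
                spec = importlib.util.spec_from_file_location(modkey, modpath)
                module = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(module)  # eager import (illustrative); labdata defers this
                plugins[modkey] = module
            except Exception as err:
                print(err)
                print(f'Failed to load plugin: {modkey}')
        return plugins
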
labdata/cli.py ADDED
@@ -0,0 +1,499 @@
+ from .utils import *
+ import argparse
+
+ class CLI_parser():
+     def __init__(self):
+         parser = argparse.ArgumentParser(
+             description = f'{tcolor["y"]("labdata")} - tools to manage data in an experimental neuroscience lab',
+             usage = f''' labdata <command> [args]
+
+     Command to start the dashboard webpage:
+         {tcolor["y"]("dashboard")}   Starts the dashboard monitor webpage
+
+     Data manipulation commands are:
+
+         {tcolor["y"]("subjects")}   List subjects
+         {tcolor["y"]("sessions")} -a <subject>   List sessions
+         {tcolor["y"]("get")} -a <subject> -s <session>   Download data from one session if not already there
+         {tcolor["y"]("put")} -a <subject> -s <session>   Copies a dataset to the server to be uploaded
+         {tcolor["y"]("clean")}   Deletes files that are already added
+
+     Data analysis commands:
+
+         {tcolor["g"]("run")} <analysis> -a <subject> -s <session>   Allocates and runs analysis, local, queued or on AWS
+         {tcolor["g"]("task")} <compute_task_number>   Runs an allocated analysis task
+
+     Other
+         logpipe <compute_task_number>   Appends stdout log to a ComputeTask
+         build-container   Builds and uploads singularity/apptainer containers to S3
+         run-container   Launches a container in execution mode
+
+     Server commands (don't run on experimental computers):
+         {tcolor["r"]("upload")}   Sends pending data to S3 (applies upload rules)
+
+ ''')
+
+         parser.add_argument('command',
+                             help = 'type: labdata <command> -h for help')
+
+         args = parser.parse_args(sys.argv[1:2])
+         command = args.command.replace('-','_') # can use - in command
+         if not hasattr(self, command):
+             print('The command [{0}] was not recognized.'.format(args.command))
+             parser.print_help()
+             exit(1)
+         getattr(self,command)() # Runs the following parser
+
+     def dashboard(self):
+         import subprocess as sub
+         main_webpage_path = Path(__file__).parent / 'dashboard' / 'index.py'
+         command = f'streamlit run {str(main_webpage_path)}'.split(' ')
+         sub.run(command)
+
+     def subjects(self):
+         parser = argparse.ArgumentParser(
+             description = 'List subjects',
+             usage = '''labdata subjects -u <USER> -f <FILTER>''')
+         parser.add_argument('-u','--user',
+                             default = None,
+                             type = str,
+                             help = 'User name restriction')
+
+         parser.add_argument('-f','--filter-name',
+                             default = None,
+                             type = str,
+                             help = 'Filter for subject_name')
+
+         parser.add_argument('-s','--filter-sex',
+                             default = None,
+                             type = str,
+                             help = 'Filter for subject_sex')
+
+         args = parser.parse_args(sys.argv[2:])
+
+         from .schema import Subject,LabMember
+         query = Subject()
+         if not args.filter_name is None:
+             query = query & f'subject_name LIKE "%{args.filter_name}%"'
+         if not args.filter_sex is None:
+             query = query & f'subject_sex LIKE "%{args.filter_sex}%"'
+         if not args.user is None:
+             if len(LabMember & dict(user_name = args.user)):
+                 query = query & f'user_name LIKE "%{args.user}%"'
+             else:
+                 print(f'User name {args.user} not found.')
+         subjects = pd.DataFrame(query)
+         if not len(subjects):
+             print('No subjects in the query.')
+             return
+         for uname in np.unique(subjects.user_name):
+             ss = subjects[subjects.user_name == uname]
+             sss = ('Experimenter \033[96m{first_name} {last_name}\033[0m [{user_name}]'.format(**(LabMember() & dict(user_name = uname)).fetch1()))
+             print(sss)
+             for i,s in ss.reset_index().iterrows():
+                 if np.mod(i,2):
+                     c = '\033[96m'
+                 else:
+                     c = '\033[91m'
+                 cc = '\033[0m'
+                 print(f'{c}\t{i+1}.{cc} {s.subject_name}\t{s.subject_sex}\t{s.subject_dob}\t{s.strain_name}\t')
+             print('')
+
+     def sessions(self):
+         parser = argparse.ArgumentParser(
+             description = 'List sessions and datatypes',
+             usage = '''labdata sessions -a <SUBJECT>''')
+         parser = self._add_default_arguments(parser,1)
+
+         args = parser.parse_args(sys.argv[2:])
+         from .schema import Subject, Session, Dataset
+         for s in args.subject:
+             subject_name = s
+             datasets = pd.DataFrame((Dataset()*Session() &
+                                      dict(subject_name = subject_name)).fetch())
+             sessions = np.sort(np.unique(datasets.session_datetime.values))
+             print(f'\n {s} - {len(sessions)} sessions - {len(datasets)} datasets')
+             for ses in sessions:
+                 dsets = datasets[datasets.session_datetime == ses]
+                 print(f'\t {tcolor["c"](dsets.iloc[0].session_name)}')
+                 for i,t in dsets.iterrows():
+                     if t.dataset_type is None:
+                         print(f'\t\t *{t.dataset_name}')
+                     else:
+                         print(f'\t\t {t.dataset_type} - {t.dataset_name}')
+
+     def get(self):
+         parser = argparse.ArgumentParser(
+             description = 'Download data from one or multiple sessions',
+             usage = '''labdata get -a <SUBJECTS> -s <SESSIONS>''')
+         parser = self._add_default_arguments(parser,3)
+
+         # TODO: Add an argument to include files that match a pattern..
+
+         args = parser.parse_args(sys.argv[2:])
+         from .schema import Subject, Session, Dataset, File
+         keys = []
+         # do all combinations of sessions and datasets
+         if not args.subject is None:
+             for a in args.subject:
+                 keys.append(dict(subject_name = a))
+         if not args.session is None:
+             for ses in args.session:
+                 if len(keys):
+                     for k in keys:
+                         k['session_name'] = ses
+                 else:
+                     keys.append(dict(session_name = ses))
+         if not args.datatype is None:
+             for d in args.datatype:
+                 if len(keys):
+                     for k in keys:
+                         k['dataset_type'] = d
+                 else:
+                     keys.append(dict(dataset_type = d))
+         # download the files
+         (File() & (Dataset.DataFiles() & keys).proj()).get()
+
+     def put(self):
+         parser = argparse.ArgumentParser(
+             description = 'Copies data to the server to be uploaded [THIS DOES NOT UPLOAD TO THE CLOUD]',
+             usage = '''labdata put -a <SUBJECT> -s <SESSION>''')
+
+         parser = self._add_default_arguments(parser)
+         parser.add_argument('filepaths', action = 'store',
+                             default = [''], type = str, nargs = '+')
+         parser.add_argument('-t','--datatype-name',
+                             action = 'store',
+                             default = None, type = str, nargs = 1)
+         parser.add_argument('-r','--rule',
+                             action = 'store',
+                             default = None, type = str, nargs = 1)
+         parser.add_argument('--overwrite', action = 'store_true',
+                             default = False)
+         parser.add_argument('--ask', action = 'store_true',
+                             default = False)
+         parser.add_argument('--select',
+                             action = 'store_true',
+                             default = False)
+
+         args = parser.parse_args(sys.argv[2:])
+         if args.select: # open a gui to select files, not working at the moment
+             from .widgets import QApplication, LABDATA_PUT
+             app = QApplication(sys.argv)
+             w = LABDATA_PUT()
+             sys.exit(app.exec_())
+         else:
+             from .widgets import QApplication, ServerCopyWidget
+             app = QApplication(sys.argv)
+             filepaths = args.filepaths
+             if len(filepaths) == 1:
+                 if Path(filepaths[0]).is_dir(): # then select all files inside the folder
+                     filepaths = list(Path(filepaths[0]).rglob('**/*'))
+                     filepaths = list(filter(lambda f: f.is_file(),filepaths))
+             w = ServerCopyWidget(src_filepaths = filepaths,
+                                  upload_rule = args.rule,
+                                  user_confirmation = args.ask,
+                                  overwrite = args.overwrite)
+             app.exit()
+             #sys.exit(app.exec_())
+
+     def clean(self):
+         parser = argparse.ArgumentParser(
+             description = 'Releases local storage space.',
+             usage = '''labdata clean -f "ephys"''')
+         parser.add_argument('-f','--filter',action = 'store',default = [], type = str, nargs = '+')
+         parser.add_argument('--dry-run',action = 'store_true', default = False)
+         args = parser.parse_args(sys.argv[2:])
+         from .copy import clean_local_path
+
+         deleted,kept = clean_local_path(filterkeys = args.filter, dry_run = args.dry_run)
+         print(deleted,kept)
+
+     def run(self):
+         parser = argparse.ArgumentParser(
+             description = 'Allocates or runs an analysis',
+             usage = '''labdata run <ANALYSIS> -a <SUBJECT> -s <SESSION>''')
+         parser.add_argument('analysis',action = 'store',default = '',type = str)
+         parser.add_argument('-j','--job',action = 'store',default = None, type = int)
+         parser.add_argument('-t','--target',action = 'store', default = prefs['compute']['default_target'], type = str)
+         parser.add_argument('--force-submit',action = 'store_true', default = False)
+
+         parser = self._add_default_arguments(parser)
+         secondary_args = []
+         argum = sys.argv[2:]
+         if '--' in sys.argv:
+             argum = sys.argv[2:sys.argv.index('--')]
+             secondary_args = sys.argv[sys.argv.index('--'):]
+         args = parser.parse_args(argum)
+         from .compute import parse_analysis,run_analysis
+         # parse_analysis will check if the analysis is defined
+         jobids,obj = parse_analysis(analysis = args.analysis,
+                                     job_id = args.job,
+                                     subject = args.subject,
+                                     session = args.session,
+                                     datatype = args.datatype,
+                                     secondary_args = secondary_args,
+                                     force_submit = args.force_submit,
+                                     full_command = ' '.join(sys.argv[1:]))
+         if not len(jobids):
+             print('Nothing to run.')
+             return
+         target = args.target
+         run_analysis(target,jobids, obj)
+
+     def task_reset(self):
+         parser = argparse.ArgumentParser(
+             description = 'Reset a task in ComputeTask so the job can be run again',
+             usage = '''labdata task-reset <JOB_ID> ''')
+         parser.add_argument('job_id', action = 'store', type = int, nargs='+')
+         parser.add_argument('-t','--target', action = 'store', default = None, type = str)
+         parser.add_argument('--resubmit', action = 'store_true', default = False)
+         parser.add_argument('--clear-all', action = 'store_true', default = False)
+
+         args = parser.parse_args(sys.argv[2:])
+         if args.clear_all:
+             print('Deleting all compute tasks - please confirm.')
+             from .schema import ComputeTask
+             ComputeTask.delete()
+             sys.exit()
+         for job_id in args.job_id:
+             from .schema import ComputeTask
+             jb = (ComputeTask() & f'job_id = {job_id}').fetch(as_dict = True)
+             if not len(jb):
+                 raise(ValueError(f'ComputeTask job_id: {job_id} not found.'))
+             jb = jb[0]
+             ComputeTask.update1(dict(job_id = job_id,
+                                      task_waiting = 1,
+                                      task_status = 'WAITING',
+                                      task_starttime = None,
+                                      task_endtime = None))
+             if args.resubmit:
+                 # then re-submit the compute task
+                 from .compute.utils import load_analysis_object, run_analysis
+                 obj = load_analysis_object(jb['task_name'])(None)
+                 if not args.target is None:
+                     target = args.target
+                 else:
+                     target = jb['task_target'].split('@')[0]
+                 run_analysis(target,[job_id],obj)
+
+     def task(self):
+         parser = argparse.ArgumentParser(
+             description = 'Runs a ComputeTask',
+             usage = '''labdata task <JOB_ID> ''')
+         parser.add_argument('job_id',action = 'store',default = None,type = int)
+         args = parser.parse_args(sys.argv[2:])
+         job_id = args.job_id
+         if not job_id is None:
+             from .compute import handle_compute
+             task = handle_compute(job_id)
+             task.compute()
+
+     def upload(self):
+         parser = argparse.ArgumentParser(
+             description = 'Runs an UploadTask',
+             usage = '''labdata upload <JOB_ID> (optional) ''')
+         parser.add_argument('job_id',action = 'store',default = [], type = int, nargs = '*')
+         parser.add_argument('--all-hosts',action = 'store_true',default = False)
+         parser.add_argument('--reset-failed',action = 'store_true',default = False)
+         parser.add_argument('--queue',action='store_true',default = False)
+         args = parser.parse_args(sys.argv[2:])
+         if args.queue:
+             from .schema import UploadJob
+             j = UploadJob & 'job_status = "WORKING"'
+             if len(j):
+                 print(tcolor['m']("++++++++++++++++++++ JOBS IN PROGRESS ++++++++++++++++++++"))
+                 print(j)
+             j = UploadJob & 'job_status = "FAILED"' & 'job_waiting = 0'
+             if len(j):
+                 print(tcolor['r']("++++++++++++++++++++ FAILED JOBS ++++++++++++++++++++"))
+                 print(j)
+             j = UploadJob & 'job_waiting = 1'
+             if len(j):
+                 print(tcolor['y']("++++++++++++++++++++ WAITING JOBS ++++++++++++++++++++"))
+                 print(j)
+             j = UploadJob & 'job_status = "COMPLETED"'
+             lenj = len(j)
+             if (lenj < 10) & (lenj > 0):
+                 print(tcolor['g']("++++++++++++++++++++ COMPLETED JOBS ++++++++++++++++++++"))
+                 print(j)
+             else:
+                 print(tcolor['g'](f"++++++++++++++++++++ {lenj} COMPLETED JOBS ++++++++++++++++++++"))
+             return
+         job_ids = args.job_id
+
+         if args.reset_failed:
+             from .schema import UploadJob
+             print(len(job_ids))
+             if not len(job_ids):
+                 jbs = (UploadJob() & 'job_status = "FAILED"').proj().fetch(as_dict=True)
+             else:
+                 jbs = (UploadJob() & [f'job_id = {i}' for i in job_ids]).proj().fetch(as_dict=True)
+             for jb in jbs:
+                 jb['job_waiting'] = 1
+                 jb['job_status'] = "WAITING"
+                 UploadJob().update1(jb)
+         keys = []
+         for j in job_ids:
+             keys.append(dict(job_id = j))
+             if not args.all_hosts:
+                 keys[-1]['job_host'] = prefs['hostname']
+         from .rules import process_upload_jobs
+         if len(keys):
+             task = process_upload_jobs(keys)
+         else:
+             key = dict(job_waiting = 1)
+             if not args.all_hosts:
+                 key['job_host'] = prefs['hostname']
+             tasks = process_upload_jobs(key)
+
+     def logpipe(self):
+         parser = argparse.ArgumentParser(
+             description = 'Sends the stdout to a log',
+             usage = '''labdata logpipe <JOB_ID> ''')
+         parser.add_argument('job_id',action = 'store',default = None,type = int)
+         parser.add_argument('-i','--refresh-period',action = 'store',default = 5., type = float)
+         args = parser.parse_args(sys.argv[2:])
+         job_id = args.job_id
+         refresh_period = args.refresh_period
+         from .schema import ComputeTask
+         from time import time as toc
+         tic = toc()
+         if job_id is None:
+             print('No task specified.')
+         else:
+             # Check first if the job exists
+             t = ComputeTask & f'job_id = {job_id}'
+             if not len(t):
+                 print(f'Could not find ComputeTask: {job_id}.')
+                 return
+             print(f'Appending stdout to ComputeTask [{job_id}] ')
+
+             def handle_line(line,tic):
+                 if toc()-tic > refresh_period:
+                     tic = toc()
+                     log = t.fetch('task_log')[0]
+                     if not line is None:
+                         if log is None:
+                             log = line
+                         else:
+                             log += '\n' + line
+                     if len(log) > 2000:
+                         log = log[-1999:]
+                     ComputeTask().update1(dict(job_id = job_id, task_log = log))
+                 if not line is None:
+                     print(line,end = '',flush = True)
+                 return line,tic
+             from select import select
+             while True:
+                 # select returns three lists; only read when stdin is ready
+                 readable,_,_ = select([sys.stdin],[],[],refresh_period)
+                 if readable:
+                     line,tic = handle_line(sys.stdin.readline(),tic)
+                     if not line:
+                         print('Pipe closed.')
+                         break
+
+     def run_container(self):
+         '''
+         Runs a container.
+         '''
+         parser = argparse.ArgumentParser(
+             description = 'Run a container',
+             usage = '''labdata run-container <container> ''')
+         parser.add_argument('container_name',
+                             action = 'store',
+                             type = str)
+         parser.add_argument('-t','--target',action = 'store', default = prefs['compute']['default_target'], type = str)
+         parser.add_argument('--jupyter', action = 'store_true', default = False)
+         parser.add_argument('--cuda',action = 'store_true', default = False)
+
+         args = parser.parse_args(sys.argv[2:])
+         container = args.container_name
+         container_file = None
+         cmd = None
+         target = args.target
+         if target in ['local','slurm']:
+             container_store = Path(prefs['compute']['containers']['local'])
+             container_file = (container_store/container).with_suffix('.sif')
+             if not container_file.exists():
+                 print(f"Container {tcolor['r'](container)} not found in {tcolor['r'](container_store)}")
+                 return
+
+         if args.jupyter:
+             cmd = "jupyter lab"
+
+         if not cmd is None:
+             from .compute.singularity import run_on_apptainer
+             launchcmd = run_on_apptainer(container_file,
+                                          command = cmd,
+                                          cuda = args.cuda,
+                                          bind_from_prefs = True,
+                                          launch_cmd = 'run', # use 'exec' for ephemeral runs
+                                          dry_run = True)
+             print(launchcmd)
+
+     def build_container(self):
+         '''
+         Build containers and upload them to S3.
+         '''
+         parser = argparse.ArgumentParser(
+             description = 'Builds container(s).',
+             usage = '''labdata build-container <container_file> ''')
+         parser.add_argument('container_file',
+                             action = 'store',
+                             type = str,
+                             nargs = '+')
+         parser.add_argument('--upload',
+                             action='store_true',
+                             default=False)
+         parser.add_argument('--skip-build',
+                             action='store_true',
+                             default=False)
+         args = parser.parse_args(sys.argv[2:])
+         container_files = args.container_file
+         destination = Path(prefs['compute']['containers']['local'])
+         built = []
+         for definition_file in container_files:
+             definition_file = Path(definition_file).resolve().absolute()
+             container = Path(destination/definition_file.stem).with_suffix('.sif')
+             cmd = f'apptainer build --fakeroot --force {container} {definition_file}'
+             if args.skip_build:
+                 print(f'Skipping build: {cmd}')
+             else:
+                 os.system(cmd)
+             if container.exists():
+                 built.append(container)
+         if args.upload:
+             from .s3 import copy_to_s3
+             storage_name = prefs['compute']['containers']['storage']
+             dst = ['containers/' + b.name for b in built]
+             print('Uploading containers to S3, this may take a while.')
+             copy_to_s3(built,dst,storage_name = storage_name)
+             print(f'Uploaded {dst} to storage {storage_name}.')
+
+     def _add_default_arguments(self, parser, level = 3):
+         if level >= 1:
+             parser.add_argument('-a','--subject',
+                                 action='store',
+                                 default=None, type=str, nargs='+')
+         if level >= 2:
+             parser.add_argument('-s','--session',
+                                 action='store',
+                                 default=None, type=str, nargs='+')
+         if level >= 3:
+             parser.add_argument('-d','--datatype',
+                                 action='store',
+                                 default=None, type=str, nargs='+')
+         return parser
+
+     def _get_default_arg(self, argument, cli_arg = 'submit', default = None):
+         # checks if there is a default in the options
+         if not f'{cli_arg}_defaults' in labdata_preferences.keys():
+             return default # no defaults
+         if labdata_preferences[f'{cli_arg}_defaults'] is None:
+             return default # dict not defined
+         if not argument in labdata_preferences[f'{cli_arg}_defaults'].keys():
+             return default # not defined
+         return labdata_preferences[f'{cli_arg}_defaults'][argument]
+
+ def main():
+     CLI_parser()
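
CLI_parser above uses a two-stage, git-style argparse dispatch: the first parser consumes only sys.argv[1:2] to pick the command, and each command method then builds its own parser for sys.argv[2:]. A minimal, self-contained sketch of that pattern (Dispatcher and hello are illustrative names, not part of labdata):

    # Minimal sketch of the git-style subcommand dispatch used by CLI_parser.
    import argparse
    import sys

    class Dispatcher:
        def __init__(self):
            parser = argparse.ArgumentParser(usage='tool <command> [args]')
            parser.add_argument('command')
            args = parser.parse_args(sys.argv[1:2])    # parse only the command name
            command = args.command.replace('-', '_')   # allow "task-reset" to map to task_reset
            if not hasattr(self, command):
                parser.print_help()
                sys.exit(1)
            getattr(self, command)()                   # the command parses sys.argv[2:] itself

        def hello(self):
            parser = argparse.ArgumentParser(description='Example command')
            parser.add_argument('-n', '--name', default='world')
            args = parser.parse_args(sys.argv[2:])
            print(f'hello {args.name}')

    if __name__ == '__main__':
        Dispatcher()

Keeping one parser per subcommand lets each command define its own flags without a central registry, and the replace('-', '_') line is what lets hyphenated commands such as build-container map onto Python method names.
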
labdata/compute/__init__.py ADDED
@@ -0,0 +1,27 @@
+ '''Compute tasks for lab data processing and analysis.
+
+ This module provides compute task classes for running analyses on lab data.
+ Tasks can be scheduled and run on dedicated containers using job schedulers.
+
+ Available compute tasks:
+   - SpksCompute: Spike sorting using Kilosort/Phy via SPKS
+   - DeeplabcutCompute: Animal pose estimation using DeepLabCut
+
+ Each compute task can be:
+   - Scheduled to run on compute clusters via SLURM/PBS
+   - Executed in isolated Singularity/Docker containers
+   - Tracked and monitored through the database
+   - Configured via user preferences
+
+ The compute tasks handle:
+   - Input/output file management
+   - Container and environment setup
+   - Job scheduling and resource allocation
+   - Progress tracking and error handling
+   - Result storage and validation
+ '''
+
+ from .utils import *
+ from .singularity import build_singularity_container
+ from .ephys import SpksCompute
+ from .pose import DeeplabcutCompute
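
As the docstring notes, tasks are tracked through the database and executed by job id; CLI_parser.task() in cli.py does this via handle_compute. A minimal sketch of that execution flow, using only the entry points visible above (the job id value is hypothetical):

    # Sketch of the execution flow from CLI_parser.task() in cli.py:
    # a worker (local, SLURM, or inside a container) receives an allocated
    # job id and runs the corresponding ComputeTask.
    from labdata.compute import handle_compute

    job_id = 1                     # hypothetical ComputeTask id allocated by `labdata run`
    task = handle_compute(job_id)  # look up the allocated task for this job id
    task.compute()                 # run it; equivalent to `labdata task 1`
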