QuLab 2.1.0__cp310-cp310-macosx_10_9_universal2.whl → 2.1.2__cp310-cp310-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
qulab/scan/recorder.py CHANGED
@@ -1,18 +1,11 @@
1
1
  import asyncio
2
- import itertools
3
2
  import os
4
3
  import pickle
5
- import sys
6
4
  import time
7
- import uuid
8
- from collections import defaultdict
9
5
  from pathlib import Path
10
- from threading import Lock
11
- from types import EllipsisType
12
6
 
13
7
  import click
14
8
  import dill
15
- import numpy as np
16
9
  import zmq
17
10
  from loguru import logger
18
11
 
@@ -21,8 +14,7 @@ from qulab.sys.rpc.zmq_socket import ZMQContextManager
21
14
  from .curd import query_record, remove_tags, tag, update_tags
22
15
  from .models import Record as RecordInDB
23
16
  from .models import Session, create_engine, create_tables, sessionmaker, utcnow
24
-
25
- _notgiven = object()
17
+ from .record import Record
26
18
 
27
19
  try:
28
20
  default_record_port = int(os.getenv('QULAB_RECORD_PORT', 6789))
@@ -36,487 +28,7 @@ else:
36
28
  datapath.mkdir(parents=True, exist_ok=True)
37
29
 
38
30
  record_cache = {}
39
-
40
-
41
- def random_path(base):
42
- while True:
43
- s = uuid.uuid4().hex
44
- path = base / s[:2] / s[2:4] / s[4:6] / s[6:]
45
- if not path.exists():
46
- return path
47
-
48
-
49
- def index_in_slice(slice_obj: slice | int, index: int):
50
- if isinstance(slice_obj, int):
51
- return slice_obj == index
52
- start, stop, step = slice_obj.start, slice_obj.stop, slice_obj.step
53
- if start is None:
54
- start = 0
55
- if step is None:
56
- step = 1
57
- if stop is None:
58
- stop = sys.maxsize
59
-
60
- if step > 0:
61
- return start <= index < stop and (index - start) % step == 0
62
- else:
63
- return stop < index <= start and (index - start) % step == 0
64
-
65
-
66
- class BufferList():
67
-
68
- def __init__(self, file=None, slice=None):
69
- self._list = []
70
- self.lu = ()
71
- self.rd = ()
72
- self.inner_shape = ()
73
- self.file = file
74
- self._slice = slice
75
- self._lock = Lock()
76
- self._data_id = None
77
-
78
- def __repr__(self):
79
- return f"<BufferList: shape={self.shape}, lu={self.lu}, rd={self.rd}, slice={self._slice}>"
80
-
81
- def __getstate__(self):
82
- self.flush()
83
- if isinstance(self.file, Path):
84
- file = '/'.join(self.file.parts[-4:])
85
- else:
86
- file = self.file
87
- return {
88
- 'file': file,
89
- 'lu': self.lu,
90
- 'rd': self.rd,
91
- 'inner_shape': self.inner_shape,
92
- }
93
-
94
- def __setstate__(self, state):
95
- self.file = state['file']
96
- self.lu = state['lu']
97
- self.rd = state['rd']
98
- self.inner_shape = state['inner_shape']
99
- self._list = []
100
- self._slice = None
101
- self._lock = Lock()
102
- self._data_id = None
103
-
104
- @property
105
- def shape(self):
106
- return tuple([i - j
107
- for i, j in zip(self.rd, self.lu)]) + self.inner_shape
108
-
109
- def flush(self):
110
- if not self._list:
111
- return
112
- if isinstance(self.file, Path):
113
- with self._lock:
114
- with open(self.file, 'ab') as f:
115
- for item in self._list:
116
- dill.dump(item, f)
117
- self._list.clear()
118
-
119
- def append(self, pos, value, dims=None):
120
- if dims is not None:
121
- if any([p != 0 for i, p in enumerate(pos) if i not in dims]):
122
- return
123
- pos = tuple([pos[i] for i in dims])
124
- self.lu = tuple([min(i, j) for i, j in zip(pos, self.lu)])
125
- self.rd = tuple([max(i + 1, j) for i, j in zip(pos, self.rd)])
126
- if hasattr(value, 'shape'):
127
- if self.inner_shape is None:
128
- self.inner_shape = value.shape
129
- elif self.inner_shape != value.shape:
130
- self.inner_shape = ()
131
- self._list.append((pos, value))
132
- if len(self._list) > 1000:
133
- self.flush()
134
-
135
- def _iter_file(self):
136
- if isinstance(self.file, Path) and self.file.exists():
137
- with self._lock:
138
- with open(self.file, 'rb') as f:
139
- while True:
140
- try:
141
- pos, value = dill.load(f)
142
- yield pos, value
143
- except EOFError:
144
- break
145
-
146
- def iter(self):
147
- if self._data_id is None:
148
- for pos, value in itertools.chain(self._iter_file(), self._list):
149
- if not self._slice:
150
- yield pos, value
151
- elif all(
152
- [index_in_slice(s, i) for s, i in zip(self._slice, pos)]):
153
- if self.inner_shape:
154
- yield pos, value[self._slice[len(pos):]]
155
- else:
156
- yield pos, value
157
- else:
158
- server, record_id, key = self._data_id
159
- with ZMQContextManager(zmq.DEALER, connect=server) as socket:
160
- socket.send_pyobj({
161
- 'method': 'bufferlist_slice',
162
- 'record_id': record_id,
163
- 'key': key,
164
- 'slice': self._slice
165
- })
166
- ret = socket.recv_pyobj()
167
- yield from ret
168
-
169
- def value(self):
170
- d = []
171
- for pos, value in self.iter():
172
- d.append(value)
173
- return d
174
-
175
- def pos(self):
176
- p = []
177
- for pos, value in self.iter():
178
- p.append(pos)
179
- return p
180
-
181
- def items(self):
182
- p, d = [], []
183
- for pos, value in self.iter():
184
- p.append(pos)
185
- d.append(value)
186
- return p, d
187
-
188
- def array(self):
189
- pos, data = self.items()
190
- if self._slice:
191
- pos = np.asarray(pos)
192
- lu = tuple(np.min(pos, axis=0))
193
- rd = tuple(np.max(pos, axis=0) + 1)
194
- pos = np.asarray(pos) - np.asarray(lu)
195
- shape = []
196
- for k, (s, i, j) in enumerate(zip(self._slice, rd, lu)):
197
- if s.step is not None:
198
- pos[:, k] = pos[:, k] / s.step
199
- shape.append(round(np.ceil((i - j) / s.step)))
200
- else:
201
- shape.append(i - j)
202
- shape = tuple(shape)
203
- else:
204
- shape = tuple([i - j for i, j in zip(self.rd, self.lu)])
205
- pos = np.asarray(pos) - np.asarray(self.lu)
206
- data = np.asarray(data)
207
- inner_shape = data.shape[1:]
208
- x = np.full(shape + inner_shape, np.nan, dtype=data[0].dtype)
209
- x.__setitem__(tuple(pos.T), data)
210
- return x
211
-
212
- def _full_slice(self, slice_tuple: slice
213
- | tuple[slice | int | EllipsisType, ...]):
214
- ndim = len(self.lu)
215
- if self.inner_shape:
216
- ndim += len(self.inner_shape)
217
-
218
- if isinstance(slice_tuple, slice):
219
- slice_tuple = (
220
- slice_tuple, ) + (slice(0, sys.maxsize, 1), ) * (ndim - 1)
221
- if slice_tuple is Ellipsis:
222
- slice_tuple = (slice(0, sys.maxsize, 1), ) * ndim
223
- else:
224
- head, tail = (), ()
225
- for i, s in enumerate(slice_tuple):
226
- if s is Ellipsis:
227
- head = slice_tuple[:i]
228
- tail = slice_tuple[i + 1:]
229
- break
230
- else:
231
- head = slice_tuple
232
- tail = ()
233
- slice_tuple = head + (slice(
234
- 0, sys.maxsize, 1), ) * (ndim - len(head) - len(tail)) + tail
235
- slice_list = []
236
- contract = []
237
- reversed = []
238
- for i, s in enumerate(slice_tuple):
239
- if isinstance(s, int):
240
- if s >= 0:
241
- slice_list.append(slice(s, s + 1, 1))
242
- elif i < len(self.lu):
243
- s = self.rd[i] + s
244
- slice_list.append(slice(s, s + 1, 1))
245
- else:
246
- slice_list.append(slice(s, s - 1, -1))
247
- contract.append(i)
248
- else:
249
- start, stop, step = s.start, s.stop, s.step
250
- if step is None:
251
- step = 1
252
- if step < 0 and i < len(self.lu):
253
- step = -step
254
- reversed.append(i)
255
- if start is None and stop is None:
256
- start, stop = 0, sys.maxsize
257
- elif start is None:
258
- start, stop = self.lu[i], sys.maxsize
259
- elif stop is None:
260
- start, stop = 0, start + self.lu[i]
261
- else:
262
- start, stop = stop + self.lu[i] + 1, start + self.lu[
263
- i] + 1
264
-
265
- if start is None:
266
- start = 0
267
- elif start < 0 and i < len(self.lu):
268
- start = self.rd[i] + start
269
- if step is None:
270
- step = 1
271
- if stop is None:
272
- stop = sys.maxsize
273
- elif stop < 0 and i < len(self.lu):
274
- stop = self.rd[i] + stop
275
-
276
- slice_list.append(slice(start, stop, step))
277
- return tuple(slice_list), contract, reversed
278
-
279
- def __getitem__(self, slice_tuple: slice | EllipsisType
280
- | tuple[slice | int | EllipsisType, ...]):
281
- self._slice, contract, reversed = self._full_slice(slice_tuple)
282
- ret = self.array()
283
- slices = []
284
- for i, s in enumerate(self._slice):
285
- if i in contract:
286
- slices.append(0)
287
- elif isinstance(s, slice):
288
- if i in reversed:
289
- slices.append(slice(None, None, -1))
290
- else:
291
- slices.append(slice(None, None, 1))
292
- ret = ret.__getitem__(tuple(slices))
293
- self._slice = None
294
- return ret
295
-
296
-
297
- class Record():
298
-
299
- def __init__(self, id, database, description=None):
300
- from .scan import OptimizeSpace
301
-
302
- self.id = id
303
- self.database = database
304
- self.description = description
305
- self._keys = set()
306
- self._items = {}
307
- self._index = []
308
- self._pos = []
309
- self._last_vars = set()
310
- self._file = None
311
- self.independent_variables = {}
312
- self.constants = {}
313
- self.dims = {}
314
-
315
- for name, value in self.description['consts'].items():
316
- if name not in self._items:
317
- self._items[name] = value
318
- self.constants[name] = value
319
- self.dims[name] = ()
320
- for level, range_list in self.description['loops'].items():
321
- for name, iterable in range_list:
322
- if isinstance(iterable, OptimizeSpace):
323
- self.dims[name] = tuple(range(level + 1))
324
- continue
325
- elif isinstance(iterable, (np.ndarray, list, tuple, range)):
326
- self._items[name] = iterable
327
- self.independent_variables[name] = iterable
328
- self.dims[name] = (level, )
329
-
330
- for level, group in self.description['order'].items():
331
- for names in group:
332
- for name in names:
333
- if name not in self.description['dependents']:
334
- if name not in self.dims:
335
- self.dims[name] = (level, )
336
- else:
337
- d = set()
338
- for n in self.description['dependents'][name]:
339
- d.update(self.dims[n])
340
- if name not in self.dims:
341
- self.dims[name] = tuple(sorted(d))
342
- else:
343
- self.dims[name] = tuple(
344
- sorted(set(self.dims[name]) | d))
345
-
346
- if self.is_local_record():
347
- self.database = Path(self.database)
348
- self._file = random_path(self.database / 'objects')
349
- self._file.parent.mkdir(parents=True, exist_ok=True)
350
-
351
- def __getstate__(self) -> dict:
352
- return {
353
- 'id': self.id,
354
- 'database': self.database,
355
- 'description': self.description,
356
- '_keys': self._keys,
357
- '_items': self._items,
358
- '_index': self._index,
359
- '_pos': self._pos,
360
- '_last_vars': self._last_vars,
361
- 'independent_variables': self.independent_variables,
362
- 'constants': self.constants,
363
- 'dims': self.dims,
364
- }
365
-
366
- def __setstate__(self, state: dict):
367
- self.id = state['id']
368
- self.database = state['database']
369
- self.description = state['description']
370
- self._keys = state['_keys']
371
- self._items = state['_items']
372
- self._index = state['_index']
373
- self._pos = state['_pos']
374
- self._last_vars = state['_last_vars']
375
- self.independent_variables = state['independent_variables']
376
- self.constants = state['constants']
377
- self.dims = state['dims']
378
- self._file = None
379
-
380
- def is_local_record(self):
381
- return not self.is_cache_record() and not self.is_remote_record()
382
-
383
- def is_cache_record(self):
384
- return self.database is None
385
-
386
- def is_remote_record(self):
387
- return isinstance(self.database,
388
- str) and self.database.startswith("tcp://")
389
-
390
- def __del__(self):
391
- self.flush()
392
-
393
- def __getitem__(self, key):
394
- return self.get(key, buffer_to_array=True)
395
-
396
- def get(self, key, default=_notgiven, buffer_to_array=False, slice=None):
397
- if self.is_remote_record():
398
- with ZMQContextManager(zmq.DEALER,
399
- connect=self.database) as socket:
400
- socket.send_pyobj({
401
- 'method': 'record_getitem',
402
- 'record_id': self.id,
403
- 'key': key
404
- })
405
- ret = socket.recv_pyobj()
406
- if isinstance(ret, BufferList):
407
- if buffer_to_array:
408
- socket.send_pyobj({
409
- 'method': 'bufferlist_slice',
410
- 'record_id': self.id,
411
- 'key': key,
412
- 'slice': slice
413
- })
414
- lst = socket.recv_pyobj()
415
- ret._list = lst
416
- ret._slice = slice
417
- return ret.array()
418
- else:
419
- ret._data_id = self.database, self.id, key
420
- return ret
421
- else:
422
- return ret
423
- else:
424
- if default is _notgiven:
425
- d = self._items.get(key)
426
- else:
427
- d = self._items.get(key, default)
428
- if isinstance(d, BufferList):
429
- if isinstance(d.file, str):
430
- d.file = self._file.parent.parent.parent.parent / d.file
431
- d._slice = slice
432
- if buffer_to_array:
433
- return d.array()
434
- else:
435
- return d
436
- else:
437
- return d
438
-
439
- def keys(self):
440
- if self.is_remote_record():
441
- with ZMQContextManager(zmq.DEALER,
442
- connect=self.database) as socket:
443
- socket.send_pyobj({
444
- 'method': 'record_keys',
445
- 'record_id': self.id
446
- })
447
- return socket.recv_pyobj()
448
- else:
449
- return list(self._keys)
450
-
451
- def append(self, level, step, position, variables):
452
- if level < 0:
453
- self.flush()
454
- return
455
-
456
- for key in set(variables.keys()) - self._last_vars:
457
- if key not in self.dims:
458
- self.dims[key] = tuple(range(level + 1))
459
-
460
- self._last_vars = set(variables.keys())
461
- self._keys.update(variables.keys())
462
-
463
- if level >= len(self._pos):
464
- l = level + 1 - len(self._pos)
465
- self._index.extend(([0] * (l - 1)) + [step])
466
- self._pos.extend(([0] * (l - 1)) + [position])
467
- pos = tuple(self._pos)
468
- elif level == len(self._pos) - 1:
469
- self._index[-1] = step
470
- self._pos[-1] = position
471
- pos = tuple(self._pos)
472
- else:
473
- self._index = self._index[:level + 1]
474
- self._pos = self._pos[:level + 1]
475
- self._index[-1] = step + 1
476
- self._pos[-1] = position
477
- pos = tuple(self._pos)
478
- self._pos[-1] += 1
479
-
480
- for key, value in variables.items():
481
- if self.dims[key] == ():
482
- if key not in self._items:
483
- self._items[key] = value
484
- elif level == self.dims[key][-1]:
485
- if key not in self._items:
486
- if self.is_local_record():
487
- bufferlist_file = random_path(self.database /
488
- 'objects')
489
- bufferlist_file.parent.mkdir(parents=True,
490
- exist_ok=True)
491
- self._items[key] = BufferList(bufferlist_file)
492
- else:
493
- self._items[key] = BufferList()
494
- self._items[key].lu = pos
495
- self._items[key].rd = tuple([i + 1 for i in pos])
496
- self._items[key].append(pos, value, self.dims[key])
497
- elif isinstance(self._items[key], BufferList):
498
- self._items[key].append(pos, value, self.dims[key])
499
-
500
- def flush(self):
501
- if self.is_remote_record() or self.is_cache_record():
502
- return
503
-
504
- for key, value in self._items.items():
505
- if isinstance(value, BufferList):
506
- value.flush()
507
-
508
- with open(self._file, 'wb') as f:
509
- dill.dump(self, f)
510
-
511
- def __repr__(self):
512
- return f"<Record: id={self.id} app={self.description['app']}, keys={self.keys()}>"
513
-
514
- # def _repr_html_(self):
515
- # return f"""
516
- # <h3>Record: id={self.id}, app={self.description['app']}</h3>
517
- # <p>keys={self.keys()}</p>
518
- # <p>dims={self.dims}</p>
519
- # """
31
+ CACHE_SIZE = 1024
520
32
 
521
33
 
522
34
  class Request():
@@ -534,11 +46,11 @@ async def reply(req, resp):
534
46
 
535
47
 
536
48
  def clear_cache():
537
- if len(record_cache) < 1024:
49
+ if len(record_cache) < CACHE_SIZE:
538
50
  return
539
51
 
540
52
  for k, (t, _) in zip(sorted(record_cache.items(), key=lambda x: x[1][0]),
541
- range(len(record_cache) - 1024)):
53
+ range(len(record_cache) - CACHE_SIZE)):
542
54
  del record_cache[k]
543
55
 
544
56
 
@@ -547,14 +59,20 @@ def flush_cache():
547
59
  r.flush()
548
60
 
549
61
 
62
+ def get_local_record(session: Session, id: int, datapath: Path) -> Record:
63
+ record_in_db = session.get(RecordInDB, id)
64
+ record_in_db.atime = utcnow()
65
+ path = datapath / 'objects' / record_in_db.file
66
+ with open(path, 'rb') as f:
67
+ record = dill.load(f)
68
+ record.database = datapath
69
+ record._file = path
70
+ return record
71
+
72
+
550
73
  def get_record(session: Session, id: int, datapath: Path) -> Record:
551
74
  if id not in record_cache:
552
- record_in_db = session.get(RecordInDB, id)
553
- record_in_db.atime = utcnow()
554
- path = datapath / 'objects' / record_in_db.file
555
- with open(path, 'rb') as f:
556
- record = dill.load(f)
557
- record._file = path
75
+ record = get_local_record(session, id, datapath)
558
76
  else:
559
77
  record = record_cache[id][1]
560
78
  clear_cache()
@@ -570,6 +88,7 @@ def record_create(session: Session, description: dict, datapath: Path) -> int:
570
88
  if 'tags' in description:
571
89
  record_in_db.tags = [tag(session, t) for t in description['tags']]
572
90
  record_in_db.file = '/'.join(record._file.parts[-4:])
91
+ record._file = datapath / 'objects' / record_in_db.file
573
92
  session.add(record_in_db)
574
93
  try:
575
94
  session.commit()
@@ -618,7 +137,7 @@ async def handle(session: Session, request: Request, datapath: Path):
618
137
  msg['position'], msg['variables'], datapath)
619
138
  case 'record_description':
620
139
  record = get_record(session, msg['record_id'], datapath)
621
- await reply(request, dill.dumps(record.description))
140
+ await reply(request, dill.dumps(record))
622
141
  case 'record_getitem':
623
142
  record = get_record(session, msg['record_id'], datapath)
624
143
  await reply(request, record.get(msg['key'], buffer_to_array=False))