QuLab 2.1.0__cp310-cp310-macosx_10_9_universal2.whl → 2.1.1__cp310-cp310-macosx_10_9_universal2.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
qulab/scan/recorder.py CHANGED
@@ -1,18 +1,11 @@
1
1
  import asyncio
2
- import itertools
3
2
  import os
4
3
  import pickle
5
- import sys
6
4
  import time
7
- import uuid
8
- from collections import defaultdict
9
5
  from pathlib import Path
10
- from threading import Lock
11
- from types import EllipsisType
12
6
 
13
7
  import click
14
8
  import dill
15
- import numpy as np
16
9
  import zmq
17
10
  from loguru import logger
18
11
 
@@ -21,8 +14,7 @@ from qulab.sys.rpc.zmq_socket import ZMQContextManager
21
14
  from .curd import query_record, remove_tags, tag, update_tags
22
15
  from .models import Record as RecordInDB
23
16
  from .models import Session, create_engine, create_tables, sessionmaker, utcnow
24
-
25
- _notgiven = object()
17
+ from .record import Record
26
18
 
27
19
  try:
28
20
  default_record_port = int(os.getenv('QULAB_RECORD_PORT', 6789))
@@ -38,487 +30,6 @@ datapath.mkdir(parents=True, exist_ok=True)
38
30
  record_cache = {}
39
31
 
40
32
 
41
- def random_path(base):
42
- while True:
43
- s = uuid.uuid4().hex
44
- path = base / s[:2] / s[2:4] / s[4:6] / s[6:]
45
- if not path.exists():
46
- return path
47
-
48
-
49
- def index_in_slice(slice_obj: slice | int, index: int):
50
- if isinstance(slice_obj, int):
51
- return slice_obj == index
52
- start, stop, step = slice_obj.start, slice_obj.stop, slice_obj.step
53
- if start is None:
54
- start = 0
55
- if step is None:
56
- step = 1
57
- if stop is None:
58
- stop = sys.maxsize
59
-
60
- if step > 0:
61
- return start <= index < stop and (index - start) % step == 0
62
- else:
63
- return stop < index <= start and (index - start) % step == 0
64
-
65
-
66
- class BufferList():
67
-
68
- def __init__(self, file=None, slice=None):
69
- self._list = []
70
- self.lu = ()
71
- self.rd = ()
72
- self.inner_shape = ()
73
- self.file = file
74
- self._slice = slice
75
- self._lock = Lock()
76
- self._data_id = None
77
-
78
- def __repr__(self):
79
- return f"<BufferList: shape={self.shape}, lu={self.lu}, rd={self.rd}, slice={self._slice}>"
80
-
81
- def __getstate__(self):
82
- self.flush()
83
- if isinstance(self.file, Path):
84
- file = '/'.join(self.file.parts[-4:])
85
- else:
86
- file = self.file
87
- return {
88
- 'file': file,
89
- 'lu': self.lu,
90
- 'rd': self.rd,
91
- 'inner_shape': self.inner_shape,
92
- }
93
-
94
- def __setstate__(self, state):
95
- self.file = state['file']
96
- self.lu = state['lu']
97
- self.rd = state['rd']
98
- self.inner_shape = state['inner_shape']
99
- self._list = []
100
- self._slice = None
101
- self._lock = Lock()
102
- self._data_id = None
103
-
104
- @property
105
- def shape(self):
106
- return tuple([i - j
107
- for i, j in zip(self.rd, self.lu)]) + self.inner_shape
108
-
109
- def flush(self):
110
- if not self._list:
111
- return
112
- if isinstance(self.file, Path):
113
- with self._lock:
114
- with open(self.file, 'ab') as f:
115
- for item in self._list:
116
- dill.dump(item, f)
117
- self._list.clear()
118
-
119
- def append(self, pos, value, dims=None):
120
- if dims is not None:
121
- if any([p != 0 for i, p in enumerate(pos) if i not in dims]):
122
- return
123
- pos = tuple([pos[i] for i in dims])
124
- self.lu = tuple([min(i, j) for i, j in zip(pos, self.lu)])
125
- self.rd = tuple([max(i + 1, j) for i, j in zip(pos, self.rd)])
126
- if hasattr(value, 'shape'):
127
- if self.inner_shape is None:
128
- self.inner_shape = value.shape
129
- elif self.inner_shape != value.shape:
130
- self.inner_shape = ()
131
- self._list.append((pos, value))
132
- if len(self._list) > 1000:
133
- self.flush()
134
-
135
- def _iter_file(self):
136
- if isinstance(self.file, Path) and self.file.exists():
137
- with self._lock:
138
- with open(self.file, 'rb') as f:
139
- while True:
140
- try:
141
- pos, value = dill.load(f)
142
- yield pos, value
143
- except EOFError:
144
- break
145
-
146
- def iter(self):
147
- if self._data_id is None:
148
- for pos, value in itertools.chain(self._iter_file(), self._list):
149
- if not self._slice:
150
- yield pos, value
151
- elif all(
152
- [index_in_slice(s, i) for s, i in zip(self._slice, pos)]):
153
- if self.inner_shape:
154
- yield pos, value[self._slice[len(pos):]]
155
- else:
156
- yield pos, value
157
- else:
158
- server, record_id, key = self._data_id
159
- with ZMQContextManager(zmq.DEALER, connect=server) as socket:
160
- socket.send_pyobj({
161
- 'method': 'bufferlist_slice',
162
- 'record_id': record_id,
163
- 'key': key,
164
- 'slice': self._slice
165
- })
166
- ret = socket.recv_pyobj()
167
- yield from ret
168
-
169
- def value(self):
170
- d = []
171
- for pos, value in self.iter():
172
- d.append(value)
173
- return d
174
-
175
- def pos(self):
176
- p = []
177
- for pos, value in self.iter():
178
- p.append(pos)
179
- return p
180
-
181
- def items(self):
182
- p, d = [], []
183
- for pos, value in self.iter():
184
- p.append(pos)
185
- d.append(value)
186
- return p, d
187
-
188
- def array(self):
189
- pos, data = self.items()
190
- if self._slice:
191
- pos = np.asarray(pos)
192
- lu = tuple(np.min(pos, axis=0))
193
- rd = tuple(np.max(pos, axis=0) + 1)
194
- pos = np.asarray(pos) - np.asarray(lu)
195
- shape = []
196
- for k, (s, i, j) in enumerate(zip(self._slice, rd, lu)):
197
- if s.step is not None:
198
- pos[:, k] = pos[:, k] / s.step
199
- shape.append(round(np.ceil((i - j) / s.step)))
200
- else:
201
- shape.append(i - j)
202
- shape = tuple(shape)
203
- else:
204
- shape = tuple([i - j for i, j in zip(self.rd, self.lu)])
205
- pos = np.asarray(pos) - np.asarray(self.lu)
206
- data = np.asarray(data)
207
- inner_shape = data.shape[1:]
208
- x = np.full(shape + inner_shape, np.nan, dtype=data[0].dtype)
209
- x.__setitem__(tuple(pos.T), data)
210
- return x
211
-
212
- def _full_slice(self, slice_tuple: slice
213
- | tuple[slice | int | EllipsisType, ...]):
214
- ndim = len(self.lu)
215
- if self.inner_shape:
216
- ndim += len(self.inner_shape)
217
-
218
- if isinstance(slice_tuple, slice):
219
- slice_tuple = (
220
- slice_tuple, ) + (slice(0, sys.maxsize, 1), ) * (ndim - 1)
221
- if slice_tuple is Ellipsis:
222
- slice_tuple = (slice(0, sys.maxsize, 1), ) * ndim
223
- else:
224
- head, tail = (), ()
225
- for i, s in enumerate(slice_tuple):
226
- if s is Ellipsis:
227
- head = slice_tuple[:i]
228
- tail = slice_tuple[i + 1:]
229
- break
230
- else:
231
- head = slice_tuple
232
- tail = ()
233
- slice_tuple = head + (slice(
234
- 0, sys.maxsize, 1), ) * (ndim - len(head) - len(tail)) + tail
235
- slice_list = []
236
- contract = []
237
- reversed = []
238
- for i, s in enumerate(slice_tuple):
239
- if isinstance(s, int):
240
- if s >= 0:
241
- slice_list.append(slice(s, s + 1, 1))
242
- elif i < len(self.lu):
243
- s = self.rd[i] + s
244
- slice_list.append(slice(s, s + 1, 1))
245
- else:
246
- slice_list.append(slice(s, s - 1, -1))
247
- contract.append(i)
248
- else:
249
- start, stop, step = s.start, s.stop, s.step
250
- if step is None:
251
- step = 1
252
- if step < 0 and i < len(self.lu):
253
- step = -step
254
- reversed.append(i)
255
- if start is None and stop is None:
256
- start, stop = 0, sys.maxsize
257
- elif start is None:
258
- start, stop = self.lu[i], sys.maxsize
259
- elif stop is None:
260
- start, stop = 0, start + self.lu[i]
261
- else:
262
- start, stop = stop + self.lu[i] + 1, start + self.lu[
263
- i] + 1
264
-
265
- if start is None:
266
- start = 0
267
- elif start < 0 and i < len(self.lu):
268
- start = self.rd[i] + start
269
- if step is None:
270
- step = 1
271
- if stop is None:
272
- stop = sys.maxsize
273
- elif stop < 0 and i < len(self.lu):
274
- stop = self.rd[i] + stop
275
-
276
- slice_list.append(slice(start, stop, step))
277
- return tuple(slice_list), contract, reversed
278
-
279
- def __getitem__(self, slice_tuple: slice | EllipsisType
280
- | tuple[slice | int | EllipsisType, ...]):
281
- self._slice, contract, reversed = self._full_slice(slice_tuple)
282
- ret = self.array()
283
- slices = []
284
- for i, s in enumerate(self._slice):
285
- if i in contract:
286
- slices.append(0)
287
- elif isinstance(s, slice):
288
- if i in reversed:
289
- slices.append(slice(None, None, -1))
290
- else:
291
- slices.append(slice(None, None, 1))
292
- ret = ret.__getitem__(tuple(slices))
293
- self._slice = None
294
- return ret
295
-
296
-
297
- class Record():
298
-
299
- def __init__(self, id, database, description=None):
300
- from .scan import OptimizeSpace
301
-
302
- self.id = id
303
- self.database = database
304
- self.description = description
305
- self._keys = set()
306
- self._items = {}
307
- self._index = []
308
- self._pos = []
309
- self._last_vars = set()
310
- self._file = None
311
- self.independent_variables = {}
312
- self.constants = {}
313
- self.dims = {}
314
-
315
- for name, value in self.description['consts'].items():
316
- if name not in self._items:
317
- self._items[name] = value
318
- self.constants[name] = value
319
- self.dims[name] = ()
320
- for level, range_list in self.description['loops'].items():
321
- for name, iterable in range_list:
322
- if isinstance(iterable, OptimizeSpace):
323
- self.dims[name] = tuple(range(level + 1))
324
- continue
325
- elif isinstance(iterable, (np.ndarray, list, tuple, range)):
326
- self._items[name] = iterable
327
- self.independent_variables[name] = iterable
328
- self.dims[name] = (level, )
329
-
330
- for level, group in self.description['order'].items():
331
- for names in group:
332
- for name in names:
333
- if name not in self.description['dependents']:
334
- if name not in self.dims:
335
- self.dims[name] = (level, )
336
- else:
337
- d = set()
338
- for n in self.description['dependents'][name]:
339
- d.update(self.dims[n])
340
- if name not in self.dims:
341
- self.dims[name] = tuple(sorted(d))
342
- else:
343
- self.dims[name] = tuple(
344
- sorted(set(self.dims[name]) | d))
345
-
346
- if self.is_local_record():
347
- self.database = Path(self.database)
348
- self._file = random_path(self.database / 'objects')
349
- self._file.parent.mkdir(parents=True, exist_ok=True)
350
-
351
- def __getstate__(self) -> dict:
352
- return {
353
- 'id': self.id,
354
- 'database': self.database,
355
- 'description': self.description,
356
- '_keys': self._keys,
357
- '_items': self._items,
358
- '_index': self._index,
359
- '_pos': self._pos,
360
- '_last_vars': self._last_vars,
361
- 'independent_variables': self.independent_variables,
362
- 'constants': self.constants,
363
- 'dims': self.dims,
364
- }
365
-
366
- def __setstate__(self, state: dict):
367
- self.id = state['id']
368
- self.database = state['database']
369
- self.description = state['description']
370
- self._keys = state['_keys']
371
- self._items = state['_items']
372
- self._index = state['_index']
373
- self._pos = state['_pos']
374
- self._last_vars = state['_last_vars']
375
- self.independent_variables = state['independent_variables']
376
- self.constants = state['constants']
377
- self.dims = state['dims']
378
- self._file = None
379
-
380
- def is_local_record(self):
381
- return not self.is_cache_record() and not self.is_remote_record()
382
-
383
- def is_cache_record(self):
384
- return self.database is None
385
-
386
- def is_remote_record(self):
387
- return isinstance(self.database,
388
- str) and self.database.startswith("tcp://")
389
-
390
- def __del__(self):
391
- self.flush()
392
-
393
- def __getitem__(self, key):
394
- return self.get(key, buffer_to_array=True)
395
-
396
- def get(self, key, default=_notgiven, buffer_to_array=False, slice=None):
397
- if self.is_remote_record():
398
- with ZMQContextManager(zmq.DEALER,
399
- connect=self.database) as socket:
400
- socket.send_pyobj({
401
- 'method': 'record_getitem',
402
- 'record_id': self.id,
403
- 'key': key
404
- })
405
- ret = socket.recv_pyobj()
406
- if isinstance(ret, BufferList):
407
- if buffer_to_array:
408
- socket.send_pyobj({
409
- 'method': 'bufferlist_slice',
410
- 'record_id': self.id,
411
- 'key': key,
412
- 'slice': slice
413
- })
414
- lst = socket.recv_pyobj()
415
- ret._list = lst
416
- ret._slice = slice
417
- return ret.array()
418
- else:
419
- ret._data_id = self.database, self.id, key
420
- return ret
421
- else:
422
- return ret
423
- else:
424
- if default is _notgiven:
425
- d = self._items.get(key)
426
- else:
427
- d = self._items.get(key, default)
428
- if isinstance(d, BufferList):
429
- if isinstance(d.file, str):
430
- d.file = self._file.parent.parent.parent.parent / d.file
431
- d._slice = slice
432
- if buffer_to_array:
433
- return d.array()
434
- else:
435
- return d
436
- else:
437
- return d
438
-
439
- def keys(self):
440
- if self.is_remote_record():
441
- with ZMQContextManager(zmq.DEALER,
442
- connect=self.database) as socket:
443
- socket.send_pyobj({
444
- 'method': 'record_keys',
445
- 'record_id': self.id
446
- })
447
- return socket.recv_pyobj()
448
- else:
449
- return list(self._keys)
450
-
451
- def append(self, level, step, position, variables):
452
- if level < 0:
453
- self.flush()
454
- return
455
-
456
- for key in set(variables.keys()) - self._last_vars:
457
- if key not in self.dims:
458
- self.dims[key] = tuple(range(level + 1))
459
-
460
- self._last_vars = set(variables.keys())
461
- self._keys.update(variables.keys())
462
-
463
- if level >= len(self._pos):
464
- l = level + 1 - len(self._pos)
465
- self._index.extend(([0] * (l - 1)) + [step])
466
- self._pos.extend(([0] * (l - 1)) + [position])
467
- pos = tuple(self._pos)
468
- elif level == len(self._pos) - 1:
469
- self._index[-1] = step
470
- self._pos[-1] = position
471
- pos = tuple(self._pos)
472
- else:
473
- self._index = self._index[:level + 1]
474
- self._pos = self._pos[:level + 1]
475
- self._index[-1] = step + 1
476
- self._pos[-1] = position
477
- pos = tuple(self._pos)
478
- self._pos[-1] += 1
479
-
480
- for key, value in variables.items():
481
- if self.dims[key] == ():
482
- if key not in self._items:
483
- self._items[key] = value
484
- elif level == self.dims[key][-1]:
485
- if key not in self._items:
486
- if self.is_local_record():
487
- bufferlist_file = random_path(self.database /
488
- 'objects')
489
- bufferlist_file.parent.mkdir(parents=True,
490
- exist_ok=True)
491
- self._items[key] = BufferList(bufferlist_file)
492
- else:
493
- self._items[key] = BufferList()
494
- self._items[key].lu = pos
495
- self._items[key].rd = tuple([i + 1 for i in pos])
496
- self._items[key].append(pos, value, self.dims[key])
497
- elif isinstance(self._items[key], BufferList):
498
- self._items[key].append(pos, value, self.dims[key])
499
-
500
- def flush(self):
501
- if self.is_remote_record() or self.is_cache_record():
502
- return
503
-
504
- for key, value in self._items.items():
505
- if isinstance(value, BufferList):
506
- value.flush()
507
-
508
- with open(self._file, 'wb') as f:
509
- dill.dump(self, f)
510
-
511
- def __repr__(self):
512
- return f"<Record: id={self.id} app={self.description['app']}, keys={self.keys()}>"
513
-
514
- # def _repr_html_(self):
515
- # return f"""
516
- # <h3>Record: id={self.id}, app={self.description['app']}</h3>
517
- # <p>keys={self.keys()}</p>
518
- # <p>dims={self.dims}</p>
519
- # """
520
-
521
-
522
33
  class Request():
523
34
  __slots__ = ['sock', 'identity', 'msg', 'method']
524
35
 
@@ -554,6 +65,7 @@ def get_record(session: Session, id: int, datapath: Path) -> Record:
554
65
  path = datapath / 'objects' / record_in_db.file
555
66
  with open(path, 'rb') as f:
556
67
  record = dill.load(f)
68
+ record.database = datapath
557
69
  record._file = path
558
70
  else:
559
71
  record = record_cache[id][1]
@@ -570,6 +82,7 @@ def record_create(session: Session, description: dict, datapath: Path) -> int:
570
82
  if 'tags' in description:
571
83
  record_in_db.tags = [tag(session, t) for t in description['tags']]
572
84
  record_in_db.file = '/'.join(record._file.parts[-4:])
85
+ record._file = datapath / 'objects' / record_in_db.file
573
86
  session.add(record_in_db)
574
87
  try:
575
88
  session.commit()
@@ -618,7 +131,7 @@ async def handle(session: Session, request: Request, datapath: Path):
618
131
  msg['position'], msg['variables'], datapath)
619
132
  case 'record_description':
620
133
  record = get_record(session, msg['record_id'], datapath)
621
- await reply(request, dill.dumps(record.description))
134
+ await reply(request, dill.dumps(record))
622
135
  case 'record_getitem':
623
136
  record = get_record(session, msg['record_id'], datapath)
624
137
  await reply(request, record.get(msg['key'], buffer_to_array=False))