QuLab 2.0.9__cp311-cp311-macosx_10_9_universal2.whl → 2.1.1__cp311-cp311-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
qulab/scan/recorder.py CHANGED
@@ -1,18 +1,11 @@
1
1
  import asyncio
2
- import itertools
3
2
  import os
4
3
  import pickle
5
- import sys
6
4
  import time
7
- import uuid
8
- from collections import defaultdict
9
5
  from pathlib import Path
10
- from threading import Lock
11
- from types import EllipsisType
12
6
 
13
7
  import click
14
8
  import dill
15
- import numpy as np
16
9
  import zmq
17
10
  from loguru import logger
18
11
 
@@ -21,8 +14,7 @@ from qulab.sys.rpc.zmq_socket import ZMQContextManager
21
14
  from .curd import query_record, remove_tags, tag, update_tags
22
15
  from .models import Record as RecordInDB
23
16
  from .models import Session, create_engine, create_tables, sessionmaker, utcnow
24
-
25
- _notgiven = object()
17
+ from .record import Record
26
18
 
27
19
  try:
28
20
  default_record_port = int(os.getenv('QULAB_RECORD_PORT', 6789))
@@ -38,406 +30,6 @@ datapath.mkdir(parents=True, exist_ok=True)
38
30
  record_cache = {}
39
31
 
40
32
 
41
- def random_path(base):
42
- while True:
43
- s = uuid.uuid4().hex
44
- path = base / s[:2] / s[2:4] / s[4:6] / s[6:]
45
- if not path.exists():
46
- return path
47
-
48
-
49
- def index_in_slice(slice_obj: slice | int, index: int):
50
- if isinstance(slice_obj, int):
51
- return slice_obj == index
52
- start, stop, step = slice_obj.start, slice_obj.stop, slice_obj.step
53
- if start is None:
54
- start = 0
55
- if step is None:
56
- step = 1
57
- if stop is None:
58
- stop = sys.maxsize
59
-
60
- if step > 0:
61
- return start <= index < stop and (index - start) % step == 0
62
- else:
63
- return stop < index <= start and (index - start) % step == 0
64
-
65
-
66
- class BufferList():
67
-
68
- def __init__(self, file=None, slice=None):
69
- self._list = []
70
- self.lu = ()
71
- self.rd = ()
72
- self.inner_shape = None
73
- self.file = file
74
- self._slice = slice
75
- self._lock = Lock()
76
- self._database = None
77
-
78
- def __repr__(self):
79
- return f"<BufferList: lu={self.lu}, rd={self.rd}, slice={self._slice}>"
80
-
81
- def __getstate__(self):
82
- self.flush()
83
- if isinstance(self.file, Path):
84
- file = '/'.join(self.file.parts[-4:])
85
- else:
86
- file = self.file
87
- return {
88
- 'file': file,
89
- 'lu': self.lu,
90
- 'rd': self.rd,
91
- 'inner_shape': self.inner_shape,
92
- }
93
-
94
- def __setstate__(self, state):
95
- self.file = state['file']
96
- self.lu = state['lu']
97
- self.rd = state['rd']
98
- self.inner_shape = state['inner_shape']
99
- self._list = []
100
- self._slice = None
101
- self._lock = Lock()
102
- self._database = None
103
-
104
- @property
105
- def shape(self):
106
- return tuple([i - j for i, j in zip(self.rd, self.lu)])
107
-
108
- def flush(self):
109
- if not self._list:
110
- return
111
- if isinstance(self.file, Path):
112
- with self._lock:
113
- with open(self.file, 'ab') as f:
114
- for item in self._list:
115
- dill.dump(item, f)
116
- self._list.clear()
117
-
118
- def append(self, pos, value, dims=None):
119
- if dims is not None:
120
- if any([p != 0 for i, p in enumerate(pos) if i not in dims]):
121
- return
122
- pos = tuple([pos[i] for i in dims])
123
- self.lu = tuple([min(i, j) for i, j in zip(pos, self.lu)])
124
- self.rd = tuple([max(i + 1, j) for i, j in zip(pos, self.rd)])
125
- if hasattr(value, 'shape'):
126
- if self.inner_shape is None:
127
- self.inner_shape = value.shape
128
- elif self.inner_shape != value.shape:
129
- self.inner_shape = ()
130
- self._list.append((pos, value))
131
- if len(self._list) > 1000:
132
- self.flush()
133
-
134
- def _iter_file(self):
135
- if isinstance(self.file, Path) and self.file.exists():
136
- with self._lock:
137
- with open(self.file, 'rb') as f:
138
- while True:
139
- try:
140
- pos, value = dill.load(f)
141
- yield pos, value
142
- except EOFError:
143
- break
144
-
145
- def iter(self):
146
- for pos, value in itertools.chain(self._iter_file(), self._list):
147
- if not self._slice:
148
- yield pos, value
149
- elif all([index_in_slice(s, i) for s, i in zip(self._slice, pos)]):
150
- yield pos, value[self._slice[len(pos):]]
151
-
152
- def value(self):
153
- d = []
154
- for pos, value in self.iter():
155
- d.append(value)
156
- return d
157
-
158
- def pos(self):
159
- p = []
160
- for pos, value in self.iter():
161
- p.append(pos)
162
- return p
163
-
164
- def items(self):
165
- p, d = [], []
166
- for pos, value in self.iter():
167
- p.append(pos)
168
- d.append(value)
169
- return p, d
170
-
171
- def array(self):
172
- pos, data = self.items()
173
- pos = np.asarray(pos) - np.asarray(self.lu)
174
- data = np.asarray(data)
175
- inner_shape = data.shape[1:]
176
- x = np.full(self.shape + inner_shape, np.nan, dtype=data[0].dtype)
177
- x.__setitem__(tuple(pos.T), data)
178
- return x
179
-
180
- def _full_slice(self, slice_tuple: slice
181
- | tuple[slice | int | EllipsisType, ...]):
182
- if isinstance(slice_tuple, slice):
183
- slice_tuple = (slice_tuple, ) + (slice(0, sys.maxsize,
184
- 1), ) * (len(self.lu) - 1)
185
- if slice_tuple is Ellipsis:
186
- slice_tuple = (slice(0, sys.maxsize, 1), ) * len(self.lu)
187
- else:
188
- head, tail = [], []
189
- for i, s in enumerate(slice_tuple):
190
- if s is Ellipsis:
191
- head = slice_tuple[:i]
192
- tail = slice_tuple[i + 1:]
193
- break
194
- slice_tuple = head + (slice(0, sys.maxsize, 1), ) * (
195
- len(self.lu) - len(head) - len(tail)) + tail
196
- slice_list = []
197
- for s in slice_tuple:
198
- if isinstance(s, int):
199
- slice_list.append(s)
200
- else:
201
- start, stop, step = s.start, s.stop, s.step
202
- if start is None:
203
- start = 0
204
- if step is None:
205
- step = 1
206
- if stop is None:
207
- stop = sys.maxsize
208
- slice_list.append(slice(start, stop, step))
209
- return tuple(slice_list)
210
-
211
- def __getitem__(self, slice_tuple: slice | EllipsisType
212
- | tuple[slice | int | EllipsisType, ...]):
213
- return super().__getitem__(self._full_slice(slice_tuple))
214
-
215
-
216
- class Record():
217
-
218
- def __init__(self, id, database, description=None):
219
- from .scan import OptimizeSpace
220
-
221
- self.id = id
222
- self.database = database
223
- self.description = description
224
- self._keys = set()
225
- self._items = {}
226
- self._index = []
227
- self._pos = []
228
- self._last_vars = set()
229
- self._file = None
230
- self.independent_variables = {}
231
- self.constants = {}
232
- self.dims = {}
233
-
234
- for name, value in self.description['consts'].items():
235
- if name not in self._items:
236
- self._items[name] = value
237
- self.constants[name] = value
238
- self.dims[name] = ()
239
- for level, range_list in self.description['loops'].items():
240
- for name, iterable in range_list:
241
- if isinstance(iterable, OptimizeSpace):
242
- self.dims[name] = tuple(range(level + 1))
243
- continue
244
- elif isinstance(iterable, (np.ndarray, list, tuple, range)):
245
- self._items[name] = iterable
246
- self.independent_variables[name] = iterable
247
- self.dims[name] = (level, )
248
-
249
- for level, group in self.description['order'].items():
250
- for names in group:
251
- for name in names:
252
- if name not in self.description['dependents']:
253
- if name not in self.dims:
254
- self.dims[name] = (level, )
255
- else:
256
- d = set()
257
- for n in self.description['dependents'][name]:
258
- d.update(self.dims[n])
259
- if name not in self.dims:
260
- self.dims[name] = tuple(sorted(d))
261
- else:
262
- self.dims[name] = tuple(
263
- sorted(set(self.dims[name]) | d))
264
-
265
- if self.is_local_record():
266
- self.database = Path(self.database)
267
- self._file = random_path(self.database / 'objects')
268
- self._file.parent.mkdir(parents=True, exist_ok=True)
269
-
270
- def __getstate__(self) -> dict:
271
- return {
272
- 'id': self.id,
273
- 'database': self.database,
274
- 'description': self.description,
275
- '_keys': self._keys,
276
- '_items': self._items,
277
- '_index': self._index,
278
- '_pos': self._pos,
279
- '_last_vars': self._last_vars,
280
- 'independent_variables': self.independent_variables,
281
- 'constants': self.constants,
282
- 'dims': self.dims,
283
- }
284
-
285
- def __setstate__(self, state: dict):
286
- self.id = state['id']
287
- self.database = state['database']
288
- self.description = state['description']
289
- self._keys = state['_keys']
290
- self._items = state['_items']
291
- self._index = state['_index']
292
- self._pos = state['_pos']
293
- self._last_vars = state['_last_vars']
294
- self.independent_variables = state['independent_variables']
295
- self.constants = state['constants']
296
- self.dims = state['dims']
297
- self._file = None
298
-
299
- def is_local_record(self):
300
- return not self.is_cache_record() and not self.is_remote_record()
301
-
302
- def is_cache_record(self):
303
- return self.database is None
304
-
305
- def is_remote_record(self):
306
- return isinstance(self.database,
307
- str) and self.database.startswith("tcp://")
308
-
309
- def __del__(self):
310
- self.flush()
311
-
312
- def __getitem__(self, key):
313
- return self.get(key)
314
-
315
- def get(self, key, default=_notgiven, buffer_to_array=True, slice=None):
316
- if self.is_remote_record():
317
- with ZMQContextManager(zmq.DEALER,
318
- connect=self.database) as socket:
319
- socket.send_pyobj({
320
- 'method': 'record_getitem',
321
- 'record_id': self.id,
322
- 'key': key
323
- })
324
- ret = socket.recv_pyobj()
325
- if isinstance(ret, BufferList):
326
- socket.send_pyobj({
327
- 'method': 'bufferlist_slice',
328
- 'record_id': self.id,
329
- 'key': key,
330
- 'slice': slice
331
- })
332
- lst = socket.recv_pyobj()
333
- ret._list = lst
334
- ret._slice = slice
335
- if buffer_to_array:
336
- return ret.array()
337
- else:
338
- ret._database = self.database
339
- return ret
340
- else:
341
- return ret
342
- else:
343
- if default is _notgiven:
344
- d = self._items.get(key)
345
- else:
346
- d = self._items.get(key, default)
347
- if isinstance(d, BufferList):
348
- if isinstance(d.file, str):
349
- d.file = self._file.parent.parent.parent.parent / d.file
350
- d._slice = slice
351
- if buffer_to_array:
352
- return d.array()
353
- else:
354
- return d
355
- else:
356
- return d
357
-
358
- def keys(self):
359
- if self.is_remote_record():
360
- with ZMQContextManager(zmq.DEALER,
361
- connect=self.database) as socket:
362
- socket.send_pyobj({
363
- 'method': 'record_keys',
364
- 'record_id': self.id
365
- })
366
- return socket.recv_pyobj()
367
- else:
368
- return list(self._keys)
369
-
370
- def append(self, level, step, position, variables):
371
- if level < 0:
372
- self.flush()
373
- return
374
-
375
- for key in set(variables.keys()) - self._last_vars:
376
- if key not in self.dims:
377
- self.dims[key] = tuple(range(level + 1))
378
-
379
- self._last_vars = set(variables.keys())
380
- self._keys.update(variables.keys())
381
-
382
- if level >= len(self._pos):
383
- l = level + 1 - len(self._pos)
384
- self._index.extend(([0] * (l - 1)) + [step])
385
- self._pos.extend(([0] * (l - 1)) + [position])
386
- pos = tuple(self._pos)
387
- elif level == len(self._pos) - 1:
388
- self._index[-1] = step
389
- self._pos[-1] = position
390
- pos = tuple(self._pos)
391
- else:
392
- self._index = self._index[:level + 1]
393
- self._pos = self._pos[:level + 1]
394
- self._index[-1] = step + 1
395
- self._pos[-1] = position
396
- pos = tuple(self._pos)
397
- self._pos[-1] += 1
398
-
399
- for key, value in variables.items():
400
- if self.dims[key] == ():
401
- if key not in self._items:
402
- self._items[key] = value
403
- elif level == self.dims[key][-1]:
404
- if key not in self._items:
405
- if self.is_local_record():
406
- bufferlist_file = random_path(self.database /
407
- 'objects')
408
- bufferlist_file.parent.mkdir(parents=True,
409
- exist_ok=True)
410
- self._items[key] = BufferList(bufferlist_file)
411
- else:
412
- self._items[key] = BufferList()
413
- self._items[key].lu = pos
414
- self._items[key].rd = tuple([i + 1 for i in pos])
415
- self._items[key].append(pos, value, self.dims[key])
416
- elif isinstance(self._items[key], BufferList):
417
- self._items[key].append(pos, value, self.dims[key])
418
-
419
- def flush(self):
420
- if self.is_remote_record() or self.is_cache_record():
421
- return
422
-
423
- for key, value in self._items.items():
424
- if isinstance(value, BufferList):
425
- value.flush()
426
-
427
- with open(self._file, 'wb') as f:
428
- dill.dump(self, f)
429
-
430
- def __repr__(self):
431
- return f"<Record: id={self.id} app={self.description['app']}, keys={self.keys()}>"
432
-
433
- # def _repr_html_(self):
434
- # return f"""
435
- # <h3>Record: id={self.id}, app={self.description['app']}</h3>
436
- # <p>keys={self.keys()}</p>
437
- # <p>dims={self.dims}</p>
438
- # """
439
-
440
-
441
33
  class Request():
442
34
  __slots__ = ['sock', 'identity', 'msg', 'method']
443
35
 
@@ -473,6 +65,7 @@ def get_record(session: Session, id: int, datapath: Path) -> Record:
473
65
  path = datapath / 'objects' / record_in_db.file
474
66
  with open(path, 'rb') as f:
475
67
  record = dill.load(f)
68
+ record.database = datapath
476
69
  record._file = path
477
70
  else:
478
71
  record = record_cache[id][1]
@@ -489,6 +82,7 @@ def record_create(session: Session, description: dict, datapath: Path) -> int:
489
82
  if 'tags' in description:
490
83
  record_in_db.tags = [tag(session, t) for t in description['tags']]
491
84
  record_in_db.file = '/'.join(record._file.parts[-4:])
85
+ record._file = datapath / 'objects' / record_in_db.file
492
86
  session.add(record_in_db)
493
87
  try:
494
88
  session.commit()
@@ -537,7 +131,7 @@ async def handle(session: Session, request: Request, datapath: Path):
537
131
  msg['position'], msg['variables'], datapath)
538
132
  case 'record_description':
539
133
  record = get_record(session, msg['record_id'], datapath)
540
- await reply(request, dill.dumps(record.description))
134
+ await reply(request, dill.dumps(record))
541
135
  case 'record_getitem':
542
136
  record = get_record(session, msg['record_id'], datapath)
543
137
  await reply(request, record.get(msg['key'], buffer_to_array=False))