emod-api 3.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. emod_api/__init__.py +1 -0
  2. emod_api/campaign.py +170 -0
  3. emod_api/channelreports/__init__.py +0 -0
  4. emod_api/channelreports/channels.py +433 -0
  5. emod_api/channelreports/icj_to_csv.py +65 -0
  6. emod_api/channelreports/plot_icj_means.py +149 -0
  7. emod_api/channelreports/plot_prop_report.py +205 -0
  8. emod_api/channelreports/utils.py +326 -0
  9. emod_api/config/__init__.py +0 -0
  10. emod_api/config/default_from_schema.py +16 -0
  11. emod_api/config/default_from_schema_no_validation.py +177 -0
  12. emod_api/config/from_overrides.py +135 -0
  13. emod_api/demographics/__init__.py +0 -0
  14. emod_api/demographics/age_distribution.py +163 -0
  15. emod_api/demographics/base_input_file.py +28 -0
  16. emod_api/demographics/calculators.py +159 -0
  17. emod_api/demographics/demographic_exceptions.py +54 -0
  18. emod_api/demographics/demographics.py +249 -0
  19. emod_api/demographics/demographics_base.py +752 -0
  20. emod_api/demographics/demographics_overlay.py +41 -0
  21. emod_api/demographics/fertility_distribution.py +235 -0
  22. emod_api/demographics/implicit_functions.py +112 -0
  23. emod_api/demographics/mortality_distribution.py +227 -0
  24. emod_api/demographics/node.py +456 -0
  25. emod_api/demographics/overlay_node.py +16 -0
  26. emod_api/demographics/properties_and_attributes.py +737 -0
  27. emod_api/demographics/service/__init__.py +0 -0
  28. emod_api/demographics/service/grid_construction.py +143 -0
  29. emod_api/demographics/service/service.py +55 -0
  30. emod_api/demographics/susceptibility_distribution.py +170 -0
  31. emod_api/demographics/updateable.py +58 -0
  32. emod_api/legacy/__init__.py +0 -0
  33. emod_api/legacy/plotAllCharts.py +230 -0
  34. emod_api/migration/__init__.py +0 -0
  35. emod_api/migration/__main__.py +22 -0
  36. emod_api/migration/migration.py +782 -0
  37. emod_api/multidim_plotter.py +80 -0
  38. emod_api/schema_to_class.py +440 -0
  39. emod_api/serialization/__init__.py +0 -0
  40. emod_api/serialization/census_and_mod_pop.py +48 -0
  41. emod_api/serialization/dtk_file_support.py +61 -0
  42. emod_api/serialization/dtk_file_tools.py +1378 -0
  43. emod_api/serialization/dtk_file_utility.py +141 -0
  44. emod_api/serialization/serialized_population.py +205 -0
  45. emod_api/spatialreports/__init__.py +0 -0
  46. emod_api/spatialreports/__main__.py +67 -0
  47. emod_api/spatialreports/plot_spat_means.py +99 -0
  48. emod_api/spatialreports/spatial.py +210 -0
  49. emod_api/utils/__init__.py +26 -0
  50. emod_api/utils/distributions/__init__.py +0 -0
  51. emod_api/utils/distributions/base_distribution.py +38 -0
  52. emod_api/utils/distributions/bimodal_distribution.py +64 -0
  53. emod_api/utils/distributions/constant_distribution.py +58 -0
  54. emod_api/utils/distributions/demographic_distribution_flag.py +16 -0
  55. emod_api/utils/distributions/distribution_type.py +15 -0
  56. emod_api/utils/distributions/dual_constant_distribution.py +68 -0
  57. emod_api/utils/distributions/dual_exponential_distribution.py +75 -0
  58. emod_api/utils/distributions/exponential_distribution.py +63 -0
  59. emod_api/utils/distributions/gaussian_distribution.py +69 -0
  60. emod_api/utils/distributions/log_normal_distribution.py +61 -0
  61. emod_api/utils/distributions/poisson_distribution.py +59 -0
  62. emod_api/utils/distributions/uniform_distribution.py +70 -0
  63. emod_api/utils/distributions/weibull_distribution.py +69 -0
  64. emod_api/utils/str_enum.py +6 -0
  65. emod_api/weather/__init__.py +0 -0
  66. emod_api/weather/weather.py +428 -0
  67. emod_api-3.0.2.dist-info/METADATA +131 -0
  68. emod_api-3.0.2.dist-info/RECORD +71 -0
  69. emod_api-3.0.2.dist-info/WHEEL +5 -0
  70. emod_api-3.0.2.dist-info/licenses/LICENSE +21 -0
  71. emod_api-3.0.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1378 @@
1
+ #!/usr/bin/python
2
+
3
+ """
4
+ Support for six versions of the serialized population file format:
5
+ 1. "Original version": single payload chunk with simulation and all nodes, uncompressed or snappy or LZ4
6
+ 2. "First chunked version": multiple payload chunks, one for simulation and one each for nodes
7
+ 3. "Second chunked version": multiple payload chunks, simulation and node objects are "root" objects in each chunk
8
+ 4. "Metadata update": compressed: true|false + engine: NONE|LZ4|SNAPPY replaced with compression: NONE|LZ4|SNAPPY
9
+ 5. "Emod info added": emod_info added to header
10
+ 6. "Per-node/human compression and chunk sizes": sim_compression, sim_chunk_size, node_compressions,
11
+ node_chunk_sizes, human_compressions, human_node_suids, human_chunk_sizes added to header
12
+ """
13
+
14
+ import copy
15
+ import gc
16
+ from collections.abc import MutableMapping
17
+ import json
18
+ import os
19
+ import time
20
+ import emod_api.serialization.dtk_file_support as support
21
+
22
+
23
# NOTE(review): presumably the marker string identifying a serialized
# population file - confirm against the reader/writer code.
IDTK = 'IDTK'
# Highest format version described in the module docstring.
MAX_VERSION = 6


# -----------------------------------------------------------------------------
# --- compression helpers
# -----------------------------------------------------------------------------

# Engine names as they appear in the headers of format versions 1-5.
NONE = 'NONE'
LZ4 = 'LZ4'
SNAPPY = 'SNAPPY'

# Engine name -> codec object; looked up by compress() and uncompress().
__engines__ = {
    LZ4: support.EllZeeFour,
    SNAPPY: support.Snappy,
    NONE: support.Uncompressed,
}

# V6 compression strings are fixed to always be three characters so that
# the header size is predictable regardless of compression type used.
V6_COMPRESSION_STR_NONE = "NON"
V6_COMPRESSION_STR_LZ4 = "LZ4"
V6_COMPRESSION_STR_SNAPPY = "SNA"
41
+
42
+
43
def _determine_v6_compression_type(data):
    """
    Pick the V6 compression string for a payload based on its length.

    Args:
        data: Sized payload (anything supporting ``len``).

    Returns:
        str: ``V6_COMPRESSION_STR_LZ4`` for lengths below 0x7E000000,
        ``V6_COMPRESSION_STR_SNAPPY`` for lengths below 0xFFFFFFFF,
        otherwise ``V6_COMPRESSION_STR_NONE``.
    """
    size = len(data)
    # Largest payloads first: nothing at or above the 32-bit limit is compressed.
    if size >= 0xFFFFFFFF:
        return V6_COMPRESSION_STR_NONE
    # NOTE(review): 0x7E000000 looks like LZ4's documented maximum input size - confirm.
    if size >= 0x7E000000:
        return V6_COMPRESSION_STR_SNAPPY
    return V6_COMPRESSION_STR_LZ4
50
+
51
+
52
def _compression_type_v6_to_old(compression_str):
    """
    Translate a three-character V6 compression string into the engine name
    used by ``compress``/``uncompress``.

    Args:
        compression_str: One of the ``V6_COMPRESSION_STR_*`` values.

    Returns:
        str: ``NONE``, ``LZ4`` or ``SNAPPY``.

    Raises:
        RuntimeError: If ``compression_str`` is not a known V6 compression string.
    """
    translations = (
        (V6_COMPRESSION_STR_NONE, NONE),
        (V6_COMPRESSION_STR_LZ4, LZ4),
        (V6_COMPRESSION_STR_SNAPPY, SNAPPY),
    )
    for v6_name, engine_name in translations:
        if compression_str == v6_name:
            return engine_name
    raise RuntimeError(f"Unknown/unsupported compression scheme '{compression_str}'")
61
+
62
+
63
def _compression_type_old_to_v6(compression_str):
    """
    Translate an engine name (``NONE``/``LZ4``/``SNAPPY``) into the matching
    three-character V6 compression string.

    Args:
        compression_str: One of ``NONE``, ``LZ4`` or ``SNAPPY``.

    Returns:
        str: The corresponding ``V6_COMPRESSION_STR_*`` value.

    Raises:
        RuntimeError: If ``compression_str`` is not a known engine name.
    """
    translations = (
        (NONE, V6_COMPRESSION_STR_NONE),
        (LZ4, V6_COMPRESSION_STR_LZ4),
        (SNAPPY, V6_COMPRESSION_STR_SNAPPY),
    )
    for engine_name, v6_name in translations:
        if compression_str == engine_name:
            return v6_name
    raise RuntimeError(f"Unknown/unsupported compression scheme '{compression_str}'")
72
+
73
+
74
def uncompress(data, engine):
    """
    Uncompress ``data`` with the codec registered for ``engine``.

    Args:
        data: Compressed payload handed to the codec.
        engine: Key into ``__engines__`` (``NONE``, ``LZ4`` or ``SNAPPY``).

    Returns:
        Whatever the selected codec's ``uncompress`` returns.

    Raises:
        RuntimeError: If ``engine`` is not a registered compression scheme.
    """
    if engine not in __engines__:
        raise RuntimeError(f"Unknown/unsupported compression scheme '{engine}'")
    codec = __engines__[engine]
    return codec.uncompress(data)
79
+
80
+
81
def compress(data, engine):
    """
    Compress ``data`` with the codec registered for ``engine``.

    Args:
        data: Payload handed to the codec.
        engine: Key into ``__engines__`` (``NONE``, ``LZ4`` or ``SNAPPY``).

    Returns:
        Whatever the selected codec's ``compress`` returns.

    Raises:
        RuntimeError: If ``engine`` is not a registered compression scheme.
    """
    if engine not in __engines__:
        raise RuntimeError(f"Unknown/unsupported compression scheme '{engine}'")
    codec = __engines__[engine]
    return codec.compress(data)
86
+
87
+
88
+ # -----------------------------------------------------------------------------
89
+ # --- DtkHeader
90
+ # -----------------------------------------------------------------------------
91
+
92
class DtkHeader(support.SerialObject):
    """
    Header object used by DtkFileV1 through DtkFileV5.

    Args:
        dictionary (dict): Header entries. When omitted, a default header is
            created: no chunks, LZ4 compression, version 1, current local time
            as the date and this module's file name as the tool.
    """

    # noinspection PyDefaultArgument
    def __init__(self, dictionary=None):
        if dictionary is None:
            # Key order is kept as-is because the header is serialized with json.dumps.
            dictionary = {
                'author': 'unknown',
                'bytecount': 0,
                'chunkcount': 0,
                'chunksizes': [],
                'compressed': True,
                'date': time.strftime('%a %b %d %H:%M:%S %Y'),
                'engine': LZ4,
                'tool': os.path.basename(__file__),
                'version': 1,
            }
        super().__init__(dictionary)

    def __str__(self):
        """Return the header as compact JSON text (no spaces after separators)."""
        return json.dumps(self, separators=(',', ':'))

    def __len__(self):
        """Return the length of the compact JSON text, not the number of entries."""
        return len(str(self))
116
+
117
+ # -----------------------------------------------------------------------------
118
+ # --- DtkFile
119
+ # -----------------------------------------------------------------------------
120
+
121
+
122
class DtkFile(object):
    """
    Common base for the serialized population file classes that keep their
    payload as a flat list of compressed chunks.

    The payload is exposed at three levels:
      - ``chunks``: the raw (compressed) data of every chunk,
      - ``contents``: the uncompressed text of every chunk,
      - ``objects``: the parsed JSON object of every chunk.

    Args:
        header: Header object. ``header.chunkcount`` sizes the initial chunk
            list and ``header.engine`` names the compression scheme.
    """

    class Contents(object):
        """List-like view of the parent's chunks as uncompressed text."""

        def __init__(self, parent):
            self.__parent__ = parent
            return

        @staticmethod
        def _to_bytes(value):
            # Text is encoded before compression; anything else is passed through unchanged.
            return value.encode() if isinstance(value, str) else value

        def __iter__(self):
            index = 0
            # len() is re-evaluated every pass so chunks appended mid-iteration are seen.
            while index < len(self):
                yield self.__getitem__(index)
                index += 1

        def __getitem__(self, index):
            """Return chunk ``index`` uncompressed and decoded as UTF-8 text."""
            data = str(uncompress(self.__parent__.chunks[index], self.__parent__.compression), 'utf-8')
            return data

        def __setitem__(self, index, value):
            """Compress ``value`` and store it as chunk ``index``."""
            data = compress(self._to_bytes(value), self.__parent__.compression)
            self.__parent__.chunks[index] = data
            return

        def append(self, item):
            """
            Compress ``item`` and add it as a new chunk.

            Text is encoded first, exactly as ``__setitem__`` does, so that both
            ways of adding content hand the same kind of data to the codec.
            """
            data = compress(self._to_bytes(item), self.__parent__.compression)
            self.__parent__.chunks.append(data)

        def __len__(self):
            length = len(self.__parent__.chunks)
            return length

    class Objects(object):
        """List-like view of the parent's chunks as parsed JSON objects."""

        def __init__(self, parent):
            self.__parent__ = parent
            return

        def __iter__(self):
            index = 0
            while index < len(self):
                yield self.__getitem__(index)
                index += 1

        def __getitem__(self, index):
            """
            Parse chunk ``index`` and return the resulting object.

            Raises:
                UserWarning: If the chunk cannot be read or parsed; the original
                    error is chained as the cause.
            """
            try:
                contents = self.__parent__.contents[index]
                item = json.loads(contents, object_hook=support.SerialObject)
            except Exception as err:
                raise UserWarning(f"Could not parse JSON in chunk {index}") from err
            return item

        def __setitem__(self, index, value):
            """Serialize ``value`` to compact JSON and store it as chunk ``index``."""
            contents = json.dumps(value, separators=(',', ':'))
            self.__parent__.contents[index] = contents
            return

        def append(self, item):
            """Serialize ``item`` to compact JSON and add it as a new chunk."""
            contents = json.dumps(item, separators=(',', ':'))
            self.__parent__.contents.append(contents)
            return

        def __len__(self):
            length = len(self.__parent__.chunks)
            return length

    def __init__(self, header):
        self.__header__ = header
        # One empty slot per chunk announced by the header; subclasses fill them in.
        self._chunks = [None] * header.chunkcount
        self.contents = self.Contents(self)
        self.objects = self.Objects(self)
        return

    @property
    def header(self):
        """The header object this file was created with."""
        return self.__header__

    @property
    def compressed(self):
        """True unless the header's engine is ``NONE``."""
        is_compressed = (self.__header__.engine.upper() != NONE)
        return is_compressed

    @property
    def compression(self):
        """Upper-cased name of the compression engine from the header."""
        engine = self.__header__.engine.upper()
        return engine

    @compression.setter
    def compression(self, engine):
        self.__set_compression__(engine.upper())

    @property
    def byte_count(self):
        """Total size of all chunks."""
        total = sum(self.chunk_sizes)
        return total

    @property
    def chunk_count(self):
        """Number of chunks."""
        length = len(self.chunks)
        return length

    @property
    def chunk_sizes(self):
        """List with the size of every chunk."""
        sizes = [len(chunk) for chunk in self.chunks]
        return sizes

    # Optional header entries
    @property
    def author(self):
        return self.__header__.author if 'author' in self.__header__ else ''

    @author.setter
    def author(self, value):
        self.__header__['author'] = str(value)
        return

    @property
    def date(self):
        return self.__header__.date if 'date' in self.__header__ else ''

    @date.setter
    def date(self, value):
        self.__header__['date'] = str(value)

    @property
    def tool(self):
        return self.__header__.tool if 'tool' in self.__header__ else ''

    @tool.setter
    def tool(self, value):
        self.__header__['tool'] = str(value)
        return

    @property
    def version(self):
        return self.__header__.version

    @property
    def chunks(self):
        """The list of raw (compressed) chunks."""
        return self._chunks

    @property
    def nodes(self):
        """Node accessor; ``_nodes`` is assigned by the subclasses."""
        return self._nodes

    def _sync_header(self):
        """Refresh the header's date, chunk count, chunk sizes and byte count from the chunks."""
        self.__header__.date = time.strftime('%a %b %d %H:%M:%S %Y')
        self.__header__.chunkcount = len(self.chunks)
        self.__header__.chunksizes = [len(chunk) for chunk in self.chunks]
        self.__header__.bytecount = sum(self.__header__.chunksizes)

        return

    def __set_compression__(self, engine):
        """
        Re-compress every chunk with ``engine`` and record the engine in the header.

        Nothing happens when ``engine`` already is the current compression.
        """
        if engine != self.compression:
            for index in range(self.chunk_count):
                # contents[index] is decoded text (still using the old engine, since the
                # header is only updated below); encode it again before compressing,
                # as Contents.__setitem__ does.
                chunk = compress(self.contents[index].encode(), engine)
                self._chunks[index] = chunk
            self.__header__.engine = engine
            self.__header__['compressed'] = (engine != NONE)
        return
281
+
282
+ # -----------------------------------------------------------------------------
283
+ # --- DtkFileV1
284
+ # ---
285
+ # --- "Original version": single payload chunk with simulation and all nodes,
286
+ # --- uncompressed or snappy or LZ4
287
+ # -----------------------------------------------------------------------------
288
+
289
+
290
class DtkFileV1(DtkFile):
    """
    "Original version" file: a single payload chunk holding the simulation
    together with all of its nodes.

    Args:
        header (DtkHeader): Header of the file; a default version 1 header is
            created when omitted.
        filename (str): Name of the file being read (for error messages).
        handle: Open file object positioned at the payload, or None when no
            payload should be read.

    Raises:
        UserWarning: If fewer bytes than announced by the header could be read.
    """

    def __init__(self, header=None, filename='', handle=None):
        if header is None:
            header = DtkHeader()
            header.version = 1
        super().__init__(header)
        # Defined up front so the ``nodes`` property also works without a payload.
        self._nodes = []
        if handle is not None:
            size = header.chunksizes[0]
            self.chunks[0] = handle.read(size)
            # Same truncation check as the V2/V3 readers perform for each chunk.
            if len(self.chunks[0]) != size:
                raise UserWarning(
                    f"Only read {len(self.chunks[0])} bytes of {size} for chunk 0 of file '{filename}'")
            # Each entry of simulation.nodes wraps the node; dereference it here.
            self._nodes = [entry.node for entry in self.simulation.nodes]
        return

    @property
    def simulation(self):
        """The simulation object, parsed from chunk 0."""
        return self.objects[0].simulation

    @simulation.setter
    def simulation(self, value):
        self.objects[0] = {'simulation': value}
        return
310
+
311
+ # -----------------------------------------------------------------------------
312
+ # --- DtkFileV2
313
+ # ---
314
+ # --- "First chunked version": multiple payload chunks, one for simulation and
315
+ # --- one each for nodes
316
+ # -----------------------------------------------------------------------------
317
+
318
+
319
class DtkFileV2(DtkFile):
    """
    "First chunked version" file: chunk 0 holds the simulation and every
    following chunk holds one node.

    Args:
        header (DtkHeader): Header of the file; a default header with version 2
            is created when omitted.
        filename (str): Name of the file being read (for error messages).
        handle: Open file object the chunks announced by the header are read from.

    Raises:
        UserWarning: If a chunk is shorter than the size announced by the header.
    """

    class NodesV2(object):
        """List-like access to the nodes, which live in chunks 1..n of the parent."""

        def __init__(self, parent):
            self.__parent__ = parent

        def __iter__(self):
            position = 0
            while position < len(self):
                # Version 2 looks like this {'suid':{'id':id},'node':{...}};
                # __getitem__ dereferences the node for simplicity.
                yield self[position]
                position += 1

        def __getitem__(self, index):
            # Chunk 0 is the simulation, hence the offset of one.
            entry = self.__parent__.objects[index + 1]
            return entry.node

        def __setitem__(self, index, value):
            # Version 2 actually saves the entry from simulation.nodes (C++) which is a map of suid to node.
            entry = {'suid': {'id': value.suid.id}, 'node': value}
            self.__parent__.objects[index + 1] = entry

        def __len__(self):
            return self.__parent__.chunk_count - 1

    def __init__(self, header=None, filename='', handle=None):
        if header is None:
            header = DtkHeader()
            header.version = 2
        super().__init__(header)
        for index, size in enumerate(header.chunksizes):
            self.chunks[index] = handle.read(size)
            if len(self.chunks[index]) != size:
                raise UserWarning(
                    f"Only read {len(self.chunks[index])} bytes of {size} for chunk {index} of file '{filename}'")
        self._nodes = self.NodesV2(self)

    @property
    def simulation(self):
        """
        The simulation object without its 'nodes' entry.

        Version 2 stores chunk 0 as {'simulation':{...}}, so the simulation is
        dereferenced here for simplicity.
        """
        simulation = self.objects[0]['simulation']
        del simulation['nodes']
        return simulation

    @simulation.setter
    def simulation(self, value):
        # Work on a copy so the caller's object keeps its own 'nodes' entry.
        stripped = copy.deepcopy(value)
        stripped['nodes'] = []
        self.objects[0] = {'simulation': stripped}
372
+
373
+ # -----------------------------------------------------------------------------
374
+ # --- DtkFileV3
375
+ # ---
376
+ # --- "Second chunked version": multiple payload chunks, simulation and
377
+ # --- node objects are "root" objects in each chunk
378
+ # -----------------------------------------------------------------------------
379
+
380
+
381
class DtkFileV3(DtkFile):
    """
    "Second chunked version" file: the simulation (chunk 0) and the nodes
    (chunks 1..n) are the root objects of their chunks.

    Args:
        header (DtkHeader): Header of the file; a default header with version 3
            is created when omitted.
        filename (str): Name of the file being read (for error messages).
        handle: Open file object the chunks announced by the header are read from.

    Raises:
        UserWarning: If a chunk is shorter than the size announced by the header.
    """

    class NodesV3(object):
        """List-like access to the nodes, which live in chunks 1..n of the parent."""

        def __init__(self, parent):
            self.__parent__ = parent

        def __iter__(self):
            position = 0
            while position < len(self):
                yield self[position]
                position += 1

        def __getitem__(self, index):
            # Chunk 0 is the simulation, hence the offset of one.
            return self.__parent__.objects[index + 1]

        def __setitem__(self, index, value):
            self.__parent__.objects[index + 1] = value

        def __len__(self):
            return self.__parent__.chunk_count - 1

    def __init__(self, header=None, filename='', handle=None):
        if header is None:
            header = DtkHeader()
            header.version = 3
        super().__init__(header)
        for index, size in enumerate(header.chunksizes):
            self.chunks[index] = handle.read(size)
            if len(self.chunks[index]) != size:
                raise UserWarning(f"Only read {len(self.chunks[index])} bytes of {size} for chunk {index} of file '{filename}'")
        self._nodes = self.NodesV3(self)

    @property
    def simulation(self):
        """The simulation object (root object of chunk 0) without its 'nodes' entry."""
        simulation = self.objects[0]
        del simulation['nodes']
        return simulation

    @simulation.setter
    def simulation(self, value):
        # Work on a copy so the caller's object keeps its own 'nodes' entry.
        stripped = copy.deepcopy(value)
        stripped['nodes'] = []
        self.objects[0] = stripped
440
+
441
+ # -----------------------------------------------------------------------------
442
+ # --- DtkFileV4
443
+ # ---
444
+ # --- "Metadata update": compressed: true|false + engine: NONE|LZ4|SNAPPY replaced
445
+ # --- with compression: NONE|LZ4|SNAPPY
446
+ # -----------------------------------------------------------------------------
447
+
448
+
449
class DtkFileV4(DtkFileV3):
    """
    "Metadata update" file: same chunk handling as DtkFileV3, with the header
    version set to 4.

    Args:
        header (DtkHeader): Header of the file; a default header is created when omitted.
        filename (str): Name of the file being read (for error messages).
        handle: Open file object the chunks are read from.
    """

    def __init__(self, header=None, filename='', handle=None):
        header = DtkHeader() if header is None else header
        super().__init__(header, filename, handle)
        # Set after the parent constructor has run.
        header.version = 4
457
+
458
+ # -----------------------------------------------------------------------------
459
+ # --- DtkFileV5
460
+ # ---
461
+ # --- "Emod info added": emod_info added to header
462
+ # -----------------------------------------------------------------------------
463
+
464
+
465
class DtkFileV5(DtkFileV4):
    """
    "Emod info added" file: like DtkFileV4 with an 'emod_info' entry in the
    header and the header version set to 5.

    Args:
        header (DtkHeader): Header of the file. When omitted, a default header
            with a zeroed 'emod_info' entry is created.
        filename (str): Name of the file being read (for error messages).
        handle: Open file object the chunks are read from.
    """

    def __init__(self, header=None, filename='', handle=None):
        if header is None:
            header = DtkHeader()
            # Placeholder build information for a header created from scratch.
            emod_info = {
                'emod_major_version': 0,
                'emod_minor_version': 0,
                'emod_revision_number': 0,
                'ser_pop_major_version': 0,
                'ser_pop_minor_version': 0,
                'ser_pop_patch_version': 0,
                'emod_build_date': "Mon Jan 1 00:00:00 1970",
                'emod_builder_name': "",
                'emod_sccs_branch': 0,
                'emod_sccs_date': "Mon Jan 1 00:00:00 1970",
            }
            header.update({'emod_info': emod_info})
        super().__init__(header, filename, handle)
        # Set after the parent constructors, which assign version 4.
        header.version = 5
487
+
488
+ # -----------------------------------------------------------------------------
489
+ # --- DtkHeaderV6
490
+ # -----------------------------------------------------------------------------
491
+
492
+
493
class DtkHeaderV6(support.SerialObject):
    """
    The header for V6 is quite different because we distinguish the different types
    of chunks - sim, node, human collection. It also specifies the compression type
    for each type of chunk separately.

    The 'human_num_humans' was added to the format to support this python code.
    It helps the code to know how many humans are in a particular chunk so that
    we can hide that the humans for one node are actually in different collections.

    Args:
        dictionary (dict): Header entries. When omitted, a default header
            without any node or human chunks is created.
    """

    def __init__(self, dictionary=None):
        if dictionary is None:
            # Placeholder build information for a header created from scratch.
            emod_info = {
                'emod_major_version': 0,
                'emod_minor_version': 0,
                'emod_revision_number': 0,
                'ser_pop_major_version': 0,
                'ser_pop_minor_version': 0,
                'ser_pop_patch_version': 0,
                'emod_build_date': "Mon Jan 1 00:00:00 1970",
                'emod_builder_name': "",
                'emod_sccs_branch': 0,
                'emod_sccs_date': "Mon Jan 1 00:00:00 1970",
            }
            # Key order is kept as-is because the header is serialized with json.dumps.
            dictionary = {
                "version": 6,
                "author": "IDM",
                "tool": "DTK",
                "date": time.strftime('%a %b %d %H:%M:%S %Y'),
                "emod_info": emod_info,
                "sim_compression": "NON",
                "sim_chunk_size": "0000000000000000",
                "node_suids": [],
                "node_compressions": [],
                "node_chunk_sizes": [],
                "human_compressions": [],
                "human_node_suids": [],
                "human_num_humans": [],
                "human_chunk_sizes": [],
            }
        super().__init__(dictionary)

    def __str__(self):
        """Return the header as compact JSON text (no spaces after separators)."""
        return json.dumps(self, separators=(',', ':'))

    def __len__(self):
        """Return the length of the compact JSON text, not the number of entries."""
        return len(str(self))
542
+
543
+ # -----------------------------------------------------------------------------
544
+ # --- DtkFileV6
545
+ # ---
546
+ # --- "Per-node/human compression and chunk sizes": sim_compression, sim_chunk_size, node_compressions,
547
+ # --- node_chunk_sizes, human_compressions, human_node_suids, human_chunk_sizes added to header
548
+ # -----------------------------------------------------------------------------
549
+
550
+
551
+ class DtkFileV6(object):
552
+ """
553
+ The V6 file moves the humans out of the JSON serialized for the node and puts
554
+ them into their own chunks. This helps to reduce the size of the JSON for
555
+ one node and allows the memory for one collection of humans to be freed before
556
+ we get the next set. This greatly reduces the peak memory usage when processing
557
+ populations that require lots of memory.
558
+ """
559
class Chunk(object):
    """
    Chunk represents a compressed chunk of data in a V6 serialized population file.
    In the code, _json and _chunk are mutually exclusive - only one is populated at a time.

    Args:
        filename (str): The name of the file being read (for error messages).
        obj_type_str (str): The type of object in the chunk (for error messages).
        v6_compression_str (str): The V6 compression string for the chunk.
        node_suid (int): The SUID of the node the chunk belongs to.
        chunk_size (int): The size of the chunk in bytes.
        chunk (bytes): The compressed chunk data.

    Raises:
        UserWarning: If ``chunk`` is None while ``chunk_size`` is not 0, or if
            the length of ``chunk`` differs from ``chunk_size``.
    """
    def __init__(self,
                 filename,
                 obj_type_str,
                 v6_compression_str,
                 node_suid,
                 chunk_size,
                 chunk):
        if chunk is None and chunk_size != 0:
            msg = f"Chunk is None but chunk size is {chunk_size} for {obj_type_str} chunk of file '{filename}'"
            raise UserWarning(msg)
        elif (chunk is not None) and (len(chunk) != chunk_size):
            msg = f"Only read {len(chunk)} bytes of {chunk_size} for {obj_type_str} chunk of file '{filename}'"
            raise UserWarning(msg)

        self._v6_compression_str = v6_compression_str
        self._node_suid = node_suid
        self._chunk_size = chunk_size
        self._chunk = chunk
        self._json = None
        return

    def get_json(self):
        """
        Return the JSON dictionary for the chunk, uncompressing and parsing it if necessary.

        Once parsed, the compressed data is dropped and the chunk size reset to 0.

        Raises:
            UserWarning: If the uncompressed data is not valid JSON; the parser
                error is chained as the cause.
        """
        if self._json is None:
            old_compression_type = _compression_type_v6_to_old(self._v6_compression_str)
            uncomp_data = str(uncompress(self._chunk, old_compression_type), 'utf-8')
            try:
                json_data = json.loads(uncomp_data, object_hook=support.SerialObject)
            except Exception as err:
                raise UserWarning(f"Could not parse JSON in chunk with size {self._chunk_size}") from err
            self._json = json_data
            # Keep only one representation alive to limit memory usage.
            self._chunk = None
            self._chunk_size = 0
            gc.collect()
        return self._json

    def set_json(self, json_data):
        """
        Replace the existing JSON with the provided JSON dictionary.
        Also compresses and stores the chunk.
        """
        self._json = json_data
        # store() only compresses when no compressed data is present, so any
        # previous payload must be dropped or it would outlive the new JSON.
        self._chunk = None
        self._chunk_size = 0
        self.store()
        return

    def store(self):
        """
        Compress and store the JSON dictionary as a chunk.

        Does nothing when compressed data is already present. The compression
        type is chosen again from the size of the serialized JSON.
        """
        if self._chunk is None:
            json_data = json.dumps(self._json, separators=(',', ':'))
            self._v6_compression_str = _determine_v6_compression_type(json_data)
            old_compression_type = _compression_type_v6_to_old(self._v6_compression_str)
            self._chunk = compress(json_data.encode(), old_compression_type)
            self._chunk_size = len(self._chunk)
            self._json = None
            gc.collect()
        return

    @property
    def v6_compression_str(self):
        """
        Return the V6 compression string for the chunk - NON, LZ4, SNA.
        """
        return self._v6_compression_str

    @property
    def node_suid(self):
        """
        Return the SUID of the node the chunk belongs to - not the external ID.
        """
        return self._node_suid

    @property
    def chunk_size(self):
        """
        Return the size of the chunk in bytes.
        """
        return self._chunk_size

    @property
    def chunk(self):
        """
        Return the compressed chunk data.
        """
        return self._chunk
660
+
661
class HumanCollectionChunkV6(Chunk):
    """
    This represents one collection of humans in a node.

    Args:
        filename (str): The name of the file being read (for error messages).
        obj_type_str (str): The type of object in the chunk (for error messages).
        v6_compression_str (str): The V6 compression string for the chunk.
        node_suid (int): The SUID of the node the chunk belongs to.
        num_humans (int): The number of humans in the collection.
        chunk_size (int): The size of the chunk in bytes.
        chunk (bytes): The compressed chunk data.
    """
    def __init__(self,
                 filename,
                 obj_type_str,
                 v6_compression_str,
                 node_suid,
                 num_humans,
                 chunk_size,
                 chunk):
        super().__init__(filename,
                         obj_type_str,
                         v6_compression_str,
                         node_suid,
                         chunk_size,
                         chunk)
        self._num_humans = num_humans
        return

    def get_json(self):
        """
        Return a list of JSON IndividualHuman dictionaries.
        """
        json_data = super().get_json()
        return json_data['human_collection']

    def set_json(self, human_list):
        """
        Replace the existing JSON with the provided list of IndividualHuman dictionaries.
        Also updates the number of humans and compresses and stores the chunk.
        """
        self._json = {'human_collection': human_list}
        self._num_humans = len(human_list)
        # store() only compresses when no compressed data is present, so any
        # previous payload must be dropped or it would outlive the new list.
        self._chunk = None
        self._chunk_size = 0
        self.store()
        return

    @property
    def num_humans(self):
        """
        Return the number of humans in the collection.
        """
        return self._num_humans
714
+
715
class NodeV6(MutableMapping):
    """
    NodeV6 represents one node in a V6 serialized population file.
    The purpose of this class is to delay loading the full JSON for the node
    until it is actually needed and to provide a backwards compatible interface
    to the human data via the individualHumans property.

    Implementation notes:
      - This class inherits from MutableMapping to provide dictionary-like
        access to the node JSON data. The individualHumans property is
        handled specially to return the human list.
      - __getitem__, __setitem__, __delitem__ each handle the key 'individualHumans'
        specially: reading returns the human list, writing replaces the humans
        of the node and deleting raises an error. This helps to provide
        a backwards compatible interface to the human data.
      - While the node is loaded, the JSON dictionary doubles as the instance
        __dict__ (see load()), so JSON entries are also reachable as attributes.

    Args:
        parent (DtkFileV6): The parent DtkFileV6 object.
        node_chunk (DtkFileV6.Chunk): The chunk containing the node data.
        human_chunk_list (list of DtkFileV6.HumanCollectionChunkV6):
            The list of chunks containing the human data for the node.
    """
    def __init__(self, parent, node_chunk, human_chunk_list):
        super().__init__()
        self.__parent__ = parent
        self._node_chunk = node_chunk
        self._human_list = DtkFileV6.HumanListV6(self, human_chunk_list)
        self._json = None
        return

    def __getitem__(self, key):
        """
        Return the value for the given key in the node JSON dictionary.
        If the key is 'individualHumans', return the human list instead.
        """
        if key == 'individualHumans':
            return self._human_list
        else:
            self.load()
            return self._json[key]

    def __setitem__(self, key, value):
        """
        Set the value for the given key in the node JSON dictionary.
        Setting 'individualHumans' replaces the humans of the node through
        the individualHumans property.
        """
        self.load()
        if key == 'individualHumans':
            self.individualHumans = value
        else:
            self._json[key] = value

    def __delitem__(self, key):
        """
        Delete the given key from the node JSON dictionary.
        Cannot delete the 'individualHumans' key.

        Raises:
            RuntimeError: If the key is 'individualHumans'.
        """
        if key == 'individualHumans':
            raise RuntimeError("Cannot delete individualHumans property")
        self.load()
        del self._json[key]

    def __iter__(self):
        # NOTE(review): after load() the JSON dictionary also holds this object's
        # bookkeeping entries ('__parent__', '_node_chunk', '_human_list', '_json'),
        # so they appear in iteration, len() and repr() as well.
        self.load()
        return iter(self._json)

    def __len__(self):
        self.load()
        return len(self._json)

    def __repr__(self):
        self.load()
        return repr(self._json)

    def keys(self):
        """
        Return the keys of the node JSON dictionary as a list.
        """
        self.load()
        return list(super().keys())

    def load(self):
        """
        Load the node JSON dictionary from the chunk if it is not already loaded.

        Implementation note:
            The JSON dictionary replaces the instance __dict__ and the member
            variables of this class are copied into it. The statement order
            matters here.
        """
        if self._json is None:
            # Remember the current members before __dict__ is swapped out.
            keys = list(self.__dict__.keys())
            values = list(self.__dict__.values())
            tmp_json = self._node_chunk.get_json()
            self.__dict__ = tmp_json
            for key, value in zip(keys, values):
                self.__dict__[key] = value
            self._json = tmp_json
            # Make sure the node chunk no longer holds compressed data.
            self._node_chunk._chunk = None
            self._node_chunk._chunk_size = 0
            gc.collect()
        return

    def store(self):
        """
        Store the node JSON dictionary back to the chunk if it is loaded.

        Implementation note:
            We need to temporarily remove references to the member variables of
            this class from the _json/__dict__ before storing it back to the chunk.
            This keeps us from compressing the wrong stuff. We add them back afterwards.
        """
        if self._json is not None:
            # save member variables
            parent = self.__parent__
            node_chunk = self._node_chunk
            human_list = self._human_list
            tmp_json = self._json

            # remove member variables from json
            keys_to_remove = ['__parent__', '_node_chunk', '_human_list', '_json']
            for key in keys_to_remove:
                del tmp_json[key]

            # compress json
            node_chunk.set_json(tmp_json)

            # restore member variables
            self.__parent__ = parent
            self._node_chunk = node_chunk
            self._human_list = human_list

            # clear json to free memory
            self._json = None
            gc.collect()
        return

    def _clear_human_list(self):
        """
        Clear the human list for the node.
        """
        self.__parent__._remove_humans_for_node(self._node_chunk.node_suid)
        self._human_list = DtkFileV6.HumanListV6(node=self, human_chunk_list=[])
        return

    @property
    def individualHumans(self):
        """
        Return a list of IndividualHuman dictionaries for the node.
        """
        return self._human_list

    @individualHumans.setter
    def individualHumans(self, json_dict_list):
        # Drop the existing humans, then store the new ones as a single collection chunk.
        self._clear_human_list()
        human_chunk = DtkFileV6.HumanCollectionChunkV6(
            filename="no file",
            obj_type_str="human",
            v6_compression_str=None,
            node_suid=self._node_chunk.node_suid,
            num_humans=0,
            chunk_size=0,
            chunk=None)
        human_chunk.set_json(json_dict_list)
        self.__parent__._human_chunks.append(human_chunk)
        self._human_list._add_human_chunk(human_chunk)
        return
876
+
877
class NodeListV6(object):
    """
    List-like container of node objects.

    Items are loaded when they are fetched by index; during iteration each
    item is loaded before it is yielded and stored again once the consumer
    asks for the next one.
    """

    def __init__(self, parent):
        self.__parent__ = parent
        self._node_list = []

    def __iter__(self):
        position = 0
        # The length is re-evaluated on every pass so that items appended
        # while iterating are still visited.
        while position < len(self):
            self._node_list[position].load()
            yield self[position]
            # Re-read the slot: it may have been reassigned by the consumer.
            self._node_list[position].store()
            position += 1

    def __getitem__(self, index):
        selected = self._node_list[index]
        selected.load()
        return selected

    def __setitem__(self, index, node):
        self._node_list[index] = node

    def __len__(self):
        return len(self._node_list)

    def append(self, node_chunk):
        self._node_list.append(node_chunk)
912
+
913
class HumanListV6(object):
    """
    A HumanListV6 provides an interface to a list of IndividualHuman dictionaries
    that may be stored in multiple human collection chunks. Only one chunk's
    list is held as the "current collection" at a time; indexing outside of it
    stores the current chunk and walks to the neighbouring chunk until the
    requested index is covered. This hides the fact that the humans for one
    node may be spread over several collections.

    Indices are 0-based and must satisfy ``0 <= index < len(self)``; anything
    else raises IndexError without disturbing the currently loaded chunk.
    """

    def __init__(self, node, human_chunk_list):
        self._node = node
        self._human_chunk_list = human_chunk_list
        self._num_humans = sum(chunk.num_humans for chunk in human_chunk_list)
        self._human_chunk_index = 0
        self._current_collection = None
        # Inclusive window of list indices covered by _current_collection.
        self._current_min_index = 0
        self._current_max_index = 0
        self.__init_current()

    def __init_current(self):
        """
        Initialize the current human collection chunk.
        """
        if self._human_chunk_list and self._current_collection is None:
            first_chunk = self._human_chunk_list[0]
            self._current_collection = first_chunk.get_json()
            self._current_min_index = 0
            self._current_max_index = len(self._current_collection) - 1
            if len(self._current_collection) != first_chunk.num_humans:
                msg = f"Number of humans in first human chunk [{len(self._current_collection)}]"
                msg += f" does not match num_humans attribute [{first_chunk.num_humans}]"
                raise RuntimeError(msg)

    def _add_human_chunk(self, human_chunk):
        """
        Add a new human collection chunk to the list.
        """
        self._human_chunk_list.append(human_chunk)
        self._num_humans += human_chunk.num_humans
        self.__init_current()

    def _check_index(self, human_index):
        """
        Raise IndexError unless human_index addresses an existing human.
        """
        if not 0 <= human_index < self._num_humans:
            raise IndexError(f"Index {human_index} is out of range for human collection")

    def _step_chunk(self, direction, human_index):
        """
        Make the neighbouring chunk (direction -1 or +1) the current collection.
        """
        new_chunk_index = self._human_chunk_index + direction
        # Validate before touching any state so a failure cannot leave the
        # chunk index pointing outside the chunk list.
        if not 0 <= new_chunk_index < len(self._human_chunk_list):
            raise IndexError(f"Index {human_index} is out of range for human collection")

        self._human_chunk_list[self._human_chunk_index].store()
        self._human_chunk_index = new_chunk_index
        chunk = self._human_chunk_list[new_chunk_index]
        self._current_collection = chunk.get_json()
        size = len(self._current_collection)
        if direction < 0:
            self._current_max_index = self._current_min_index - 1
            self._current_min_index = self._current_max_index - size + 1
        else:
            self._current_min_index = self._current_max_index + 1
            self._current_max_index = self._current_min_index + size - 1
        if size != chunk.num_humans:
            raise RuntimeError(f"current collection = {size} but num_humans = {chunk.num_humans}")

    def __iter__(self):
        human_index = 0
        self.__update_current_collection__(human_index)
        # The length is re-read each pass so humans appended during the
        # iteration are visited as well.
        while human_index < len(self):
            yield self.__getitem__(human_index)
            human_index += 1

    def __update_current_collection__(self, human_index):
        """
        Update/load the current human collection chunk to include the specified human index.
        0-based human_index is the index of the human in the full list of humans for the node.
        0-based _current_min_index and _current_max_index are the min and max indices of the
        currently loaded human collection chunk and are inclusive.

        Does nothing when the list holds no humans. Raises IndexError, before any
        chunk is stored or loaded, when human_index is out of range.
        """
        if self._num_humans == 0:
            return
        self._check_index(human_index)

        while human_index < self._current_min_index:
            self._step_chunk(-1, human_index)
        while human_index > self._current_max_index:
            self._step_chunk(+1, human_index)

    def __getitem__(self, human_index):
        """
        Return the IndividualHuman dictionary at the specified index.
        """
        self._check_index(human_index)
        if human_index < self._current_min_index or human_index > self._current_max_index:
            self.__update_current_collection__(human_index)
        return self._current_collection[human_index - self._current_min_index]

    def __setitem__(self, human_index, value):
        """
        Set the IndividualHuman dictionary at the specified index.
        """
        self._check_index(human_index)
        if human_index < self._current_min_index or human_index > self._current_max_index:
            self.__update_current_collection__(human_index)
        self._current_collection[human_index - self._current_min_index] = value

    def __len__(self):
        return self._num_humans

    def append(self, human_dict):
        """
        Append a human dictionary to the last human collection chunk.

        Raises IndexError when the list has no chunk to append to.
        """
        if not self._human_chunk_list:
            raise IndexError("Cannot append a human: the list has no human collection chunk")

        last_chunk_index = len(self._human_chunk_list) - 1
        if self._human_chunk_index != last_chunk_index:
            # Switch the current collection to the last chunk first.
            self._human_chunk_list[self._human_chunk_index].store()
            self._human_chunk_index = last_chunk_index
            self._current_collection = self._human_chunk_list[last_chunk_index].get_json()
            self._current_min_index = self._num_humans - len(self._current_collection)
            self._current_max_index = self._num_humans - 1
        self._current_collection.append(human_dict)
        self._current_max_index += 1
        self._num_humans += 1
        self._human_chunk_list[self._human_chunk_index]._num_humans += 1
1029
+
1030
def __init__(self, header=None, filename='', handle=None):
    """
    Initialize a DtkFileV6 object from the provided header and file handle.

    The simulation chunk, the node chunks and the human chunks are read from
    the handle, in that order, using the sizes recorded in the header (hex
    strings). The bytes are only wrapped in chunk objects here. When no handle
    is given the object is left empty (no simulation chunk, no nodes).

    Args:
        header (DtkHeaderV6): The header for the file. A default DtkHeaderV6
            is created when omitted.
        filename (str): The name of the file being read (for error messages).
        handle (file-like object): The file handle to read the data from.
    """
    if header is None:
        header = DtkHeaderV6()
    self.__header__ = header
    self._sim_chunk = None
    self._node_chunks = []
    self._human_chunks = []
    self._nodes = DtkFileV6.NodeListV6(self)

    if handle is None:
        return

    sim_chunk_size = int(header.sim_chunk_size, 16)
    sim_chunk_data = handle.read(sim_chunk_size)
    self._sim_chunk = DtkFileV6.Chunk(filename,
                                      "sim",
                                      header.sim_compression,
                                      -1,
                                      sim_chunk_size,
                                      sim_chunk_data)

    for index, size_string in enumerate(header.node_chunk_sizes):
        v6_compression_str = header.node_compressions[index]
        node_suid = int(header.node_suids[index], 16)
        chunk_size = int(size_string, 16)
        chunk_data = handle.read(chunk_size)
        node_chunk = DtkFileV6.Chunk(filename,
                                     "node",
                                     v6_compression_str,
                                     node_suid,
                                     chunk_size,
                                     chunk_data)
        self._node_chunks.append(node_chunk)

    for index, size_string in enumerate(header.human_chunk_sizes):
        v6_compression_str = header.human_compressions[index]
        node_suid = int(header.human_node_suids[index], 16)
        num_humans = int(header.human_num_humans[index], 16)
        chunk_size = int(size_string, 16)
        chunk_data = handle.read(chunk_size)
        human_chunk = DtkFileV6.HumanCollectionChunkV6(filename,
                                                       "human",
                                                       v6_compression_str,
                                                       node_suid,
                                                       num_humans,
                                                       chunk_size,
                                                       chunk_data)
        self._human_chunks.append(human_chunk)

    # Group the human chunks by node SUID in a single pass (file order is
    # preserved inside each group) instead of rescanning every human chunk
    # for every node.
    humans_by_suid = {}
    for human_chunk in self._human_chunks:
        humans_by_suid.setdefault(human_chunk.node_suid, []).append(human_chunk)

    for node_chunk in self._node_chunks:
        # Each node gets its own list object, even if two node chunks were to
        # carry the same SUID.
        node_humans = list(humans_by_suid.get(node_chunk.node_suid, ()))
        self._nodes.append(DtkFileV6.NodeV6(self, node_chunk, node_humans))

    return
1097
+
1098
def _remove_humans_for_node(self, node_suid):
    """
    Drop every human chunk whose node SUID equals ``node_suid``.

    A new list is assigned to ``_human_chunks``; the previous list object is
    not modified.
    """
    self._human_chunks = [
        chunk for chunk in self._human_chunks if chunk.node_suid != node_suid
    ]
1108
+
1109
@property
def header(self):
    """
    The header object this file was created with.
    """
    current_header = self.__header__
    return current_header
1112
+
1113
+ # Optional header entries
1114
@property
def author(self):
    """
    The header's 'author' entry, or an empty string when it is absent.
    """
    if 'author' in self.__header__:
        return self.__header__.author
    return ''

@author.setter
def author(self, value):
    """
    Store ``str(value)`` under the header's 'author' key.
    """
    self.__header__['author'] = str(value)
1122
+
1123
@property
def date(self):
    """
    The header's 'date' entry, or an empty string when it is absent.
    """
    if 'date' in self.__header__:
        return self.__header__.date
    return ''

@date.setter
def date(self, value):
    """
    Store ``str(value)`` under the header's 'date' key.
    """
    self.__header__['date'] = str(value)
1130
+
1131
@property
def tool(self):
    """
    The header's 'tool' entry, or an empty string when it is absent.
    """
    if 'tool' in self.__header__:
        return self.__header__.tool
    return ''

@tool.setter
def tool(self, value):
    """
    Store ``str(value)`` under the header's 'tool' key.
    """
    self.__header__['tool'] = str(value)
1139
+
1140
@property
def version(self):
    """
    The ``version`` attribute of the file's header.
    """
    file_header = self.__header__
    return file_header.version
1143
+
1144
@property
def nodes(self):
    """
    The node list object of this file.

    Use this property to reach the nodes rather than going through the
    simulation property of this class; this keeps the interface backwards
    compatible.
    """
    node_list = self._nodes
    return node_list
1152
+
1153
def _sync_header(self):
    """
    Store every chunk and rewrite the header entries that describe them.

    The simulation chunk, each node obtained by iterating ``self.nodes`` and
    each human chunk are stored first. The header then receives a fresh
    'date' plus the compression strings, sizes, SUIDs and human counts of the
    current chunks; numeric values are written as 16-digit, zero-padded,
    lower-case hex strings.
    """
    self._sim_chunk.store()
    for node in self.nodes:
        node.store()
    for human_chunk in self._human_chunks:
        human_chunk.store()

    def as_hex(number):
        return format(number, '016x')

    header = self.__header__
    node_chunks = self._node_chunks
    human_chunks = self._human_chunks

    # Keys are assigned in a fixed order; for a header that does not yet
    # contain them this determines their order in the mapping.
    header['date'] = time.strftime('%a %b %d %H:%M:%S %Y')
    header['sim_compression'] = self._sim_chunk.v6_compression_str
    header['sim_chunk_size'] = as_hex(self._sim_chunk.chunk_size)
    header['node_compressions'] = [chunk.v6_compression_str for chunk in node_chunks]
    header['node_chunk_sizes'] = [as_hex(chunk.chunk_size) for chunk in node_chunks]
    header['node_suids'] = [as_hex(chunk.node_suid) for chunk in node_chunks]
    header['human_compressions'] = [chunk.v6_compression_str for chunk in human_chunks]
    header['human_chunk_sizes'] = [as_hex(chunk.chunk_size) for chunk in human_chunks]
    header['human_node_suids'] = [as_hex(chunk.node_suid) for chunk in human_chunks]
    header['human_num_humans'] = [as_hex(chunk.num_humans) for chunk in human_chunks]
1180
+
1181
@property
def simulation(self):
    """
    The simulation JSON dictionary, as returned by the simulation chunk.
    Do not try to reach the nodes through this dictionary - use the nodes
    property of this class instead.
    """
    sim_chunk = self._sim_chunk
    return sim_chunk.get_json()

@simulation.setter
def simulation(self, value):
    """
    Hand ``value`` to the simulation chunk after replacing its "nodes"
    entry with an empty list (the passed dictionary itself is modified).
    """
    value["nodes"] = []
    sim_chunk = self._sim_chunk
    sim_chunk.set_json(value)
1194
+
1195
+ # -----------------------------------------------------------------------------
1196
+ # --- Reading Functions
1197
+ # -----------------------------------------------------------------------------
1198
+
1199
+
1200
def read(filename):
    """
    Read a serialized population file and return the matching DtkFile object.

    The magic number and the header are checked first; the header's version
    then selects which DtkFileV1 ... DtkFileV6 class parses the remainder of
    the open file.

    Raises:
        UserWarning: if the header's version matches none of the versions 1-6.
    """
    with open(filename, 'rb') as handle:
        __check_magic_number__(handle)
        header = __read_header__(handle)
        if header.version == 1:
            return DtkFileV1(header, filename=filename, handle=handle)
        if header.version == 2:
            return DtkFileV2(header, filename=filename, handle=handle)
        if header.version == 3:
            return DtkFileV3(header, filename=filename, handle=handle)
        if header.version == 4:
            return DtkFileV4(header, filename=filename, handle=handle)
        if header.version == 5:
            return DtkFileV5(header, filename=filename, handle=handle)
        if header.version == 6:
            return DtkFileV6(header, filename=filename, handle=handle)
        raise UserWarning(f'Unknown serialized population file version: {header.version}')
1222
+
1223
+
1224
def __check_magic_number__(handle):
    """
    Read the first four bytes from ``handle`` and verify the file magic.

    The bytes are decoded leniently: a file that starts with bytes that are
    not valid UTF-8 is reported as having an incorrect magic number instead
    of failing with a UnicodeDecodeError.

    Raises:
        UserWarning: if the four bytes do not spell the expected magic (IDTK).
    """
    # errors="replace" cannot turn wrong bytes into a matching magic; it only
    # keeps arbitrary binary input from raising during the decode.
    magic = handle.read(4).decode(errors="replace")
    if magic != IDTK:
        raise UserWarning(f"File has incorrect magic 'number': '{magic}'")
    return
1229
+
1230
+
1231
def __read_header__(handle):
    """
    Read and validate the header that follows the magic number.

    A 12-character decimal size field is read first, then that many bytes of
    JSON header text. A top-level 'metadata' entry, when present, replaces the
    header dictionary, and a missing 'version' is treated as version 1.
    Versions below 6 produce a DtkHeader, all others a DtkHeaderV6; the
    version and the chunk sizes are checked before the header is returned.
    """
    header_size = int(handle.read(12))
    __check_header_size__(header_size)
    header_json = __try_parse_header_text__(handle.read(header_size))

    if 'metadata' in header_json:
        header_json = header_json["metadata"]

    if 'version' not in header_json:
        header_json['version'] = 1

    if header_json['version'] < 6:
        header = DtkHeader(header_json)
        if header.version < 2:
            # Fill in the fields a version 1 header does not carry.
            header.engine = SNAPPY if header.compressed else NONE
            header.chunkcount = 1
            header.chunksizes = [header.bytecount]

        __check_version__(header.version)

        if header.version < 4:
            header.engine = header.engine.upper()
        else:
            # From version 4 on the value is read from 'compression'.
            header['engine'] = header.compression.upper()
        __check_chunk_sizes__(header.chunksizes)
        return header

    header = DtkHeaderV6(header_json)
    __check_version__(header.version)
    __check_chunk_sizes_v6__(header)
    return header
1267
+
1268
+
1269
def __check_header_size__(header_size):
    """
    Reject header sizes that are zero or negative.

    Raises:
        UserWarning: if ``header_size <= 0``.
    """
    if header_size <= 0:
        message = f"Invalid header size: {header_size}"
        raise UserWarning(message)
1273
+
1274
+
1275
def __try_parse_header_text__(header_text):
    """
    Parse the header text as JSON and return the decoded object.

    Raises:
        UserWarning: if the text is not valid JSON. The underlying decoding
            error is attached as the exception's ``__cause__``.
    """
    try:
        return json.loads(header_text)
    except ValueError as err:
        # Chain explicitly so the traceback shows where decoding failed.
        raise UserWarning(f"Couldn't decode JSON header '{err}'") from err
1281
+
1282
+
1283
def __check_version__(version):
    """
    Reject versions that are not positive or that exceed MAX_VERSION.

    Raises:
        UserWarning: if ``version <= 0`` or ``version > MAX_VERSION``.
    """
    out_of_range = version <= 0 or version > MAX_VERSION
    if out_of_range:
        raise UserWarning(f"Unknown version: {version}")
1287
+
1288
+
1289
def __check_chunk_sizes__(chunk_sizes):
    """
    Reject the first chunk size that is zero or negative.

    Raises:
        UserWarning: naming the first size for which ``size <= 0`` holds.
    """
    offending = next((size for size in chunk_sizes if size <= 0), None)
    if offending is not None:
        raise UserWarning(f"Invalid chunk size: {offending}")
1294
+
1295
+
1296
def __check_chunk_sizes_v6__(header):
    """
    Check that every chunk size recorded in a version 6 header is positive.

    Sizes are stored as hexadecimal strings. The entries read here are
    "sim_chunk_size" (one string) and "node_chunk_sizes" /
    "human_chunk_sizes" (lists of strings). For orientation, a version 6
    header looks like:

        "version": 6,
        "author": "IDM",
        "tool": "DTK",
        "date": "Day Mon day HH:MM:SS year",
        "emod_info": {},
        "sim_compression": "LZ4",
        "sim_chunk_size": "FFFFFFFF",
        "node_suids": [ "00000001", "00000002", ... ],
        "node_compressions": [ "NON", "LZ4", "SNA", ... ],
        "node_chunk_sizes": [ "FFFFFFFF", "FFFFFFFF", ... ],
        "human_compressions": [ "NON", "LZ4", "SNA", ... ],
        "human_node_suids": [ "00000001", "00000002", ... ],
        "human_num_humans": [ "0000000A", "00000014", ... ],
        "human_chunk_sizes": [ "FFFFFFFF", "FFFFFFFF", ... ]

    Raises:
        UserWarning: for the first size that is zero or negative, checked in
            the order simulation, nodes, humans.
    """
    sim_chunk_size = int(header["sim_chunk_size"], 16)
    if sim_chunk_size <= 0:
        raise UserWarning(f"Invalid 'sim_chunk_size': {sim_chunk_size}")

    # (header key, name used in the error message), in checking order.
    size_lists = (
        ("node_chunk_sizes", "node_chunk_size"),
        ("human_chunk_sizes", "human_chunk_size"),
    )
    for key, label in size_lists:
        for size_string in header[key]:
            size = int(size_string, 16)
            if size <= 0:
                raise UserWarning(f"Invalid '{label}': {size}")
1327
+
1328
+ # -----------------------------------------------------------------------------
1329
+ # --- Writing Functions
1330
+ # -----------------------------------------------------------------------------
1331
+
1332
+
1333
def write(dtk_file, filename):
    """
    Write ``dtk_file`` to ``filename`` in the serialized population format.

    The file's header is synchronized first. The output consists of the
    magic number, the 12-character header size, the compact JSON header and
    the chunk data. Headers up to version 3 are wrapped in a 'metadata'
    object; for later versions the key "engine" is written as "compression".
    Files up to version 5 write ``dtk_file.chunks``; version 6 files write
    the simulation chunk followed by the node chunks and the human chunks.
    """
    dtk_file._sync_header()

    compact = (',', ':')
    with open(filename, 'wb') as handle:
        __write_magic_number__(handle)
        print(f"Writing file: {filename}")
        if dtk_file.version <= 3:
            header = json.dumps({'metadata': dtk_file.header}, separators=compact)
        else:
            header = json.dumps(dtk_file.header, separators=compact)
            header = header.replace('"engine"', '"compression"')

        __write_header_size__(len(header), handle)
        __write_header__(header, handle)

        if dtk_file.version <= 5:
            __write_chunks__(dtk_file.chunks, handle)
            return

        # Version 6 layout: simulation, then nodes, then humans.
        handle.write(dtk_file._sim_chunk._chunk)
        for chunk in dtk_file._node_chunks:
            handle.write(chunk._chunk)
        for chunk in dtk_file._human_chunks:
            handle.write(chunk._chunk)
1357
+
1358
+
1359
def __write_magic_number__(handle):
    """
    Write the four-character file magic ('IDTK', encoded) to ``handle``.
    """
    magic = 'IDTK'.encode()
    handle.write(magic)
1362
+
1363
+
1364
def __write_header_size__(size, handle):
    """
    Write ``size`` to ``handle`` as a decimal value, right aligned in a
    12 character field.
    """
    field = f'{size:>12}'
    handle.write(field.encode())
1368
+
1369
+
1370
def __write_header__(string, handle):
    """
    Encode the header text and write the resulting bytes to ``handle``.
    """
    encoded = string.encode()
    handle.write(encoded)
1373
+
1374
+
1375
def __write_chunks__(chunks, handle):
    """
    Write every chunk to ``handle`` in order.

    Binary chunks (bytes, bytearray, memoryview, including subclasses) are
    written as they are; any other chunk is expected to be text and is
    encoded before it is written.
    """
    for chunk in chunks:
        # isinstance() rather than an exact type check: bytearray, memoryview
        # and bytes subclasses have no encode() and can be written directly.
        if isinstance(chunk, (bytes, bytearray, memoryview)):
            handle.write(chunk)
        else:
            handle.write(chunk.encode())
    return