cosmoglint 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1022 @@
1
+ # HEADER
2
+
3
+ """
4
+ Routine for reading Pinocchio's binary catalogs and PLCs.
5
+ Usage:
6
+
7
+ # CATALOGS
8
+ import ReadPinocchio5 as rp
9
+ mycat = rp.catalog("pinocchio.0.0000.example.catalog.out")
10
+ print(mycat.Mass)
11
+
12
+ # PLC
13
+ import ReadPinocchio5 as rp
14
+ myplc = rp.plc("pinocchio.example.plc.out")
15
+ print(myplc.redshift)
16
+
17
+ # HISTORIES
18
+ import ReadPinocchio5 as rp
19
+ myhist = rp.histories("pinocchio.example.histories.out")
20
+ print(myhist.name)
21
+
22
+ Written by Pierluigi Monaco, Matteo Biagetti and Emiliano Munari
23
+
24
+ LAST MODIFIED:
25
+ Pierluigi Monaco 13/3/2025
26
+
27
+ """
28
+
29
+
30
+ import numpy as np
31
+ import os
32
+ import sys
33
+ import copy
34
+ import struct
35
+
36
+
37
+ # Module-wide debug switch: when True every reader forces silent=False and
+ # prints additional progress messages while parsing the files.
+ VERBOSE=False
38
+
39
class catalog:

    '''
    Reads a pinocchio catalog at fixed redshift
    Usage: mycat=rp.catalog(filename, silent=False, first_file=None, last_file=None)

    filename: name of catalog file, ending with .out. The code will recognise if the
        catalog is written in several files (filename.0, filename.1, ...)
    silent: if True, the script does not give messages in the standard output
        (error messages are always printed; the module flag VERBOSE overrides silent)
    first_file, last_file: if the catalog is written in several files, they allow to
        select a limited range of output files (last_file is included in the range)

    Returns:
    mycat.data: the catalog, a structured numpy array
    mycat.cat_dtype: the data type of the catalog; for the new full format:
       [('name',   numpy.int64),
        ('Mass',   numpy.float32),
        ('pos',    numpy.float32, 3),
        ('vel',    numpy.float32, 3),
        ('posin',  numpy.float32, 3),   (absent in the 40-byte format)
        ('npart',  numpy.int32)]        (absent in light format)
       (the 96-byte classic format uses numpy.float64 for the real fields)
    mycat.Nhalos: number of halos in the catalog
    mycat.Nfiles: number of files in which the catalog is written

    mycat.Mass: Mass field (for faster access)
    mycat.pos: pos field
    mycat.vel: vel field
    mycat.posin: posin field (only when the format stores it)
    mycat.npart, mycat.Npart: npart field (only when the format stores it)

    On error a message is printed and the constructor returns early, so the
    attributes above may be missing.
    '''

    def __init__(self,filename,silent=False,first_file=None,last_file=None):

        if VERBOSE:
            silent=False

        # checks that the filename contains 'catalog'
        if 'catalog' not in filename:
            print("Are you sure you are providing the right file name?")
            if 'plc' in filename:
                print("...this looks like a plc file, please read it with rp.plc(filename)")
            elif 'histories' in filename:
                print("...this looks like a histories file, please read it with rp.histories(filename)")
            return None

        # checks that the input file ends by ".out"
        if not filename.endswith('.out'):
            print("The catalog file should end with .out, the file number extension will be checked by the code")
            return None

        # checks that the file exists, or that there are multiple files, and in case count them.
        # 'split' remembers that the files carry a numeric suffix: a split catalog
        # made of the single file filename.0 must not be opened as 'filename'
        split = not os.path.exists(filename)
        if split:
            if not os.path.exists(filename+'.0'):
                print("file {} or {} not found:".format(filename,filename+'.0'))
                return None
            Nfiles=1
            while os.path.exists(filename+'.{}'.format(Nfiles)):
                Nfiles+=1
            if not silent:
                print("The catalog is written in {} files".format(Nfiles))
        else:
            Nfiles=1
            if not silent:
                print("The catalog is written in 1 file")

        # opens the (first) file and reads the header
        first_name = filename+'.0' if split else filename
        if not silent:
            print('reading header of file '+first_name)
        reading = np.fromfile(first_name,dtype=np.int32,count=10)
        if reading.size<3:
            print("ERROR: I do not recognise the data structure!")
            return None

        # header values are converted to python integers so that the size
        # arithmetic below cannot overflow 32 bits on large files

        # number of tasks that write into a single file
        NTasksPerFile = int(reading[1])
        if not silent:
            print('This file has been written by {} tasks'.format(NTasksPerFile))

        # the header gives either the number of slices (always < 10) or the record length
        # in the first case the record length can be read from the 8th integer
        if reading[2]>10:
            newRun=True
            record_length=int(reading[2])
            Nslices=1
            if not silent:
                print('This is new output format, record length: {}'.format(record_length))
        else:
            if reading.size<8:
                print("ERROR: I do not recognise the data structure!")
                return None
            newRun=False
            record_length=int(reading[7])
            Nslices=int(reading[2])
            if not silent:
                print('This is classic output format, record length: {}'.format(record_length))
                if Nslices==1:
                    print('The box has been fragmented in 1 slice')
                else:
                    print('The box has been fragmented in {} slices'.format(Nslices))

        # sets the record: cat_dtype is what the user gets, stored_dtype is the
        # layout on disc (it may add record markers 'fort'/'trof' and padding)
        fort = [('fort', np.int32)]
        pad  = [('pad' , np.int32)]
        trof = [('trof', np.int32)]

        if record_length==96:   # this is the classic format in double precision

            self.cat_dtype=[ ('name',   np.int64),
                             ('Mass',   np.float64),
                             ('posin',  np.float64, 3),
                             ('pos',    np.float64, 3),
                             ('vel',    np.float64, 3),
                             ('npart',  np.int32) ]
            stored_dtype = fort + self.cat_dtype + pad + trof

        elif record_length==56:

            if newRun:          # the new format has posin in a different position

                self.cat_dtype=[ ('name',   np.int64),
                                 ('Mass',   np.float32),
                                 ('pos',    np.float32, 3),
                                 ('vel',    np.float32, 3),
                                 ('posin',  np.float32, 3),
                                 ('npart',  np.int32) ]
                stored_dtype = self.cat_dtype + pad

            else:               # this is the classic format in single precision

                self.cat_dtype=[ ('name',   np.int64),
                                 ('Mass',   np.float32),
                                 ('posin',  np.float32, 3),
                                 ('pos',    np.float32, 3),
                                 ('vel',    np.float32, 3),
                                 ('npart',  np.int32) ]
                stored_dtype = fort + self.cat_dtype + pad + trof

        elif record_length==48:   # this is the new light format

            self.cat_dtype=[ ('name',   np.int64),
                             ('Mass',   np.float32),
                             ('pos',    np.float32, 3),
                             ('vel',    np.float32, 3),
                             ('posin',  np.float32, 3) ]
            stored_dtype = self.cat_dtype

        elif record_length==40:   # this was used in NewClusterMocks

            self.cat_dtype=[ ('name',   np.int64),
                             ('Mass',   np.float32),
                             ('pos',    np.float32, 3),
                             ('vel',    np.float32, 3),
                             ('npart',  np.int32) ]
            stored_dtype = fort + self.cat_dtype + trof

        else:
            print("sorry, I do not recognize this record length")
            return None

        # bytes taken on disc by one halo: classic files wrap each record with
        # two 4-byte markers. The stored layout must match, otherwise this
        # format/record length combination is not one this reader knows
        item_size = record_length if newRun else record_length+8
        if np.dtype(stored_dtype).itemsize != item_size:
            print("sorry, I do not recognize this record length")
            return None

        # decides what files to read
        if Nfiles>1:
            if first_file is None or first_file<0:
                first_file=0
            elif first_file>Nfiles:
                first_file=Nfiles
            if last_file is None:
                last_file=Nfiles
            else:
                last_file += 1
                if last_file<first_file:
                    last_file=first_file+1
                # clamp last, so that the range never points past the last file
                if last_file>Nfiles:
                    last_file=Nfiles
            # this is to be used to define a pythonic range
            if not silent:
                print("I will read files in the python range from {} to {}".format(first_file,last_file))
        else:
            first_file=0
            last_file=Nfiles
        self.Nfiles=Nfiles

        # prepares to read the file(s)
        self.data=None
        NhalosPerFile=np.zeros(Nfiles,dtype=np.int64)
        Nblocks=NTasksPerFile * Nslices   # this is the number of blocks to read

        # loops on the files to be read
        for myfile in range(first_file,last_file):

            myfname = filename+'.{}'.format(myfile) if split else filename

            if VERBOSE:
                print("reading file {}".format(myfname))

            # reads the file as a binary object
            with open(myfname,'rb') as f:
                bindata=f.read()
            filesize = len(bindata)

            # reads the number of halos contained in each block header;
            # a trailing fragment shorter than a header is left to the size check
            pos=16
            vechalo=[]
            while pos+12 <= filesize:
                vec=struct.unpack_from('iii',bindata,pos)[1]
                pos+=12
                vechalo.append(vec)
                if vec>0:
                    if newRun:
                        pos += 8+vec*record_length
                    else:
                        pos += vec*item_size

            nhere=sum(vechalo)
            NhalosPerFile[myfile]=nhere
            Nwritten=sum(1 for vec in vechalo if vec>0)   # some tasks may have no halos to write

            if VERBOSE:
                print(f"Found {Nwritten} non-void blocks in this file over {Nblocks}")

            # checks that the length of data is as expected
            if newRun:
                file_length = 16 + Nblocks*12 + Nwritten*8 + nhere*record_length
            else:
                file_length = 16 + Nblocks*12 + nhere*item_size

            if file_length != filesize:
                print(f'ERROR: inconsistency in the file size, should be {file_length} but I found {filesize} bytes')
                return None
            elif VERBOSE:
                print(f'predicted file length {file_length} matches with file size {filesize}')

            if nhere>0:

                # format to select information from the byte object:
                # 'x' skips headers and markers, 's' keeps the halo records
                parts=['16x']
                for vec in vechalo[:Nblocks]:
                    if vec<=0:
                        parts.append('12x')
                    elif newRun:
                        parts.append('16x {}s 4x'.format(vec*record_length))
                    else:
                        parts.append('12x {}s'.format(vec*item_size))

                # removes all unwanted information from the binary structure
                try:
                    cleaned = b''.join(struct.unpack(' '.join(parts), bindata))
                except struct.error:
                    print("ERROR: I do not recognise the data structure!")
                    return None
                del bindata

                # reads the catalog from the cleaned binary structure
                thiscat = np.frombuffer(cleaned, dtype=stored_dtype)
                del cleaned

                # appends the wanted columns; the explicit start index is needed
                # because a negative slice of length zero would select everything
                if self.data is None:
                    start=0
                    self.data = np.zeros(nhere, dtype=self.cat_dtype)
                else:
                    start=self.data.shape[0]
                    self.data.resize(start+nhere)

                for name in self.data.dtype.names:
                    self.data[name][start:]=thiscat[name]
                del thiscat

            if not silent:
                print("done with file {}".format(myfname))

        # nothing read (empty file selection or no halos): return an empty catalog
        if self.data is None:
            self.data = np.zeros(0, dtype=self.cat_dtype)

        if not silent:
            print("Reading catalog done, {} groups found".format(NhalosPerFile.sum()))

        self.Nhalos = len(self.data)

        # Create few pointers to make it compatible with previous versions
        self.Mass  = self.data['Mass']
        self.pos   = self.data['pos']
        self.vel   = self.data['vel']
        if 'npart' in self.data.dtype.names:
            # Npart is the historical name, npart the documented one
            self.Npart = self.data['npart']
            self.npart = self.Npart
        if 'posin' in self.data.dtype.names:
            self.posin = self.data['posin']
370
+
371
class plc:

    '''
    Reads a pinocchio catalog on the past light cone
    Usage: myplc=rp.plc(filename, silent=False, first_file=None, last_file=None, onlyNfiles=False)

    filename: name of catalog file, ending with .out. The code will recognise if the
        catalog is written in several files (filename.0, filename.1, ...)
    silent: if True, the script does not give messages in the standard output
        (error messages are always printed; the module flag VERBOSE overrides silent)
    first_file, last_file: if the catalog is written in several files, they allow to
        select a limited range of output files (last_file is included in the range)
    onlyNfiles: it computes Nfiles and exits after setting myplc.Nfiles, without
        reading the catalog

    Returns:
    myplc.data: the catalog, a structured numpy array
    myplc.cat_dtype: the data type of the catalog:
       [('name',  numpy.uint64),
        ('truez', numpy.float32),
        ('pos',   numpy.float32, 3),   (absent in light format)
        ('vel',   numpy.float32, 3),   (absent in light format)
        ('Mass',  numpy.float32),
        ('theta', numpy.float32),
        ('phi',   numpy.float32),
        ('vlos',  numpy.float32),      (absent in light format)
        ('obsz',  numpy.float32)]
       (the 104-byte classic format uses numpy.float64 for the real fields)
    myplc.Nhalos: number of halos in the catalog
    myplc.Nfiles: number of files in which the catalog is written

    On error a message is printed and the constructor returns early, so the
    attributes above may be missing.
    '''

    def __init__(self,filename,silent=False,first_file=None,last_file=None,onlyNfiles=False):

        if VERBOSE:
            silent=False

        # checks that the filename contains 'plc'
        if 'plc' not in filename:
            print("Are you sure you are providing the right file name?")
            if 'catalog' in filename:
                print("...this looks like a catalog file, please read it with rp.catalog(filename)")
            elif 'histories' in filename:
                print("...this looks like a histories file, please read it with rp.histories(filename)")
            return None

        # checks that the input file ends by ".out"
        if not filename.endswith('.out'):
            print("The catalog file should end with .out, the file number extension will be checked by the code")
            return None

        # checks that the file exists, or that there are multiple files.
        # 'split' remembers that the files carry a numeric suffix: a split catalog
        # made of the single file filename.0 must not be opened as 'filename'
        split = not os.path.exists(filename)
        if split:
            if not os.path.exists(filename+'.0'):
                print("file {} or {} not found:".format(filename,filename+'.0'))
                return None
            Nfiles=1
            while os.path.exists(filename+'.{}'.format(Nfiles)):
                Nfiles+=1
            if not silent:
                print("The catalog is written in {} files".format(Nfiles))
        else:
            Nfiles=1
            if not silent:
                print("The catalog is written in 1 file")

        self.Nfiles=Nfiles
        if onlyNfiles:
            return

        # opens the (first) file and reads the record length
        first_name = filename+'.0' if split else filename
        if not silent:
            print('reading header of file '+first_name)
        reading = np.fromfile(first_name,dtype=np.int32,count=3)
        if reading.size<2:
            print("ERROR: I do not recognise the data structure!")
            return None

        # new files start with a 4-byte marker followed by the record length,
        # classic files start directly with the marker of the first record.
        # Values are converted to python integers to avoid 32-bit overflows
        if reading[0]==4:
            record_length=int(reading[1])
            newRun=True
            if not silent:
                if record_length==32:
                    print('This is new light output format, record length: {}'.format(record_length))
                else:
                    print('This is new full output format, record length: {}'.format(record_length))
        else:
            record_length=int(reading[0])
            newRun=False
            if not silent:
                print('This is classic output format, record length: {}'.format(record_length))

        # sets the record: cat_dtype is what the user gets, stored_dtype is the
        # layout on disc (it may add record markers 'fort'/'trof' and padding)
        fort = [('fort', np.int32)]
        trof = [('trof', np.int32)]

        if record_length==104 or record_length==56:

            real = np.float64 if record_length==104 else np.float32
            self.cat_dtype = [ ( 'name'  , np.uint64 ),
                               ( 'truez' , real ),
                               ( 'pos'   , real, 3 ),
                               ( 'vel'   , real, 3 ),
                               ( 'Mass'  , real ),
                               ( 'theta' , real ),
                               ( 'phi'   , real ),
                               ( 'vlos'  , real ),
                               ( 'obsz'  , real ) ]
            if newRun and record_length==56:
                stored_dtype = self.cat_dtype
            else:
                stored_dtype = fort + self.cat_dtype + trof

        elif record_length==32:

            self.cat_dtype = [ ( 'name'  , np.uint64 ),
                               ( 'truez' , np.float32 ),
                               ( 'Mass'  , np.float32 ),
                               ( 'theta' , np.float32 ),
                               ( 'phi'   , np.float32 ),
                               ( 'obsz'  , np.float32 ) ]
            stored_dtype = self.cat_dtype + [ ( 'pad' , np.float32 ) ]

        else:
            print("sorry, I do not recognize this record length")
            return None

        # bytes taken on disc by one halo: classic files wrap each record with
        # two 4-byte markers. The stored layout must match, otherwise this
        # format/record length combination is not one this reader knows
        item_size = record_length if newRun else record_length+8
        if np.dtype(stored_dtype).itemsize != item_size:
            print("sorry, I do not recognize this record length")
            return None

        # decides what files to read
        if Nfiles>1:
            if first_file is None or first_file<0:
                first_file=0
            elif first_file>Nfiles:
                first_file=Nfiles
            if last_file is None:
                last_file=Nfiles
            else:
                last_file += 1
                if last_file<first_file:
                    last_file=first_file+1
                # clamp last, so that the range never points past the last file
                if last_file>Nfiles:
                    last_file=Nfiles
            # this is to be used to define a pythonic range
            if not silent:
                print("I will read files in the python range from {} to {}".format(first_file,last_file))
        else:
            first_file=0
            last_file=Nfiles

        # prepares to read the file(s)
        self.data=None
        NhalosPerFile=np.zeros(Nfiles,dtype=np.int64)

        # loops on the files to be read
        for myfile in range(first_file,last_file):

            myfname = filename+'.{}'.format(myfile) if split else filename

            if VERBOSE:
                print("reading file {}".format(myfname))

            # reads the file as a binary object
            with open(myfname,'rb') as f:
                bindata=f.read()
            filesize = len(bindata)

            if newRun:

                # scrolls the file to map the blocks; a trailing fragment
                # shorter than a block header is left to the size check
                if VERBOSE:
                    print("scrolling file to reconstruct the blocks:")
                parts=['12x']
                pos=12
                vechalo=[]
                while pos+12 <= filesize:
                    # reads the number of halos to read
                    vec=struct.unpack_from('iii',bindata,pos)[1]
                    pos+=12
                    if vec==0:
                        print("THIS SHOULD NOT HAPPEN!")
                        return None
                    if vec<0:
                        print("ERROR: I do not recognise the data structure!")
                        return None
                    vechalo.append(vec)
                    pos += 8+vec*record_length
                    parts.append('16x {}s 4x'.format(vec*record_length))

                Nblocks=len(vechalo)
                nhere=sum(vechalo)

                if VERBOSE:
                    print(f"data are written in {Nblocks} blocks")

                file_length = 12 + Nblocks*20 + nhere*record_length

            else:

                # classic files are a plain sequence of wrapped records
                nhere = filesize // item_size
                file_length = nhere*item_size

            NhalosPerFile[myfile]=nhere

            # the size is checked before decoding, so that a damaged file is
            # reported instead of raising inside the decoding calls
            if file_length != filesize:
                print(f'ERROR: inconsistency in the file size, should be {file_length} but I found {filesize} bytes')
                return None
            elif VERBOSE:
                print(f'predicted file length {file_length} matches with file size {filesize}')

            if nhere>0:

                if newRun:
                    # removes all unwanted information from the binary structure
                    try:
                        cleaned = b''.join(struct.unpack(' '.join(parts), bindata))
                    except struct.error:
                        print("ERROR: I do not recognise the data structure!")
                        return None
                    if VERBOSE:
                        print("cleaning of binary object done")
                    del bindata
                    # reads the catalog from the cleaned binary structure
                    thiscat = np.frombuffer(cleaned, dtype=stored_dtype)
                    del cleaned
                else:
                    # reads the catalog directly from the binary object
                    thiscat = np.frombuffer(bindata, dtype=stored_dtype)
                    del bindata
                if VERBOSE:
                    print("catalog extracted")

                # appends the wanted columns; the explicit start index is needed
                # because a negative slice of length zero would select everything
                if self.data is None:
                    start=0
                    self.data = np.zeros(nhere, dtype=self.cat_dtype)
                else:
                    start=self.data.shape[0]
                    self.data.resize(start+nhere)

                for name in self.data.dtype.names:
                    self.data[name][start:]=thiscat[name]
                del thiscat

            if not silent:
                print("done with file {}".format(myfname))

        # nothing read (empty file selection or no halos): return an empty catalog
        if self.data is None:
            self.data = np.zeros(0, dtype=self.cat_dtype)

        self.Nhalos = len(self.data)

        if not silent:
            print("Reading plc done, {} groups found".format(NhalosPerFile.sum()))
681
+
682
+
683
+
684
class histories:
    '''
    Reads a pinocchio histories catalog
    Usage: myhist=rp.histories(filename, silent=False, first_file=None, last_file=None)

    filename: name of catalog file, ending with .out. The code will recognise if the
        catalog is written in several files (filename.0, filename.1, ...)
    silent: if True, the script does not give messages in the standard output
        (error messages are always printed; the module flag VERBOSE overrides silent)
    first_file, last_file: if the catalog is written in several files, they allow to
        select a limited range of output files (last_file is included in the range)

    Returns:
    myhist.data: the catalog, a structured numpy array
    myhist.cat_dtype: the data type of the catalog:
       [('name',           numpy.uint64),
        ('nickname',       numpy.int32),
        ('link',           numpy.int32),
        ('merged_with',    numpy.int32),
        ('mass_at_merger', numpy.int32),
        ('mass_of_main',   numpy.int32),
        ('z_merging',      numpy.float32),
        ('z_peak',         numpy.float32),
        ('z_appear',       numpy.float32)]
       (the 56-byte classic format uses numpy.float64 for the redshifts)
    myhist.Nfiles: number of files in which the catalog is written
    myhist.Ntrees: number of trees in the catalog
    myhist.Nbranches_tot: total number of branches in the catalog
    myhist.Nbranches: array of size Ntrees giving the number of branches for each tree
    myhist.pointers: array of size Ntrees giving a pointer to the start of the tree in the catalog

    On error a message is printed and the constructor returns early, so the
    attributes above may be missing.
    '''

    def __init__(self,filename,silent=False,first_file=None,last_file=None):

        if VERBOSE:
            silent=False

        # checks that the filename contains 'histories'
        if 'histories' not in filename:
            print("Are you sure you are providing the right file name?")
            if 'plc' in filename:
                print("...this looks like a plc file, please read it with rp.plc(filename)")
            elif 'catalog' in filename:
                print("...this looks like a catalog file, please read it with rp.catalog(filename)")
            return None

        # checks that the input file ends by ".out"
        if not filename.endswith('.out'):
            print("The history file should end with .out, the file number extension will be checked by the code")
            return None

        # checks that the file exists, or that there are multiple files.
        # 'split' remembers that the files carry a numeric suffix: a split catalog
        # made of the single file filename.0 must not be opened as 'filename'
        split = not os.path.exists(filename)
        if split:
            if not os.path.exists(filename+'.0'):
                print("file {} or {} not found:".format(filename,filename+'.0'))
                return None
            Nfiles=1
            while os.path.exists(filename+'.{}'.format(Nfiles)):
                Nfiles+=1
            if not silent:
                print("The catalog is written in {} files".format(Nfiles))
        else:
            Nfiles=1
            if not silent:
                print("The catalog is written in 1 file")

        # opens the (first) file and reads the header
        first_name = filename+'.0' if split else filename
        if not silent:
            print('opening file '+first_name)
        reading = np.fromfile(first_name,dtype=np.int32,count=12)
        FileLength = os.path.getsize(first_name)
        if reading.size<2:
            print("ERROR: I do not recognise the data structure!")
            return None

        # reads the header and the record length; values are converted to
        # python integers so that the size arithmetic cannot overflow 32 bits
        Light=False
        if reading[1]>10:
            if reading.size<6:
                print("ERROR: I do not recognise the data structure!")
                return None
            newRun=True
            record_length=int(reading[1])
            Nslices=1
            # a light file is just the 28-byte header followed by the branches
            Light = FileLength==record_length*int(reading[5])+4*7
            if not silent:
                if not Light:
                    print('This is new output format, record length: {}'.format(record_length))
                else:
                    print('This is new light output format, record length: {}'.format(record_length))
        else:

            if not silent:
                print("WARNING: reading of V4 histories is very slow")

            if reading.size<12:
                print("ERROR: I do not recognise the data structure!")
                return None
            newRun=False
            record_length=int(reading[11])
            Nslices=int(reading[1])
            if not silent:
                print('This is classic output format, record length: {}'.format(record_length))
                if Nslices==1:
                    print('The box has been fragmented in 1 slice')
                else:
                    print('The box has been fragmented in {} slices'.format(Nslices))

        # sets the record
        if record_length==40:
            real=np.float32
        elif record_length==56:
            real=np.float64
        else:
            print("sorry, I do not recognize this record length")
            return None

        self.cat_dtype=[ ( 'name'           , np.uint64 ),
                         ( 'nickname'       , np.int32 ),
                         ( 'link'           , np.int32 ),
                         ( 'merged_with'    , np.int32 ),
                         ( 'mass_at_merger' , np.int32 ),
                         ( 'mass_of_main'   , np.int32 ),
                         ( 'z_merging'      , real ),
                         ( 'z_peak'         , real ),
                         ( 'z_appear'       , real ) ]

        # layout of a classic record on disc: markers around the record and a
        # padding integer before the redshifts. It is only known for 56 bytes
        stored_dtype=None
        if record_length==56:
            stored_dtype = ( [ ( 'fort' , np.int32 ) ] + self.cat_dtype[:6]
                             + [ ( 'pad' , np.int32 ) ] + self.cat_dtype[6:]
                             + [ ( 'trof' , np.int32 ) ] )

        # new files are decoded with cat_dtype, that must then be as long as the
        # record; classic files need stored_dtype. Other combinations are not
        # known to this reader and are refused instead of being misread
        if newRun:
            supported = np.dtype(self.cat_dtype).itemsize==record_length
        else:
            supported = stored_dtype is not None
        if not supported:
            print("sorry, I do not recognize this record length")
            return None

        self.Nfiles=Nfiles
        # decides what files to read
        if Nfiles>1:
            if first_file is None or first_file<0:
                first_file=0
            elif first_file>Nfiles:
                first_file=Nfiles
            if last_file is None:
                last_file=Nfiles
            else:
                last_file += 1
                if last_file<first_file:
                    last_file=first_file+1
                # clamp last, so that the range never points past the last file
                if last_file>Nfiles:
                    last_file=Nfiles
            # this is to be used to define a pythonic range
            if not silent:
                print("I will read files in the python range from {} to {}".format(first_file,last_file))
        else:
            first_file=0
            last_file=Nfiles

        # prepares to read the file(s)
        self.data=None
        self.Nbranches=None
        TTotal=0
        BTotal=0

        for myfile in range(first_file,last_file):

            myfname = filename+'.{}'.format(myfile) if split else filename

            if VERBOSE:
                print("reading file {}".format(myfname))

            # reads the file as a binary object
            with open(myfname,'rb') as f:
                bindata=f.read()
            FileLength = len(bindata)
            if FileLength<24:
                print("ERROR: I do not recognise the data structure!")
                return None
            # number of trees and branches as declared at bytes 16-24
            Tthisfile, Bthisfile = struct.unpack_from('ii',bindata,16)

            if newRun and Light:

                # light format is straightforward to read
                if VERBOSE:
                    print("reading in data")

                file_length = 28 + Bthisfile*record_length
                if not self._size_ok(myfname,file_length,FileLength):
                    return None

                thiscat = np.frombuffer(bindata[28:], dtype=self.cat_dtype)
                del bindata

            elif newRun:

                # reading standard V5 format: scrolls the blocks, each giving
                # the branches per tree followed by the branches themselves
                parts=['28x']
                counts=[]
                pos=28
                Nblocks=0
                try:
                    # a trailing fragment shorter than a block header is left
                    # to the size check below
                    while pos+16 <= FileLength:
                        # reads the number of trees in this block
                        Nthisblock = struct.unpack_from('iii',bindata,pos)[1]
                        pos+=16
                        if Nthisblock>0:
                            Nbranches = np.frombuffer(bindata[pos:pos+Nthisblock*4],
                                                      dtype=np.int32,count=Nthisblock)
                        else:
                            Nbranches = np.zeros(0,dtype=np.int32)
                        Bthisblock=int(Nbranches.sum(dtype=np.int64))
                        pos+=Nthisblock*4 + 8 + Bthisblock*record_length + 4
                        parts.append('{}x {}s 4x'.format(12+Nthisblock*4+12,Bthisblock*record_length))
                        counts.append(Nbranches)
                        Nblocks+=1
                except (struct.error, ValueError):
                    print("ERROR: I do not recognise the data structure!")
                    return None

                if VERBOSE:
                    print(f"data are written in {Nblocks} blocks")

                file_length = 28 + Nblocks*28 + Tthisfile*4 + Bthisfile*record_length
                if not self._size_ok(myfname,file_length,FileLength):
                    return None

                # stores the branches per tree; the explicit start index keeps
                # this correct also when nothing is added
                if counts:
                    newcounts = np.concatenate(counts)
                else:
                    newcounts = np.zeros(0,dtype=np.int32)
                del counts
                if self.Nbranches is None:
                    self.Nbranches = newcounts
                else:
                    start=self.Nbranches.shape[0]
                    self.Nbranches.resize(start+len(newcounts))
                    self.Nbranches[start:]=newcounts
                del newcounts

                # removes all unwanted information from the binary structure
                try:
                    cleaned = b''.join(struct.unpack(' '.join(parts), bindata))
                    if VERBOSE:
                        print("cleaning of binary object done")
                except struct.error:
                    print("ERROR: I do not recognise the data structure!")
                    return None
                del bindata

                if len(cleaned)>0:
                    thiscat = np.frombuffer(cleaned, dtype=self.cat_dtype)
                else:
                    thiscat = np.zeros(0, dtype=self.cat_dtype)
                del cleaned

            else:

                item_size=record_length+8

                # first reads the total number of trees and branches, summing
                # over all the slices found in the file
                pos=12
                Bthisfile=0
                Tthisfile=0
                while pos+16 <= FileLength:
                    Nt, Nb = struct.unpack_from('iiii',bindata,pos)[1:3]
                    pos+=16 + Nt*16 + Nb*item_size
                    Tthisfile+=Nt
                    Bthisfile+=Nb

                file_length = 12 + Nslices*16 + Tthisfile*16 + Bthisfile*item_size
                if not self._size_ok(myfname,file_length,FileLength):
                    return None

                if self.data is None:
                    cNb=0
                    cTr=0
                    self.data = np.empty(Bthisfile, dtype=self.cat_dtype)
                    self.Nbranches = np.empty(Tthisfile, dtype=np.int32)
                else:
                    cNb=self.Nbranches.shape[0]
                    cTr=self.data.shape[0]
                    self.data.resize(cTr+Bthisfile)
                    self.Nbranches.resize(cNb+Tthisfile)

                # then reads tree by tree: cNb counts the trees, cTr the branches
                names=self.data.dtype.names
                try:
                    pos=12
                    while pos+16 <= FileLength:
                        Nt = struct.unpack_from('iiii',bindata,pos)[1]
                        pos+=16
                        for t in range(Nt):
                            Nb = struct.unpack_from('iiii',bindata,pos)[2]
                            pos+=16
                            self.Nbranches[cNb]=Nb
                            cNb+=1
                            if Nb>0:
                                thiscat=np.frombuffer(bindata[pos:pos+Nb*item_size],dtype=stored_dtype,count=Nb)
                                for name in names:
                                    self.data[name][cTr:cTr+Nb]=thiscat[name]
                                del thiscat
                            pos+=Nb*item_size
                            cTr+=Nb
                except (struct.error, ValueError, IndexError):
                    print("ERROR: I do not recognise the data structure!")
                    return None
                del bindata
                thiscat=None

            # the totals describe what has been stored: for classic files these
            # are the sums over the slices computed above
            TTotal += Tthisfile
            BTotal += Bthisfile

            # appends the branches of a new-format file
            if thiscat is not None:
                if self.data is None:
                    self.data = np.copy(thiscat)
                else:
                    start=self.data.shape[0]
                    self.data.resize(start+len(thiscat))
                    self.data[start:]=thiscat
                del thiscat

            if VERBOSE:
                print("File size of {} is as expected".format(myfname))

        # nothing read (empty file selection): return an empty catalog
        if self.data is None:
            self.data = np.zeros(0, dtype=self.cat_dtype)
        if self.Nbranches is None:
            self.Nbranches = np.zeros(0, dtype=np.int32)

        if newRun and not Light:
            # cumulate pointers
            self.pointers = np.cumsum(np.insert(self.Nbranches,0,0))[:TTotal]
        else:
            if not silent:
                print("Building pointers...")
            # the nickname of the first branch of a tree is used as the number
            # of branches of that tree; 64-bit pointers cannot overflow
            po = np.empty(TTotal+1, dtype=np.int64)
            self.Nbranches = np.empty(TTotal, dtype=np.int32)
            nickname = self.data['nickname']

            po[0]=0
            for i in range(TTotal):
                self.Nbranches[i]=nickname[po[i]]
                po[i+1]=po[i]+self.Nbranches[i]

            self.pointers = po[0:TTotal]
            del po

        self.Ntrees=np.int64(TTotal)
        self.Nbranches_tot=np.int64(BTotal)

        if not silent:
            print("Reading catalog done")

    @staticmethod
    def _size_ok(myfname,expected,found):
        '''Prints an error and returns False when the file size is not the expected one'''
        if expected != found:
            print("Error: inconsistent length for the file {}, I expected {} and found {}".format(myfname,expected,found))
            return False
        return True
1022
+