pikobs 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pikobs/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ from .scatter import *
2
+ from .configobs import *
3
+ from .cardio import *
4
+ from .timeserie import *
5
+ from .profile import *
6
+ from .vdedr import *
7
+
@@ -0,0 +1,3 @@
1
+ from .cardio import *
2
+ from .cardio_plot import *
3
+
@@ -0,0 +1,438 @@
1
+ #!/usr/bin/python3
2
+ import sqlite3
3
+ import pikobs
4
+ import re
5
+ import os
6
+ from dask.distributed import Client
7
+ import numpy as np
8
+ import sqlite3
9
+ import os
10
+ import re
11
+ import sqlite3
12
+ from datetime import datetime, timedelta
13
+
14
+
15
def create_serie_cardio(family,
                        new_db_filename,
                        existing_db_filename,
                        region_seleccionada,
                        selected_flags,
                        FONCTION,
                        varno):
    """
    Aggregate one observation database into the 'serie_cardio' table.

    The input database is attached as ``db`` to the output database, and one
    row per (VCOORD, ID_STN) group having at least 50 non-null OMP values is
    inserted into ``serie_cardio`` (created if it does not exist yet).

    Args:
        family (str): Key passed to ``pikobs.family``; only the vertical
            coordinate criterion (third returned value) is used here.
        new_db_filename (str): Output database, opened with ``uri=True``.
        existing_db_filename (str): Input database to attach. Its name must
            contain a 10-digit date (the callers in this file build it with
            the ``%Y%m%d%H`` format); the file name is searched first, then
            the whole path.
        region_seleccionada (str): Key passed to ``pikobs.regions``.
        selected_flags (str): Value passed to ``pikobs.flag_criteria``.
        FONCTION: Unused; kept so existing callers keep working.
        varno: Value interpolated into the ``VARNO = ...`` filter.

    Returns:
        None

    Raises:
        ValueError: If no 10-digit date is found in ``existing_db_filename``.
    """
    pattern = r'(\d{10})'
    # Prefer the file name so that digits in a parent directory cannot be
    # mistaken for the date; fall back to the full path as before.
    match = (re.search(pattern, os.path.basename(existing_db_filename))
             or re.search(pattern, existing_db_filename))
    if match is None:
        # Previously this only printed a message and then failed further down
        # with a NameError on `date`.
        raise ValueError(
            f"No 10 digits found in the string: {existing_db_filename!r}")
    date = match.group(1)

    # isolation_level=None puts the connection in autocommit mode.
    new_db_conn = sqlite3.connect(new_db_filename, uri=True,
                                  isolation_level=None, timeout=999)
    try:
        new_db_cursor = new_db_conn.cursor()

        # SQL fragments built by pikobs helpers; they are spliced verbatim
        # into the WHERE clause below.
        _, _, VCOCRIT, _, _, _ = pikobs.family(family)
        LAT1, LAT2, LON1, LON2 = pikobs.regions(region_seleccionada)
        LATLONCRIT = pikobs.generate_latlon_criteria(LAT1, LAT2, LON1, LON2)
        flag_criteria = pikobs.flag_criteria(selected_flags)

        # Attach the existing database; binding the path keeps quotes in the
        # file name from breaking the statement.
        new_db_cursor.execute("ATTACH DATABASE ? AS db;",
                              (existing_db_filename,))

        # Load the CMC extension shipped inside the pikobs package
        # (presumably the provider of isodatetime() and STDDEV() used below).
        new_db_conn.enable_load_extension(True)
        extension_path = (f'{os.path.dirname(pikobs.__file__)}'
                          '/extension/libudfsqlite-shared.so')
        new_db_conn.execute("SELECT load_extension(?)", (extension_path,))

        # Create the 'serie_cardio' table in the new database if needed.
        new_db_cursor.execute("""
            CREATE TABLE IF NOT EXISTS serie_cardio (
                DATE INTEGER,
                Chan INTEGER,
                Nrej INTEGER,
                Nacc INTEGER,
                AvgOMP FLOAT,
                AvgOMA FLOAT,
                StdOMP FLOAT,
                StdOMA FLOAT,
                NDATA INTEGER,
                Nprofile INTEGER,
                AvgBCOR FLOAT,
                AvgOBS FLOAT,
                Ntot INTEGER,
                varno INTEGER,
                id_stn TEXT
            );
        """)

        # `header` and `data` are not defined in the new database, so the
        # unqualified names resolve to the attached input database.
        query = f"""INSERT INTO serie_cardio (
                DATE,
                Chan,
                Nrej,
                Nacc,
                AvgOMP,
                AvgOMA,
                StdOMP,
                StdOMA,
                NDATA,
                Nprofile,
                AvgBCOR,
                AvgOBS,
                Ntot,
                varno,
                id_stn
            )
            SELECT
                isodatetime({date}) AS DATE,
                VCOORD As Chan,
                SUM(flag & 512=512) AS Nrej,
                SUM(flag & 4096=4096) AS Nacc,
                ROUND(AVG(OMP), 4) AS AvgOMP,
                ROUND(AVG(OMA), 4) AS AvgOMA,
                ROUND(STDDEV(OMP), 4) AS StdOMP,
                ROUND(STDDEV(OMA), 4) AS StdOMA,
                SUM(OMP IS NOT NULL) AS NDATA,
                COUNT(DISTINCT id_obs) AS Nprofile,
                ROUND(AVG(BIAS_CORR), 4) AS AvgBCOR,
                ROUND(AVG(OBSVALUE), 4) AS AvgOBS,
                (SELECT COUNT(*) FROM header h2 WHERE h2.ID_STN = header.ID_STN) AS Ntot,
                varno AS varno,
                id_stn AS id_stn
            FROM
                header
            NATURAL JOIN
                data
            WHERE
                VARNO = {varno}
                {flag_criteria}
                {LATLONCRIT}
                {VCOCRIT}
            GROUP BY
                VCOORD, ID_STN
            HAVING
                SUM(OMP IS NOT NULL) >= 50;"""
        new_db_cursor.execute(query)
        new_db_conn.commit()
    finally:
        # Always release the connection, even when a helper or a statement
        # fails; closing it also detaches the input database.
        new_db_conn.close()
152
+
153
def create_data_list_cardio(datestart1,
                            dateend1,
                            family,
                            pathin,
                            pathwork,
                            fonction,
                            flag_criteria,
                            region_seleccionada):
    """
    Build the list of jobs consumed by ``create_serie_cardio``.

    One dictionary is produced for every combination of element (varno) of
    the family and 6-hourly date between ``datestart1`` and ``dateend1``
    (both inclusive).

    Args:
        datestart1 (str): First date, in ``%Y%m%d%H`` format.
        dateend1 (str): Last date, in ``%Y%m%d%H`` format.
        family (str): Key passed to ``pikobs.family``; the fifth returned
            value is read as a comma-separated list of elements.
        pathin (str): Directory holding the ``<date>_<family>`` input files.
        pathwork (str): Directory where the output database is written.
        fonction (str): Used in the output database file name.
        flag_criteria (str): Used in the output database file name and
            forwarded in each job.
        region_seleccionada (str): Forwarded in each job.

    Returns:
        list[dict]: Jobs with keys 'family', 'filein', 'db_new', 'region',
        'flag_criteria', 'fonction', 'varno', 'vcoord' and 'id_stn'.

    Raises:
        ValueError: If a date does not match ``%Y%m%d%H`` or an element is
            not numeric.
    """
    datestart = datetime.strptime(datestart1, '%Y%m%d%H')
    dateend = datetime.strptime(dateend1, '%Y%m%d%H')
    delta = timedelta(hours=6)

    _, _, _, _, element, _ = pikobs.family(family)
    element_array = np.array([float(x) for x in element.split(',')])

    # Every job writes into the same output database.
    db_new = (f'{pathwork}/cardio_{datestart1}_{dateend1}'
              f'_{fonction}_{flag_criteria}_{family}.db')

    data_list_cardio = []
    for varno in element_array:
        # Restart the date range for each element: when the cursor was only
        # initialised once, every element after the first found it already
        # past `dateend` and produced no job.
        current_date = datestart
        while current_date <= dateend:
            formatted_date = current_date.strftime('%Y%m%d%H')
            # The input file is only named here; it is opened later by
            # create_serie_cardio. No connection is made at this stage, so a
            # missing file is not touched while the list is being built.
            data_list_cardio.append({
                'family': family,
                'filein': f'{pathin}/{formatted_date}_{family}',
                'db_new': db_new,
                'region': region_seleccionada,
                'flag_criteria': flag_criteria,
                'fonction': fonction,
                'varno': varno,
                'vcoord': 'all',
                'id_stn': 'all',
            })
            current_date += delta

    return data_list_cardio
214
+
215
+
216
+
217
+
218
+
219
def create_data_list_plot(datestart1,
                          dateend1,
                          family,
                          pathin,
                          pathwork,
                          fonction,
                          flag_criteria,
                          region_seleccionada,
                          id_stn,
                          channel):
    """
    List the (station, channel, varno) combinations available for plotting.

    Reads the ``serie_cardio`` table of
    ``{pathwork}/cardio_{datestart1}_{dateend1}_{fonction}_{flag_criteria}_{family}.db``.

    Args:
        datestart1 (str): Start date, used in the database file name.
        dateend1 (str): End date, used in the database file name.
        family (str): Family, used in the database file name.
        pathin: Unused; kept so existing callers keep working.
        pathwork (str): Directory containing the database.
        fonction (str): Used in the database file name.
        flag_criteria (str): Used in the database file name.
        region_seleccionada: Unused; kept so existing callers keep working.
        id_stn (str): Station to select, or 'all' for every station present
            in the table.
        channel: Channel to select (compared with the ``chan`` column), or
            'all' for every channel of the station.

    Returns:
        list[dict]: One ``{'id_stn', 'vcoord', 'varno'}`` dictionary per
        distinct (chan, varno) pair of each selected station, ordered by
        channel within a station.
    """
    filedb = f'{pathwork}/cardio_{datestart1}_{dateend1}_{fonction}_{flag_criteria}_{family}.db'
    data_list_plot = []

    conn = sqlite3.connect(filedb)
    try:
        cursor = conn.cursor()

        if id_stn == 'all':
            cursor.execute("SELECT DISTINCT id_stn FROM serie_cardio;")
            stations = [row[0] for row in cursor.fetchall()]
        else:
            # Use the requested station itself (not the literal text
            # 'id_stn', whose first character used to be selected).
            stations = [id_stn]

        for station in stations:
            # Values are bound as parameters so that station names are never
            # parsed as SQL (double-quoted text can resolve to a column).
            query = "SELECT DISTINCT chan, varno FROM serie_cardio WHERE id_stn = ?"
            params = [station]
            if channel != 'all':
                # The table stores the channel in `chan`; it has no `vcoord`
                # column.
                query += " AND chan = ?"
                params.append(channel)
            query += " ORDER BY chan ASC;"

            cursor.execute(query, params)
            for vcoord, varno in cursor.fetchall():
                data_list_plot.append({'id_stn': station,
                                       'vcoord': vcoord,
                                       'varno': varno})
    finally:
        conn.close()

    return data_list_plot
258
+
259
+
260
def make_cardio(path_experience_files,
                experience_names,
                pathwork,
                datestart,
                dateend,
                region,
                family,
                flag_criteria,
                fonction,
                id_stn,
                channel,
                plot_type,
                plot_title,
                n_cpu):
    """
    Compute the cardiogram statistics, then draw one plot per series.

    Step 1 recreates ``pathwork`` and fills the ``serie_cardio`` database by
    calling ``create_serie_cardio`` once per job from
    ``create_data_list_cardio``. Step 2 creates ``{pathwork}/cardio`` and
    calls ``pikobs.cardio_plot`` once per entry of ``create_data_list_plot``.
    Both steps run sequentially when ``n_cpu == 1`` and through a dask
    distributed client with ``n_cpu`` worker processes otherwise.

    Args:
        path_experience_files (str): Directory with the input sqlite files.
        experience_names: Not used by this function.
        pathwork (str): Working directory (deleted and recreated).
        datestart (str): Start date.
        dateend (str): End date.
        region (str): Region key.
        family (str): Family key.
        flag_criteria (str): Flags criteria.
        fonction: Ignored; the value 'omp' is always used.
        id_stn (str): Station selection for the plots ('all' for every one).
        channel: Channel selection for the plots ('all' for every one).
        plot_type (str): Forwarded to ``pikobs.cardio_plot``.
        plot_title (str): Forwarded to ``pikobs.cardio_plot``.
        n_cpu (int): 1 for sequential execution, anything else for dask.

    Returns:
        None
    """
    import time
    import dask

    # The incoming value is deliberately overridden.
    fonction = 'omp'

    pikobs.delete_create_folder(pathwork)
    data_list_cardio = create_data_list_cardio(
        datestart, dateend, family, path_experience_files,
        pathwork, fonction, flag_criteria, region)

    # ---- Step 1: statistics -------------------------------------------
    stats_start = time.time()
    serie_args = [(job['family'],
                   job['db_new'],
                   job['filein'],
                   job['region'],
                   job['flag_criteria'],
                   job['fonction'],
                   job['varno'])
                  for job in data_list_cardio]
    if n_cpu == 1:
        for job_args in serie_args:
            print("Serie")
            create_serie_cardio(*job_args)
    else:
        print(f'Number of files used in calculating statistics = {len(data_list_cardio)}')
        with dask.distributed.Client(processes=True,
                                     threads_per_worker=1,
                                     n_workers=n_cpu,
                                     silence_logs=40):
            tasks = [dask.delayed(create_serie_cardio)(*job_args)
                     for job_args in serie_args]
            dask.compute(*tasks)
    print('Total time for statistics:', time.time() - stats_start)

    # ---- Step 2: plots ------------------------------------------------
    data_list_plot = create_data_list_plot(
        datestart, dateend, family, path_experience_files,
        pathwork, fonction, flag_criteria, region, id_stn, channel)

    os.makedirs(f'{pathwork}/cardio')
    plot_start = time.time()
    plot_args = [(pathwork,
                  datestart,
                  dateend,
                  'omp',
                  flag_criteria,
                  family,
                  plot_title,
                  plot_type,
                  serie['vcoord'],
                  serie['id_stn'],
                  serie['varno'])
                 for serie in data_list_plot]
    if n_cpu == 1:
        print(f'serie= {len(data_list_plot)}')
        for one_plot in plot_args:
            pikobs.cardio_plot(*one_plot)
    else:
        print(f'number of plot in paralle = {len(data_list_plot)}')
        with dask.distributed.Client(processes=True,
                                     threads_per_worker=1,
                                     n_workers=n_cpu,
                                     silence_logs=40):
            tasks = [dask.delayed(pikobs.cardio_plot)(*one_plot)
                     for one_plot in plot_args]
            dask.compute(*tasks)
    print('total time for plotting:', time.time() - plot_start)
369
+
370
+
371
+
372
+
373
+
374
def arg_call():
    """
    Command-line entry point for the cardiogram calculation.

    Parses the options from ``sys.argv``, echoes them, checks that the
    mandatory ones were supplied and then exits the process with the value
    returned by ``make_cardio``.

    Raises:
        ValueError: If a mandatory option was left at its 'undefined'
            default.
        SystemExit: Always, once ``make_cardio`` has returned (and by
            argparse itself on invalid command lines).
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument('--path_experience_files', default='undefined', type=str, help="Directory where input sqlite files are located")
    parser.add_argument('--experience_name', default='undefined', type=str, help="experience's name")
    parser.add_argument('--pathwork', default='undefined', type=str, help="Working directory")
    parser.add_argument('--datestart', default='undefined', type=str, help="Start date")
    parser.add_argument('--dateend', default='undefined', type=str, help="End date")
    parser.add_argument('--region', default='undefined', type=str, help="Region")
    parser.add_argument('--family', default='undefined', type=str, help="Family")
    parser.add_argument('--flags_criteria', default='undefined', type=str, help="Flags criteria")
    parser.add_argument('--fonction', default='undefined', type=str, help="Function")
    parser.add_argument('--id_stn', default='all', type=str, help="id_stn")
    parser.add_argument('--channel', default='all', type=str, help="channel")
    parser.add_argument('--plot_type', default='classic', type=str, help="Plot type")
    parser.add_argument('--plot_title', default='plot', type=str, help="Plot title")
    parser.add_argument('--n_cpus', default=1, type=int, help="Number of cpus")

    args = parser.parse_args()

    print ( "Inputs in cardiogram calculation")
    print ("----------------------------------------")
    for arg in vars(args):
        print (f'--{arg} {getattr(args, arg)}')
    print ("----------------------------------------")

    # Mandatory options, checked in the order they are reported.
    # --fonction is intentionally not required.
    mandatory = ('path_experience_files',
                 'experience_name',
                 'pathwork',
                 'datestart',
                 'dateend',
                 'region',
                 'family',
                 'flags_criteria')
    for name in mandatory:
        if getattr(args, name) == 'undefined':
            raise ValueError(f'You must specify --{name}')

    # Run the calculation with the parsed arguments.
    sys.exit(make_cardio(args.path_experience_files,
                         args.experience_name,
                         args.pathwork,
                         args.datestart,
                         args.dateend,
                         args.region,
                         args.family,
                         args.flags_criteria,
                         args.fonction,
                         args.id_stn,
                         args.channel,
                         args.plot_type,
                         args.plot_title,
                         args.n_cpus))
436
+
437
# Run the command-line interface only when this file is executed as a
# script; arg_call() ends the process itself through sys.exit().
if __name__ == "__main__":
    arg_call()