roc-film 1.13.5__py3-none-any.whl → 1.14.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. The information is provided for informational purposes only.
- roc/__init__.py +2 -1
- roc/film/__init__.py +2 -2
- roc/film/commands.py +372 -323
- roc/film/config/__init__.py +0 -1
- roc/film/constants.py +101 -65
- roc/film/descriptor.json +127 -96
- roc/film/exceptions.py +28 -27
- roc/film/tasks/__init__.py +16 -16
- roc/film/tasks/cat_solo_hk.py +86 -74
- roc/film/tasks/cdf_postpro.py +438 -309
- roc/film/tasks/check_dds.py +39 -45
- roc/film/tasks/db_to_anc_bia_sweep_table.py +381 -0
- roc/film/tasks/dds_to_l0.py +232 -180
- roc/film/tasks/export_solo_coord.py +147 -0
- roc/film/tasks/file_handler.py +124 -70
- roc/film/tasks/l0_to_hk.py +117 -103
- roc/film/tasks/l0_to_l1_bia_current.py +44 -30
- roc/film/tasks/l0_to_l1_bia_sweep.py +417 -329
- roc/film/tasks/l0_to_l1_sbm.py +250 -208
- roc/film/tasks/l0_to_l1_surv.py +185 -130
- roc/film/tasks/make_daily_tm.py +40 -37
- roc/film/tasks/merge_tcreport.py +77 -71
- roc/film/tasks/merge_tmraw.py +101 -88
- roc/film/tasks/parse_dds_xml.py +21 -20
- roc/film/tasks/set_l0_utc.py +51 -49
- roc/film/tests/cdf_compare.py +565 -0
- roc/film/tests/hdf5_compare.py +84 -62
- roc/film/tests/test_dds_to_l0.py +93 -51
- roc/film/tests/test_dds_to_tc.py +8 -11
- roc/film/tests/test_dds_to_tm.py +8 -10
- roc/film/tests/test_film.py +161 -116
- roc/film/tests/test_l0_to_hk.py +64 -36
- roc/film/tests/test_l0_to_l1_bia.py +10 -14
- roc/film/tests/test_l0_to_l1_sbm.py +14 -19
- roc/film/tests/test_l0_to_l1_surv.py +68 -41
- roc/film/tests/test_metadata.py +21 -20
- roc/film/tests/tests.py +743 -396
- roc/film/tools/__init__.py +5 -5
- roc/film/tools/dataset_tasks.py +34 -2
- roc/film/tools/file_helpers.py +402 -271
- roc/film/tools/l0.py +402 -324
- roc/film/tools/metadata.py +147 -127
- roc/film/tools/skeleton.py +12 -17
- roc/film/tools/tools.py +109 -92
- roc/film/tools/xlsx2skt.py +161 -139
- {roc_film-1.13.5.dist-info → roc_film-1.14.1.dist-info}/LICENSE +127 -125
- roc_film-1.14.1.dist-info/METADATA +60 -0
- roc_film-1.14.1.dist-info/RECORD +50 -0
- {roc_film-1.13.5.dist-info → roc_film-1.14.1.dist-info}/WHEEL +1 -1
- roc/film/tasks/l0_to_anc_bia_sweep_table.py +0 -348
- roc_film-1.13.5.dist-info/METADATA +0 -120
- roc_film-1.13.5.dist-info/RECORD +0 -48
roc/film/tools/file_helpers.py
CHANGED
@@ -1,48 +1,64 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import os
 from glob import glob
 from datetime import datetime, timedelta
 import uuid
+import calendar
 
 import h5py
 import numpy as np
+from spacepy.pycdf import CDF
 
 from poppy.core import MissingArgument
 from poppy.core.logger import logger
+from poppy.core.task import Task
+from poppy.core.pipeline import Pipeline
 
 from roc.rpl.time import Time
 
 # Import methods to extract data from RPW packets
+from roc.film.tools.dataset_tasks import dataset_func
 
 from roc.rap.tasks.utils import order_by_increasing_time
 
-from roc.film.tools.metadata import
+from roc.film.tools.metadata import (
+    init_cdf_global,
+    set_logical_file_id,
+    get_spice_kernels,
+)
+from roc.film.exceptions import (
+    UnknownPipeline,
+    LoadDataSetError,
+    NoData,
+)
 from roc.film.tools import valid_data_version, get_datasets
-from roc.film.constants import
+from roc.film.constants import (
+    TIME_ISO_STRFORMAT,
+    CDF_TRANGE_STRFORMAT,
+    TIME_DAILY_STRFORMAT,
+    INPUT_DATETIME_STRFTIME,
+)
 
 # Import methods to extract data from RPW packets
-from roc.film.tools.dataset_tasks import *
-
-__all__ = ['build_file_basename',
-           'generate_filepath',
-           'put_cdf_global',
-           'is_packet',
-           'put_cdf_zvars',
-           'l0_to_trange_cdf',
-           'get_l0_file',
-           'get_l0_files',
-           'get_output_dir',
-           'get_master_cdf_dir',
-           'is_output_dir']
-
-from roc.film.tools.tools import extract_file_fields
 
+__all__ = [
+    "build_file_basename",
+    "generate_filepath",
+    "put_cdf_global",
+    "is_packet",
+    "put_cdf_zvars",
+    "l0_to_trange_cdf",
+    "get_l0_file",
+    "get_l0_files",
+    "get_output_dir",
+    "get_master_cdf_dir",
+    "is_output_dir",
+]
+
+
+def build_file_basename(metadata: dict, is_cdag: bool = False) -> str:
     """
     "Build Solar Orbiter convention basename (without extension)
     using metadata.
@@ -50,43 +66,50 @@ def build_file_basename(metadata,
     See SOL-SGS-TN-0009 for more details about SolO data standards.
 
     :param metadata: dictionary contains output metadata used to build filename
+    :type metadata: dict
     :param is_cdag: If True, add a '-cdag' suffix to the descriptor field of the filename
-    :
+    :type is_cdag: bool
+    :return: RPW file basename
+    :rtype: str
     """
 
     # if Logical_file_id attribute exists, it should contain
     # the file name without the extension
-    if not is_cdag and
-        return str(metadata[
+    if not is_cdag and "Logical_file_id" in metadata:
+        return str(metadata["Logical_file_id"])
 
     # Add -cdag suffix if input is_cdag=True
     if not is_cdag:
-        cdag_suffix =
+        cdag_suffix = ""
     else:
-        cdag_suffix =
+        cdag_suffix = "-cdag"
 
     # Else build the file basename from scratch
     # file basename mandatory fields
     fields = [
-        str(metadata[
-        str(metadata[
-        str(metadata[
-        str(metadata[
+        str(metadata["Source_name"]).split(">")[0].lower(),
+        str(metadata["LEVEL"]).split(">")[0],
+        str(metadata["Descriptor"]).split(">")[0].lower() + cdag_suffix,
+        str(metadata["Datetime"]),
+        "V" + str(metadata["Data_version"]),
     ]
 
     # Add free_field at the end of the file basename if it exists
-    free_field = metadata.get(
-    if free_field:
-        fields.append(str(metadata[
+    free_field = metadata.get("Free_field", "")
+    if free_field and str(free_field).lower().strip() not in ["none", ""]:
+        fields.append(str(metadata["Free_field"]))
 
-    return
+    return "_".join(fields)
 
 
-def generate_filepath(
+def generate_filepath(
+    task: type[Task],
+    metadata: dict,
+    extension: str,
+    output_dir: str = None,
+    is_cdag: bool = False,
+    overwrite: bool = False,
+) -> str:
     """
     Generate output filepath from input task and metadata info
 
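Note on the naming convention used by build_file_basename() above: the function joins the source, level, descriptor (optionally with the "-cdag" suffix), datetime and version fields with underscores, following SOL-SGS-TN-0009. Below is a minimal, self-contained sketch of that field-joining logic; the metadata values are invented for illustration and do not come from a real RPW product.

# Illustrative sketch only (not part of the package): reproduce the
# field-joining logic of build_file_basename() for a hypothetical
# metadata dictionary.
metadata = {
    "Source_name": "SOLO>Solar Orbiter",
    "LEVEL": "L1>Level 1 data",
    "Descriptor": "RPW-LFR-SURV-CWF-E>LFR continuous waveform",  # invented value
    "Datetime": "20230101",
    "Data_version": "01",
}
fields = [
    str(metadata["Source_name"]).split(">")[0].lower(),
    str(metadata["LEVEL"]).split(">")[0],
    str(metadata["Descriptor"]).split(">")[0].lower() + "-cdag",
    str(metadata["Datetime"]),
    "V" + str(metadata["Data_version"]),
]
print("_".join(fields))
# -> solo_L1_rpw-lfr-surv-cwf-e-cdag_20230101_V01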
@@ -96,12 +119,13 @@ def generate_filepath(task, metadata, extension,
     :param output_dir: Directory path of the output file. (If not passed, then try to get it from pipeline properties)
     :param is_cdag: If True, add a '-cdag' suffix in the descriptor of the filename
     :param overwrite: If True, overwrite existing file
-    :return:
+    :return: Output file path
+    :rtype: str
     """
 
     # Add dot '.' to the extension if not provided
-    if not extension.startswith(
-        extension =
+    if not extension.startswith("."):
+        extension = "." + extension
 
     # Build output filepath from pipeline task properties, metadata
     # and extension
@@ -109,85 +133,88 @@ def generate_filepath(task, metadata, extension,
 
     if not output_dir:
         output_dir = get_output_dir(task.pipeline)
-    filepath = os.path.join(output_dir,
-                            filename)
+    filepath = os.path.join(output_dir, filename)
 
     # check if the file to generate is already existing, and remove it
     # if --overwrite input keyword is set
     if os.path.isfile(filepath) and overwrite:
-        logger.warning(f
+        logger.warning(f"Existing {filepath} will be overwritten!")
         os.remove(filepath)
     elif os.path.isfile(filepath):
-        logger.info(
+        logger.info(
+            f"{filepath} already exists, create a new version of the data file."
+        )
         # Else, if the output file already exists, create a new
         # version of the file (i.e., increment the data_version)
 
         # Get file basename (without version and extension)
-        data_version = metadata[
-        basename = os.path.basename(filename).split(f
+        data_version = metadata["Data_version"]
+        basename = os.path.basename(filename).split(f"_V{data_version}")[0]
 
         # Check number of files in the output directory which have the
         # same basename
-        pattern = os.path.join(output_dir, basename +
+        pattern = os.path.join(output_dir, basename + "*" + extension)
        file_number = len(glob(pattern))
 
         # Increment the data_version
-        metadata[
+        metadata["Data_version"] = valid_data_version(file_number + 1)
 
         # Update Logical_file_id
-        metadata[
+        metadata["Logical_file_id"] = set_logical_file_id(metadata)
 
         # Update filepath
         filename = build_file_basename(metadata, is_cdag=is_cdag) + extension
         output_dir = get_output_dir(task.pipeline)
-        filepath = os.path.join(output_dir,
-        logger.debug(f'New file version V{metadata["Data_version"]} has been defined')
+        filepath = os.path.join(output_dir, filename)
+        logger.debug(f"New file version V{metadata['Data_version']} has been defined")
 
     logger.debug(f'Output file basename has been generated from metadata: "{filename}"')
 
     return filepath
 
 
-def get_l0_file(pipeline):
+def get_l0_file(pipeline) -> str:
     try:
         return pipeline.args.l0_file[0]
-    except:
+    except Exception:
         # If not defined as input argument, then assume that it is already
         # defined as target input
         pass
 
 
-def get_l0_files(pipeline):
+def get_l0_files(pipeline: Pipeline) -> list:
     try:
         l0_files = pipeline.args.l0_files
         if not isinstance(l0_files, list):
             l0_files = [l0_files]
         return l0_files
-    except:
+    except Exception:
         # If not defined as input argument, then assume that it is already
         # defined as target input
         pass
 
 
-def put_cdf_global(cdf, metadata):
+def put_cdf_global(cdf: CDF, metadata: dict) -> bool:
     """
     Write the global attributes into the input CDF.
 
     :param cdf: input CDF object
-    :
+    :type cdf: spacepy.pycdf.CDF
+    :param metadata: Global attributes to save in the CDF
+    :type metadata: dict
     :return: True, if succeeded, else raise an exception
+    :rtype: bool
     """
 
     for key, value in metadata.items():
-        if not
-            logger.debug(f
+        if key not in cdf.attrs:
+            logger.debug(f"{key} global attribute not found in CDF: force insertion!")
         cdf.attrs[key] = value
 
     return True
 
 
-def get_master_cdf_dir(task):
+def get_master_cdf_dir(task: type[Task]) -> str:
     """
     Try to load the master_dir directory from :
     1. the input argument --master-cdf-dir
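When the target file already exists and --overwrite is not set, generate_filepath() above derives the next data version by counting the files in the output directory that share the same basename. The sketch below shows that counting step in isolation; it is a simplification, and the two-digit zero padding is an assumption for the example (the real padding is handled by valid_data_version() in roc.film.tools).

# Sketch only: count existing files with the same basename and bump the version.
import os
from glob import glob

def next_data_version(output_dir: str, basename: str, extension: str = ".cdf") -> str:
    # Files matching "<basename>*<extension>" are considered previous versions.
    pattern = os.path.join(output_dir, basename + "*" + extension)
    file_number = len(glob(pattern))
    return f"{file_number + 1:02d}"  # assumed two-digit padding

# With solo_..._V01.cdf and solo_..._V02.cdf already present in output_dir,
# next_data_version() returns "03".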
@@ -195,85 +222,91 @@ def get_master_cdf_dir(task):
     3. the OS environment
     If it does not exist, set to ".".
 
-    :param task:
-    :
+    :param task: Poppy pipeline task instance
+    :type task: Task
+    :return: Master CDF directory
+    :rtype: str
     """
-    master_cdf_dir = task.pipeline.get('master_cdf_dir', default=None)
+    master_cdf_dir = task.pipeline.get("master_cdf_dir", default=None)
 
     if master_cdf_dir is None:
         # 2. Else from the config.json
-        if
+        if (
+            "RPW_CDF_MASTER_PATH"
+            in task.pipeline.properties.configuration["environment"]
+        ):
             master_cdf_dir = task.pipeline.properties.configuration[
+                "environment.RPW_CDF_MASTER_PATH"
+            ]
         # 3. Else from the OS environment
-        elif
-            master_cdf_dir = os.environ[
+        elif "RPW_CDF_MASTER_PATH" in os.environ:
+            master_cdf_dir = os.environ["RPW_CDF_MASTER_PATH"]
         # Otherwise raise an exception
         else:
-            raise MissingArgument(
+            raise MissingArgument("No value found for master_cdf_dir!")
     else:
         master_cdf_dir = master_cdf_dir[0]
 
     return master_cdf_dir
 
 
-def get_output_dir(pipeline):
+def get_output_dir(pipeline: Pipeline) -> str:
     """
     Generate the output directory from the information provided in the
     pipeline properties and metadata.
 
-    :param
-    :
+    :param pipeline: POPPy pipeline instance
+    :type pipeline: Poppy Task class
     :return: output_dir
+    :rtype: str
     """
 
-    # Initialize output
-    output_dir = None
     # get pipeline id (can be "RGTS" or "RODP")
-    pipeline_id = pipeline.properties.configuration[
+    pipeline_id = pipeline.properties.configuration["environment.ROC_PIP_NAME"]
 
     # Case for the ROC Ground Test SGSE (RGTS)
-    if pipeline_id ==
+    if pipeline_id == "RGTS":
         # Generate output directory for current test
         try:
             output_dir = pipeline.args.test_log.output_directory(pipeline)
-        except:
+        except Exception as e:
+            logger.debug(e)
             output_dir = pipeline.output
 
     # Case for the RPW Operation and Data Pipeline (RODP)
-    elif pipeline_id ==
+    elif pipeline_id == "RODP":
         # First get the input LZ File object from the properties
         try:
             output_dir = pipeline.output
-        except:
+        except Exception:
+            logger.error("NO OUTPUT DIRECTORY DEFINED, ABORTING!")
+            raise
     else:
-        raise UnknownPipeline(f
-                              f' {pipeline_id}, ABORTING!')
+        raise UnknownPipeline(f"UNKNOWN PIPELINE TYPE: {pipeline_id}, ABORTING!")
 
     return output_dir
 
 
-def get_products_dir(pipeline):
+def get_products_dir(pipeline: Pipeline) -> str:
     """
     Get the path of the directory where final products must be moved.
 
     :param pipeline: POPPy pipeline instance
+    :type pipeline: Poppy Pipeline class
     :return: string containing the products directory path
+    :rtype: str
     """
-    products_dir = pipeline.get('products_dir', default=None, args=True)
+    products_dir = pipeline.get("products_dir", default=None, args=True)
 
     if products_dir is None:
         # 2. Else from the config.json
-        if
+        if "ROC_PRODUCTS_PATH" in pipeline.properties.configuration["environment"]:
             products_dir = pipeline.properties.configuration[
+                "environment.ROC_PRODUCTS_PATH"
+            ]
         # 3. Else from the OS environment
-        elif
-            products_dir = os.environ[
+        elif "ROC_PRODUCTS_PATH" in os.environ:
+            products_dir = os.environ["ROC_PRODUCTS_PATH"]
         # Otherwise return "."
         else:
            products_dir = None
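get_master_cdf_dir() and get_products_dir() above resolve their directory with the same precedence: pipeline argument first, then the "environment" section of the pipeline configuration, then the OS environment. The stand-alone sketch below illustrates that precedence; the config dictionary and path value are stand-ins for the poppy configuration object and are not taken from a real deployment.

# Sketch only: argument > configuration "environment" section > OS environment.
import os

def resolve_dir(arg_value, config_env: dict, key: str, default=None):
    if arg_value is not None:
        return arg_value
    if key in config_env:
        return config_env[key]
    if key in os.environ:
        return os.environ[key]
    return default

master_cdf_dir = resolve_dir(None, {"RPW_CDF_MASTER_PATH": "/data/cdf/master"},
                             "RPW_CDF_MASTER_PATH")
# -> "/data/cdf/master"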
@@ -283,36 +316,38 @@ def get_products_dir(pipeline):
     return products_dir
 
 
-def is_output_dir(output_dir, products_dir=None):
+def is_output_dir(output_dir: str, products_dir: str = None) -> bool:
     """
     Check if the output directory exists and if its basename is found in the
     products_dir.
 
     :param output_dir: String containing output directory
+    :type output_dir: str
     :param products_dir: String containing products directory
                          (if provided, check if output_dir basename
                          is already saved inside)
+    :type products_dir: str
     :return: True if output_dir is found, False otherwise
+    :rtype: bool
     """
 
     # Get output_dir value
     if output_dir:
         output_dir_basename = os.path.basename(output_dir)
     else:
-        raise MissingArgument(
+        raise MissingArgument("Output directory is not defined!")
 
     # Check if output_dir already exists
     if os.path.isdir(output_dir):
-        logger.debug(f
+        logger.debug(f"{output_dir} already created")
         return True
 
     # Check products_dir
     if products_dir:
         # Build target directory path
-        target_dir = os.path.join(
-            products_dir, os.path.basename(output_dir))
+        target_dir = os.path.join(products_dir, os.path.basename(output_dir))
         if os.path.isdir(target_dir):
-            logger.debug(f
+            logger.debug(f"{output_dir_basename} already found in {products_dir}")
             return True
     else:
         logger.debug("Input argument 'products_dir' is not defined")
@@ -320,13 +355,16 @@ def is_output_dir(output_dir, products_dir=None):
     return False
 
 
-def is_packet(expected_packets, packets):
+def is_packet(expected_packets: list, packets: list) -> bool:
     """
     Check if packet(s) is/are in the input packet_list
 
     :param expected_packets: Name of the packet(s) expected for the dataset
-    :
+    :type expected_packets: list
+    :param packets: Input packet(s) provided as a h5 group
+    :type packets: list
     :return: True if at least one expected packet found, False if all expected packets not found
+    :rtype: bool
     """
 
     if not isinstance(expected_packets, list):
@@ -340,51 +378,58 @@ def is_packet(expected_packets, packets):
         return False
 
 
-def put_cdf_zvars(
+def put_cdf_zvars(
+    cdf: CDF, data: np.ndarray, start_time: datetime = None, end_time: datetime = None
+) -> tuple:
     """
     Write input data into CDF zVariable
 
     :param cdf: pycdf.CDF object to update
-    :
+    :type cdf: spacepy.pycdf.CDF
+    :param data: zVariable data to write into the CDF
+    :type data: np.ndarray
     :param start_time: only store data after start_time
+    :type start_time: datetime
     :param end_time: only store date before end_time
-    :
+    :type end_time: datetime
+    :return: time_min, time_max and nrec:
+    :rtype: tuple
     """
 
     # check size of the data
     nrec = data.shape[0]
     if nrec == 0:
-        raise NoData(
+        raise NoData(
+            message="Data for {0} is empty".format(cdf.pathname), ll=logger.warning
+        )
 
     # Check that 'epoch' variable exists, convert it to tt2000 and filter data
     # between start_time/end_time if required
     try:
-        epoch = data[
-    except:
+        epoch = data["epoch"][:].astype(float)
+    except KeyError:
+        logger.error('No valid "epoch" variable found in the input data')
+        raise
     else:
+        # Instantiate roc.rpl.time.Time class
+        time_instance = Time()
 
         # Get start_time in TT2000
         if start_time:
             # Convert start_time into TT2000
-            start_time_tt2000 = float(
+            start_time_tt2000 = float(time_instance.utc_to_tt2000(start_time))
         else:
             # By default, get lowest possible value for TT2000 datatype
-            start_time_tt2000 = -2**64
+            start_time_tt2000 = -(2**64)
 
         # Get end_time in TT2000
         if end_time:
             # Convert end_time into TT2000
-            end_time_tt2000 = float(
+            end_time_tt2000 = float(time_instance.utc_to_tt2000(end_time))
         else:
             # By default, get highest possible value for TT2000 datatype
             end_time_tt2000 = 2**64
 
         # Define array indices to keep between start_time/end_time
         idx = (epoch >= start_time_tt2000) & (epoch <= end_time_tt2000)
 
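put_cdf_zvars() above keeps only the records whose epoch falls between start_time and end_time, with the comparison done in TT2000; when a bound is not supplied it falls back to an extreme sentinel value. A minimal NumPy sketch of that masking step is given below; the epoch values are fake and only serve the illustration.

# Sketch only: boolean mask over TT2000 epoch values.
import numpy as np

epoch = np.array([1.0e18, 2.0e18, 3.0e18])  # fake TT2000 samples (ns since J2000)
start_time_tt2000 = -(2**64)                # no lower bound requested
end_time_tt2000 = 2.5e18                    # upper bound requested

idx = (epoch >= start_time_tt2000) & (epoch <= end_time_tt2000)
print(epoch[idx])                           # keeps the first two samples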
@@ -399,64 +444,98 @@ def put_cdf_zvars(cdf, data,
     # Fill Epoch CDF zVariable
     epoch_min = epoch.min()
     epoch_max = epoch.max()
-    cdf[
-    cdf[
+    cdf["Epoch"] = epoch
+    cdf["Epoch"].attrs["SCALEMIN"] = time_instance.tt2000_to_utc(
+        epoch_min, to_datetime=True
+    )
+    cdf["Epoch"].attrs["SCALEMAX"] = time_instance.tt2000_to_utc(
+        epoch_max, to_datetime=True
+    )
 
     # Fill other CDF zVariables
     for i, name in enumerate(data.dtype.names):
-        #
-        if name.lower() ==
+        # skip if epoch (already processed above)
+        if name.lower() == "epoch":
             continue
         else:
-            logger.debug(f
+            logger.debug(f"Writing {nrec} records for {name} zVariable...")
+
+            if name.lower() == "quality_bitmask":
+                # Make sure quality_bitmask is a UINT16
+                data_i = data[name][idx].astype(np.uint16)
+            else:
+                data_i = data[name][idx]
 
+        # Write data into the zVariable
         cdf[name.upper()] = data_i
 
         # Get min/max value of the current zVariable
-        cdf[name.upper()].attrs[
-        cdf[name.upper()].attrs[
+        cdf[name.upper()].attrs["SCALEMIN"] = data_i.min()
+        cdf[name.upper()].attrs["SCALEMAX"] = data_i.max()
 
     # Set quality_flag
     logger.debug('Set "QUALITY_FLAG" zVar default value to 3')
-    cdf[
-    cdf[
-    cdf[
+    cdf["QUALITY_FLAG"] = np.full(nrec, 3, dtype=np.uint8)
+    cdf["QUALITY_FLAG"].attrs["SCALEMIN"] = 0
+    cdf["QUALITY_FLAG"].attrs["SCALEMAX"] = 5
 
     return epoch_min, epoch_max, nrec
 
 
-def l0_to_trange_cdf(
+def l0_to_trange_cdf(
+    task: type[Task],
+    task_name: str,
+    l0_file_list: list,
+    output_dir: str,
+    time_instance: Time = None,
+    start_time: datetime = None,
+    end_time: datetime = None,
+    failed_files: list = None,
+    processed_files: list = None,
+    monthly: bool = False,
+    unique: bool = False,
+    overwrite: bool = False,
+    is_cdag: bool = True,
+):
     """
     Task to generate time range CDF from l0 file(s)
 
-    :param task: instance of the task
+    :param task: instance of the task:
+    :type task: Poppy Task class type object
     :param task_name: string containing the name of the task (as defined in descriptor)
-    :
+    :type task_name: str
+    :param l0_file_list: Input L0 files
+    :type l0_file_list: list
     :param output_dir: path of the output directory
+    :type output_dir: str
+    :param time_instance: roc.rpl.time.Time object:
+    :type time_instance: roc.rpl.time.Time
     :param start_time: start time of the data written in the output CDF
+    :type start_time: datetime
     :param end_time: end time of the data written in the output CDF
+    :type end_time: datetime
+    :param failed_files: CDF files for which saving data has failed
+    :type failed_files: list
+    :param processed_files: CDF files for which saving data has succeeded
+    :type processed_files: list
     :param monthly: Produce monthly file (Datetime format will be YYYYMMDD1-YYYYMMDD2,
         where YYYYMMDD1 is the first day of the month and YYYYMMDD2 is the last day).
        Month number is extracted from start_time value.
+    :type monthly: bool
     :param unique: If True, make sure that return data are uniquely stored
+    :type unique: bool
     :param overwrite: If True, overwrite existing output files
-    :
+    :type overwrite: bool
+    :param is_cdag: If True, generate 'CDAG' output files (CDAG == private data files)
+    :type is_cdag: bool
     :return: output CDF filepath if it has been successfully generated, None otherwise
+    :rtype: str
     """
 
+    if not processed_files:
+        processed_files = []
+    if not failed_files:
+        failed_files = []
 
     # Initialize output list (containing filepath)
     output_file_list = []
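The failed_files/processed_files handling added at the top of l0_to_trange_cdf() follows the usual Python pattern for list arguments: default to None (or an empty value) and create a fresh list inside the function, so repeated calls never share state through a mutable default. A tiny sketch of the pattern, with a hypothetical helper name:

# Sketch only: fresh list per call instead of a shared mutable default.
def collect(items=None):
    if not items:
        items = []
    items.append("done")
    return items

print(collect())  # ['done'] on every call, never an ever-growing shared list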
@@ -468,15 +547,20 @@ def l0_to_trange_cdf(task, task_name, l0_file_list, output_dir,
     # the output filename
     # (used to indicate preliminary files to distributed to the CDAG members only)
     if is_cdag:
-        logger.info('Producing "cdag" output CDF')
+        logger.info(f'Producing "cdag" output CDF [{task.job_id}]')
 
     # Retrieve list of output datasets to produce for the given task
     try:
         dataset_list = get_datasets(task, task_name)
-    except:
-        raise LoadDataSetError(
+    except Exception:
+        raise LoadDataSetError(
+            f"Cannot load the list of datasets to produce for {task_name} [{task.job_id}]"
+        )
     else:
-        logger.debug(
+        logger.debug(
+            "Produce L1 CDF file(s) for the following dataset(s):"
+            f" {[ds['name'] for ds in dataset_list]} [{task.job_id}]"
+        )
 
     # Get list of input l0 file(s)
     if not isinstance(l0_file_list, list):
@@ -488,107 +572,137 @@ def l0_to_trange_cdf(task, task_name, l0_file_list, output_dir,
     l0_file_len = len(l0_file_list)
 
     # Get L0 files time_min/time_max
+    try:
+        l0_time_min, l0_time_max = get_l0_trange(l0_file_list)
+        if None in l0_time_min or None in l0_time_max:
+            raise ValueError
+    except TypeError:
+        logger.error(f'Input "l0_files" must be a list!\t[{task.job_id}]')
+        return []
+    except ValueError:
+        logger.error(f"Output L0 time min. or max. list is not valid!\t[{task.job_id}]")
+        return []
 
     # Get start_time for output CDF (use time min of L0 files if not defined)
     if not start_time:
-        start_time = task.pipeline.get(
-            'start_time', default=[min(l0_time_min)])[0]
+        start_time = task.pipeline.get("start_time", default=[min(l0_time_min)])[0]
 
-    logger.debug(f
+    logger.debug(f"start_time value is {start_time}\t[{task.job_id}]")
 
     # Get end_time for output CDF (use time max of L0 files if not defined)
     if not end_time:
-        end_time = task.pipeline.get(
+        end_time = task.pipeline.get("end_time", default=[max(l0_time_max)])[0]
 
-    logger.debug(f
+    logger.debug(f"end_time value is {end_time}\t[{task.job_id}]")
 
     # Loops over each output dataset to produce for the current task
     for current_dataset in dataset_list:
+        dataset_name = current_dataset["name"]
+        data_descr = current_dataset["descr"]
+        data_version = current_dataset["version"]
 
-        logger.debug(f'Running file production for the dataset {dataset_name} (V{data_version})')
+        logger.debug(
+            "Running file production for the dataset "
+            f"{dataset_name} (V{data_version})\t[{task.job_id}]"
+        )
 
         # get the path to the master CDF file of this dataset
         master_cdf_dir = get_master_cdf_dir(task)
 
         # Get master cdf filename from descriptor
-        master_cdf_file = data_descr[
+        master_cdf_file = data_descr["template"]
 
         # Build master file pattern
-        master_pattern = os.path.join(master_cdf_dir,
-                                      master_cdf_file)
+        master_pattern = os.path.join(master_cdf_dir, master_cdf_file)
 
         # Get master file path
         master_path = glob(master_pattern)
 
         # Check existence
         if not master_path:
-            raise FileNotFoundError(
+            raise FileNotFoundError(
+                "{0} master CDF not found for the dataset {1}!\t[{2}]".format(
+                    master_pattern, dataset_name, task.job_id
+                )
+            )
         else:
             master_path = sorted(master_path)[-1]
-            logger.info(
+            logger.info(
+                'Producing dataset "{0}" with the master CDF "{1}"\t[{2}]'.format(
+                    dataset_name, master_path, task.job_id
+                )
+            )
 
         # Initialize loop variables
         data = np.empty(0)
-        nrec = 0
         parents = []
         # Loop over l0_files list
         for i, l0_file in enumerate(l0_file_list):
-            with h5py.File(l0_file, 'r') as l0:
+            with h5py.File(l0_file, "r") as l0:
                 # Skip L0 file for which start_time/end_time is not inside the
                 # time range
                 if l0_time_max[i] < start_time or l0_time_min[i] > end_time:
-                    logger.debug(
+                    logger.debug(
+                        f"{l0_file} is outside the time range: "
+                        f"[{start_time}, {end_time}], skip it\t[{task.job_id}]"
+                    )
                    continue
                 else:
-                    logger.debug(
+                    logger.debug(
+                        f"Processing {l0_file} [{l0_file_len - i - 1}]\t[{task.job_id}]"
+                    )
 
                 # Append current l0 file to parent list
-                parents.append(os.path.basename(l0_file))
+                parents.append(os.path.basename(str(l0_file)))
 
                 # Get TM packet(s) required to generate HK CDF for the current
                 # dataset
-                expected_packet = data_descr[
+                expected_packet = data_descr["packet"]
                 # Check that TM packet(s) are in the input l0 data
-                if
+                if not is_packet(expected_packet, l0["TM"]) and not is_packet(
+                    expected_packet, l0["TC"]
+                ):
+                    logger.info(
+                        f"No expected packet found for {dataset_name}"
+                        f" in {l0_file} [{','.join(expected_packet)}]\t[{task.job_id}]"
+                    )
                    continue
 
                 # Get function to process data
                 # IMPORTANT: function alias in import should have the same name
-                #
-                func =
+                # as the dataset alias in the descriptor
+                func = dataset_func.get(dataset_name)
+                if func is None:
+                    logger.error(
+                        f"No function found for {dataset_name}\t[{task.job_id}]"
+                    )
+                    failed_files.append(l0_file)
+                    break
 
                 # call the dataset-related function
                 try:
-                    logger.debug(f
+                    logger.debug(f"Running {func}\t[{task.job_id}]")
                     result = func(l0, task)
-                except:
+                except Exception as e:
                     # TODO catch exception in the ROC database
-                    logger.exception(
+                    logger.exception(
+                        f'Running "{func}" function has failed\t[{task.job_id}]: \n{e}'
+                    )
                     # TODO - Add the current failed dataset processing to failed_files
+                    failed_files.append(l0_file)
                     continue
 
                 # Make sure result is a numpy array and not a NoneType
                 if result is None or result.shape[0] == 0:
-                    logger.debug(
+                    logger.debug(
+                        f"Returned {dataset_name} dataset array"
+                        f" is empty for {l0_file}\t[{task.job_id}]"
+                    )
                 else:
-                    logger.debug(
+                    logger.debug(
+                        f"{result.shape[0]} {dataset_name} dataset samples"
+                        f" returned from {l0_file}\t[{task.job_id}]"
+                    )
 
                 # If data is empty
                 if data.shape[0] == 0:
@@ -601,116 +715,131 @@ def l0_to_trange_cdf(task, task_name, l0_file_list, output_dir,
         # Checking resulting data length
         nrec = data.shape[0]
         if nrec == 0:
-            logger.warning(
+            logger.warning(
+                "No data for dataset"
+                f" {dataset_name}: skip output cdf creation\t[{task.job_id}]"
+            )
             continue
 
         # reorder the data by increasing time
         data = order_by_increasing_time(data, unique=unique)
 
         # Generation date
-        generation_date = datetime.utcnow().
-        logger.debug(
+        generation_date = datetime.utcnow().strftime(INPUT_DATETIME_STRFTIME)
+        logger.debug(
+            f'Set "Generation_date" attr. value to {generation_date}\t[{task.job_id}]'
+        )
 
         # file ID
         file_id = str(uuid.uuid4())
-        logger.debug(f'Set "File_ID" attr. value to {file_id}')
+        logger.debug(f'Set "File_ID" attr. value to {file_id}\t[{task.job_id}]')
 
         # Re-define datetime and parents g.attribute for time range CDF data
         # products
         if monthly:
             # Get number of days in the start_time month
-            mday_num = calendar.monthrange(
-                start_time.year, start_time.month)[1]
+            mday_num = calendar.monthrange(start_time.year, start_time.month)[1]
             # Get latest day of the month
-            mday_end = datetime(start_time.year, start_time.month,
+            mday_end = datetime(start_time.year, start_time.month, 1) + timedelta(
+                days=mday_num - 1
+            )
             # Build datetime metadata used to generate time ranged file name
-            l0_datetime =
+            l0_datetime = "-".join(
+                [
+                    start_time.strftime(TIME_DAILY_STRFORMAT),
+                    mday_end.strftime(TIME_DAILY_STRFORMAT),
+                ]
+            )
         else:
-            l0_datetime =
+            l0_datetime = "-".join(
+                [
+                    start_time.strftime(CDF_TRANGE_STRFORMAT),
+                    end_time.strftime(CDF_TRANGE_STRFORMAT),
+                ]
+            )
+        l0_parents = "L0>" + ", ".join(parents)
 
         # Set CDF global attributes using first l0_file metadata in the list
-        with h5py.File(l0_file_list[0],
-            metadata = init_cdf_global(
+        with h5py.File(l0_file_list[0], "r") as l0:
+            metadata = init_cdf_global(
+                l0.attrs,
+                task,
+                master_path,
+                overwrite={
+                    "Datetime": l0_datetime,
+                    "Parents": l0_parents,
+                    "File_ID": file_id,
+                    "Generation_date": generation_date,
+                    "Data_version": data_version,
+                    "MODS": data_descr["mods"],
+                },
+            )
 
         # Generate output CDF filename and open it
-        filepath = generate_filepath(
+        filepath = generate_filepath(
+            task, metadata, "cdf", is_cdag=is_cdag, overwrite=overwrite
+        )
 
         # Get the instance of the output target
         target = task.outputs[dataset_name]
 
         # Add SPICE SCLK kernel as an entry
         # of the "Kernels" g. attr
-        sclk_file = get_spice_kernels(
+        sclk_file = get_spice_kernels(
+            time_instance=time_instance, pattern="solo_ANC_soc-sclk"
+        )
         if sclk_file:
-            metadata[
+            metadata["SPICE_KERNELS"] = sclk_file[-1]
         else:
-            logger.warning(
+            logger.warning(
+                f"No SPICE SCLK kernel saved for {filepath}\t[{task.job_id}]"
+            )
 
         # open the target to update its status according to errors etc
         with target.activate():
-            #
+            # create the file for the CDF containing results
+            with CDF(filepath, master_path) as cdf:
+                try:
+                    # write zVariable data and associated variable attributes in
+                    # the CDF
+                    time_min, time_max, nrec = put_cdf_zvars(
+                        cdf, data, start_time=start_time, end_time=end_time
+                    )
+                    if nrec > 0:
+                        # Update TIME_MIN, TIME_MAX (in julian days)
+                        metadata["TIME_MIN"] = (
+                            str(time_instance.tt2000_to_utc(time_min)) + "Z"
+                        ).replace(" ", "T")
+                        metadata["TIME_MAX"] = (
+                            str(time_instance.tt2000_to_utc(time_max)) + "Z"
+                        ).replace(" ", "T")
+
+                        # write global attribute entries on the CDF
+                        put_cdf_global(cdf, metadata)
+                    else:
+                        logger.warning(
+                            f"No data found between {start_time} and {end_time}"
+                            f" to be written into {filepath}\t[{task.job_id}]"
+                        )
+
+                except Exception as e:
+                    logger.exception(
+                        f"{filepath} production has failed\t[{task.job_id}]:\n{e}"
+                    )
+                    cdf.attrs["Validate"] = "-1"
                    failed_files.append(filepath)
-                finally:
-                    if cdf:
-                        cdf.close()
 
         if nrec == 0:
-            logger.info(f
+            logger.info(f"Removing empty file {filepath}...\t[{task.job_id}]")
             os.remove(filepath)
-            filepath =
+            filepath = ""
         elif os.path.isfile(filepath):
             processed_files.append(filepath)
-            logger.info(f
+            logger.info(f"{filepath} saved\t[{task.job_id}]")
             output_file_list.append(filepath)
         else:
             failed_files.append(filepath)
-            logger.error(f
+            logger.error(f"Writing {filepath} has failed!\t[{task.job_id}]")
 
         # Set output target filepath
         target.filepath = filepath
@@ -718,19 +847,20 @@ def l0_to_trange_cdf(task, task_name, l0_file_list, output_dir,
     return output_file_list
 
 
-def get_l0_trange(l0_files, minmax=False):
+def get_l0_trange(l0_files: list, minmax: bool = False) -> tuple:
     """
     Get start_time/end_time from an input list of L0 files.
 
     :param l0_files: List of L0 files for which start_time/end_time must be extracted
+    :type l0_files: list
     :param minmax: If True, return the minimal start_time value and maximal end_time value from over all the L0 files.
-    :
+    :type minmax: bool
+    :return: Input L0 files start_time/end_time (as datetime object)
+    :rtype: tuple
     """
 
     if not isinstance(l0_files, list):
-        return None
+        raise TypeError
 
     # Get number of l0_files
     nl0 = len(l0_files)
@@ -740,17 +870,18 @@ def get_l0_trange(l0_files, minmax=False):
     end_time = [None] * nl0
     for i, l0_file in enumerate(l0_files):
         try:
-            with h5py.File(l0_file,
+            with h5py.File(l0_file, "r") as l0:
                 # Get TIME_MIN/TIME_MAX L0 attributes value as datetime
                 start_time[i] = datetime.strptime(
-                    l0.attrs[
+                    l0.attrs["TIME_MIN"], TIME_ISO_STRFORMAT
+                )
                 end_time[i] = datetime.strptime(
-                    l0.attrs[
+                    l0.attrs["TIME_MAX"], TIME_ISO_STRFORMAT
+                )
+        except Exception as e:
+            logger.exception(f"Cannot parse {l0_file}: \n{e}")
 
     if minmax:
-        return
+        return min(start_time), max(end_time)
     else:
         return start_time, end_time
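get_l0_trange() above reads the TIME_MIN/TIME_MAX global attributes of each L0 HDF5 file and parses them with the TIME_ISO_STRFORMAT constant from roc.film.constants. The usage sketch below shows that step in isolation; the file name and the format string value are assumptions made for the example, not values taken from the package.

# Sketch only: read the L0 time-range attributes with h5py.
from datetime import datetime
import h5py

TIME_ISO_STRFORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"  # assumed value for illustration

with h5py.File("solo_L0_rpw_20230101_V01.h5", "r") as l0:  # hypothetical file
    time_min = datetime.strptime(l0.attrs["TIME_MIN"], TIME_ISO_STRFORMAT)
    time_max = datetime.strptime(l0.attrs["TIME_MAX"], TIME_ISO_STRFORMAT)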