roc-film 1.13.5__py3-none-any.whl → 1.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- roc/__init__.py +2 -1
- roc/film/__init__.py +2 -2
- roc/film/commands.py +372 -323
- roc/film/config/__init__.py +0 -1
- roc/film/constants.py +101 -65
- roc/film/descriptor.json +127 -96
- roc/film/exceptions.py +28 -27
- roc/film/tasks/__init__.py +16 -16
- roc/film/tasks/cat_solo_hk.py +86 -74
- roc/film/tasks/cdf_postpro.py +438 -309
- roc/film/tasks/check_dds.py +39 -45
- roc/film/tasks/db_to_anc_bia_sweep_table.py +381 -0
- roc/film/tasks/dds_to_l0.py +232 -180
- roc/film/tasks/export_solo_coord.py +147 -0
- roc/film/tasks/file_handler.py +91 -75
- roc/film/tasks/l0_to_hk.py +117 -103
- roc/film/tasks/l0_to_l1_bia_current.py +38 -30
- roc/film/tasks/l0_to_l1_bia_sweep.py +417 -329
- roc/film/tasks/l0_to_l1_sbm.py +250 -208
- roc/film/tasks/l0_to_l1_surv.py +185 -130
- roc/film/tasks/make_daily_tm.py +40 -37
- roc/film/tasks/merge_tcreport.py +77 -71
- roc/film/tasks/merge_tmraw.py +101 -88
- roc/film/tasks/parse_dds_xml.py +21 -20
- roc/film/tasks/set_l0_utc.py +51 -49
- roc/film/tests/cdf_compare.py +565 -0
- roc/film/tests/hdf5_compare.py +84 -62
- roc/film/tests/test_dds_to_l0.py +93 -51
- roc/film/tests/test_dds_to_tc.py +8 -11
- roc/film/tests/test_dds_to_tm.py +8 -10
- roc/film/tests/test_film.py +161 -116
- roc/film/tests/test_l0_to_hk.py +64 -36
- roc/film/tests/test_l0_to_l1_bia.py +10 -14
- roc/film/tests/test_l0_to_l1_sbm.py +14 -19
- roc/film/tests/test_l0_to_l1_surv.py +68 -41
- roc/film/tests/test_metadata.py +21 -20
- roc/film/tests/tests.py +743 -396
- roc/film/tools/__init__.py +5 -5
- roc/film/tools/dataset_tasks.py +34 -2
- roc/film/tools/file_helpers.py +390 -269
- roc/film/tools/l0.py +402 -324
- roc/film/tools/metadata.py +147 -127
- roc/film/tools/skeleton.py +12 -17
- roc/film/tools/tools.py +109 -92
- roc/film/tools/xlsx2skt.py +161 -139
- {roc_film-1.13.5.dist-info → roc_film-1.14.0.dist-info}/LICENSE +127 -125
- roc_film-1.14.0.dist-info/METADATA +60 -0
- roc_film-1.14.0.dist-info/RECORD +50 -0
- {roc_film-1.13.5.dist-info → roc_film-1.14.0.dist-info}/WHEEL +1 -1
- roc/film/tasks/l0_to_anc_bia_sweep_table.py +0 -348
- roc_film-1.13.5.dist-info/METADATA +0 -120
- roc_film-1.13.5.dist-info/RECORD +0 -48
roc/film/tools/file_helpers.py
CHANGED
@@ -1,48 +1,64 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
-
+
 import os
 from glob import glob
 from datetime import datetime, timedelta
 import uuid
+import calendar

 import h5py
 import numpy as np
+from spacepy.pycdf import CDF

 from poppy.core import MissingArgument
 from poppy.core.logger import logger
+from poppy.core.task import Task
+from poppy.core.pipeline import Pipeline

 from roc.rpl.time import Time

 # Import methods to extract data from RPW packets
+from roc.film.tools.dataset_tasks import dataset_func

 from roc.rap.tasks.utils import order_by_increasing_time

-from roc.film.tools.metadata import
-
+from roc.film.tools.metadata import (
+    init_cdf_global,
+    set_logical_file_id,
+    get_spice_kernels,
+)
+from roc.film.exceptions import (
+    UnknownPipeline,
+    LoadDataSetError,
+    NoData,
+)
 from roc.film.tools import valid_data_version, get_datasets
-from roc.film.constants import
+from roc.film.constants import (
+    TIME_ISO_STRFORMAT,
+    CDF_TRANGE_STRFORMAT,
+    TIME_DAILY_STRFORMAT,
+    INPUT_DATETIME_STRFTIME,
+)

 # Import methods to extract data from RPW packets
-from roc.film.tools.dataset_tasks import *
-
-__all__ = ['build_file_basename',
-           'generate_filepath',
-           'put_cdf_global',
-           'is_packet',
-           'put_cdf_zvars',
-           'l0_to_trange_cdf',
-           'get_l0_file',
-           'get_l0_files',
-           'get_output_dir',
-           'get_master_cdf_dir',
-           'is_output_dir']
-
-from roc.film.tools.tools import extract_file_fields

-
-
-
+__all__ = [
+    "build_file_basename",
+    "generate_filepath",
+    "put_cdf_global",
+    "is_packet",
+    "put_cdf_zvars",
+    "l0_to_trange_cdf",
+    "get_l0_file",
+    "get_l0_files",
+    "get_output_dir",
+    "get_master_cdf_dir",
+    "is_output_dir",
+]
+
+
+def build_file_basename(metadata: dict, is_cdag: bool = False) -> str:
     """
     "Build Solar Orbiter convention basename (without extension)
     using metadata.
@@ -50,43 +66,50 @@ def build_file_basename(metadata,
     See SOL-SGS-TN-0009 for more details about SolO data standards.

     :param metadata: dictionary contains output metadata used to build filename
+    :type metadata: dict
     :param is_cdag: If True, add a '-cdag' suffix to the descriptor field of the filename
-    :
+    :type is_cdag: bool
+    :return: RPW file basename
+    :rtype: str
     """

     # if Logical_file_id attribute exists, it should contain
     # the file name without the extension
-    if not is_cdag and
-        return str(metadata[
+    if not is_cdag and "Logical_file_id" in metadata:
+        return str(metadata["Logical_file_id"])

     # Add -cdag suffix if input is_cdag=True
     if not is_cdag:
-        cdag_suffix =
+        cdag_suffix = ""
     else:
-        cdag_suffix =
+        cdag_suffix = "-cdag"

     # Else build the file basename from scratch
     # file basename mandatory fields
     fields = [
-        str(metadata[
-        str(metadata[
-        str(metadata[
-        str(metadata[
-
+        str(metadata["Source_name"]).split(">")[0].lower(),
+        str(metadata["LEVEL"]).split(">")[0],
+        str(metadata["Descriptor"]).split(">")[0].lower() + cdag_suffix,
+        str(metadata["Datetime"]),
+        "V" + str(metadata["Data_version"]),
     ]

     # Add free_field at the end of the file basename if it exists
-    free_field = metadata.get(
-    if free_field:
-        fields.append(str(metadata[
+    free_field = metadata.get("Free_field", "")
+    if free_field and str(free_field).lower().strip() not in ["none", ""]:
+        fields.append(str(metadata["Free_field"]))

-    return
+    return "_".join(fields)


-def generate_filepath(
-
-
-
+def generate_filepath(
+    task: type[Task],
+    metadata: dict,
+    extension: str,
+    output_dir: str = None,
+    is_cdag: bool = False,
+    overwrite: bool = False,
+) -> str:
     """
     Generate output filepath from input task and metadata info

@@ -96,12 +119,13 @@ def generate_filepath(task, metadata, extension,
     :param output_dir: Directory path of the output file. (If not passed, then try to get it from pipeline properties)
     :param is_cdag: If True, add a '-cdag' suffix in the descriptor of the filename
     :param overwrite: If True, overwrite existing file
-    :return:
+    :return: Output file path
+    :rtype: str
     """

     # Add dot '.' to the extension if not provided
-    if not extension.startswith(
-        extension =
+    if not extension.startswith("."):
+        extension = "." + extension

     # Build output filepath from pipeline task properties, metadata
     # and extension
@@ -109,85 +133,88 @@ def generate_filepath(task, metadata, extension,

     if not output_dir:
         output_dir = get_output_dir(task.pipeline)
-    filepath = os.path.join(output_dir,
-                            filename)
+    filepath = os.path.join(output_dir, filename)

     # check if the file to generate is already existing, and remove it
     # if --overwrite input keyword is set
     if os.path.isfile(filepath) and overwrite:
-        logger.warning(f
+        logger.warning(f"Existing {filepath} will be overwritten!")
         os.remove(filepath)
     elif os.path.isfile(filepath):
-        logger.info(
+        logger.info(
+            f"{filepath} already exists, create a new version of the data file."
+        )
         # Else, if the output file already exists, create a new
         # version of the file (i.e., increment the data_version)

         # Get file basename (without version and extension)
-        data_version = metadata[
-        basename = os.path.basename(filename).split(f
+        data_version = metadata["Data_version"]
+        basename = os.path.basename(filename).split(f"_V{data_version}")[0]

         # Check number of files in the output directory which have the
         # same basename
-        pattern = os.path.join(output_dir, basename +
+        pattern = os.path.join(output_dir, basename + "*" + extension)
         file_number = len(glob(pattern))

         # Increment the data_version
-        metadata[
+        metadata["Data_version"] = valid_data_version(file_number + 1)

         # Update Logical_file_id
-        metadata[
+        metadata["Logical_file_id"] = set_logical_file_id(metadata)

         # Update filepath
         filename = build_file_basename(metadata, is_cdag=is_cdag) + extension
         output_dir = get_output_dir(task.pipeline)
-        filepath = os.path.join(output_dir,
-
-        logger.debug(f'New file version V{metadata["Data_version"]} has been defined')
+        filepath = os.path.join(output_dir, filename)
+        logger.debug(f"New file version V{metadata['Data_version']} has been defined")

     logger.debug(f'Output file basename has been generated from metadata: "{filename}"')

     return filepath


-def get_l0_file(pipeline):
+def get_l0_file(pipeline) -> str:
     try:
         return pipeline.args.l0_file[0]
-    except:
+    except Exception:
         # If not defined as input argument, then assume that it is already
         # defined as target input
         pass


-def get_l0_files(pipeline):
+def get_l0_files(pipeline: Pipeline) -> list:
     try:
         l0_files = pipeline.args.l0_files
         if not isinstance(l0_files, list):
             l0_files = [l0_files]
         return l0_files
-    except:
+    except Exception:
         # If not defined as input argument, then assume that it is already
         # defined as target input
         pass


-def put_cdf_global(cdf, metadata):
+def put_cdf_global(cdf: CDF, metadata: dict) -> bool:
     """
     Write the global attributes into the input CDF.

     :param cdf: input CDF object
-    :
+    :type cdf: spacepy.pycdf.CDF
+    :param metadata: Global attributes to save in the CDF
+    :type metadata: dict
     :return: True, if succeeded, else raise an exception
+    :rtype: bool
     """

     for key, value in metadata.items():
-        if not
-            logger.debug(f
+        if key not in cdf.attrs:
+            logger.debug(f"{key} global attribute not found in CDF: force insertion!")
         cdf.attrs[key] = value

     return True


-def get_master_cdf_dir(task):
+def get_master_cdf_dir(task: type[Task]) -> str:
     """
     Try to load the master_dir directory from :
     1. the input argument --master-cdf-dir
@@ -195,85 +222,91 @@ def get_master_cdf_dir(task):
     3. the OS environment
     If it does not exist, set to ".".

-    :param task:
-    :
+    :param task: Poppy pipeline task instance
+    :type task: Task
+    :return: Master CDF directory
+    :rtype: str
     """
-
-    master_cdf_dir = task.pipeline.get('master_cdf_dir', default=None)
+    master_cdf_dir = task.pipeline.get("master_cdf_dir", default=None)

     if master_cdf_dir is None:
         # 2. Else from the config.json
-        if
+        if (
+            "RPW_CDF_MASTER_PATH"
+            in task.pipeline.properties.configuration["environment"]
+        ):
             master_cdf_dir = task.pipeline.properties.configuration[
-
+                "environment.RPW_CDF_MASTER_PATH"
+            ]
         # 3. Else from the OS environment
-        elif
-            master_cdf_dir = os.environ[
+        elif "RPW_CDF_MASTER_PATH" in os.environ:
+            master_cdf_dir = os.environ["RPW_CDF_MASTER_PATH"]
         # Otherwise raise an exception
         else:
-            raise MissingArgument(
+            raise MissingArgument("No value found for master_cdf_dir!")
     else:
         master_cdf_dir = master_cdf_dir[0]

     return master_cdf_dir


-def get_output_dir(pipeline):
+def get_output_dir(pipeline: Pipeline) -> str:
     """
     Generate the output directory from the information provided in the
     pipeline properties and metadata.

-    :param
-    :
+    :param pipeline: POPPy pipeline instance
+    :type pipeline: Poppy Task class
     :return: output_dir
+    :rtype: str
     """

-    # Initialize output
-    output_dir = None
-
     # get pipeline id (can be "RGTS" or "RODP")
-    pipeline_id = pipeline.properties.configuration[
+    pipeline_id = pipeline.properties.configuration["environment.ROC_PIP_NAME"]

     # Case for the ROC Ground Test SGSE (RGTS)
-    if pipeline_id ==
+    if pipeline_id == "RGTS":
         # Generate output directory for current test
         try:
             output_dir = pipeline.args.test_log.output_directory(pipeline)
-        except:
+        except Exception as e:
+            logger.debug(e)
             output_dir = pipeline.output

     # Case for the RPW Operation and Data Pipeline (RODP)
-    elif pipeline_id ==
+    elif pipeline_id == "RODP":
         # First get the input LZ File object from the properties
         try:
             output_dir = pipeline.output
-        except:
-
+        except Exception:
+            logger.error("NO OUTPUT DIRECTORY DEFINED, ABORTING!")
+            raise
     else:
-        raise UnknownPipeline(f
-                              f' {pipeline_id}, ABORTING!')
+        raise UnknownPipeline(f"UNKNOWN PIPELINE TYPE: {pipeline_id}, ABORTING!")

     return output_dir


-def get_products_dir(pipeline):
+def get_products_dir(pipeline: Pipeline) -> str:
     """
     Get the path of the directory where final products must be moved.

     :param pipeline: POPPy pipeline instance
+    :type pipeline: Poppy Pipeline class
     :return: string containing the products directory path
+    :rtype: str
     """
-
-    products_dir = pipeline.get('products_dir', default=None, args=True)
+    products_dir = pipeline.get("products_dir", default=None, args=True)

     if products_dir is None:
         # 2. Else from the config.json
-        if
+        if "ROC_PRODUCTS_PATH" in pipeline.properties.configuration["environment"]:
             products_dir = pipeline.properties.configuration[
-
+                "environment.ROC_PRODUCTS_PATH"
+            ]
         # 3. Else from the OS environment
-        elif
-            products_dir = os.environ[
+        elif "ROC_PRODUCTS_PATH" in os.environ:
+            products_dir = os.environ["ROC_PRODUCTS_PATH"]
         # Otherwise return "."
         else:
             products_dir = None
@@ -283,36 +316,38 @@ def get_products_dir(pipeline):
     return products_dir


-def is_output_dir(output_dir, products_dir=None):
+def is_output_dir(output_dir: str, products_dir: str = None) -> bool:
     """
     Check if the output directory exists and if its basename is found in the
     products_dir.

     :param output_dir: String containing output directory
+    :type output_dir: str
     :param products_dir: String containing products directory
                          (if provided, check if output_dir basename
                          is already saved inside)
+    :type products_dir: str
     :return: True if output_dir is found, False otherwise
+    :rtype: bool
     """

     # Get output_dir value
     if output_dir:
         output_dir_basename = os.path.basename(output_dir)
     else:
-        raise MissingArgument(
+        raise MissingArgument("Output directory is not defined!")

     # Check if output_dir already exists
     if os.path.isdir(output_dir):
-        logger.debug(f
+        logger.debug(f"{output_dir} already created")
         return True

     # Check products_dir
     if products_dir:
         # Build target directory path
-        target_dir = os.path.join(
-            products_dir, os.path.basename(output_dir))
+        target_dir = os.path.join(products_dir, os.path.basename(output_dir))
         if os.path.isdir(target_dir):
-            logger.debug(f
+            logger.debug(f"{output_dir_basename} already found in {products_dir}")
             return True
     else:
         logger.debug("Input argument 'products_dir' is not defined")
@@ -320,13 +355,16 @@ def is_output_dir(output_dir, products_dir=None):
     return False


-def is_packet(expected_packets, packets):
+def is_packet(expected_packets: list, packets: list) -> bool:
     """
     Check if packet(s) is/are in the input packet_list

     :param expected_packets: Name of the packet(s) expected for the dataset
-    :
+    :type expected_packets: list
+    :param packets: Input packet(s) provided as a h5 group
+    :type packets: list
     :return: True if at least one expected packet found, False if all expected packets not found
+    :rtype: bool
     """

     if not isinstance(expected_packets, list):
@@ -340,51 +378,58 @@ def is_packet(expected_packets, packets):
     return False


-def put_cdf_zvars(
-
-
+def put_cdf_zvars(
+    cdf: CDF, data: np.ndarray, start_time: datetime = None, end_time: datetime = None
+) -> tuple:
     """
     Write input data into CDF zVariable

     :param cdf: pycdf.CDF object to update
-    :
+    :type cdf: spacepy.pycdf.CDF
+    :param data: zVariable data to write into the CDF
+    :type data: np.ndarray
     :param start_time: only store data after start_time
+    :type start_time: datetime
     :param end_time: only store date before end_time
-    :
+    :type end_time: datetime
+    :return: time_min, time_max and nrec:
+    :rtype: tuple
     """

     # check size of the data
     nrec = data.shape[0]
     if nrec == 0:
-        raise NoData(
-
+        raise NoData(
+            message="Data for {0} is empty".format(cdf.pathname), ll=logger.warning
+        )

     # Check that 'epoch' variable exists, convert it to tt2000 and filter data
     # between start_time/end_time if required
     try:
-        epoch = data[
-    except:
-
-
+        epoch = data["epoch"][:].astype(float)
+    except KeyError:
+        logger.error('No valid "epoch" variable found in the input data')
+        raise
     else:
+        # Instantiate roc.rpl.time.Time class
+        time_instance = Time()

         # Get start_time in TT2000
         if start_time:
             # Convert start_time into TT2000
-            start_time_tt2000 = float(
+            start_time_tt2000 = float(time_instance.utc_to_tt2000(start_time))
         else:
             # By default, get lowest possible value for TT2000 datatype
-            start_time_tt2000 = -2**64
+            start_time_tt2000 = -(2**64)

         # Get end_time in TT2000
         if end_time:
             # Convert end_time into TT2000
-            end_time_tt2000 = float(
+            end_time_tt2000 = float(time_instance.utc_to_tt2000(end_time))
         else:
             # By default, get highest possible value for TT2000 datatype
             end_time_tt2000 = 2**64

-
         # Define array indices to keep between start_time/end_time
         idx = (epoch >= start_time_tt2000) & (epoch <= end_time_tt2000)

@@ -399,64 +444,98 @@ def put_cdf_zvars(cdf, data,
         # Fill Epoch CDF zVariable
         epoch_min = epoch.min()
         epoch_max = epoch.max()
-        cdf[
-        cdf[
-
+        cdf["Epoch"] = epoch
+        cdf["Epoch"].attrs["SCALEMIN"] = time_instance.tt2000_to_utc(
+            epoch_min, to_datetime=True
+        )
+        cdf["Epoch"].attrs["SCALEMAX"] = time_instance.tt2000_to_utc(
+            epoch_max, to_datetime=True
+        )

         # Fill other CDF zVariables
         for i, name in enumerate(data.dtype.names):
-            #
-            if name.lower() ==
+            # skip if epoch (already processed above)
+            if name.lower() == "epoch":
                 continue
             else:
-                logger.debug(f
-
-
+                logger.debug(f"Writing {nrec} records for {name} zVariable...")
+
+            if name.lower() == "quality_bitmask":
+                # Make sure quality_bitmask is a UINT16
+                data_i = data[name][idx].astype(np.uint16)
+            else:
+                data_i = data[name][idx]

+            # Write data into the zVariable
             cdf[name.upper()] = data_i

             # Get min/max value of the current zVariable
-            cdf[name.upper()].attrs[
-            cdf[name.upper()].attrs[
+            cdf[name.upper()].attrs["SCALEMIN"] = data_i.min()
+            cdf[name.upper()].attrs["SCALEMAX"] = data_i.max()

         # Set quality_flag
         logger.debug('Set "QUALITY_FLAG" zVar default value to 3')
-        cdf[
-        cdf[
-        cdf[
+        cdf["QUALITY_FLAG"] = np.full(nrec, 3, dtype=np.uint8)
+        cdf["QUALITY_FLAG"].attrs["SCALEMIN"] = 0
+        cdf["QUALITY_FLAG"].attrs["SCALEMAX"] = 5

         return epoch_min, epoch_max, nrec


-def l0_to_trange_cdf(
-
-
-
-
-
-
-
-
+def l0_to_trange_cdf(
+    task: type[Task],
+    task_name: str,
+    l0_file_list: list,
+    output_dir: str,
+    time_instance: Time = None,
+    start_time: datetime = None,
+    end_time: datetime = None,
+    failed_files: list = None,
+    processed_files: list = None,
+    monthly: bool = False,
+    unique: bool = False,
+    overwrite: bool = False,
+    is_cdag: bool = True,
+):
     """
     Task to generate time range CDF from l0 file(s)

-    :param task: instance of the task
+    :param task: instance of the task:
+    :type task: Poppy Task class type object
     :param task_name: string containing the name of the task (as defined in descriptor)
-    :
+    :type task_name: str
+    :param l0_file_list: Input L0 files
+    :type l0_file_list: list
     :param output_dir: path of the output directory
+    :type output_dir: str
+    :param time_instance: roc.rpl.time.Time object:
+    :type time_instance: roc.rpl.time.Time
     :param start_time: start time of the data written in the output CDF
+    :type start_time: datetime
     :param end_time: end time of the data written in the output CDF
+    :type end_time: datetime
+    :param failed_files: CDF files for which saving data has failed
+    :type failed_files: list
+    :param processed_files: CDF files for which saving data has succeeded
+    :type processed_files: list
     :param monthly: Produce monthly file (Datetime format will be YYYYMMDD1-YYYYMMDD2,
                     where YYYYMMDD1 is the first day of the month and YYYYMMDD2 is the last day).
                     Month number is extracted from start_time value.
+    :type monthly: bool
     :param unique: If True, make sure that return data are uniquely stored
+    :type unique: bool
     :param overwrite: If True, overwrite existing output files
-    :
+    :type overwrite: bool
+    :param is_cdag: If True, generate 'CDAG' output files (CDAG == private data files)
+    :type is_cdag: bool
     :return: output CDF filepath if it has been successfully generated, None otherwise
+    :rtype: str
     """

-
-
+    if not processed_files:
+        processed_files = []
+    if not failed_files:
+        failed_files = []

     # Initialize output list (containing filepath)
     output_file_list = []
@@ -468,15 +547,20 @@ def l0_to_trange_cdf(task, task_name, l0_file_list, output_dir,
     # the output filename
     # (used to indicate preliminary files to distributed to the CDAG members only)
     if is_cdag:
-        logger.info('Producing "cdag" output CDF')
+        logger.info(f'Producing "cdag" output CDF [{task.job_id}]')

     # Retrieve list of output datasets to produce for the given task
     try:
         dataset_list = get_datasets(task, task_name)
-    except:
-        raise LoadDataSetError(
+    except Exception:
+        raise LoadDataSetError(
+            f"Cannot load the list of datasets to produce for {task_name} [{task.job_id}]"
+        )
     else:
-        logger.debug(
+        logger.debug(
+            "Produce L1 CDF file(s) for the following dataset(s):"
+            f" {[ds['name'] for ds in dataset_list]} [{task.job_id}]"
+        )

     # Get list of input l0 file(s)
     if not isinstance(l0_file_list, list):
@@ -492,103 +576,122 @@ def l0_to_trange_cdf(task, task_name, l0_file_list, output_dir,

     # Get start_time for output CDF (use time min of L0 files if not defined)
     if not start_time:
-        start_time = task.pipeline.get(
-            'start_time', default=[min(l0_time_min)])[0]
+        start_time = task.pipeline.get("start_time", default=[min(l0_time_min)])[0]

-    logger.debug(f
+    logger.debug(f"start_time value is {start_time} [{task.job_id}]")

     # Get end_time for output CDF (use time max of L0 files if not defined)
     if not end_time:
-        end_time = task.pipeline.get(
+        end_time = task.pipeline.get("end_time", default=[max(l0_time_max)])[0]

-    logger.debug(f
+    logger.debug(f"end_time value is {end_time} [{task.job_id}]")

     # Loops over each output dataset to produce for the current task
     for current_dataset in dataset_list:
+        dataset_name = current_dataset["name"]
+        data_descr = current_dataset["descr"]
+        data_version = current_dataset["version"]

-
-
-
-
-        logger.debug(f'Running file production for the dataset {dataset_name} (V{data_version})')
+        logger.debug(
+            "Running file production for the dataset "
+            f"{dataset_name} (V{data_version}) [{task.job_id}]"
+        )

         # get the path to the master CDF file of this dataset
         master_cdf_dir = get_master_cdf_dir(task)

         # Get master cdf filename from descriptor
-        master_cdf_file = data_descr[
+        master_cdf_file = data_descr["template"]

         # Build master file pattern
-        master_pattern = os.path.join(master_cdf_dir,
-                                      master_cdf_file)
+        master_pattern = os.path.join(master_cdf_dir, master_cdf_file)

         # Get master file path
         master_path = glob(master_pattern)

         # Check existence
         if not master_path:
-            raise FileNotFoundError(
-
-
+            raise FileNotFoundError(
+                "{0} master CDF not found for the dataset {1}! [{2}]".format(
+                    master_pattern, dataset_name, task.job_id
+                )
+            )
         else:
             master_path = sorted(master_path)[-1]
-            logger.info(
-
-
+            logger.info(
+                'Producing dataset "{0}" with the master CDF "{1}" [{2}]'.format(
+                    dataset_name, master_path, task.job_id
+                )
+            )

         # Initialize loop variables
         data = np.empty(0)
-        nrec = 0
         parents = []
         # Loop over l0_files list
         for i, l0_file in enumerate(l0_file_list):
-
-            with h5py.File(l0_file, 'r') as l0:
-
+            with h5py.File(l0_file, "r") as l0:
                 # Skip L0 file for which start_time/end_time is not inside the
                 # time range
                 if l0_time_max[i] < start_time or l0_time_min[i] > end_time:
-                    logger.debug(
-
+                    logger.debug(
+                        f"{l0_file} is outside the time range: "
+                        f"[{start_time}, {end_time}], skip it [{task.job_id}]"
+                    )
                     continue
                 else:
-                    logger.debug(
+                    logger.debug(
+                        f"Processing {l0_file} [{l0_file_len - i - 1}] [{task.job_id}]"
+                    )

                 # Append current l0 file to parent list
-                parents.append(os.path.basename(l0_file))
+                parents.append(os.path.basename(str(l0_file)))

                 # Get TM packet(s) required to generate HK CDF for the current
                 # dataset
-                expected_packet = data_descr[
+                expected_packet = data_descr["packet"]
                 # Check that TM packet(s) are in the input l0 data
-                if
-
-
-
+                if not is_packet(expected_packet, l0["TM"]) and not is_packet(
+                    expected_packet, l0["TC"]
+                ):
+                    logger.info(
+                        f"No expected packet found for {dataset_name}"
+                        f" in {l0_file} [{','.join(expected_packet)}] [{task.job_id}]"
+                    )
                     continue

                 # Get function to process data
                 # IMPORTANT: function alias in import should have the same name
-                #
-                func =
+                # as the dataset alias in the descriptor
+                func = dataset_func.get(dataset_name)
+                if func is None:
+                    logger.error(f"No function found for {dataset_name}")
+                    failed_files.append(l0_file)
+                    break

                 # call the dataset-related function
                 try:
-                    logger.debug(f
+                    logger.debug(f"Running {func} [{task.job_id}]")
                     result = func(l0, task)
-                except:
+                except Exception as e:
                     # TODO catch exception in the ROC database
-                    logger.exception(
+                    logger.exception(
+                        f'Running "{func}" function has failed [{task.job_id}]: \n{e}'
+                    )
                     # TODO - Add the current failed dataset processing to failed_files
-
+                    failed_files.append(l0_file)
                     continue

                 # Make sure result is a numpy array and not a NoneType
                 if result is None or result.shape[0] == 0:
-                    logger.debug(
-
+                    logger.debug(
+                        f"Returned {dataset_name} dataset array"
+                        f" is empty for {l0_file} [{task.job_id}]"
+                    )
                 else:
-                    logger.debug(
+                    logger.debug(
+                        f"{result.shape[0]} {dataset_name} dataset samples"
+                        f" returned from {l0_file} [{task.job_id}]"
+                    )

                 # If data is empty
                 if data.shape[0] == 0:
@@ -601,116 +704,131 @@ def l0_to_trange_cdf(task, task_name, l0_file_list, output_dir,
         # Checking resulting data length
         nrec = data.shape[0]
         if nrec == 0:
-            logger.warning(
+            logger.warning(
+                "No data for dataset"
+                f" {dataset_name}: skip output cdf creation [{task.job_id}]"
+            )
             continue

         # reorder the data by increasing time
         data = order_by_increasing_time(data, unique=unique)

         # Generation date
-        generation_date = datetime.utcnow().
-        logger.debug(
+        generation_date = datetime.utcnow().strftime(INPUT_DATETIME_STRFTIME)
+        logger.debug(
+            f'Set "Generation_date" attr. value to {generation_date} [{task.job_id}]'
+        )

         # file ID
         file_id = str(uuid.uuid4())
-        logger.debug(f'Set "File_ID" attr. value to {file_id}')
+        logger.debug(f'Set "File_ID" attr. value to {file_id} [{task.job_id}]')

         # Re-define datetime and parents g.attribute for time range CDF data
         # products
         if monthly:
             # Get number of days in the start_time month
-
-            mday_num = calendar.monthrange(
-                start_time.year, start_time.month)[1]
+            mday_num = calendar.monthrange(start_time.year, start_time.month)[1]
             # Get latest day of the month
-            mday_end = datetime(start_time.year, start_time.month,
-
+            mday_end = datetime(start_time.year, start_time.month, 1) + timedelta(
+                days=mday_num - 1
+            )
             # Build datetime metadata used to generate time ranged file name
-            l0_datetime =
-
+            l0_datetime = "-".join(
+                [
+                    start_time.strftime(TIME_DAILY_STRFORMAT),
+                    mday_end.strftime(TIME_DAILY_STRFORMAT),
+                ]
+            )
         else:
-            l0_datetime =
-
-
-
-
+            l0_datetime = "-".join(
+                [
+                    start_time.strftime(CDF_TRANGE_STRFORMAT),
+                    end_time.strftime(CDF_TRANGE_STRFORMAT),
+                ]
+            )
+        l0_parents = "L0>" + ", ".join(parents)

         # Set CDF global attributes using first l0_file metadata in the list
-        with h5py.File(l0_file_list[0],
-            metadata = init_cdf_global(
-
-
-
-
-
-
-
-
+        with h5py.File(l0_file_list[0], "r") as l0:
+            metadata = init_cdf_global(
+                l0.attrs,
+                task,
+                master_path,
+                overwrite={
+                    "Datetime": l0_datetime,
+                    "Parents": l0_parents,
+                    "File_ID": file_id,
+                    "Generation_date": generation_date,
+                    "Data_version": data_version,
+                    "MODS": data_descr["mods"],
+                },
+            )

         # Generate output CDF filename and open it
-        filepath = generate_filepath(
-
+        filepath = generate_filepath(
+            task, metadata, "cdf", is_cdag=is_cdag, overwrite=overwrite
+        )

         # Get the instance of the output target
         target = task.outputs[dataset_name]

         # Add SPICE SCLK kernel as an entry
         # of the "Kernels" g. attr
-        sclk_file = get_spice_kernels(
-
+        sclk_file = get_spice_kernels(
+            time_instance=time_instance, pattern="solo_ANC_soc-sclk"
+        )
         if sclk_file:
-            metadata[
+            metadata["SPICE_KERNELS"] = sclk_file[-1]
         else:
-            logger.warning(
-
+            logger.warning(
+                f"No SPICE SCLK kernel saved for {filepath} [{task.job_id}]"
+            )

         # open the target to update its status according to errors etc
         with target.activate():
-            #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # create the file for the CDF containing results
+            with CDF(filepath, master_path) as cdf:
+                try:
+                    # write zVariable data and associated variable attributes in
+                    # the CDF
+                    time_min, time_max, nrec = put_cdf_zvars(
+                        cdf, data, start_time=start_time, end_time=end_time
+                    )
+                    if nrec > 0:
+                        # Update TIME_MIN, TIME_MAX (in julian days)
+                        metadata["TIME_MIN"] = (
+                            str(time_instance.tt2000_to_utc(time_min)) + "Z"
+                        ).replace(" ", "T")
+                        metadata["TIME_MAX"] = (
+                            str(time_instance.tt2000_to_utc(time_max)) + "Z"
+                        ).replace(" ", "T")
+
+                        # write global attribute entries on the CDF
+                        put_cdf_global(cdf, metadata)
+                    else:
+                        logger.warning(
+                            f"No data found between {start_time} and {end_time}"
+                            f" to be written into {filepath} [{task.job_id}]"
+                        )
+
+                except Exception as e:
+                    logger.exception(
+                        f"{filepath} production has failed [{task.job_id}]:\n{e}"
+                    )
+                    cdf.attrs["Validate"] = "-1"
                     failed_files.append(filepath)
-            finally:
-                if cdf:
-                    cdf.close()

             if nrec == 0:
-                logger.info(f
+                logger.info(f"Removing empty file {filepath}... [{task.job_id}]")
                 os.remove(filepath)
-                filepath =
+                filepath = ""
             elif os.path.isfile(filepath):
                 processed_files.append(filepath)
-                logger.info(f
+                logger.info(f"{filepath} saved [{task.job_id}]")
                 output_file_list.append(filepath)
             else:
                 failed_files.append(filepath)
-                logger.error(f
+                logger.error(f"Writing {filepath} has failed! [{task.job_id}]")

             # Set output target filepath
             target.filepath = filepath
@@ -718,19 +836,21 @@ def l0_to_trange_cdf(task, task_name, l0_file_list, output_dir,
     return output_file_list


-def get_l0_trange(l0_files, minmax=False):
+def get_l0_trange(l0_files: list, minmax: bool = False) -> tuple:
     """
     Get start_time/end_time from an input list of L0 files.

-
     :param l0_files: List of L0 files for which start_time/end_time must be extracted
+    :type l0_files: list
     :param minmax: If True, return the minimal start_time value and maximal end_time value from over all the L0 files.
-    :
+    :type minmax: bool
+    :return: Input L0 files start_time/end_time (as datetime object)
+    :rtype: tuple
     """

     if not isinstance(l0_files, list):
         logger.error('Input "l0_files" must be a list!')
-        return None
+        return None, None

     # Get number of l0_files
     nl0 = len(l0_files)
@@ -740,17 +860,18 @@ def get_l0_trange(l0_files, minmax=False):
     end_time = [None] * nl0
     for i, l0_file in enumerate(l0_files):
         try:
-            with h5py.File(l0_file,
-
+            with h5py.File(l0_file, "r") as l0:
                 # Get TIME_MIN/TIME_MAX L0 attributes value as datetime
                 start_time[i] = datetime.strptime(
-                    l0.attrs[
+                    l0.attrs["TIME_MIN"], TIME_ISO_STRFORMAT
+                )
                 end_time[i] = datetime.strptime(
-                    l0.attrs[
-
-
+                    l0.attrs["TIME_MAX"], TIME_ISO_STRFORMAT
+                )
+        except Exception as e:
+            logger.exception(f"Cannot parse {l0_file}: \n{e}")

     if minmax:
-        return
+        return min(start_time), max(end_time)
     else:
         return start_time, end_time