roc-film 1.13.5-py3-none-any.whl → 1.14.0-py3-none-any.whl

Files changed (52)
  1. roc/__init__.py +2 -1
  2. roc/film/__init__.py +2 -2
  3. roc/film/commands.py +372 -323
  4. roc/film/config/__init__.py +0 -1
  5. roc/film/constants.py +101 -65
  6. roc/film/descriptor.json +127 -96
  7. roc/film/exceptions.py +28 -27
  8. roc/film/tasks/__init__.py +16 -16
  9. roc/film/tasks/cat_solo_hk.py +86 -74
  10. roc/film/tasks/cdf_postpro.py +438 -309
  11. roc/film/tasks/check_dds.py +39 -45
  12. roc/film/tasks/db_to_anc_bia_sweep_table.py +381 -0
  13. roc/film/tasks/dds_to_l0.py +232 -180
  14. roc/film/tasks/export_solo_coord.py +147 -0
  15. roc/film/tasks/file_handler.py +91 -75
  16. roc/film/tasks/l0_to_hk.py +117 -103
  17. roc/film/tasks/l0_to_l1_bia_current.py +38 -30
  18. roc/film/tasks/l0_to_l1_bia_sweep.py +417 -329
  19. roc/film/tasks/l0_to_l1_sbm.py +250 -208
  20. roc/film/tasks/l0_to_l1_surv.py +185 -130
  21. roc/film/tasks/make_daily_tm.py +40 -37
  22. roc/film/tasks/merge_tcreport.py +77 -71
  23. roc/film/tasks/merge_tmraw.py +101 -88
  24. roc/film/tasks/parse_dds_xml.py +21 -20
  25. roc/film/tasks/set_l0_utc.py +51 -49
  26. roc/film/tests/cdf_compare.py +565 -0
  27. roc/film/tests/hdf5_compare.py +84 -62
  28. roc/film/tests/test_dds_to_l0.py +93 -51
  29. roc/film/tests/test_dds_to_tc.py +8 -11
  30. roc/film/tests/test_dds_to_tm.py +8 -10
  31. roc/film/tests/test_film.py +161 -116
  32. roc/film/tests/test_l0_to_hk.py +64 -36
  33. roc/film/tests/test_l0_to_l1_bia.py +10 -14
  34. roc/film/tests/test_l0_to_l1_sbm.py +14 -19
  35. roc/film/tests/test_l0_to_l1_surv.py +68 -41
  36. roc/film/tests/test_metadata.py +21 -20
  37. roc/film/tests/tests.py +743 -396
  38. roc/film/tools/__init__.py +5 -5
  39. roc/film/tools/dataset_tasks.py +34 -2
  40. roc/film/tools/file_helpers.py +390 -269
  41. roc/film/tools/l0.py +402 -324
  42. roc/film/tools/metadata.py +147 -127
  43. roc/film/tools/skeleton.py +12 -17
  44. roc/film/tools/tools.py +109 -92
  45. roc/film/tools/xlsx2skt.py +161 -139
  46. {roc_film-1.13.5.dist-info → roc_film-1.14.0.dist-info}/LICENSE +127 -125
  47. roc_film-1.14.0.dist-info/METADATA +60 -0
  48. roc_film-1.14.0.dist-info/RECORD +50 -0
  49. {roc_film-1.13.5.dist-info → roc_film-1.14.0.dist-info}/WHEEL +1 -1
  50. roc/film/tasks/l0_to_anc_bia_sweep_table.py +0 -348
  51. roc_film-1.13.5.dist-info/METADATA +0 -120
  52. roc_film-1.13.5.dist-info/RECORD +0 -48
roc/film/tasks/check_dds.py

@@ -4,17 +4,17 @@
 """
 Contains task to check for SolO MOC DDS files.
 """
+
 import os
 from glob import glob
 
-from edds_process.response import \
-    count_packets
+from edds_process.response import count_packets
 
 from poppy.core.task import Task
 from poppy.core.logger import logger
 from poppy.core.target import FileTarget
 
-__all__ = ['CheckDds']
+__all__ = ["CheckDds"]
 
 
 class CheckDds(Task):
@@ -22,59 +22,59 @@ class CheckDds(Task):
     Task to check and load every SolO MOC DDS XML files found into
     a given input directory or list of files.
     """
-    plugin_name = 'roc.film'
-    name = 'check_dds'
+
+    plugin_name = "roc.film"
+    name = "check_dds"
 
     def add_targets(self):
-        self.add_output(identifier='dds_xml_files', many=True,
-                        target_class=FileTarget)
+        self.add_output(identifier="dds_xml_files", many=True, target_class=FileTarget)
 
     def run(self):
-
         # Get optional filters from input keywords
-        no_tmraw = self.pipeline.get('no_tmraw', default=False)
-        no_tcreport = self.pipeline.get('no_tcreport', default=False)
+        no_tmraw = self.pipeline.get("no_tmraw", default=False)
+        no_tcreport = self.pipeline.get("no_tcreport", default=False)
 
         if no_tmraw and no_tcreport:
-            logger.warning('"no-tmraw" and "no-tcreport" input keywords cannot be both '
-                           'set to True')
+            logger.warning(
+                '"no-tmraw" and "no-tcreport" input keywords cannot be both set to True'
+            )
             self.pipeline.exit()
 
         # Get input directory containing DDS file(s)
-        input_dir = self.pipeline.get('input_dir')
+        input_dir = self.pipeline.get("input_dir")
         if input_dir:
-            self.dds_xml_list = glob(os.path.join(input_dir, '*.xml'))
-        elif self.pipeline.get('dds_files'):
-            self.dds_xml_list = self.pipeline.get('dds_files')
+            self.dds_xml_list = glob(os.path.join(input_dir, "*.xml"))
+        elif self.pipeline.get("dds_files"):
+            self.dds_xml_list = self.pipeline.get("dds_files")
         else:
-            logger.warning(
-                'No valid input argument passed to the CheckDds Task')
+            logger.warning("No valid input argument passed to the CheckDds Task")
            self.pipeline.exit()
 
         # Filtering input DDS files
-        self.dds_xml_list = self.filtering_dds(self.dds_xml_list,
-                                               no_tcreport=no_tcreport,
-                                               no_tmraw=no_tmraw)
+        self.dds_xml_list = self.filtering_dds(
+            self.dds_xml_list, no_tcreport=no_tcreport, no_tmraw=no_tmraw
+        )
 
         # Check if no DDS file
         self.dds_xml_num = len(self.dds_xml_list)
         if self.dds_xml_num == 0:
             self.dds_packet_num_list = []
             self.dds_packet_num = 0
-            logger.warning(f'No input DDS file found')
+            logger.warning("No input DDS file found")
         else:
-            logger.debug('Getting total number of packets...')
-            self.dds_packet_num_list = [count_packets(current_dds)
-                                        for current_dds in self.dds_xml_list]
+            logger.debug("Getting total number of packets...")
+            self.dds_packet_num_list = [
+                count_packets(current_dds) for current_dds in self.dds_xml_list
+            ]
             self.dds_packet_num = sum(self.dds_packet_num_list)
-            logger.info(f'{self.dds_xml_num} DDS file(s) with {self.dds_packet_num} packets loaded')
+            logger.info(
+                f"{self.dds_xml_num} DDS file(s) with {self.dds_packet_num} packets loaded"
+            )
 
-        self.outputs['dds_xml_files'].filepath = self.dds_xml_list
+        self.outputs["dds_xml_files"].filepath = self.dds_xml_list
 
     @staticmethod
-    def filtering_dds(dds_xml_list,
-                      no_tmraw=False,
-                      no_tcreport=False):
+    def filtering_dds(dds_xml_list, no_tmraw=False, no_tcreport=False):
         """
         Filter input DDS files.
 
@@ -86,32 +86,26 @@ class CheckDds(Task):
 
         # Filtering input DDS file(s)
         for current_file in dds_xml_list:
-            file_pattern = os.path.splitext(
-                os.path.basename(current_file))[0].lower()
-            if 'tmraw' in file_pattern and not no_tmraw:
+            file_pattern = os.path.splitext(os.path.basename(current_file))[0].lower()
+            if "tmraw" in file_pattern and not no_tmraw:
                 output_list.append(current_file)
-                logger.debug(f'{current_file} added to the list')
-            elif 'tcreport' in file_pattern and not no_tcreport:
+                logger.debug(f"{current_file} added to the list")
+            elif "tcreport" in file_pattern and not no_tcreport:
                 output_list.append(current_file)
-                logger.debug(f'{current_file} added to the list')
+                logger.debug(f"{current_file} added to the list")
 
         return output_list
 
     def loop_generator(self, loop):
-
-        # Get pipeline
-        pipeline = loop.pipeline
-
         # Get list of dds XML files
-        dds_xml_list = loop.inputs['dds_xml_files'].filepath
+        dds_xml_list = loop.inputs["dds_xml_files"].filepath
         if dds_xml_list is None or len(dds_xml_list) == 0:
-            logger.info('No input DDS file for loop')
+            logger.info("No input DDS file for loop")
             return
 
         # Loop over each DDS file in the list
-        for i, dds_file_target in enumerate(loop.inputs['dds_xml_files']):
-
+        for i, dds_file_target in enumerate(loop.inputs["dds_xml_files"]):
            # pass DDS to first task in the pipeline workflow loop
-            dds_file_target.link('dds_xml')
+            dds_file_target.link("dds_xml")
 
             yield
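The check_dds.py changes above are almost entirely style changes consistent with running a formatter such as Black (double quotes, collapsed call signatures, reflowed log messages), plus removal of the unused pipeline lookup in loop_generator. The static filtering_dds helper keeps files whose lower-cased base name contains 'tmraw' or 'tcreport', subject to the no_tmraw/no_tcreport flags. The snippet below is a minimal, hypothetical usage sketch and is not part of the package: the file names are invented, and it assumes the roc.film plugin and its poppy dependencies are importable.

# Hypothetical usage sketch (not part of the wheel): file names are made up.
from roc.film.tasks.check_dds import CheckDds

dds_files = [
    "/data/solo_TM_tmraw_20230101.xml",     # kept: base name contains 'tmraw'
    "/data/solo_TC_tcreport_20230101.xml",  # dropped when no_tcreport=True
    "/data/unrelated_summary.xml",          # dropped: matches neither pattern
]

# filtering_dds is a staticmethod, so no Task/pipeline instance is needed
kept = CheckDds.filtering_dds(dds_files, no_tmraw=False, no_tcreport=True)
print(kept)  # ['/data/solo_TM_tmraw_20230101.xml']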
roc/film/tasks/db_to_anc_bia_sweep_table.py (new file)

@@ -0,0 +1,381 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""Contains task to create the RPW ANC Bias sweep table CDF files."""
+
+import csv
+import os
+import uuid
+from datetime import datetime
+import pandas as pd
+from sqlalchemy import and_
+
+from poppy.core.logger import logger
+from poppy.core import TargetFileNotSaved
+from poppy.core.db.connector import Connector
+from poppy.core.generic.cache import CachedProperty
+from poppy.core.target import FileTarget
+from poppy.core.task import Task
+
+from roc.dingo.models.data import EventLog
+from roc.dingo.tools import valid_time, query_db, get_columns
+
+from roc.film import TIME_DAILY_STRFORMAT, TIME_ISO_STRFORMAT
+from roc.film.constants import (
+    PIPELINE_DATABASE,
+    TRYOUTS,
+    SQL_LIMIT,
+    TIME_WAIT_SEC,
+    BIA_SWEEP_TABLE_PACKETS,
+)
+from roc.film.tools import get_datasets
+from roc.film.tools.file_helpers import get_output_dir, is_output_dir, generate_filepath
+from roc.film.tools.metadata import set_logical_file_id
+
+__all__ = ["DbToAncBiaSweepTable"]
+
+
+class DbToAncBiaSweepTable(Task):
+    """
+    Task to generate ANC bias sweep table file from pipeline.event_log table data.
+
+    For more information about the Bias sweeping, see section 'BIAS sweeping' of
+    the RPW DAS User Manual (RPW-SYS-MEB-DPS-NTT-000859-LES)
+
+    """
+
+    plugin_name = "roc.film"
+    name = "db_to_anc_bia_sweep_table"
+
+    csv_fieldnames = [
+        "TC_EXE_UTC_TIME",
+        "BIA_SWEEP_TABLE_CUR",
+        "EEPROM_LOADING",
+        "TC_NAME",
+        "TC_EXE_STATE",
+    ]
+
+    def add_targets(self):
+        self.add_output(target_class=FileTarget, identifier="anc_bia_sweep_table")
+
+    def setup_inputs(self):
+        # get a database session, table model and table columns (except primary key)
+        self.session = Connector.manager[PIPELINE_DATABASE].session
+        self.model = EventLog
+        self.columns = get_columns(self.model, remove=["id"])
+
+        # Get tryouts from pipeline properties
+        self.tryouts = self.pipeline.get("tryouts", default=[TRYOUTS], create=True)[0]
+
+        # Get wait from pipeline properties
+        self.wait = self.pipeline.get("wait", default=[TIME_WAIT_SEC], create=True)[0]
+
+        # Retrieve --limit keyword value
+        self.limit = self.pipeline.get(
+            "limit",
+            default=[SQL_LIMIT],
+        )[0]
+
+        # Get products directory (folder where final output files will be
+        # moved)
+        self.products_dir = self.pipeline.get(
+            "products_dir", default=[None], args=True
+        )[0]
+
+        # Get output dir
+        self.output_dir = get_output_dir(self.pipeline)
+        if not is_output_dir(self.output_dir, products_dir=self.products_dir):
+            logger.info(f"Making {self.output_dir}")
+            os.makedirs(self.output_dir)
+        else:
+            logger.debug(f"Output files will be saved into folder {self.output_dir}")
+
+        # Get or create failed_files list from pipeline properties
+        self.failed_files = self.pipeline.get("failed_files", default=[], create=True)
+
+        # Get or create processed_files list from pipeline properties
+        self.processed_files = self.pipeline.get(
+            "processed_files", default=[], create=True
+        )
+
+        # Get or create ignored_target list from pipeline properties
+        self.ignored_target = self.pipeline.get(
+            "ignored_target", default=[], create=True
+        )
+
+        # Get overwrite argument
+        self.overwrite = self.pipeline.get("overwrite", default=False, args=True)
+
+        # Get force optional keyword
+        self.force = self.pipeline.get("force", default=False, args=True)
+
+        # Retrieve output dataset to produce for the task (it should be one)
+        self.dataset = get_datasets(self, self.name)[0]
+        logger.debug(
+            f"Produce file(s) for the following dataset: {self.dataset['name']}"
+        )
+
+        # Get start_time input value
+        self.start_time = valid_time(
+            self.pipeline.get("start_time", default=[None])[0],
+            format=TIME_DAILY_STRFORMAT,
+        )
+
+        # Get end_time input value
+        self.end_time = valid_time(
+            self.pipeline.get("end_time", default=[None])[0],
+            format=TIME_DAILY_STRFORMAT,
+        )
+
+        # Define query filters for existing data in database
+        self.filters = []
+        if self.start_time:
+            self.filters.append(self.model.start_time >= str(self.start_time))
+        if self.end_time:
+            self.filters.append(self.model.end_time < str(self.end_time))
+
+        return True
+
+    @CachedProperty
+    def output_filepath(self):
+        # Build output filename using metadata
+        filename_items = {}
+        filename_items["File_naming_convention"] = (
+            "<Source_name>_<LEVEL>_<Descriptor>_<Datetime>_V<Data_version>"
+        )
+        filename_items["Source_name"] = "SOLO>Solar Orbiter"
+        filename_items["Descriptor"] = "RPW-BIA-SWEEP-TABLE>RPW Bias sweep table report"
+        filename_items["LEVEL"] = "ANC>Ancillary data"
+        filename_items["Data_version"] = self.dataset["version"]
+
+        filename_items["Datetime"] = (
+            self.start_time.strftime(TIME_DAILY_STRFORMAT)
+            + "-"
+            + self.end_time.strftime(TIME_DAILY_STRFORMAT)
+        )
+        filename_items["Logical_file_id"] = set_logical_file_id(filename_items)
+        return generate_filepath(
+            self,
+            filename_items,
+            "csv",
+            output_dir=self.output_dir,
+            overwrite=self.overwrite,
+        )
+
+    def run(self):
+        # Define task job ID (long and short)
+        self.job_uuid = str(uuid.uuid4())
+        self.job_id = f"{self.job_uuid[:8]}"
+        logger.info(f"Task {self.job_id} is starting")
+        try:
+            self.setup_inputs()
+        except Exception:
+            logger.exception(f"Initializing inputs has failed for {self.job_id}!")
+            try:
+                os.makedirs(os.path.join(self.output_dir, "failed"))
+            except Exception:
+                logger.error("output_dir argument is not defined!")
+            self.pipeline.exit()
+            return
+
+        # First retrieve sweep table data from pipeline.event_log table
+        self.filters.append(self.model.label.in_(BIA_SWEEP_TABLE_PACKETS))
+        logger.debug(
+            f"Getting existing event_log data between {self.start_time} and {self.end_time}"
+        )
+        # Return existing data as a pandas.DataFrame object
+        table_data = query_db(
+            self.session,
+            self.model,
+            filters=and_(*self.filters),
+            tryouts=self.tryouts,
+            wait=self.wait,
+            limit=self.limit,
+        )
+        n_data = table_data.shape[0]
+        if n_data == 0:
+            logger.warning("No sweep table TC found in the database")
+            return
+        else:
+            logger.info(f"{n_data} sweep table TCs found in the database")
+            # Prepare table data to be saved in the CSV file
+            table_data = DbToAncBiaSweepTable.prep_sweep_table(table_data)
+
+        # Convert current list to string values separated by ;
+        table_data["BIA_SWEEP_TABLE_CUR"] = table_data["BIA_SWEEP_TABLE_CUR"].apply(
+            lambda x: na_to_str(x)
+        )
+
+        if not self.start_time:
+            self.start_time = table_data["TC_EXE_UTC_TIME"].min()
+            self.end_time = table_data["TC_EXE_UTC_TIME"].max()
+
+        # Write output CSV file
+        output_filepath = self.output_filepath
+        logger.info(f"Writing {output_filepath}...")
+        try:
+            with open(output_filepath, "w", newline="") as csvfile:
+                table_data.to_csv(csvfile, sep=",")
+        except Exception:
+            if output_filepath not in self.failed_files:
+                self.failed_files.append(output_filepath)
+            raise TargetFileNotSaved(
+                "Anc Bias sweep table csv file production has failed!"
+            )
+
+        if not os.path.isfile(output_filepath):
+            if output_filepath not in self.failed_files:
+                self.failed_files.append(output_filepath)
+            raise FileNotFoundError(f"{output_filepath} not found")
+        else:
+            logger.info(f"{output_filepath} saved")
+            if output_filepath not in self.processed_files:
+                self.processed_files.append(output_filepath)
+
+        self.outputs["anc_bia_sweep_table"] = output_filepath
+
+    @staticmethod
+    def parse_bia_sweep_table_file(sweep_table_file):
+        """
+        Parse an input bia sweep table CSV file
+
+        :param sweep_table_file: File to parse
+        :return: list of sweep tables
+        """
+
+        # Initialize output list
+        sweep_table_list = []
+
+        if not os.path.isfile(sweep_table_file):
+            logger.error(f"{sweep_table_file} not found!")
+        else:
+            # Read file and store in output list
+            with open(sweep_table_file, "r", newline="") as csv_file:
+                reader = csv.DictReader(csv_file)
+
+                # Loop over rows
+                for row in reader:
+                    row["TC_EXE_UTC_TIME"] = datetime.strptime(
+                        row["TC_EXE_UTC_TIME"], TIME_ISO_STRFORMAT
+                    )
+                    row["BIA_SWEEP_TABLE_CUR"] = row["BIA_SWEEP_TABLE_CUR"].split(";")
+                    sweep_table_list.append(row)
+
+        return sweep_table_list
+
+    @staticmethod
+    def get_latest_sweep_table(current_time, sweep_table_list):
+        """
+        Get the latest sweep table for a given datetime
+
+        :param current_time: Time for which sweep table must be returned (datetime object)
+        :param sweep_table_list: list of sweep tables (pandas.DataFrame)
+        :return: row of the sweep table list
+        """
+
+        w = (sweep_table_list["TC_EXE_STATE"] == "PASSED") & (
+            sweep_table_list["TC_EXE_UTC_TIME"] <= current_time
+        )
+        output_table = sweep_table_list[w]
+
+        if output_table.shape[0] > 0:
+            output_table = output_table.iloc[-1]
+
+        return output_table
+
+    @staticmethod
+    def prep_sweep_table(table_data):
+        """
+        Preprocess sweep table data coming from pipeline.event_log
+        to be compatible with output CSV content
+
+        :param table_data: Sweep table data
+                           extracted from event_log table
+                           (Pandas.DataFrame object as returned by query_db())
+        :return: modified table_data
+        """
+        n_data = table_data.shape[0]
+        # First, sort by ascending start_time
+        table_data.sort_values(by=["start_time"], inplace=True, ignore_index=True)
+
+        # rename some columns
+        table_data.rename(
+            columns={
+                "start_time": "TC_EXE_UTC_TIME",
+                "label": "TC_NAME",
+            },
+            inplace=True,
+        )
+
+        # add columns for TC_EXE_STATE, EEPROM_LOADING and BIA_SWEEP_TABLE_CUR
+        new_data = {
+            "TC_EXE_STATE": ["PASSED"] * n_data,
+            "EEPROM_LOADING": [
+                int(row["sweep_eeprom"] is True) for row in table_data["description"]
+            ],
+            "BIA_SWEEP_TABLE_CUR": [
+                row["sweep_step_na"] for row in table_data["description"]
+            ],
+        }
+        table_data = pd.concat([table_data, pd.DataFrame.from_dict(new_data)], axis=1)
+
+        # delete unwanted columns
+        for table_col in [
+            "id",
+            "end_time",
+            "is_predictive",
+            "description",
+            "insert_time",
+        ]:
+            del table_data[table_col]
+
+        # Convert current values from string to float
+        table_data["BIA_SWEEP_TABLE_CUR"] = table_data["BIA_SWEEP_TABLE_CUR"].apply(
+            str_to_float
+        )
+
+        return table_data
+
+
+def na_to_str(bia_current):
+    """
+
+    :param bia_current:
+    :return:
+    """
+    if not bia_current:
+        return ""
+    else:
+        return ";".join([str(current) for current in bia_current])
+
+
+def str_to_float(str_values):
+    """
+    Convert string(s) to float(s)
+    (with some extra rules)
+
+    :param str_values: string value(s) to convert
+    :return: converted float value(s)
+    """
+    if not str_values:
+        return None
+
+    is_list = isinstance(str_values, list)
+    if not is_list:
+        out_values = [str_values]
+    else:
+        out_values = str_values
+
+    for i, out_value in enumerate(out_values):
+        if out_value:
+            if out_value == "nan":
+                out_values[i] = None
+            else:
+                out_values[i] = float(out_value)
+        else:
+            out_values[i] = None
+
+    if not is_list:
+        out_values = out_values[0]
+
+    return out_values
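Judging from the file list, the new DbToAncBiaSweepTable task appears to take over from the removed l0_to_anc_bia_sweep_table.py, building the ANC Bias sweep table CSV from the pipeline.event_log database table rather than from L0 files. The module-level helpers na_to_str and str_to_float convert between the ';'-separated BIA_SWEEP_TABLE_CUR cell format and Python float lists, and the static get_latest_sweep_table picks the most recent PASSED table at or before a given time. The snippet below is a minimal, hypothetical illustration of those helpers with made-up values; it only assumes pandas and the module import path shown in the diff.

# Hypothetical usage sketch (not part of the wheel): values are invented.
from datetime import datetime
import pandas as pd

from roc.film.tasks.db_to_anc_bia_sweep_table import (
    DbToAncBiaSweepTable,
    na_to_str,
    str_to_float,
)

# Round-trip of the BIA_SWEEP_TABLE_CUR cell format (';'-separated floats)
currents_na = [0.0, 10.5, 50.0]        # made-up sweep step currents (nA)
cell = na_to_str(currents_na)          # '0.0;10.5;50.0'
assert str_to_float(cell.split(";")) == currents_na

# Selecting the sweep table in effect at a given time
tables = pd.DataFrame(
    {
        "TC_EXE_UTC_TIME": [datetime(2023, 1, 1), datetime(2023, 1, 15)],
        "TC_EXE_STATE": ["PASSED", "PASSED"],
        "BIA_SWEEP_TABLE_CUR": [[0.0, 10.0], [0.0, 20.0]],
    }
)
latest = DbToAncBiaSweepTable.get_latest_sweep_table(datetime(2023, 1, 20), tables)
print(latest["BIA_SWEEP_TABLE_CUR"])   # [0.0, 20.0] (the second row applies)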