AeroViz-0.1.4-py3-none-any.whl → AeroViz-0.1.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of AeroViz might be problematic.

Files changed (39)
  1. AeroViz/dataProcess/Chemistry/__init__.py +21 -20
  2. AeroViz/dataProcess/Chemistry/_isoropia.py +9 -12
  3. AeroViz/dataProcess/Chemistry/_mass_volume.py +4 -3
  4. AeroViz/dataProcess/Chemistry/_ocec.py +20 -45
  5. AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
  6. AeroViz/dataProcess/Optical/_IMPROVE.py +2 -3
  7. AeroViz/dataProcess/Optical/fRH.pkl +0 -0
  8. AeroViz/dataProcess/SizeDistr/__init__.py +6 -10
  9. AeroViz/dataProcess/VOC/__init__.py +1 -6
  10. AeroViz/dataProcess/VOC/_potential_par.py +71 -37
  11. AeroViz/dataProcess/VOC/{voc_par.json → support_voc.json} +321 -339
  12. AeroViz/rawDataReader/__init__.py +52 -5
  13. AeroViz/rawDataReader/config/supported_instruments.py +45 -53
  14. AeroViz/rawDataReader/core/__init__.py +113 -98
  15. AeroViz/rawDataReader/script/AE33.py +3 -3
  16. AeroViz/rawDataReader/script/AE43.py +2 -2
  17. AeroViz/rawDataReader/script/APS_3321.py +4 -4
  18. AeroViz/rawDataReader/script/Aurora.py +5 -2
  19. AeroViz/rawDataReader/script/BC1054.py +2 -2
  20. AeroViz/rawDataReader/script/EPA_vertical.py +2 -2
  21. AeroViz/rawDataReader/script/GRIMM.py +4 -4
  22. AeroViz/rawDataReader/script/IGAC.py +2 -2
  23. AeroViz/rawDataReader/script/MA350.py +2 -2
  24. AeroViz/rawDataReader/script/Minion.py +2 -2
  25. AeroViz/rawDataReader/script/NEPH.py +9 -14
  26. AeroViz/rawDataReader/script/{Sunset_OCEC.py → OCEC.py} +24 -18
  27. AeroViz/rawDataReader/script/SMPS.py +76 -0
  28. AeroViz/rawDataReader/script/TEOM.py +2 -2
  29. AeroViz/rawDataReader/script/Table.py +3 -3
  30. AeroViz/rawDataReader/script/VOC.py +16 -9
  31. AeroViz/rawDataReader/script/__init__.py +2 -4
  32. {AeroViz-0.1.4.dist-info → AeroViz-0.1.6.dist-info}/METADATA +13 -10
  33. {AeroViz-0.1.4.dist-info → AeroViz-0.1.6.dist-info}/RECORD +36 -36
  34. AeroViz/rawDataReader/script/SMPS_TH.py +0 -41
  35. AeroViz/rawDataReader/script/SMPS_aim11.py +0 -51
  36. AeroViz/rawDataReader/script/SMPS_genr.py +0 -51
  37. {AeroViz-0.1.4.dist-info → AeroViz-0.1.6.dist-info}/LICENSE +0 -0
  38. {AeroViz-0.1.4.dist-info → AeroViz-0.1.6.dist-info}/WHEEL +0 -0
  39. {AeroViz-0.1.4.dist-info → AeroViz-0.1.6.dist-info}/top_level.txt +0 -0
AeroViz/rawDataReader/__init__.py
@@ -12,20 +12,67 @@ def RawDataReader(instrument_name: str,
                   qc: bool = True,
                   csv_raw: bool = True,
                   reset: bool = False,
-                  rate: bool = False,
+                  rate: bool = True,
                   append_data: bool = False,
                   start: datetime | None = None,
                   end: datetime | None = None,
                   mean_freq='1h',
                   csv_out=True,
                   ):
+    """
+    Factory function to instantiate the appropriate reader module for a given instrument and
+    return the processed data over the specified time range.
+
+    Parameters
+    ----------
+    instrument_name : str
+        The name of the instrument for which to read data. Must be a valid key in the `meta` dictionary.
+    path : Path
+        The directory where raw data files for the instrument are stored.
+    qc : bool, optional (default=True)
+        If True, apply quality control (QC) to the raw data.
+    csv_raw : bool, optional (default=True)
+        If True, read raw data from CSV files.
+    reset : bool, optional (default=False)
+        If True, reset the state and reprocess the data from scratch.
+    rate : bool, optional (default=True)
+        If True, calculate rates from the data.
+    append_data : bool, optional (default=False)
+        If True, append new data to the existing dataset instead of overwriting it.
+    start : datetime, optional
+        Start time for filtering the data. If None, no start time filtering will be applied.
+    end : datetime, optional
+        End time for filtering the data. If None, no end time filtering will be applied.
+    mean_freq : str, optional (default='1h')
+        Resampling frequency for averaging the data. Example: '1h' for hourly mean.
+    csv_out : bool, optional (default=True)
+        If True, output the processed data as a CSV file.
+
+    Returns
+    -------
+    reader_module : Reader
+        An instance of the reader module corresponding to the specified instrument, which processes
+        the data and returns it in a usable format.
+
+    Raises
+    ------
+    ValueError
+        If the `instrument_name` provided is not a valid key in the `meta` dictionary.
+
+    Examples
+    --------
+    To read and process data for the BC1054 instrument:
+
+    >>> from pathlib import Path
+    >>> from datetime import datetime
+    >>> data = RawDataReader(instrument_name='BC1054', path=Path('/path/to/data'),
+    ...                      start=datetime(2024, 1, 1), end=datetime(2024, 2, 1))
+    """
     # Mapping of instrument names to their respective classes
     instrument_class_map = {
         'NEPH': NEPH,
         'Aurora': Aurora,
-        'SMPS_genr': SMPS_genr,
-        'SMPS_aim11': SMPS_aim11,
-        'SMPS_TH': SMPS_TH,
+        'SMPS': SMPS,
         'GRIMM': GRIMM,
         'APS_3321': APS_3321,
         'AE33': AE33,
@@ -33,7 +80,7 @@ def RawDataReader(instrument_name: str,
         'BC1054': BC1054,
         'MA350': MA350,
         'TEOM': TEOM,
-        'Sunset_OCEC': Sunset_OCEC,
+        'OCEC': OCEC,
         'IGAC': IGAC,
         'VOC': VOC,
         'Table': Table,
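Several instrument keys change meaning between these releases: the three SMPS readers (`SMPS_genr`, `SMPS_aim11`, `SMPS_TH`) collapse into a single `SMPS` key, and `Sunset_OCEC` becomes `OCEC`, so 0.1.4-era call sites need updating. A minimal migration sketch, assuming `RawDataReader` is importable from the package top level (the data paths are placeholders):

```python
from datetime import datetime
from pathlib import Path

from AeroViz import RawDataReader  # import location assumed

# 0.1.4: RawDataReader('SMPS_TH', ...) / RawDataReader('Sunset_OCEC', ...)
# 0.1.6: one merged SMPS key and the renamed OCEC key
smps = RawDataReader(instrument_name='SMPS', path=Path('/path/to/smps'),
                     start=datetime(2024, 1, 1), end=datetime(2024, 2, 1))
ocec = RawDataReader(instrument_name='OCEC', path=Path('/path/to/ocec'),
                     start=datetime(2024, 1, 1), end=datetime(2024, 2, 1))
```

Note also that `rate` now defaults to True, so acquisition/yield statistics are computed unless `rate=False` is passed explicitly.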
AeroViz/rawDataReader/config/supported_instruments.py
@@ -2,76 +2,64 @@
 
 meta = {
     "NEPH": {
-        "pattern": "*.dat",
+        "pattern": ["*.dat"],
         "freq": "5min",
         "deter_key": {"Scatter Coe. (550 nm)": ["G"]},
     },
 
     "Aurora": {
-        "pattern": "*.csv",
+        "pattern": ["*.csv"],
         "freq": "1min",
         "deter_key": {"Scatter Coe. (550 nm)": ["G"]},
     },
 
-    "SMPS_TH": {
-        "pattern": "*.txt",
-        "freq": "6min",
-        "deter_key": {"Bins": ["all"]},
-    },
-
-    "SMPS_genr": {
-        "pattern": "*.txt",
-        "freq": "6min",
-        "deter_key": {"Bins": ["all"]},
-    },
-
-    "SMPS_aim11": {
-        "pattern": "*.csv",
+    "SMPS": {
+        "pattern": ["*.txt", "*.csv"],
         "freq": "6min",
         "deter_key": {"Bins": ["all"]},
     },
 
     "GRIMM": {
-        "pattern": "*.dat",
+        "pattern": ["*.dat"],
         "freq": "6min",
         "deter_key": {"Bins": ["all"]},
     },
 
     "APS_3321": {
-        "pattern": "*.TXT",
+        "pattern": ["*.txt"],
         "freq": "6min",
         "deter_key": {"Bins": ["all"]},
     },
 
     "AE33": {
-        "pattern": "[!ST|!CT|!FV]*[!log]_AE33*.dat",
+        "pattern": ["[!ST|!CT|!FV]*[!log]_AE33*.dat"],
         "freq": "1min",
         "deter_key": {"BC Mass Conc. (880 nm)": ["BC6"]},
         "error_state": [],
     },
 
     "AE43": {
-        "pattern": "[!ST|!CT|!FV]*[!log]_AE43*.dat",
+        "pattern": ["[!ST|!CT|!FV]*[!log]_AE43*.dat"],
        "freq": "1min",
         "deter_key": {"BC Mass Conc. (880 nm)": ["BC6"]},
         "error_state": [],
     },
 
     "BC1054": {
-        "pattern": "*.csv",
+        "pattern": ["*.csv"],
         "freq": "1min",
         "deter_key": {"BC Mass Conc. (880 nm)": ["BC9"]},
         "error_state": [1, 2, 4, 8, 16, 32, 65536],
     },
 
     "MA350": {
-        "pattern": "*.csv",
+        "pattern": ["*.csv"],
         "freq": "1min",
         "deter_key": {"BC Mass Conc. (880 nm)": ["BC5"]},
     },
 
     "TEOM": {
-        "pattern": "*.csv",
+        "pattern": ["*.csv"],
         "freq": "6min",
         "deter_key": {
             "PM1.0 Mass Conc.": ["PM_Total"],
@@ -79,21 +67,19 @@ meta = {
         },
     },
 
-    "Sunset_OCEC": {
-        "pattern": "*LCRes.csv",
+    "OCEC": {
+        "pattern": ["*LCRes.csv"],
         "freq": "1h",
         "deter_key": {
-            "Thermal OC/EC": ["Thermal_EC", "Thermal_OC"],
             "Thermal OC": ["Thermal_OC"],
             "Thermal EC": ["Thermal_EC"],
-            "Optical OC/EC": ["Optical_EC", "Optical_OC"],
             "Optical OC": ["Optical_OC"],
             "Optical EC": ["Optical_EC"],
         },
     },
 
     "IGAC": {
-        "pattern": "*.csv",
+        "pattern": ["*.csv"],
         "freq": "1h",
         "deter_key": {
             "Na+": ["Na+"],
@@ -110,46 +96,52 @@ meta = {
         },
     },
 
     "VOC": {
-        "pattern": "*.csv",
+        "pattern": ["*.csv"],
         "freq": "1h",
-        "key": ['Ethane', 'Propane', 'Isobutane', 'n-Butane', 'Cyclopentane', 'Isopentane',
-                'n-Pentane', '2,2-Dimethylbutane', '2,3-Dimethylbutane', '2-Methylpentane',
-                '3-Methylpentane', 'n-Hexane', 'Methylcyclopentane', '2,4-Dimethylpentane',
-                'Cyclohexane', '2-Methylhexane', '2-Methylhexane', '3-Methylheptane',
-                '2,2,4-Trimethylpentane', 'n-Heptane', 'Methylcyclohexane',
-                '2,3,4-Trimethylpentane', '2-Methylheptane', '3-Methylhexane', 'n-Octane',
-                'n-Nonane', 'n-Decane', 'n-Undecane', 'Ethylene', 'Propylene', 't-2-Butene',
-                '1-Butene', 'cis-2-Butene', 't-2-Pentene', '1-Pentene', 'cis-2-Pentene',
-                'isoprene', 'Acetylene', 'Benzene', 'Toluene', 'Ethylbenzene', 'm,p-Xylene',
-                'Styrene', 'o-Xylene', 'Isopropylbenzene', 'n-Propylbenzene', 'm-Ethyltoluene',
-                'p-Ethyltoluene', '1,3,5-Trimethylbenzene', 'o-Ethyltoluene',
-                '1,2,4-Trimethylbenzene', '1,2,3-Trimethylbenzene', 'm-Diethylbenzene',
-                'p-Diethylbenzene'],
-
-        "key_2": ['Isopentane', 'Hexane', '2-Methylhexane', '3-Methylhexane', '2-Methylheptane', '3-Methylheptane',
-                  'Propene', '1.3-Butadiene', 'Isoprene', '1-Octene',
-                  'Benzene', 'Toluene', 'Ethylbenzene', 'm.p-Xylene', 'o-Xylene', 'Iso-Propylbenzene', 'Styrene',
-                  'n-Propylbenzene', '3.4-Ethyltoluene', '1.3.5-TMB', '2-Ethyltoluene', '1.2.4-TMB', '1.2.3-TMB',
-                  'Acetaldehyde', 'Ethanol', 'Acetone', 'IPA', 'Ethyl Acetate', 'Butyl Acetate',
-                  'VCM', 'TCE', 'PCE', '1.4-DCB', '1.2-DCB'],
+        "key": [
+            'Benzene', 'Toluene', 'Ethylbenzene', 'm/p-Xylene', 'o-Xylene', 'Ethane', 'Propane', 'Isobutane',
+            'n-Butane', 'Isopentane', 'n-Pentane', 'n-Hexane', 'n-Heptane', 'n-Octane', 'n-Nonane', 'n-Decane',
+            'n-Undecane', 'n-Dodecane', 'Ethylene', 'Propylene', '1-Butene', 't-2-Butene', 'cis-2-Butene',
+            '1-Pentene', 't-2-Pentene', 'cis-2-Pentene', '1-Hexene', 'Acetylene', 'Cyclopentane', 'Methylcyclopentane',
+            'Cyclohexane', 'Methylcyclohexane', 'Isoprene', '2,2-Dimethylbutane', '2,3-Dimethylbutane',
+            '2-Methylpentane', '3-Methylpentane', '2,4-Dimethylpentane', '2-Methylhexane', '2,3-Dimethylpentane',
+            '3-Methylheptane', '2,2,4-Trimethylpentane', '2,3,4-Trimethylpentane', '2-Methylheptane', '3-Methylhexane',
+            'Styrene', 'Isopropylbenzene', 'n-Propylbenzene', 'm-Ethyltoluene', 'p-Ethyltoluene', 'm-Diethylbenzene',
+            'p-Diethylbenzene', '1,3,5-Trimethylbenzene', 'o-Ethyltoluene', '1,2,4-Trimethylbenzene',
+            '1,2,3-Trimethylbenzene',
+            '1.2-DCB', '1.4-DCB', '1.3-Butadiene', '1-Octene', '2-Ethyltoluene', '3.4-Ethyltoluene', 'Acetaldehyde',
+            'Acetone', 'Butyl Acetate', 'Ethanol', 'Ethyl Acetate', 'Hexane', 'IPA', 'Iso-Propylbenzene',
+            'PCE', 'Propene', 'TCE', 'VCM',
+        ],
         "deter_key": None,
     },
 
     "Table": {
-        "pattern": "*.csv",
+        "pattern": ["*.csv"],
         "freq": "1h",
         "deter_key": None,
     },
 
     "EPA_vertical": {
-        "pattern": "*.csv",
+        "pattern": ["*.csv"],
         "freq": "1h",
         "deter_key": None,
     },
 
     "Minion": {
-        "pattern": "*.csv",
+        "pattern": ["*.csv"],
         "freq": "1h",
-        "deter_key": None,
+        "deter_key": {
+            "Na+": ["Na+"],
+            "NH4+": ["NH4+"],
+            "K+": ["K+"],
+            "Mg2+": ["Mg2+"],
+            "Ca2+": ["Ca2+"],
+            "Cl-": ["Cl-"],
+            "NO2-": ["NO2-"],
+            "NO3-": ["NO3-"],
+            "SO42-": ["SO42-"],
+            "Main Salt (NH4+, NO3-, SO42-)": ["NO3-", "SO42-", "NH4+"],
+        },
     },
 }
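The structural change in this config is that every "pattern" value is now a list, which is what lets the merged SMPS entry match both the text and CSV exporter formats. A sketch of how a list-valued pattern expands to candidate files, mirroring (not quoting) the glob logic in the core reader below; `data_dir` is a placeholder:

```python
from pathlib import Path

pattern = ["*.txt", "*.csv"]      # e.g. meta["SMPS"]["pattern"]
data_dir = Path('/path/to/smps')  # placeholder

# each pattern is tried in lower-case, upper-case, and original form;
# the set comprehension deduplicates case variants and repeated matches
files = sorted({
    f
    for file_pattern in pattern
    for p in {file_pattern.lower(), file_pattern.upper(), file_pattern}
    for f in data_dir.glob(p)
})
print([f.name for f in files])
```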
AeroViz/rawDataReader/core/__init__.py
@@ -1,4 +1,4 @@
-import json as jsn
+import json
 import logging
 import pickle as pkl
 from abc import ABC, abstractmethod
@@ -7,29 +7,37 @@ from pathlib import Path
 from typing import Any
 
 import numpy as np
+import pandas as pd
 from pandas import DataFrame, date_range, concat, to_numeric, to_datetime
+from rich.console import Console
+from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, TaskProgressColumn
 
 from ..config.supported_instruments import meta
 
 __all__ = ['AbstractReader']
 
 
+console = Console(force_terminal=True, color_system="auto")
+
+
 class AbstractReader(ABC):
-    nam = 'AbstractReader'
+    """
+    Abstract class for reading raw data from different instruments. Each instrument should have a separate class
+    that inherits from this class and implements the abstract methods `_raw_reader` and `_QC`.
 
-    # initial data
-    # input : file path, reset switch
+    The reader lists the files in the path and reads the pickle file if it exists; otherwise it reads the raw data
+    and dumps a pickle file. The pickle file is generated the first time the raw data are read. To re-read the raw
+    data, set 'reset=True'.
+    """
 
-    # list the file in the path and read pickle file if it exists, else read raw data and dump the pickle file the
-    # pickle file will be generated after read raw data first time, if you want to re-read the rawdata, please set
-    # 'reset=True'
+    nam = 'AbstractReader'
 
     def __init__(self,
                  path: Path | str,
                  qc: bool = True,
                  csv_raw: bool = True,
                  reset: bool = False,
-                 rate: bool = False,
+                 rate: bool = True,
                  append_data: bool = False):
 
         self.path = Path(path)
@@ -40,7 +48,7 @@ class AbstractReader(ABC):
         self.rate = rate
         self.qc = qc
         self.csv = csv_raw
-        self.apnd = append_data & reset
+        self.append = append_data and reset
 
         self.pkl_nam = self.path / f'_read_{self.nam.lower()}.pkl'
         self.csv_nam = self.path / f'_read_{self.nam.lower()}.csv'
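The change from `append_data & reset` to `append_data and reset` is more than a rename: `&` is the bitwise operator, which does not short-circuit and raises a TypeError when an operand is not a bool or int. A standalone illustration (not AeroViz code):

```python
append_data, reset = True, None  # e.g. a caller accidentally passes None

print(append_data and reset)     # -> None (falsy); `and` short-circuits safely
try:
    append_data & reset          # bitwise & is undefined for bool and NoneType
except TypeError as e:
    print(f"TypeError: {e}")
```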
@@ -48,21 +56,12 @@ class AbstractReader(ABC):
         self.csv_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.csv'
         self.csv_out = self.path / f'output_{self.nam.lower()}.csv'
 
-    # dependency injection function, customize each instrument
-    @abstractmethod
-    def _raw_reader(self, _file):
-        pass
-
-    @abstractmethod
-    def _QC(self, df: DataFrame):
-        return df
-
     def __call__(self,
                  start: dtm | None = None,
                  end: dtm | None = None,
                  mean_freq: str = '1h',
                  csv_out: bool = True,
-                 ) -> DataFrame | None:
+                 ) -> DataFrame:
 
         if start and end and end <= start:
             raise ValueError(f"Invalid time range: start {start} is after end {end}")
@@ -77,6 +76,14 @@ class AbstractReader(ABC):
 
         return data
 
+    @abstractmethod
+    def _raw_reader(self, file):
+        pass
+
+    @abstractmethod
+    def _QC(self, df: DataFrame):
+        return df
+
     @staticmethod
     def basic_QC(df: DataFrame):
         df_ave, df_std = df.mean(), df.std()
@@ -84,40 +91,25 @@ class AbstractReader(ABC):
 
         return df.mask(df_lowb | df_highb).copy()
 
-    # set each to true datetime(18:30:01 -> 18:30:00) and rindex data
-    def _raw_process(self, _df):
-        # get time from df and set time to whole time to create time index
-        _st, _ed = _df.index.sort_values()[[0, -1]]
-        _tm_index = date_range(_st.strftime('%Y%m%d %H00'), _ed.floor('h').strftime('%Y%m%d %H00'),
-                               freq=self.meta['freq'])
-        _tm_index.name = 'time'
-
-        return _df.apply(to_numeric, errors='coerce').resample(self.meta['freq']).mean().reindex(_tm_index)
-
     def _setup_logger(self) -> logging.Logger:
         logger = logging.getLogger(self.nam)
         logger.setLevel(logging.INFO)
+
+        for handler in logger.handlers[:]:
+            logger.removeHandler(handler)
+
         handler = logging.FileHandler(self.path / f'{self.nam}.log')
         handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
         logger.addHandler(handler)
         return logger
 
-    # acquisition rate and yield rate
-    def _rate_calculate(self, _fout_raw, _fout_qc, _st_raw, _ed_raw):
+    def _rate_calculate(self, _fout_raw, _fout_qc, _st_raw, _ed_raw) -> None:
         if self.meta['deter_key'] is not None:
             _start, _end = _fout_qc.index[[0, -1]]
 
             _drop_how = 'any'
             _the_size = len(_fout_raw.resample('1h').mean().index)
 
-            self.logger.info(f"{'=' * 60}")
-            self.logger.info(
-                f"Raw data time : {_st_raw.strftime('%Y-%m-%d %H:%M:%S')} ~ {_ed_raw.strftime('%Y-%m-%d %H:%M:%S')}")
-            self.logger.info(
-                f"Output time : {_start.strftime('%Y-%m-%d %H:%M:%S')} ~ {_end.strftime('%Y-%m-%d %H:%M:%S')}")
-            self.logger.info(f"{'-' * 60}")
-            print(f"\n\n\t\tfrom {_start.strftime('%Y-%m-%d %H:%M:%S')} to {_end.strftime('%Y-%m-%d %H:%M:%S')}\n")
-
             for _nam, _key in self.meta['deter_key'].items():
                 if _key == ['all']:
                     _key, _drop_how = _fout_qc.keys(), 'all'
@@ -136,9 +128,19 @@ class AbstractReader(ABC):
                 self.logger.info(f'\tYield rate: {_yid_rate}%')
                 self.logger.info(f"{'=' * 60}")
 
-                print(f'\t\t{_nam} : ')
-                print(f'\t\t\tacquisition rate : \033[91m{_acq_rate}%\033[0m')
-                print(f'\t\t\tyield rate : \033[91m{_yid_rate}%\033[0m')
+                print(f'\n\t{_nam} : ')
+                print(f'\t\tacquisition rate : \033[91m{_acq_rate}%\033[0m')
+                print(f'\t\tyield rate : \033[91m{_yid_rate}%\033[0m')
+
+    # set each timestamp to a whole datetime (18:30:01 -> 18:30:00) and reindex the data
+    def _raw_process(self, _df):
+        # get time from df and set time to whole time to create time index
+        _st, _ed = _df.index.sort_values()[[0, -1]]
+        _tm_index = date_range(_st.strftime('%Y%m%d %H00'), _ed.floor('h').strftime('%Y%m%d %H00'),
+                               freq=self.meta['freq'])
+        _tm_index.name = 'time'
+
+        return _df.apply(to_numeric, errors='coerce').resample(self.meta['freq']).mean().reindex(_tm_index)
 
     # process time index
     @staticmethod
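`_raw_process` is the shared normalization step: every column is coerced to numeric, averaged onto the instrument's native frequency from `meta`, then reindexed onto a regular grid so gaps become explicit NaN rows. A self-contained sketch of that resample-and-reindex pattern, using an invented 5 min series:

```python
import pandas as pd

# logger timestamps a few seconds off the 5 min grid, as raw files often are
idx = pd.to_datetime(['2024-01-01 00:00:03', '2024-01-01 00:05:01',
                      '2024-01-01 00:15:02'])
df = pd.DataFrame({'scatter': ['10.1', '10.5', 'bad']}, index=idx)

freq = '5min'  # e.g. meta['freq'] for the NEPH entry
grid = pd.date_range(idx.min().floor(freq), idx.max().floor(freq),
                     freq=freq, name='time')

out = df.apply(pd.to_numeric, errors='coerce').resample(freq).mean().reindex(grid)
print(out)  # 'bad' -> NaN, and 00:10 appears as an explicit all-NaN row
```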
@@ -152,7 +154,7 @@ class AbstractReader(ABC):
 
 
     # append new data to exist pkl
     @staticmethod
-    def _apnd_prcs(_df_done, _df_apnd):
+    def _append_process(_df_done, _df_apnd):
 
         if _df_apnd is not None:
             _df = concat([_df_apnd.dropna(how='all').copy(), _df_done.dropna(how='all').copy()])
@@ -164,16 +166,16 @@ class AbstractReader(ABC):
 
         return _df_done
 
-    # remove outlier
-    def _outlier_prcs(self, _df):
+    def _outlier_process(self, _df):
+        outlier_file = self.path / 'outlier.json'
 
-        if (self.path / 'outlier.json') not in self.path.glob('*.json'):
+        if not outlier_file.exists():
             return _df
 
-        with (self.path / 'outlier.json').open('r', encoding='utf-8', errors='ignore') as f:
-            self.outlier = jsn.load(f)
+        with outlier_file.open('r', encoding='utf-8', errors='ignore') as f:
+            outliers = json.load(f)
 
-        for _st, _ed in self.outlier.values():
+        for _st, _ed in outliers.values():
             _df.loc[_st:_ed] = np.nan
 
         return _df
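`_outlier_process` now checks for the file directly with `Path.exists()` and keeps the parsed dict local instead of storing it on the instance. Since the loop consumes only the values, each entry in `outlier.json` maps an arbitrary label to a `[start, end]` pair whose rows get masked to NaN. A plausible file, inferred from that loop (labels and dates are invented), written via Python to keep the example runnable:

```python
import json
from pathlib import Path

# hypothetical outlier.json; only the [start, end] values matter to the reader
outliers = {
    "sensor maintenance": ["2024-01-10 09:00:00", "2024-01-10 15:00:00"],
    "calibration spike": ["2024-01-21 00:00:00", "2024-01-21 03:00:00"],
}
Path("outlier.json").write_text(json.dumps(outliers, indent=2), encoding="utf-8")

# the reader then masks each window, roughly:
#   for _st, _ed in outliers.values():
#       df.loc[_st:_ed] = np.nan
```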
@@ -191,14 +193,13 @@ class AbstractReader(ABC):
 
     @staticmethod
     def _safe_pickle_dump(file_path: Path, data: Any) -> None:
-        while True:
-            try:
-                with file_path.open('wb') as f:
-                    pkl.dump(data, f, protocol=pkl.HIGHEST_PROTOCOL)
-                break
-            except PermissionError as err:
-                print('\n', err)
-                input('\t\t\33[41m Please close the file and press "Enter" \33[0m\n')
+        try:
+            with file_path.open('wb') as f:
+                pkl.dump(data, f, protocol=pkl.HIGHEST_PROTOCOL)
+        except PermissionError as e:
+            raise IOError(f"Unable to write to {file_path}. The file may be in use or you may not have permission: {e}")
+        except Exception as e:
+            raise IOError(f"Error writing to {file_path}: {e}")
 
     # read pickle file
     def _read_pkl(self):
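Where 0.1.4 blocked on an interactive "close the file and press Enter" prompt (a problem for unattended runs), 0.1.6 fails fast by re-raising as IOError. Callers that relied on the old behaviour now need their own handling; a sketch of a caller-side retry wrapper, assuming only the signature shown above (`dump_with_retry` is hypothetical):

```python
import time
from pathlib import Path
from typing import Any, Callable

def dump_with_retry(dump: Callable[[Path, Any], None], file_path: Path,
                    data: Any, attempts: int = 3, delay: float = 1.0) -> None:
    """Retry a _safe_pickle_dump-style callable that raises IOError."""
    for i in range(attempts):
        try:
            return dump(file_path, data)
        except IOError:
            if i == attempts - 1:
                raise              # give up after the last attempt
            time.sleep(delay)      # e.g. wait for another process to release the file
```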
@@ -206,72 +207,86 @@ class AbstractReader(ABC):
         return pkl.load(raw_data), pkl.load(qc_data)
 
     def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
-        patterns = {self.meta['pattern'].lower(), self.meta['pattern'].upper(), self.meta['pattern']}
-        files = [f for pattern in patterns for f in self.path.glob(pattern)
+        files = [f
+                 for file_pattern in self.meta['pattern']
+                 for pattern in {file_pattern.lower(), file_pattern.upper(), file_pattern}
+                 for f in self.path.glob(pattern)
                  if f.name not in [self.csv_out.name, self.csv_nam.name, self.csv_nam_raw.name, f'{self.nam}.log']]
 
         if not files:
-            print(f"\t\t\033[31mNo files in '{self.path}' could be read. Please check the current path.\033[0m")
-            return None, None
+            raise FileNotFoundError(f"No files in '{self.path}' could be read. Please check the current path.")
 
         df_list = []
-        for file in files:
-            print(f"\r\t\treading {file.name}", end='')
-            df = self._raw_reader(file)
-            if df is not None:
-                df_list.append(df)
+        with Progress(
+                TextColumn("[bold blue]{task.description}", style="bold blue"),
+                BarColumn(bar_width=18, complete_style="green", finished_style="bright_green"),
+                TaskProgressColumn(),
+                TimeRemainingColumn(),
+                TextColumn("{task.fields[filename]}", style="yellow"),
+                console=console,
+                expand=False
+        ) as progress:
+            task = progress.add_task(f"Reading {self.nam} files", total=len(files), filename="")
+            for file in files:
+                progress.update(task, advance=1, filename=file.name)
+                try:
+                    df = self._raw_reader(file)
+
+                    if df is not None and not df.empty:
+                        df_list.append(df)
+                    else:
+                        self.logger.warning(f"File {file.name} produced an empty DataFrame or None.")
+
+                except pd.errors.ParserError as e:
+                    self.logger.error(f"Error tokenizing data: {e}")
+
+                except Exception as e:
+                    self.logger.error(f"Error reading {file.name}: {e}")
 
         if not df_list:
-            return None, None
+            raise ValueError("All files were either empty or failed to read.")
 
         raw_data = self._raw_process(concat(df_list))
         qc_data = self._QC(raw_data)
 
         return raw_data, qc_data
 
-    # main flow
     def _run(self, _start, _end):
-        _f_raw_done, _f_qc_done = None, None
-
         # read pickle if pickle file exists and 'reset=False' or process raw data or append new data
-        if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and (not self.reset or self.apnd):
-            print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}")
+        if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and not self.reset:
+            print(f"\n{dtm.now().strftime('%m/%d %X')} : Reading {self.nam} \033[96mPICKLE\033[0m "
+                  f"from {_start} to {_end}\n")
 
             _f_raw_done, _f_qc_done = self._read_pkl()
 
-            if not self.apnd:
-                _f_raw_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw_done)
-                _f_qc_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc_done)
-
-                _f_qc_done = self._outlier_prcs(_f_qc_done)
-
-                if self.rate:
-                    self._rate_calculate(_f_raw_done, _f_qc_done, _start_raw, _end_raw)
-
-                return _f_qc_done if self.qc else _f_raw_done
+            if self.append:
+                print(f"Appending new data from {_start} to {_end}")
+                _f_raw_new, _f_qc_new = self._read_raw_files()
+                _f_raw = self._append_process(_f_raw_done, _f_raw_new)
+                _f_qc = self._append_process(_f_qc_done, _f_qc_new)
+            else:
+                _f_raw, _f_qc = _f_raw_done, _f_qc_done
 
-        # read raw data
-        print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it")
+        else:
+            print(f"\n{dtm.now().strftime('%m/%d %X')} : Reading {self.nam} \033[96mRAW DATA\033[0m "
+                  f"from {_start} to {_end}\n")
+            _f_raw, _f_qc = self._read_raw_files()
 
-        _f_raw, _f_qc = self._read_raw_files()
-        if _f_raw is None:
-            return None
-
-        # append new data and pickle data
-        if self.apnd and self.pkl_nam.exists():
-            _f_raw = self._apnd_prcs(_f_raw_done, _f_raw)
-            _f_qc = self._apnd_prcs(_f_qc_done, _f_qc)
+        # process time index
+        _f_raw, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw)
+        _f_qc, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc)
 
-        _f_qc = self._outlier_prcs(_f_qc)
+        _f_qc = self._outlier_process(_f_qc)
 
         # save
         self._save_data(_f_raw, _f_qc)
 
-        # process time index
-        # if (_start is not None)|(_end is not None):
-        _f_raw, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw)
-        _f_qc, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc)
+        self.logger.info(f"{'=' * 60}")
+        self.logger.info(f"Raw data time : {_start_raw} to {_end_raw}")
+        self.logger.info(f"Output time : {_start} to {_end}")
+        self.logger.info(f"{'-' * 60}")
 
-        self._rate_calculate(_f_raw, _f_qc, _start_raw, _end_raw)
+        if self.rate:
+            self._rate_calculate(_f_raw, _f_qc, _start_raw, _end_raw)
 
         return _f_qc if self.qc else _f_raw
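The per-file `print('\rreading …')` status line is replaced with a rich progress bar that carries a custom `filename` field. A minimal standalone sketch of the same column layout (rich must be installed; the file names are dummies):

```python
import time

from rich.console import Console
from rich.progress import (BarColumn, Progress, TaskProgressColumn,
                           TextColumn, TimeRemainingColumn)

console = Console(force_terminal=True, color_system="auto")
files = [f"day_{i:02d}.csv" for i in range(10)]  # dummy file names

with Progress(
        TextColumn("[bold blue]{task.description}"),
        BarColumn(bar_width=18, complete_style="green", finished_style="bright_green"),
        TaskProgressColumn(),
        TimeRemainingColumn(),
        TextColumn("{task.fields[filename]}", style="yellow"),
        console=console,
) as progress:
    task = progress.add_task("Reading NEPH files", total=len(files), filename="")
    for name in files:
        progress.update(task, advance=1, filename=name)
        time.sleep(0.05)  # stand-in for _raw_reader(file)
```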
AeroViz/rawDataReader/script/AE33.py
@@ -6,11 +6,11 @@ from AeroViz.rawDataReader.core import AbstractReader
 class Reader(AbstractReader):
     nam = 'AE33'
 
-    def _raw_reader(self, _file):
-        if _file.stat().st_size / 1024 < 550:
+    def _raw_reader(self, file):
+        if file.stat().st_size / 1024 < 550:
             print('\t It may not be a whole daily data.')
 
-        _df = read_table(_file, parse_dates={'time': [0, 1]}, index_col='time',
+        _df = read_table(file, parse_dates={'time': [0, 1]}, index_col='time',
                          delimiter=r'\s+', skiprows=5, usecols=range(67))
         _df.columns = _df.columns.str.strip(';')
AeroViz/rawDataReader/script/AE43.py
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
 class Reader(AbstractReader):
     nam = 'AE43'
 
-    def _raw_reader(self, _file):
-        _df = read_csv(_file, parse_dates={'time': ['StartTime']}, index_col='time')
+    def _raw_reader(self, file):
+        _df = read_csv(file, parse_dates={'time': ['StartTime']}, index_col='time')
         _df_id = _df['SetupID'].iloc[-1]
 
         # get last SetupID data
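Both aethalometer readers still rely on the dict form of `parse_dates` to build the 'time' index while reading (AE33 fuses the first two whitespace-separated columns, AE43 parses 'StartTime'). That dict form is deprecated in recent pandas (2.2), so a forward-compatible sketch of the AE33-style merge might look like this (the file name and column labels are placeholders):

```python
import pandas as pd

# deprecated: read_table(file, parse_dates={'time': [0, 1]}, index_col='time', ...)
# explicit equivalent:
df = pd.read_table('AE33_dump.dat', delimiter=r'\s+', skiprows=5, usecols=range(67))
date_col, time_col = df.columns[:2]                      # the two fused columns
df['time'] = pd.to_datetime(df[date_col] + ' ' + df[time_col])
df = df.set_index('time').drop(columns=[date_col, time_col])
```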