AeroViz 0.1.3b0__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of AeroViz has been flagged as potentially problematic.

Files changed (85)
  1. AeroViz/__init__.py +5 -3
  2. AeroViz/{config → data}/DEFAULT_DATA.csv +1 -1
  3. AeroViz/dataProcess/Chemistry/__init__.py +28 -27
  4. AeroViz/dataProcess/Chemistry/_isoropia.py +11 -11
  5. AeroViz/dataProcess/Chemistry/_mass_volume.py +15 -18
  6. AeroViz/dataProcess/Chemistry/_ocec.py +21 -46
  7. AeroViz/dataProcess/Chemistry/_teom.py +2 -1
  8. AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
  9. AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
  10. AeroViz/dataProcess/Optical/Angstrom_exponent.py +20 -0
  11. AeroViz/dataProcess/Optical/_IMPROVE.py +13 -15
  12. AeroViz/dataProcess/Optical/__init__.py +15 -30
  13. AeroViz/dataProcess/Optical/_absorption.py +21 -47
  14. AeroViz/dataProcess/Optical/_extinction.py +20 -15
  15. AeroViz/dataProcess/Optical/_mie.py +0 -1
  16. AeroViz/dataProcess/Optical/_scattering.py +19 -20
  17. AeroViz/dataProcess/Optical/fRH.pkl +0 -0
  18. AeroViz/dataProcess/SizeDistr/__init__.py +7 -7
  19. AeroViz/dataProcess/SizeDistr/_merge.py +2 -2
  20. AeroViz/dataProcess/SizeDistr/_merge_v1.py +2 -2
  21. AeroViz/dataProcess/SizeDistr/_merge_v2.py +2 -2
  22. AeroViz/dataProcess/SizeDistr/_merge_v3.py +1 -1
  23. AeroViz/dataProcess/SizeDistr/_merge_v4.py +1 -1
  24. AeroViz/dataProcess/VOC/__init__.py +4 -9
  25. AeroViz/dataProcess/VOC/_potential_par.py +71 -37
  26. AeroViz/dataProcess/VOC/{voc_par.json → support_voc.json} +321 -339
  27. AeroViz/dataProcess/__init__.py +28 -6
  28. AeroViz/dataProcess/core/__init__.py +10 -17
  29. AeroViz/plot/__init__.py +1 -1
  30. AeroViz/plot/box.py +2 -1
  31. AeroViz/plot/optical/optical.py +4 -4
  32. AeroViz/plot/regression.py +25 -39
  33. AeroViz/plot/scatter.py +68 -2
  34. AeroViz/plot/templates/__init__.py +2 -1
  35. AeroViz/plot/templates/ammonium_rich.py +34 -0
  36. AeroViz/plot/templates/diurnal_pattern.py +11 -9
  37. AeroViz/plot/templates/koschmieder.py +51 -115
  38. AeroViz/plot/templates/metal_heatmap.py +115 -17
  39. AeroViz/plot/timeseries/__init__.py +1 -0
  40. AeroViz/plot/timeseries/template.py +47 -0
  41. AeroViz/plot/timeseries/timeseries.py +275 -208
  42. AeroViz/plot/utils/plt_utils.py +2 -2
  43. AeroViz/plot/utils/units.json +5 -0
  44. AeroViz/plot/violin.py +9 -8
  45. AeroViz/process/__init__.py +2 -2
  46. AeroViz/process/script/AbstractDistCalc.py +1 -1
  47. AeroViz/process/script/Chemical.py +5 -4
  48. AeroViz/process/script/Others.py +1 -1
  49. AeroViz/rawDataReader/__init__.py +66 -22
  50. AeroViz/rawDataReader/{utils/config.py → config/supported_instruments.py} +33 -54
  51. AeroViz/rawDataReader/core/__init__.py +116 -231
  52. AeroViz/rawDataReader/script/AE33.py +12 -13
  53. AeroViz/rawDataReader/script/AE43.py +10 -13
  54. AeroViz/rawDataReader/script/APS_3321.py +8 -8
  55. AeroViz/rawDataReader/script/Aurora.py +21 -19
  56. AeroViz/rawDataReader/script/BC1054.py +13 -17
  57. AeroViz/rawDataReader/script/EPA_vertical.py +36 -8
  58. AeroViz/rawDataReader/script/GRIMM.py +6 -13
  59. AeroViz/rawDataReader/script/{IGAC_ZM.py → IGAC.py} +18 -18
  60. AeroViz/rawDataReader/script/MA350.py +9 -16
  61. AeroViz/rawDataReader/script/Minion.py +103 -0
  62. AeroViz/rawDataReader/script/NEPH.py +28 -38
  63. AeroViz/rawDataReader/script/SMPS_TH.py +6 -6
  64. AeroViz/rawDataReader/script/SMPS_aim11.py +8 -8
  65. AeroViz/rawDataReader/script/SMPS_genr.py +8 -8
  66. AeroViz/rawDataReader/script/Sunset_OCEC.py +66 -0
  67. AeroViz/rawDataReader/script/TEOM.py +10 -8
  68. AeroViz/rawDataReader/script/Table.py +9 -10
  69. AeroViz/rawDataReader/script/VOC.py +33 -0
  70. AeroViz/rawDataReader/script/__init__.py +10 -12
  71. AeroViz/tools/database.py +7 -9
  72. AeroViz/tools/datareader.py +3 -3
  73. {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.5.dist-info}/METADATA +1 -1
  74. AeroViz-0.1.5.dist-info/RECORD +114 -0
  75. AeroViz/rawDataReader/script/IGAC_TH.py +0 -104
  76. AeroViz/rawDataReader/script/OCEC_LCRES.py +0 -34
  77. AeroViz/rawDataReader/script/OCEC_RES.py +0 -28
  78. AeroViz/rawDataReader/script/VOC_TH.py +0 -30
  79. AeroViz/rawDataReader/script/VOC_ZM.py +0 -37
  80. AeroViz-0.1.3b0.dist-info/RECORD +0 -110
  81. /AeroViz/{config → data}/DEFAULT_PNSD_DATA.csv +0 -0
  82. /AeroViz/rawDataReader/{utils → config}/__init__.py +0 -0
  83. {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.5.dist-info}/LICENSE +0 -0
  84. {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.5.dist-info}/WHEEL +0 -0
  85. {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.5.dist-info}/top_level.txt +0 -0
AeroViz/rawDataReader/core/__init__.py
@@ -1,14 +1,16 @@
  import json as jsn
+ import logging
  import pickle as pkl
  from abc import ABC, abstractmethod
- from datetime import datetime as dtm, timedelta as dtmdt
- from itertools import chain
+ from datetime import datetime as dtm
  from pathlib import Path
+ from typing import Any

  import numpy as np
+ import pandas as pd
  from pandas import DataFrame, date_range, concat, to_numeric, to_datetime

- from ..utils.config import meta
+ from ..config.supported_instruments import meta

  __all__ = ['AbstractReader']

@@ -16,117 +18,108 @@ __all__ = ['AbstractReader']
  class AbstractReader(ABC):
      nam = 'AbstractReader'

-     # initial config
+     # initial data
      # input : file path, reset switch

      # list the file in the path and read pickle file if it exists, else read raw data and dump the pickle file the
      # pickle file will be generated after read raw data first time, if you want to re-read the rawdata, please set
      # 'reset=True'

-     def __init__(self, _path, QC=True, csv_raw=True, reset=False, rate=False, append_data=False, update_meta=None):
-         # logging.info(f'\n{self.nam}')
-         # print('='*65)
-         # logger.info(f"Reading file and process data")
+     def __init__(self,
+                  path: Path | str,
+                  qc: bool = True,
+                  csv_raw: bool = True,
+                  reset: bool = False,
+                  rate: bool = False,
+                  append_data: bool = False):

-         # class parameter
-         # self.index = lambda _freq: date_range(_sta, _fin, freq=_freq)
-         self.path = Path(_path)
+         self.path = Path(path)
          self.meta = meta[self.nam]
-
-         if update_meta is not None:
-             self.meta.update(update_meta)
+         self.logger = self._setup_logger()

          self.reset = reset
          self.rate = rate
-         self.qc = QC
+         self.qc = qc
          self.csv = csv_raw
-         self.apnd = append_data & reset
-
-         self.pkl_nam = f'_read_{self.nam.lower()}.pkl'
-         self.csv_nam = f'_read_{self.nam.lower()}.csv'
+         self.append = append_data & reset

-         self.pkl_nam_raw = f'_read_{self.nam.lower()}_raw.pkl'
-         self.csv_nam_raw = f'_read_{self.nam.lower()}_raw.csv'
+         self.pkl_nam = self.path / f'_read_{self.nam.lower()}.pkl'
+         self.csv_nam = self.path / f'_read_{self.nam.lower()}.csv'
+         self.pkl_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.pkl'
+         self.csv_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.csv'
+         self.csv_out = self.path / f'output_{self.nam.lower()}.csv'

-         self.csv_out = f'output_{self.nam.lower()}.csv'
+     # dependency injection function, customize each instrument
+     @abstractmethod
+     def _raw_reader(self, file):
+         pass

-         # print(f" from {_sta.strftime('%Y-%m-%d %X')} to {_fin.strftime('%Y-%m-%d %X')}")
-         # print('='*65)
-         # print(f"{dtm.now().strftime('%m/%d %X')}")
+     @abstractmethod
+     def _QC(self, df: DataFrame):
+         return df

-     # get data
      def __call__(self,
                   start: dtm | None = None,
                   end: dtm | None = None,
-                  mean_freq='1h',
-                  csv_out=True,
-                  **kwarg):
-
-         self._oth_set = kwarg
+                  mean_freq: str = '1h',
+                  csv_out: bool = True,
+                  ) -> DataFrame | None:

          if start and end and end <= start:
-             raise ValueError(
-                 f'\nPlease check out input time : '
-                 f'\n\tstart : {start.strftime("%Y-%m-%d %X")}'
-                 f'\n\tend : {end.strftime("%Y-%m-%d %X")}')
-
-         fout = self._run(start, end)
+             raise ValueError(f"Invalid time range: start {start} is after end {end}")

-         if fout is not None:
-             if mean_freq is not None:
-                 fout = fout.resample(mean_freq).mean()
+         data = self._run(start, end)

+         if data is not None:
+             if mean_freq:
+                 data = data.resample(mean_freq).mean()
              if csv_out:
-                 fout.to_csv(self.path / self.csv_out)
+                 data.to_csv(self.csv_out)

-         return fout
+         return data

-     # dependency injection function
-     @abstractmethod
-     def _raw_reader(self, _file):
-         # customize each instrument
-         pass
+     @staticmethod
+     def basic_QC(df: DataFrame):
+         df_ave, df_std = df.mean(), df.std()
+         df_lowb, df_highb = df < (df_ave - df_std * 1.5), df > (df_ave + df_std * 1.5)

-     @abstractmethod
-     def _QC(self, df: DataFrame):
-         # customize each instrument
-         return df
+         return df.mask(df_lowb | df_highb).copy()

      # set each to true datetime(18:30:01 -> 18:30:00) and rindex data
      def _raw_process(self, _df):
          # get time from df and set time to whole time to create time index
          _st, _ed = _df.index.sort_values()[[0, -1]]
-         _tm_index = date_range(_st.strftime('%Y%m%d %H00'),
-                                (_ed + dtmdt(hours=1)).strftime('%Y%m%d %H00'),
+         _tm_index = date_range(_st.strftime('%Y%m%d %H00'), _ed.floor('h').strftime('%Y%m%d %H00'),
                                 freq=self.meta['freq'])
          _tm_index.name = 'time'

          return _df.apply(to_numeric, errors='coerce').resample(self.meta['freq']).mean().reindex(_tm_index)

+     def _setup_logger(self) -> logging.Logger:
+         logger = logging.getLogger(self.nam)
+         logger.setLevel(logging.INFO)
+         handler = logging.FileHandler(self.path / f'{self.nam}.log')
+         handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
+         logger.addHandler(handler)
+         return logger
+
      # acquisition rate and yield rate
      def _rate_calculate(self, _fout_raw, _fout_qc, _st_raw, _ed_raw):
-
          if self.meta['deter_key'] is not None:
              _start, _end = _fout_qc.index[[0, -1]]

              _drop_how = 'any'
              _the_size = len(_fout_raw.resample('1h').mean().index)

-             _f_pth = (self.path / f'{self.nam}.log')
-             _f = _f_pth.open('r+' if _f_pth.exists() else 'w+')
-
-             _cont = _f.read()
-             _f.seek(0)
-
-             _f.write(f"\n{dtm.now().strftime('%Y/%m/%d %X')}\n")
-             _f.write(f"{'-' * 60}\n")
-             _f.write(f"rawdata time : \n\t{_st_raw.strftime('%Y-%m-%d %X')} ~ {_ed_raw.strftime('%Y-%m-%d %X')}\n")
-             _f.write(f"output time : \n\t{_start.strftime('%Y-%m-%d %X')} ~ {_end.strftime('%Y-%m-%d %X')}\n")
-             _f.write(f"{'-' * 60}\n")
-             print(f"\n\t\tfrom {_start.strftime('%Y-%m-%d %X')} to {_end.strftime('%Y-%m-%d %X')}\n")
+             self.logger.info(f"{'=' * 60}")
+             self.logger.info(
+                 f"Raw data time : {_st_raw.strftime('%Y-%m-%d %H:%M:%S')} to {_ed_raw.strftime('%Y-%m-%d %H:%M:%S')}")
+             self.logger.info(
+                 f"Output time : {_start.strftime('%Y-%m-%d %H:%M:%S')} to {_end.strftime('%Y-%m-%d %H:%M:%S')}")
+             self.logger.info(f"{'-' * 60}")
+             print(f"\n\n\t\tfrom {_start.strftime('%Y-%m-%d %H:%M:%S')} to {_end.strftime('%Y-%m-%d %H:%M:%S')}\n")

              for _nam, _key in self.meta['deter_key'].items():
-
                  if _key == ['all']:
                      _key, _drop_how = _fout_qc.keys(), 'all'

@@ -139,18 +132,14 @@ class AbstractReader(ABC):
                  except ZeroDivisionError:
                      _acq_rate, _yid_rate = 0, 0

-                 _f.write(f'{_nam} : \n')
-                 _f.write(f"\tacquisition rate : {_acq_rate}%\n")
-                 _f.write(f'\tyield rate : {_yid_rate}%\n')
+                 self.logger.info(f'{_nam}:')
+                 self.logger.info(f"\tAcquisition rate: {_acq_rate}%")
+                 self.logger.info(f'\tYield rate: {_yid_rate}%')
+                 self.logger.info(f"{'=' * 60}")

                  print(f'\t\t{_nam} : ')
                  print(f'\t\t\tacquisition rate : \033[91m{_acq_rate}%\033[0m')
-                 print(f'\t\t\tyield rate : \033[91m{_yid_rate}%\033[0m')
-
-             _f.write(f"{'=' * 40}\n")
-             _f.write(_cont)
-
-             _f.close()
+                 print(f'\t\t\tyield rate : \033[91m{_yid_rate}%\033[0m')

      # process time index
      @staticmethod
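
Note: the new basic_QC static method above centralizes the mean ± 1.5σ outlier mask that each instrument script previously re-implemented as a local _QC_func closure (compare the AE33, AE43, and Aurora hunks below). A minimal sketch of what that mask does to one resampled window; the column name and values are made up for illustration:

    import pandas as pd

    # one resampled window, as basic_QC would receive it
    window = pd.DataFrame({'BC1': [950.0, 1000.0, 1020.0, 5000.0, 980.0]})
    ave, std = window.mean(), window.std()
    # anything outside mean +/- 1.5*std becomes NaN, as in basic_QC
    masked = window.mask((window < ave - 1.5 * std) | (window > ave + 1.5 * std))
    print(masked['BC1'].tolist())  # [950.0, 1000.0, 1020.0, nan, 980.0]
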
@@ -191,86 +180,78 @@ class AbstractReader(ABC):
          return _df

      # save pickle file
-     def _save_dt(self, _save_raw, _save_qc):
-         # dump pickle file
-         _check = True
-         while _check:
-             try:
-                 with (self.path / self.pkl_nam).open('wb') as f:
-                     pkl.dump(_save_qc, f, protocol=pkl.HIGHEST_PROTOCOL)
-
-                 # dump csv file
-                 if self.csv:
-                     _save_qc.to_csv(self.path / self.csv_nam)
-
-                 # output raw data if qc file
-                 if self.meta['deter_key'] is not None:
-                     with (self.path / self.pkl_nam_raw).open('wb') as f:
-                         pkl.dump(_save_raw, f, protocol=pkl.HIGHEST_PROTOCOL)
-
-                     if self.csv:
-                         _save_raw.to_csv(self.path / self.csv_nam_raw)
+     def _save_data(self, raw_data: DataFrame, qc_data: DataFrame) -> None:
+         self._safe_pickle_dump(self.pkl_nam, qc_data)
+         if self.csv:
+             qc_data.to_csv(self.csv_nam)

-                 _check = False
+         if self.meta['deter_key'] is not None:
+             self._safe_pickle_dump(self.pkl_nam_raw, raw_data)
+             if self.csv:
+                 raw_data.to_csv(self.csv_nam_raw)

-             except PermissionError as _err:
-                 print('\n', _err)
-                 input('\t\t\33[41m Please Close The File And Press "Enter" \33[0m\n')
+     @staticmethod
+     def _safe_pickle_dump(file_path: Path, data: Any) -> None:
+         while True:
+             try:
+                 with file_path.open('wb') as f:
+                     pkl.dump(data, f, protocol=pkl.HIGHEST_PROTOCOL)
+                 break
+             except PermissionError as err:
+                 print('\n', err)
+                 input('\t\t\33[41m Please close the file and press "Enter" \33[0m\n')

      # read pickle file
-     def _read_pkl(self, ):
-         with (self.path / self.pkl_nam).open('rb') as f:
-             _fout_qc = pkl.load(f)
+     def _read_pkl(self):
+         with self.pkl_nam.open('rb') as qc_data, self.pkl_nam_raw.open('rb') as raw_data:
+             return pkl.load(raw_data), pkl.load(qc_data)

-         if (self.path / self.pkl_nam_raw).exists():
-             with (self.path / self.pkl_nam_raw).open('rb') as f:
-                 _fout_raw = pkl.load(f)
-         else:
-             _fout_raw = _fout_qc
+     def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
+         patterns = {self.meta['pattern'].lower(), self.meta['pattern'].upper(), self.meta['pattern']}
+         files = [f for pattern in patterns for f in self.path.glob(pattern)
+                  if f.name not in [self.csv_out.name, self.csv_nam.name, self.csv_nam_raw.name, f'{self.nam}.log']]

-         return _fout_raw, _fout_qc
+         if not files:
+             raise FileNotFoundError(f"\t\t\033[31mNo files in '{self.path}' could be read."
+                                     f"Please check the current path.\033[0m")

-     # read raw data
-     def _read_raw(self, ):
-         pattern = self.meta['pattern']
-         patterns = {pattern, pattern.lower(), pattern.upper()}
-         _df_con, _f_list = None, list(chain.from_iterable(self.path.glob(p) for p in patterns))
+         df_list = []
+         for file in files:
+             print(f"\r\t\treading {file.name}", end='')

-         for file in _f_list:
-             if file.name in [self.csv_out, self.csv_nam, self.csv_nam_raw, f'{self.nam}.log']:
-                 continue
+             try:
+                 df = self._raw_reader(file)

-             print(f"\r\t\treading {file.name}", end='')
+                 if df is not None and not df.empty:
+                     df_list.append(df)
+                 else:
+                     self.logger.warning(f"File {file.name} produced an empty DataFrame or None.")

-             _df = self._raw_reader(file)
+             except pd.errors.ParserError as e:
+                 self.logger.error(f"Error tokenizing data: {e}")

-             # concat the concated list
-             if _df is not None:
-                 _df_con = concat([_df_con, _df]) if _df_con is not None else _df
+             except Exception as e:
+                 self.logger.error(f"Error reading {file.name}: {e}")

-         if _df_con is None:
-             print(f"\t\t\033[31mNo File in '{self.path}' Could Read, Please Check Out the Current Path\033[0m")
-             return None, None
+         if not df_list:
+             raise ValueError("All files were either empty or failed to read.")

-         # QC
-         _fout_raw = self._raw_process(_df_con)
-         _fout_qc = self._QC(_fout_raw)
+         raw_data = self._raw_process(concat(df_list))
+         qc_data = self._QC(raw_data)

-         return _fout_raw, _fout_qc
+         return raw_data, qc_data

      # main flow
      def _run(self, _start, _end):
-
          _f_raw_done, _f_qc_done = None, None

          # read pickle if pickle file exists and 'reset=False' or process raw data or append new data
-         _pkl_exist = self.path / self.pkl_nam in list(self.path.glob('*.pkl'))
-         if _pkl_exist & ((~self.reset) | self.apnd):
+         if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and (not self.reset or self.append):
              print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}")

              _f_raw_done, _f_qc_done = self._read_pkl()

-             if not self.apnd:
+             if not self.append:
                  _f_raw_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw_done)
                  _f_qc_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc_done)

@@ -284,19 +265,17 @@ class AbstractReader(ABC):
          # read raw data
          print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it")

-         _f_raw, _f_qc = self._read_raw()
-         if _f_raw is None:
-             return None
+         _f_raw, _f_qc = self._read_raw_files()

          # append new data and pickle data
-         if self.apnd & _pkl_exist:
+         if self.append and self.pkl_nam.exists():
              _f_raw = self._apnd_prcs(_f_raw_done, _f_raw)
              _f_qc = self._apnd_prcs(_f_qc_done, _f_qc)

          _f_qc = self._outlier_prcs(_f_qc)

          # save
-         self._save_dt(_f_raw, _f_qc)
+         self._save_data(_f_raw, _f_qc)

          # process time index
          # if (_start is not None)|(_end is not None):
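
Note: the pickle-writing retry loop that _save_dt used to inline is now the standalone _safe_pickle_dump shown two hunks above. The same retry-until-writable pattern, sketched outside the class (identical logic; the file name and payload are illustrative):

    import pickle as pkl
    from pathlib import Path
    from typing import Any

    def safe_pickle_dump(file_path: Path, data: Any) -> None:
        # retry until the target file is writable (e.g. after the user
        # closes it in another program); only PermissionError is retried
        while True:
            try:
                with file_path.open('wb') as f:
                    pkl.dump(data, f, protocol=pkl.HIGHEST_PROTOCOL)
                break
            except PermissionError as err:
                print('\n', err)
                input('Please close the file and press "Enter"\n')

    safe_pickle_dump(Path('_read_demo.pkl'), {'status': 'ok'})
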
@@ -306,97 +285,3 @@ class AbstractReader(ABC):
          self._rate_calculate(_f_raw, _f_qc, _start_raw, _end_raw)

          return _f_qc if self.qc else _f_raw
-
-     # -------------------------------------------------------------------------------------
-     # old flow
-     # def __run(self, _start, _end):
-     #
-     #     ## read pickle if pickle file exists and 'reset=False' or process raw data
-     #     if (self.path / self.pkl_nam in list(self.path.glob('*.pkl'))) & (~self.reset):
-     #         print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}")
-     #
-     #         with (self.path / self.pkl_nam).open('rb') as f:
-     #             _fout_qc = pkl.load(f)
-     #
-     #         _exist = (self.path / self.pkl_nam_raw).exists()
-     #         if _exist:
-     #             with (self.path / self.pkl_nam_raw).open('rb') as f:
-     #                 _fout_raw = pkl.load(f)
-     #         else:
-     #             _fout_raw = _fout_qc
-     #
-     #         _start, _end = to_datetime(_start) or _fout_qc.index[0], to_datetime(_end) or _fout_qc.index[-1]
-     #         _idx = date_range(_start, _end, freq=_fout_qc.index.freq.copy())
-     #         _idx.name = 'time'
-     #
-     #         _fout_raw, _fout_qc = _fout_raw.reindex(_idx), _fout_qc.reindex(_idx)
-     #         if (self.rate) & (_exist):
-     #             self._rate_calculate(_fout_raw, _fout_qc)
-     #
-     #         return _fout_qc if self.qc else _fout_raw
-     #     else:
-     #         print(
-     #             f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it")
-     #
-     #         ##=================================================================================================================
-     #         ## read raw data
-     #         _df_con, _f_list = None, list(self.path.glob(self.meta['pattern']))
-     #
-     #         if len(_f_list) == 0:
-     #             print(f"\t\t\033[31mNo File in '{self.path}' Could Read, Please Check Out the Current Path\033[0m")
-     #             return None
-     #
-     #         for file in _f_list:
-     #             if file.name in [self.csv_out, self.csv_nam, self.csv_nam_raw, f'{self.nam}.log']: continue
-     #
-     #             print(f"\r\t\treading {file.name}", end='')
-     #
-     #             _df = self._raw_reader(file)
-     #
-     #             ## concat the concated list
-     #             if _df is not None:
-     #                 _df_con = concat([_df_con, _df]) if _df_con is not None else _df
-     #         print()
-     #
-     #         ## QC
-     #         _save_raw = self._raw_process(_df_con)
-     #         _save_qc = self._QC(_save_raw)
-     #
-     #         _start, _end = to_datetime(_start) or _save_raw.index[0], to_datetime(_end) or _save_raw.index[-1]
-     #         _idx = date_range(_start, _end, freq=_save_raw.index.freq.copy())
-     #         _idx.name = 'time'
-     #
-     #         _fout_raw, _fout_qc = _save_raw.reindex(_idx).copy(), _save_qc.reindex(_idx).copy()
-     #
-     #         self._rate_calculate(_fout_raw, _fout_qc)
-     #
-     #         ##=================================================================================================================
-     #         ## dump pickle file
-     #         _check = True
-     #         while _check:
-     #
-     #             try:
-     #                 with (self.path / self.pkl_nam).open('wb') as f:
-     #                     pkl.dump(_save_qc, f, protocol=pkl.HIGHEST_PROTOCOL)
-     #
-     #                 ## dump csv file
-     #                 if self.csv:
-     #                     _save_qc.to_csv(self.path / self.csv_nam)
-     #
-     #                 ## output raw data if qc file
-     #                 if self.meta['deter_key'] is not None:
-     #                     with (self.path / self.pkl_nam_raw).open('wb') as f:
-     #                         pkl.dump(_save_raw, f, protocol=pkl.HIGHEST_PROTOCOL)
-     #
-     #                     if self.csv:
-     #                         _save_raw.to_csv(self.path / self.csv_nam_raw)
-     #
-     #                 return _fout_qc if self.qc else _fout_raw
-     #
-     #                 _check = False
-     #
-     #             except PermissionError as _err:
-     #                 print('\n', _err)
-     #                 input('\t\t\33[41m Please Close The File And Press "Enter" \33[0m\n')
-     #
-     #         return _fout_qc
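
Taken together, the core changes replace the old _path/QC/update_meta constructor and **kwarg call options with an explicit, typed API. A hypothetical end-to-end use of one concrete reader under the new signature (the data path is made up; Reader is the AE33 subclass from the script diff below):

    from datetime import datetime
    from AeroViz.rawDataReader.script.AE33 import Reader

    reader = Reader('data/AE33', qc=True, csv_raw=True, reset=False)
    df = reader(start=datetime(2024, 1, 1),
                end=datetime(2024, 2, 1),
                mean_freq='1h',
                csv_out=True)  # hourly-averaged, QC'd DataFrame
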
AeroViz/rawDataReader/script/AE33.py
@@ -6,26 +6,25 @@ from AeroViz.rawDataReader.core import AbstractReader
  class Reader(AbstractReader):
      nam = 'AE33'

-     def _raw_reader(self, _file):
-         _df = read_table(_file, parse_dates={'time': [0, 1]}, index_col='time',
+     def _raw_reader(self, file):
+         if file.stat().st_size / 1024 < 550:
+             print('\t It may not be a whole daily data.')
+
+         _df = read_table(file, parse_dates={'time': [0, 1]}, index_col='time',
                           delimiter=r'\s+', skiprows=5, usecols=range(67))
          _df.columns = _df.columns.str.strip(';')

          # remove data without Status=0, 128 (Not much filter tape), 256 (Not much filter tape)
-         if not self._oth_set.get('ignore_err', False):
-             _df = _df.where((_df['Status'] != 0) | (_df['Status'] != 128) | (_df['Status'] != 256)).copy()
+         if self.meta.get('error_state', False):
+             _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()
+
+         _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']]

-         return _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'Status']]
+         return _df.loc[~_df.index.duplicated() & _df.index.notna()]

      def _QC(self, _df):
          # remove negative value
          _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']].mask((_df < 0).copy())

-         # QC data in 5 min
-         def _QC_func(df):
-             _df_ave, _df_std = df.mean(), df.std()
-             _df_lowb, _df_highb = df < (_df_ave - _df_std * 1.5), df > (_df_ave + _df_std * 1.5)
-
-             return df.mask(_df_lowb | _df_highb).copy()
-
-         return _df.resample('5min').apply(_QC_func).resample('1h').mean()
+         # QC data in 1h
+         return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
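
Note: AE33 (and AE43 below) now read their status filter from instrument metadata instead of a per-call ignore_err switch; this also fixes the old tautological condition (Status != 0) | (Status != 128) | (Status != 256), which was true for every row. A sketch of the new mask with an assumed error_state of [128, 256]; the real values live in supported_instruments.py and are not shown in this diff:

    import pandas as pd

    df = pd.DataFrame({'BC1': [1.2, 3.4, 5.6], 'Status': [0, 128, 256]})
    error_state = [128, 256]  # assumed meta['error_state']
    # rows whose Status is an error state become NaN, as in _raw_reader
    df = df.where(~df['Status'].isin(error_state))
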
AeroViz/rawDataReader/script/AE43.py
@@ -6,29 +6,26 @@ from AeroViz.rawDataReader.core import AbstractReader
  class Reader(AbstractReader):
      nam = 'AE43'

-     def _raw_reader(self, _file):
-         _df = read_csv(_file, parse_dates={'time': ['StartTime']}, index_col='time')
+     def _raw_reader(self, file):
+         _df = read_csv(file, parse_dates={'time': ['StartTime']}, index_col='time')
          _df_id = _df['SetupID'].iloc[-1]

          # get last SetupID data
          _df = _df.groupby('SetupID').get_group(_df_id)[
              ['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'Status']].copy()

-         # remove data without Status=0
-         _df = _df.where(_df['Status'] == 0).copy()
+         # remove data without Status=0, 128 (Not much filter tape), 256 (Not much filter tape)
+         if self.meta.get('error_state', False):
+             _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()

-         return _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']]
+         _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']]
+
+         return _df.loc[~_df.index.duplicated() & _df.index.notna()]

      # QC data
      def _QC(self, _df):
          # remove negative value
          _df = _df.mask((_df < 0).copy())

-         # QC data in 5 min
-         def _QC_func(df):
-             _df_ave, _df_std = df.mean(), df.std()
-             _df_lowb, _df_highb = df < (_df_ave - _df_std * 1.5), df > (_df_ave + _df_std * 1.5)
-
-             return df.mask(_df_lowb | _df_highb).copy()
-
-         return _df.resample('5min').apply(_QC_func).resample('1h').mean()
+         # QC data in 1h
+         return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
AeroViz/rawDataReader/script/APS_3321.py
@@ -7,33 +7,33 @@ from AeroViz.rawDataReader.core import AbstractReader
  class Reader(AbstractReader):
      nam = 'APS_3321'

-     def _raw_reader(self, _file):
-         with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
+     def _raw_reader(self, file):
+         with open(file, 'r', encoding='utf-8', errors='ignore') as f:
              _df = read_table(f, skiprows=6, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time')
              _key = list(_df.keys()[3:54])  ## 542 ~ 1981

-         ## create new keys
+         # create new keys
          _newkey = {}
          for _k in _key:
              _newkey[_k] = float(_k).__round__(4)
              # _newkey['Mode(m)'] = 'mode'

-         ## get new dataframe
+         # get new dataframe
          _df = _df[_newkey.keys()].rename(_newkey, axis=1)
-         # _df['total'] = _df[list(_newkey.values())[:-1]].sum(axis=1)*(n.diff(n.log(_df.keys()[:-1].to_numpy(float))).mean()).copy()
+         # df['total'] = _df[list(_newkey.values())[:-1]].sum(axis=1)*(n.diff(n.log(_df.keys()[:-1].to_numpy(float))).mean()).copy()

          _df_idx = to_datetime(_df.index, errors='coerce')

          return _df.set_index(_df_idx).loc[_df_idx.dropna()]

-     ## QC data
+     # QC data
      def _QC(self, _df):
-         ## mask out the data size lower than 7
+         # mask out the data size lower than 7
          _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
          _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
          _df = _df.mask(_df_size < 7)

-         ## remove total conc. lower than 700
+         # remove total conc. lower than 700
          _df = _df.mask(_df['total'] > 700)

          # not confirmed
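
The 'total' column in _QC integrates the size distribution: the per-channel dN/dlogDp values are summed and scaled by the mean logarithmic spacing of the diameter columns. (Note the following mask drops totals above 700 even though its comment reads "remove total conc. lower than 700".) A toy numeric sketch of the integration, with three invented diameters rather than the instrument's real 542-1981 nm channels:

    import numpy as np
    import pandas as pd

    diameters = [542.0, 583.0, 626.0]  # column labels are bin diameters
    df = pd.DataFrame([[100.0, 120.0, 90.0]], columns=diameters)
    # mean log-width of the bins, as in n.diff(n.log(...)).mean()
    dlogdp = np.diff(np.log(np.asarray(diameters))).mean()
    df['total'] = df[diameters].sum(axis=1, min_count=1) * dlogdp
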
AeroViz/rawDataReader/script/Aurora.py
@@ -6,33 +6,35 @@ from AeroViz.rawDataReader.core import AbstractReader
  class Reader(AbstractReader):
      nam = 'Aurora'

-     def _raw_reader(self, _file):
-         with (_file).open('r', encoding='utf-8-sig', errors='ignore') as f:
+     def _raw_reader(self, file):
+         with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
              _df = read_csv(f, low_memory=False, index_col=0)

-         _df.index = to_datetime(_df.index, errors='coerce', format=self._oth_set.get('date_format') or 'mixed')
+         _df.index = to_datetime(_df.index, errors='coerce')
          _df.index.name = 'time'

          _df.columns = _df.keys().str.strip(' ')

-         _df = _df.loc[
-             _df.index.dropna(), ['0°σspB', '0°σspG', '0°σspR', '90°σspB', '90°σspG', '90°σspR', 'RH']].copy()
-         _df.columns = ['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']
+         # consider another csv format
+         _df = _df.rename(columns={
+             '0°σspB': 'B', '0°σspG': 'G', '0°σspR': 'R',
+             '90°σspB': 'BB', '90°σspG': 'BG', '90°σspR': 'BR',
+             'Blue': 'B', 'Green': 'G', 'Red': 'R',
+             'B_Blue': 'BB', 'B_Green': 'BG', 'B_Red': 'BR',
+             'RH': 'RH'
+         })

-         return _df
+         _df = _df[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']]

-     ## QC data
-     def _QC(self, _df):
-         ## remove negative value
-         _df = _df.mask((_df <= 0).copy())
+         return _df.loc[~_df.index.duplicated() & _df.index.notna()]

-         ## call by _QC function
-         ## QC data in 1 hr
-         def _QC_func(_df_1hr):
-             _df_ave = _df_1hr.mean()
-             _df_std = _df_1hr.std()
-             _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
+     # QC data
+     def _QC(self, _df):
+         # remove negative value
+         _df = _df.mask((_df <= 0) | (_df > 2000)).copy()

-             return _df_1hr.mask(_df_lowb | _df_highb).copy()
+         # total scattering is larger than back scattering
+         _df = _df[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])]

-         return _df.resample('1h', group_keys=False).apply(_QC_func)
+         # QC data in 1h
+         return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
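
The rewritten Aurora _QC adds two physically motivated filters ahead of the shared basic_QC pass: values outside (0, 2000] are masked, and rows where any backscatter channel reaches its total-scatter counterpart are dropped, since backscatter is a subset of total scattering. A toy illustration of the consistency check with invented numbers:

    import pandas as pd

    df = pd.DataFrame({'B': [50.0, 30.0], 'BB': [5.0, 40.0],
                       'G': [60.0, 35.0], 'BG': [6.0, 10.0],
                       'R': [40.0, 20.0], 'BR': [4.0, 5.0]})
    # keep rows where each backscatter value is below total scatter
    df = df[(df['BB'] < df['B']) & (df['BG'] < df['G']) & (df['BR'] < df['R'])]
    # the second row is dropped: BB (40) exceeds B (30)
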