AeroViz 0.1.3b0__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of AeroViz might be problematic.

Files changed (81):
  1. AeroViz/__init__.py +5 -3
  2. AeroViz/{config → data}/DEFAULT_DATA.csv +1 -1
  3. AeroViz/dataProcess/Chemistry/__init__.py +7 -7
  4. AeroViz/dataProcess/Chemistry/_isoropia.py +5 -2
  5. AeroViz/dataProcess/Chemistry/_mass_volume.py +15 -18
  6. AeroViz/dataProcess/Chemistry/_ocec.py +2 -2
  7. AeroViz/dataProcess/Chemistry/_teom.py +2 -1
  8. AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
  9. AeroViz/dataProcess/Optical/Angstrom_exponent.py +20 -0
  10. AeroViz/dataProcess/Optical/_IMPROVE.py +13 -15
  11. AeroViz/dataProcess/Optical/__init__.py +15 -30
  12. AeroViz/dataProcess/Optical/_absorption.py +21 -47
  13. AeroViz/dataProcess/Optical/_extinction.py +20 -15
  14. AeroViz/dataProcess/Optical/_mie.py +0 -1
  15. AeroViz/dataProcess/Optical/_scattering.py +19 -20
  16. AeroViz/dataProcess/SizeDistr/__init__.py +7 -7
  17. AeroViz/dataProcess/SizeDistr/_merge.py +2 -2
  18. AeroViz/dataProcess/SizeDistr/_merge_v1.py +2 -2
  19. AeroViz/dataProcess/SizeDistr/_merge_v2.py +2 -2
  20. AeroViz/dataProcess/SizeDistr/_merge_v3.py +1 -1
  21. AeroViz/dataProcess/SizeDistr/_merge_v4.py +1 -1
  22. AeroViz/dataProcess/VOC/__init__.py +3 -3
  23. AeroViz/dataProcess/__init__.py +28 -6
  24. AeroViz/dataProcess/core/__init__.py +10 -17
  25. AeroViz/plot/__init__.py +1 -1
  26. AeroViz/plot/box.py +2 -1
  27. AeroViz/plot/optical/optical.py +4 -4
  28. AeroViz/plot/regression.py +25 -39
  29. AeroViz/plot/scatter.py +68 -2
  30. AeroViz/plot/templates/__init__.py +2 -1
  31. AeroViz/plot/templates/ammonium_rich.py +34 -0
  32. AeroViz/plot/templates/diurnal_pattern.py +11 -9
  33. AeroViz/plot/templates/koschmieder.py +51 -115
  34. AeroViz/plot/templates/metal_heatmap.py +115 -17
  35. AeroViz/plot/timeseries/__init__.py +1 -0
  36. AeroViz/plot/timeseries/template.py +47 -0
  37. AeroViz/plot/timeseries/timeseries.py +275 -208
  38. AeroViz/plot/utils/plt_utils.py +2 -2
  39. AeroViz/plot/utils/units.json +5 -0
  40. AeroViz/plot/violin.py +9 -8
  41. AeroViz/process/__init__.py +2 -2
  42. AeroViz/process/script/AbstractDistCalc.py +1 -1
  43. AeroViz/process/script/Chemical.py +5 -4
  44. AeroViz/process/script/Others.py +1 -1
  45. AeroViz/rawDataReader/__init__.py +17 -22
  46. AeroViz/rawDataReader/{utils/config.py → config/supported_instruments.py} +38 -52
  47. AeroViz/rawDataReader/core/__init__.py +104 -229
  48. AeroViz/rawDataReader/script/AE33.py +10 -11
  49. AeroViz/rawDataReader/script/AE43.py +8 -11
  50. AeroViz/rawDataReader/script/APS_3321.py +6 -6
  51. AeroViz/rawDataReader/script/Aurora.py +18 -19
  52. AeroViz/rawDataReader/script/BC1054.py +11 -15
  53. AeroViz/rawDataReader/script/EPA_vertical.py +35 -7
  54. AeroViz/rawDataReader/script/GRIMM.py +2 -9
  55. AeroViz/rawDataReader/script/{IGAC_ZM.py → IGAC.py} +17 -17
  56. AeroViz/rawDataReader/script/MA350.py +7 -14
  57. AeroViz/rawDataReader/script/Minion.py +103 -0
  58. AeroViz/rawDataReader/script/NEPH.py +24 -29
  59. AeroViz/rawDataReader/script/SMPS_TH.py +4 -4
  60. AeroViz/rawDataReader/script/SMPS_aim11.py +6 -6
  61. AeroViz/rawDataReader/script/SMPS_genr.py +6 -6
  62. AeroViz/rawDataReader/script/Sunset_OCEC.py +60 -0
  63. AeroViz/rawDataReader/script/TEOM.py +8 -6
  64. AeroViz/rawDataReader/script/Table.py +7 -8
  65. AeroViz/rawDataReader/script/VOC.py +26 -0
  66. AeroViz/rawDataReader/script/__init__.py +10 -12
  67. AeroViz/tools/database.py +7 -9
  68. AeroViz/tools/datareader.py +3 -3
  69. {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.4.dist-info}/METADATA +1 -1
  70. AeroViz-0.1.4.dist-info/RECORD +112 -0
  71. AeroViz/rawDataReader/script/IGAC_TH.py +0 -104
  72. AeroViz/rawDataReader/script/OCEC_LCRES.py +0 -34
  73. AeroViz/rawDataReader/script/OCEC_RES.py +0 -28
  74. AeroViz/rawDataReader/script/VOC_TH.py +0 -30
  75. AeroViz/rawDataReader/script/VOC_ZM.py +0 -37
  76. AeroViz-0.1.3b0.dist-info/RECORD +0 -110
  77. /AeroViz/{config → data}/DEFAULT_PNSD_DATA.csv +0 -0
  78. /AeroViz/rawDataReader/{utils → config}/__init__.py +0 -0
  79. {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.4.dist-info}/LICENSE +0 -0
  80. {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.4.dist-info}/WHEEL +0 -0
  81. {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.4.dist-info}/top_level.txt +0 -0
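
The most caller-visible change in this release is the AbstractReader signature in rawDataReader/core/__init__.py (diffed below): _path becomes path, QC becomes qc, and the update_meta / **kwarg hooks are removed. A minimal call-site migration sketch; instantiating the AE33 script's Reader directly and the data directory name are illustrative assumptions, not the package's documented entry point:

    from datetime import datetime

    from AeroViz.rawDataReader.script.AE33 import Reader

    # 0.1.3b0 style: Reader(raw_dir, QC=True, reset=False, update_meta={...})
    # 0.1.4 style: lower-cased keywords, no update_meta; path may be str or Path
    reader = Reader('AE33_raw_dir', qc=True, csv_raw=True, reset=False)

    # __call__ now rejects end <= start and is annotated -> DataFrame | None
    df = reader(start=datetime(2024, 1, 1), end=datetime(2024, 2, 1), mean_freq='1h')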

AeroViz/rawDataReader/core/__init__.py
@@ -1,14 +1,15 @@
  import json as jsn
+ import logging
  import pickle as pkl
  from abc import ABC, abstractmethod
- from datetime import datetime as dtm, timedelta as dtmdt
- from itertools import chain
+ from datetime import datetime as dtm
  from pathlib import Path
+ from typing import Any

  import numpy as np
  from pandas import DataFrame, date_range, concat, to_numeric, to_datetime

- from ..utils.config import meta
+ from ..config.supported_instruments import meta

  __all__ = ['AbstractReader']

@@ -16,117 +17,108 @@ __all__ = ['AbstractReader']
  class AbstractReader(ABC):
      nam = 'AbstractReader'

-     # initial config
+     # initial data
      # input : file path, reset switch

      # list the file in the path and read pickle file if it exists, else read raw data and dump the pickle file the
      # pickle file will be generated after read raw data first time, if you want to re-read the rawdata, please set
      # 'reset=True'

-     def __init__(self, _path, QC=True, csv_raw=True, reset=False, rate=False, append_data=False, update_meta=None):
-         # logging.info(f'\n{self.nam}')
-         # print('='*65)
-         # logger.info(f"Reading file and process data")
+     def __init__(self,
+                  path: Path | str,
+                  qc: bool = True,
+                  csv_raw: bool = True,
+                  reset: bool = False,
+                  rate: bool = False,
+                  append_data: bool = False):

-         # class parameter
-         # self.index = lambda _freq: date_range(_sta, _fin, freq=_freq)
-         self.path = Path(_path)
+         self.path = Path(path)
          self.meta = meta[self.nam]
-
-         if update_meta is not None:
-             self.meta.update(update_meta)
+         self.logger = self._setup_logger()

          self.reset = reset
          self.rate = rate
-         self.qc = QC
+         self.qc = qc
          self.csv = csv_raw
          self.apnd = append_data & reset

-         self.pkl_nam = f'_read_{self.nam.lower()}.pkl'
-         self.csv_nam = f'_read_{self.nam.lower()}.csv'
-
-         self.pkl_nam_raw = f'_read_{self.nam.lower()}_raw.pkl'
-         self.csv_nam_raw = f'_read_{self.nam.lower()}_raw.csv'
+         self.pkl_nam = self.path / f'_read_{self.nam.lower()}.pkl'
+         self.csv_nam = self.path / f'_read_{self.nam.lower()}.csv'
+         self.pkl_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.pkl'
+         self.csv_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.csv'
+         self.csv_out = self.path / f'output_{self.nam.lower()}.csv'

-         self.csv_out = f'output_{self.nam.lower()}.csv'
+     # dependency injection function, customize each instrument
+     @abstractmethod
+     def _raw_reader(self, _file):
+         pass

-         # print(f" from {_sta.strftime('%Y-%m-%d %X')} to {_fin.strftime('%Y-%m-%d %X')}")
-         # print('='*65)
-         # print(f"{dtm.now().strftime('%m/%d %X')}")
+     @abstractmethod
+     def _QC(self, df: DataFrame):
+         return df

-     # get data
      def __call__(self,
                   start: dtm | None = None,
                   end: dtm | None = None,
-                  mean_freq='1h',
-                  csv_out=True,
-                  **kwarg):
-
-         self._oth_set = kwarg
+                  mean_freq: str = '1h',
+                  csv_out: bool = True,
+                  ) -> DataFrame | None:

          if start and end and end <= start:
-             raise ValueError(
-                 f'\nPlease check out input time : '
-                 f'\n\tstart : {start.strftime("%Y-%m-%d %X")}'
-                 f'\n\tend : {end.strftime("%Y-%m-%d %X")}')
-
-         fout = self._run(start, end)
+             raise ValueError(f"Invalid time range: start {start} is after end {end}")

-         if fout is not None:
-             if mean_freq is not None:
-                 fout = fout.resample(mean_freq).mean()
+         data = self._run(start, end)

+         if data is not None:
+             if mean_freq:
+                 data = data.resample(mean_freq).mean()
              if csv_out:
-                 fout.to_csv(self.path / self.csv_out)
+                 data.to_csv(self.csv_out)

-         return fout
+         return data

-     # dependency injection function
-     @abstractmethod
-     def _raw_reader(self, _file):
-         # customize each instrument
-         pass
+     @staticmethod
+     def basic_QC(df: DataFrame):
+         df_ave, df_std = df.mean(), df.std()
+         df_lowb, df_highb = df < (df_ave - df_std * 1.5), df > (df_ave + df_std * 1.5)

-     @abstractmethod
-     def _QC(self, df: DataFrame):
-         # customize each instrument
-         return df
+         return df.mask(df_lowb | df_highb).copy()

      # set each to true datetime(18:30:01 -> 18:30:00) and rindex data
      def _raw_process(self, _df):
          # get time from df and set time to whole time to create time index
          _st, _ed = _df.index.sort_values()[[0, -1]]
-         _tm_index = date_range(_st.strftime('%Y%m%d %H00'),
-                                (_ed + dtmdt(hours=1)).strftime('%Y%m%d %H00'),
+         _tm_index = date_range(_st.strftime('%Y%m%d %H00'), _ed.floor('h').strftime('%Y%m%d %H00'),
                                 freq=self.meta['freq'])
          _tm_index.name = 'time'

          return _df.apply(to_numeric, errors='coerce').resample(self.meta['freq']).mean().reindex(_tm_index)

+     def _setup_logger(self) -> logging.Logger:
+         logger = logging.getLogger(self.nam)
+         logger.setLevel(logging.INFO)
+         handler = logging.FileHandler(self.path / f'{self.nam}.log')
+         handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
+         logger.addHandler(handler)
+         return logger
+
      # acquisition rate and yield rate
      def _rate_calculate(self, _fout_raw, _fout_qc, _st_raw, _ed_raw):
-
          if self.meta['deter_key'] is not None:
              _start, _end = _fout_qc.index[[0, -1]]

              _drop_how = 'any'
              _the_size = len(_fout_raw.resample('1h').mean().index)

-             _f_pth = (self.path / f'{self.nam}.log')
-             _f = _f_pth.open('r+' if _f_pth.exists() else 'w+')
-
-             _cont = _f.read()
-             _f.seek(0)
-
-             _f.write(f"\n{dtm.now().strftime('%Y/%m/%d %X')}\n")
-             _f.write(f"{'-' * 60}\n")
-             _f.write(f"rawdata time : \n\t{_st_raw.strftime('%Y-%m-%d %X')} ~ {_ed_raw.strftime('%Y-%m-%d %X')}\n")
-             _f.write(f"output time : \n\t{_start.strftime('%Y-%m-%d %X')} ~ {_end.strftime('%Y-%m-%d %X')}\n")
-             _f.write(f"{'-' * 60}\n")
-             print(f"\n\t\tfrom {_start.strftime('%Y-%m-%d %X')} to {_end.strftime('%Y-%m-%d %X')}\n")
+             self.logger.info(f"{'=' * 60}")
+             self.logger.info(
+                 f"Raw data time : {_st_raw.strftime('%Y-%m-%d %H:%M:%S')} ~ {_ed_raw.strftime('%Y-%m-%d %H:%M:%S')}")
+             self.logger.info(
+                 f"Output time : {_start.strftime('%Y-%m-%d %H:%M:%S')} ~ {_end.strftime('%Y-%m-%d %H:%M:%S')}")
+             self.logger.info(f"{'-' * 60}")
+             print(f"\n\n\t\tfrom {_start.strftime('%Y-%m-%d %H:%M:%S')} to {_end.strftime('%Y-%m-%d %H:%M:%S')}\n")

              for _nam, _key in self.meta['deter_key'].items():
-
                  if _key == ['all']:
                      _key, _drop_how = _fout_qc.keys(), 'all'

@@ -139,18 +131,14 @@ class AbstractReader(ABC):
                  except ZeroDivisionError:
                      _acq_rate, _yid_rate = 0, 0

-                 _f.write(f'{_nam} : \n')
-                 _f.write(f"\tacquisition rate : {_acq_rate}%\n")
-                 _f.write(f'\tyield rate : {_yid_rate}%\n')
+                 self.logger.info(f'{_nam}:')
+                 self.logger.info(f"\tAcquisition rate: {_acq_rate}%")
+                 self.logger.info(f'\tYield rate: {_yid_rate}%')
+                 self.logger.info(f"{'=' * 60}")

                  print(f'\t\t{_nam} : ')
                  print(f'\t\t\tacquisition rate : \033[91m{_acq_rate}%\033[0m')
-                 print(f'\t\t\tyield rate : \033[91m{_yid_rate}%\033[0m')
-
-             _f.write(f"{'=' * 40}\n")
-             _f.write(_cont)
-
-             _f.close()
+                 print(f'\t\t\tyield rate : \033[91m{_yid_rate}%\033[0m')

      # process time index
      @staticmethod
@@ -191,81 +179,62 @@ class AbstractReader(ABC):
          return _df

      # save pickle file
-     def _save_dt(self, _save_raw, _save_qc):
-         # dump pickle file
-         _check = True
-         while _check:
-             try:
-                 with (self.path / self.pkl_nam).open('wb') as f:
-                     pkl.dump(_save_qc, f, protocol=pkl.HIGHEST_PROTOCOL)
-
-                 # dump csv file
-                 if self.csv:
-                     _save_qc.to_csv(self.path / self.csv_nam)
-
-                 # output raw data if qc file
-                 if self.meta['deter_key'] is not None:
-                     with (self.path / self.pkl_nam_raw).open('wb') as f:
-                         pkl.dump(_save_raw, f, protocol=pkl.HIGHEST_PROTOCOL)
+     def _save_data(self, raw_data: DataFrame, qc_data: DataFrame) -> None:
+         self._safe_pickle_dump(self.pkl_nam, qc_data)
+         if self.csv:
+             qc_data.to_csv(self.csv_nam)

-                 if self.csv:
-                     _save_raw.to_csv(self.path / self.csv_nam_raw)
-
-                 _check = False
+         if self.meta['deter_key'] is not None:
+             self._safe_pickle_dump(self.pkl_nam_raw, raw_data)
+             if self.csv:
+                 raw_data.to_csv(self.csv_nam_raw)

-             except PermissionError as _err:
-                 print('\n', _err)
-                 input('\t\t\33[41m Please Close The File And Press "Enter" \33[0m\n')
+     @staticmethod
+     def _safe_pickle_dump(file_path: Path, data: Any) -> None:
+         while True:
+             try:
+                 with file_path.open('wb') as f:
+                     pkl.dump(data, f, protocol=pkl.HIGHEST_PROTOCOL)
+                 break
+             except PermissionError as err:
+                 print('\n', err)
+                 input('\t\t\33[41m Please close the file and press "Enter" \33[0m\n')

      # read pickle file
-     def _read_pkl(self, ):
-         with (self.path / self.pkl_nam).open('rb') as f:
-             _fout_qc = pkl.load(f)
-
-         if (self.path / self.pkl_nam_raw).exists():
-             with (self.path / self.pkl_nam_raw).open('rb') as f:
-                 _fout_raw = pkl.load(f)
-         else:
-             _fout_raw = _fout_qc
+     def _read_pkl(self):
+         with self.pkl_nam.open('rb') as qc_data, self.pkl_nam_raw.open('rb') as raw_data:
+             return pkl.load(raw_data), pkl.load(qc_data)

-         return _fout_raw, _fout_qc
+     def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
+         patterns = {self.meta['pattern'].lower(), self.meta['pattern'].upper(), self.meta['pattern']}
+         files = [f for pattern in patterns for f in self.path.glob(pattern)
+                  if f.name not in [self.csv_out.name, self.csv_nam.name, self.csv_nam_raw.name, f'{self.nam}.log']]

-     # read raw data
-     def _read_raw(self, ):
-         pattern = self.meta['pattern']
-         patterns = {pattern, pattern.lower(), pattern.upper()}
-         _df_con, _f_list = None, list(chain.from_iterable(self.path.glob(p) for p in patterns))
-
-         for file in _f_list:
-             if file.name in [self.csv_out, self.csv_nam, self.csv_nam_raw, f'{self.nam}.log']:
-                 continue
+         if not files:
+             print(f"\t\t\033[31mNo files in '{self.path}' could be read. Please check the current path.\033[0m")
+             return None, None

+         df_list = []
+         for file in files:
              print(f"\r\t\treading {file.name}", end='')
+             df = self._raw_reader(file)
+             if df is not None:
+                 df_list.append(df)

-             _df = self._raw_reader(file)
-
-             # concat the concated list
-             if _df is not None:
-                 _df_con = concat([_df_con, _df]) if _df_con is not None else _df
-
-         if _df_con is None:
-             print(f"\t\t\033[31mNo File in '{self.path}' Could Read, Please Check Out the Current Path\033[0m")
+         if not df_list:
              return None, None

-         # QC
-         _fout_raw = self._raw_process(_df_con)
-         _fout_qc = self._QC(_fout_raw)
+         raw_data = self._raw_process(concat(df_list))
+         qc_data = self._QC(raw_data)

-         return _fout_raw, _fout_qc
+         return raw_data, qc_data

      # main flow
      def _run(self, _start, _end):
-
          _f_raw_done, _f_qc_done = None, None

          # read pickle if pickle file exists and 'reset=False' or process raw data or append new data
-         _pkl_exist = self.path / self.pkl_nam in list(self.path.glob('*.pkl'))
-         if _pkl_exist & ((~self.reset) | self.apnd):
+         if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and (not self.reset or self.apnd):
              print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}")

              _f_raw_done, _f_qc_done = self._read_pkl()
@@ -284,19 +253,19 @@ class AbstractReader(ABC):
          # read raw data
          print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it")

-         _f_raw, _f_qc = self._read_raw()
+         _f_raw, _f_qc = self._read_raw_files()
          if _f_raw is None:
              return None

          # append new data and pickle data
-         if self.apnd & _pkl_exist:
+         if self.apnd and self.pkl_nam.exists():
              _f_raw = self._apnd_prcs(_f_raw_done, _f_raw)
              _f_qc = self._apnd_prcs(_f_qc_done, _f_qc)

          _f_qc = self._outlier_prcs(_f_qc)

          # save
-         self._save_dt(_f_raw, _f_qc)
+         self._save_data(_f_raw, _f_qc)

          # process time index
          # if (_start is not None)|(_end is not None):
@@ -306,97 +275,3 @@ class AbstractReader(ABC):
          self._rate_calculate(_f_raw, _f_qc, _start_raw, _end_raw)

          return _f_qc if self.qc else _f_raw
-
-     # -------------------------------------------------------------------------------------
-     # old flow
-     # def __run(self, _start, _end):
-     #
-     # ## read pickle if pickle file exists and 'reset=False' or process raw data
-     # if (self.path / self.pkl_nam in list(self.path.glob('*.pkl'))) & (~self.reset):
-     # print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}")
-     #
-     # with (self.path / self.pkl_nam).open('rb') as f:
-     # _fout_qc = pkl.load(f)
-     #
-     # _exist = (self.path / self.pkl_nam_raw).exists()
-     # if _exist:
-     # with (self.path / self.pkl_nam_raw).open('rb') as f:
-     # _fout_raw = pkl.load(f)
-     # else:
-     # _fout_raw = _fout_qc
-     #
-     # _start, _end = to_datetime(_start) or _fout_qc.index[0], to_datetime(_end) or _fout_qc.index[-1]
-     # _idx = date_range(_start, _end, freq=_fout_qc.index.freq.copy())
-     # _idx.name = 'time'
-     #
-     # _fout_raw, _fout_qc = _fout_raw.reindex(_idx), _fout_qc.reindex(_idx)
-     # if (self.rate) & (_exist):
-     # self._rate_calculate(_fout_raw, _fout_qc)
-     #
-     # return _fout_qc if self.qc else _fout_raw
-     # else:
-     # print(
-     # f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it")
-     #
-     # ##=================================================================================================================
-     # ## read raw data
-     # _df_con, _f_list = None, list(self.path.glob(self.meta['pattern']))
-     #
-     # if len(_f_list) == 0:
-     # print(f"\t\t\033[31mNo File in '{self.path}' Could Read, Please Check Out the Current Path\033[0m")
-     # return None
-     #
-     # for file in _f_list:
-     # if file.name in [self.csv_out, self.csv_nam, self.csv_nam_raw, f'{self.nam}.log']: continue
-     #
-     # print(f"\r\t\treading {file.name}", end='')
-     #
-     # _df = self._raw_reader(file)
-     #
-     # ## concat the concated list
-     # if _df is not None:
-     # _df_con = concat([_df_con, _df]) if _df_con is not None else _df
-     # print()
-     #
-     # ## QC
-     # _save_raw = self._raw_process(_df_con)
-     # _save_qc = self._QC(_save_raw)
-     #
-     # _start, _end = to_datetime(_start) or _save_raw.index[0], to_datetime(_end) or _save_raw.index[-1]
-     # _idx = date_range(_start, _end, freq=_save_raw.index.freq.copy())
-     # _idx.name = 'time'
-     #
-     # _fout_raw, _fout_qc = _save_raw.reindex(_idx).copy(), _save_qc.reindex(_idx).copy()
-     #
-     # self._rate_calculate(_fout_raw, _fout_qc)
-     #
-     # ##=================================================================================================================
-     # ## dump pickle file
-     # _check = True
-     # while _check:
-     #
-     # try:
-     # with (self.path / self.pkl_nam).open('wb') as f:
-     # pkl.dump(_save_qc, f, protocol=pkl.HIGHEST_PROTOCOL)
-     #
-     # ## dump csv file
-     # if self.csv:
-     # _save_qc.to_csv(self.path / self.csv_nam)
-     #
-     # ## output raw data if qc file
-     # if self.meta['deter_key'] is not None:
-     # with (self.path / self.pkl_nam_raw).open('wb') as f:
-     # pkl.dump(_save_raw, f, protocol=pkl.HIGHEST_PROTOCOL)
-     #
-     # if self.csv:
-     # _save_raw.to_csv(self.path / self.csv_nam_raw)
-     #
-     # return _fout_qc if self.qc else _fout_raw
-     #
-     # _check = False
-     #
-     # except PermissionError as _err:
-     # print('\n', _err)
-     # input('\t\t\33[41m Please Close The File And Press "Enter" \33[0m\n')
-     #
-     # return _fout_qc
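
The duplicated per-reader _QC_func closures are consolidated into the basic_QC staticmethod introduced above, which masks values falling outside mean ± 1.5 standard deviations within each resampled window. A self-contained sketch of the same masking logic; pd.Grouper grouping is used here as the equivalent of the readers' resample('1h').apply(...) call:

    import numpy as np
    import pandas as pd

    def basic_qc(df: pd.DataFrame) -> pd.DataFrame:
        # mask values outside mean +/- 1.5 * std of the group, as basic_QC does
        ave, std = df.mean(), df.std()
        return df.mask((df < ave - 1.5 * std) | (df > ave + 1.5 * std)).copy()

    idx = pd.date_range('2024-01-01', periods=120, freq='1min')
    bc = pd.DataFrame({'BC1': np.append(np.random.normal(900, 20, 119), 5000.0)}, index=idx)

    # applied hour by hour; the 5000 spike lies far outside its hour's
    # mean +/- 1.5 sigma band and is masked to NaN
    clean = bc.groupby(pd.Grouper(freq='1h'), group_keys=False).apply(basic_qc)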

AeroViz/rawDataReader/script/AE33.py
@@ -7,25 +7,24 @@ class Reader(AbstractReader):
      nam = 'AE33'

      def _raw_reader(self, _file):
+         if _file.stat().st_size / 1024 < 550:
+             print('\t It may not be a whole daily data.')
+
          _df = read_table(_file, parse_dates={'time': [0, 1]}, index_col='time',
                           delimiter=r'\s+', skiprows=5, usecols=range(67))
          _df.columns = _df.columns.str.strip(';')

          # remove data without Status=0, 128 (Not much filter tape), 256 (Not much filter tape)
-         if not self._oth_set.get('ignore_err', False):
-             _df = _df.where((_df['Status'] != 0) | (_df['Status'] != 128) | (_df['Status'] != 256)).copy()
+         if self.meta.get('error_state', False):
+             _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()
+
+         _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']]

-         return _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'Status']]
+         return _df.loc[~_df.index.duplicated() & _df.index.notna()]

      def _QC(self, _df):
          # remove negative value
          _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']].mask((_df < 0).copy())

-         # QC data in 5 min
-         def _QC_func(df):
-             _df_ave, _df_std = df.mean(), df.std()
-             _df_lowb, _df_highb = df < (_df_ave - _df_std * 1.5), df > (_df_ave + _df_std * 1.5)
-
-             return df.mask(_df_lowb | _df_highb).copy()
-
-         return _df.resample('5min').apply(_QC_func).resample('1h').mean()
+         # QC data in 1h
+         return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()

AeroViz/rawDataReader/script/AE43.py
@@ -14,21 +14,18 @@ class Reader(AbstractReader):
          _df = _df.groupby('SetupID').get_group(_df_id)[
              ['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'Status']].copy()

-         # remove data without Status=0
-         _df = _df.where(_df['Status'] == 0).copy()
+         # remove data without Status=0, 128 (Not much filter tape), 256 (Not much filter tape)
+         if self.meta.get('error_state', False):
+             _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()

-         return _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']]
+         _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']]
+
+         return _df.loc[~_df.index.duplicated() & _df.index.notna()]

      # QC data
      def _QC(self, _df):
          # remove negative value
          _df = _df.mask((_df < 0).copy())

-         # QC data in 5 min
-         def _QC_func(df):
-             _df_ave, _df_std = df.mean(), df.std()
-             _df_lowb, _df_highb = df < (_df_ave - _df_std * 1.5), df > (_df_ave + _df_std * 1.5)
-
-             return df.mask(_df_lowb | _df_highb).copy()
-
-         return _df.resample('5min').apply(_QC_func).resample('1h').mean()
+         # QC data in 1h
+         return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()

AeroViz/rawDataReader/script/APS_3321.py
@@ -12,28 +12,28 @@ class Reader(AbstractReader):
          _df = read_table(f, skiprows=6, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time')
          _key = list(_df.keys()[3:54])  ## 542 ~ 1981

-         ## create new keys
+         # create new keys
          _newkey = {}
          for _k in _key:
              _newkey[_k] = float(_k).__round__(4)
          # _newkey['Mode(m)'] = 'mode'

-         ## get new dataframe
+         # get new dataframe
          _df = _df[_newkey.keys()].rename(_newkey, axis=1)
-         # _df['total'] = _df[list(_newkey.values())[:-1]].sum(axis=1)*(n.diff(n.log(_df.keys()[:-1].to_numpy(float))).mean()).copy()
+         # df['total'] = _df[list(_newkey.values())[:-1]].sum(axis=1)*(n.diff(n.log(_df.keys()[:-1].to_numpy(float))).mean()).copy()

          _df_idx = to_datetime(_df.index, errors='coerce')

          return _df.set_index(_df_idx).loc[_df_idx.dropna()]

-     ## QC data
+     # QC data
      def _QC(self, _df):
-         ## mask out the data size lower than 7
+         # mask out the data size lower than 7
          _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
          _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
          _df = _df.mask(_df_size < 7)

-         ## remove total conc. lower than 700
+         # remove total conc. lower than 700
          _df = _df.mask(_df['total'] > 700)

          # not confirmed

AeroViz/rawDataReader/script/Aurora.py
@@ -7,32 +7,31 @@ class Reader(AbstractReader):
      nam = 'Aurora'

      def _raw_reader(self, _file):
-         with (_file).open('r', encoding='utf-8-sig', errors='ignore') as f:
+         with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
              _df = read_csv(f, low_memory=False, index_col=0)

-         _df.index = to_datetime(_df.index, errors='coerce', format=self._oth_set.get('date_format') or 'mixed')
+         _df.index = to_datetime(_df.index, errors='coerce')
          _df.index.name = 'time'

          _df.columns = _df.keys().str.strip(' ')

-         _df = _df.loc[
-             _df.index.dropna(), ['0°σspB', '0°σspG', '0°σspR', '90°σspB', '90°σspG', '90°σspR', 'RH']].copy()
-         _df.columns = ['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']
+         # consider another csv format
+         _df = _df.rename(columns={
+             '0°σspB': 'B', '0°σspG': 'G', '0°σspR': 'R',
+             '90°σspB': 'BB', '90°σspG': 'BG', '90°σspR': 'BR',
+             'Blue': 'B', 'Green': 'G', 'Red': 'R',
+             'B_Blue': 'BB', 'B_Green': 'BG', 'B_Red': 'BR',
+             'RH': 'RH'
+         })

-         return _df
+         _df = _df[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']]

-     ## QC data
-     def _QC(self, _df):
-         ## remove negative value
-         _df = _df.mask((_df <= 0).copy())
-
-         ## call by _QC function
-         ## QC data in 1 hr
-         def _QC_func(_df_1hr):
-             _df_ave = _df_1hr.mean()
-             _df_std = _df_1hr.std()
-             _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
+         return _df.loc[~_df.index.duplicated() & _df.index.notna()]

-             return _df_1hr.mask(_df_lowb | _df_highb).copy()
+     # QC data
+     def _QC(self, _df):
+         # remove negative value
+         _df = _df.mask((_df <= 0) | (_df > 2000)).copy()

-         return _df.resample('1h', group_keys=False).apply(_QC_func)
+         # QC data in 1h
+         return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
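
The Aurora reader now funnels both CSV header variants through one superset rename map instead of selecting a single fixed column set. This works because DataFrame.rename silently ignores mapping keys that are absent from the frame; a minimal sketch with header names taken from the diff above:

    import pandas as pd

    mapping = {'0°σspB': 'B', 'Blue': 'B',
               '0°σspG': 'G', 'Green': 'G',
               '0°σspR': 'R', 'Red': 'R'}

    old_fmt = pd.DataFrame(columns=['0°σspB', '0°σspG', '0°σspR']).rename(columns=mapping)
    new_fmt = pd.DataFrame(columns=['Blue', 'Green', 'Red']).rename(columns=mapping)
    assert list(old_fmt.columns) == list(new_fmt.columns) == ['B', 'G', 'R']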

AeroViz/rawDataReader/script/BC1054.py
@@ -8,7 +8,9 @@ class Reader(AbstractReader):

      def _raw_reader(self, _file):
          with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
-             _df = read_csv(f, parse_dates=['Time'], index_col='Time')
+             _df = read_csv(f, parse_dates=True, index_col=0)
+
+         _df.columns = _df.columns.str.replace(' ', '')

          _df = _df.rename(columns={
              'BC1(ng/m3)': 'BC1',
@@ -23,24 +25,18 @@ class Reader(AbstractReader):
              'BC10(ng/m3)': 'BC10'
          })

-         # remove data without Status=32 (Automatic Tape Advance), 65536 (Tape Move)
-         # if not self._oth_set.get('ignore_err', False):
-         #     _df = _df.where((_df['Status'] != 32) | (_df['Status'] != 65536)).copy()
+         # remove data without Status=1, 8, 16, 32 (Automatic Tape Advance), 65536 (Tape Move)
+         if self.meta.get('error_state', False):
+             _df = _df[~_df['Status'].isin(self.meta.get('error_state'))]
+
+         _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']]

-         return _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10', 'Status']]
+         return _df.loc[~_df.index.duplicated() & _df.index.notna()]

      # QC data
      def _QC(self, _df):
          # remove negative value
          _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']].mask((_df < 0).copy())

-         # call by _QC function
-         # QC data in 1 hr
-         def _QC_func(_df_1hr):
-             _df_ave = _df_1hr.mean()
-             _df_std = _df_1hr.std()
-             _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
-
-             return _df_1hr.mask(_df_lowb | _df_highb).copy()
-
-         return _df.resample('1h', group_keys=False).apply(_QC_func).resample('5min').mean()
+         # QC data in 1h
+         return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
+ return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()