AeroViz-0.1.3-py3-none-any.whl → AeroViz-0.1.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (121)
  1. AeroViz/__init__.py +7 -5
  2. AeroViz/{config → data}/DEFAULT_DATA.csv +1 -1
  3. AeroViz/dataProcess/Chemistry/__init__.py +40 -40
  4. AeroViz/dataProcess/Chemistry/_calculate.py +15 -15
  5. AeroViz/dataProcess/Chemistry/_isoropia.py +72 -68
  6. AeroViz/dataProcess/Chemistry/_mass_volume.py +158 -161
  7. AeroViz/dataProcess/Chemistry/_ocec.py +109 -109
  8. AeroViz/dataProcess/Chemistry/_partition.py +19 -18
  9. AeroViz/dataProcess/Chemistry/_teom.py +9 -11
  10. AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
  11. AeroViz/dataProcess/Optical/Angstrom_exponent.py +20 -0
  12. AeroViz/dataProcess/Optical/_IMPROVE.py +40 -41
  13. AeroViz/dataProcess/Optical/__init__.py +29 -44
  14. AeroViz/dataProcess/Optical/_absorption.py +21 -47
  15. AeroViz/dataProcess/Optical/_extinction.py +31 -25
  16. AeroViz/dataProcess/Optical/_mie.py +5 -7
  17. AeroViz/dataProcess/Optical/_mie_sd.py +89 -90
  18. AeroViz/dataProcess/Optical/_scattering.py +19 -20
  19. AeroViz/dataProcess/SizeDistr/__init__.py +39 -39
  20. AeroViz/dataProcess/SizeDistr/__merge.py +159 -158
  21. AeroViz/dataProcess/SizeDistr/_merge.py +155 -154
  22. AeroViz/dataProcess/SizeDistr/_merge_v1.py +162 -161
  23. AeroViz/dataProcess/SizeDistr/_merge_v2.py +153 -152
  24. AeroViz/dataProcess/SizeDistr/_merge_v3.py +327 -327
  25. AeroViz/dataProcess/SizeDistr/_merge_v4.py +273 -275
  26. AeroViz/dataProcess/SizeDistr/_size_distr.py +51 -51
  27. AeroViz/dataProcess/VOC/__init__.py +9 -9
  28. AeroViz/dataProcess/VOC/_potential_par.py +53 -55
  29. AeroViz/dataProcess/__init__.py +28 -6
  30. AeroViz/dataProcess/core/__init__.py +59 -65
  31. AeroViz/plot/__init__.py +7 -2
  32. AeroViz/plot/bar.py +126 -0
  33. AeroViz/plot/box.py +69 -0
  34. AeroViz/plot/distribution/distribution.py +421 -427
  35. AeroViz/plot/meteorology/meteorology.py +240 -292
  36. AeroViz/plot/optical/__init__.py +0 -1
  37. AeroViz/plot/optical/optical.py +230 -230
  38. AeroViz/plot/pie.py +198 -0
  39. AeroViz/plot/regression.py +196 -0
  40. AeroViz/plot/scatter.py +165 -0
  41. AeroViz/plot/templates/__init__.py +2 -4
  42. AeroViz/plot/templates/ammonium_rich.py +34 -0
  43. AeroViz/plot/templates/contour.py +25 -25
  44. AeroViz/plot/templates/corr_matrix.py +86 -93
  45. AeroViz/plot/templates/diurnal_pattern.py +28 -26
  46. AeroViz/plot/templates/koschmieder.py +59 -123
  47. AeroViz/plot/templates/metal_heatmap.py +135 -37
  48. AeroViz/plot/timeseries/__init__.py +1 -0
  49. AeroViz/plot/timeseries/template.py +47 -0
  50. AeroViz/plot/timeseries/timeseries.py +324 -264
  51. AeroViz/plot/utils/__init__.py +2 -1
  52. AeroViz/plot/utils/_color.py +57 -57
  53. AeroViz/plot/utils/_unit.py +48 -48
  54. AeroViz/plot/utils/plt_utils.py +92 -0
  55. AeroViz/plot/utils/sklearn_utils.py +49 -0
  56. AeroViz/plot/utils/units.json +5 -0
  57. AeroViz/plot/violin.py +80 -0
  58. AeroViz/process/__init__.py +17 -17
  59. AeroViz/process/core/DataProc.py +9 -9
  60. AeroViz/process/core/SizeDist.py +81 -81
  61. AeroViz/process/method/PyMieScatt_update.py +488 -488
  62. AeroViz/process/method/mie_theory.py +231 -229
  63. AeroViz/process/method/prop.py +40 -40
  64. AeroViz/process/script/AbstractDistCalc.py +103 -103
  65. AeroViz/process/script/Chemical.py +168 -167
  66. AeroViz/process/script/IMPACT.py +40 -40
  67. AeroViz/process/script/IMPROVE.py +152 -152
  68. AeroViz/process/script/Others.py +45 -45
  69. AeroViz/process/script/PSD.py +26 -26
  70. AeroViz/process/script/PSD_dry.py +69 -70
  71. AeroViz/process/script/retrieve_RI.py +50 -51
  72. AeroViz/rawDataReader/__init__.py +53 -58
  73. AeroViz/rawDataReader/config/supported_instruments.py +155 -0
  74. AeroViz/rawDataReader/core/__init__.py +233 -356
  75. AeroViz/rawDataReader/script/AE33.py +17 -18
  76. AeroViz/rawDataReader/script/AE43.py +18 -21
  77. AeroViz/rawDataReader/script/APS_3321.py +30 -30
  78. AeroViz/rawDataReader/script/Aurora.py +23 -24
  79. AeroViz/rawDataReader/script/BC1054.py +36 -40
  80. AeroViz/rawDataReader/script/EPA_vertical.py +37 -9
  81. AeroViz/rawDataReader/script/GRIMM.py +16 -23
  82. AeroViz/rawDataReader/script/IGAC.py +90 -0
  83. AeroViz/rawDataReader/script/MA350.py +32 -39
  84. AeroViz/rawDataReader/script/Minion.py +103 -0
  85. AeroViz/rawDataReader/script/NEPH.py +69 -74
  86. AeroViz/rawDataReader/script/SMPS_TH.py +25 -25
  87. AeroViz/rawDataReader/script/SMPS_aim11.py +32 -32
  88. AeroViz/rawDataReader/script/SMPS_genr.py +31 -31
  89. AeroViz/rawDataReader/script/Sunset_OCEC.py +60 -0
  90. AeroViz/rawDataReader/script/TEOM.py +30 -28
  91. AeroViz/rawDataReader/script/Table.py +13 -14
  92. AeroViz/rawDataReader/script/VOC.py +26 -0
  93. AeroViz/rawDataReader/script/__init__.py +18 -20
  94. AeroViz/tools/database.py +64 -66
  95. AeroViz/tools/dataclassifier.py +106 -106
  96. AeroViz/tools/dataprinter.py +51 -51
  97. AeroViz/tools/datareader.py +38 -38
  98. {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/METADATA +5 -4
  99. AeroViz-0.1.4.dist-info/RECORD +112 -0
  100. AeroViz/plot/improve/__init__.py +0 -1
  101. AeroViz/plot/improve/improve.py +0 -240
  102. AeroViz/plot/optical/aethalometer.py +0 -77
  103. AeroViz/plot/templates/event_evolution.py +0 -65
  104. AeroViz/plot/templates/regression.py +0 -256
  105. AeroViz/plot/templates/scatter.py +0 -130
  106. AeroViz/plot/templates/templates.py +0 -398
  107. AeroViz/plot/utils/_decorator.py +0 -74
  108. AeroViz/rawDataReader/script/IGAC_TH.py +0 -104
  109. AeroViz/rawDataReader/script/IGAC_ZM.py +0 -90
  110. AeroViz/rawDataReader/script/OCEC_LCRES.py +0 -34
  111. AeroViz/rawDataReader/script/OCEC_RES.py +0 -28
  112. AeroViz/rawDataReader/script/VOC_TH.py +0 -30
  113. AeroViz/rawDataReader/script/VOC_ZM.py +0 -37
  114. AeroViz/rawDataReader/utils/__init__.py +0 -0
  115. AeroViz/rawDataReader/utils/config.py +0 -169
  116. AeroViz-0.1.3.dist-info/RECORD +0 -111
  117. /AeroViz/{config → data}/DEFAULT_PNSD_DATA.csv +0 -0
  118. /AeroViz/{config → rawDataReader/config}/__init__.py +0 -0
  119. {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/LICENSE +0 -0
  120. {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/WHEEL +0 -0
  121. {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/top_level.txt +0 -0
AeroViz/rawDataReader/core/__init__.py
@@ -1,400 +1,277 @@
  import json as jsn
+ import logging
  import pickle as pkl
  from abc import ABC, abstractmethod
- from datetime import datetime as dtm, timedelta as dtmdt
- from itertools import chain
+ from datetime import datetime as dtm
  from pathlib import Path
+ from typing import Any

  import numpy as np
  from pandas import DataFrame, date_range, concat, to_numeric, to_datetime

- from ..utils.config import meta
+ from ..config.supported_instruments import meta

  __all__ = ['AbstractReader']


  class AbstractReader(ABC):
-     nam = 'AbstractReader'
+     nam = 'AbstractReader'
+
+     # initial data
+     # input : file path, reset switch
+
+     # list the file in the path and read pickle file if it exists, else read raw data and dump the pickle file the
+     # pickle file will be generated after read raw data first time, if you want to re-read the rawdata, please set
+     # 'reset=True'
+
+     def __init__(self,
+                  path: Path | str,
+                  qc: bool = True,
+                  csv_raw: bool = True,
+                  reset: bool = False,
+                  rate: bool = False,
+                  append_data: bool = False):
+
+         self.path = Path(path)
+         self.meta = meta[self.nam]
+         self.logger = self._setup_logger()
+
+         self.reset = reset
+         self.rate = rate
+         self.qc = qc
+         self.csv = csv_raw
+         self.apnd = append_data & reset
+
+         self.pkl_nam = self.path / f'_read_{self.nam.lower()}.pkl'
+         self.csv_nam = self.path / f'_read_{self.nam.lower()}.csv'
+         self.pkl_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.pkl'
+         self.csv_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.csv'
+         self.csv_out = self.path / f'output_{self.nam.lower()}.csv'
+
+     # dependency injection function, customize each instrument
+     @abstractmethod
+     def _raw_reader(self, _file):
+         pass
+
+     @abstractmethod
+     def _QC(self, df: DataFrame):
+         return df
+
+     def __call__(self,
+                  start: dtm | None = None,
+                  end: dtm | None = None,
+                  mean_freq: str = '1h',
+                  csv_out: bool = True,
+                  ) -> DataFrame | None:
+
+         if start and end and end <= start:
+             raise ValueError(f"Invalid time range: start {start} is after end {end}")
+
+         data = self._run(start, end)
+
+         if data is not None:
+             if mean_freq:
+                 data = data.resample(mean_freq).mean()
+             if csv_out:
+                 data.to_csv(self.csv_out)
+
+         return data
+
+     @staticmethod
+     def basic_QC(df: DataFrame):
+         df_ave, df_std = df.mean(), df.std()
+         df_lowb, df_highb = df < (df_ave - df_std * 1.5), df > (df_ave + df_std * 1.5)
+
+         return df.mask(df_lowb | df_highb).copy()
+
+     # set each to true datetime(18:30:01 -> 18:30:00) and rindex data
+     def _raw_process(self, _df):
+         # get time from df and set time to whole time to create time index
+         _st, _ed = _df.index.sort_values()[[0, -1]]
+         _tm_index = date_range(_st.strftime('%Y%m%d %H00'), _ed.floor('h').strftime('%Y%m%d %H00'),
+                                freq=self.meta['freq'])
+         _tm_index.name = 'time'
+
+         return _df.apply(to_numeric, errors='coerce').resample(self.meta['freq']).mean().reindex(_tm_index)
+
+     def _setup_logger(self) -> logging.Logger:
+         logger = logging.getLogger(self.nam)
+         logger.setLevel(logging.INFO)
+         handler = logging.FileHandler(self.path / f'{self.nam}.log')
+         handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
+         logger.addHandler(handler)
+         return logger
+
+     # acquisition rate and yield rate
+     def _rate_calculate(self, _fout_raw, _fout_qc, _st_raw, _ed_raw):
+         if self.meta['deter_key'] is not None:
+             _start, _end = _fout_qc.index[[0, -1]]
+
+             _drop_how = 'any'
+             _the_size = len(_fout_raw.resample('1h').mean().index)
+
+             self.logger.info(f"{'=' * 60}")
+             self.logger.info(
+                 f"Raw data time : {_st_raw.strftime('%Y-%m-%d %H:%M:%S')} ~ {_ed_raw.strftime('%Y-%m-%d %H:%M:%S')}")
+             self.logger.info(
+                 f"Output time : {_start.strftime('%Y-%m-%d %H:%M:%S')} ~ {_end.strftime('%Y-%m-%d %H:%M:%S')}")
+             self.logger.info(f"{'-' * 60}")
+             print(f"\n\n\t\tfrom {_start.strftime('%Y-%m-%d %H:%M:%S')} to {_end.strftime('%Y-%m-%d %H:%M:%S')}\n")
+
+             for _nam, _key in self.meta['deter_key'].items():
+                 if _key == ['all']:
+                     _key, _drop_how = _fout_qc.keys(), 'all'
+
+                 _real_size = len(_fout_raw[_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
+                 _QC_size = len(_fout_qc[_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
+
+                 try:
+                     _acq_rate = round((_real_size / _the_size) * 100, 1)
+                     _yid_rate = round((_QC_size / _real_size) * 100, 1)
+                 except ZeroDivisionError:
+                     _acq_rate, _yid_rate = 0, 0
+
+                 self.logger.info(f'{_nam}:')
+                 self.logger.info(f"\tAcquisition rate: {_acq_rate}%")
+                 self.logger.info(f'\tYield rate: {_yid_rate}%')
+                 self.logger.info(f"{'=' * 60}")
+
+                 print(f'\t\t{_nam} : ')
+                 print(f'\t\t\tacquisition rate : \033[91m{_acq_rate}%\033[0m')
+                 print(f'\t\t\tyield rate : \033[91m{_yid_rate}%\033[0m')
+
+     # process time index
+     @staticmethod
+     def _tmidx_process(_start, _end, _df):
+         _st, _ed = _df.index.sort_values()[[0, -1]]
+         _start, _end = to_datetime(_start) or _st, to_datetime(_end) or _ed
+         _idx = date_range(_start, _end, freq=_df.index.freq.copy())
+         _idx.name = 'time'
+
+         return _df.reindex(_idx), _st, _ed
+
+     # append new data to exist pkl
+     @staticmethod
+     def _apnd_prcs(_df_done, _df_apnd):
+
+         if _df_apnd is not None:
+             _df = concat([_df_apnd.dropna(how='all').copy(), _df_done.dropna(how='all').copy()])

-     # initial config
-     # input : file path, reset switch
+             _idx = date_range(*_df.index.sort_values()[[0, -1]], freq=_df_done.index.freq.copy())
+             _idx.name = 'time'
+
+             return _df.loc[~_df.index.duplicated()].copy().reindex(_idx)

-     # list the file in the path and read pickle file if it exists, else read raw data and dump the pickle file the
-     # pickle file will be generated after read raw data first time, if you want to re-read the rawdata, please set
-     # 'reset=True'
+         return _df_done

-     def __init__(self, _path, QC=True, csv_raw=True, reset=False, rate=False, append_data=False, update_meta=None):
-         # logging.info(f'\n{self.nam}')
-         # print('='*65)
-         # logger.info(f"Reading file and process data")
+     # remove outlier
+     def _outlier_prcs(self, _df):

-         # class parameter
-         # self.index = lambda _freq: date_range(_sta, _fin, freq=_freq)
-         self.path = Path(_path)
-         self.meta = meta[self.nam]
+         if (self.path / 'outlier.json') not in self.path.glob('*.json'):
+             return _df
+
+         with (self.path / 'outlier.json').open('r', encoding='utf-8', errors='ignore') as f:
+             self.outlier = jsn.load(f)

-         if update_meta is not None:
-             self.meta.update(update_meta)
+         for _st, _ed in self.outlier.values():
+             _df.loc[_st:_ed] = np.nan

-         self.reset = reset
-         self.rate = rate
-         self.qc = QC
-         self.csv = csv_raw
-         self.apnd = append_data & reset
+         return _df

-         self.pkl_nam = f'_read_{self.nam.lower()}.pkl'
-         self.csv_nam = f'_read_{self.nam.lower()}.csv'
+     # save pickle file
+     def _save_data(self, raw_data: DataFrame, qc_data: DataFrame) -> None:
+         self._safe_pickle_dump(self.pkl_nam, qc_data)
+         if self.csv:
+             qc_data.to_csv(self.csv_nam)

-         self.pkl_nam_raw = f'_read_{self.nam.lower()}_raw.pkl'
-         self.csv_nam_raw = f'_read_{self.nam.lower()}_raw.csv'
+         if self.meta['deter_key'] is not None:
+             self._safe_pickle_dump(self.pkl_nam_raw, raw_data)
+             if self.csv:
+                 raw_data.to_csv(self.csv_nam_raw)

-         self.csv_out = f'output_{self.nam.lower()}.csv'
+     @staticmethod
+     def _safe_pickle_dump(file_path: Path, data: Any) -> None:
+         while True:
+             try:
+                 with file_path.open('wb') as f:
+                     pkl.dump(data, f, protocol=pkl.HIGHEST_PROTOCOL)
+                 break
+             except PermissionError as err:
+                 print('\n', err)
+                 input('\t\t\33[41m Please close the file and press "Enter" \33[0m\n')

-         # print(f" from {_sta.strftime('%Y-%m-%d %X')} to {_fin.strftime('%Y-%m-%d %X')}")
-         # print('='*65)
-         # print(f"{dtm.now().strftime('%m/%d %X')}")
+     # read pickle file
+     def _read_pkl(self):
+         with self.pkl_nam.open('rb') as qc_data, self.pkl_nam_raw.open('rb') as raw_data:
+             return pkl.load(raw_data), pkl.load(qc_data)

-     # get data
-     def __call__(self,
-                  start: dtm | None = None,
-                  end: dtm | None = None,
-                  mean_freq='1h',
-                  csv_out=True,
-                  **kwarg):
+     def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
+         patterns = {self.meta['pattern'].lower(), self.meta['pattern'].upper(), self.meta['pattern']}
+         files = [f for pattern in patterns for f in self.path.glob(pattern)
+                  if f.name not in [self.csv_out.name, self.csv_nam.name, self.csv_nam_raw.name, f'{self.nam}.log']]

-         self._oth_set = kwarg
+         if not files:
+             print(f"\t\t\033[31mNo files in '{self.path}' could be read. Please check the current path.\033[0m")
+             return None, None

-         if start and end and end <= start:
-             raise ValueError(
-                 f'\nPlease check out input time : \n\tstart : {start.strftime("%Y-%m-%d %X")}\n\tend : {end.strftime("%Y-%m-%d %X")}')
+         df_list = []
+         for file in files:
+             print(f"\r\t\treading {file.name}", end='')
+             df = self._raw_reader(file)
+             if df is not None:
+                 df_list.append(df)

-         fout = self._run(start, end)
+         if not df_list:
+             return None, None

-         if fout is not None:
-             if mean_freq is not None:
-                 fout = fout.resample(mean_freq).mean()
+         raw_data = self._raw_process(concat(df_list))
+         qc_data = self._QC(raw_data)

-             if csv_out:
-                 fout.to_csv(self.path / self.csv_out)
+         return raw_data, qc_data

-         return fout
+     # main flow
+     def _run(self, _start, _end):
+         _f_raw_done, _f_qc_done = None, None

-     # dependency injection function
-     @abstractmethod
-     def _raw_reader(self, _file):
-         # customize each instrument
-         pass
+         # read pickle if pickle file exists and 'reset=False' or process raw data or append new data
+         if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and (not self.reset or self.apnd):
+             print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}")

-     @abstractmethod
-     def _QC(self, df: DataFrame):
-         # customize each instrument
-         return df
+             _f_raw_done, _f_qc_done = self._read_pkl()

-     # set each to true datetime(18:30:01 -> 18:30:00) and rindex data
-     def _raw_process(self, _df):
-         # get time from df and set time to whole time to create time index
-         _st, _ed = _df.index.sort_values()[[0, -1]]
-         _tm_index = date_range(_st.strftime('%Y%m%d %H00'),
-                                (_ed + dtmdt(hours=1)).strftime('%Y%m%d %H00'),
-                                freq=self.meta['freq'])
-         _tm_index.name = 'time'
+             if not self.apnd:
+                 _f_raw_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw_done)
+                 _f_qc_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc_done)

-         return _df.apply(to_numeric, errors='coerce').resample(self.meta['freq']).mean().reindex(_tm_index)
+                 _f_qc_done = self._outlier_prcs(_f_qc_done)

-     # acquisition rate and yield rate
-     def _rate_calculate(self, _fout_raw, _fout_qc, _st_raw, _ed_raw):
+                 if self.rate:
+                     self._rate_calculate(_f_raw_done, _f_qc_done, _start_raw, _end_raw)

-         if self.meta['deter_key'] is not None:
-             _start, _end = _fout_qc.index[[0, -1]]
+                 return _f_qc_done if self.qc else _f_raw_done

-             _drop_how = 'any'
-             _the_size = len(_fout_raw.resample('1h').mean().index)
+         # read raw data
+         print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it")

-             _f_pth = (self.path / f'{self.nam}.log')
-             _f = _f_pth.open('r+' if _f_pth.exists() else 'w+')
+         _f_raw, _f_qc = self._read_raw_files()
+         if _f_raw is None:
+             return None

-             _cont = _f.read()
-             _f.seek(0)
+         # append new data and pickle data
+         if self.apnd and self.pkl_nam.exists():
+             _f_raw = self._apnd_prcs(_f_raw_done, _f_raw)
+             _f_qc = self._apnd_prcs(_f_qc_done, _f_qc)

-             _f.write(f"\n{dtm.now().strftime('%Y/%m/%d %X')}\n")
-             _f.write(f"{'-' * 60}\n")
-             _f.write(f"rawdata time : \n\t{_st_raw.strftime('%Y-%m-%d %X')} ~ {_ed_raw.strftime('%Y-%m-%d %X')}\n")
-             _f.write(f"output time : \n\t{_start.strftime('%Y-%m-%d %X')} ~ {_end.strftime('%Y-%m-%d %X')}\n")
-             _f.write(f"{'-' * 60}\n")
-             print(f"\n\t\tfrom {_start.strftime('%Y-%m-%d %X')} to {_end.strftime('%Y-%m-%d %X')}\n")
+         _f_qc = self._outlier_prcs(_f_qc)

-             for _nam, _key in self.meta['deter_key'].items():
+         # save
+         self._save_data(_f_raw, _f_qc)

-                 if _key == ['all']:
-                     _key, _drop_how = _fout_qc.keys(), 'all'
+         # process time index
+         # if (_start is not None)|(_end is not None):
+         _f_raw, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw)
+         _f_qc, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc)

-                 _real_size = len(_fout_raw[_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
-                 _QC_size = len(_fout_qc[_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
+         self._rate_calculate(_f_raw, _f_qc, _start_raw, _end_raw)

-                 try:
-                     _acq_rate = round((_real_size / _the_size) * 100, 1)
-                     _yid_rate = round((_QC_size / _real_size) * 100, 1)
-                 except ZeroDivisionError:
-                     _acq_rate, _yid_rate = 0, 0
-
-                 _f.write(f'{_nam} : \n')
-                 _f.write(f"\tacquisition rate : {_acq_rate}%\n")
-                 _f.write(f'\tyield rate : {_yid_rate}%\n')
-
-                 print(f'\t\t{_nam} : ')
-                 print(f'\t\t\tacquisition rate : \033[91m{_acq_rate}%\033[0m')
-                 print(f'\t\t\tyield rate : \033[91m{_yid_rate}%\033[0m')
-
-             _f.write(f"{'=' * 40}\n")
-             _f.write(_cont)
-
-             _f.close()
-
-     # process time index
-     @staticmethod
-     def _tmidx_process(_start, _end, _df):
-         _st, _ed = _df.index.sort_values()[[0, -1]]
-         _start, _end = to_datetime(_start) or _st, to_datetime(_end) or _ed
-         _idx = date_range(_start, _end, freq=_df.index.freq.copy())
-         _idx.name = 'time'
-
-         return _df.reindex(_idx), _st, _ed
-
-     # append new data to exist pkl
-     @staticmethod
-     def _apnd_prcs(_df_done, _df_apnd):
-
-         if _df_apnd is not None:
-             _df = concat([_df_apnd.dropna(how='all').copy(), _df_done.dropna(how='all').copy()])
-
-             _idx = date_range(*_df.index.sort_values()[[0, -1]], freq=_df_done.index.freq.copy())
-             _idx.name = 'time'
-
-             return _df.loc[~_df.index.duplicated()].copy().reindex(_idx)
-
-         return _df_done
-
-     # remove outlier
-     def _outlier_prcs(self, _df):
-
-         if (self.path / 'outlier.json') not in self.path.glob('*.json'):
-             return _df
-
-         with (self.path / 'outlier.json').open('r', encoding='utf-8', errors='ignore') as f:
-             self.outlier = jsn.load(f)
-
-         for _st, _ed in self.outlier.values():
-             _df.loc[_st:_ed] = np.nan
-
-         return _df
-
-     # save pickle file
-     def _save_dt(self, _save_raw, _save_qc):
-         # dump pickle file
-         _check = True
-         while _check:
-             try:
-                 with (self.path / self.pkl_nam).open('wb') as f:
-                     pkl.dump(_save_qc, f, protocol=pkl.HIGHEST_PROTOCOL)
-
-                 # dump csv file
-                 if self.csv:
-                     _save_qc.to_csv(self.path / self.csv_nam)
-
-                 # output raw data if qc file
-                 if self.meta['deter_key'] is not None:
-                     with (self.path / self.pkl_nam_raw).open('wb') as f:
-                         pkl.dump(_save_raw, f, protocol=pkl.HIGHEST_PROTOCOL)
-
-                     if self.csv:
-                         _save_raw.to_csv(self.path / self.csv_nam_raw)
-
-                 _check = False
-
-             except PermissionError as _err:
-                 print('\n', _err)
-                 input('\t\t\33[41m Please Close The File And Press "Enter" \33[0m\n')
-
-     # read pickle file
-     def _read_pkl(self, ):
-         with (self.path / self.pkl_nam).open('rb') as f:
-             _fout_qc = pkl.load(f)
-
-         if (self.path / self.pkl_nam_raw).exists():
-             with (self.path / self.pkl_nam_raw).open('rb') as f:
-                 _fout_raw = pkl.load(f)
-         else:
-             _fout_raw = _fout_qc
-
-         return _fout_raw, _fout_qc
-
-     # read raw data
-     def _read_raw(self, ):
-         pattern = self.meta['pattern']
-         patterns = {pattern, pattern.lower(), pattern.upper()}
-         _df_con, _f_list = None, list(chain.from_iterable(self.path.glob(p) for p in patterns))
-
-         for file in _f_list:
-             if file.name in [self.csv_out, self.csv_nam, self.csv_nam_raw, f'{self.nam}.log']:
-                 continue
-
-             print(f"\r\t\treading {file.name}", end='')
-
-             _df = self._raw_reader(file)
-
-             # concat the concated list
-             if _df is not None:
-                 _df_con = concat([_df_con, _df]) if _df_con is not None else _df
-
-         if _df_con is None:
-             print(f"\t\t\033[31mNo File in '{self.path}' Could Read, Please Check Out the Current Path\033[0m")
-             return None, None
-
-         # QC
-         _fout_raw = self._raw_process(_df_con)
-         _fout_qc = self._QC(_fout_raw)
-
-         return _fout_raw, _fout_qc
-
-     # main flow
-     def _run(self, _start, _end):
-
-         _f_raw_done, _f_qc_done = None, None
-
-         # read pickle if pickle file exists and 'reset=False' or process raw data or append new data
-         _pkl_exist = self.path / self.pkl_nam in list(self.path.glob('*.pkl'))
-         if _pkl_exist & ((~self.reset) | self.apnd):
-             print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}")
-
-             _f_raw_done, _f_qc_done = self._read_pkl()
-
-             if not self.apnd:
-                 _f_raw_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw_done)
-                 _f_qc_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc_done)
-
-                 _f_qc_done = self._outlier_prcs(_f_qc_done)
-
-                 if self.rate:
-                     self._rate_calculate(_f_raw_done, _f_qc_done, _start_raw, _end_raw)
-
-                 return _f_qc_done if self.qc else _f_raw_done
-
-         # read raw data
-         print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it")
-
-         _f_raw, _f_qc = self._read_raw()
-         if _f_raw is None:
-             return None
-
-         # append new data and pickle data
-         if self.apnd & _pkl_exist:
-             _f_raw = self._apnd_prcs(_f_raw_done, _f_raw)
-             _f_qc = self._apnd_prcs(_f_qc_done, _f_qc)
-
-         _f_qc = self._outlier_prcs(_f_qc)
-
-         # save
-         self._save_dt(_f_raw, _f_qc)
-
-         # process time index
-         # if (_start is not None)|(_end is not None):
-         _f_raw, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw)
-         _f_qc, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc)
-
-         self._rate_calculate(_f_raw, _f_qc, _start_raw, _end_raw)
-
-         return _f_qc if self.qc else _f_raw
-
-     # -------------------------------------------------------------------------------------
-     # old flow
-     # def __run(self, _start, _end):
-     #
-     #     ## read pickle if pickle file exists and 'reset=False' or process raw data
-     #     if (self.path / self.pkl_nam in list(self.path.glob('*.pkl'))) & (~self.reset):
-     #         print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}")
-     #
-     #         with (self.path / self.pkl_nam).open('rb') as f:
-     #             _fout_qc = pkl.load(f)
-     #
-     #         _exist = (self.path / self.pkl_nam_raw).exists()
-     #         if _exist:
-     #             with (self.path / self.pkl_nam_raw).open('rb') as f:
-     #                 _fout_raw = pkl.load(f)
-     #         else:
-     #             _fout_raw = _fout_qc
-     #
-     #         _start, _end = to_datetime(_start) or _fout_qc.index[0], to_datetime(_end) or _fout_qc.index[-1]
-     #         _idx = date_range(_start, _end, freq=_fout_qc.index.freq.copy())
-     #         _idx.name = 'time'
-     #
-     #         _fout_raw, _fout_qc = _fout_raw.reindex(_idx), _fout_qc.reindex(_idx)
-     #         if (self.rate) & (_exist):
-     #             self._rate_calculate(_fout_raw, _fout_qc)
-     #
-     #         return _fout_qc if self.qc else _fout_raw
-     #     else:
-     #         print(
-     #             f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it")
-     #
-     #         ##=================================================================================================================
-     #         ## read raw data
-     #         _df_con, _f_list = None, list(self.path.glob(self.meta['pattern']))
-     #
-     #         if len(_f_list) == 0:
-     #             print(f"\t\t\033[31mNo File in '{self.path}' Could Read, Please Check Out the Current Path\033[0m")
-     #             return None
-     #
-     #         for file in _f_list:
-     #             if file.name in [self.csv_out, self.csv_nam, self.csv_nam_raw, f'{self.nam}.log']: continue
-     #
-     #             print(f"\r\t\treading {file.name}", end='')
-     #
-     #             _df = self._raw_reader(file)
-     #
-     #             ## concat the concated list
-     #             if _df is not None:
-     #                 _df_con = concat([_df_con, _df]) if _df_con is not None else _df
-     #         print()
-     #
-     #         ## QC
-     #         _save_raw = self._raw_process(_df_con)
-     #         _save_qc = self._QC(_save_raw)
-     #
-     #         _start, _end = to_datetime(_start) or _save_raw.index[0], to_datetime(_end) or _save_raw.index[-1]
-     #         _idx = date_range(_start, _end, freq=_save_raw.index.freq.copy())
-     #         _idx.name = 'time'
-     #
-     #         _fout_raw, _fout_qc = _save_raw.reindex(_idx).copy(), _save_qc.reindex(_idx).copy()
-     #
-     #         self._rate_calculate(_fout_raw, _fout_qc)
-     #
-     #         ##=================================================================================================================
-     #         ## dump pickle file
-     #         _check = True
-     #         while _check:
-     #
-     #             try:
-     #                 with (self.path / self.pkl_nam).open('wb') as f:
-     #                     pkl.dump(_save_qc, f, protocol=pkl.HIGHEST_PROTOCOL)
-     #
-     #                 ## dump csv file
-     #                 if self.csv:
-     #                     _save_qc.to_csv(self.path / self.csv_nam)
-     #
-     #                 ## output raw data if qc file
-     #                 if self.meta['deter_key'] is not None:
-     #                     with (self.path / self.pkl_nam_raw).open('wb') as f:
-     #                         pkl.dump(_save_raw, f, protocol=pkl.HIGHEST_PROTOCOL)
-     #
-     #                     if self.csv:
-     #                         _save_raw.to_csv(self.path / self.csv_nam_raw)
-     #
-     #                 return _fout_qc if self.qc else _fout_raw
-     #
-     #                 _check = False
-     #
-     #             except PermissionError as _err:
-     #                 print('\n', _err)
-     #                 input('\t\t\33[41m Please Close The File And Press "Enter" \33[0m\n')
-     #
-     #         return _fout_qc
+         return _f_qc if self.qc else _f_raw
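
Note on the refactored core: the subclassing contract is unchanged. A concrete reader implements only _raw_reader and _QC, and calling the instance drives the pickle-cache/reset/append flow, optional outlier masking, and the rate report (acquisition rate = hours with raw data / hours expected; yield rate = hours surviving QC / hours with raw data). The following is a minimal sketch of that contract, not package code: the MyInstrumentReader name, the data directory, and the sample outlier.json content are illustrative, and nam must match a key registered in rawDataReader/config/supported_instruments.py.

    from pathlib import Path

    from pandas import DataFrame

    from AeroViz.rawDataReader.core import AbstractReader


    class MyInstrumentReader(AbstractReader):  # hypothetical subclass for illustration
        nam = 'MyInstrument'  # must exist as a key in supported_instruments.meta

        def _raw_reader(self, _file):
            # parse one raw file into a time-indexed DataFrame (instrument-specific)
            ...

        def _QC(self, df: DataFrame):
            # the shared 1.5-sigma filter is now a base-class helper
            return self.basic_QC(df)


    # The first call parses the raw files and dumps _read_myinstrument(.pkl/.csv);
    # later calls reuse the pickle unless reset=True forces a re-read.
    reader = MyInstrumentReader(Path('data/MyInstrument'), qc=True, reset=False, rate=True)
    df = reader(start=None, end=None, mean_freq='1h', csv_out=False)

    # An optional outlier.json beside the raw files masks known bad periods, e.g.
    # {"sensor_swap": ["2024-03-01 00:00", "2024-03-02 00:00"]}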
AeroViz/rawDataReader/script/AE33.py
@@ -4,28 +4,27 @@ from AeroViz.rawDataReader.core import AbstractReader


  class Reader(AbstractReader):
-     nam = 'AE33'
+     nam = 'AE33'

-     def _raw_reader(self, _file):
-         _df = read_table(_file, parse_dates={'time': [0, 1]}, index_col='time',
-                          delimiter=r'\s+', skiprows=5, usecols=range(67))
-         _df.columns = _df.columns.str.strip(';')
+     def _raw_reader(self, _file):
+         if _file.stat().st_size / 1024 < 550:
+             print('\t It may not be a whole daily data.')

-         # remove data without Status=0, 128 (Not much filter tape), 256 (Not much filter tape)
-         if not self._oth_set.get('ignore_err', False):
-             _df = _df.where((_df['Status'] != 0) | (_df['Status'] != 128) | (_df['Status'] != 256)).copy()
+         _df = read_table(_file, parse_dates={'time': [0, 1]}, index_col='time',
+                          delimiter=r'\s+', skiprows=5, usecols=range(67))
+         _df.columns = _df.columns.str.strip(';')

-         return _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'Status']]
+         # remove data without Status=0, 128 (Not much filter tape), 256 (Not much filter tape)
+         if self.meta.get('error_state', False):
+             _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()

-     def _QC(self, _df):
-         # remove negative value
-         _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']].mask((_df < 0).copy())
+         _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']]

-         # QC data in 5 min
-         def _QC_func(df):
-             _df_ave, _df_std = df.mean(), df.std()
-             _df_lowb, _df_highb = df < (_df_ave - _df_std * 1.5), df > (_df_ave + _df_std * 1.5)
+         return _df.loc[~_df.index.duplicated() & _df.index.notna()]

-             return df.mask(_df_lowb | _df_highb).copy()
+     def _QC(self, _df):
+         # remove negative value
+         _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']].mask((_df < 0).copy())

-         return _df.resample('5min').apply(_QC_func).resample('1h').mean()
+         # QC data in 1h
+         return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
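
A usage sketch for the reworked AE33 reader follows; the import path is inferred from the file list above, and the data directory and dates are illustrative only.

    from datetime import datetime

    from AeroViz.rawDataReader.script.AE33 import Reader

    # rows whose Status value is listed in meta['error_state'] are dropped while
    # reading; _QC then masks negatives and applies the shared 1.5-sigma basic_QC
    # on 1 h windows before resampling to the instrument frequency
    ae33 = Reader('data/AE33', qc=True, csv_raw=True, reset=False, rate=True)

    # hourly means of the QC'd BC1..BC7 channels over the requested period
    df = ae33(start=datetime(2024, 1, 1), end=datetime(2024, 2, 1), mean_freq='1h')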