AeroViz 0.1.3-py3-none-any.whl → 0.1.4-py3-none-any.whl
This diff shows the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of AeroViz has been flagged as potentially problematic.
- AeroViz/__init__.py +7 -5
- AeroViz/{config → data}/DEFAULT_DATA.csv +1 -1
- AeroViz/dataProcess/Chemistry/__init__.py +40 -40
- AeroViz/dataProcess/Chemistry/_calculate.py +15 -15
- AeroViz/dataProcess/Chemistry/_isoropia.py +72 -68
- AeroViz/dataProcess/Chemistry/_mass_volume.py +158 -161
- AeroViz/dataProcess/Chemistry/_ocec.py +109 -109
- AeroViz/dataProcess/Chemistry/_partition.py +19 -18
- AeroViz/dataProcess/Chemistry/_teom.py +9 -11
- AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
- AeroViz/dataProcess/Optical/Angstrom_exponent.py +20 -0
- AeroViz/dataProcess/Optical/_IMPROVE.py +40 -41
- AeroViz/dataProcess/Optical/__init__.py +29 -44
- AeroViz/dataProcess/Optical/_absorption.py +21 -47
- AeroViz/dataProcess/Optical/_extinction.py +31 -25
- AeroViz/dataProcess/Optical/_mie.py +5 -7
- AeroViz/dataProcess/Optical/_mie_sd.py +89 -90
- AeroViz/dataProcess/Optical/_scattering.py +19 -20
- AeroViz/dataProcess/SizeDistr/__init__.py +39 -39
- AeroViz/dataProcess/SizeDistr/__merge.py +159 -158
- AeroViz/dataProcess/SizeDistr/_merge.py +155 -154
- AeroViz/dataProcess/SizeDistr/_merge_v1.py +162 -161
- AeroViz/dataProcess/SizeDistr/_merge_v2.py +153 -152
- AeroViz/dataProcess/SizeDistr/_merge_v3.py +327 -327
- AeroViz/dataProcess/SizeDistr/_merge_v4.py +273 -275
- AeroViz/dataProcess/SizeDistr/_size_distr.py +51 -51
- AeroViz/dataProcess/VOC/__init__.py +9 -9
- AeroViz/dataProcess/VOC/_potential_par.py +53 -55
- AeroViz/dataProcess/__init__.py +28 -6
- AeroViz/dataProcess/core/__init__.py +59 -65
- AeroViz/plot/__init__.py +7 -2
- AeroViz/plot/bar.py +126 -0
- AeroViz/plot/box.py +69 -0
- AeroViz/plot/distribution/distribution.py +421 -427
- AeroViz/plot/meteorology/meteorology.py +240 -292
- AeroViz/plot/optical/__init__.py +0 -1
- AeroViz/plot/optical/optical.py +230 -230
- AeroViz/plot/pie.py +198 -0
- AeroViz/plot/regression.py +196 -0
- AeroViz/plot/scatter.py +165 -0
- AeroViz/plot/templates/__init__.py +2 -4
- AeroViz/plot/templates/ammonium_rich.py +34 -0
- AeroViz/plot/templates/contour.py +25 -25
- AeroViz/plot/templates/corr_matrix.py +86 -93
- AeroViz/plot/templates/diurnal_pattern.py +28 -26
- AeroViz/plot/templates/koschmieder.py +59 -123
- AeroViz/plot/templates/metal_heatmap.py +135 -37
- AeroViz/plot/timeseries/__init__.py +1 -0
- AeroViz/plot/timeseries/template.py +47 -0
- AeroViz/plot/timeseries/timeseries.py +324 -264
- AeroViz/plot/utils/__init__.py +2 -1
- AeroViz/plot/utils/_color.py +57 -57
- AeroViz/plot/utils/_unit.py +48 -48
- AeroViz/plot/utils/plt_utils.py +92 -0
- AeroViz/plot/utils/sklearn_utils.py +49 -0
- AeroViz/plot/utils/units.json +5 -0
- AeroViz/plot/violin.py +80 -0
- AeroViz/process/__init__.py +17 -17
- AeroViz/process/core/DataProc.py +9 -9
- AeroViz/process/core/SizeDist.py +81 -81
- AeroViz/process/method/PyMieScatt_update.py +488 -488
- AeroViz/process/method/mie_theory.py +231 -229
- AeroViz/process/method/prop.py +40 -40
- AeroViz/process/script/AbstractDistCalc.py +103 -103
- AeroViz/process/script/Chemical.py +168 -167
- AeroViz/process/script/IMPACT.py +40 -40
- AeroViz/process/script/IMPROVE.py +152 -152
- AeroViz/process/script/Others.py +45 -45
- AeroViz/process/script/PSD.py +26 -26
- AeroViz/process/script/PSD_dry.py +69 -70
- AeroViz/process/script/retrieve_RI.py +50 -51
- AeroViz/rawDataReader/__init__.py +53 -58
- AeroViz/rawDataReader/config/supported_instruments.py +155 -0
- AeroViz/rawDataReader/core/__init__.py +233 -356
- AeroViz/rawDataReader/script/AE33.py +17 -18
- AeroViz/rawDataReader/script/AE43.py +18 -21
- AeroViz/rawDataReader/script/APS_3321.py +30 -30
- AeroViz/rawDataReader/script/Aurora.py +23 -24
- AeroViz/rawDataReader/script/BC1054.py +36 -40
- AeroViz/rawDataReader/script/EPA_vertical.py +37 -9
- AeroViz/rawDataReader/script/GRIMM.py +16 -23
- AeroViz/rawDataReader/script/IGAC.py +90 -0
- AeroViz/rawDataReader/script/MA350.py +32 -39
- AeroViz/rawDataReader/script/Minion.py +103 -0
- AeroViz/rawDataReader/script/NEPH.py +69 -74
- AeroViz/rawDataReader/script/SMPS_TH.py +25 -25
- AeroViz/rawDataReader/script/SMPS_aim11.py +32 -32
- AeroViz/rawDataReader/script/SMPS_genr.py +31 -31
- AeroViz/rawDataReader/script/Sunset_OCEC.py +60 -0
- AeroViz/rawDataReader/script/TEOM.py +30 -28
- AeroViz/rawDataReader/script/Table.py +13 -14
- AeroViz/rawDataReader/script/VOC.py +26 -0
- AeroViz/rawDataReader/script/__init__.py +18 -20
- AeroViz/tools/database.py +64 -66
- AeroViz/tools/dataclassifier.py +106 -106
- AeroViz/tools/dataprinter.py +51 -51
- AeroViz/tools/datareader.py +38 -38
- {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/METADATA +5 -4
- AeroViz-0.1.4.dist-info/RECORD +112 -0
- AeroViz/plot/improve/__init__.py +0 -1
- AeroViz/plot/improve/improve.py +0 -240
- AeroViz/plot/optical/aethalometer.py +0 -77
- AeroViz/plot/templates/event_evolution.py +0 -65
- AeroViz/plot/templates/regression.py +0 -256
- AeroViz/plot/templates/scatter.py +0 -130
- AeroViz/plot/templates/templates.py +0 -398
- AeroViz/plot/utils/_decorator.py +0 -74
- AeroViz/rawDataReader/script/IGAC_TH.py +0 -104
- AeroViz/rawDataReader/script/IGAC_ZM.py +0 -90
- AeroViz/rawDataReader/script/OCEC_LCRES.py +0 -34
- AeroViz/rawDataReader/script/OCEC_RES.py +0 -28
- AeroViz/rawDataReader/script/VOC_TH.py +0 -30
- AeroViz/rawDataReader/script/VOC_ZM.py +0 -37
- AeroViz/rawDataReader/utils/__init__.py +0 -0
- AeroViz/rawDataReader/utils/config.py +0 -169
- AeroViz-0.1.3.dist-info/RECORD +0 -111
- /AeroViz/{config → data}/DEFAULT_PNSD_DATA.csv +0 -0
- /AeroViz/{config → rawDataReader/config}/__init__.py +0 -0
- {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/WHEEL +0 -0
- {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/top_level.txt +0 -0
AeroViz/rawDataReader/core/__init__.py
@@ -1,400 +1,277 @@
 import json as jsn
+import logging
 import pickle as pkl
 from abc import ABC, abstractmethod
-from datetime import datetime as dtm
-from itertools import chain
+from datetime import datetime as dtm
 from pathlib import Path
+from typing import Any
 
 import numpy as np
 from pandas import DataFrame, date_range, concat, to_numeric, to_datetime
 
-from ..
+from ..config.supported_instruments import meta
 
 __all__ = ['AbstractReader']
 
 
 class AbstractReader(ABC):
-
+    nam = 'AbstractReader'
+
+    # initial data
+    # input : file path, reset switch
+
+    # list the file in the path and read pickle file if it exists, else read raw data and dump the pickle file the
+    # pickle file will be generated after read raw data first time, if you want to re-read the rawdata, please set
+    # 'reset=True'
+
+    def __init__(self,
+                 path: Path | str,
+                 qc: bool = True,
+                 csv_raw: bool = True,
+                 reset: bool = False,
+                 rate: bool = False,
+                 append_data: bool = False):
+
+        self.path = Path(path)
+        self.meta = meta[self.nam]
+        self.logger = self._setup_logger()
+
+        self.reset = reset
+        self.rate = rate
+        self.qc = qc
+        self.csv = csv_raw
+        self.apnd = append_data & reset
+
+        self.pkl_nam = self.path / f'_read_{self.nam.lower()}.pkl'
+        self.csv_nam = self.path / f'_read_{self.nam.lower()}.csv'
+        self.pkl_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.pkl'
+        self.csv_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.csv'
+        self.csv_out = self.path / f'output_{self.nam.lower()}.csv'
+
+    # dependency injection function, customize each instrument
+    @abstractmethod
+    def _raw_reader(self, _file):
+        pass
+
+    @abstractmethod
+    def _QC(self, df: DataFrame):
+        return df
+
+    def __call__(self,
+                 start: dtm | None = None,
+                 end: dtm | None = None,
+                 mean_freq: str = '1h',
+                 csv_out: bool = True,
+                 ) -> DataFrame | None:
+
+        if start and end and end <= start:
+            raise ValueError(f"Invalid time range: start {start} is after end {end}")
+
+        data = self._run(start, end)
+
+        if data is not None:
+            if mean_freq:
+                data = data.resample(mean_freq).mean()
+            if csv_out:
+                data.to_csv(self.csv_out)
+
+        return data
+
+    @staticmethod
+    def basic_QC(df: DataFrame):
+        df_ave, df_std = df.mean(), df.std()
+        df_lowb, df_highb = df < (df_ave - df_std * 1.5), df > (df_ave + df_std * 1.5)
+
+        return df.mask(df_lowb | df_highb).copy()
+
+    # set each to true datetime(18:30:01 -> 18:30:00) and rindex data
+    def _raw_process(self, _df):
+        # get time from df and set time to whole time to create time index
+        _st, _ed = _df.index.sort_values()[[0, -1]]
+        _tm_index = date_range(_st.strftime('%Y%m%d %H00'), _ed.floor('h').strftime('%Y%m%d %H00'),
+                               freq=self.meta['freq'])
+        _tm_index.name = 'time'
+
+        return _df.apply(to_numeric, errors='coerce').resample(self.meta['freq']).mean().reindex(_tm_index)
+
+    def _setup_logger(self) -> logging.Logger:
+        logger = logging.getLogger(self.nam)
+        logger.setLevel(logging.INFO)
+        handler = logging.FileHandler(self.path / f'{self.nam}.log')
+        handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
+        logger.addHandler(handler)
+        return logger
+
+    # acquisition rate and yield rate
+    def _rate_calculate(self, _fout_raw, _fout_qc, _st_raw, _ed_raw):
+        if self.meta['deter_key'] is not None:
+            _start, _end = _fout_qc.index[[0, -1]]
+
+            _drop_how = 'any'
+            _the_size = len(_fout_raw.resample('1h').mean().index)
+
+            self.logger.info(f"{'=' * 60}")
+            self.logger.info(
+                f"Raw data time : {_st_raw.strftime('%Y-%m-%d %H:%M:%S')} ~ {_ed_raw.strftime('%Y-%m-%d %H:%M:%S')}")
+            self.logger.info(
+                f"Output time : {_start.strftime('%Y-%m-%d %H:%M:%S')} ~ {_end.strftime('%Y-%m-%d %H:%M:%S')}")
+            self.logger.info(f"{'-' * 60}")
+            print(f"\n\n\t\tfrom {_start.strftime('%Y-%m-%d %H:%M:%S')} to {_end.strftime('%Y-%m-%d %H:%M:%S')}\n")
+
+            for _nam, _key in self.meta['deter_key'].items():
+                if _key == ['all']:
+                    _key, _drop_how = _fout_qc.keys(), 'all'
+
+                _real_size = len(_fout_raw[_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
+                _QC_size = len(_fout_qc[_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
+
+                try:
+                    _acq_rate = round((_real_size / _the_size) * 100, 1)
+                    _yid_rate = round((_QC_size / _real_size) * 100, 1)
+                except ZeroDivisionError:
+                    _acq_rate, _yid_rate = 0, 0
+
+                self.logger.info(f'{_nam}:')
+                self.logger.info(f"\tAcquisition rate: {_acq_rate}%")
+                self.logger.info(f'\tYield rate: {_yid_rate}%')
+                self.logger.info(f"{'=' * 60}")
+
+                print(f'\t\t{_nam} : ')
+                print(f'\t\t\tacquisition rate : \033[91m{_acq_rate}%\033[0m')
+                print(f'\t\t\tyield rate : \033[91m{_yid_rate}%\033[0m')
+
+    # process time index
+    @staticmethod
+    def _tmidx_process(_start, _end, _df):
+        _st, _ed = _df.index.sort_values()[[0, -1]]
+        _start, _end = to_datetime(_start) or _st, to_datetime(_end) or _ed
+        _idx = date_range(_start, _end, freq=_df.index.freq.copy())
+        _idx.name = 'time'
+
+        return _df.reindex(_idx), _st, _ed
+
+    # append new data to exist pkl
+    @staticmethod
+    def _apnd_prcs(_df_done, _df_apnd):
+
+        if _df_apnd is not None:
+            _df = concat([_df_apnd.dropna(how='all').copy(), _df_done.dropna(how='all').copy()])
 
-
-
+            _idx = date_range(*_df.index.sort_values()[[0, -1]], freq=_df_done.index.freq.copy())
+            _idx.name = 'time'
+
+            return _df.loc[~_df.index.duplicated()].copy().reindex(_idx)
 
-
-    # pickle file will be generated after read raw data first time, if you want to re-read the rawdata, please set
-    # 'reset=True'
+        return _df_done
 
-
-
-        # print('='*65)
-        # logger.info(f"Reading file and process data")
+    # remove outlier
+    def _outlier_prcs(self, _df):
 
-
-
-
-
+        if (self.path / 'outlier.json') not in self.path.glob('*.json'):
+            return _df
+
+        with (self.path / 'outlier.json').open('r', encoding='utf-8', errors='ignore') as f:
+            self.outlier = jsn.load(f)
 
-
-
+        for _st, _ed in self.outlier.values():
+            _df.loc[_st:_ed] = np.nan
 
-
-        self.rate = rate
-        self.qc = QC
-        self.csv = csv_raw
-        self.apnd = append_data & reset
+        return _df
 
-
-
+    # save pickle file
+    def _save_data(self, raw_data: DataFrame, qc_data: DataFrame) -> None:
+        self._safe_pickle_dump(self.pkl_nam, qc_data)
+        if self.csv:
+            qc_data.to_csv(self.csv_nam)
 
-
-
+        if self.meta['deter_key'] is not None:
+            self._safe_pickle_dump(self.pkl_nam_raw, raw_data)
+            if self.csv:
+                raw_data.to_csv(self.csv_nam_raw)
 
-
+    @staticmethod
+    def _safe_pickle_dump(file_path: Path, data: Any) -> None:
+        while True:
+            try:
+                with file_path.open('wb') as f:
+                    pkl.dump(data, f, protocol=pkl.HIGHEST_PROTOCOL)
+                break
+            except PermissionError as err:
+                print('\n', err)
+                input('\t\t\33[41m Please close the file and press "Enter" \33[0m\n')
 
-
-
-
+    # read pickle file
+    def _read_pkl(self):
+        with self.pkl_nam.open('rb') as qc_data, self.pkl_nam_raw.open('rb') as raw_data:
+            return pkl.load(raw_data), pkl.load(qc_data)
 
-
-
-
-
-                 mean_freq='1h',
-                 csv_out=True,
-                 **kwarg):
+    def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
+        patterns = {self.meta['pattern'].lower(), self.meta['pattern'].upper(), self.meta['pattern']}
+        files = [f for pattern in patterns for f in self.path.glob(pattern)
+                 if f.name not in [self.csv_out.name, self.csv_nam.name, self.csv_nam_raw.name, f'{self.nam}.log']]
 
-
+        if not files:
+            print(f"\t\t\033[31mNo files in '{self.path}' could be read. Please check the current path.\033[0m")
+            return None, None
 
-
-
-
+        df_list = []
+        for file in files:
+            print(f"\r\t\treading {file.name}", end='')
+            df = self._raw_reader(file)
+            if df is not None:
+                df_list.append(df)
 
-
+        if not df_list:
+            return None, None
 
-
-
-        fout = fout.resample(mean_freq).mean()
+        raw_data = self._raw_process(concat(df_list))
+        qc_data = self._QC(raw_data)
 
-
-        fout.to_csv(self.path / self.csv_out)
+        return raw_data, qc_data
 
-
+    # main flow
+    def _run(self, _start, _end):
+        _f_raw_done, _f_qc_done = None, None
 
-
-
-
-        # customize each instrument
-        pass
+        # read pickle if pickle file exists and 'reset=False' or process raw data or append new data
+        if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and (not self.reset or self.apnd):
+            print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}")
 
-
-    def _QC(self, df: DataFrame):
-        # customize each instrument
-        return df
+            _f_raw_done, _f_qc_done = self._read_pkl()
 
-
-
-
-        _st, _ed = _df.index.sort_values()[[0, -1]]
-        _tm_index = date_range(_st.strftime('%Y%m%d %H00'),
-                               (_ed + dtmdt(hours=1)).strftime('%Y%m%d %H00'),
-                               freq=self.meta['freq'])
-        _tm_index.name = 'time'
+            if not self.apnd:
+                _f_raw_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw_done)
+                _f_qc_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc_done)
 
-
+                _f_qc_done = self._outlier_prcs(_f_qc_done)
 
-
-
+                if self.rate:
+                    self._rate_calculate(_f_raw_done, _f_qc_done, _start_raw, _end_raw)
 
-
-        _start, _end = _fout_qc.index[[0, -1]]
+                return _f_qc_done if self.qc else _f_raw_done
 
-
-
+        # read raw data
+        print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it")
 
-
-
+        _f_raw, _f_qc = self._read_raw_files()
+        if _f_raw is None:
+            return None
 
-
-
+        # append new data and pickle data
+        if self.apnd and self.pkl_nam.exists():
+            _f_raw = self._apnd_prcs(_f_raw_done, _f_raw)
+            _f_qc = self._apnd_prcs(_f_qc_done, _f_qc)
 
-
-        _f.write(f"{'-' * 60}\n")
-        _f.write(f"rawdata time : \n\t{_st_raw.strftime('%Y-%m-%d %X')} ~ {_ed_raw.strftime('%Y-%m-%d %X')}\n")
-        _f.write(f"output time : \n\t{_start.strftime('%Y-%m-%d %X')} ~ {_end.strftime('%Y-%m-%d %X')}\n")
-        _f.write(f"{'-' * 60}\n")
-        print(f"\n\t\tfrom {_start.strftime('%Y-%m-%d %X')} to {_end.strftime('%Y-%m-%d %X')}\n")
+        _f_qc = self._outlier_prcs(_f_qc)
 
-
+        # save
+        self._save_data(_f_raw, _f_qc)
 
-
-
+        # process time index
+        # if (_start is not None)|(_end is not None):
+        _f_raw, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw)
+        _f_qc, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc)
 
-
-        _QC_size = len(_fout_qc[_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
+        self._rate_calculate(_f_raw, _f_qc, _start_raw, _end_raw)
 
-
-            _acq_rate = round((_real_size / _the_size) * 100, 1)
-            _yid_rate = round((_QC_size / _real_size) * 100, 1)
-        except ZeroDivisionError:
-            _acq_rate, _yid_rate = 0, 0
-
-        _f.write(f'{_nam} : \n')
-        _f.write(f"\tacquisition rate : {_acq_rate}%\n")
-        _f.write(f'\tyield rate : {_yid_rate}%\n')
-
-        print(f'\t\t{_nam} : ')
-        print(f'\t\t\tacquisition rate : \033[91m{_acq_rate}%\033[0m')
-        print(f'\t\t\tyield rate : \033[91m{_yid_rate}%\033[0m')
-
-        _f.write(f"{'=' * 40}\n")
-        _f.write(_cont)
-
-        _f.close()
-
-    # process time index
-    @staticmethod
-    def _tmidx_process(_start, _end, _df):
-        _st, _ed = _df.index.sort_values()[[0, -1]]
-        _start, _end = to_datetime(_start) or _st, to_datetime(_end) or _ed
-        _idx = date_range(_start, _end, freq=_df.index.freq.copy())
-        _idx.name = 'time'
-
-        return _df.reindex(_idx), _st, _ed
-
-    # append new data to exist pkl
-    @staticmethod
-    def _apnd_prcs(_df_done, _df_apnd):
-
-        if _df_apnd is not None:
-            _df = concat([_df_apnd.dropna(how='all').copy(), _df_done.dropna(how='all').copy()])
-
-            _idx = date_range(*_df.index.sort_values()[[0, -1]], freq=_df_done.index.freq.copy())
-            _idx.name = 'time'
-
-            return _df.loc[~_df.index.duplicated()].copy().reindex(_idx)
-
-        return _df_done
-
-    # remove outlier
-    def _outlier_prcs(self, _df):
-
-        if (self.path / 'outlier.json') not in self.path.glob('*.json'):
-            return _df
-
-        with (self.path / 'outlier.json').open('r', encoding='utf-8', errors='ignore') as f:
-            self.outlier = jsn.load(f)
-
-        for _st, _ed in self.outlier.values():
-            _df.loc[_st:_ed] = np.nan
-
-        return _df
-
-    # save pickle file
-    def _save_dt(self, _save_raw, _save_qc):
-        # dump pickle file
-        _check = True
-        while _check:
-            try:
-                with (self.path / self.pkl_nam).open('wb') as f:
-                    pkl.dump(_save_qc, f, protocol=pkl.HIGHEST_PROTOCOL)
-
-                # dump csv file
-                if self.csv:
-                    _save_qc.to_csv(self.path / self.csv_nam)
-
-                # output raw data if qc file
-                if self.meta['deter_key'] is not None:
-                    with (self.path / self.pkl_nam_raw).open('wb') as f:
-                        pkl.dump(_save_raw, f, protocol=pkl.HIGHEST_PROTOCOL)
-
-                    if self.csv:
-                        _save_raw.to_csv(self.path / self.csv_nam_raw)
-
-                _check = False
-
-            except PermissionError as _err:
-                print('\n', _err)
-                input('\t\t\33[41m Please Close The File And Press "Enter" \33[0m\n')
-
-    # read pickle file
-    def _read_pkl(self, ):
-        with (self.path / self.pkl_nam).open('rb') as f:
-            _fout_qc = pkl.load(f)
-
-        if (self.path / self.pkl_nam_raw).exists():
-            with (self.path / self.pkl_nam_raw).open('rb') as f:
-                _fout_raw = pkl.load(f)
-        else:
-            _fout_raw = _fout_qc
-
-        return _fout_raw, _fout_qc
-
-    # read raw data
-    def _read_raw(self, ):
-        pattern = self.meta['pattern']
-        patterns = {pattern, pattern.lower(), pattern.upper()}
-        _df_con, _f_list = None, list(chain.from_iterable(self.path.glob(p) for p in patterns))
-
-        for file in _f_list:
-            if file.name in [self.csv_out, self.csv_nam, self.csv_nam_raw, f'{self.nam}.log']:
-                continue
-
-            print(f"\r\t\treading {file.name}", end='')
-
-            _df = self._raw_reader(file)
-
-            # concat the concated list
-            if _df is not None:
-                _df_con = concat([_df_con, _df]) if _df_con is not None else _df
-
-        if _df_con is None:
-            print(f"\t\t\033[31mNo File in '{self.path}' Could Read, Please Check Out the Current Path\033[0m")
-            return None, None
-
-        # QC
-        _fout_raw = self._raw_process(_df_con)
-        _fout_qc = self._QC(_fout_raw)
-
-        return _fout_raw, _fout_qc
-
-    # main flow
-    def _run(self, _start, _end):
-
-        _f_raw_done, _f_qc_done = None, None
-
-        # read pickle if pickle file exists and 'reset=False' or process raw data or append new data
-        _pkl_exist = self.path / self.pkl_nam in list(self.path.glob('*.pkl'))
-        if _pkl_exist & ((~self.reset) | self.apnd):
-            print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}")
-
-            _f_raw_done, _f_qc_done = self._read_pkl()
-
-            if not self.apnd:
-                _f_raw_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw_done)
-                _f_qc_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc_done)
-
-            _f_qc_done = self._outlier_prcs(_f_qc_done)
-
-            if self.rate:
-                self._rate_calculate(_f_raw_done, _f_qc_done, _start_raw, _end_raw)
-
-            return _f_qc_done if self.qc else _f_raw_done
-
-        # read raw data
-        print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it")
-
-        _f_raw, _f_qc = self._read_raw()
-        if _f_raw is None:
-            return None
-
-        # append new data and pickle data
-        if self.apnd & _pkl_exist:
-            _f_raw = self._apnd_prcs(_f_raw_done, _f_raw)
-            _f_qc = self._apnd_prcs(_f_qc_done, _f_qc)
-
-        _f_qc = self._outlier_prcs(_f_qc)
-
-        # save
-        self._save_dt(_f_raw, _f_qc)
-
-        # process time index
-        # if (_start is not None)|(_end is not None):
-        _f_raw, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw)
-        _f_qc, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc)
-
-        self._rate_calculate(_f_raw, _f_qc, _start_raw, _end_raw)
-
-        return _f_qc if self.qc else _f_raw
-
-    # -------------------------------------------------------------------------------------
-    # old flow
-    # def __run(self, _start, _end):
-    #
-    # ## read pickle if pickle file exists and 'reset=False' or process raw data
-    # if (self.path / self.pkl_nam in list(self.path.glob('*.pkl'))) & (~self.reset):
-    # print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}")
-    #
-    # with (self.path / self.pkl_nam).open('rb') as f:
-    # _fout_qc = pkl.load(f)
-    #
-    # _exist = (self.path / self.pkl_nam_raw).exists()
-    # if _exist:
-    # with (self.path / self.pkl_nam_raw).open('rb') as f:
-    # _fout_raw = pkl.load(f)
-    # else:
-    # _fout_raw = _fout_qc
-    #
-    # _start, _end = to_datetime(_start) or _fout_qc.index[0], to_datetime(_end) or _fout_qc.index[-1]
-    # _idx = date_range(_start, _end, freq=_fout_qc.index.freq.copy())
-    # _idx.name = 'time'
-    #
-    # _fout_raw, _fout_qc = _fout_raw.reindex(_idx), _fout_qc.reindex(_idx)
-    # if (self.rate) & (_exist):
-    # self._rate_calculate(_fout_raw, _fout_qc)
-    #
-    # return _fout_qc if self.qc else _fout_raw
-    # else:
-    # print(
-    # f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it")
-    #
-    # ##=================================================================================================================
-    # ## read raw data
-    # _df_con, _f_list = None, list(self.path.glob(self.meta['pattern']))
-    #
-    # if len(_f_list) == 0:
-    # print(f"\t\t\033[31mNo File in '{self.path}' Could Read, Please Check Out the Current Path\033[0m")
-    # return None
-    #
-    # for file in _f_list:
-    # if file.name in [self.csv_out, self.csv_nam, self.csv_nam_raw, f'{self.nam}.log']: continue
-    #
-    # print(f"\r\t\treading {file.name}", end='')
-    #
-    # _df = self._raw_reader(file)
-    #
-    # ## concat the concated list
-    # if _df is not None:
-    # _df_con = concat([_df_con, _df]) if _df_con is not None else _df
-    # print()
-    #
-    # ## QC
-    # _save_raw = self._raw_process(_df_con)
-    # _save_qc = self._QC(_save_raw)
-    #
-    # _start, _end = to_datetime(_start) or _save_raw.index[0], to_datetime(_end) or _save_raw.index[-1]
-    # _idx = date_range(_start, _end, freq=_save_raw.index.freq.copy())
-    # _idx.name = 'time'
-    #
-    # _fout_raw, _fout_qc = _save_raw.reindex(_idx).copy(), _save_qc.reindex(_idx).copy()
-    #
-    # self._rate_calculate(_fout_raw, _fout_qc)
-    #
-    # ##=================================================================================================================
-    # ## dump pickle file
-    # _check = True
-    # while _check:
-    #
-    # try:
-    # with (self.path / self.pkl_nam).open('wb') as f:
-    # pkl.dump(_save_qc, f, protocol=pkl.HIGHEST_PROTOCOL)
-    #
-    # ## dump csv file
-    # if self.csv:
-    # _save_qc.to_csv(self.path / self.csv_nam)
-    #
-    # ## output raw data if qc file
-    # if self.meta['deter_key'] is not None:
-    # with (self.path / self.pkl_nam_raw).open('wb') as f:
-    # pkl.dump(_save_raw, f, protocol=pkl.HIGHEST_PROTOCOL)
-    #
-    # if self.csv:
-    # _save_raw.to_csv(self.path / self.csv_nam_raw)
-    #
-    # return _fout_qc if self.qc else _fout_raw
-    #
-    # _check = False
-    #
-    # except PermissionError as _err:
-    # print('\n', _err)
-    # input('\t\t\33[41m Please Close The File And Press "Enter" \33[0m\n')
-    #
-    # return _fout_qc
+        return _f_qc if self.qc else _f_raw
AeroViz/rawDataReader/script/AE33.py
@@ -4,28 +4,27 @@ from AeroViz.rawDataReader.core import AbstractReader
 
 
 class Reader(AbstractReader):
-
+    nam = 'AE33'
 
-
-
-
-        _df.columns = _df.columns.str.strip(';')
+    def _raw_reader(self, _file):
+        if _file.stat().st_size / 1024 < 550:
+            print('\t It may not be a whole daily data.')
 
-
-
-
+        _df = read_table(_file, parse_dates={'time': [0, 1]}, index_col='time',
+                         delimiter=r'\s+', skiprows=5, usecols=range(67))
+        _df.columns = _df.columns.str.strip(';')
 
-
+        # remove data without Status=0, 128 (Not much filter tape), 256 (Not much filter tape)
+        if self.meta.get('error_state', False):
+            _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()
 
-
-        # remove negative value
-        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']].mask((_df < 0).copy())
+        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']]
 
-
-        def _QC_func(df):
-            _df_ave, _df_std = df.mean(), df.std()
-            _df_lowb, _df_highb = df < (_df_ave - _df_std * 1.5), df > (_df_ave + _df_std * 1.5)
+        return _df.loc[~_df.index.duplicated() & _df.index.notna()]
 
-
+    def _QC(self, _df):
+        # remove negative value
+        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']].mask((_df < 0).copy())
 
-
+        # QC data in 1h
+        return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
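
For reference, the rewritten _QC above drops the local _QC_func helper and instead applies the base class's basic_QC (first hunk) over hourly windows: values outside mean ± 1.5 * std are masked before resampling back to the instrument frequency. A small self-contained sketch of that masking rule, using made-up BC6 values:

import pandas as pd

from AeroViz.rawDataReader.core import AbstractReader

# Six hourly BC6 readings with one obvious spike (synthetic values).
idx = pd.date_range('2024-01-01', periods=6, freq='h', name='time')
df = pd.DataFrame({'BC6': [1.00, 1.10, 0.90, 1.00, 25.0, 1.05]}, index=idx)

# basic_QC masks anything outside mean ± 1.5 * std, so the 25.0 spike becomes NaN.
print(AbstractReader.basic_QC(df))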
|