AeroViz-0.1.3b0-py3-none-any.whl → AeroViz-0.1.4-py3-none-any.whl
This diff shows the content of publicly available package versions as they appear in their respective public registries; it is provided for informational purposes only.
Potentially problematic release.
This version of AeroViz might be problematic.
- AeroViz/__init__.py +5 -3
- AeroViz/{config → data}/DEFAULT_DATA.csv +1 -1
- AeroViz/dataProcess/Chemistry/__init__.py +7 -7
- AeroViz/dataProcess/Chemistry/_isoropia.py +5 -2
- AeroViz/dataProcess/Chemistry/_mass_volume.py +15 -18
- AeroViz/dataProcess/Chemistry/_ocec.py +2 -2
- AeroViz/dataProcess/Chemistry/_teom.py +2 -1
- AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
- AeroViz/dataProcess/Optical/Angstrom_exponent.py +20 -0
- AeroViz/dataProcess/Optical/_IMPROVE.py +13 -15
- AeroViz/dataProcess/Optical/__init__.py +15 -30
- AeroViz/dataProcess/Optical/_absorption.py +21 -47
- AeroViz/dataProcess/Optical/_extinction.py +20 -15
- AeroViz/dataProcess/Optical/_mie.py +0 -1
- AeroViz/dataProcess/Optical/_scattering.py +19 -20
- AeroViz/dataProcess/SizeDistr/__init__.py +7 -7
- AeroViz/dataProcess/SizeDistr/_merge.py +2 -2
- AeroViz/dataProcess/SizeDistr/_merge_v1.py +2 -2
- AeroViz/dataProcess/SizeDistr/_merge_v2.py +2 -2
- AeroViz/dataProcess/SizeDistr/_merge_v3.py +1 -1
- AeroViz/dataProcess/SizeDistr/_merge_v4.py +1 -1
- AeroViz/dataProcess/VOC/__init__.py +3 -3
- AeroViz/dataProcess/__init__.py +28 -6
- AeroViz/dataProcess/core/__init__.py +10 -17
- AeroViz/plot/__init__.py +1 -1
- AeroViz/plot/box.py +2 -1
- AeroViz/plot/optical/optical.py +4 -4
- AeroViz/plot/regression.py +25 -39
- AeroViz/plot/scatter.py +68 -2
- AeroViz/plot/templates/__init__.py +2 -1
- AeroViz/plot/templates/ammonium_rich.py +34 -0
- AeroViz/plot/templates/diurnal_pattern.py +11 -9
- AeroViz/plot/templates/koschmieder.py +51 -115
- AeroViz/plot/templates/metal_heatmap.py +115 -17
- AeroViz/plot/timeseries/__init__.py +1 -0
- AeroViz/plot/timeseries/template.py +47 -0
- AeroViz/plot/timeseries/timeseries.py +275 -208
- AeroViz/plot/utils/plt_utils.py +2 -2
- AeroViz/plot/utils/units.json +5 -0
- AeroViz/plot/violin.py +9 -8
- AeroViz/process/__init__.py +2 -2
- AeroViz/process/script/AbstractDistCalc.py +1 -1
- AeroViz/process/script/Chemical.py +5 -4
- AeroViz/process/script/Others.py +1 -1
- AeroViz/rawDataReader/__init__.py +17 -22
- AeroViz/rawDataReader/{utils/config.py → config/supported_instruments.py} +38 -52
- AeroViz/rawDataReader/core/__init__.py +104 -229
- AeroViz/rawDataReader/script/AE33.py +10 -11
- AeroViz/rawDataReader/script/AE43.py +8 -11
- AeroViz/rawDataReader/script/APS_3321.py +6 -6
- AeroViz/rawDataReader/script/Aurora.py +18 -19
- AeroViz/rawDataReader/script/BC1054.py +11 -15
- AeroViz/rawDataReader/script/EPA_vertical.py +35 -7
- AeroViz/rawDataReader/script/GRIMM.py +2 -9
- AeroViz/rawDataReader/script/{IGAC_ZM.py → IGAC.py} +17 -17
- AeroViz/rawDataReader/script/MA350.py +7 -14
- AeroViz/rawDataReader/script/Minion.py +103 -0
- AeroViz/rawDataReader/script/NEPH.py +24 -29
- AeroViz/rawDataReader/script/SMPS_TH.py +4 -4
- AeroViz/rawDataReader/script/SMPS_aim11.py +6 -6
- AeroViz/rawDataReader/script/SMPS_genr.py +6 -6
- AeroViz/rawDataReader/script/Sunset_OCEC.py +60 -0
- AeroViz/rawDataReader/script/TEOM.py +8 -6
- AeroViz/rawDataReader/script/Table.py +7 -8
- AeroViz/rawDataReader/script/VOC.py +26 -0
- AeroViz/rawDataReader/script/__init__.py +10 -12
- AeroViz/tools/database.py +7 -9
- AeroViz/tools/datareader.py +3 -3
- {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.4.dist-info}/METADATA +1 -1
- AeroViz-0.1.4.dist-info/RECORD +112 -0
- AeroViz/rawDataReader/script/IGAC_TH.py +0 -104
- AeroViz/rawDataReader/script/OCEC_LCRES.py +0 -34
- AeroViz/rawDataReader/script/OCEC_RES.py +0 -28
- AeroViz/rawDataReader/script/VOC_TH.py +0 -30
- AeroViz/rawDataReader/script/VOC_ZM.py +0 -37
- AeroViz-0.1.3b0.dist-info/RECORD +0 -110
- /AeroViz/{config → data}/DEFAULT_PNSD_DATA.csv +0 -0
- /AeroViz/rawDataReader/{utils → config}/__init__.py +0 -0
- {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.4.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.4.dist-info}/WHEEL +0 -0
- {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.4.dist-info}/top_level.txt +0 -0
AeroViz/rawDataReader/core/__init__.py
@@ -1,14 +1,15 @@
 import json as jsn
+import logging
 import pickle as pkl
 from abc import ABC, abstractmethod
-from datetime import datetime as dtm
-from itertools import chain
+from datetime import datetime as dtm
 from pathlib import Path
+from typing import Any

 import numpy as np
 from pandas import DataFrame, date_range, concat, to_numeric, to_datetime

-from ..
+from ..config.supported_instruments import meta

 __all__ = ['AbstractReader']

@@ -16,117 +17,108 @@ __all__ = ['AbstractReader']
 class AbstractReader(ABC):
     nam = 'AbstractReader'

-    # initial
+    # initial data
     # input : file path, reset switch

     # list the file in the path and read pickle file if it exists, else read raw data and dump the pickle file the
     # pickle file will be generated after read raw data first time, if you want to re-read the rawdata, please set
     # 'reset=True'

-    def __init__(self,
-
-
-
+    def __init__(self,
+                 path: Path | str,
+                 qc: bool = True,
+                 csv_raw: bool = True,
+                 reset: bool = False,
+                 rate: bool = False,
+                 append_data: bool = False):

-
-        # self.index = lambda _freq: date_range(_sta, _fin, freq=_freq)
-        self.path = Path(_path)
+        self.path = Path(path)
         self.meta = meta[self.nam]
-
-        if update_meta is not None:
-            self.meta.update(update_meta)
+        self.logger = self._setup_logger()

         self.reset = reset
         self.rate = rate
-        self.qc =
+        self.qc = qc
         self.csv = csv_raw
         self.apnd = append_data & reset

-        self.pkl_nam = f'_read_{self.nam.lower()}.pkl'
-        self.csv_nam = f'_read_{self.nam.lower()}.csv'
-
-        self.
-        self.
+        self.pkl_nam = self.path / f'_read_{self.nam.lower()}.pkl'
+        self.csv_nam = self.path / f'_read_{self.nam.lower()}.csv'
+        self.pkl_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.pkl'
+        self.csv_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.csv'
+        self.csv_out = self.path / f'output_{self.nam.lower()}.csv'

-
+    # dependency injection function, customize each instrument
+    @abstractmethod
+    def _raw_reader(self, _file):
+        pass

-
-
-
+    @abstractmethod
+    def _QC(self, df: DataFrame):
+        return df

-    # get data
     def __call__(self,
                  start: dtm | None = None,
                  end: dtm | None = None,
-                 mean_freq='1h',
-                 csv_out=True,
-
-
-        self._oth_set = kwarg
+                 mean_freq: str = '1h',
+                 csv_out: bool = True,
+                 ) -> DataFrame | None:

         if start and end and end <= start:
-            raise ValueError(
-                f'\nPlease check out input time : '
-                f'\n\tstart : {start.strftime("%Y-%m-%d %X")}'
-                f'\n\tend : {end.strftime("%Y-%m-%d %X")}')
-
-        fout = self._run(start, end)
+            raise ValueError(f"Invalid time range: start {start} is after end {end}")

-
-        if mean_freq is not None:
-            fout = fout.resample(mean_freq).mean()
+        data = self._run(start, end)

+        if data is not None:
+            if mean_freq:
+                data = data.resample(mean_freq).mean()
             if csv_out:
-
+                data.to_csv(self.csv_out)

-        return
+        return data

-
-
-
-
-        pass
+    @staticmethod
+    def basic_QC(df: DataFrame):
+        df_ave, df_std = df.mean(), df.std()
+        df_lowb, df_highb = df < (df_ave - df_std * 1.5), df > (df_ave + df_std * 1.5)

-
-    def _QC(self, df: DataFrame):
-        # customize each instrument
-        return df
+        return df.mask(df_lowb | df_highb).copy()

     # set each to true datetime(18:30:01 -> 18:30:00) and rindex data
     def _raw_process(self, _df):
         # get time from df and set time to whole time to create time index
         _st, _ed = _df.index.sort_values()[[0, -1]]
-        _tm_index = date_range(_st.strftime('%Y%m%d %H00'),
-                               (_ed + dtmdt(hours=1)).strftime('%Y%m%d %H00'),
+        _tm_index = date_range(_st.strftime('%Y%m%d %H00'), _ed.floor('h').strftime('%Y%m%d %H00'),
                                freq=self.meta['freq'])
         _tm_index.name = 'time'

         return _df.apply(to_numeric, errors='coerce').resample(self.meta['freq']).mean().reindex(_tm_index)

+    def _setup_logger(self) -> logging.Logger:
+        logger = logging.getLogger(self.nam)
+        logger.setLevel(logging.INFO)
+        handler = logging.FileHandler(self.path / f'{self.nam}.log')
+        handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
+        logger.addHandler(handler)
+        return logger
+
     # acquisition rate and yield rate
     def _rate_calculate(self, _fout_raw, _fout_qc, _st_raw, _ed_raw):
-
         if self.meta['deter_key'] is not None:
             _start, _end = _fout_qc.index[[0, -1]]

             _drop_how = 'any'
             _the_size = len(_fout_raw.resample('1h').mean().index)

-
-
-
-
-
-
-
-                _f.write(f"{'-' * 60}\n")
-                _f.write(f"rawdata time : \n\t{_st_raw.strftime('%Y-%m-%d %X')} ~ {_ed_raw.strftime('%Y-%m-%d %X')}\n")
-                _f.write(f"output time : \n\t{_start.strftime('%Y-%m-%d %X')} ~ {_end.strftime('%Y-%m-%d %X')}\n")
-                _f.write(f"{'-' * 60}\n")
-            print(f"\n\t\tfrom {_start.strftime('%Y-%m-%d %X')} to {_end.strftime('%Y-%m-%d %X')}\n")
+            self.logger.info(f"{'=' * 60}")
+            self.logger.info(
+                f"Raw data time : {_st_raw.strftime('%Y-%m-%d %H:%M:%S')} ~ {_ed_raw.strftime('%Y-%m-%d %H:%M:%S')}")
+            self.logger.info(
+                f"Output time : {_start.strftime('%Y-%m-%d %H:%M:%S')} ~ {_end.strftime('%Y-%m-%d %H:%M:%S')}")
+            self.logger.info(f"{'-' * 60}")
+            print(f"\n\n\t\tfrom {_start.strftime('%Y-%m-%d %H:%M:%S')} to {_end.strftime('%Y-%m-%d %H:%M:%S')}\n")

             for _nam, _key in self.meta['deter_key'].items():
-
                 if _key == ['all']:
                     _key, _drop_how = _fout_qc.keys(), 'all'

@@ -139,18 +131,14 @@ class AbstractReader(ABC):
                 except ZeroDivisionError:
                     _acq_rate, _yid_rate = 0, 0

-
-
-
+                self.logger.info(f'{_nam}:')
+                self.logger.info(f"\tAcquisition rate: {_acq_rate}%")
+                self.logger.info(f'\tYield rate: {_yid_rate}%')
+                self.logger.info(f"{'=' * 60}")

                 print(f'\t\t{_nam} : ')
                 print(f'\t\t\tacquisition rate : \033[91m{_acq_rate}%\033[0m')
-                print(f'\t\t\tyield
-
-            _f.write(f"{'=' * 40}\n")
-            _f.write(_cont)
-
-            _f.close()
+                print(f'\t\t\tyield rate : \033[91m{_yid_rate}%\033[0m')

     # process time index
     @staticmethod
@@ -191,81 +179,62 @@ class AbstractReader(ABC):
         return _df

     # save pickle file
-    def
-
-
-
-        try:
-            with (self.path / self.pkl_nam).open('wb') as f:
-                pkl.dump(_save_qc, f, protocol=pkl.HIGHEST_PROTOCOL)
-
-            # dump csv file
-            if self.csv:
-                _save_qc.to_csv(self.path / self.csv_nam)
-
-            # output raw data if qc file
-            if self.meta['deter_key'] is not None:
-                with (self.path / self.pkl_nam_raw).open('wb') as f:
-                    pkl.dump(_save_raw, f, protocol=pkl.HIGHEST_PROTOCOL)
+    def _save_data(self, raw_data: DataFrame, qc_data: DataFrame) -> None:
+        self._safe_pickle_dump(self.pkl_nam, qc_data)
+        if self.csv:
+            qc_data.to_csv(self.csv_nam)

-
-
-
-
+        if self.meta['deter_key'] is not None:
+            self._safe_pickle_dump(self.pkl_nam_raw, raw_data)
+            if self.csv:
+                raw_data.to_csv(self.csv_nam_raw)

-
-
-
+    @staticmethod
+    def _safe_pickle_dump(file_path: Path, data: Any) -> None:
+        while True:
+            try:
+                with file_path.open('wb') as f:
+                    pkl.dump(data, f, protocol=pkl.HIGHEST_PROTOCOL)
+                break
+            except PermissionError as err:
+                print('\n', err)
+                input('\t\t\33[41m Please close the file and press "Enter" \33[0m\n')

     # read pickle file
-    def _read_pkl(self
-        with
-
-
-        if (self.path / self.pkl_nam_raw).exists():
-            with (self.path / self.pkl_nam_raw).open('rb') as f:
-                _fout_raw = pkl.load(f)
-        else:
-            _fout_raw = _fout_qc
+    def _read_pkl(self):
+        with self.pkl_nam.open('rb') as qc_data, self.pkl_nam_raw.open('rb') as raw_data:
+            return pkl.load(raw_data), pkl.load(qc_data)

-
+    def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
+        patterns = {self.meta['pattern'].lower(), self.meta['pattern'].upper(), self.meta['pattern']}
+        files = [f for pattern in patterns for f in self.path.glob(pattern)
+                 if f.name not in [self.csv_out.name, self.csv_nam.name, self.csv_nam_raw.name, f'{self.nam}.log']]

-
-
-
-        patterns = {pattern, pattern.lower(), pattern.upper()}
-        _df_con, _f_list = None, list(chain.from_iterable(self.path.glob(p) for p in patterns))
-
-        for file in _f_list:
-            if file.name in [self.csv_out, self.csv_nam, self.csv_nam_raw, f'{self.nam}.log']:
-                continue
+        if not files:
+            print(f"\t\t\033[31mNo files in '{self.path}' could be read. Please check the current path.\033[0m")
+            return None, None

+        df_list = []
+        for file in files:
             print(f"\r\t\treading {file.name}", end='')
+            df = self._raw_reader(file)
+            if df is not None:
+                df_list.append(df)

-
-
-            # concat the concated list
-            if _df is not None:
-                _df_con = concat([_df_con, _df]) if _df_con is not None else _df
-
-        if _df_con is None:
-            print(f"\t\t\033[31mNo File in '{self.path}' Could Read, Please Check Out the Current Path\033[0m")
+        if not df_list:
             return None, None

-
-
-        _fout_qc = self._QC(_fout_raw)
+        raw_data = self._raw_process(concat(df_list))
+        qc_data = self._QC(raw_data)

-        return
+        return raw_data, qc_data

     # main flow
     def _run(self, _start, _end):
-
         _f_raw_done, _f_qc_done = None, None

         # read pickle if pickle file exists and 'reset=False' or process raw data or append new data
-
-        if _pkl_exist & ((~self.reset) | self.apnd):
+        if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and (not self.reset or self.apnd):
             print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}")

             _f_raw_done, _f_qc_done = self._read_pkl()
@@ -284,19 +253,19 @@ class AbstractReader(ABC):
         # read raw data
         print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it")

-        _f_raw, _f_qc = self.
+        _f_raw, _f_qc = self._read_raw_files()
         if _f_raw is None:
             return None

         # append new data and pickle data
-        if self.apnd
+        if self.apnd and self.pkl_nam.exists():
             _f_raw = self._apnd_prcs(_f_raw_done, _f_raw)
             _f_qc = self._apnd_prcs(_f_qc_done, _f_qc)

         _f_qc = self._outlier_prcs(_f_qc)

         # save
-        self.
+        self._save_data(_f_raw, _f_qc)

         # process time index
         # if (_start is not None)|(_end is not None):
@@ -306,97 +275,3 @@ class AbstractReader(ABC):
             self._rate_calculate(_f_raw, _f_qc, _start_raw, _end_raw)

         return _f_qc if self.qc else _f_raw
-
-    # -------------------------------------------------------------------------------------
-    # old flow
-    # def __run(self, _start, _end):
-    #
-    #     ## read pickle if pickle file exists and 'reset=False' or process raw data
-    #     if (self.path / self.pkl_nam in list(self.path.glob('*.pkl'))) & (~self.reset):
-    #         print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}")
-    #
-    #         with (self.path / self.pkl_nam).open('rb') as f:
-    #             _fout_qc = pkl.load(f)
-    #
-    #         _exist = (self.path / self.pkl_nam_raw).exists()
-    #         if _exist:
-    #             with (self.path / self.pkl_nam_raw).open('rb') as f:
-    #                 _fout_raw = pkl.load(f)
-    #         else:
-    #             _fout_raw = _fout_qc
-    #
-    #         _start, _end = to_datetime(_start) or _fout_qc.index[0], to_datetime(_end) or _fout_qc.index[-1]
-    #         _idx = date_range(_start, _end, freq=_fout_qc.index.freq.copy())
-    #         _idx.name = 'time'
-    #
-    #         _fout_raw, _fout_qc = _fout_raw.reindex(_idx), _fout_qc.reindex(_idx)
-    #         if (self.rate) & (_exist):
-    #             self._rate_calculate(_fout_raw, _fout_qc)
-    #
-    #         return _fout_qc if self.qc else _fout_raw
-    #     else:
-    #         print(
-    #             f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it")
-    #
-    #         ##=================================================================================================================
-    #         ## read raw data
-    #         _df_con, _f_list = None, list(self.path.glob(self.meta['pattern']))
-    #
-    #         if len(_f_list) == 0:
-    #             print(f"\t\t\033[31mNo File in '{self.path}' Could Read, Please Check Out the Current Path\033[0m")
-    #             return None
-    #
-    #         for file in _f_list:
-    #             if file.name in [self.csv_out, self.csv_nam, self.csv_nam_raw, f'{self.nam}.log']: continue
-    #
-    #             print(f"\r\t\treading {file.name}", end='')
-    #
-    #             _df = self._raw_reader(file)
-    #
-    #             ## concat the concated list
-    #             if _df is not None:
-    #                 _df_con = concat([_df_con, _df]) if _df_con is not None else _df
-    #         print()
-    #
-    #         ## QC
-    #         _save_raw = self._raw_process(_df_con)
-    #         _save_qc = self._QC(_save_raw)
-    #
-    #         _start, _end = to_datetime(_start) or _save_raw.index[0], to_datetime(_end) or _save_raw.index[-1]
-    #         _idx = date_range(_start, _end, freq=_save_raw.index.freq.copy())
-    #         _idx.name = 'time'
-    #
-    #         _fout_raw, _fout_qc = _save_raw.reindex(_idx).copy(), _save_qc.reindex(_idx).copy()
-    #
-    #         self._rate_calculate(_fout_raw, _fout_qc)
-    #
-    #         ##=================================================================================================================
-    #         ## dump pickle file
-    #         _check = True
-    #         while _check:
-    #
-    #             try:
-    #                 with (self.path / self.pkl_nam).open('wb') as f:
-    #                     pkl.dump(_save_qc, f, protocol=pkl.HIGHEST_PROTOCOL)
-    #
-    #                 ## dump csv file
-    #                 if self.csv:
-    #                     _save_qc.to_csv(self.path / self.csv_nam)
-    #
-    #                 ## output raw data if qc file
-    #                 if self.meta['deter_key'] is not None:
-    #                     with (self.path / self.pkl_nam_raw).open('wb') as f:
-    #                         pkl.dump(_save_raw, f, protocol=pkl.HIGHEST_PROTOCOL)
-    #
-    #                 return _fout_qc if self.qc else _fout_raw
-    #
-    #                 _check = False
-    #
-    #             except PermissionError as _err:
-    #                 print('\n', _err)
-    #                 input('\t\t\33[41m Please Close The File And Press "Enter" \33[0m\n')
-    #
-    #         return _fout_qc
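Taken together, the core refactor replaces the old positional constructor and kwargs-driven __call__ with typed keyword switches, file-based logging, and a (raw, qc) tuple flowing through _read_raw_files and _save_data. A minimal usage sketch under stated assumptions (the AE33 data directory and dates are illustrative, and instantiating a script Reader directly is only one possible entry point):

    from datetime import datetime

    from AeroViz.rawDataReader.script.AE33 import Reader

    # reset=True would force re-reading the raw files instead of the cached pickle
    reader = Reader('data/AE33', qc=True, csv_raw=True, reset=False)

    # __call__ validates the time range, resamples to mean_freq, writes
    # output_ae33.csv into the data directory, and returns a DataFrame or None
    df = reader(start=datetime(2024, 1, 1), end=datetime(2024, 2, 1), mean_freq='1h')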
AeroViz/rawDataReader/script/AE33.py
@@ -7,25 +7,24 @@ class Reader(AbstractReader):
     nam = 'AE33'

     def _raw_reader(self, _file):
+        if _file.stat().st_size / 1024 < 550:
+            print('\t It may not be a whole daily data.')
+
         _df = read_table(_file, parse_dates={'time': [0, 1]}, index_col='time',
                          delimiter=r'\s+', skiprows=5, usecols=range(67))
         _df.columns = _df.columns.str.strip(';')

         # remove data without Status=0, 128 (Not much filter tape), 256 (Not much filter tape)
-        if
-        _df = _df.where(
+        if self.meta.get('error_state', False):
+            _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()
+
+        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']]

-        return _df[
+        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

     def _QC(self, _df):
         # remove negative value
         _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']].mask((_df < 0).copy())

-        # QC data in
-
-        _df_ave, _df_std = df.mean(), df.std()
-        _df_lowb, _df_highb = df < (_df_ave - _df_std * 1.5), df > (_df_ave + _df_std * 1.5)
-
-        return df.mask(_df_lowb | _df_highb).copy()
-
-        return _df.resample('5min').apply(_QC_func).resample('1h').mean()
+        # QC data in 1h
+        return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
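The new error_state hook filters out rows whose instrument status code marks a maintenance event before the BC columns are kept. A small sketch with a hypothetical error_state list (the real codes live in the instrument meta in config/supported_instruments.py):

    import pandas as pd

    df = pd.DataFrame({'BC1': [850., 920., 880.], 'Status': [0, 128, 256]})
    error_state = [128, 256]  # hypothetical; e.g. filter-tape status codes

    # where() keeps rows whose condition is True and NaNs the rest, so flagged
    # records drop out of later resample means instead of biasing them
    df = df.where(~df['Status'].isin(error_state))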
AeroViz/rawDataReader/script/AE43.py
@@ -14,21 +14,18 @@ class Reader(AbstractReader):
         _df = _df.groupby('SetupID').get_group(_df_id)[
             ['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'Status']].copy()

-        # remove data without Status=0
-
+        # remove data without Status=0, 128 (Not much filter tape), 256 (Not much filter tape)
+        if self.meta.get('error_state', False):
+            _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()

-
+        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']]
+
+        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

     # QC data
     def _QC(self, _df):
         # remove negative value
         _df = _df.mask((_df < 0).copy())

-        # QC data in
-
-        _df_ave, _df_std = df.mean(), df.std()
-        _df_lowb, _df_highb = df < (_df_ave - _df_std * 1.5), df > (_df_ave + _df_std * 1.5)
-
-        return df.mask(_df_lowb | _df_highb).copy()
-
-        return _df.resample('5min').apply(_QC_func).resample('1h').mean()
+        # QC data in 1h
+        return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
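As the context lines above show, an AE43 file can hold several measurement setups; groupby('SetupID').get_group(...) isolates one before the same Status filter and duplicate-index cleanup run. A toy illustration (the SetupID values are invented):

    import pandas as pd

    df = pd.DataFrame({'SetupID': [1, 1, 2], 'BC1': [850., 920., 70.]})

    # keep only rows recorded under setup 1, then drop the grouping column
    df = df.groupby('SetupID').get_group(1)[['BC1']]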
AeroViz/rawDataReader/script/APS_3321.py
@@ -12,28 +12,28 @@ class Reader(AbstractReader):
         _df = read_table(f, skiprows=6, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time')
         _key = list(_df.keys()[3:54])  ## 542 ~ 1981

-
+        # create new keys
         _newkey = {}
         for _k in _key:
             _newkey[_k] = float(_k).__round__(4)
         # _newkey['Mode(m)'] = 'mode'

-
+        # get new dataframe
         _df = _df[_newkey.keys()].rename(_newkey, axis=1)
-        #
+        # df['total'] = _df[list(_newkey.values())[:-1]].sum(axis=1)*(n.diff(n.log(_df.keys()[:-1].to_numpy(float))).mean()).copy()

         _df_idx = to_datetime(_df.index, errors='coerce')

         return _df.set_index(_df_idx).loc[_df_idx.dropna()]

-
+    # QC data
     def _QC(self, _df):
-
+        # mask out the data size lower than 7
         _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
         _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
         _df = _df.mask(_df_size < 7)

-
+        # remove total conc. lower than 700
         _df = _df.mask(_df['total'] > 700)

         # not confirmed
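The 'total' column used by the QC gate integrates the size distribution: summing dN/dlogDp over the bins and multiplying by the mean log-diameter spacing approximates N_total = ∫ (dN/dlogDp) d(logDp). A sketch with invented bin values (the reader derives the spacing from its 542–1981 nm column headers):

    import numpy as np

    dp = np.array([542., 583., 626., 673., 723.])     # bin midpoints, nm (illustrative)
    dndlogdp = np.array([120., 150., 90., 60., 30.])  # dN/dlogDp per bin

    dlogdp = np.diff(np.log(dp)).mean()               # the n.diff(n.log(...)).mean() term above
    total = dndlogdp.sum() * dlogdp                   # ~ total number concentration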
AeroViz/rawDataReader/script/Aurora.py
@@ -7,32 +7,31 @@ class Reader(AbstractReader):
     nam = 'Aurora'

     def _raw_reader(self, _file):
-        with
+        with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
             _df = read_csv(f, low_memory=False, index_col=0)

-        _df.index = to_datetime(_df.index, errors='coerce'
+        _df.index = to_datetime(_df.index, errors='coerce')
         _df.index.name = 'time'

         _df.columns = _df.keys().str.strip(' ')

-
-
-
+        # consider another csv format
+        _df = _df.rename(columns={
+            '0°σspB': 'B', '0°σspG': 'G', '0°σspR': 'R',
+            '90°σspB': 'BB', '90°σspG': 'BG', '90°σspR': 'BR',
+            'Blue': 'B', 'Green': 'G', 'Red': 'R',
+            'B_Blue': 'BB', 'B_Green': 'BG', 'B_Red': 'BR',
+            'RH': 'RH'
+        })

-
+        _df = _df[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']]

-
-    def _QC(self, _df):
-        ## remove negative value
-        _df = _df.mask((_df <= 0).copy())
-
-        ## call by _QC function
-        ## QC data in 1 hr
-        def _QC_func(_df_1hr):
-            _df_ave = _df_1hr.mean()
-            _df_std = _df_1hr.std()
-            _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
+        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

-
+    # QC data
+    def _QC(self, _df):
+        # remove negative value
+        _df = _df.mask((_df <= 0) | (_df > 2000)).copy()

-
+        # QC data in 1h
+        return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
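A single rename map covers both Aurora export layouts because DataFrame.rename silently skips keys that are absent, so either the 0°σsp/90°σsp headers or the Blue/B_Blue headers collapse onto B/G/R/BB/BG/BR. A quick check of that behavior:

    import pandas as pd

    df = pd.DataFrame(columns=['Blue', 'Green', 'Red', 'B_Blue', 'B_Green', 'B_Red', 'RH'])
    df = df.rename(columns={'0°σspB': 'B', 'Blue': 'B', '0°σspG': 'G', 'Green': 'G',
                            '0°σspR': 'R', 'Red': 'R', '90°σspB': 'BB', 'B_Blue': 'BB',
                            '90°σspG': 'BG', 'B_Green': 'BG', '90°σspR': 'BR', 'B_Red': 'BR'})

    print(list(df.columns))  # ['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']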
AeroViz/rawDataReader/script/BC1054.py
@@ -8,7 +8,9 @@ class Reader(AbstractReader):

     def _raw_reader(self, _file):
         with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
-            _df = read_csv(f, parse_dates=
+            _df = read_csv(f, parse_dates=True, index_col=0)
+
+        _df.columns = _df.columns.str.replace(' ', '')

         _df = _df.rename(columns={
             'BC1(ng/m3)': 'BC1',
@@ -23,24 +25,18 @@ class Reader(AbstractReader):
             'BC10(ng/m3)': 'BC10'
         })

-        # remove data without Status=32 (Automatic Tape Advance), 65536 (Tape Move)
-
-
+        # remove data without Status=1, 8, 16, 32 (Automatic Tape Advance), 65536 (Tape Move)
+        if self.meta.get('error_state', False):
+            _df = _df[~_df['Status'].isin(self.meta.get('error_state'))]
+
+        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']]

-        return _df[
+        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

     # QC data
     def _QC(self, _df):
         # remove negative value
         _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']].mask((_df < 0).copy())

-        #
-
-        def _QC_func(_df_1hr):
-            _df_ave = _df_1hr.mean()
-            _df_std = _df_1hr.std()
-            _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
-
-            return _df_1hr.mask(_df_lowb | _df_highb).copy()
-
-        return _df.resample('1h', group_keys=False).apply(_QC_func).resample('5min').mean()
+        # QC data in 1h
+        return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()