AeroViz 0.1.3b0__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of AeroViz might be problematic.
- AeroViz/__init__.py +5 -3
- AeroViz/{config → data}/DEFAULT_DATA.csv +1 -1
- AeroViz/dataProcess/Chemistry/__init__.py +28 -27
- AeroViz/dataProcess/Chemistry/_isoropia.py +11 -11
- AeroViz/dataProcess/Chemistry/_mass_volume.py +15 -18
- AeroViz/dataProcess/Chemistry/_ocec.py +21 -46
- AeroViz/dataProcess/Chemistry/_teom.py +2 -1
- AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
- AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
- AeroViz/dataProcess/Optical/Angstrom_exponent.py +20 -0
- AeroViz/dataProcess/Optical/_IMPROVE.py +13 -15
- AeroViz/dataProcess/Optical/__init__.py +15 -30
- AeroViz/dataProcess/Optical/_absorption.py +21 -47
- AeroViz/dataProcess/Optical/_extinction.py +20 -15
- AeroViz/dataProcess/Optical/_mie.py +0 -1
- AeroViz/dataProcess/Optical/_scattering.py +19 -20
- AeroViz/dataProcess/Optical/fRH.pkl +0 -0
- AeroViz/dataProcess/SizeDistr/__init__.py +7 -7
- AeroViz/dataProcess/SizeDistr/_merge.py +2 -2
- AeroViz/dataProcess/SizeDistr/_merge_v1.py +2 -2
- AeroViz/dataProcess/SizeDistr/_merge_v2.py +2 -2
- AeroViz/dataProcess/SizeDistr/_merge_v3.py +1 -1
- AeroViz/dataProcess/SizeDistr/_merge_v4.py +1 -1
- AeroViz/dataProcess/VOC/__init__.py +4 -9
- AeroViz/dataProcess/VOC/_potential_par.py +71 -37
- AeroViz/dataProcess/VOC/{voc_par.json → support_voc.json} +321 -339
- AeroViz/dataProcess/__init__.py +28 -6
- AeroViz/dataProcess/core/__init__.py +10 -17
- AeroViz/plot/__init__.py +1 -1
- AeroViz/plot/box.py +2 -1
- AeroViz/plot/optical/optical.py +4 -4
- AeroViz/plot/regression.py +25 -39
- AeroViz/plot/scatter.py +68 -2
- AeroViz/plot/templates/__init__.py +2 -1
- AeroViz/plot/templates/ammonium_rich.py +34 -0
- AeroViz/plot/templates/diurnal_pattern.py +11 -9
- AeroViz/plot/templates/koschmieder.py +51 -115
- AeroViz/plot/templates/metal_heatmap.py +115 -17
- AeroViz/plot/timeseries/__init__.py +1 -0
- AeroViz/plot/timeseries/template.py +47 -0
- AeroViz/plot/timeseries/timeseries.py +275 -208
- AeroViz/plot/utils/plt_utils.py +2 -2
- AeroViz/plot/utils/units.json +5 -0
- AeroViz/plot/violin.py +9 -8
- AeroViz/process/__init__.py +2 -2
- AeroViz/process/script/AbstractDistCalc.py +1 -1
- AeroViz/process/script/Chemical.py +5 -4
- AeroViz/process/script/Others.py +1 -1
- AeroViz/rawDataReader/__init__.py +66 -22
- AeroViz/rawDataReader/{utils/config.py → config/supported_instruments.py} +33 -54
- AeroViz/rawDataReader/core/__init__.py +116 -231
- AeroViz/rawDataReader/script/AE33.py +12 -13
- AeroViz/rawDataReader/script/AE43.py +10 -13
- AeroViz/rawDataReader/script/APS_3321.py +8 -8
- AeroViz/rawDataReader/script/Aurora.py +21 -19
- AeroViz/rawDataReader/script/BC1054.py +13 -17
- AeroViz/rawDataReader/script/EPA_vertical.py +36 -8
- AeroViz/rawDataReader/script/GRIMM.py +6 -13
- AeroViz/rawDataReader/script/{IGAC_ZM.py → IGAC.py} +18 -18
- AeroViz/rawDataReader/script/MA350.py +9 -16
- AeroViz/rawDataReader/script/Minion.py +103 -0
- AeroViz/rawDataReader/script/NEPH.py +28 -38
- AeroViz/rawDataReader/script/SMPS_TH.py +6 -6
- AeroViz/rawDataReader/script/SMPS_aim11.py +8 -8
- AeroViz/rawDataReader/script/SMPS_genr.py +8 -8
- AeroViz/rawDataReader/script/Sunset_OCEC.py +66 -0
- AeroViz/rawDataReader/script/TEOM.py +10 -8
- AeroViz/rawDataReader/script/Table.py +9 -10
- AeroViz/rawDataReader/script/VOC.py +33 -0
- AeroViz/rawDataReader/script/__init__.py +10 -12
- AeroViz/tools/database.py +7 -9
- AeroViz/tools/datareader.py +3 -3
- {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.5.dist-info}/METADATA +1 -1
- AeroViz-0.1.5.dist-info/RECORD +114 -0
- AeroViz/rawDataReader/script/IGAC_TH.py +0 -104
- AeroViz/rawDataReader/script/OCEC_LCRES.py +0 -34
- AeroViz/rawDataReader/script/OCEC_RES.py +0 -28
- AeroViz/rawDataReader/script/VOC_TH.py +0 -30
- AeroViz/rawDataReader/script/VOC_ZM.py +0 -37
- AeroViz-0.1.3b0.dist-info/RECORD +0 -110
- /AeroViz/{config → data}/DEFAULT_PNSD_DATA.csv +0 -0
- /AeroViz/rawDataReader/{utils → config}/__init__.py +0 -0
- {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.5.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.5.dist-info}/WHEEL +0 -0
- {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.5.dist-info}/top_level.txt +0 -0
AeroViz/rawDataReader/core/__init__.py

@@ -1,14 +1,16 @@
 import json as jsn
+import logging
 import pickle as pkl
 from abc import ABC, abstractmethod
 from datetime import datetime as dtm
-from itertools import chain
 from pathlib import Path
+from typing import Any

 import numpy as np
+import pandas as pd
 from pandas import DataFrame, date_range, concat, to_numeric, to_datetime

-from ..
+from ..config.supported_instruments import meta

 __all__ = ['AbstractReader']

@@ -16,117 +18,108 @@ __all__ = ['AbstractReader']
 class AbstractReader(ABC):
     nam = 'AbstractReader'

-    # initial
+    # initial data
     # input : file path, reset switch

     # list the file in the path and read pickle file if it exists, else read raw data and dump the pickle file the
     # pickle file will be generated after read raw data first time, if you want to re-read the rawdata, please set
     # 'reset=True'

     def __init__(self,
-
-
-
+                 path: Path | str,
+                 qc: bool = True,
+                 csv_raw: bool = True,
+                 reset: bool = False,
+                 rate: bool = False,
+                 append_data: bool = False):

-
-        # self.index = lambda _freq: date_range(_sta, _fin, freq=_freq)
-        self.path = Path(_path)
+        self.path = Path(path)
         self.meta = meta[self.nam]
-
-        if update_meta is not None:
-            self.meta.update(update_meta)
+        self.logger = self._setup_logger()

         self.reset = reset
         self.rate = rate
-        self.qc =
+        self.qc = qc
         self.csv = csv_raw
-        self.
-
-        self.pkl_nam = f'_read_{self.nam.lower()}.pkl'
-        self.csv_nam = f'_read_{self.nam.lower()}.csv'
+        self.append = append_data & reset

-        self.
-        self.
+        self.pkl_nam = self.path / f'_read_{self.nam.lower()}.pkl'
+        self.csv_nam = self.path / f'_read_{self.nam.lower()}.csv'
+        self.pkl_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.pkl'
+        self.csv_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.csv'
+        self.csv_out = self.path / f'output_{self.nam.lower()}.csv'

-
+    # dependency injection function, customize each instrument
+    @abstractmethod
+    def _raw_reader(self, file):
+        pass

-
-
-
+    @abstractmethod
+    def _QC(self, df: DataFrame):
+        return df

-    # get data
     def __call__(self,
                  start: dtm | None = None,
                  end: dtm | None = None,
-                 mean_freq='1h',
-                 csv_out=True,
-
-
-        self._oth_set = kwarg
+                 mean_freq: str = '1h',
+                 csv_out: bool = True,
+                 ) -> DataFrame | None:

         if start and end and end <= start:
-            raise ValueError(
-                f'\nPlease check out input time : '
-                f'\n\tstart : {start.strftime("%Y-%m-%d %X")}'
-                f'\n\tend : {end.strftime("%Y-%m-%d %X")}')
-
-        fout = self._run(start, end)
+            raise ValueError(f"Invalid time range: start {start} is after end {end}")

-
-        if mean_freq is not None:
-            fout = fout.resample(mean_freq).mean()
+        data = self._run(start, end)

+        if data is not None:
+            if mean_freq:
+                data = data.resample(mean_freq).mean()
             if csv_out:
-
+                data.to_csv(self.csv_out)

-        return
+        return data

-
-
-
-
-        pass
+    @staticmethod
+    def basic_QC(df: DataFrame):
+        df_ave, df_std = df.mean(), df.std()
+        df_lowb, df_highb = df < (df_ave - df_std * 1.5), df > (df_ave + df_std * 1.5)

-
-    def _QC(self, df: DataFrame):
-        # customize each instrument
-        return df
+        return df.mask(df_lowb | df_highb).copy()

     # set each to true datetime(18:30:01 -> 18:30:00) and rindex data
     def _raw_process(self, _df):
         # get time from df and set time to whole time to create time index
         _st, _ed = _df.index.sort_values()[[0, -1]]
-        _tm_index = date_range(_st.strftime('%Y%m%d %H00'),
-                               (_ed + dtmdt(hours=1)).strftime('%Y%m%d %H00'),
+        _tm_index = date_range(_st.strftime('%Y%m%d %H00'), _ed.floor('h').strftime('%Y%m%d %H00'),
                                freq=self.meta['freq'])
         _tm_index.name = 'time'

         return _df.apply(to_numeric, errors='coerce').resample(self.meta['freq']).mean().reindex(_tm_index)

+    def _setup_logger(self) -> logging.Logger:
+        logger = logging.getLogger(self.nam)
+        logger.setLevel(logging.INFO)
+        handler = logging.FileHandler(self.path / f'{self.nam}.log')
+        handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
+        logger.addHandler(handler)
+        return logger
+
     # acquisition rate and yield rate
     def _rate_calculate(self, _fout_raw, _fout_qc, _st_raw, _ed_raw):
-
         if self.meta['deter_key'] is not None:
             _start, _end = _fout_qc.index[[0, -1]]

             _drop_how = 'any'
             _the_size = len(_fout_raw.resample('1h').mean().index)

-
-
-
-
-
-
-
-            _f.write(f"{'-' * 60}\n")
-            _f.write(f"rawdata time : \n\t{_st_raw.strftime('%Y-%m-%d %X')} ~ {_ed_raw.strftime('%Y-%m-%d %X')}\n")
-            _f.write(f"output time : \n\t{_start.strftime('%Y-%m-%d %X')} ~ {_end.strftime('%Y-%m-%d %X')}\n")
-            _f.write(f"{'-' * 60}\n")
-            print(f"\n\t\tfrom {_start.strftime('%Y-%m-%d %X')} to {_end.strftime('%Y-%m-%d %X')}\n")
+            self.logger.info(f"{'=' * 60}")
+            self.logger.info(
+                f"Raw data time : {_st_raw.strftime('%Y-%m-%d %H:%M:%S')} to {_ed_raw.strftime('%Y-%m-%d %H:%M:%S')}")
+            self.logger.info(
+                f"Output time : {_start.strftime('%Y-%m-%d %H:%M:%S')} to {_end.strftime('%Y-%m-%d %H:%M:%S')}")
+            self.logger.info(f"{'-' * 60}")
+            print(f"\n\n\t\tfrom {_start.strftime('%Y-%m-%d %H:%M:%S')} to {_end.strftime('%Y-%m-%d %H:%M:%S')}\n")

             for _nam, _key in self.meta['deter_key'].items():
-
                 if _key == ['all']:
                     _key, _drop_how = _fout_qc.keys(), 'all'

@@ -139,18 +132,14 @@ class AbstractReader(ABC):
                 except ZeroDivisionError:
                     _acq_rate, _yid_rate = 0, 0

-
-
-
+                self.logger.info(f'{_nam}:')
+                self.logger.info(f"\tAcquisition rate: {_acq_rate}%")
+                self.logger.info(f'\tYield rate: {_yid_rate}%')
+                self.logger.info(f"{'=' * 60}")

                 print(f'\t\t{_nam} : ')
                 print(f'\t\t\tacquisition rate : \033[91m{_acq_rate}%\033[0m')
-                print(f'\t\t\tyield
-
-            _f.write(f"{'=' * 40}\n")
-            _f.write(_cont)
-
-            _f.close()
+                print(f'\t\t\tyield rate : \033[91m{_yid_rate}%\033[0m')

     # process time index
     @staticmethod

@@ -191,86 +180,78 @@ class AbstractReader(ABC):
         return _df

     # save pickle file
-    def
-
-
-
-        try:
-            with (self.path / self.pkl_nam).open('wb') as f:
-                pkl.dump(_save_qc, f, protocol=pkl.HIGHEST_PROTOCOL)
-
-            # dump csv file
-            if self.csv:
-                _save_qc.to_csv(self.path / self.csv_nam)
-
-            # output raw data if qc file
-            if self.meta['deter_key'] is not None:
-                with (self.path / self.pkl_nam_raw).open('wb') as f:
-                    pkl.dump(_save_raw, f, protocol=pkl.HIGHEST_PROTOCOL)
-
-                if self.csv:
-                    _save_raw.to_csv(self.path / self.csv_nam_raw)
+    def _save_data(self, raw_data: DataFrame, qc_data: DataFrame) -> None:
+        self._safe_pickle_dump(self.pkl_nam, qc_data)
+        if self.csv:
+            qc_data.to_csv(self.csv_nam)

-
+        if self.meta['deter_key'] is not None:
+            self._safe_pickle_dump(self.pkl_nam_raw, raw_data)
+            if self.csv:
+                raw_data.to_csv(self.csv_nam_raw)

-
-
-
+    @staticmethod
+    def _safe_pickle_dump(file_path: Path, data: Any) -> None:
+        while True:
+            try:
+                with file_path.open('wb') as f:
+                    pkl.dump(data, f, protocol=pkl.HIGHEST_PROTOCOL)
+                break
+            except PermissionError as err:
+                print('\n', err)
+                input('\t\t\33[41m Please close the file and press "Enter" \33[0m\n')

     # read pickle file
-    def _read_pkl(self
-    	with
-
+    def _read_pkl(self):
+        with self.pkl_nam.open('rb') as qc_data, self.pkl_nam_raw.open('rb') as raw_data:
+            return pkl.load(raw_data), pkl.load(qc_data)

-
-
-
-
-            _fout_raw = _fout_qc
+    def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
+        patterns = {self.meta['pattern'].lower(), self.meta['pattern'].upper(), self.meta['pattern']}
+        files = [f for pattern in patterns for f in self.path.glob(pattern)
+                 if f.name not in [self.csv_out.name, self.csv_nam.name, self.csv_nam_raw.name, f'{self.nam}.log']]

-
+        if not files:
+            raise FileNotFoundError(f"\t\t\033[31mNo files in '{self.path}' could be read."
+                                    f"Please check the current path.\033[0m")

-
-
-
-        patterns = {pattern, pattern.lower(), pattern.upper()}
-        _df_con, _f_list = None, list(chain.from_iterable(self.path.glob(p) for p in patterns))
+        df_list = []
+        for file in files:
+            print(f"\r\t\treading {file.name}", end='')

-
-
-                continue
+            try:
+                df = self._raw_reader(file)

-
+                if df is not None and not df.empty:
+                    df_list.append(df)
+                else:
+                    self.logger.warning(f"File {file.name} produced an empty DataFrame or None.")

-
+            except pd.errors.ParserError as e:
+                self.logger.error(f"Error tokenizing data: {e}")

-
-
-            _df_con = concat([_df_con, _df]) if _df_con is not None else _df
+            except Exception as e:
+                self.logger.error(f"Error reading {file.name}: {e}")

-        if
-
-            return None, None
+        if not df_list:
+            raise ValueError("All files were either empty or failed to read.")

-
-
-        _fout_qc = self._QC(_fout_raw)
+        raw_data = self._raw_process(concat(df_list))
+        qc_data = self._QC(raw_data)

-        return
+        return raw_data, qc_data

     # main flow
     def _run(self, _start, _end):
-
         _f_raw_done, _f_qc_done = None, None

         # read pickle if pickle file exists and 'reset=False' or process raw data or append new data
-
-        if _pkl_exist & ((~self.reset) | self.apnd):
+        if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and (not self.reset or self.append):
             print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}")

             _f_raw_done, _f_qc_done = self._read_pkl()

-            if not self.
+            if not self.append:
                 _f_raw_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw_done)
                 _f_qc_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc_done)

@@ -284,19 +265,17 @@ class AbstractReader(ABC):
         # read raw data
         print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it")

-        _f_raw, _f_qc = self.
-        if _f_raw is None:
-            return None
+        _f_raw, _f_qc = self._read_raw_files()

         # append new data and pickle data
-        if self.
+        if self.append and self.pkl_nam.exists():
             _f_raw = self._apnd_prcs(_f_raw_done, _f_raw)
             _f_qc = self._apnd_prcs(_f_qc_done, _f_qc)

         _f_qc = self._outlier_prcs(_f_qc)

         # save
-        self.
+        self._save_data(_f_raw, _f_qc)

         # process time index
         # if (_start is not None)|(_end is not None):

@@ -306,97 +285,3 @@ class AbstractReader(ABC):
         self._rate_calculate(_f_raw, _f_qc, _start_raw, _end_raw)

         return _f_qc if self.qc else _f_raw
-
-    # -------------------------------------------------------------------------------------
-    # old flow
-    # def __run(self, _start, _end):
-    #
-    #     ## read pickle if pickle file exists and 'reset=False' or process raw data
-    #     if (self.path / self.pkl_nam in list(self.path.glob('*.pkl'))) & (~self.reset):
-    #         print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}")
-    #
-    #         with (self.path / self.pkl_nam).open('rb') as f:
-    #             _fout_qc = pkl.load(f)
-    #
-    #         _exist = (self.path / self.pkl_nam_raw).exists()
-    #         if _exist:
-    #             with (self.path / self.pkl_nam_raw).open('rb') as f:
-    #                 _fout_raw = pkl.load(f)
-    #         else:
-    #             _fout_raw = _fout_qc
-    #
-    #         _start, _end = to_datetime(_start) or _fout_qc.index[0], to_datetime(_end) or _fout_qc.index[-1]
-    #         _idx = date_range(_start, _end, freq=_fout_qc.index.freq.copy())
-    #         _idx.name = 'time'
-    #
-    #         _fout_raw, _fout_qc = _fout_raw.reindex(_idx), _fout_qc.reindex(_idx)
-    #         if (self.rate) & (_exist):
-    #             self._rate_calculate(_fout_raw, _fout_qc)
-    #
-    #         return _fout_qc if self.qc else _fout_raw
-    #     else:
-    #         print(
-    #             f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it")
-    #
-    #         ##=================================================================================================================
-    #         ## read raw data
-    #         _df_con, _f_list = None, list(self.path.glob(self.meta['pattern']))
-    #
-    #         if len(_f_list) == 0:
-    #             print(f"\t\t\033[31mNo File in '{self.path}' Could Read, Please Check Out the Current Path\033[0m")
-    #             return None
-    #
-    #         for file in _f_list:
-    #             if file.name in [self.csv_out, self.csv_nam, self.csv_nam_raw, f'{self.nam}.log']: continue
-    #
-    #             print(f"\r\t\treading {file.name}", end='')
-    #
-    #             _df = self._raw_reader(file)
-    #
-    #             ## concat the concated list
-    #             if _df is not None:
-    #                 _df_con = concat([_df_con, _df]) if _df_con is not None else _df
-    #         print()
-    #
-    #         ## QC
-    #         _save_raw = self._raw_process(_df_con)
-    #         _save_qc = self._QC(_save_raw)
-    #
-    #         _start, _end = to_datetime(_start) or _save_raw.index[0], to_datetime(_end) or _save_raw.index[-1]
-    #         _idx = date_range(_start, _end, freq=_save_raw.index.freq.copy())
-    #         _idx.name = 'time'
-    #
-    #         _fout_raw, _fout_qc = _save_raw.reindex(_idx).copy(), _save_qc.reindex(_idx).copy()
-    #
-    #         self._rate_calculate(_fout_raw, _fout_qc)
-    #
-    #         ##=================================================================================================================
-    #         ## dump pickle file
-    #         _check = True
-    #         while _check:
-    #
-    #             try:
-    #                 with (self.path / self.pkl_nam).open('wb') as f:
-    #                     pkl.dump(_save_qc, f, protocol=pkl.HIGHEST_PROTOCOL)
-    #
-    #                 ## dump csv file
-    #                 if self.csv:
-    #                     _save_qc.to_csv(self.path / self.csv_nam)
-    #
-    #                 ## output raw data if qc file
-    #                 if self.meta['deter_key'] is not None:
-    #                     with (self.path / self.pkl_nam_raw).open('wb') as f:
-    #                         pkl.dump(_save_raw, f, protocol=pkl.HIGHEST_PROTOCOL)
-    #
-    #                 if self.csv:
-    #                     _save_raw.to_csv(self.path / self.csv_nam_raw)
-    #
-    #                 return _fout_qc if self.qc else _fout_raw
-    #
-    #                 _check = False
-    #
-    #             except PermissionError as _err:
-    #                 print('\n', _err)
-    #                 input('\t\t\33[41m Please Close The File And Press "Enter" \33[0m\n')
-    #
-    #     return _fout_qc
AeroViz/rawDataReader/script/AE33.py

@@ -6,26 +6,25 @@ from AeroViz.rawDataReader.core import AbstractReader
 class Reader(AbstractReader):
     nam = 'AE33'

-    def _raw_reader(self,
-
+    def _raw_reader(self, file):
+        if file.stat().st_size / 1024 < 550:
+            print('\t It may not be a whole daily data.')
+
+        _df = read_table(file, parse_dates={'time': [0, 1]}, index_col='time',
                          delimiter=r'\s+', skiprows=5, usecols=range(67))
         _df.columns = _df.columns.str.strip(';')

         # remove data without Status=0, 128 (Not much filter tape), 256 (Not much filter tape)
-        if
-            _df = _df.where(
+        if self.meta.get('error_state', False):
+            _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()
+
+        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']]

-        return _df[
+        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

     def _QC(self, _df):
         # remove negative value
         _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']].mask((_df < 0).copy())

-        # QC data in
-
-            _df_ave, _df_std = df.mean(), df.std()
-            _df_lowb, _df_highb = df < (_df_ave - _df_std * 1.5), df > (_df_ave + _df_std * 1.5)
-
-            return df.mask(_df_lowb | _df_highb).copy()
-
-        return _df.resample('5min').apply(_QC_func).resample('1h').mean()
+        # QC data in 1h
+        return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
AeroViz/rawDataReader/script/AE43.py

@@ -6,29 +6,26 @@ from AeroViz.rawDataReader.core import AbstractReader
 class Reader(AbstractReader):
     nam = 'AE43'

-    def _raw_reader(self,
-        _df = read_csv(
+    def _raw_reader(self, file):
+        _df = read_csv(file, parse_dates={'time': ['StartTime']}, index_col='time')
         _df_id = _df['SetupID'].iloc[-1]

         # get last SetupID data
         _df = _df.groupby('SetupID').get_group(_df_id)[
             ['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'Status']].copy()

-        # remove data without Status=0
-
+        # remove data without Status=0, 128 (Not much filter tape), 256 (Not much filter tape)
+        if self.meta.get('error_state', False):
+            _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()

-
+        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']]
+
+        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

     # QC data
     def _QC(self, _df):
         # remove negative value
         _df = _df.mask((_df < 0).copy())

-        # QC data in
-
-        _df_ave, _df_std = df.mean(), df.std()
-        _df_lowb, _df_highb = df < (_df_ave - _df_std * 1.5), df > (_df_ave + _df_std * 1.5)
-
-        return df.mask(_df_lowb | _df_highb).copy()
-
-        return _df.resample('5min').apply(_QC_func).resample('1h').mean()
+        # QC data in 1h
+        return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
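Both aethalometer readers (AE33 and AE43) now share the same two patterns: a metadata-driven `Status` filter and the hourly `basic_QC` pass inherited from `AbstractReader` instead of a duplicated local `_QC_func`. A standalone sketch of the status filter, with a hypothetical `meta` dict standing in for the real entry in `config/supported_instruments.py`:

```python
import pandas as pd

# hypothetical metadata entry; the real values live in supported_instruments.py
meta = {'error_state': [128, 256], 'freq': '1h'}

df = pd.DataFrame({'Status': [0, 128, 0, 256],
                   'BC1': [1200.0, 1300.0, 1250.0, 1400.0]})

# rows whose Status is listed in error_state are masked to NaN, mirroring the diff
if meta.get('error_state', False):
    df = df.where(~df['Status'].isin(meta['error_state'])).copy()
```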
AeroViz/rawDataReader/script/APS_3321.py

@@ -7,33 +7,33 @@ from AeroViz.rawDataReader.core import AbstractReader
 class Reader(AbstractReader):
     nam = 'APS_3321'

-    def _raw_reader(self,
-        with open(
+    def _raw_reader(self, file):
+        with open(file, 'r', encoding='utf-8', errors='ignore') as f:
             _df = read_table(f, skiprows=6, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time')
             _key = list(_df.keys()[3:54])  ## 542 ~ 1981

-
+        # create new keys
         _newkey = {}
         for _k in _key:
             _newkey[_k] = float(_k).__round__(4)
         # _newkey['Mode(m)'] = 'mode'

-
+        # get new dataframe
         _df = _df[_newkey.keys()].rename(_newkey, axis=1)
-        #
+        # df['total'] = _df[list(_newkey.values())[:-1]].sum(axis=1)*(n.diff(n.log(_df.keys()[:-1].to_numpy(float))).mean()).copy()

         _df_idx = to_datetime(_df.index, errors='coerce')

         return _df.set_index(_df_idx).loc[_df_idx.dropna()]

-
+    # QC data
     def _QC(self, _df):
-
+        # mask out the data size lower than 7
         _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
         _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
         _df = _df.mask(_df_size < 7)

-
+        # remove total conc. lower than 700
         _df = _df.mask(_df['total'] > 700)

         # not confirmed
AeroViz/rawDataReader/script/Aurora.py

@@ -6,33 +6,35 @@ from AeroViz.rawDataReader.core import AbstractReader
 class Reader(AbstractReader):
     nam = 'Aurora'

-    def _raw_reader(self,
-        with
+    def _raw_reader(self, file):
+        with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
             _df = read_csv(f, low_memory=False, index_col=0)

-        _df.index = to_datetime(_df.index, errors='coerce'
+        _df.index = to_datetime(_df.index, errors='coerce')
         _df.index.name = 'time'

         _df.columns = _df.keys().str.strip(' ')

-
-
-
+        # consider another csv format
+        _df = _df.rename(columns={
+            '0°σspB': 'B', '0°σspG': 'G', '0°σspR': 'R',
+            '90°σspB': 'BB', '90°σspG': 'BG', '90°σspR': 'BR',
+            'Blue': 'B', 'Green': 'G', 'Red': 'R',
+            'B_Blue': 'BB', 'B_Green': 'BG', 'B_Red': 'BR',
+            'RH': 'RH'
+        })

-
+        _df = _df[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']]

-
-    def _QC(self, _df):
-        ## remove negative value
-        _df = _df.mask((_df <= 0).copy())
+        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

-
-
-
-
-        _df_std = _df_1hr.std()
-        _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
+    # QC data
+    def _QC(self, _df):
+        # remove negative value
+        _df = _df.mask((_df <= 0) | (_df > 2000)).copy()

-
+        # total scattering is larger than back scattering
+        _df = _df[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])]

-
+        # QC data in 1h
+        return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()