AeroViz 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of AeroViz might be problematic.
- AeroViz/dataProcess/Chemistry/_ocec.py +20 -7
- AeroViz/plot/__init__.py +1 -0
- AeroViz/plot/meteorology/meteorology.py +2 -0
- AeroViz/plot/optical/optical.py +1 -1
- AeroViz/plot/pie.py +14 -2
- AeroViz/plot/radar.py +184 -0
- AeroViz/plot/scatter.py +16 -7
- AeroViz/plot/templates/koschmieder.py +11 -8
- AeroViz/plot/timeseries/timeseries.py +0 -1
- AeroViz/rawDataReader/__init__.py +74 -67
- AeroViz/rawDataReader/config/supported_instruments.py +52 -19
- AeroViz/rawDataReader/core/__init__.py +129 -104
- AeroViz/rawDataReader/script/AE33.py +1 -1
- AeroViz/rawDataReader/script/AE43.py +1 -1
- AeroViz/rawDataReader/script/Aurora.py +1 -1
- AeroViz/rawDataReader/script/BC1054.py +1 -1
- AeroViz/rawDataReader/script/EPA.py +39 -0
- AeroViz/rawDataReader/script/GRIMM.py +1 -1
- AeroViz/rawDataReader/script/IGAC.py +6 -23
- AeroViz/rawDataReader/script/MA350.py +1 -1
- AeroViz/rawDataReader/script/Minion.py +102 -30
- AeroViz/rawDataReader/script/NEPH.py +1 -1
- AeroViz/rawDataReader/script/OCEC.py +1 -1
- AeroViz/rawDataReader/script/SMPS.py +1 -0
- AeroViz/rawDataReader/script/TEOM.py +2 -2
- AeroViz/rawDataReader/script/XRF.py +11 -0
- AeroViz/rawDataReader/script/__init__.py +2 -2
- {AeroViz-0.1.6.dist-info → AeroViz-0.1.7.dist-info}/METADATA +46 -24
- {AeroViz-0.1.6.dist-info → AeroViz-0.1.7.dist-info}/RECORD +32 -48
- AeroViz/process/__init__.py +0 -31
- AeroViz/process/core/DataProc.py +0 -19
- AeroViz/process/core/SizeDist.py +0 -90
- AeroViz/process/core/__init__.py +0 -4
- AeroViz/process/method/PyMieScatt_update.py +0 -567
- AeroViz/process/method/__init__.py +0 -2
- AeroViz/process/method/mie_theory.py +0 -260
- AeroViz/process/method/prop.py +0 -62
- AeroViz/process/script/AbstractDistCalc.py +0 -143
- AeroViz/process/script/Chemical.py +0 -177
- AeroViz/process/script/IMPACT.py +0 -49
- AeroViz/process/script/IMPROVE.py +0 -161
- AeroViz/process/script/Others.py +0 -65
- AeroViz/process/script/PSD.py +0 -103
- AeroViz/process/script/PSD_dry.py +0 -93
- AeroViz/process/script/__init__.py +0 -5
- AeroViz/process/script/retrieve_RI.py +0 -69
- AeroViz/rawDataReader/script/EPA_vertical.py +0 -46
- AeroViz/rawDataReader/script/Table.py +0 -27
- {AeroViz-0.1.6.dist-info → AeroViz-0.1.7.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.6.dist-info → AeroViz-0.1.7.dist-info}/WHEEL +0 -0
- {AeroViz-0.1.6.dist-info → AeroViz-0.1.7.dist-info}/top_level.txt +0 -0
@@ -95,6 +95,53 @@ meta = {
         },
     },
 
+    "XRF": {
+        "pattern": ["*.csv"],
+        "freq": "1h",
+        "deter_key": {
+            "Al": ["Al"],
+            "Si": ["Si"],
+            "P": ["P"],
+            "S": ["S"],
+            "Cl": ["Cl"],
+            "K": ["K"],
+            "Ca": ["Ca"],
+            "Ti": ["Ti"],
+            "V": ["V"],
+            "Cr": ["Cr"],
+            "Mn": ["Mn"],
+            "Fe": ["Fe"],
+            "Ni": ["Ni"],
+            "Cu": ["Cu"],
+            "Zn": ["Zn"],
+            "As": ["As"],
+            "Se": ["Se"],
+            "Br": ["Br"],
+            "Rb": ["Rb"],
+            "Sr": ["Sr"],
+            "Y": ["Y"],
+            "Zr": ["Zr"],
+            "Mo": ["Mo"],
+            "Ag": ["Ag"],
+            "Cd": ["Cd"],
+            "In": ["In"],
+            "Sn": ["Sn"],
+            "Sb": ["Sb"],
+            "Te": ["Te"],
+            "Cs": ["Cs"],
+            "Ba": ["Ba"],
+            "La": ["La"],
+            "Ce": ["Ce"],
+            "W": ["W"],
+            "Pt": ["Pt"],
+            "Au": ["Au"],
+            "Hg": ["Hg"],
+            "Tl": ["Tl"],
+            "Pb": ["Pb"],
+            "Bi": ["Bi"],
+        },
+    },
+
     "VOC": {
         "pattern": ["*.csv"],
         "freq": "1h",
@@ -116,32 +163,18 @@ meta = {
         "deter_key": None,
     },
 
-    "
+    "EPA": {
         "pattern": ["*.csv"],
         "freq": "1h",
-        "deter_key":
-    },
-
-    "EPA_vertical": {
-        "pattern": ["*.csv"],
-        "freq": "1h",
-        "deter_key": None,
+        "deter_key": {"Items": ["all"]},
     },
 
     "Minion": {
-        "pattern": ["*.csv"],
+        "pattern": ["*.csv", "*.xlsx"],
         "freq": "1h",
         "deter_key": {
-            "Na+": ["Na+"],
-            "NH4+": ["NH4+"],
-            "K+": ["K+"],
-            "Mg2+": ["Mg2+"],
-            "Ca2+": ["Ca2+"],
-            "Cl-": ["Cl-"],
-            "NO2-": ["NO2-"],
-            "NO3-": ["NO3-"],
-            "SO42-": ["SO42-"],
-            "Main Salt (NH4+, NO3-, SO42-)": ["NO3-", "SO42-", "NH4+"],
+            "Main Salt (Na+, NH4+, Cl-, NO3-, SO42-)": ["Na+", "NH4+", "Cl-", "NO3-", "SO42-"],
+            "XRF (Al, Ti, V, Cr, Mn, Fe)": ["Al", "Ti", "V", "Cr", "Mn", "Fe"],
        },
    },
 }
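Each deter_key entry maps a report label to the column(s) that must be present for an hour to count toward the acquisition/yield rates (see the _rate_calculate hunk below); the EPA value {"Items": ["all"]} means every column is checked. A minimal sketch of reading the new config, assuming only what the hunks above show:

    from AeroViz.rawDataReader.config.supported_instruments import meta

    print(meta['XRF']['freq'])                 # '1h'
    print(list(meta['XRF']['deter_key'])[:5])  # ['Al', 'Si', 'P', 'S', 'Cl']
    print(meta['EPA']['deter_key'])            # {'Items': ['all']}
    print(meta['Minion']['pattern'])           # ['*.csv', '*.xlsx']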
@@ -1,25 +1,21 @@
 import json
 import logging
-import pickle as pkl
 from abc import ABC, abstractmethod
-from datetime import datetime
+from datetime import datetime
 from pathlib import Path
-from typing import
+from typing import Optional
 
 import numpy as np
 import pandas as pd
-from pandas import DataFrame,
+from pandas import DataFrame, concat, read_pickle
 from rich.console import Console
 from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, TaskProgressColumn
 
-from
+from AeroViz.rawDataReader.config.supported_instruments import meta
 
 __all__ = ['AbstractReader']
 
 
-console = Console(force_terminal=True, color_system="auto")
-
-
 class AbstractReader(ABC):
     """
     Abstract class for reading raw data from different instruments. Each instrument should have a separate class that
@@ -34,9 +30,9 @@ class AbstractReader(ABC):
 
     def __init__(self,
                  path: Path | str,
-                 qc: bool = True,
-                 csv_raw: bool = True,
                  reset: bool = False,
+                 qc: bool = True,
+                 qc_freq: Optional[str] = None,
                  rate: bool = True,
                  append_data: bool = False):
 
@@ -45,9 +41,9 @@ class AbstractReader(ABC):
         self.logger = self._setup_logger()
 
         self.reset = reset
-        self.rate = rate
         self.qc = qc
-        self.
+        self.qc_freq = qc_freq
+        self.rate = rate
         self.append = append_data and reset
 
         self.pkl_nam = self.path / f'_read_{self.nam.lower()}.pkl'
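The new qc_freq knob is handed straight to pd.Grouper further down (see the _rate_calculate hunk), so any pandas offset alias should work. A sketch of the grouping it implies, with the monthly 'MS' alias chosen purely for illustration:

    import pandas as pd

    df = pd.DataFrame({'v': range(4)},
                      index=pd.to_datetime(['2024-01-05', '2024-01-20',
                                            '2024-02-03', '2024-02-17']))

    # qc_freq='MS' would bucket the record by calendar month before the
    # acquisition/yield rates are computed for each bucket
    for month, chunk in df.groupby(pd.Grouper(freq='MS')):
        print(month.strftime('%F'), len(chunk))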
@@ -57,15 +53,12 @@ class AbstractReader(ABC):
         self.csv_out = self.path / f'output_{self.nam.lower()}.csv'
 
     def __call__(self,
-                 start:
-                 end:
+                 start: datetime,
+                 end: datetime,
                  mean_freq: str = '1h',
                  csv_out: bool = True,
                  ) -> DataFrame:
 
-        if start and end and end <= start:
-            raise ValueError(f"Invalid time range: start {start} is after end {end}")
-
         data = self._run(start, end)
 
         if data is not None:
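With the 0.1.7 signature, a concrete reader is built with the reordered keyword set and called with datetime bounds; note the explicit end <= start guard was removed from __call__, so bound checking now falls to the caller. A hedged usage sketch, with the folder path and dates made up for illustration:

    from datetime import datetime
    from AeroViz.rawDataReader.script.EPA import Reader as EPAReader

    # hypothetical data folder; qc_freq='MS' requests per-month rate reporting
    reader = EPAReader('data/epa_station', reset=True, qc=True, qc_freq='MS')
    df = reader(start=datetime(2024, 1, 1), end=datetime(2024, 6, 30), mean_freq='1h')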
@@ -81,15 +74,8 @@ class AbstractReader(ABC):
         pass
 
     @abstractmethod
-    def _QC(self, df: DataFrame):
-        return df
-
-    @staticmethod
-    def basic_QC(df: DataFrame):
-        df_ave, df_std = df.mean(), df.std()
-        df_lowb, df_highb = df < (df_ave - df_std * 1.5), df > (df_ave + df_std * 1.5)
-
-        return df.mask(df_lowb | df_highb).copy()
+    def _QC(self, df: DataFrame) -> DataFrame:
+        return self.n_sigma_QC(df)
 
     def _setup_logger(self) -> logging.Logger:
         logger = logging.getLogger(self.nam)
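The fixed 1.5-sigma basic_QC is gone; the abstract _QC now defaults to the parameterized n_sigma_QC added further down. A sketch of how a subclass might tighten the screen, with the instrument key invented for illustration:

    from pandas import DataFrame
    from AeroViz.rawDataReader.core import AbstractReader

    class Reader(AbstractReader):
        nam = 'DEMO'  # hypothetical instrument key

        def _raw_reader(self, file):
            ...  # parse one raw file into a DataFrame

        def _QC(self, df: DataFrame) -> DataFrame:
            # tighten the default 5-sigma screen to 3 sigma
            return self.n_sigma_QC(df, std_range=3)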
@@ -99,29 +85,26 @@ class AbstractReader(ABC):
             logger.removeHandler(handler)
 
         handler = logging.FileHandler(self.path / f'{self.nam}.log')
-        handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
+        handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))
         logger.addHandler(handler)
         return logger
 
-    def _rate_calculate(self,
-
-
-
-        _drop_how = 'any'
-        _the_size = len(_fout_raw.resample('1h').mean().index)
+    def _rate_calculate(self, raw_data, qc_data) -> None:
+        def __base_rate(raw_data, qc_data):
+            period_size = len(raw_data.resample('1h').mean().index)
 
             for _nam, _key in self.meta['deter_key'].items():
-            if _key
-                _key, _drop_how = _fout_qc.keys(), 'all'
+                _key, _drop_how = (qc_data.keys(), 'all') if _key is ['all'] else (_key, 'any')
 
-
-
+                sample_size = len(raw_data[_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
+                qc_size = len(qc_data[_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
 
-
-
-
-
-
+                # validate rate calculation
+                if period_size < sample_size or sample_size < qc_size or period_size == 0 or sample_size == 0:
+                    raise ValueError(f"Invalid sample sizes: period={period_size}, sample={sample_size}, QC={qc_size}")
+
+                _acq_rate = round((sample_size / period_size) * 100, 1)
+                _yid_rate = round((qc_size / sample_size) * 100, 1)
 
                 self.logger.info(f'{_nam}:')
                 self.logger.info(f"\tAcquisition rate: {_acq_rate}%")
@@ -132,39 +115,55 @@ class AbstractReader(ABC):
                 print(f'\t\tacquisition rate : \033[91m{_acq_rate}%\033[0m')
                 print(f'\t\tyield rate : \033[91m{_yid_rate}%\033[0m')
 
-
-
-
-
-
-            freq=self.meta['freq'])
-        _tm_index.name = 'time'
+        if self.meta['deter_key'] is not None:
+            # use qc_freq to calculate each period rate
+            if self.qc_freq is not None:
+                raw_data_grouped = raw_data.groupby(pd.Grouper(freq=self.qc_freq))
+                qc_data_grouped = qc_data.groupby(pd.Grouper(freq=self.qc_freq))
 
-
+                for (month, _sub_raw_data), (_, _sub_qc_data) in zip(raw_data_grouped, qc_data_grouped):
+                    self.logger.info(
+                        f"\tProcessing: {_sub_raw_data.index[0].strftime('%F')} to {_sub_raw_data.index[-1].strftime('%F')}")
+                    print(
+                        f"\n\tProcessing: {_sub_raw_data.index[0].strftime('%F')} to {_sub_raw_data.index[-1].strftime('%F')}")
 
-
-    @staticmethod
-    def _tmidx_process(_start, _end, _df):
-        _st, _ed = _df.index.sort_values()[[0, -1]]
-        _start, _end = to_datetime(_start) or _st, to_datetime(_end) or _ed
-        _idx = date_range(_start, _end, freq=_df.index.freq.copy())
-        _idx.name = 'time'
+                    __base_rate(_sub_raw_data, _sub_qc_data)
 
-
+            else:
+                __base_rate(raw_data, qc_data)
 
-
-
-
+    def _timeIndex_process(self, _df, user_start=None, user_end=None, append_df=None):
+        """
+        Process time index, resample data, extract specified time range, and optionally append new data.
+
+        :param _df: Input DataFrame with time index
+        :param user_start: Start of user-specified time range (optional)
+        :param user_end: End of user-specified time range (optional)
+        :param append_df: DataFrame to append (optional)
+        :return: Processed DataFrame
+        """
+        # Round timestamps and remove duplicates
+        _df = _df.groupby(_df.index.round('1min')).first()
+
+        # Determine frequency
+        freq = _df.index.inferred_freq or self.meta['freq']
 
-
-
+        # Append new data if provided
+        if append_df is not None:
+            append_df.index = append_df.index.round('1min')
+            _df = pd.concat([append_df.dropna(how='all'), _df.dropna(how='all')])
+            _df = _df.loc[~_df.index.duplicated()]
 
-
-
+        # Determine time range
+        df_start, df_end = _df.index.sort_values()[[0, -1]]
 
-
+        # Create new time index
+        new_index = pd.date_range(user_start or df_start, user_end or df_end, freq=freq, name='time')
 
-
+        # Process data: convert to numeric, resample, and reindex
+        return (_df.apply(pd.to_numeric, errors='coerce')
+                .resample(freq).mean()
+                .reindex(new_index))
 
     def _outlier_process(self, _df):
         outlier_file = self.path / 'outlier.json'
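The new _timeIndex_process replaces _tmidx_process and normalizes whatever a raw file yields onto a regular grid: snap timestamps to the minute, infer the frequency (falling back to the meta frequency), then resample and reindex. A standalone sketch of that pipeline on toy data, with the '1h' fallback assumed:

    import pandas as pd

    idx = pd.to_datetime(['2024-01-01 00:00:20', '2024-01-01 00:59:55',
                          '2024-01-01 02:00:10'])
    raw = pd.DataFrame({'PM2.5': [10.0, 12.0, 9.0]}, index=idx)

    snapped = raw.groupby(raw.index.round('1min')).first()   # 00:00, 01:00, 02:00
    freq = snapped.index.inferred_freq or '1h'               # meta['freq'] fallback
    grid = pd.date_range(snapped.index[0], snapped.index[-1], freq=freq, name='time')
    out = (snapped.apply(pd.to_numeric, errors='coerce')
           .resample(freq).mean()
           .reindex(grid))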
@@ -180,31 +179,17 @@ class AbstractReader(ABC):
 
         return _df
 
-    # save pickle file
     def _save_data(self, raw_data: DataFrame, qc_data: DataFrame) -> None:
-
-
-
+        try:
+            raw_data.to_pickle(self.pkl_nam_raw)
+            raw_data.to_csv(self.csv_nam_raw)
 
-
-
-
-            raw_data.to_csv(self.csv_nam_raw)
+            if self.meta['deter_key'] is not None:
+                qc_data.to_pickle(self.pkl_nam)
+                qc_data.to_csv(self.csv_nam)
 
-    @staticmethod
-    def _safe_pickle_dump(file_path: Path, data: Any) -> None:
-        try:
-            with file_path.open('wb') as f:
-                pkl.dump(data, f, protocol=pkl.HIGHEST_PROTOCOL)
-        except PermissionError as e:
-            raise IOError(f"Unable to write to {file_path}. The file may be in use or you may not have permission: {e}")
         except Exception as e:
-            raise IOError(f"Error
-
-    # read pickle file
-    def _read_pkl(self):
-        with self.pkl_nam.open('rb') as qc_data, self.pkl_nam_raw.open('rb') as raw_data:
-            return pkl.load(raw_data), pkl.load(qc_data)
+            raise IOError(f"Error saving data. {e}")
 
     def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
         files = [f
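The hand-rolled _safe_pickle_dump/_read_pkl pair is replaced by pandas' own serializers (read_pickle in the imports above, to_pickle here). A toy round-trip showing the equivalence, with a made-up file name:

    import pandas as pd

    df = pd.DataFrame({'x': [1.0, 2.0]},
                      index=pd.date_range('2024-01-01', periods=2, freq='h', name='time'))

    df.to_pickle('_read_demo.pkl')            # what _save_data now calls per frame
    restored = pd.read_pickle('_read_demo.pkl')
    assert restored.equals(df)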
@@ -223,7 +208,7 @@ class AbstractReader(ABC):
             TaskProgressColumn(),
             TimeRemainingColumn(),
             TextColumn("{task.fields[filename]}", style="yellow"),
-            console=
+            console=Console(force_terminal=True, color_system="auto"),
             expand=False
         ) as progress:
             task = progress.add_task(f"Reading {self.nam} files", total=len(files), filename="")
@@ -246,47 +231,87 @@ class AbstractReader(ABC):
         if not df_list:
             raise ValueError("All files were either empty or failed to read.")
 
-        raw_data =
+        raw_data = concat(df_list, axis=0).groupby(level=0).first()
+
+        raw_data = self._timeIndex_process(raw_data)
         qc_data = self._QC(raw_data)
 
         return raw_data, qc_data
 
-    def _run(self,
+    def _run(self, user_start, user_end):
         # read pickle if pickle file exists and 'reset=False' or process raw data or append new data
         if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and not self.reset:
-            print(f"\n{
-                  f"from {
+            print(f"\n{datetime.now().strftime('%m/%d %X')} : Reading {self.nam} \033[96mPICKLE\033[0m "
+                  f"from {user_start} to {user_end}\n")
 
-            _f_raw_done, _f_qc_done = self.
+            _f_raw_done, _f_qc_done = read_pickle(self.pkl_nam_raw), read_pickle(self.pkl_nam)
 
             if self.append:
-                print(f"Appending new data from {
+                print(f"Appending new data from {user_start} to {user_end}")
                 _f_raw_new, _f_qc_new = self._read_raw_files()
-                _f_raw = self.
-                _f_qc = self.
+                _f_raw = self._timeIndex_process(_f_raw_done, append_df=_f_raw_new)
+                _f_qc = self._timeIndex_process(_f_qc_done, append_df=_f_qc_new)
             else:
                 _f_raw, _f_qc = _f_raw_done, _f_qc_done
+                return _f_qc if self.qc else _f_raw
 
         else:
-            print(f"\n{
-                  f"from {
+            print(f"\n{datetime.now().strftime('%m/%d %X')} : Reading {self.nam} \033[96mRAW DATA\033[0m "
+                  f"from {user_start} to {user_end}\n")
+
             _f_raw, _f_qc = self._read_raw_files()
 
         # process time index
-
-        _f_qc, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc)
+        data_start, data_end = _f_raw.index.sort_values()[[0, -1]]
 
+        _f_raw = self._timeIndex_process(_f_raw, user_start, user_end)
+        _f_qc = self._timeIndex_process(_f_qc, user_start, user_end)
         _f_qc = self._outlier_process(_f_qc)
 
         # save
         self._save_data(_f_raw, _f_qc)
 
         self.logger.info(f"{'=' * 60}")
-        self.logger.info(f"Raw data time : {
-        self.logger.info(f"Output time : {
+        self.logger.info(f"Raw data time : {data_start} to {data_end}")
+        self.logger.info(f"Output time : {user_start} to {user_end}")
         self.logger.info(f"{'-' * 60}")
 
         if self.rate:
-            self._rate_calculate(_f_raw, _f_qc
+            self._rate_calculate(_f_raw, _f_qc)
 
         return _f_qc if self.qc else _f_raw
+
+    @staticmethod
+    def reorder_dataframe_columns(df, order_lists, others_col=False):
+        new_order = []
+
+        for order in order_lists:
+            # only add columns that exist in the DataFrame, without duplicates
+            new_order.extend([col for col in order if col in df.columns and col not in new_order])
+
+        if others_col:
+            # append any remaining original columns, preserving their original order
+            new_order.extend([col for col in df.columns if col not in new_order])
+
+        return df[new_order]
+
+    @staticmethod
+    def n_sigma_QC(df: DataFrame, std_range: int = 5) -> DataFrame:
+        df_ave, df_std = df.mean(), df.std()
+        df_lowb, df_highb = df < (df_ave - df_std * std_range), df > (df_ave + df_std * std_range)
+
+        return df.mask(df_lowb | df_highb).copy()
+
+    # Inter-quartile Range (IQR) method
+    @staticmethod
+    def IQR_QC(df: DataFrame, log_dist=False) -> tuple[DataFrame, DataFrame]:
+        df = np.log10(df) if log_dist else df
+
+        _df_qua = df.quantile([.25, .75])
+        _df_q1, _df_q3 = _df_qua.loc[.25].copy(), _df_qua.loc[.75].copy()
+        _df_iqr = _df_q3 - _df_q1
+
+        _se = concat([_df_q1 - 1.5 * _df_iqr] * len(df), axis=1).T.set_index(df.index)
+        _le = concat([_df_q3 + 1.5 * _df_iqr] * len(df), axis=1).T.set_index(df.index)
+
+        return (10 ** _se, 10 ** _le) if log_dist else (_se, _le)
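Both QC helpers are plain staticmethods, so their behavior can be checked outside a reader. A toy sketch (synthetic data, values chosen only to make the masking visible):

    import numpy as np
    import pandas as pd
    from AeroViz.rawDataReader.core import AbstractReader

    rng = np.random.default_rng(0)
    df = pd.DataFrame({'BC1': rng.normal(800, 50, 500)},
                      index=pd.date_range('2024-01-01', periods=500, freq='min'))
    df.iloc[10] = 1e6                                      # plant a gross outlier

    masked = AbstractReader.n_sigma_QC(df, std_range=5)    # outlier becomes NaN
    assert masked['BC1'].isna().sum() == 1

    se, le = AbstractReader.IQR_QC(df)                     # row-wise lower/upper fences
    clipped = df.mask((df < se) | (df > le))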
@@ -27,4 +27,4 @@ class Reader(AbstractReader):
         _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']].mask((_df < 0).copy())
 
         # QC data in 1h
-        return _df.resample('1h').apply(self.
+        return _df.resample('1h').apply(self.n_sigma_QC).resample(self.meta.get("freq")).mean()
@@ -37,4 +37,4 @@ class Reader(AbstractReader):
         _df = _df[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])]
 
         # QC data in 1h
-        return _df.resample('1h').apply(self.
+        return _df.resample('1h').apply(self.n_sigma_QC).resample(self.meta.get("freq")).mean()
@@ -39,4 +39,4 @@ class Reader(AbstractReader):
         _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']].mask((_df < 0).copy())
 
         # QC data in 1h
-        return _df.resample('1h').apply(self.
+        return _df.resample('1h').apply(self.n_sigma_QC).resample(self.meta.get("freq")).mean()
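The three hunks above (and the MA350 one at the bottom) share one pattern: screen values within each hour, then average up to the instrument's meta frequency. An equivalent spelled with groupby for clarity, using a local copy of the 5-sigma mask so the sketch stands alone:

    import numpy as np
    import pandas as pd

    def n_sigma_qc(df, std_range=5):
        ave, std = df.mean(), df.std()
        return df.mask((df < ave - std * std_range) | (df > ave + std * std_range))

    bc = pd.DataFrame({'BC1': np.random.default_rng(1).normal(900, 40, 180)},
                      index=pd.date_range('2024-01-01', periods=180, freq='min'))

    # mask within each hourly bin, then take the hourly mean
    hourly = (bc.groupby(pd.Grouper(freq='1h'), group_keys=False)
              .apply(n_sigma_qc)
              .resample('1h').mean())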
@@ -0,0 +1,39 @@
+from pandas import read_csv
+
+from AeroViz.rawDataReader.core import AbstractReader
+
+desired_order1 = ['SO2', 'NO', 'NOx', 'NO2', 'CO', 'O3', 'THC', 'NMHC',
+                  'CH4', 'PM10', 'PM2.5', 'PM1', 'WS', 'WD', 'AT', 'RH']
+
+desired_order2 = ['Benzene', 'Toluene', 'EthylBenzene', 'm/p-Xylene', 'o-Xylene']
+
+
+class Reader(AbstractReader):
+    nam = 'EPA'
+
+    def _raw_reader(self, file):
+        # accepts both 查詢小時值(測項).csv and 查詢小時值(直式).csv exports, with or without invalid values removed
+        df = read_csv(file, encoding='big5', encoding_errors='ignore', index_col=0, parse_dates=True,
+                      on_bad_lines='skip')
+
+        if len(df.groupby('測站')) > 1:
+            raise ValueError(f'Multiple stations found in the file: {df['測站'].unique()}')
+        else:
+            if '測站' in df.columns:
+                df.drop(columns=['測站'], inplace=True)
+
+        if '測項' in df.columns:
+            df = df.pivot(columns='測項', values='資料')
+
+        df.rename(columns={'AMB_TEMP': 'AT', 'WIND_SPEED': 'WS', 'WIND_DIREC': 'WD'}, inplace=True)
+        df.index.name = 'Time'
+
+        # if invalid values were kept in the export, replace strings flagged with '#' or 'L' by '#' or '_'
+        df = df.replace(to_replace=r'\d*[#]\b', value='#', regex=True)
+        df = df.replace(to_replace=r'\d*[L]\b', value='_', regex=True)
+
+        # reorder columns
+        return self.reorder_dataframe_columns(df, [desired_order1])
+
+    def _QC(self, _df):
+        return _df.resample('6h').apply(self.n_sigma_QC).resample(self.meta.get("freq")).mean()
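The final return above relies on reorder_dataframe_columns, added to the core class earlier in this diff: columns listed in desired_order1 come first, columns missing from a given export are skipped, and extras are dropped unless others_col=True. A small demonstration with made-up columns:

    import pandas as pd
    from AeroViz.rawDataReader.core import AbstractReader

    df = pd.DataFrame([[55.0, 12.3, 1.1, 0.0]],
                      columns=['RH', 'PM2.5', 'SO2', 'Extra'])
    out = AbstractReader.reorder_dataframe_columns(df, [['SO2', 'PM2.5', 'RH']])
    print(list(out.columns))  # ['SO2', 'PM2.5', 'RH']; others_col=True would keep 'Extra' at the end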
@@ -1,8 +1,7 @@
 # read meteorological data from google sheet
 
 
-import
-from pandas import read_csv, concat, to_numeric
+from pandas import read_csv, to_numeric
 
 from AeroViz.rawDataReader.core import AbstractReader
 
@@ -35,24 +34,8 @@ class Reader(AbstractReader):
             'SO42-': 0.08,
         }
 
-        # _mdl.update(self._oth_set.get('mdl', {}))
-
-        def _se_le(_df_, _log=False):
-            _df_ = np.log10(_df_) if _log else _df_
-
-            _df_qua = _df_.quantile([.25, .75])
-            _df_q1, _df_q3 = _df_qua.loc[.25].copy(), _df_qua.loc[.75].copy()
-            _df_iqr = _df_q3 - _df_q1
-
-            _se = concat([_df_q1 - 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index)
-            _le = concat([_df_q3 + 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index)
-
-            if _log:
-                return 10 ** _se, 10 ** _le
-            return _se, _le
-
         _cation, _anion, _main = (['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'],
-                                  ['Cl-', 'NO2-', 'NO3-', 'SO42-', ],
+                                  ['Cl-', 'NO2-', 'NO3-', 'PO43-', 'SO42-', ],
                                   ['SO42-', 'NO3-', 'NH4+'])
 
         _df_salt = _df[_mdl.keys()].copy()
@@ -68,23 +51,23 @@ class Reader(AbstractReader):
 
         # calculate SE LE
         # salt < LE
-        _se, _le =
+        _se, _le = self.IQR_QC(_df_salt, log_dist=True)
         _df_salt = _df_salt.mask(_df_salt > _le).copy()
 
         # C/A, A/C
         _rat_CA = (_df_salt[_cation].sum(axis=1) / _df_salt[_anion].sum(axis=1)).to_frame()
         _rat_AC = (1 / _rat_CA).copy()
 
-        _se, _le =
+        _se, _le = self.IQR_QC(_rat_CA, )
         _cond_CA = (_rat_CA < _le) & (_rat_CA > 0)
 
-        _se, _le =
+        _se, _le = self.IQR_QC(_rat_AC, )
         _cond_AC = (_rat_AC < _le) & (_rat_AC > 0)
 
         _df_salt = _df_salt.where((_cond_CA * _cond_AC)[0]).copy()
 
         # conc. of main salt > SE
-        _se, _le =
+        _se, _le = self.IQR_QC(_df_salt[_main], log_dist=True)
         _df_salt[_main] = _df_salt[_main].mask(_df_salt[_main] < _se).copy()
 
         return _df_salt.reindex(_df.index)
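The screen above is a cation/anion balance test: both the C/A ratio and its inverse must fall below their IQR upper fence, and the main salts must sit above their lower fence. A toy version of the ratio step, with concentrations (µg/m³) invented so the imbalanced sample stands out:

    import pandas as pd

    ions = pd.DataFrame({
        'Na+': [0.3, 0.4, 0.2], 'NH4+': [2.1, 2.3, 9.9], 'K+': [0.1, 0.1, 0.1],
        'Mg2+': [0.05, 0.04, 0.05], 'Ca2+': [0.2, 0.2, 0.2],
        'Cl-': [0.5, 0.6, 0.4], 'NO2-': [0.02, 0.03, 0.02],
        'NO3-': [3.0, 3.2, 0.1], 'PO43-': [0.01, 0.01, 0.01], 'SO42-': [4.5, 4.8, 0.2],
    })

    cation = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+']
    anion = ['Cl-', 'NO2-', 'NO3-', 'PO43-', 'SO42-']

    rat_CA = ions[cation].sum(axis=1) / ions[anion].sum(axis=1)
    print(rat_CA.round(2))  # the third sample's imbalance stands out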
@@ -35,4 +35,4 @@ class Reader(AbstractReader):
         _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'AAE', 'BB']].mask((_df < 0).copy())
 
         # QC data in 1h
-        return _df.resample('1h').apply(self.
+        return _df.resample('1h').apply(self.n_sigma_QC).resample(self.meta.get("freq")).mean()