AeroViz 0.1.9.0__py3-none-any.whl → 0.1.9.2__py3-none-any.whl
This diff shows the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- AeroViz/dataProcess/Chemistry/_ocec.py +5 -5
- AeroViz/dataProcess/SizeDistr/_size_distr.py +5 -1
- AeroViz/plot/hysplit/hysplit.py +11 -3
- AeroViz/rawDataReader/__init__.py +1 -1
- AeroViz/rawDataReader/config/supported_instruments.py +31 -41
- AeroViz/rawDataReader/core/__init__.py +28 -96
- AeroViz/rawDataReader/core/qc.py +184 -0
- AeroViz/rawDataReader/script/AE33.py +2 -2
- AeroViz/rawDataReader/script/AE43.py +2 -2
- AeroViz/rawDataReader/script/Aurora.py +2 -2
- AeroViz/rawDataReader/script/BC1054.py +3 -2
- AeroViz/rawDataReader/script/EPA.py +6 -4
- AeroViz/rawDataReader/script/GRIMM.py +0 -1
- AeroViz/rawDataReader/script/IGAC.py +3 -1
- AeroViz/rawDataReader/script/MA350.py +3 -2
- AeroViz/rawDataReader/script/Minion.py +133 -99
- AeroViz/rawDataReader/script/NEPH.py +2 -2
- AeroViz/rawDataReader/script/OCEC.py +4 -2
- AeroViz/rawDataReader/script/TEOM.py +2 -2
- {AeroViz-0.1.9.0.dist-info → AeroViz-0.1.9.2.dist-info}/METADATA +4 -1
- {AeroViz-0.1.9.0.dist-info → AeroViz-0.1.9.2.dist-info}/RECORD +25 -24
- /AeroViz/data/{240228_00.txt → hysplit_example_data.txt} +0 -0
- {AeroViz-0.1.9.0.dist-info → AeroViz-0.1.9.2.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.9.0.dist-info → AeroViz-0.1.9.2.dist-info}/WHEEL +0 -0
- {AeroViz-0.1.9.0.dist-info → AeroViz-0.1.9.2.dist-info}/top_level.txt +0 -0
AeroViz/dataProcess/Chemistry/_ocec.py CHANGED

@@ -114,8 +114,7 @@ def _basic(_lcres, _mass, _ocec_ratio, _ocec_ratio_month, _hr_lim, _range, _wiso
    _out = {}

    # OC1, OC2, OC3, OC4, PC
-    _df_bsc = _lcres[['
-    _df_bsc.rename(columns={'OC1_raw': 'OC1', 'OC2_raw': 'OC2', 'OC3_raw': 'OC3', 'OC4_raw': 'OC4'}, inplace=True)
+    _df_bsc = _lcres[['OC1', 'OC2', 'OC3', 'OC4', 'PC']].copy()

    # SOC, POC, OC/EC
    if _ocec_ratio is not None:

@@ -144,7 +143,8 @@ def _basic(_lcres, _mass, _ocec_ratio, _ocec_ratio_month, _hr_lim, _range, _wiso
    _df_ratio = DataFrame(index=_df_bsc.index)

    for _ky, _val in _df_bsc.items():
-        if 'OC/EC' in _ky:
+        if 'OC/EC' in _ky:
+            continue
        _df_ratio[f'{_ky}/Thermal_OC'] = _val / _lcres['Thermal_OC']
        _df_ratio[f'{_ky}/Optical_OC'] = _val / _lcres['Optical_OC']

@@ -159,14 +159,14 @@ def _basic(_lcres, _mass, _ocec_ratio, _ocec_ratio_month, _hr_lim, _range, _wiso
    _df_ratio[f'Optical_EC/PM'] = _lcres['Optical_EC'] / _mass

    # ratio status
-    _df_bsc = concat((_lcres, _df_bsc.copy()), axis=1)
+    _df_bsc = concat((_lcres.loc[:, :'Sample_Volume'], _df_bsc.copy()), axis=1)

    for _ky, _df in _df_ratio.items():
        _df_bsc[f'{_ky}_status'] = 'Normal'
        _df_bsc[f'{_ky}_status'] = _df_bsc[f'{_ky}_status'].mask(_df > 1, 'Warning')

    # out
-    _out['ratio'] = _df_ratio
    _out['basic'] = _df_bsc
+    _out['ratio'] = _df_ratio

    return _out

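Note on the ratio-status change: each `_status` column starts as the string 'Normal' and flips to 'Warning' wherever the corresponding OC-fraction/total-OC ratio exceeds 1, which is physically implausible. A minimal sketch of that mask idiom on made-up numbers:

import pandas as pd

# Hypothetical OC1/Thermal_OC ratios for three samples.
ratio = pd.Series([0.4, 1.3, 0.9])

status = pd.Series('Normal', index=ratio.index)
status = status.mask(ratio > 1, 'Warning')   # flag ratios above 1
print(status.tolist())  # ['Normal', 'Warning', 'Normal']
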
AeroViz/dataProcess/SizeDistr/_size_distr.py CHANGED

@@ -69,7 +69,11 @@ def _basic(df, hybrid, unit, bin_rg, input_type):

    df_oth[f'total_{_tp_nam}_{_md_nam}'], df_oth[f'GMD_{_tp_nam}_{_md_nam}'], df_oth[
        f'GSD_{_tp_nam}_{_md_nam}'] = _geometric_prop(_dia, _dt)
-
+
+    mask = _dt.notna().any(axis=1)
+
+    df_oth.loc[mask, f'mode_{_tp_nam}_{_md_nam}'] = _dt.loc[mask].idxmax(axis=1)
+    df_oth.loc[~mask, f'mode_{_tp_nam}_{_md_nam}'] = n.nan

    ## out
    out_dic['other'] = df_oth

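The added mask keeps `idxmax` away from all-NaN rows, which have no defined mode diameter. The same pattern in isolation (bin diameters and counts are hypothetical; the package's `n` alias is replaced by the usual `np` here):

import numpy as np
import pandas as pd

# Rows are samples, columns are size-bin diameters (nm); the second row has no data.
dist = pd.DataFrame([[1.0, 5.0, 2.0],
                     [np.nan, np.nan, np.nan]], columns=[10.0, 50.0, 100.0])

mode = pd.Series(np.nan, index=dist.index)
valid = dist.notna().any(axis=1)                 # rows with at least one measured bin
mode[valid] = dist.loc[valid].idxmax(axis=1)     # diameter of the peak bin
print(mode.tolist())  # [50.0, nan]
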
AeroViz/plot/hysplit/hysplit.py CHANGED

@@ -7,21 +7,29 @@ import pandas as pd

from AeroViz.plot.utils import set_figure

-#
+# Hybrid Single-Particle Lagrangian Integrated Trajectory (HYSPLIT) model


__all__ = ['hysplit']

# Set the default file path
-DEFAULT_FILE = Path(__file__).parent.parent.parent / 'data' / '240228_00.txt'
+DEFAULT_FILE = Path(__file__).parent.parent.parent / 'data' / 'hysplit_example_data.txt'


def read_hysplit_data(file: Path):
    data = pd.read_csv(file, skiprows=8, sep=r'\s+', names=range(0, 12), engine='python')
    data = data.reset_index(drop=False)
-    data.columns = ['category', 'name', '
+    data.columns = ['category', 'name', 'year', 'month', 'day', 'hour', 'minute', 'count', 'backward', 'lat', 'lon',
                    'height', 'pressure']

+    time_cols = ['year', 'month', 'day', 'hour', 'minute']
+
+    data['time'] = pd.to_datetime(data[time_cols].astype(str).agg(''.join, axis=1), format='%y%m%d%H%M')
+
+    data = data.drop(columns=time_cols)
+
+    data = data[['time'] + [col for col in data.columns if col != 'time']]
+
    return data

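The new `time` column is built by concatenating the year/month/day/hour/minute fields into one string and parsing it with a fixed-width format. A standalone illustration of that conversion with invented trajectory values (explicit zero-padding is added here so single-digit fields line up with '%y%m%d%H%M'; it is not part of the package code):

import pandas as pd

data = pd.DataFrame({'year': [24, 24], 'month': [2, 2], 'day': [28, 28],
                     'hour': [0, 1], 'minute': [0, 0]})

time_cols = ['year', 'month', 'day', 'hour', 'minute']
joined = (data[time_cols].astype(int).astype(str)
          .apply(lambda s: s.str.zfill(2))   # pad each field to two digits
          .agg(''.join, axis=1))             # e.g. '2402280000'
data['time'] = pd.to_datetime(joined, format='%y%m%d%H%M')
print(data['time'].tolist())  # [Timestamp('2024-02-28 00:00:00'), Timestamp('2024-02-28 01:00:00')]
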
AeroViz/rawDataReader/__init__.py CHANGED

@@ -74,7 +74,7 @@ def RawDataReader(instrument_name: str,
    if not isinstance(path, Path):
        path = Path(path)
    if not path.exists() or not path.is_dir():
-        raise
+        raise FileNotFoundError(f"The specified path '{path}' does not exist or is not a directory.")

    # Validate the QC frequency
    if qc_freq is not None:

AeroViz/rawDataReader/config/supported_instruments.py CHANGED

@@ -75,6 +75,8 @@ meta = {
            "Thermal EC": ["Thermal_EC"],
            "Optical OC": ["Optical_OC"],
            "Optical EC": ["Optical_EC"],
+            "Thermal OC & EC": ["Thermal_OC", "Thermal_EC"],
+            "Optical OC & EC": ["Optical_OC", "Optical_EC"],
        },
    },

@@ -93,53 +95,41 @@ meta = {
            "SO42-": ["SO42-"],
            "Main Salt (NH4+, NO3-, SO42-)": ["NO3-", "SO42-", "NH4+"],
        },
+        # https://www.yangyao-env.com/web/product/product_in2.jsp?pd_id=PD1640151884502
+
+        # HF: 0.08, F-: 0.08, PO43-: None is not measured
+        "MDL": {
+            'HF': None, 'HCl': 0.05, 'HNO2': 0.01, 'HNO3': 0.05, 'G-SO2': 0.05, 'NH3': 0.1,
+            'Na+': 0.05, 'NH4+': 0.08, 'K+': 0.08, 'Mg2+': 0.05, 'Ca2+': 0.05,
+            'F-': None, 'Cl-': 0.05, 'NO2-': 0.05, 'NO3-': 0.01, 'PO43-': None, 'SO42-': 0.05,
+        },
+
+        "MR": {
+            'HF': 200, 'HCl': 200, 'HNO2': 200, 'HNO3': 200, 'G-SO2': 200, 'NH3': 300,
+            'Na+': 300, 'NH4+': 300, 'K+': 300, 'Mg2+': 300, 'Ca2+': 300,
+            'F-': 300, 'Cl-': 300, 'NO2-': 300, 'NO3-': 300, 'PO43-': None, 'SO42-': 300,
+        }
    },

    "XRF": {
        "pattern": ["*.csv"],
        "freq": "1h",
        "deter_key": {
-            "Al": ["Al"],
-            "Si": ["Si"],
-            "P": ["P"],
-            "S": ["S"],
-            "Cl": ["Cl"],
-            "K": ["K"],
-            "Ca": ["Ca"],
-            "Ti": ["Ti"],
-            "V": ["V"],
-            "Cr": ["Cr"],
-            "Mn": ["Mn"],
-            "Fe": ["Fe"],
-            "Ni": ["Ni"],
-            "Cu": ["Cu"],
-            "Zn": ["Zn"],
-            "As": ["As"],
-            "Se": ["Se"],
-            "Br": ["Br"],
-            "Rb": ["Rb"],
-            "Sr": ["Sr"],
-            "Y": ["Y"],
-            "Zr": ["Zr"],
-            "Mo": ["Mo"],
-            "Ag": ["Ag"],
-            "Cd": ["Cd"],
-            "In": ["In"],
-            "Sn": ["Sn"],
-            "Sb": ["Sb"],
-            "Te": ["Te"],
-            "Cs": ["Cs"],
-            "Ba": ["Ba"],
-            "La": ["La"],
-            "Ce": ["Ce"],
-            "W": ["W"],
-            "Pt": ["Pt"],
-            "Au": ["Au"],
-            "Hg": ["Hg"],
-            "Tl": ["Tl"],
-            "Pb": ["Pb"],
-            "Bi": ["Bi"],
+            "Several trace element (Al, Si, Ti, V, Cr, Mn, Fe)": ["Al", "Si", "Ti", "V", "Cr", "Mn", "Fe"],
+
        },
+        # base on Xact 625i Minimum Decision Limit (MDL) for XRF in ng/m3, 60 min sample time
+        "MDL": {
+            'Al': 100, 'Si': 18, 'P': 5.2, 'S': 3.2, 'Cl': 1.7,
+            'K': 1.2, 'Ca': 0.3, 'Ti': 1.6, 'V': 0.12, 'Cr': 0.12,
+            'Mn': 0.14, 'Fe': 0.17, 'Co': 0.14, 'Ni': 0.096, 'Cu': 0.079,
+            'Zn': 0.067, 'Ga': 0.059, 'Ge': 0.056, 'As': 0.063, 'Se': 0.081,
+            'Br': 0.1, 'Rb': 0.19, 'Sr': 0.22, 'Y': 0.28, 'Zr': 0.33,
+            'Nb': 0.41, 'Mo': 0.48, 'Pd': 2.2, 'Ag': 1.9, 'Cd': 2.5,
+            'In': 3.1, 'Sn': 4.1, 'Sb': 5.2, 'Te': 0.6, 'Cs': 0.37,
+            'Ba': 0.39, 'La': 0.36, 'Ce': 0.3, 'W': 0.0001, 'Pt': 0.12,
+            'Au': 0.1, 'Hg': 0.12, 'Tl': 0.12, 'Pb': 0.13, 'Bi': 0.13
+        }
    },

    "VOC": {

@@ -174,7 +164,7 @@ meta = {
        "freq": "1h",
        "deter_key": {
            "Main Salt (Na+, NH4+, Cl-, NO3-, SO42-)": ["Na+", "NH4+", "Cl-", "NO3-", "SO42-"],
-            "
+            "Several trace element (Al, Ti, V, Cr, Mn, Fe)": ["Al", "Ti", "V", "Cr", "Mn", "Fe"],
        },
    },
}

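Centralising the MDL and MR tables in `meta` lets the reader scripts look detection limits up by instrument key instead of hard-coding them; the Minion reader below does exactly that with `meta.get('XRF').get('MDL')`. A small sketch of the lookup on a stand-in dictionary (only a subset of the values above):

# Stand-in for AeroViz.rawDataReader.config.supported_instruments.meta (subset for illustration).
meta = {
    "XRF": {"MDL": {'Al': 100, 'Si': 18, 'Fe': 0.17}},
    "IGAC": {"MDL": {'NH4+': 0.08, 'NO3-': 0.01, 'SO42-': 0.05}},
}

xrf_mdl = meta.get('XRF').get('MDL')   # per-element detection limits (ng/m3)
xrf_cols = list(xrf_mdl)               # column names covered by the XRF QC
print(xrf_cols, xrf_mdl.get('Fe'))     # ['Al', 'Si', 'Fe'] 0.17
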
AeroViz/rawDataReader/core/__init__.py CHANGED

@@ -7,11 +7,12 @@ from typing import Optional

import numpy as np
import pandas as pd
-from pandas import DataFrame, concat, read_pickle
+from pandas import DataFrame, concat, read_pickle, to_numeric
from rich.console import Console
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, TaskProgressColumn

from AeroViz.rawDataReader.config.supported_instruments import meta
+from AeroViz.rawDataReader.core.qc import DataQualityControl

__all__ = ['AbstractReader']

@@ -75,18 +76,20 @@ class AbstractReader(ABC):

    @abstractmethod
    def _QC(self, df: DataFrame) -> DataFrame:
-        return
+        return df

    def _setup_logger(self) -> logging.Logger:
        logger = logging.getLogger(self.nam)
        logger.setLevel(logging.INFO)

        for handler in logger.handlers[:]:
+            handler.close()
            logger.removeHandler(handler)

        handler = logging.FileHandler(self.path / f'{self.nam}.log')
        handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))
        logger.addHandler(handler)
+
        return logger

    def _rate_calculate(self, raw_data, qc_data) -> None:

@@ -94,18 +97,25 @@ class AbstractReader(ABC):
        period_size = len(raw_data.resample('1h').mean().index)

        for _nam, _key in self.meta['deter_key'].items():
-
+            _columns_key, _drop_how = (qc_data.keys(), 'all') if _key == ['all'] else (_key, 'any')

-            sample_size = len(raw_data[
-            qc_size = len(qc_data[
+            sample_size = len(raw_data[_columns_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
+            qc_size = len(qc_data[_columns_key].resample('1h').mean().copy().dropna(how=_drop_how).index)

            # validate rate calculation
-            if period_size
-
+            if period_size == 0 or sample_size == 0 or qc_size == 0:
+                print(f'\t\t\033[91m No data for this period... skipping\033[0m')
+                continue
+
+            if period_size < sample_size or sample_size < qc_size:
+                print(
+                    f'\t\tInvalid size relationship: period={period_size}, sample={sample_size}, QC={qc_size}... skipping')
+                continue

-
-
-
+            else:
+                _acq_rate = round((sample_size / period_size) * 100, 1)
+                _yid_rate = round((qc_size / sample_size) * 100, 1)
+                _OEE_rate = round((qc_size / period_size) * 100, 1)

            self.logger.info(f'{_nam}:')
            self.logger.info(f"\tAcquisition rate: {_acq_rate}%")

@@ -114,8 +124,8 @@ class AbstractReader(ABC):
            self.logger.info(f"{'=' * 60}")

            print(f'\n\t{_nam} : ')
-            print(f'\t\tacquisition rate | yield rate
-            f'
+            print(f'\t\tacquisition rate | yield rate -> OEE rate : '
+                  f'\033[91m{_acq_rate}% | {_yid_rate}% -> {_OEE_rate}%\033[0m')

        if self.meta['deter_key'] is not None:
            # use qc_freq to calculate each period rate

@@ -163,9 +173,7 @@ class AbstractReader(ABC):
        new_index = pd.date_range(user_start or df_start, user_end or df_end, freq=freq, name='time')

        # Process data: convert to numeric, resample, and reindex
-        return
-            .resample(freq).mean()
-            .reindex(new_index))
+        return _df.reindex(new_index)

    def _outlier_process(self, _df):
        outlier_file = self.path / 'outlier.json'

@@ -235,8 +243,8 @@ class AbstractReader(ABC):

        raw_data = concat(df_list, axis=0).groupby(level=0).first()

-        raw_data = self._timeIndex_process(raw_data)
-        qc_data = self._QC(raw_data)
+        raw_data = self._timeIndex_process(raw_data).apply(to_numeric, errors='coerce').copy(deep=True)
+        qc_data = self._QC(raw_data).apply(to_numeric, errors='coerce').copy(deep=True)

        return raw_data, qc_data

@@ -279,6 +287,8 @@ class AbstractReader(ABC):
        self.logger.info(f"{'-' * 60}")

        if self.rate:
+            _f_raw = _f_raw.apply(to_numeric, errors='coerce')
+            _f_qc = _f_qc.apply(to_numeric, errors='coerce')
            self._rate_calculate(_f_raw, _f_qc)

        return _f_qc if self.qc else _f_raw

@@ -297,84 +307,6 @@ class AbstractReader(ABC):

        return df[new_order]

-    @staticmethod
-    def n_sigma_QC(df: pd.DataFrame, std_range: int = 5) -> pd.DataFrame:
-        # Ensure the input is a DataFrame
-        df = df.to_frame() if isinstance(df, pd.Series) else df
-
-        df_ave = df.mean()
-        df_std = df.std()
-
-        lower_bound = df < (df_ave - df_std * std_range)
-        upper_bound = df > (df_ave + df_std * std_range)
-
-        return df.mask(lower_bound | upper_bound)
-
-    @staticmethod
-    def IQR_QC(df: pd.DataFrame, log_dist=False) -> pd.DataFrame:
-        # Ensure the input is a DataFrame
-        df = df.to_frame() if isinstance(df, pd.Series) else df
-
-        df_transformed = np.log10(df) if log_dist else df
-
-        _df_q1 = df_transformed.quantile(0.25)
-        _df_q3 = df_transformed.quantile(0.75)
-
-        _df_iqr = _df_q3 - _df_q1
-
-        # Calculate lower and upper bounds
-        lower_bound = df_transformed < (_df_q1 - 1.5 * _df_iqr)
-        upper_bound = df_transformed > (_df_q3 + 1.5 * _df_iqr)
-
-        # Apply the filter to the original dataframe
-        return df.mask(lower_bound | upper_bound)
-
-    @staticmethod
-    def rolling_IQR_QC(df: pd.DataFrame, window_size=24, log_dist=False) -> pd.DataFrame:
-        df = df.to_frame() if isinstance(df, pd.Series) else df
-        df_transformed = np.log10(df) if log_dist else df
-
-        def iqr_filter(x):
-            q1, q3 = x.quantile(0.25), x.quantile(0.75)
-            iqr = q3 - q1
-            lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr
-            return (x >= lower) & (x <= upper)
-
-        mask = df_transformed.rolling(window=window_size, center=True, min_periods=1).apply(iqr_filter)
-        return df.where(mask, np.nan)
-
    @staticmethod
    def time_aware_IQR_QC(df: pd.DataFrame, time_window='1D', log_dist=False) -> pd.DataFrame:
-
-        df_transformed = np.log10(df) if log_dist else df
-
-        def iqr_filter(group):
-            q1, q3 = group.quantile(0.25), group.quantile(0.75)
-            iqr = q3 - q1
-            lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr
-            return (group >= lower) & (group <= upper)
-
-        mask = df_transformed.groupby(pd.Grouper(freq=time_window)).transform(iqr_filter)
-        return df.where(mask, np.nan)
-
-    @staticmethod
-    def mad_iqr_hybrid_QC(df: pd.DataFrame, mad_threshold=3.5, log_dist=False) -> pd.DataFrame:
-        df = df.to_frame() if isinstance(df, pd.Series) else df
-        df_transformed = np.log10(df) if log_dist else df
-
-        # IQR method
-        q1, q3 = df_transformed.quantile(0.25), df_transformed.quantile(0.75)
-        iqr = q3 - q1
-        iqr_lower, iqr_upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr
-
-        # MAD method
-        median = df_transformed.median()
-        mad = (df_transformed - median).abs().median()
-        mad_lower, mad_upper = median - mad_threshold * mad, median + mad_threshold * mad
-
-        # Combine the two methods
-        lower = np.maximum(iqr_lower, mad_lower)
-        upper = np.minimum(iqr_upper, mad_upper)
-
-        mask = (df_transformed >= lower) & (df_transformed <= upper)
-        return df.where(mask, np.nan)
+        return DataQualityControl().time_aware_iqr(df, time_window=time_window, log_dist=log_dist)

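The rewritten `_rate_calculate` reports three percentages per parameter group: the acquisition rate (hours with raw data over hours in the period), the yield rate (hours surviving QC over hours with raw data), and their product, the OEE rate. A minimal sketch of that arithmetic with hypothetical hourly counts:

period_size = 720   # hours in the reporting period
sample_size = 684   # hours with raw data
qc_size = 650       # hours that also pass QC

if period_size and sample_size and qc_size and period_size >= sample_size >= qc_size:
    acq_rate = round(sample_size / period_size * 100, 1)   # acquisition rate
    yid_rate = round(qc_size / sample_size * 100, 1)       # yield rate
    oee_rate = round(qc_size / period_size * 100, 1)       # OEE rate (QC hours over period hours)
    print(f'acquisition {acq_rate}% | yield {yid_rate}% -> OEE {oee_rate}%')
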
AeroViz/rawDataReader/core/qc.py ADDED

@@ -0,0 +1,184 @@
+import numpy as np
+import pandas as pd
+
+
+class DataQualityControl:
+    """A class providing various methods for data quality control and outlier detection"""
+
+    @staticmethod
+    def _ensure_dataframe(df: pd.DataFrame | pd.Series) -> pd.DataFrame:
+        """Ensure input data is in DataFrame format"""
+        return df.to_frame() if isinstance(df, pd.Series) else df
+
+    @staticmethod
+    def _transform_if_log(df: pd.DataFrame, log_dist: bool) -> pd.DataFrame:
+        """Transform data to log scale if required"""
+        return np.log10(df) if log_dist else df
+
+    @classmethod
+    def n_sigma(cls, df: pd.DataFrame, std_range: int = 5) -> pd.DataFrame:
+        """
+        Detect outliers using n-sigma method
+
+        Parameters
+        ----------
+        df : pd.DataFrame
+            Input data
+        std_range : int, default=5
+            Number of standard deviations to use as threshold
+
+        Returns
+        -------
+        pd.DataFrame
+            Cleaned DataFrame with outliers masked as NaN
+        """
+        df = cls._ensure_dataframe(df)
+        df_ave = df.mean()
+        df_std = df.std()
+
+        lower_bound = df < (df_ave - df_std * std_range)
+        upper_bound = df > (df_ave + df_std * std_range)
+
+        return df.mask(lower_bound | upper_bound)
+
+    @classmethod
+    def iqr(cls, df: pd.DataFrame, log_dist: bool = False) -> pd.DataFrame:
+        """
+        Detect outliers using Interquartile Range (IQR) method
+
+        Parameters
+        ----------
+        df : pd.DataFrame
+            Input data
+        log_dist : bool, default=False
+            Whether to apply log transformation to data
+
+        Returns
+        -------
+        pd.DataFrame
+            Cleaned DataFrame with outliers masked as NaN
+        """
+        df = cls._ensure_dataframe(df)
+        df_transformed = cls._transform_if_log(df, log_dist)
+
+        q1 = df_transformed.quantile(0.25)
+        q3 = df_transformed.quantile(0.75)
+        iqr = q3 - q1
+
+        lower_bound = df_transformed < (q1 - 1.5 * iqr)
+        upper_bound = df_transformed > (q3 + 1.5 * iqr)
+
+        return df.mask(lower_bound | upper_bound)
+
+    @classmethod
+    def rolling_iqr(cls, df: pd.DataFrame, window_size: int = 24,
+                    log_dist: bool = False) -> pd.DataFrame:
+        """
+        Detect outliers using rolling window IQR method
+
+        Parameters
+        ----------
+        df : pd.DataFrame
+            Input data
+        window_size : int, default=24
+            Size of the rolling window
+        log_dist : bool, default=False
+            Whether to apply log transformation to data
+
+        Returns
+        -------
+        pd.DataFrame
+            Cleaned DataFrame with outliers masked as NaN
+        """
+        df = cls._ensure_dataframe(df)
+        df_transformed = cls._transform_if_log(df, log_dist)
+
+        def iqr_filter(x):
+            q1, q3 = x.quantile(0.25), x.quantile(0.75)
+            iqr = q3 - q1
+            lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr
+            return (x >= lower) & (x <= upper)
+
+        mask = df_transformed.rolling(
+            window=window_size,
+            center=True,
+            min_periods=1
+        ).apply(iqr_filter)
+
+        return df.where(mask, np.nan)
+
+    @classmethod
+    def time_aware_iqr(cls, df: pd.DataFrame, time_window: str = '1D',
+                       log_dist: bool = False) -> pd.DataFrame:
+        """
+        Detect outliers using time-aware IQR method
+
+        Parameters
+        ----------
+        df : pd.DataFrame
+            Input data
+        time_window : str, default='1D'
+            Time window size (e.g., '1D' for one day)
+        log_dist : bool, default=False
+            Whether to apply log transformation to data
+
+        Returns
+        -------
+        pd.DataFrame
+            Cleaned DataFrame with outliers masked as NaN
+        """
+        df = cls._ensure_dataframe(df)
+        df_transformed = cls._transform_if_log(df, log_dist)
+
+        def iqr_filter(group):
+            q1, q3 = group.quantile(0.25), group.quantile(0.75)
+            iqr = q3 - q1
+            lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr
+            return (group >= lower) & (group <= upper)
+
+        mask = df_transformed.groupby(
+            pd.Grouper(freq=time_window)
+        ).transform(iqr_filter)
+
+        return df.where(mask, np.nan)
+
+    @classmethod
+    def mad_iqr_hybrid(cls, df: pd.DataFrame, mad_threshold: float = 3.5,
+                       log_dist: bool = False) -> pd.DataFrame:
+        """
+        Detect outliers using a hybrid of MAD and IQR methods
+
+        Parameters
+        ----------
+        df : pd.DataFrame
+            Input data
+        mad_threshold : float, default=3.5
+            Threshold for MAD method
+        log_dist : bool, default=False
+            Whether to apply log transformation to data
+
+        Returns
+        -------
+        pd.DataFrame
+            Cleaned DataFrame with outliers masked as NaN
+        """
+        df = cls._ensure_dataframe(df)
+        df_transformed = cls._transform_if_log(df, log_dist)
+
+        # IQR method
+        q1, q3 = df_transformed.quantile(0.25), df_transformed.quantile(0.75)
+        iqr = q3 - q1
+        iqr_lower, iqr_upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr
+
+        # MAD method
+        median = df_transformed.median()
+        mad = (df_transformed - median).abs().median()
+        mad_lower = median - mad_threshold * mad
+        mad_upper = median + mad_threshold * mad
+
+        # Combine both methods
+        lower = np.maximum(iqr_lower, mad_lower)
+        upper = np.minimum(iqr_upper, mad_upper)
+
+        mask = (df_transformed >= lower) & (df_transformed <= upper)
+        return df.where(mask, np.nan)

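A hedged usage sketch of the new class on synthetic hourly data; the class, method, and import path come from this release, while the data and threshold choices are invented:

import numpy as np
import pandas as pd

from AeroViz.rawDataReader.core.qc import DataQualityControl

rng = np.random.default_rng(0)
idx = pd.date_range('2024-01-01', periods=96, freq='h')
df = pd.DataFrame({'PM2.5': rng.normal(20, 5, len(idx))}, index=idx)
df.iloc[10, 0] = 500                                                 # inject an obvious spike

cleaned = DataQualityControl.time_aware_iqr(df, time_window='1D')    # per-day IQR screening
print(int(cleaned['PM2.5'].isna().sum()))                            # >= 1: the spike is masked to NaN
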
AeroViz/rawDataReader/script/AE33.py CHANGED

@@ -11,14 +11,14 @@ class Reader(AbstractReader):
            self.logger.info(f'\t {file} may not be a whole daily data. Make sure the file is correct.')

        _df = read_table(file, parse_dates={'time': [0, 1]}, index_col='time',
-                         delimiter=r'\s+', skiprows=5, usecols=range(67))
+                         delimiter=r'\s+', skiprows=5, usecols=range(67))
        _df.columns = _df.columns.str.strip(';')

        # remove data without Status=0, 128 (Not much filter tape), 256 (Not much filter tape)
        if self.meta.get('error_state', False):
            _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()

-        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']]
+        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']].apply(to_numeric, errors='coerce')

        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

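Several readers in this release (AE33, AE43, Aurora, BC1054, MA350, NEPH, OCEC, TEOM, IGAC, EPA) now pipe the selected columns through `to_numeric(errors='coerce')`, so stray strings become NaN instead of leaving object-dtype columns behind. A small illustration of the coercion on made-up values:

import pandas as pd
from pandas import to_numeric

# Hypothetical raw BC columns with instrument status strings mixed into the numbers.
raw = pd.DataFrame({'BC1': ['1200', 'error', '1350'], 'BC2': ['800', '815', '-']})
clean = raw.apply(to_numeric, errors='coerce')

print(clean['BC1'].tolist())   # [1200.0, nan, 1350.0]
print(clean.dtypes.tolist())   # both columns become float64
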
AeroViz/rawDataReader/script/AE43.py CHANGED

@@ -7,7 +7,7 @@ class Reader(AbstractReader):
    nam = 'AE43'

    def _raw_reader(self, file):
-        _df = read_csv(file, parse_dates={'time': ['StartTime']}, index_col='time')
+        _df = read_csv(file, parse_dates={'time': ['StartTime']}, index_col='time')
        _df_id = _df['SetupID'].iloc[-1]

        # get last SetupID data

@@ -18,7 +18,7 @@ class Reader(AbstractReader):
        if self.meta.get('error_state', False):
            _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()

-        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']]
+        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']].apply(to_numeric, errors='coerce')

        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

AeroViz/rawDataReader/script/Aurora.py CHANGED

@@ -8,7 +8,7 @@ class Reader(AbstractReader):

    def _raw_reader(self, file):
        with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
-            _df = read_csv(f, low_memory=False, index_col=0)
+            _df = read_csv(f, low_memory=False, index_col=0)

        _df.index = to_datetime(_df.index, errors='coerce')
        _df.index.name = 'time'

@@ -24,7 +24,7 @@ class Reader(AbstractReader):
            'RH': 'RH'
        })

-        _df = _df[['B', 'G', 'R', 'BB', 'BG', 'BR']]
+        _df = _df[['B', 'G', 'R', 'BB', 'BG', 'BR']].apply(to_numeric, errors='coerce')

        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

AeroViz/rawDataReader/script/BC1054.py CHANGED

@@ -8,7 +8,7 @@ class Reader(AbstractReader):

    def _raw_reader(self, file):
        with open(file, 'r', encoding='utf-8', errors='ignore') as f:
-            _df = read_csv(f, parse_dates=True, index_col=0)
+            _df = read_csv(f, parse_dates=True, index_col=0)

        _df.columns = _df.columns.str.replace(' ', '')

@@ -29,7 +29,8 @@ class Reader(AbstractReader):
        if self.meta.get('error_state', False):
            _df = _df[~_df['Status'].isin(self.meta.get('error_state'))]

-        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']]
+        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']].apply(to_numeric,
+                                                                                                 errors='coerce')

        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

AeroViz/rawDataReader/script/EPA.py CHANGED

@@ -1,3 +1,4 @@
+import numpy as np
from pandas import read_csv, to_numeric

from AeroViz.rawDataReader.core import AbstractReader

@@ -14,7 +15,7 @@ class Reader(AbstractReader):
    def _raw_reader(self, file):
        # 查詢小時值(測項).csv & 查詢小時值(直式).csv (works whether or not valid values were exported)
        df = read_csv(file, encoding='big5', encoding_errors='ignore', index_col=0, parse_dates=True,
-                      on_bad_lines='skip')
+                      on_bad_lines='skip')

        if len(df.groupby('測站')) > 1:
            raise ValueError(f'Multiple stations found in the file: {df['測站'].unique()}')

@@ -29,11 +30,12 @@ class Reader(AbstractReader):
        df.index.name = 'Time'

        # If the export still contains invalid values, replace strings containing # or L with '#' or '_'
-        df = df.replace(to_replace=r'\d*[#]\b', value='#', regex=True)
-        df = df.replace(to_replace=r'\d*[L]\b', value='_', regex=True)
+        df = df.replace(to_replace=r'\d*\.?\d*[#]\b', value='#', regex=True)
+        df = df.replace(to_replace=r'\d*\.?\d*[L]\b', value='_', regex=True)

        # Column ordering
-        return self.reorder_dataframe_columns(df, [desired_order1])
+        return self.reorder_dataframe_columns(df, [desired_order1]).apply(to_numeric, errors='coerce')

    def _QC(self, _df):
+        _df = _df.mask(_df < 0, np.nan)
        return _df

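The EPA reader's `_QC` now blanks negative readings (which are not physically meaningful for concentrations) instead of passing them through. The same masking pattern in isolation, with made-up data:

import numpy as np
import pandas as pd

df = pd.DataFrame({'PM2.5': [12.0, -999.0, 8.1], 'SO2': [1.2, 0.4, -0.1]})

# Any negative reading is treated as missing rather than dropping the whole row.
qc = df.mask(df < 0, np.nan)
print(qc['PM2.5'].tolist())  # [12.0, nan, 8.1]
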
AeroViz/rawDataReader/script/GRIMM.py CHANGED

@@ -7,7 +7,6 @@ class Reader(AbstractReader):
    nam = 'GRIMM'

    def _raw_reader(self, file):
-
        _df = read_csv(file, header=233, delimiter='\t', index_col=0, parse_dates=[0], encoding='ISO-8859-1',
                       dayfirst=True).rename_axis("Time")
        _df.index = to_datetime(_df.index, format="%d/%m/%Y %H:%M:%S", dayfirst=True)

AeroViz/rawDataReader/script/IGAC.py CHANGED

@@ -12,11 +12,13 @@ class Reader(AbstractReader):
    def _raw_reader(self, file):

        with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
-            _df = read_csv(f, parse_dates=True, index_col=0, na_values='-')
+            _df = read_csv(f, parse_dates=True, index_col=0, na_values='-')

        _df.columns = _df.keys().str.strip(' ')
        _df.index.name = 'time'

+        _df = _df.apply(to_numeric, errors='coerce')
+
        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

    def _QC(self, _df):

AeroViz/rawDataReader/script/MA350.py CHANGED

@@ -8,7 +8,7 @@ class Reader(AbstractReader):

    def _raw_reader(self, file):
        _df = read_csv(file, parse_dates=['Date / time local'], index_col='Date / time local').rename_axis(
-            "Time")
+            "Time")

        _df = _df.rename(columns={
            'UV BCc': 'BC1',

@@ -26,7 +26,8 @@ class Reader(AbstractReader):
        # if self.meta.get('error_state', False):
        #     _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()

-        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'Delta-C', 'AAE', 'BB']]
+        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'Delta-C', 'AAE', 'BB']].apply(to_numeric,
+                                                                                                           errors='coerce')

        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

AeroViz/rawDataReader/script/Minion.py CHANGED

@@ -2,8 +2,9 @@ from typing import Literal

import numpy as np
import pandas
-from pandas import
+from pandas import DataFrame, read_excel

+from AeroViz.rawDataReader.config.supported_instruments import meta
from AeroViz.rawDataReader.core import AbstractReader

pandas.set_option("future.no_silent_downcasting", True)

@@ -13,143 +14,181 @@ desired_order1 = ['SO2', 'NO', 'NOx', 'NO2', 'CO', 'O3', 'THC', 'NMHC',

desired_order2 = ['Benzene', 'Toluene', 'EthylBenzene', 'm/p-Xylene', 'o-Xylene']

-
-                  'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Rb', 'Sr',
-                  'Y', 'Zr', 'Nb', 'Mo', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te',
-                  'Cs', 'Ba', 'La', 'Ce', 'W', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi']
-
-desired_order4 = ['NH3', 'HF', 'HCl', 'HNO2', 'HNO3', 'G-SO2',
-                  'Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+',
-                  'F-', 'Cl-', 'NO2-', 'NO3-', 'PO43-', 'SO42-']
+MDL_NUMBER = -999


class Reader(AbstractReader):
    nam = 'Minion'

+    # Nanzih August data (MOENV) (air quality, heavy-metal and aerosol availability) -> Nanzih August data_level1 -> NZ_minion_XXXX
    def _raw_reader(self, file):
-        # Read the Excel file
        df = read_excel(file, index_col=0, parse_dates=True)
+        df.index.name = 'Time'

        # Rename columns and strip whitespace
        df = df.rename(columns=lambda x: x.strip())

-        #
-        units = df.iloc[0].copy()
+        # Save the units row
+        self.units = df.iloc[0].copy()

        # Drop the units row from the raw data
        df = df.iloc[1:]

        # Replace specific values
-        df = df.replace({'維護校正': '*', np.nan: '-', '
-        df = df.replace(to_replace=r'\d*[#]\b', value='
-        df = df.replace(to_replace=r'\d*[L]\b', value=
+        df = df.replace({'維護校正': '*', np.nan: '-', 'Nodata': '-', '0L': MDL_NUMBER})
+        # df = df.replace(to_replace=r'\d*\.?\d*[#]\b', value='_', regex=True)
+        df = df.replace(to_replace=r'\d*\.?\d*[L]\b', value=MDL_NUMBER, regex=True)
+
+        # Replace 0 values in every column except 'WD' with '_'
+        for col in [col for col in df.columns if col != 'WD']:
+            df[col] = df[col].replace({0: MDL_NUMBER})
+
+        # replace to numeric for estimating qc rate
+        df = df.replace({'_': MDL_NUMBER})

-
-
-        df.loc[:, non_wd_columns] = df.loc[:, non_wd_columns].replace({0: '_'})
+        XRF_col = list(meta.get('XRF').get('MDL').keys())
+        IGAC_col = list(meta.get('IGAC').get('MDL').keys())

        # Reorder the columns
-        df = self.reorder_dataframe_columns(df, [desired_order1, desired_order2,
+        df = self.reorder_dataframe_columns(df, [desired_order1, desired_order2, XRF_col, IGAC_col])

        # Add the units row back to the DataFrame
        # df = concat([units.to_frame().T, df])

-
+        # save Level1 data
+        output_folder = file.parent / 'Level1'
+        output_folder.mkdir(parents=True, exist_ok=True)
+        df.to_csv(output_folder / f'{file.stem}_Level1.csv')

        return df.loc[~df.index.duplicated() & df.index.notna()]

    def _QC(self, _df):
+        IGAC_col = list(meta.get('IGAC').get('MDL'))
+        XRF_col = list(meta.get('XRF').get('MDL'))
+
+        # IGAC MDL QC
+        _df[IGAC_col] = self.IGAC_QAQC(_df[IGAC_col])
+
+        # XRF MDL QC
+        _df[XRF_col] = self.XRF_QAQC(_df[XRF_col])
+
        # remove negative value
-        _df = _df.mask((_df < 0))
+        # _df = _df.mask((_df < 0))
+        _df = _df.mask(_df == MDL_NUMBER, np.nan)

-
-        _df = self.
+        col = [col for col in desired_order1 if col != 'WD']
+        _df[col] = self.time_aware_IQR_QC(_df[col])

-        #
-
+        # Calculate the mass and ion balance
+        # mass tolerance = ± 1, ions balance tolerance = ± 1

-        #
-
+        # # conc. of main salt should be present at the same time (NH4+, SO42-, NO3-)
+        # _df_salt = df.mask(df.sum(axis=1, min_count=1) > df.PM25).dropna(subset=_main).copy()
+
+        ions_mass = _df[['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+', 'Cl-', 'NO3-', 'SO42-']].sum(axis=1)
+        element_mass = _df[XRF_col].sum(axis=1)
+
+        estimated_mass = ions_mass + element_mass
+
+        valid_mask = 2 * _df['PM2.5'] > estimated_mass
+
+        _df.loc[~valid_mask, IGAC_col + XRF_col] = np.nan

        return _df

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def mdlReplace_timeAware_qc(self, df: DataFrame, MDL: dict, MDL_replace) -> DataFrame:
+        # Step 1: Track MDL positions and values below threshold
+        mdl_mask = (df.eq(MDL_NUMBER) |
+                    df.apply(lambda x: x < MDL.get(x.name, float('-inf'))))
+
+        # Step 2: Convert all values below MDL to MDL_NUMBER (-999)
+        df_mdl = df.mask(mdl_mask, MDL_NUMBER)
+
+        # Step 3: Apply time_aware_IQR_QC (excluding MDL_NUMBER values)
+        df_qc = self.time_aware_IQR_QC(df_mdl.mask(df_mdl == MDL_NUMBER))
+
+        # Step 4: Handle values below MDL according to specified method
+        if MDL_replace == '0.5 * MDL':
+            for column, threshold in MDL.items():
+                if column in df.columns and threshold is not None:
+                    df_qc.loc[df_mdl[column] == MDL_NUMBER, column] = 0.5 * threshold
+                else:
+                    df_qc.loc[df_mdl[column] == MDL_NUMBER, column] = np.nan
+        else:  # 'nan'
+            df_qc = df_qc.mask(df_mdl == MDL_NUMBER, np.nan)
+
+        return df_qc
+
+    def XRF_QAQC(self,
+                 df: DataFrame,
+                 MDL_replace: Literal['nan', '0.5 * MDL'] = '0.5 * MDL'
+                 ) -> DataFrame:
+        """
+        Perform Quality Assurance and Quality Control for XRF data
+
+        Parameters
+        ----------
+        df : pd.DataFrame
+            Input dataframe with XRF data
+        MDL_replace : {'nan', '0.5 * MDL'}, default='nan'
+            Method to handle values below MDL:
+            - 'nan': Replace with NaN
+            - '0.5 * MDL': Replace with half of MDL value
+
+        Returns
+        -------
+        pd.DataFrame
+            Processed dataframe with QC applied and MDL values handled
+        """
+        MDL = meta.get('XRF').get('MDL')
+
+        df = self.mdlReplace_timeAware_qc(df, MDL, MDL_replace)

        # Convert units: ng/m3 -> ug/m3
        if df.Al.max() > 10 and df.Fe.max() > 10:
-            # Make sure every column in MDL.keys() exists in _df
            columns_to_convert = [col for col in MDL.keys() if col in df.columns]
-
            df[columns_to_convert] = df[columns_to_convert].div(1000)

+        self.logger.info(f"XRF QAQC summary: transform values below MDL to {MDL_replace}")
+
        return df

-    def IGAC_QAQC(self,
+    def IGAC_QAQC(self,
+                  df: DataFrame,
+                  MDL_replace: Literal['nan', '0.5 * MDL'] = '0.5 * MDL',
+                  tolerance: float = 1
+                  ) -> DataFrame:
        """
-
+        Perform Quality Assurance and Quality Control for IGAC data
+
+        Parameters
+        ----------
+        df : pd.DataFrame
+            Input dataframe with IGAC data
+        MDL_replace : {'nan', '0.5 * MDL'}, default='nan'
+            Method to handle values below MDL:
+            - 'nan': Replace with NaN
+            - '0.5 * MDL': Replace with half of MDL value
+        tolerance : float, default=1
+            Tolerance value for QC checks
+
+        Returns
+        -------
+        pd.DataFrame
+            Processed dataframe with QC applied and MDL values handled
        """
-
-        MDL = {
-            'HF': 0.08, 'HCl': 0.05, 'HNO2': 0.01, 'HNO3': 0.05, 'G-SO2': 0.05, 'NH3': 0.1,
-            'Na+': 0.05, 'NH4+': 0.08, 'K+': 0.08, 'Mg2+': 0.05, 'Ca2+': 0.05,
-            'F-': 0.08, 'Cl-': 0.05, 'NO2-': 0.05, 'NO3-': 0.01, 'PO43-': None, 'SO42-': 0.05,
-        }
-
-        MR = {
-            'HF': 200, 'HCl': 200, 'HNO2': 200, 'HNO3': 200, 'G-SO2': 200, 'NH3': 300,
-            'Na+': 300, 'NH4+': 300, 'K+': 300, 'Mg2+': 300, 'Ca2+': 300,
-            'F-': 300, 'Cl-': 300, 'NO2-': 300, 'NO3-': 300, 'PO43-': None, 'SO42-': 300,
-        }
+        MDL = meta.get('IGAC').get('MDL')

+        df = self.mdlReplace_timeAware_qc(df, MDL, MDL_replace)
+
+        # Define the ions
+        _df = df.copy()
        _cation, _anion, _main = (['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'],
                                  ['Cl-', 'NO2-', 'NO3-', 'SO42-'],
                                  ['SO42-', 'NO3-', 'NH4+'])
-        # QC: replace values below MDL with 0.5 * MDL -> ions balance -> PM2.5 > main salt
-        # mass tolerance = 0.3, ions balance tolerance = 0.3
-
-        # # conc. of main salt should be present at the same time (NH4+, SO42-, NO3-)
-        # _df_salt = df.mask(df.sum(axis=1, min_count=1) > df.PM25).dropna(subset=_main).copy()
-
-        # Define the ions
-        item = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+', 'Cl-', 'NO2-', 'NO3-', 'SO42-']

-
-        _df =
-
-        # for (_key, _df_col) in _df.items():
-        #     _df[_key] = _df_col.mask(_df_col < MDL[_key], MDL[_key] / 2)
-
-        _df['+_mole'] = _df[['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+']].div([23, 18, 39, (24 / 2), (40 / 2)]).sum(axis=1,
-                                                                                                             skipna=True)
-        _df['-_mole'] = _df[['Cl-', 'NO2-', 'NO3-', 'SO42-']].div([35.5, 46, 62, (96 / 2)]).sum(axis=1, skipna=True)
+        _df['+_mole'] = _df[_cation].div([23, 18, 39, (24 / 2), (40 / 2)]).sum(axis=1, skipna=True)
+        _df['-_mole'] = _df[_anion].div([35.5, 46, 62, (96 / 2)]).sum(axis=1, skipna=True)

        # Avoid division by zero
        _df['ratio'] = np.where(_df['-_mole'] != 0, _df['+_mole'] / _df['-_mole'], np.nan)

@@ -157,24 +196,19 @@ class Reader(AbstractReader):
        # Calculate bounds
        lower_bound, upper_bound = 1 - tolerance, 1 + tolerance

-        #
+        # Decide from the ratio whether to keep the original data
        valid_mask = ((_df['ratio'] <= upper_bound) & (_df['ratio'] >= lower_bound) &
                      ~np.isnan(_df['+_mole']) & ~np.isnan(_df['-_mole']))

-        #
-        df.loc[~valid_mask
+        # Keep the data, or set rows that fail the check to NaN
+        df.loc[~valid_mask] = np.nan

-        #
+        # Compute the percentage of retained data
        retained_percentage = (valid_mask.sum() / len(df)) * 100

-        self.logger.info(f"{
-        self.logger.info(f"Ions balance summary:")
-        self.logger.info(f"\t\tretain {retained_percentage.__round__(0)}% data within tolerance {tolerance}")
-        self.logger.info(f"{'=' * 60}")
+        self.logger.info(f"Ions balance summary: {retained_percentage.__round__(0)}% within tolerance ± {tolerance}")

        if retained_percentage < 70:
            self.logger.warning("Warning: The percentage of retained data is less than 70%")

-        # print(f"\tretain {retained_percentage.__round__(0)}% data within tolerance {tolerance}")
-
        return df

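In `IGAC_QAQC`, cation and anion concentrations are converted to charge-equivalent moles with the divisors in the `div` calls (Na+ 23, NH4+ 18, K+ 39, Mg2+ 24/2, Ca2+ 40/2; Cl- 35.5, NO2- 46, NO3- 62, SO42- 96/2), and an hour is kept only when the cation/anion ratio stays within 1 ± tolerance. A standalone sketch of the balance check for one made-up hour:

import numpy as np
import pandas as pd

row = pd.DataFrame({'Na+': [0.3], 'NH4+': [2.7], 'K+': [0.2], 'Mg2+': [0.05], 'Ca2+': [0.1],
                    'Cl-': [0.4], 'NO2-': [0.1], 'NO3-': [4.0], 'SO42-': [6.0]})

cation = row[['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+']].div([23, 18, 39, 24 / 2, 40 / 2]).sum(axis=1)
anion = row[['Cl-', 'NO2-', 'NO3-', 'SO42-']].div([35.5, 46, 62, 96 / 2]).sum(axis=1)

ratio = np.where(anion != 0, cation / anion, np.nan)
tolerance = 1
balanced = (ratio >= 1 - tolerance) & (ratio <= 1 + tolerance)
print(float(ratio[0]), bool(balanced[0]))  # ratio is about 0.87, so the hour is kept
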
AeroViz/rawDataReader/script/NEPH.py CHANGED

@@ -1,4 +1,4 @@
-from pandas import to_datetime, read_csv, DataFrame
+from pandas import to_datetime, read_csv, DataFrame, to_numeric

from AeroViz.rawDataReader.core import AbstractReader

@@ -42,7 +42,7 @@ class Reader(AbstractReader):

        _df_out.mask(_df_out['status'] != 0)  # 0000 -> numeric to 0

-        _df = _df_out[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']]
+        _df = _df_out[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']].apply(to_numeric, errors='coerce')

        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

AeroViz/rawDataReader/script/OCEC.py CHANGED

@@ -9,7 +9,7 @@ class Reader(AbstractReader):

    def _raw_reader(self, file):
        with open(file, 'r', encoding='utf-8', errors='ignore') as f:
-            _df = read_csv(f, skiprows=3)
+            _df = read_csv(f, skiprows=3)

        _df['Start Date/Time'] = _df['Start Date/Time'].str.strip()
        _df['time'] = to_datetime(_df['Start Date/Time'], format='%m/%d/%Y %I:%M:%S %p', errors='coerce')

@@ -51,6 +51,8 @@ class Reader(AbstractReader):
            'ECPk5-ug C': 'EC5_raw',
        })

+        _df = _df.apply(to_numeric, errors='coerce')
+
        _df['OC1'] = _df['OC1_raw'] / _df['Sample_Volume']
        _df['OC2'] = _df['OC2_raw'] / _df['Sample_Volume']
        _df['OC3'] = _df['OC3_raw'] / _df['Sample_Volume']

@@ -64,7 +66,7 @@ class Reader(AbstractReader):
        # _df['EC4'] = _df['EC4_raw'] / _df['Sample_Volume']
        # _df['EC5'] = _df['EC5_raw'] / _df['Sample_Volume']

-        _df = _df[['Thermal_OC', '
+        _df = _df[['Thermal_OC', 'Thermal_EC', 'Optical_OC', 'Optical_EC', 'TC', 'Sample_Volume',
                   'OC1', 'OC2', 'OC3', 'OC4', 'PC']]

        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

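Coercing the frame to numeric before the divisions matters because the raw carbon columns may arrive as strings from the CSV, and dividing string columns raises a TypeError. A toy version of that normalization (numbers invented; carbon mass per filter divided by sampled volume gives a concentration):

import pandas as pd
from pandas import to_numeric

_df = pd.DataFrame({'OC1_raw': ['2.4', '3.0'], 'Sample_Volume': ['0.96', '0.96']})
_df = _df.apply(to_numeric, errors='coerce')

_df['OC1'] = _df['OC1_raw'] / _df['Sample_Volume']   # ug C per m3 sampled
print(_df['OC1'].round(2).tolist())  # [2.5, 3.12]
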
AeroViz/rawDataReader/script/TEOM.py CHANGED

@@ -8,7 +8,7 @@ class Reader(AbstractReader):

    def _raw_reader(self, file):
        with open(file, 'r', encoding='utf-8', errors='ignore') as f:
-            _df = read_csv(f, skiprows=3, index_col=False)
+            _df = read_csv(f, skiprows=3, index_col=False)

        _df = _df.rename(columns={'Time Stamp': 'time',
                                  'System status': 'status',

@@ -27,7 +27,7 @@ class Reader(AbstractReader):

        _df = _df.where(_df['status'] < 1)

-        _df = _df[['PM_NV', 'PM_Total', 'noise']]
+        _df = _df[['PM_NV', 'PM_Total', 'noise']].apply(to_numeric, errors='coerce')

        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

{AeroViz-0.1.9.0.dist-info → AeroViz-0.1.9.2.dist-info}/METADATA CHANGED

@@ -1,11 +1,12 @@
Metadata-Version: 2.1
Name: AeroViz
-Version: 0.1.9.0
+Version: 0.1.9.2
Summary: Aerosol science
Home-page: https://github.com/Alex870521/AeroViz
Author: alex
Author-email: alex870521@gmail.com
Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.12
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Requires-Python: >=3.12

@@ -21,6 +22,8 @@ Requires-Dist: windrose ==1.9.2
Requires-Dist: cartopy ==0.24.1
Requires-Dist: tabulate ==0.9.0
Requires-Dist: rich ~=13.7.1
+Provides-Extra: test
+Requires-Dist: pytest >=7.0.0 ; extra == 'test'

## <div align="center">AeroViz for Aerosol Science Visualization</div>

{AeroViz-0.1.9.0.dist-info → AeroViz-0.1.9.2.dist-info}/RECORD CHANGED

@@ -1,13 +1,13 @@
AeroViz/__init__.py,sha256=A5W6SR71uY_eW44Sh-Yk6blJQ_G1aHrkSzeP2YTPQc4,371
-AeroViz/data/240228_00.txt,sha256=DWfY83EW3fOcv9dW-Y4pudq8-M7BJlXD-tlMSYrAk2w,8946
AeroViz/data/DEFAULT_DATA.csv,sha256=eeeyeh8vSLKkE5tAF0TYnUNOyQIH98VA41bJaAP204Y,2248526
AeroViz/data/DEFAULT_PNSD_DATA.csv,sha256=imLvLA80oYwo_jzXZtlQn5hZ76d47HUIlK2jp0tZPrg,2636511
+AeroViz/data/hysplit_example_data.txt,sha256=DWfY83EW3fOcv9dW-Y4pudq8-M7BJlXD-tlMSYrAk2w,8946
AeroViz/dataProcess/__init__.py,sha256=D3rTVUiGfs_daGuaotVtbijOgLAp6HaRWchj-zoEnHw,828
AeroViz/dataProcess/Chemistry/__init__.py,sha256=fyyomjxkQcUNWDx4R5jPrHafAftN-v2liUZii9OlaiU,2058
AeroViz/dataProcess/Chemistry/_calculate.py,sha256=q7ojTFPok0vg8k_1PMECNdP5CPanR9NWQ4Rx5iTcHew,599
AeroViz/dataProcess/Chemistry/_isoropia.py,sha256=3wp_FXdN230awlStMbctutwld4oot9WaAVXETGd6PSs,3255
AeroViz/dataProcess/Chemistry/_mass_volume.py,sha256=0joH2BAx0NUwDFzyrLgG-v7WrGl46R7zWxwbajWBV8o,5378
-AeroViz/dataProcess/Chemistry/_ocec.py,sha256=
+AeroViz/dataProcess/Chemistry/_ocec.py,sha256=1UpSwdxYVy_LpUjtoaEUoXA-r1nKEFJoyYglf_6CoXA,5899
AeroViz/dataProcess/Chemistry/_partition.py,sha256=tKhb6BJns46UiUlEq6Zq7ahYnvUJ_whY3tWE54C3bqU,1023
AeroViz/dataProcess/Chemistry/_teom.py,sha256=IiM-TrifWpQLTbKllG-4k4c3mvQulfcmjswWu6muCXA,486
AeroViz/dataProcess/Chemistry/isrpia.cnf,sha256=iWXTqsOZFmNrJxAI9nYuilZ9h6ru1icdPFVim7YKc_k,566

@@ -28,7 +28,7 @@ AeroViz/dataProcess/SizeDistr/_merge_v1.py,sha256=6Anb8DszoatK66tc9ccA6ZApbqtL7p
AeroViz/dataProcess/SizeDistr/_merge_v2.py,sha256=8OzUKw7hTg-yuQBipuFKgBS_7c7zbApN_BNr00G8q9c,9046
AeroViz/dataProcess/SizeDistr/_merge_v3.py,sha256=HN2ARFmeWOawOWRPPv_pHEGBBZNgXVbH4dDTxcN7rdY,18749
AeroViz/dataProcess/SizeDistr/_merge_v4.py,sha256=b8RVAievGIOLrmJHJXRsKXQ1tkMkm6rx43S7XAfeXE4,16228
-AeroViz/dataProcess/SizeDistr/_size_distr.py,sha256=
+AeroViz/dataProcess/SizeDistr/_size_distr.py,sha256=ULhGKlxE9QmbDO_PS3HOSKzepeMfJZWabJvGXqsDEvE,3259
AeroViz/dataProcess/VOC/__init__.py,sha256=8GNP0RMymTkJXK18pSgfLHqrKPWboN-3x1_Ke4UrI44,259
AeroViz/dataProcess/VOC/_potential_par.py,sha256=h3rVdvtBvC6xHa_ZG4Oq5eXezeSZtHNy6T6I40maIcM,3863
AeroViz/dataProcess/VOC/support_voc.json,sha256=tMYp_NERqhSriVRE2NavXh33CQ5CnsbJHtmMFlE5q_E,6804

@@ -44,7 +44,7 @@ AeroViz/plot/violin.py,sha256=pU2Z2yTWocEtImmCAmbtn0WvXtUOrnCGOdDOrLxjooU,2689
AeroViz/plot/distribution/__init__.py,sha256=nhbIegWczkuEfWsE7-2jfF0dnpmPDzJJzjq8Fuh6q5k,28
AeroViz/plot/distribution/distribution.py,sha256=sAjqtqKavFwQqI8PGPFnpvZFSU-w2UKjcTTC5L91f4E,20595
AeroViz/plot/hysplit/__init__.py,sha256=VrEkha2OEFp_00Xj9R98C96niZ7fYqJzGPeYsbojtzA,23
-AeroViz/plot/hysplit/hysplit.py,sha256=
+AeroViz/plot/hysplit/hysplit.py,sha256=gSCkemFLRvsk4m8zYbxbsjrdU14NkN9ZNfVRvdq69aM,2796
AeroViz/plot/meteorology/__init__.py,sha256=hhGfQE3IUzS3Eaju_nO7LomPPHJnd-zAAZZweXOXs2M,27
AeroViz/plot/meteorology/meteorology.py,sha256=6hk-5olgQTw2SB-GhEizLN19vRVBztgiXoruh8Q6Zns,11282
AeroViz/plot/optical/PyMieScatt_update.py,sha256=g3vlzATjzYSYZd3LwmwxEmdkpo4cHJ3KY4rePY4jwZk,21065

@@ -68,24 +68,25 @@ AeroViz/plot/utils/fRH.json,sha256=t-2ux4TLOYAB-4jJ72LSM4jv1jk9XkaxKYNMDepMHIg,6
AeroViz/plot/utils/plt_utils.py,sha256=7Au3r2-7AZQmzrO2OfcyTFomJRLHgu1Npb7wxQqUvzY,3438
AeroViz/plot/utils/sklearn_utils.py,sha256=hKFfkVkYLRxkIDKvO9COHXwhjD_UWqQigdT3mDToni4,2098
AeroViz/plot/utils/units.json,sha256=JKcqvLA6dkc8REV_NhX85Jl9LA4iAZxzw9RJp6JTla0,2965
-AeroViz/rawDataReader/__init__.py,sha256=
+AeroViz/rawDataReader/__init__.py,sha256=FDUsJ_v6wG9DUcNOjmqaWJvVdMgatJrqGYPZdbaW7Wo,4859
AeroViz/rawDataReader/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-AeroViz/rawDataReader/config/supported_instruments.py,sha256=
-AeroViz/rawDataReader/core/__init__.py,sha256=
-AeroViz/rawDataReader/
-AeroViz/rawDataReader/script/
+AeroViz/rawDataReader/config/supported_instruments.py,sha256=RR2TPome27O3ERNxQ043boDMfvWG9BHvhpGJ6VQC5gw,5988
+AeroViz/rawDataReader/core/__init__.py,sha256=i5JTd8IhhTYI2bpEMQ27XmxPZojACUzWnKcw4gGofnE,12691
+AeroViz/rawDataReader/core/qc.py,sha256=tFIVsfph8yZIK6NRKQxaZYHcruJclriKSvR0oC12T0Q,5698
+AeroViz/rawDataReader/script/AE33.py,sha256=FbbFJ93aLVjA8k2QZ_fKcI9uXoux2k0AL3O73iY879I,1278
+AeroViz/rawDataReader/script/AE43.py,sha256=GjcICBJ3nIANyMd4kovteBUtkyCGLTos07BczgSCuVE,1231
AeroViz/rawDataReader/script/APS_3321.py,sha256=x75G72Xl0vElr6Njbv8SlOcosAHNozseaJzAxVmfXyI,1697
-AeroViz/rawDataReader/script/Aurora.py,sha256=
-AeroViz/rawDataReader/script/BC1054.py,sha256=
-AeroViz/rawDataReader/script/EPA.py,sha256=
-AeroViz/rawDataReader/script/GRIMM.py,sha256
-AeroViz/rawDataReader/script/IGAC.py,sha256=
-AeroViz/rawDataReader/script/MA350.py,sha256=
-AeroViz/rawDataReader/script/Minion.py,sha256=
-AeroViz/rawDataReader/script/NEPH.py,sha256=
-AeroViz/rawDataReader/script/OCEC.py,sha256=
+AeroViz/rawDataReader/script/Aurora.py,sha256=2duNsK2WCWk21Rd2d4EugAA_yN27p2AjRFd9ClJ2aUA,1491
+AeroViz/rawDataReader/script/BC1054.py,sha256=tuDyq8M5BPbmu1yJr9zXYS2piMGz08yTQXGT6tK9jxA,1675
+AeroViz/rawDataReader/script/EPA.py,sha256=1ZXEcCnIMOhEXu0JwzeCgmhRtPzBNo2CfLhfhstOT4k,1649
+AeroViz/rawDataReader/script/GRIMM.py,sha256=-D4U83ihjAqcvOAnk7NET59IZfV1JzPYKRQjrIQyBDM,846
+AeroViz/rawDataReader/script/IGAC.py,sha256=i6WT3rX0n0e4hq7NfWN6tVwCuKAeV9ARxPkXZSbQj74,2387
+AeroViz/rawDataReader/script/MA350.py,sha256=EfPTFhgDAjI7r0G6kW7pjog-4MBOnvW0cyFqIkCxEP8,1597
+AeroViz/rawDataReader/script/Minion.py,sha256=9G_q-EhE3nfJoxWFwAnMYdY0teSYqcYxTkk0JW5lmY0,7793
+AeroViz/rawDataReader/script/NEPH.py,sha256=x6HgnvpmmhOOvB4-nL-jTfoSo0x8FUxVBXPqAyfhZVk,3215
+AeroViz/rawDataReader/script/OCEC.py,sha256=jWWaNbCjP5MJDYrdWUhjrQLClaWqC8SGDVPIFJ9xljU,3413
AeroViz/rawDataReader/script/SMPS.py,sha256=EtXmeukOIwqfMwMJqv99_STfVg0uPdVr96r-tfD95gk,2774
-AeroViz/rawDataReader/script/TEOM.py,sha256=
+AeroViz/rawDataReader/script/TEOM.py,sha256=jsxU4W46FmLjiIthmPOHo6CAYFZiPENhW80WjMCiIPA,2103
AeroViz/rawDataReader/script/VOC.py,sha256=GUme72ZyjSzREsFNUgOV_OCESIVJBXY9KrKP1c9Av7I,1248
AeroViz/rawDataReader/script/XRF.py,sha256=SU1-D94GkwdkjlNXcyXbwQG1tOYCpeL6GTVkaLBHc-s,187
AeroViz/rawDataReader/script/__init__.py,sha256=s3c797Q8EAGcJCxVRTA-KdHie-vHLNYbMxwa5c0qz-I,214

@@ -94,8 +95,8 @@ AeroViz/tools/database.py,sha256=05VzjJyhlRrhsZdhfFQ__7CxGm4MdFekLjz3_Is5h9U,343
AeroViz/tools/dataclassifier.py,sha256=_wpv0PlZ5EGkcNqHxfFtdEsYvHP5FVE8sMZXikhm_YE,4492
AeroViz/tools/dataprinter.py,sha256=Jq2Yztpa9YCOeLDVTrRs7PhSdNIPhEAexVj1YSuJ7hY,2249
AeroViz/tools/datareader.py,sha256=iTQ0U8hdNMjCdbiH7EiKW10UEoxzxXRHc4s5_1IikJo,1933
-AeroViz-0.1.9.
-AeroViz-0.1.9.
-AeroViz-0.1.9.
-AeroViz-0.1.9.
-AeroViz-0.1.9.
+AeroViz-0.1.9.2.dist-info/LICENSE,sha256=E-679GpGGkp3irmtuJXiT7R4cNUA4cmsH6Q7QUgPf5U,1069
+AeroViz-0.1.9.2.dist-info/METADATA,sha256=qTnQ4ONlpadkTfYa5lhvSQ3DxuI4p_geNcv16f_bOjI,6373
+AeroViz-0.1.9.2.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
+AeroViz-0.1.9.2.dist-info/top_level.txt,sha256=BYsmTst_o4FZOKRP1XIvIMlN6mMTTXNfnSToL2_nVbQ,8
+AeroViz-0.1.9.2.dist-info/RECORD,,

The remaining four files (the renamed data file, LICENSE, WHEEL, and top_level.txt) have no content changes.