AeroViz 0.1.3b0__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of AeroViz might be problematic. Click here for more details.
- AeroViz/__init__.py +5 -3
- AeroViz/{config → data}/DEFAULT_DATA.csv +1 -1
- AeroViz/dataProcess/Chemistry/__init__.py +7 -7
- AeroViz/dataProcess/Chemistry/_isoropia.py +5 -2
- AeroViz/dataProcess/Chemistry/_mass_volume.py +15 -18
- AeroViz/dataProcess/Chemistry/_ocec.py +2 -2
- AeroViz/dataProcess/Chemistry/_teom.py +2 -1
- AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
- AeroViz/dataProcess/Optical/Angstrom_exponent.py +20 -0
- AeroViz/dataProcess/Optical/_IMPROVE.py +13 -15
- AeroViz/dataProcess/Optical/__init__.py +15 -30
- AeroViz/dataProcess/Optical/_absorption.py +21 -47
- AeroViz/dataProcess/Optical/_extinction.py +20 -15
- AeroViz/dataProcess/Optical/_mie.py +0 -1
- AeroViz/dataProcess/Optical/_scattering.py +19 -20
- AeroViz/dataProcess/SizeDistr/__init__.py +7 -7
- AeroViz/dataProcess/SizeDistr/_merge.py +2 -2
- AeroViz/dataProcess/SizeDistr/_merge_v1.py +2 -2
- AeroViz/dataProcess/SizeDistr/_merge_v2.py +2 -2
- AeroViz/dataProcess/SizeDistr/_merge_v3.py +1 -1
- AeroViz/dataProcess/SizeDistr/_merge_v4.py +1 -1
- AeroViz/dataProcess/VOC/__init__.py +3 -3
- AeroViz/dataProcess/__init__.py +28 -6
- AeroViz/dataProcess/core/__init__.py +10 -17
- AeroViz/plot/__init__.py +1 -1
- AeroViz/plot/box.py +2 -1
- AeroViz/plot/optical/optical.py +4 -4
- AeroViz/plot/regression.py +25 -39
- AeroViz/plot/scatter.py +68 -2
- AeroViz/plot/templates/__init__.py +2 -1
- AeroViz/plot/templates/ammonium_rich.py +34 -0
- AeroViz/plot/templates/diurnal_pattern.py +11 -9
- AeroViz/plot/templates/koschmieder.py +51 -115
- AeroViz/plot/templates/metal_heatmap.py +115 -17
- AeroViz/plot/timeseries/__init__.py +1 -0
- AeroViz/plot/timeseries/template.py +47 -0
- AeroViz/plot/timeseries/timeseries.py +275 -208
- AeroViz/plot/utils/plt_utils.py +2 -2
- AeroViz/plot/utils/units.json +5 -0
- AeroViz/plot/violin.py +9 -8
- AeroViz/process/__init__.py +2 -2
- AeroViz/process/script/AbstractDistCalc.py +1 -1
- AeroViz/process/script/Chemical.py +5 -4
- AeroViz/process/script/Others.py +1 -1
- AeroViz/rawDataReader/__init__.py +17 -22
- AeroViz/rawDataReader/{utils/config.py → config/supported_instruments.py} +38 -52
- AeroViz/rawDataReader/core/__init__.py +104 -229
- AeroViz/rawDataReader/script/AE33.py +10 -11
- AeroViz/rawDataReader/script/AE43.py +8 -11
- AeroViz/rawDataReader/script/APS_3321.py +6 -6
- AeroViz/rawDataReader/script/Aurora.py +18 -19
- AeroViz/rawDataReader/script/BC1054.py +11 -15
- AeroViz/rawDataReader/script/EPA_vertical.py +35 -7
- AeroViz/rawDataReader/script/GRIMM.py +2 -9
- AeroViz/rawDataReader/script/{IGAC_ZM.py → IGAC.py} +17 -17
- AeroViz/rawDataReader/script/MA350.py +7 -14
- AeroViz/rawDataReader/script/Minion.py +103 -0
- AeroViz/rawDataReader/script/NEPH.py +24 -29
- AeroViz/rawDataReader/script/SMPS_TH.py +4 -4
- AeroViz/rawDataReader/script/SMPS_aim11.py +6 -6
- AeroViz/rawDataReader/script/SMPS_genr.py +6 -6
- AeroViz/rawDataReader/script/Sunset_OCEC.py +60 -0
- AeroViz/rawDataReader/script/TEOM.py +8 -6
- AeroViz/rawDataReader/script/Table.py +7 -8
- AeroViz/rawDataReader/script/VOC.py +26 -0
- AeroViz/rawDataReader/script/__init__.py +10 -12
- AeroViz/tools/database.py +7 -9
- AeroViz/tools/datareader.py +3 -3
- {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.4.dist-info}/METADATA +1 -1
- AeroViz-0.1.4.dist-info/RECORD +112 -0
- AeroViz/rawDataReader/script/IGAC_TH.py +0 -104
- AeroViz/rawDataReader/script/OCEC_LCRES.py +0 -34
- AeroViz/rawDataReader/script/OCEC_RES.py +0 -28
- AeroViz/rawDataReader/script/VOC_TH.py +0 -30
- AeroViz/rawDataReader/script/VOC_ZM.py +0 -37
- AeroViz-0.1.3b0.dist-info/RECORD +0 -110
- /AeroViz/{config → data}/DEFAULT_PNSD_DATA.csv +0 -0
- /AeroViz/rawDataReader/{utils → config}/__init__.py +0 -0
- {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.4.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.4.dist-info}/WHEEL +0 -0
- {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.4.dist-info}/top_level.txt +0 -0
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import numpy as np
|
|
1
2
|
from pandas import read_csv, to_numeric
|
|
2
3
|
|
|
3
4
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
@@ -7,12 +8,39 @@ class Reader(AbstractReader):
|
|
|
7
8
|
nam = 'EPA_vertical'
|
|
8
9
|
|
|
9
10
|
def _raw_reader(self, _file):
|
|
10
|
-
with _file.open('r', encoding='
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
11
|
+
with _file.open('r', encoding='ascii', errors='ignore') as f:
|
|
12
|
+
# 有、無輸出有效值都可以
|
|
13
|
+
# read 查詢小時值(測項).csv
|
|
14
|
+
df = read_csv(f, encoding='ascii', encoding_errors='ignore', index_col=0, parse_dates=True,
|
|
15
|
+
usecols=lambda col: col != 'Unnamed: 1')
|
|
14
16
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
+
df.index.name = 'Time'
|
|
18
|
+
df.rename(columns={'AMB_TEMP': 'AT', 'WIND_SPEED': 'WS', 'WIND_DIREC': 'WD'}, inplace=True)
|
|
17
19
|
|
|
18
|
-
|
|
20
|
+
# 欄位排序
|
|
21
|
+
desired_order = ['SO2', 'NO', 'NOx', 'NO2', 'CO', 'O3', 'THC', 'NMHC', 'CH4', 'PM10', 'PM2.5', 'WS', 'WD',
|
|
22
|
+
'AT', 'RH']
|
|
23
|
+
|
|
24
|
+
missing_columns = []
|
|
25
|
+
|
|
26
|
+
for col in desired_order:
|
|
27
|
+
if col not in df.columns:
|
|
28
|
+
df[col] = np.nan
|
|
29
|
+
missing_columns.append(col)
|
|
30
|
+
|
|
31
|
+
if missing_columns:
|
|
32
|
+
self.logger.info(f"{'=' * 60}")
|
|
33
|
+
self.logger.info(f"Missing columns: {missing_columns}")
|
|
34
|
+
self.logger.info(f"{'=' * 60}")
|
|
35
|
+
print(f"Missing columns: {missing_columns}")
|
|
36
|
+
|
|
37
|
+
df = df[desired_order]
|
|
38
|
+
|
|
39
|
+
# 如果沒有將無效值拿掉就輸出 請將包含 #、L、O 的字串替換成 *
|
|
40
|
+
df.replace(to_replace=r'\d*[#LO]\b', value='*', regex=True, inplace=True)
|
|
41
|
+
df = df.apply(to_numeric, errors='coerce')
|
|
42
|
+
|
|
43
|
+
return df
|
|
44
|
+
|
|
45
|
+
def _QC(self, _df):
|
|
46
|
+
return _df
|
|
@@ -24,12 +24,5 @@ class Reader(AbstractReader):
|
|
|
24
24
|
return _df / 0.035
|
|
25
25
|
|
|
26
26
|
def _QC(self, _df):
|
|
27
|
-
# QC data in
|
|
28
|
-
|
|
29
|
-
_df_ave = _df_1hr.mean()
|
|
30
|
-
_df_std = _df_1hr.std()
|
|
31
|
-
_df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
|
|
32
|
-
|
|
33
|
-
return _df_1hr.mask(_df_lowb | _df_highb).copy()
|
|
34
|
-
|
|
35
|
-
return _df.resample('5min').apply(_QC_func).resample('1h').mean()
|
|
27
|
+
# QC data in 1h
|
|
28
|
+
return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
|
|
@@ -8,22 +8,21 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class Reader(AbstractReader):
|
|
11
|
-
nam = '
|
|
11
|
+
nam = 'IGAC'
|
|
12
12
|
|
|
13
13
|
def _raw_reader(self, _file):
|
|
14
14
|
|
|
15
|
-
with
|
|
16
|
-
_df = read_csv(f, parse_dates=
|
|
15
|
+
with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
|
|
16
|
+
_df = read_csv(f, parse_dates=True, index_col=0, na_values='-').apply(to_numeric, errors='coerce')
|
|
17
17
|
|
|
18
18
|
_df.columns = _df.keys().str.strip(' ')
|
|
19
19
|
_df.index.name = 'time'
|
|
20
20
|
|
|
21
|
-
return _df.loc[_df.index.
|
|
21
|
+
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
22
22
|
|
|
23
|
-
## QC data
|
|
24
23
|
def _QC(self, _df):
|
|
25
24
|
|
|
26
|
-
|
|
25
|
+
# QC parameter, function (MDL SE LE)
|
|
27
26
|
_mdl = {
|
|
28
27
|
'Na+': 0.06,
|
|
29
28
|
'NH4+': 0.05,
|
|
@@ -35,7 +34,8 @@ class Reader(AbstractReader):
|
|
|
35
34
|
'NO3-': 0.11,
|
|
36
35
|
'SO42-': 0.08,
|
|
37
36
|
}
|
|
38
|
-
|
|
37
|
+
|
|
38
|
+
# _mdl.update(self._oth_set.get('mdl', {}))
|
|
39
39
|
|
|
40
40
|
def _se_le(_df_, _log=False):
|
|
41
41
|
_df_ = np.log10(_df_) if _log else _df_
|
|
@@ -51,27 +51,27 @@ class Reader(AbstractReader):
|
|
|
51
51
|
return 10 ** _se, 10 ** _le
|
|
52
52
|
return _se, _le
|
|
53
53
|
|
|
54
|
-
_cation, _anion, _main = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'],
|
|
55
|
-
|
|
56
|
-
|
|
54
|
+
_cation, _anion, _main = (['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'],
|
|
55
|
+
['Cl-', 'NO2-', 'NO3-', 'SO42-', ],
|
|
56
|
+
['SO42-', 'NO3-', 'NH4+'])
|
|
57
57
|
|
|
58
58
|
_df_salt = _df[_mdl.keys()].copy()
|
|
59
59
|
_df_pm = _df['PM2.5'].copy()
|
|
60
60
|
|
|
61
|
-
|
|
62
|
-
|
|
61
|
+
# lower than PM2.5
|
|
62
|
+
# conc. of main salt should be present at the same time (NH4+, SO42-, NO3-)
|
|
63
63
|
_df_salt = _df_salt.mask(_df_salt.sum(axis=1, min_count=1) > _df_pm).dropna(subset=_main).copy()
|
|
64
64
|
|
|
65
|
-
|
|
65
|
+
# mdl
|
|
66
66
|
for (_key, _df_col), _mdl_val in zip(_df_salt.items(), _mdl.values()):
|
|
67
67
|
_df_salt[_key] = _df_col.mask(_df_col < _mdl_val, _mdl_val / 2)
|
|
68
68
|
|
|
69
|
-
|
|
70
|
-
|
|
69
|
+
# calculate SE LE
|
|
70
|
+
# salt < LE
|
|
71
71
|
_se, _le = _se_le(_df_salt, _log=True)
|
|
72
72
|
_df_salt = _df_salt.mask(_df_salt > _le).copy()
|
|
73
73
|
|
|
74
|
-
|
|
74
|
+
# C/A, A/C
|
|
75
75
|
_rat_CA = (_df_salt[_cation].sum(axis=1) / _df_salt[_anion].sum(axis=1)).to_frame()
|
|
76
76
|
_rat_AC = (1 / _rat_CA).copy()
|
|
77
77
|
|
|
@@ -83,7 +83,7 @@ class Reader(AbstractReader):
|
|
|
83
83
|
|
|
84
84
|
_df_salt = _df_salt.where((_cond_CA * _cond_AC)[0]).copy()
|
|
85
85
|
|
|
86
|
-
|
|
86
|
+
# conc. of main salt > SE
|
|
87
87
|
_se, _le = _se_le(_df_salt[_main], _log=True)
|
|
88
88
|
_df_salt[_main] = _df_salt[_main].mask(_df_salt[_main] < _se).copy()
|
|
89
89
|
|
|
@@ -22,24 +22,17 @@ class Reader(AbstractReader):
|
|
|
22
22
|
'BB (%)': 'BB',
|
|
23
23
|
})
|
|
24
24
|
|
|
25
|
-
#
|
|
26
|
-
#
|
|
27
|
-
# _df = _df.where((_df['Status'] != 32) | (_df['Status'] != 65536)).copy()
|
|
25
|
+
# if self.meta.get('error_state', False):
|
|
26
|
+
# _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()
|
|
28
27
|
|
|
29
|
-
|
|
28
|
+
_df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'Delta-C', 'AAE', 'BB']]
|
|
29
|
+
|
|
30
|
+
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
30
31
|
|
|
31
32
|
# QC data
|
|
32
33
|
def _QC(self, _df):
|
|
33
34
|
# remove negative value
|
|
34
35
|
_df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'AAE', 'BB']].mask((_df < 0).copy())
|
|
35
36
|
|
|
36
|
-
#
|
|
37
|
-
|
|
38
|
-
def _QC_func(_df_1hr):
|
|
39
|
-
_df_ave = _df_1hr.mean()
|
|
40
|
-
_df_std = _df_1hr.std()
|
|
41
|
-
_df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
|
|
42
|
-
|
|
43
|
-
return _df_1hr.mask(_df_lowb | _df_highb).copy()
|
|
44
|
-
|
|
45
|
-
return _df.resample('1h', group_keys=False).apply(_QC_func).resample('5min').mean()
|
|
37
|
+
# QC data in 1h
|
|
38
|
+
return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from pandas import read_csv, to_datetime, to_numeric
|
|
3
|
+
|
|
4
|
+
from AeroViz.rawDataReader.core import AbstractReader
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Reader(AbstractReader):
|
|
8
|
+
nam = 'Minion'
|
|
9
|
+
|
|
10
|
+
def _raw_reader(self, _file):
|
|
11
|
+
with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
|
|
12
|
+
_df = read_csv(f, low_memory=False, index_col=0)
|
|
13
|
+
|
|
14
|
+
_df.index = to_datetime(_df.index, errors='coerce')
|
|
15
|
+
_df.index.name = 'time'
|
|
16
|
+
|
|
17
|
+
_df.columns = _df.keys().str.strip(' ')
|
|
18
|
+
|
|
19
|
+
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
20
|
+
|
|
21
|
+
def _QC(self, _df):
|
|
22
|
+
# XRF QAQC
|
|
23
|
+
_df = self.XRF_QAQC(_df)
|
|
24
|
+
|
|
25
|
+
# ions balance
|
|
26
|
+
_df = self.ions_balance(_df)
|
|
27
|
+
|
|
28
|
+
# remove negative value
|
|
29
|
+
_df = _df.mask((_df < 0).copy())
|
|
30
|
+
|
|
31
|
+
# QC data in 6h
|
|
32
|
+
return _df.resample('6h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
|
|
33
|
+
|
|
34
|
+
# base on Xact 625i Minimum Decision Limit (MDL) for XRF in ng/m3, 60 min sample time
|
|
35
|
+
def XRF_QAQC(self, df):
|
|
36
|
+
MDL = {
|
|
37
|
+
'Al': 100, 'Si': 18, 'P': 5.2, 'S': 3.2,
|
|
38
|
+
'Cl': 1.7, 'K': 1.2, 'Ca': 0.3, 'Ti': 1.6,
|
|
39
|
+
'V': 0.12, 'Cr': 0.12, 'Mn': 0.14, 'Fe': 0.17,
|
|
40
|
+
'Co': 0.14, 'Ni': 0.096, 'Cu': 0.079, 'Zn': 0.067,
|
|
41
|
+
'Ga': 0.059, 'Ge': 0.056, 'As': 0.063, 'Se': 0.081,
|
|
42
|
+
'Br': 0.1, 'Rb': 0.19, 'Sr': 0.22, 'Y': 0.28,
|
|
43
|
+
'Zr': 0.33, 'Nb': 0.41, 'Mo': 0.48, 'Ag': 1.9,
|
|
44
|
+
'Cd': 2.5, 'In': 3.1, 'Sn': 4.1, 'Sb': 5.2,
|
|
45
|
+
'Te': 0.6, 'I': 0.49, 'Cs': 0.37, 'Ba': 0.39,
|
|
46
|
+
'La': 0.36, 'Ce': 0.3, 'Pt': 0.12, 'Au': 0.1,
|
|
47
|
+
'Hg': 0.12, 'Tl': 0.12, 'Pb': 0.13, 'Bi': 0.13
|
|
48
|
+
}
|
|
49
|
+
# 將小於 MDL 值的數據替換為 NaN
|
|
50
|
+
for element, threshold in MDL.items():
|
|
51
|
+
if element in df.columns:
|
|
52
|
+
df[element] = df[element].where(df[element] >= threshold, np.nan)
|
|
53
|
+
|
|
54
|
+
self.logger.info(f"{'=' * 60}")
|
|
55
|
+
self.logger.info(f"XRF QAQC summary:")
|
|
56
|
+
self.logger.info("\t\ttransform values below MDL to NaN")
|
|
57
|
+
self.logger.info(f"{'=' * 60}")
|
|
58
|
+
|
|
59
|
+
return df
|
|
60
|
+
|
|
61
|
+
def ions_balance(self, df, tolerance=0.3):
|
|
62
|
+
"""
|
|
63
|
+
Calculate the balance of ions in the system
|
|
64
|
+
"""
|
|
65
|
+
# Define the ions
|
|
66
|
+
item = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+', 'F-', 'Cl-', 'NO2-', 'NO3-', 'PO43-', 'SO42-']
|
|
67
|
+
|
|
68
|
+
# Calculate the balance
|
|
69
|
+
_df = df[item].copy()
|
|
70
|
+
_df = _df.apply(lambda x: to_numeric(x, errors='coerce'))
|
|
71
|
+
_df['+_mole'] = _df[['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+']].div([23, 18, 39, (24 / 2), (40 / 2)]).sum(axis=1,
|
|
72
|
+
skipna=True)
|
|
73
|
+
_df['-_mole'] = _df[['Cl-', 'NO2-', 'NO3-', 'SO42-']].div([35.5, 46, 62, (96 / 2)]).sum(axis=1, skipna=True)
|
|
74
|
+
|
|
75
|
+
# Avoid division by zero
|
|
76
|
+
_df['ratio'] = np.where(_df['-_mole'] != 0, _df['+_mole'] / _df['-_mole'], np.nan)
|
|
77
|
+
|
|
78
|
+
# Calculate bounds
|
|
79
|
+
lower_bound, upper_bound = 1 - tolerance, 1 + tolerance
|
|
80
|
+
|
|
81
|
+
# 根据ratio决定是否保留原始数据
|
|
82
|
+
valid_mask = (
|
|
83
|
+
(_df['ratio'] <= upper_bound) &
|
|
84
|
+
(_df['ratio'] >= lower_bound) &
|
|
85
|
+
~np.isnan(_df['+_mole']) &
|
|
86
|
+
~np.isnan(_df['-_mole'])
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
# 保留数据或将不符合条件的行设为NaN
|
|
90
|
+
df.loc[~valid_mask, item] = np.nan
|
|
91
|
+
|
|
92
|
+
# 计算保留的数据的百分比
|
|
93
|
+
retained_percentage = (valid_mask.sum() / len(df)) * 100
|
|
94
|
+
|
|
95
|
+
self.logger.info(f"{'=' * 60}")
|
|
96
|
+
self.logger.info(f"Ions balance summary:")
|
|
97
|
+
self.logger.info(f"\t\tretain {retained_percentage.__round__(0)}% data within tolerance {tolerance}")
|
|
98
|
+
self.logger.info(f"{'=' * 60}")
|
|
99
|
+
|
|
100
|
+
if retained_percentage < 70:
|
|
101
|
+
self.logger.warning("Warning: The percentage of retained data is less than 70%")
|
|
102
|
+
|
|
103
|
+
return df
|
|
@@ -13,21 +13,24 @@ class Reader(AbstractReader):
|
|
|
13
13
|
_df_grp = _df.groupby(0)
|
|
14
14
|
|
|
15
15
|
# T : time
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
_idx_tm = to_datetime((_df_tm[1] + _df_tm[2] + _df_tm[3] + _df_tm[4] + _df_tm[5] + _df_tm[6]),
|
|
23
|
-
format='%Y%m%d%H%M%S')
|
|
16
|
+
_idx_tm = to_datetime(
|
|
17
|
+
_df_grp.get_group('T')[[1, 2, 3, 4, 5, 6]]
|
|
18
|
+
.map(lambda x: f"{int(x):02d}")
|
|
19
|
+
.agg(''.join, axis=1),
|
|
20
|
+
format='%Y%m%d%H%M%S'
|
|
21
|
+
)
|
|
24
22
|
|
|
25
23
|
# D : data
|
|
26
24
|
# col : 3~8 B G R BB BG BR
|
|
27
25
|
# 1e6
|
|
28
26
|
try:
|
|
29
27
|
_df_dt = _df_grp.get_group('D')[[1, 2, 3, 4, 5, 6, 7, 8]].set_index(_idx_tm)
|
|
30
|
-
|
|
28
|
+
|
|
29
|
+
try:
|
|
30
|
+
_df_out = (_df_dt.groupby(1).get_group('NBXX')[[3, 4, 5, 6, 7, 8]] * 1e6).reindex(_idx_tm)
|
|
31
|
+
except KeyError:
|
|
32
|
+
_df_out = (_df_dt.groupby(1).get_group('NTXX')[[3, 4, 5, 6, 7, 8]] * 1e6).reindex(_idx_tm)
|
|
33
|
+
|
|
31
34
|
_df_out.columns = ['B', 'G', 'R', 'BB', 'BG', 'BR']
|
|
32
35
|
_df_out.index.name = 'Time'
|
|
33
36
|
|
|
@@ -39,24 +42,24 @@ class Reader(AbstractReader):
|
|
|
39
42
|
|
|
40
43
|
_df_out.mask(_df_out['status'] != 0) # 0000 -> numeric to 0
|
|
41
44
|
|
|
42
|
-
|
|
45
|
+
_df = _df_out[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']]
|
|
46
|
+
|
|
47
|
+
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
43
48
|
|
|
44
49
|
except ValueError:
|
|
45
50
|
group_sizes = _df_grp.size()
|
|
46
51
|
print(group_sizes)
|
|
47
|
-
# Define the valid groups
|
|
48
|
-
valid_groups = {'B', 'G', 'R', 'D', 'T', 'Y', 'Z'}
|
|
49
52
|
|
|
50
|
-
#
|
|
53
|
+
# Define valid groups and find invalid indices
|
|
54
|
+
valid_groups = {'B', 'G', 'R', 'D', 'T', 'Y', 'Z'}
|
|
51
55
|
invalid_indices = _df[~_df[0].isin(valid_groups)].index
|
|
52
56
|
|
|
53
|
-
# Print
|
|
54
|
-
invalid_values = _df.loc[invalid_indices, 0]
|
|
57
|
+
# Print invalid indices and values
|
|
55
58
|
print("Invalid values and their indices:")
|
|
56
|
-
for idx
|
|
57
|
-
print(f"Index: {idx}, Value: {
|
|
59
|
+
for idx in invalid_indices:
|
|
60
|
+
print(f"Index: {idx}, Value: {_df.at[idx, 0]}")
|
|
58
61
|
|
|
59
|
-
#
|
|
62
|
+
# Return an empty DataFrame with specified columns if there's a length mismatch
|
|
60
63
|
columns = ['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']
|
|
61
64
|
_df_out = DataFrame(index=_idx_tm, columns=columns)
|
|
62
65
|
_df_out.index.name = 'Time'
|
|
@@ -66,15 +69,7 @@ class Reader(AbstractReader):
|
|
|
66
69
|
# QC data
|
|
67
70
|
def _QC(self, _df):
|
|
68
71
|
# remove negative value
|
|
69
|
-
_df = _df.mask((_df <=
|
|
70
|
-
|
|
71
|
-
# call by _QC function
|
|
72
|
-
# QC data in 1 hr
|
|
73
|
-
def _QC_func(_df_1hr):
|
|
74
|
-
_df_ave = _df_1hr.mean()
|
|
75
|
-
_df_std = _df_1hr.std()
|
|
76
|
-
_df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
|
|
77
|
-
|
|
78
|
-
return _df_1hr.mask(_df_lowb | _df_highb).copy()
|
|
72
|
+
_df = _df.mask((_df <= 5).copy())
|
|
79
73
|
|
|
80
|
-
|
|
74
|
+
# QC data in 1h
|
|
75
|
+
return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
|
|
@@ -21,19 +21,19 @@ class Reader(AbstractReader):
|
|
|
21
21
|
_df_idx = to_datetime(_df.index, errors='coerce')
|
|
22
22
|
return _df[_newkey.keys()].rename(_newkey, axis=1).set_index(_df_idx).loc[_df_idx.dropna()]
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
# QC data
|
|
25
25
|
def _QC(self, _df):
|
|
26
26
|
import numpy as n
|
|
27
27
|
|
|
28
|
-
|
|
28
|
+
# mask out the data size lower than 7
|
|
29
29
|
_df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
|
|
30
30
|
_df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
|
|
31
31
|
_df = _df.mask(_df_size < 7)
|
|
32
32
|
|
|
33
|
-
|
|
33
|
+
# remove total conc. lower than 2000
|
|
34
34
|
_df = _df.mask(_df['total'] < 2000)
|
|
35
35
|
|
|
36
|
-
|
|
36
|
+
# remove the bin over 400 nm which num. conc. larger than 4000
|
|
37
37
|
_df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]
|
|
38
38
|
|
|
39
39
|
_df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)
|
|
@@ -21,29 +21,29 @@ class Reader(AbstractReader):
|
|
|
21
21
|
_df = read_csv(f, skiprows=skiprows)
|
|
22
22
|
_tm_idx = to_datetime(_df['DateTime Sample Start'], format='%d/%m/%Y %X', errors='coerce')
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
# index
|
|
25
25
|
_df = _df.set_index(_tm_idx).loc[_tm_idx.dropna()]
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
# keys
|
|
28
28
|
_key = to_numeric(_df.keys(), errors='coerce')
|
|
29
29
|
_df.columns = _key
|
|
30
30
|
_df = _df.loc[:, ~_key.isna()]
|
|
31
31
|
|
|
32
32
|
return _df.apply(to_numeric, errors='coerce')
|
|
33
33
|
|
|
34
|
-
|
|
34
|
+
# QC data
|
|
35
35
|
def _QC(self, _df):
|
|
36
36
|
import numpy as n
|
|
37
37
|
|
|
38
|
-
|
|
38
|
+
# mask out the data size lower than 7
|
|
39
39
|
_df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
|
|
40
40
|
_df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
|
|
41
41
|
_df = _df.mask(_df_size < 7)
|
|
42
42
|
|
|
43
|
-
|
|
43
|
+
# remove total conc. lower than 2000
|
|
44
44
|
_df = _df.mask(_df['total'] < 2000)
|
|
45
45
|
|
|
46
|
-
|
|
46
|
+
# remove the bin over 400 nm which num. conc. larger than 4000
|
|
47
47
|
_df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]
|
|
48
48
|
|
|
49
49
|
_df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)
|
|
@@ -21,29 +21,29 @@ class Reader(AbstractReader):
|
|
|
21
21
|
_df = read_table(f, skiprows=skiprows)
|
|
22
22
|
_tm_idx = to_datetime(_df['Date'] + _df['Start Time'], format='%m/%d/%y%X', errors='coerce')
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
# index
|
|
25
25
|
_df = _df.set_index(_tm_idx).loc[_tm_idx.dropna()]
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
# keys
|
|
28
28
|
_key = to_numeric(_df.keys(), errors='coerce')
|
|
29
29
|
_df.columns = _key
|
|
30
30
|
_df = _df.loc[:, ~_key.isna()]
|
|
31
31
|
|
|
32
32
|
return _df.apply(to_numeric, errors='coerce')
|
|
33
33
|
|
|
34
|
-
|
|
34
|
+
# QC data
|
|
35
35
|
def _QC(self, _df):
|
|
36
36
|
import numpy as n
|
|
37
37
|
|
|
38
|
-
|
|
38
|
+
# mask out the data size lower than 7
|
|
39
39
|
_df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
|
|
40
40
|
_df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
|
|
41
41
|
_df = _df.mask(_df_size < 7)
|
|
42
42
|
|
|
43
|
-
|
|
43
|
+
# remove total conc. lower than 2000
|
|
44
44
|
_df = _df.mask(_df['total'] < 2000)
|
|
45
45
|
|
|
46
|
-
|
|
46
|
+
# remove the bin over 400 nm which num. conc. larger than 4000
|
|
47
47
|
_df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]
|
|
48
48
|
|
|
49
49
|
_df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
from pandas import to_datetime, read_csv
|
|
2
|
+
|
|
3
|
+
from AeroViz.rawDataReader.core import AbstractReader
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Reader(AbstractReader):
|
|
7
|
+
nam = 'Sunset_OCEC'
|
|
8
|
+
|
|
9
|
+
def _raw_reader(self, _file):
|
|
10
|
+
with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
|
|
11
|
+
_df = read_csv(f, skiprows=3)
|
|
12
|
+
|
|
13
|
+
_df['Start Date/Time'] = _df['Start Date/Time'].str.strip()
|
|
14
|
+
_df['time'] = to_datetime(_df['Start Date/Time'], format='%m/%d/%Y %I:%M:%S %p', errors='coerce')
|
|
15
|
+
_df = _df.set_index('time')
|
|
16
|
+
|
|
17
|
+
_df = _df.rename(columns={
|
|
18
|
+
'Thermal/Optical OC (ugC/LCm^3)': 'Thermal_OC',
|
|
19
|
+
'OC ugC/m^3 (Thermal/Optical)': 'Thermal_OC',
|
|
20
|
+
|
|
21
|
+
'Thermal/Optical EC (ugC/LCm^3)': 'Thermal_EC',
|
|
22
|
+
'EC ugC/m^3 (Thermal/Optical)': 'Thermal_EC',
|
|
23
|
+
|
|
24
|
+
'OC=TC-BC (ugC/LCm^3)': 'Optical_OC',
|
|
25
|
+
'OC by diff ugC (TC-OptEC)': 'Optical_OC',
|
|
26
|
+
|
|
27
|
+
'BC (ugC/LCm^3)': 'Optical_EC',
|
|
28
|
+
'OptEC ugC/m^3': 'Optical_EC',
|
|
29
|
+
|
|
30
|
+
'Sample Volume Local Condition Actual m^3': 'Sample_Volume',
|
|
31
|
+
'TC (ugC/LCm^3)': 'TC',
|
|
32
|
+
'TC ugC/m^3': 'TC',
|
|
33
|
+
'OCPk1-ug C': 'OC1',
|
|
34
|
+
'OCPk2-ug C': 'OC2',
|
|
35
|
+
'OCPk3-ug C': 'OC3',
|
|
36
|
+
'OCPk4-ug C': 'OC4',
|
|
37
|
+
'Pyrolized C ug': 'PC'
|
|
38
|
+
})
|
|
39
|
+
|
|
40
|
+
_df = _df[['Thermal_OC', 'Optical_OC', 'Thermal_EC', 'Optical_EC', 'TC', 'OC1', 'OC2', 'OC3', 'OC4']]
|
|
41
|
+
|
|
42
|
+
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
43
|
+
|
|
44
|
+
# QC data
|
|
45
|
+
def _QC(self, _df):
|
|
46
|
+
import numpy as np
|
|
47
|
+
|
|
48
|
+
_df = _df.where(_df > 0)
|
|
49
|
+
|
|
50
|
+
thresholds = {
|
|
51
|
+
'Thermal_OC': 0.3,
|
|
52
|
+
'Optical_OC': 0.3,
|
|
53
|
+
'Thermal_EC': 0.015,
|
|
54
|
+
'Optical_EC': 0.015
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
for col, thresh in thresholds.items():
|
|
58
|
+
_df.loc[_df[col] <= thresh, col] = np.nan
|
|
59
|
+
|
|
60
|
+
return _df
|
|
@@ -27,18 +27,20 @@ class Reader(AbstractReader):
|
|
|
27
27
|
|
|
28
28
|
_df = _df.where(_df['status'] < 1e-7)
|
|
29
29
|
|
|
30
|
-
|
|
30
|
+
_df = _df[['PM_NV', 'PM_Total', 'noise', ]]
|
|
31
31
|
|
|
32
|
-
|
|
32
|
+
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
33
|
+
|
|
34
|
+
# QC data
|
|
33
35
|
def _QC(self, _df):
|
|
34
36
|
|
|
35
37
|
_df_idx = _df.index.copy()
|
|
36
38
|
|
|
37
|
-
|
|
38
|
-
_df = _df.where(_df.noise < 0.01)[['PM_NV', 'PM_Total']].mask((_df
|
|
39
|
+
# remove negative value
|
|
40
|
+
_df = _df.where(_df.noise < 0.01)[['PM_NV', 'PM_Total']].mask((_df <= 0).copy())
|
|
39
41
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
+
# QC data in 1 hr
|
|
43
|
+
# remove data where size < 8 in 1-hr
|
|
42
44
|
for _key in ['PM_Total', 'PM_NV']:
|
|
43
45
|
_size = _df[_key].dropna().resample('1h').size().reindex(_df_idx).ffill().copy()
|
|
44
46
|
_df[_key] = _df[_key].mask(_size < 8)
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
# read meteorological data from google sheet
|
|
2
2
|
|
|
3
|
-
|
|
4
3
|
from pandas import read_csv, to_datetime
|
|
5
4
|
|
|
6
5
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
@@ -10,9 +9,7 @@ class Reader(AbstractReader):
|
|
|
10
9
|
nam = 'Table'
|
|
11
10
|
|
|
12
11
|
def _raw_reader(self, _file):
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
with (_file).open('r', encoding='utf-8-sig', errors='ignore') as f:
|
|
12
|
+
with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
|
|
16
13
|
_df = read_csv(f, low_memory=False, index_col=0)
|
|
17
14
|
|
|
18
15
|
_df.index = to_datetime(_df.index, errors='coerce', format=self._oth_set.get('date_format') or 'mixed')
|
|
@@ -20,9 +17,11 @@ class Reader(AbstractReader):
|
|
|
20
17
|
|
|
21
18
|
_df.columns = _df.keys().str.strip(' ')
|
|
22
19
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
return _df.loc[~_df.index.duplicated()]
|
|
20
|
+
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
26
21
|
|
|
27
22
|
def _QC(self, _df):
|
|
28
|
-
|
|
23
|
+
# remove negative value
|
|
24
|
+
_df = _df.mask((_df < 0).copy())
|
|
25
|
+
|
|
26
|
+
# QC data in 6h
|
|
27
|
+
return _df.resample('6h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
|
|
2
|
+
from pandas import read_csv
|
|
3
|
+
|
|
4
|
+
from AeroViz.rawDataReader.core import AbstractReader
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Reader(AbstractReader):
|
|
8
|
+
nam = 'VOC'
|
|
9
|
+
|
|
10
|
+
def _raw_reader(self, _file):
|
|
11
|
+
with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
|
|
12
|
+
_df = read_csv(f, parse_dates=[0], index_col=[0], na_values=('-', 'N.D.'))
|
|
13
|
+
|
|
14
|
+
_df.columns = _df.keys().str.strip(' ')
|
|
15
|
+
_df.index.name = 'time'
|
|
16
|
+
|
|
17
|
+
try:
|
|
18
|
+
_df = _df[self.meta["key"]].loc[_df.index.dropna()]
|
|
19
|
+
|
|
20
|
+
except KeyError:
|
|
21
|
+
_df = _df[self.meta["key_2"]].loc[_df.index.dropna()]
|
|
22
|
+
|
|
23
|
+
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
24
|
+
|
|
25
|
+
def _QC(self, _df):
|
|
26
|
+
return _df
|
|
@@ -1,22 +1,20 @@
|
|
|
1
1
|
__all__ = [
|
|
2
2
|
'NEPH',
|
|
3
3
|
'Aurora',
|
|
4
|
-
'Table',
|
|
5
|
-
'EPA_vertical',
|
|
6
|
-
'APS_3321',
|
|
7
4
|
'SMPS_TH',
|
|
5
|
+
'SMPS_genr',
|
|
6
|
+
'SMPS_aim11',
|
|
7
|
+
'APS_3321',
|
|
8
|
+
'GRIMM',
|
|
8
9
|
'AE33',
|
|
9
10
|
'AE43',
|
|
10
11
|
'BC1054',
|
|
11
12
|
'MA350',
|
|
12
13
|
'TEOM',
|
|
13
|
-
'
|
|
14
|
-
'
|
|
15
|
-
'
|
|
16
|
-
'
|
|
17
|
-
'
|
|
18
|
-
'
|
|
19
|
-
'SMPS_genr',
|
|
20
|
-
'SMPS_aim11',
|
|
21
|
-
'GRIMM'
|
|
14
|
+
'Sunset_OCEC',
|
|
15
|
+
'IGAC',
|
|
16
|
+
'VOC',
|
|
17
|
+
'Table',
|
|
18
|
+
'EPA_vertical',
|
|
19
|
+
'Minion'
|
|
22
20
|
]
|