AeroViz 0.1.3b0__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of AeroViz might be problematic. Click here for more details.
- AeroViz/__init__.py +5 -3
- AeroViz/{config → data}/DEFAULT_DATA.csv +1 -1
- AeroViz/dataProcess/Chemistry/__init__.py +28 -27
- AeroViz/dataProcess/Chemistry/_isoropia.py +11 -11
- AeroViz/dataProcess/Chemistry/_mass_volume.py +15 -18
- AeroViz/dataProcess/Chemistry/_ocec.py +21 -46
- AeroViz/dataProcess/Chemistry/_teom.py +2 -1
- AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
- AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
- AeroViz/dataProcess/Optical/Angstrom_exponent.py +20 -0
- AeroViz/dataProcess/Optical/_IMPROVE.py +13 -15
- AeroViz/dataProcess/Optical/__init__.py +15 -30
- AeroViz/dataProcess/Optical/_absorption.py +21 -47
- AeroViz/dataProcess/Optical/_extinction.py +20 -15
- AeroViz/dataProcess/Optical/_mie.py +0 -1
- AeroViz/dataProcess/Optical/_scattering.py +19 -20
- AeroViz/dataProcess/Optical/fRH.pkl +0 -0
- AeroViz/dataProcess/SizeDistr/__init__.py +7 -7
- AeroViz/dataProcess/SizeDistr/_merge.py +2 -2
- AeroViz/dataProcess/SizeDistr/_merge_v1.py +2 -2
- AeroViz/dataProcess/SizeDistr/_merge_v2.py +2 -2
- AeroViz/dataProcess/SizeDistr/_merge_v3.py +1 -1
- AeroViz/dataProcess/SizeDistr/_merge_v4.py +1 -1
- AeroViz/dataProcess/VOC/__init__.py +4 -9
- AeroViz/dataProcess/VOC/_potential_par.py +71 -37
- AeroViz/dataProcess/VOC/{voc_par.json → support_voc.json} +321 -339
- AeroViz/dataProcess/__init__.py +28 -6
- AeroViz/dataProcess/core/__init__.py +10 -17
- AeroViz/plot/__init__.py +1 -1
- AeroViz/plot/box.py +2 -1
- AeroViz/plot/optical/optical.py +4 -4
- AeroViz/plot/regression.py +25 -39
- AeroViz/plot/scatter.py +68 -2
- AeroViz/plot/templates/__init__.py +2 -1
- AeroViz/plot/templates/ammonium_rich.py +34 -0
- AeroViz/plot/templates/diurnal_pattern.py +11 -9
- AeroViz/plot/templates/koschmieder.py +51 -115
- AeroViz/plot/templates/metal_heatmap.py +115 -17
- AeroViz/plot/timeseries/__init__.py +1 -0
- AeroViz/plot/timeseries/template.py +47 -0
- AeroViz/plot/timeseries/timeseries.py +275 -208
- AeroViz/plot/utils/plt_utils.py +2 -2
- AeroViz/plot/utils/units.json +5 -0
- AeroViz/plot/violin.py +9 -8
- AeroViz/process/__init__.py +2 -2
- AeroViz/process/script/AbstractDistCalc.py +1 -1
- AeroViz/process/script/Chemical.py +5 -4
- AeroViz/process/script/Others.py +1 -1
- AeroViz/rawDataReader/__init__.py +66 -22
- AeroViz/rawDataReader/{utils/config.py → config/supported_instruments.py} +33 -54
- AeroViz/rawDataReader/core/__init__.py +116 -231
- AeroViz/rawDataReader/script/AE33.py +12 -13
- AeroViz/rawDataReader/script/AE43.py +10 -13
- AeroViz/rawDataReader/script/APS_3321.py +8 -8
- AeroViz/rawDataReader/script/Aurora.py +21 -19
- AeroViz/rawDataReader/script/BC1054.py +13 -17
- AeroViz/rawDataReader/script/EPA_vertical.py +36 -8
- AeroViz/rawDataReader/script/GRIMM.py +6 -13
- AeroViz/rawDataReader/script/{IGAC_ZM.py → IGAC.py} +18 -18
- AeroViz/rawDataReader/script/MA350.py +9 -16
- AeroViz/rawDataReader/script/Minion.py +103 -0
- AeroViz/rawDataReader/script/NEPH.py +28 -38
- AeroViz/rawDataReader/script/SMPS_TH.py +6 -6
- AeroViz/rawDataReader/script/SMPS_aim11.py +8 -8
- AeroViz/rawDataReader/script/SMPS_genr.py +8 -8
- AeroViz/rawDataReader/script/Sunset_OCEC.py +66 -0
- AeroViz/rawDataReader/script/TEOM.py +10 -8
- AeroViz/rawDataReader/script/Table.py +9 -10
- AeroViz/rawDataReader/script/VOC.py +33 -0
- AeroViz/rawDataReader/script/__init__.py +10 -12
- AeroViz/tools/database.py +7 -9
- AeroViz/tools/datareader.py +3 -3
- {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.5.dist-info}/METADATA +1 -1
- AeroViz-0.1.5.dist-info/RECORD +114 -0
- AeroViz/rawDataReader/script/IGAC_TH.py +0 -104
- AeroViz/rawDataReader/script/OCEC_LCRES.py +0 -34
- AeroViz/rawDataReader/script/OCEC_RES.py +0 -28
- AeroViz/rawDataReader/script/VOC_TH.py +0 -30
- AeroViz/rawDataReader/script/VOC_ZM.py +0 -37
- AeroViz-0.1.3b0.dist-info/RECORD +0 -110
- /AeroViz/{config → data}/DEFAULT_PNSD_DATA.csv +0 -0
- /AeroViz/rawDataReader/{utils → config}/__init__.py +0 -0
- {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.5.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.5.dist-info}/WHEEL +0 -0
- {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.5.dist-info}/top_level.txt +0 -0
|
@@ -6,9 +6,11 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'BC1054'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
10
|
-
with open(
|
|
11
|
-
_df = read_csv(f, parse_dates=
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
with open(file, 'r', encoding='utf-8', errors='ignore') as f:
|
|
11
|
+
_df = read_csv(f, parse_dates=True, index_col=0)
|
|
12
|
+
|
|
13
|
+
_df.columns = _df.columns.str.replace(' ', '')
|
|
12
14
|
|
|
13
15
|
_df = _df.rename(columns={
|
|
14
16
|
'BC1(ng/m3)': 'BC1',
|
|
@@ -23,24 +25,18 @@ class Reader(AbstractReader):
|
|
|
23
25
|
'BC10(ng/m3)': 'BC10'
|
|
24
26
|
})
|
|
25
27
|
|
|
26
|
-
# remove data without Status=32 (Automatic Tape Advance), 65536 (Tape Move)
|
|
27
|
-
|
|
28
|
-
|
|
28
|
+
# remove data without Status=1, 8, 16, 32 (Automatic Tape Advance), 65536 (Tape Move)
|
|
29
|
+
if self.meta.get('error_state', False):
|
|
30
|
+
_df = _df[~_df['Status'].isin(self.meta.get('error_state'))]
|
|
31
|
+
|
|
32
|
+
_df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']]
|
|
29
33
|
|
|
30
|
-
return _df[
|
|
34
|
+
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
31
35
|
|
|
32
36
|
# QC data
|
|
33
37
|
def _QC(self, _df):
|
|
34
38
|
# remove negative value
|
|
35
39
|
_df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']].mask((_df < 0).copy())
|
|
36
40
|
|
|
37
|
-
#
|
|
38
|
-
|
|
39
|
-
def _QC_func(_df_1hr):
|
|
40
|
-
_df_ave = _df_1hr.mean()
|
|
41
|
-
_df_std = _df_1hr.std()
|
|
42
|
-
_df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
|
|
43
|
-
|
|
44
|
-
return _df_1hr.mask(_df_lowb | _df_highb).copy()
|
|
45
|
-
|
|
46
|
-
return _df.resample('1h', group_keys=False).apply(_QC_func).resample('5min').mean()
|
|
41
|
+
# QC data in 1h
|
|
42
|
+
return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import numpy as np
|
|
1
2
|
from pandas import read_csv, to_numeric
|
|
2
3
|
|
|
3
4
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
@@ -6,13 +7,40 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
7
|
class Reader(AbstractReader):
|
|
7
8
|
nam = 'EPA_vertical'
|
|
8
9
|
|
|
9
|
-
def _raw_reader(self,
|
|
10
|
-
with
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
10
|
+
def _raw_reader(self, file):
|
|
11
|
+
with file.open('r', encoding='ascii', errors='ignore') as f:
|
|
12
|
+
# 有、無輸出有效值都可以
|
|
13
|
+
# read 查詢小時值(測項).csv
|
|
14
|
+
df = read_csv(f, encoding='ascii', encoding_errors='ignore', index_col=0, parse_dates=True,
|
|
15
|
+
usecols=lambda col: col != 'Unnamed: 1')
|
|
14
16
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
+
df.index.name = 'Time'
|
|
18
|
+
df.rename(columns={'AMB_TEMP': 'AT', 'WIND_SPEED': 'WS', 'WIND_DIREC': 'WD'}, inplace=True)
|
|
17
19
|
|
|
18
|
-
|
|
20
|
+
# 欄位排序
|
|
21
|
+
desired_order = ['SO2', 'NO', 'NOx', 'NO2', 'CO', 'O3', 'THC', 'NMHC', 'CH4', 'PM10', 'PM2.5', 'WS', 'WD',
|
|
22
|
+
'AT', 'RH']
|
|
23
|
+
|
|
24
|
+
missing_columns = []
|
|
25
|
+
|
|
26
|
+
for col in desired_order:
|
|
27
|
+
if col not in df.columns:
|
|
28
|
+
df[col] = np.nan
|
|
29
|
+
missing_columns.append(col)
|
|
30
|
+
|
|
31
|
+
if missing_columns:
|
|
32
|
+
self.logger.info(f"{'=' * 60}")
|
|
33
|
+
self.logger.info(f"Missing columns: {missing_columns}")
|
|
34
|
+
self.logger.info(f"{'=' * 60}")
|
|
35
|
+
print(f"Missing columns: {missing_columns}")
|
|
36
|
+
|
|
37
|
+
df = df[desired_order]
|
|
38
|
+
|
|
39
|
+
# 如果沒有將無效值拿掉就輸出 請將包含 #、L、O 的字串替換成 *
|
|
40
|
+
df.replace(to_replace=r'\d*[#LO]\b', value='*', regex=True, inplace=True)
|
|
41
|
+
df = df.apply(to_numeric, errors='coerce')
|
|
42
|
+
|
|
43
|
+
return df
|
|
44
|
+
|
|
45
|
+
def _QC(self, _df):
|
|
46
|
+
return _df
|
|
@@ -6,30 +6,23 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'GRIMM'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
10
|
|
|
11
|
-
_df = read_csv(
|
|
11
|
+
_df = read_csv(file, header=233, delimiter='\t', index_col=0, parse_dates=[0], encoding='ISO-8859-1',
|
|
12
12
|
dayfirst=True).rename_axis("Time")
|
|
13
13
|
_df.index = to_datetime(_df.index, format="%d/%m/%Y %H:%M:%S", dayfirst=True)
|
|
14
14
|
|
|
15
|
-
if
|
|
15
|
+
if file.name.startswith("A407ST"):
|
|
16
16
|
_df.drop(_df.columns[0:11].tolist() + _df.columns[128:].tolist(), axis=1, inplace=True)
|
|
17
17
|
else:
|
|
18
18
|
_df.drop(_df.columns[0:11].tolist() + _df.columns[-5:].tolist(), axis=1, inplace=True)
|
|
19
19
|
|
|
20
20
|
if _df.empty:
|
|
21
|
-
print(
|
|
21
|
+
print(file, "is empty")
|
|
22
22
|
return None
|
|
23
23
|
|
|
24
24
|
return _df / 0.035
|
|
25
25
|
|
|
26
26
|
def _QC(self, _df):
|
|
27
|
-
# QC data in
|
|
28
|
-
|
|
29
|
-
_df_ave = _df_1hr.mean()
|
|
30
|
-
_df_std = _df_1hr.std()
|
|
31
|
-
_df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
|
|
32
|
-
|
|
33
|
-
return _df_1hr.mask(_df_lowb | _df_highb).copy()
|
|
34
|
-
|
|
35
|
-
return _df.resample('5min').apply(_QC_func).resample('1h').mean()
|
|
27
|
+
# QC data in 1h
|
|
28
|
+
return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
|
|
@@ -8,22 +8,21 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class Reader(AbstractReader):
|
|
11
|
-
nam = '
|
|
11
|
+
nam = 'IGAC'
|
|
12
12
|
|
|
13
|
-
def _raw_reader(self,
|
|
13
|
+
def _raw_reader(self, file):
|
|
14
14
|
|
|
15
|
-
with
|
|
16
|
-
_df = read_csv(f, parse_dates=
|
|
15
|
+
with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
|
|
16
|
+
_df = read_csv(f, parse_dates=True, index_col=0, na_values='-').apply(to_numeric, errors='coerce')
|
|
17
17
|
|
|
18
18
|
_df.columns = _df.keys().str.strip(' ')
|
|
19
19
|
_df.index.name = 'time'
|
|
20
20
|
|
|
21
|
-
return _df.loc[_df.index.
|
|
21
|
+
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
22
22
|
|
|
23
|
-
## QC data
|
|
24
23
|
def _QC(self, _df):
|
|
25
24
|
|
|
26
|
-
|
|
25
|
+
# QC parameter, function (MDL SE LE)
|
|
27
26
|
_mdl = {
|
|
28
27
|
'Na+': 0.06,
|
|
29
28
|
'NH4+': 0.05,
|
|
@@ -35,7 +34,8 @@ class Reader(AbstractReader):
|
|
|
35
34
|
'NO3-': 0.11,
|
|
36
35
|
'SO42-': 0.08,
|
|
37
36
|
}
|
|
38
|
-
|
|
37
|
+
|
|
38
|
+
# _mdl.update(self._oth_set.get('mdl', {}))
|
|
39
39
|
|
|
40
40
|
def _se_le(_df_, _log=False):
|
|
41
41
|
_df_ = np.log10(_df_) if _log else _df_
|
|
@@ -51,27 +51,27 @@ class Reader(AbstractReader):
|
|
|
51
51
|
return 10 ** _se, 10 ** _le
|
|
52
52
|
return _se, _le
|
|
53
53
|
|
|
54
|
-
_cation, _anion, _main = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'],
|
|
55
|
-
|
|
56
|
-
|
|
54
|
+
_cation, _anion, _main = (['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'],
|
|
55
|
+
['Cl-', 'NO2-', 'NO3-', 'SO42-', ],
|
|
56
|
+
['SO42-', 'NO3-', 'NH4+'])
|
|
57
57
|
|
|
58
58
|
_df_salt = _df[_mdl.keys()].copy()
|
|
59
59
|
_df_pm = _df['PM2.5'].copy()
|
|
60
60
|
|
|
61
|
-
|
|
62
|
-
|
|
61
|
+
# lower than PM2.5
|
|
62
|
+
# conc. of main salt should be present at the same time (NH4+, SO42-, NO3-)
|
|
63
63
|
_df_salt = _df_salt.mask(_df_salt.sum(axis=1, min_count=1) > _df_pm).dropna(subset=_main).copy()
|
|
64
64
|
|
|
65
|
-
|
|
65
|
+
# mdl
|
|
66
66
|
for (_key, _df_col), _mdl_val in zip(_df_salt.items(), _mdl.values()):
|
|
67
67
|
_df_salt[_key] = _df_col.mask(_df_col < _mdl_val, _mdl_val / 2)
|
|
68
68
|
|
|
69
|
-
|
|
70
|
-
|
|
69
|
+
# calculate SE LE
|
|
70
|
+
# salt < LE
|
|
71
71
|
_se, _le = _se_le(_df_salt, _log=True)
|
|
72
72
|
_df_salt = _df_salt.mask(_df_salt > _le).copy()
|
|
73
73
|
|
|
74
|
-
|
|
74
|
+
# C/A, A/C
|
|
75
75
|
_rat_CA = (_df_salt[_cation].sum(axis=1) / _df_salt[_anion].sum(axis=1)).to_frame()
|
|
76
76
|
_rat_AC = (1 / _rat_CA).copy()
|
|
77
77
|
|
|
@@ -83,7 +83,7 @@ class Reader(AbstractReader):
|
|
|
83
83
|
|
|
84
84
|
_df_salt = _df_salt.where((_cond_CA * _cond_AC)[0]).copy()
|
|
85
85
|
|
|
86
|
-
|
|
86
|
+
# conc. of main salt > SE
|
|
87
87
|
_se, _le = _se_le(_df_salt[_main], _log=True)
|
|
88
88
|
_df_salt[_main] = _df_salt[_main].mask(_df_salt[_main] < _se).copy()
|
|
89
89
|
|
|
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'MA350'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
10
|
-
_df = read_csv(
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
_df = read_csv(file, parse_dates=['Date / time local'], index_col='Date / time local').rename_axis("Time")
|
|
11
11
|
|
|
12
12
|
_df = _df.rename(columns={
|
|
13
13
|
'UV BCc': 'BC1',
|
|
@@ -22,24 +22,17 @@ class Reader(AbstractReader):
|
|
|
22
22
|
'BB (%)': 'BB',
|
|
23
23
|
})
|
|
24
24
|
|
|
25
|
-
#
|
|
26
|
-
#
|
|
27
|
-
# _df = _df.where((_df['Status'] != 32) | (_df['Status'] != 65536)).copy()
|
|
25
|
+
# if self.meta.get('error_state', False):
|
|
26
|
+
# _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()
|
|
28
27
|
|
|
29
|
-
|
|
28
|
+
_df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'Delta-C', 'AAE', 'BB']]
|
|
29
|
+
|
|
30
|
+
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
30
31
|
|
|
31
32
|
# QC data
|
|
32
33
|
def _QC(self, _df):
|
|
33
34
|
# remove negative value
|
|
34
35
|
_df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'AAE', 'BB']].mask((_df < 0).copy())
|
|
35
36
|
|
|
36
|
-
#
|
|
37
|
-
|
|
38
|
-
def _QC_func(_df_1hr):
|
|
39
|
-
_df_ave = _df_1hr.mean()
|
|
40
|
-
_df_std = _df_1hr.std()
|
|
41
|
-
_df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
|
|
42
|
-
|
|
43
|
-
return _df_1hr.mask(_df_lowb | _df_highb).copy()
|
|
44
|
-
|
|
45
|
-
return _df.resample('1h', group_keys=False).apply(_QC_func).resample('5min').mean()
|
|
37
|
+
# QC data in 1h
|
|
38
|
+
return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from pandas import read_csv, to_datetime, to_numeric
|
|
3
|
+
|
|
4
|
+
from AeroViz.rawDataReader.core import AbstractReader
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Reader(AbstractReader):
|
|
8
|
+
nam = 'Minion'
|
|
9
|
+
|
|
10
|
+
def _raw_reader(self, file):
|
|
11
|
+
with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
|
|
12
|
+
_df = read_csv(f, low_memory=False, index_col=0)
|
|
13
|
+
|
|
14
|
+
_df.index = to_datetime(_df.index, errors='coerce')
|
|
15
|
+
_df.index.name = 'time'
|
|
16
|
+
|
|
17
|
+
_df.columns = _df.keys().str.strip(' ')
|
|
18
|
+
|
|
19
|
+
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
20
|
+
|
|
21
|
+
def _QC(self, _df):
|
|
22
|
+
# XRF QAQC
|
|
23
|
+
_df = self.XRF_QAQC(_df)
|
|
24
|
+
|
|
25
|
+
# ions balance
|
|
26
|
+
_df = self.ions_balance(_df)
|
|
27
|
+
|
|
28
|
+
# remove negative value
|
|
29
|
+
_df = _df.mask((_df < 0).copy())
|
|
30
|
+
|
|
31
|
+
# QC data in 6h
|
|
32
|
+
return _df.resample('6h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
|
|
33
|
+
|
|
34
|
+
# base on Xact 625i Minimum Decision Limit (MDL) for XRF in ng/m3, 60 min sample time
|
|
35
|
+
def XRF_QAQC(self, df):
|
|
36
|
+
MDL = {
|
|
37
|
+
'Al': 100, 'Si': 18, 'P': 5.2, 'S': 3.2,
|
|
38
|
+
'Cl': 1.7, 'K': 1.2, 'Ca': 0.3, 'Ti': 1.6,
|
|
39
|
+
'V': 0.12, 'Cr': 0.12, 'Mn': 0.14, 'Fe': 0.17,
|
|
40
|
+
'Co': 0.14, 'Ni': 0.096, 'Cu': 0.079, 'Zn': 0.067,
|
|
41
|
+
'Ga': 0.059, 'Ge': 0.056, 'As': 0.063, 'Se': 0.081,
|
|
42
|
+
'Br': 0.1, 'Rb': 0.19, 'Sr': 0.22, 'Y': 0.28,
|
|
43
|
+
'Zr': 0.33, 'Nb': 0.41, 'Mo': 0.48, 'Ag': 1.9,
|
|
44
|
+
'Cd': 2.5, 'In': 3.1, 'Sn': 4.1, 'Sb': 5.2,
|
|
45
|
+
'Te': 0.6, 'I': 0.49, 'Cs': 0.37, 'Ba': 0.39,
|
|
46
|
+
'La': 0.36, 'Ce': 0.3, 'Pt': 0.12, 'Au': 0.1,
|
|
47
|
+
'Hg': 0.12, 'Tl': 0.12, 'Pb': 0.13, 'Bi': 0.13
|
|
48
|
+
}
|
|
49
|
+
# 將小於 MDL 值的數據替換為 NaN
|
|
50
|
+
for element, threshold in MDL.items():
|
|
51
|
+
if element in df.columns:
|
|
52
|
+
df[element] = df[element].where(df[element] >= threshold, np.nan)
|
|
53
|
+
|
|
54
|
+
self.logger.info(f"{'=' * 60}")
|
|
55
|
+
self.logger.info(f"XRF QAQC summary:")
|
|
56
|
+
self.logger.info("\t\ttransform values below MDL to NaN")
|
|
57
|
+
self.logger.info(f"{'=' * 60}")
|
|
58
|
+
|
|
59
|
+
return df
|
|
60
|
+
|
|
61
|
+
def ions_balance(self, df, tolerance=0.3):
|
|
62
|
+
"""
|
|
63
|
+
Calculate the balance of ions in the system
|
|
64
|
+
"""
|
|
65
|
+
# Define the ions
|
|
66
|
+
item = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+', 'F-', 'Cl-', 'NO2-', 'NO3-', 'PO43-', 'SO42-']
|
|
67
|
+
|
|
68
|
+
# Calculate the balance
|
|
69
|
+
_df = df[item].copy()
|
|
70
|
+
_df = _df.apply(lambda x: to_numeric(x, errors='coerce'))
|
|
71
|
+
_df['+_mole'] = _df[['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+']].div([23, 18, 39, (24 / 2), (40 / 2)]).sum(axis=1,
|
|
72
|
+
skipna=True)
|
|
73
|
+
_df['-_mole'] = _df[['Cl-', 'NO2-', 'NO3-', 'SO42-']].div([35.5, 46, 62, (96 / 2)]).sum(axis=1, skipna=True)
|
|
74
|
+
|
|
75
|
+
# Avoid division by zero
|
|
76
|
+
_df['ratio'] = np.where(_df['-_mole'] != 0, _df['+_mole'] / _df['-_mole'], np.nan)
|
|
77
|
+
|
|
78
|
+
# Calculate bounds
|
|
79
|
+
lower_bound, upper_bound = 1 - tolerance, 1 + tolerance
|
|
80
|
+
|
|
81
|
+
# 根据ratio决定是否保留原始数据
|
|
82
|
+
valid_mask = (
|
|
83
|
+
(_df['ratio'] <= upper_bound) &
|
|
84
|
+
(_df['ratio'] >= lower_bound) &
|
|
85
|
+
~np.isnan(_df['+_mole']) &
|
|
86
|
+
~np.isnan(_df['-_mole'])
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
# 保留数据或将不符合条件的行设为NaN
|
|
90
|
+
df.loc[~valid_mask, item] = np.nan
|
|
91
|
+
|
|
92
|
+
# 计算保留的数据的百分比
|
|
93
|
+
retained_percentage = (valid_mask.sum() / len(df)) * 100
|
|
94
|
+
|
|
95
|
+
self.logger.info(f"{'=' * 60}")
|
|
96
|
+
self.logger.info(f"Ions balance summary:")
|
|
97
|
+
self.logger.info(f"\t\tretain {retained_percentage.__round__(0)}% data within tolerance {tolerance}")
|
|
98
|
+
self.logger.info(f"{'=' * 60}")
|
|
99
|
+
|
|
100
|
+
if retained_percentage < 70:
|
|
101
|
+
self.logger.warning("Warning: The percentage of retained data is less than 70%")
|
|
102
|
+
|
|
103
|
+
return df
|
|
@@ -6,28 +6,31 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'NEPH'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
10
|
-
with
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
with file.open('r', encoding='utf-8', errors='ignore') as f:
|
|
11
11
|
_df = read_csv(f, header=None, names=range(11))
|
|
12
12
|
|
|
13
13
|
_df_grp = _df.groupby(0)
|
|
14
14
|
|
|
15
15
|
# T : time
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
_idx_tm = to_datetime((_df_tm[1] + _df_tm[2] + _df_tm[3] + _df_tm[4] + _df_tm[5] + _df_tm[6]),
|
|
23
|
-
format='%Y%m%d%H%M%S')
|
|
16
|
+
_idx_tm = to_datetime(
|
|
17
|
+
_df_grp.get_group('T')[[1, 2, 3, 4, 5, 6]]
|
|
18
|
+
.map(lambda x: f"{int(x):02d}")
|
|
19
|
+
.agg(''.join, axis=1),
|
|
20
|
+
format='%Y%m%d%H%M%S'
|
|
21
|
+
)
|
|
24
22
|
|
|
25
23
|
# D : data
|
|
26
24
|
# col : 3~8 B G R BB BG BR
|
|
27
25
|
# 1e6
|
|
28
26
|
try:
|
|
29
27
|
_df_dt = _df_grp.get_group('D')[[1, 2, 3, 4, 5, 6, 7, 8]].set_index(_idx_tm)
|
|
30
|
-
|
|
28
|
+
|
|
29
|
+
try:
|
|
30
|
+
_df_out = (_df_dt.groupby(1).get_group('NBXX')[[3, 4, 5, 6, 7, 8]] * 1e6).reindex(_idx_tm)
|
|
31
|
+
except KeyError:
|
|
32
|
+
_df_out = (_df_dt.groupby(1).get_group('NTXX')[[3, 4, 5, 6, 7, 8]] * 1e6).reindex(_idx_tm)
|
|
33
|
+
|
|
31
34
|
_df_out.columns = ['B', 'G', 'R', 'BB', 'BG', 'BR']
|
|
32
35
|
_df_out.index.name = 'Time'
|
|
33
36
|
|
|
@@ -39,42 +42,29 @@ class Reader(AbstractReader):
|
|
|
39
42
|
|
|
40
43
|
_df_out.mask(_df_out['status'] != 0) # 0000 -> numeric to 0
|
|
41
44
|
|
|
42
|
-
|
|
45
|
+
_df = _df_out[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']]
|
|
43
46
|
|
|
44
|
-
|
|
45
|
-
group_sizes = _df_grp.size()
|
|
46
|
-
print(group_sizes)
|
|
47
|
-
# Define the valid groups
|
|
48
|
-
valid_groups = {'B', 'G', 'R', 'D', 'T', 'Y', 'Z'}
|
|
49
|
-
|
|
50
|
-
# Find the rows where the value in the first column is not in valid_groups
|
|
51
|
-
invalid_indices = _df[~_df[0].isin(valid_groups)].index
|
|
47
|
+
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
52
48
|
|
|
53
|
-
|
|
54
|
-
|
|
49
|
+
except ValueError:
|
|
50
|
+
# Define valid groups and find invalid indices
|
|
51
|
+
invalid_indices = _df[~_df[0].isin({'B', 'G', 'R', 'D', 'T', 'Y', 'Z'})].index
|
|
55
52
|
print("Invalid values and their indices:")
|
|
56
|
-
|
|
57
|
-
print(f"Index: {idx}, Value: {value}")
|
|
53
|
+
print("\n".join([f"Index: {idx}, Value: {_df.at[idx, 0]}" for idx in invalid_indices]))
|
|
58
54
|
|
|
59
|
-
#
|
|
60
|
-
|
|
61
|
-
_df_out = DataFrame(index=_idx_tm, columns=columns)
|
|
55
|
+
# Return an empty DataFrame with specified columns if there's a length mismatch
|
|
56
|
+
_df_out = DataFrame(index=_idx_tm, columns=['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH'])
|
|
62
57
|
_df_out.index.name = 'Time'
|
|
63
|
-
print(f'\n\t\t\t Length mismatch in {
|
|
58
|
+
print(f'\n\t\t\t Length mismatch in {file} data. Returning an empty DataFrame.')
|
|
64
59
|
return _df_out
|
|
65
60
|
|
|
66
61
|
# QC data
|
|
67
62
|
def _QC(self, _df):
|
|
68
63
|
# remove negative value
|
|
69
|
-
_df = _df.mask((_df <=
|
|
70
|
-
|
|
71
|
-
# call by _QC function
|
|
72
|
-
# QC data in 1 hr
|
|
73
|
-
def _QC_func(_df_1hr):
|
|
74
|
-
_df_ave = _df_1hr.mean()
|
|
75
|
-
_df_std = _df_1hr.std()
|
|
76
|
-
_df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
|
|
64
|
+
_df = _df.mask((_df <= 5).copy())
|
|
77
65
|
|
|
78
|
-
|
|
66
|
+
# total scattering is larger than back scattering
|
|
67
|
+
_df = _df[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])]
|
|
79
68
|
|
|
80
|
-
|
|
69
|
+
# QC data in 1h
|
|
70
|
+
return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
|
|
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'SMPS_TH'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
10
|
-
with open(
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
with open(file, 'r', encoding='utf-8', errors='ignore') as f:
|
|
11
11
|
_df = read_table(f, skiprows=18, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time')
|
|
12
12
|
_key = list(_df.keys()[6:-26])
|
|
13
13
|
|
|
@@ -21,19 +21,19 @@ class Reader(AbstractReader):
|
|
|
21
21
|
_df_idx = to_datetime(_df.index, errors='coerce')
|
|
22
22
|
return _df[_newkey.keys()].rename(_newkey, axis=1).set_index(_df_idx).loc[_df_idx.dropna()]
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
# QC data
|
|
25
25
|
def _QC(self, _df):
|
|
26
26
|
import numpy as n
|
|
27
27
|
|
|
28
|
-
|
|
28
|
+
# mask out the data size lower than 7
|
|
29
29
|
_df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
|
|
30
30
|
_df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
|
|
31
31
|
_df = _df.mask(_df_size < 7)
|
|
32
32
|
|
|
33
|
-
|
|
33
|
+
# remove total conc. lower than 2000
|
|
34
34
|
_df = _df.mask(_df['total'] < 2000)
|
|
35
35
|
|
|
36
|
-
|
|
36
|
+
# remove the bin over 400 nm which num. conc. larger than 4000
|
|
37
37
|
_df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]
|
|
38
38
|
|
|
39
39
|
_df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)
|
|
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'SMPS_aim11'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
10
|
-
with open(
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
with open(file, 'r', encoding='utf-8', errors='ignore') as f:
|
|
11
11
|
|
|
12
12
|
skiprows = 0
|
|
13
13
|
for _line in f:
|
|
@@ -21,29 +21,29 @@ class Reader(AbstractReader):
|
|
|
21
21
|
_df = read_csv(f, skiprows=skiprows)
|
|
22
22
|
_tm_idx = to_datetime(_df['DateTime Sample Start'], format='%d/%m/%Y %X', errors='coerce')
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
# index
|
|
25
25
|
_df = _df.set_index(_tm_idx).loc[_tm_idx.dropna()]
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
# keys
|
|
28
28
|
_key = to_numeric(_df.keys(), errors='coerce')
|
|
29
29
|
_df.columns = _key
|
|
30
30
|
_df = _df.loc[:, ~_key.isna()]
|
|
31
31
|
|
|
32
32
|
return _df.apply(to_numeric, errors='coerce')
|
|
33
33
|
|
|
34
|
-
|
|
34
|
+
# QC data
|
|
35
35
|
def _QC(self, _df):
|
|
36
36
|
import numpy as n
|
|
37
37
|
|
|
38
|
-
|
|
38
|
+
# mask out the data size lower than 7
|
|
39
39
|
_df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
|
|
40
40
|
_df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
|
|
41
41
|
_df = _df.mask(_df_size < 7)
|
|
42
42
|
|
|
43
|
-
|
|
43
|
+
# remove total conc. lower than 2000
|
|
44
44
|
_df = _df.mask(_df['total'] < 2000)
|
|
45
45
|
|
|
46
|
-
|
|
46
|
+
# remove the bin over 400 nm which num. conc. larger than 4000
|
|
47
47
|
_df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]
|
|
48
48
|
|
|
49
49
|
_df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)
|
|
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'SMPS_genr'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
10
|
-
with open(
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
with open(file, 'r', encoding='utf-8', errors='ignore') as f:
|
|
11
11
|
|
|
12
12
|
skiprows = 0
|
|
13
13
|
for _line in f:
|
|
@@ -21,29 +21,29 @@ class Reader(AbstractReader):
|
|
|
21
21
|
_df = read_table(f, skiprows=skiprows)
|
|
22
22
|
_tm_idx = to_datetime(_df['Date'] + _df['Start Time'], format='%m/%d/%y%X', errors='coerce')
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
# index
|
|
25
25
|
_df = _df.set_index(_tm_idx).loc[_tm_idx.dropna()]
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
# keys
|
|
28
28
|
_key = to_numeric(_df.keys(), errors='coerce')
|
|
29
29
|
_df.columns = _key
|
|
30
30
|
_df = _df.loc[:, ~_key.isna()]
|
|
31
31
|
|
|
32
32
|
return _df.apply(to_numeric, errors='coerce')
|
|
33
33
|
|
|
34
|
-
|
|
34
|
+
# QC data
|
|
35
35
|
def _QC(self, _df):
|
|
36
36
|
import numpy as n
|
|
37
37
|
|
|
38
|
-
|
|
38
|
+
# mask out the data size lower than 7
|
|
39
39
|
_df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
|
|
40
40
|
_df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
|
|
41
41
|
_df = _df.mask(_df_size < 7)
|
|
42
42
|
|
|
43
|
-
|
|
43
|
+
# remove total conc. lower than 2000
|
|
44
44
|
_df = _df.mask(_df['total'] < 2000)
|
|
45
45
|
|
|
46
|
-
|
|
46
|
+
# remove the bin over 400 nm which num. conc. larger than 4000
|
|
47
47
|
_df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]
|
|
48
48
|
|
|
49
49
|
_df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)
|