AeroViz 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of AeroViz might be problematic. Click here for more details.
- AeroViz/data/240228_00.txt +101 -0
- AeroViz/dataProcess/Chemistry/_ocec.py +20 -7
- AeroViz/plot/__init__.py +2 -0
- AeroViz/plot/hysplit/__init__.py +1 -0
- AeroViz/plot/hysplit/hysplit.py +79 -0
- AeroViz/plot/meteorology/meteorology.py +2 -0
- AeroViz/plot/optical/optical.py +60 -59
- AeroViz/plot/pie.py +14 -2
- AeroViz/plot/radar.py +184 -0
- AeroViz/plot/scatter.py +16 -7
- AeroViz/plot/templates/diurnal_pattern.py +24 -7
- AeroViz/plot/templates/koschmieder.py +11 -8
- AeroViz/plot/timeseries/template.py +2 -2
- AeroViz/plot/timeseries/timeseries.py +47 -7
- AeroViz/rawDataReader/__init__.py +75 -68
- AeroViz/rawDataReader/config/supported_instruments.py +52 -19
- AeroViz/rawDataReader/core/__init__.py +194 -106
- AeroViz/rawDataReader/script/AE33.py +11 -6
- AeroViz/rawDataReader/script/AE43.py +10 -5
- AeroViz/rawDataReader/script/Aurora.py +14 -10
- AeroViz/rawDataReader/script/BC1054.py +10 -6
- AeroViz/rawDataReader/script/EPA.py +39 -0
- AeroViz/rawDataReader/script/GRIMM.py +1 -2
- AeroViz/rawDataReader/script/IGAC.py +6 -23
- AeroViz/rawDataReader/script/MA350.py +12 -5
- AeroViz/rawDataReader/script/Minion.py +107 -30
- AeroViz/rawDataReader/script/NEPH.py +15 -5
- AeroViz/rawDataReader/script/OCEC.py +39 -15
- AeroViz/rawDataReader/script/SMPS.py +1 -0
- AeroViz/rawDataReader/script/TEOM.py +15 -11
- AeroViz/rawDataReader/script/VOC.py +1 -1
- AeroViz/rawDataReader/script/XRF.py +11 -0
- AeroViz/rawDataReader/script/__init__.py +2 -2
- {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/METADATA +54 -30
- {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/RECORD +40 -51
- AeroViz/process/__init__.py +0 -31
- AeroViz/process/core/DataProc.py +0 -19
- AeroViz/process/core/SizeDist.py +0 -90
- AeroViz/process/core/__init__.py +0 -4
- AeroViz/process/method/__init__.py +0 -2
- AeroViz/process/method/prop.py +0 -62
- AeroViz/process/script/AbstractDistCalc.py +0 -143
- AeroViz/process/script/Chemical.py +0 -177
- AeroViz/process/script/IMPACT.py +0 -49
- AeroViz/process/script/IMPROVE.py +0 -161
- AeroViz/process/script/Others.py +0 -65
- AeroViz/process/script/PSD.py +0 -103
- AeroViz/process/script/PSD_dry.py +0 -93
- AeroViz/process/script/__init__.py +0 -5
- AeroViz/process/script/retrieve_RI.py +0 -69
- AeroViz/rawDataReader/script/EPA_vertical.py +0 -46
- AeroViz/rawDataReader/script/Table.py +0 -27
- /AeroViz/{process/method → plot/optical}/PyMieScatt_update.py +0 -0
- /AeroViz/{process/method → plot/optical}/mie_theory.py +0 -0
- {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/WHEEL +0 -0
- {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/top_level.txt +0 -0
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
# read meteorological data from google sheet
|
|
2
2
|
|
|
3
3
|
|
|
4
|
-
import
|
|
5
|
-
from pandas import read_csv, concat, to_numeric
|
|
4
|
+
from pandas import read_csv, to_numeric
|
|
6
5
|
|
|
7
6
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
8
7
|
|
|
@@ -35,24 +34,8 @@ class Reader(AbstractReader):
|
|
|
35
34
|
'SO42-': 0.08,
|
|
36
35
|
}
|
|
37
36
|
|
|
38
|
-
# _mdl.update(self._oth_set.get('mdl', {}))
|
|
39
|
-
|
|
40
|
-
def _se_le(_df_, _log=False):
|
|
41
|
-
_df_ = np.log10(_df_) if _log else _df_
|
|
42
|
-
|
|
43
|
-
_df_qua = _df_.quantile([.25, .75])
|
|
44
|
-
_df_q1, _df_q3 = _df_qua.loc[.25].copy(), _df_qua.loc[.75].copy()
|
|
45
|
-
_df_iqr = _df_q3 - _df_q1
|
|
46
|
-
|
|
47
|
-
_se = concat([_df_q1 - 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index)
|
|
48
|
-
_le = concat([_df_q3 + 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index)
|
|
49
|
-
|
|
50
|
-
if _log:
|
|
51
|
-
return 10 ** _se, 10 ** _le
|
|
52
|
-
return _se, _le
|
|
53
|
-
|
|
54
37
|
_cation, _anion, _main = (['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'],
|
|
55
|
-
['Cl-', 'NO2-', 'NO3-', 'SO42-', ],
|
|
38
|
+
['Cl-', 'NO2-', 'NO3-', 'PO43-', 'SO42-', ],
|
|
56
39
|
['SO42-', 'NO3-', 'NH4+'])
|
|
57
40
|
|
|
58
41
|
_df_salt = _df[_mdl.keys()].copy()
|
|
@@ -68,23 +51,23 @@ class Reader(AbstractReader):
|
|
|
68
51
|
|
|
69
52
|
# calculate SE LE
|
|
70
53
|
# salt < LE
|
|
71
|
-
_se, _le =
|
|
54
|
+
_se, _le = self.IQR_QC(_df_salt, log_dist=True)
|
|
72
55
|
_df_salt = _df_salt.mask(_df_salt > _le).copy()
|
|
73
56
|
|
|
74
57
|
# C/A, A/C
|
|
75
58
|
_rat_CA = (_df_salt[_cation].sum(axis=1) / _df_salt[_anion].sum(axis=1)).to_frame()
|
|
76
59
|
_rat_AC = (1 / _rat_CA).copy()
|
|
77
60
|
|
|
78
|
-
_se, _le =
|
|
61
|
+
_se, _le = self.IQR_QC(_rat_CA, )
|
|
79
62
|
_cond_CA = (_rat_CA < _le) & (_rat_CA > 0)
|
|
80
63
|
|
|
81
|
-
_se, _le =
|
|
64
|
+
_se, _le = self.IQR_QC(_rat_AC, )
|
|
82
65
|
_cond_AC = (_rat_AC < _le) & (_rat_AC > 0)
|
|
83
66
|
|
|
84
67
|
_df_salt = _df_salt.where((_cond_CA * _cond_AC)[0]).copy()
|
|
85
68
|
|
|
86
69
|
# conc. of main salt > SE
|
|
87
|
-
_se, _le =
|
|
70
|
+
_se, _le = self.IQR_QC(_df_salt[_main], log_dist=True)
|
|
88
71
|
_df_salt[_main] = _df_salt[_main].mask(_df_salt[_main] < _se).copy()
|
|
89
72
|
|
|
90
73
|
return _df_salt.reindex(_df.index)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from pandas import read_csv
|
|
1
|
+
from pandas import read_csv, to_numeric
|
|
2
2
|
|
|
3
3
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
4
4
|
|
|
@@ -7,7 +7,8 @@ class Reader(AbstractReader):
|
|
|
7
7
|
nam = 'MA350'
|
|
8
8
|
|
|
9
9
|
def _raw_reader(self, file):
|
|
10
|
-
_df = read_csv(file, parse_dates=['Date / time local'], index_col='Date / time local').rename_axis(
|
|
10
|
+
_df = read_csv(file, parse_dates=['Date / time local'], index_col='Date / time local').rename_axis(
|
|
11
|
+
"Time").apply(to_numeric, errors='coerce')
|
|
11
12
|
|
|
12
13
|
_df = _df.rename(columns={
|
|
13
14
|
'UV BCc': 'BC1',
|
|
@@ -31,8 +32,14 @@ class Reader(AbstractReader):
|
|
|
31
32
|
|
|
32
33
|
# QC data
|
|
33
34
|
def _QC(self, _df):
|
|
35
|
+
_index = _df.index.copy()
|
|
36
|
+
|
|
34
37
|
# remove negative value
|
|
35
|
-
_df = _df
|
|
38
|
+
_df = _df.mask(
|
|
39
|
+
(_df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5']] <= 0) | (_df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5']] > 20000))
|
|
40
|
+
|
|
41
|
+
# use IQR_QC
|
|
42
|
+
_df = self.time_aware_IQR_QC(_df, time_window='1h')
|
|
36
43
|
|
|
37
|
-
#
|
|
38
|
-
return _df.
|
|
44
|
+
# make sure all columns have values, otherwise set to nan
|
|
45
|
+
return _df.dropna(how='any').reindex(_index)
|
|
@@ -1,38 +1,80 @@
|
|
|
1
|
+
from typing import Literal
|
|
2
|
+
|
|
1
3
|
import numpy as np
|
|
2
|
-
|
|
4
|
+
import pandas
|
|
5
|
+
from pandas import read_excel, to_numeric
|
|
3
6
|
|
|
4
7
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
5
8
|
|
|
9
|
+
pandas.set_option("future.no_silent_downcasting", True)
|
|
10
|
+
|
|
11
|
+
desired_order1 = ['SO2', 'NO', 'NOx', 'NO2', 'CO', 'O3', 'THC', 'NMHC',
|
|
12
|
+
'CH4', 'PM10', 'PM2.5', 'WS', 'WD', 'AT', 'RH']
|
|
13
|
+
|
|
14
|
+
desired_order2 = ['Benzene', 'Toluene', 'EthylBenzene', 'm/p-Xylene', 'o-Xylene']
|
|
15
|
+
|
|
16
|
+
desired_order3 = ['Al', 'Si', 'P', 'S', 'Cl', 'K', 'Ca', 'Ti', 'V', 'Cr', 'Mn', 'Fe',
|
|
17
|
+
'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Rb', 'Sr',
|
|
18
|
+
'Y', 'Zr', 'Nb', 'Mo', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te',
|
|
19
|
+
'Cs', 'Ba', 'La', 'Ce', 'W', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi']
|
|
20
|
+
|
|
21
|
+
desired_order4 = ['NH3', 'HF', 'HCl', 'HNO2', 'HNO3', 'G-SO2',
|
|
22
|
+
'Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+',
|
|
23
|
+
'F-', 'Cl-', 'NO2-', 'NO3-', 'PO43-', 'SO42-']
|
|
24
|
+
|
|
6
25
|
|
|
7
26
|
class Reader(AbstractReader):
|
|
8
27
|
nam = 'Minion'
|
|
9
28
|
|
|
10
29
|
def _raw_reader(self, file):
|
|
11
|
-
|
|
12
|
-
|
|
30
|
+
# 讀取 Excel 文件
|
|
31
|
+
df = read_excel(file, index_col=0, parse_dates=True)
|
|
32
|
+
|
|
33
|
+
# 重命名列,去除空白
|
|
34
|
+
df = df.rename(columns=lambda x: x.strip())
|
|
35
|
+
|
|
36
|
+
# 保存單位行並給它一個名稱
|
|
37
|
+
units = df.iloc[0].copy()
|
|
13
38
|
|
|
14
|
-
|
|
15
|
-
|
|
39
|
+
# 刪除原始數據中的單位行
|
|
40
|
+
df = df.iloc[1:]
|
|
16
41
|
|
|
17
|
-
|
|
42
|
+
# 替換特定值
|
|
43
|
+
df = df.replace({'維護校正': '*', np.nan: '-', '0L': '_', 'Nodata': '-'}, inplace=False)
|
|
44
|
+
df = df.replace(to_replace=r'\d*[#]\b', value='#', regex=True)
|
|
45
|
+
df = df.replace(to_replace=r'\d*[L]\b', value='_', regex=True)
|
|
18
46
|
|
|
19
|
-
|
|
47
|
+
# 處理除了'WD'列的 0 值
|
|
48
|
+
non_wd_columns = [col for col in df.columns if col != 'WD']
|
|
49
|
+
df.loc[:, non_wd_columns] = df.loc[:, non_wd_columns].replace({0: '_'})
|
|
50
|
+
|
|
51
|
+
# 重新排序列
|
|
52
|
+
df = self.reorder_dataframe_columns(df, [desired_order1, desired_order2, desired_order3, desired_order4])
|
|
53
|
+
|
|
54
|
+
# 將單位行添加回 DataFrame
|
|
55
|
+
# df = concat([units.to_frame().T, df])
|
|
56
|
+
|
|
57
|
+
df.index.name = 'Time'
|
|
58
|
+
|
|
59
|
+
return df.loc[~df.index.duplicated() & df.index.notna()]
|
|
20
60
|
|
|
21
61
|
def _QC(self, _df):
|
|
62
|
+
# remove negative value
|
|
63
|
+
_df = _df.mask((_df < 0))
|
|
64
|
+
|
|
22
65
|
# XRF QAQC
|
|
23
66
|
_df = self.XRF_QAQC(_df)
|
|
24
67
|
|
|
25
68
|
# ions balance
|
|
26
|
-
_df = self.
|
|
27
|
-
|
|
28
|
-
# remove negative value
|
|
29
|
-
_df = _df.mask((_df < 0).copy())
|
|
69
|
+
_df = self.IGAC_QAQC(_df)
|
|
30
70
|
|
|
31
71
|
# QC data in 6h
|
|
32
|
-
|
|
72
|
+
_df = self.time_aware_IQR_QC(_df)
|
|
73
|
+
|
|
74
|
+
return _df
|
|
33
75
|
|
|
34
76
|
# base on Xact 625i Minimum Decision Limit (MDL) for XRF in ng/m3, 60 min sample time
|
|
35
|
-
def XRF_QAQC(self, df):
|
|
77
|
+
def XRF_QAQC(self, df, MDL_replace: Literal['nan', '0.5 * MDL'] = 'nan'):
|
|
36
78
|
MDL = {
|
|
37
79
|
'Al': 100, 'Si': 18, 'P': 5.2, 'S': 3.2,
|
|
38
80
|
'Cl': 1.7, 'K': 1.2, 'Ca': 0.3, 'Ti': 1.6,
|
|
@@ -40,34 +82,71 @@ class Reader(AbstractReader):
|
|
|
40
82
|
'Co': 0.14, 'Ni': 0.096, 'Cu': 0.079, 'Zn': 0.067,
|
|
41
83
|
'Ga': 0.059, 'Ge': 0.056, 'As': 0.063, 'Se': 0.081,
|
|
42
84
|
'Br': 0.1, 'Rb': 0.19, 'Sr': 0.22, 'Y': 0.28,
|
|
43
|
-
'Zr': 0.33, 'Nb': 0.41, 'Mo': 0.48, '
|
|
44
|
-
'Cd': 2.5, 'In': 3.1, 'Sn': 4.1,
|
|
45
|
-
'
|
|
46
|
-
'La': 0.36, 'Ce': 0.3, '
|
|
47
|
-
'
|
|
85
|
+
'Zr': 0.33, 'Nb': 0.41, 'Mo': 0.48, 'Pd': 2.2,
|
|
86
|
+
'Ag': 1.9, 'Cd': 2.5, 'In': 3.1, 'Sn': 4.1,
|
|
87
|
+
'Sb': 5.2, 'Te': 0.6, 'Cs': 0.37, 'Ba': 0.39,
|
|
88
|
+
'La': 0.36, 'Ce': 0.3, 'W': 0.0001, 'Pt': 0.12,
|
|
89
|
+
'Au': 0.1, 'Hg': 0.12, 'Tl': 0.12, 'Pb': 0.13,
|
|
90
|
+
'Bi': 0.13
|
|
48
91
|
}
|
|
49
|
-
|
|
92
|
+
|
|
93
|
+
# Br Li internal standard
|
|
94
|
+
|
|
95
|
+
# 將小於 MDL 值的數據替換為 nan or 1/2 MDL
|
|
50
96
|
for element, threshold in MDL.items():
|
|
51
97
|
if element in df.columns:
|
|
52
|
-
|
|
98
|
+
rep = np.nan if MDL_replace == 'nan' else 0.5 * threshold
|
|
99
|
+
df[element] = df[element].where(df[element] >= threshold, rep)
|
|
53
100
|
|
|
54
101
|
self.logger.info(f"{'=' * 60}")
|
|
55
102
|
self.logger.info(f"XRF QAQC summary:")
|
|
56
103
|
self.logger.info("\t\ttransform values below MDL to NaN")
|
|
57
104
|
self.logger.info(f"{'=' * 60}")
|
|
58
105
|
|
|
106
|
+
# 轉換單位 ng/m3 -> ug/m3
|
|
107
|
+
if df.Al.max() > 10 and df.Fe.max() > 10:
|
|
108
|
+
# 確保 MDL.keys() 中的所有列都存在於 _df 中
|
|
109
|
+
columns_to_convert = [col for col in MDL.keys() if col in df.columns]
|
|
110
|
+
|
|
111
|
+
df[columns_to_convert] = df[columns_to_convert].div(1000)
|
|
112
|
+
|
|
59
113
|
return df
|
|
60
114
|
|
|
61
|
-
def
|
|
115
|
+
def IGAC_QAQC(self, df, tolerance=1):
|
|
62
116
|
"""
|
|
63
117
|
Calculate the balance of ions in the system
|
|
64
118
|
"""
|
|
119
|
+
# https://www.yangyao-env.com/web/product/product_in2.jsp?pd_id=PD1640151884502
|
|
120
|
+
MDL = {
|
|
121
|
+
'HF': 0.08, 'HCl': 0.05, 'HNO2': 0.01, 'HNO3': 0.05, 'G-SO2': 0.05, 'NH3': 0.1,
|
|
122
|
+
'Na+': 0.05, 'NH4+': 0.08, 'K+': 0.08, 'Mg2+': 0.05, 'Ca2+': 0.05,
|
|
123
|
+
'F-': 0.08, 'Cl-': 0.05, 'NO2-': 0.05, 'NO3-': 0.01, 'PO43-': None, 'SO42-': 0.05,
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
MR = {
|
|
127
|
+
'HF': 200, 'HCl': 200, 'HNO2': 200, 'HNO3': 200, 'G-SO2': 200, 'NH3': 300,
|
|
128
|
+
'Na+': 300, 'NH4+': 300, 'K+': 300, 'Mg2+': 300, 'Ca2+': 300,
|
|
129
|
+
'F-': 300, 'Cl-': 300, 'NO2-': 300, 'NO3-': 300, 'PO43-': None, 'SO42-': 300,
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
_cation, _anion, _main = (['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'],
|
|
133
|
+
['Cl-', 'NO2-', 'NO3-', 'SO42-'],
|
|
134
|
+
['SO42-', 'NO3-', 'NH4+'])
|
|
135
|
+
# QC: replace values below MDL with 0.5 * MDL -> ions balance -> PM2.5 > main salt
|
|
136
|
+
# mass tolerance = 0.3, ions balance tolerance = 0.3
|
|
137
|
+
|
|
138
|
+
# # conc. of main salt should be present at the same time (NH4+, SO42-, NO3-)
|
|
139
|
+
# _df_salt = df.mask(df.sum(axis=1, min_count=1) > df.PM25).dropna(subset=_main).copy()
|
|
140
|
+
|
|
65
141
|
# Define the ions
|
|
66
|
-
item = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+', '
|
|
142
|
+
item = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+', 'Cl-', 'NO2-', 'NO3-', 'SO42-']
|
|
67
143
|
|
|
68
144
|
# Calculate the balance
|
|
69
|
-
_df = df[item].
|
|
70
|
-
|
|
145
|
+
_df = df[item].apply(to_numeric, errors='coerce')
|
|
146
|
+
|
|
147
|
+
# for (_key, _df_col) in _df.items():
|
|
148
|
+
# _df[_key] = _df_col.mask(_df_col < MDL[_key], MDL[_key] / 2)
|
|
149
|
+
|
|
71
150
|
_df['+_mole'] = _df[['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+']].div([23, 18, 39, (24 / 2), (40 / 2)]).sum(axis=1,
|
|
72
151
|
skipna=True)
|
|
73
152
|
_df['-_mole'] = _df[['Cl-', 'NO2-', 'NO3-', 'SO42-']].div([35.5, 46, 62, (96 / 2)]).sum(axis=1, skipna=True)
|
|
@@ -79,12 +158,8 @@ class Reader(AbstractReader):
|
|
|
79
158
|
lower_bound, upper_bound = 1 - tolerance, 1 + tolerance
|
|
80
159
|
|
|
81
160
|
# 根据ratio决定是否保留原始数据
|
|
82
|
-
valid_mask = (
|
|
83
|
-
|
|
84
|
-
(_df['ratio'] >= lower_bound) &
|
|
85
|
-
~np.isnan(_df['+_mole']) &
|
|
86
|
-
~np.isnan(_df['-_mole'])
|
|
87
|
-
)
|
|
161
|
+
valid_mask = ((_df['ratio'] <= upper_bound) & (_df['ratio'] >= lower_bound) &
|
|
162
|
+
~np.isnan(_df['+_mole']) & ~np.isnan(_df['-_mole']))
|
|
88
163
|
|
|
89
164
|
# 保留数据或将不符合条件的行设为NaN
|
|
90
165
|
df.loc[~valid_mask, item] = np.nan
|
|
@@ -100,4 +175,6 @@ class Reader(AbstractReader):
|
|
|
100
175
|
if retained_percentage < 70:
|
|
101
176
|
self.logger.warning("Warning: The percentage of retained data is less than 70%")
|
|
102
177
|
|
|
178
|
+
# print(f"\tretain {retained_percentage.__round__(0)}% data within tolerance {tolerance}")
|
|
179
|
+
|
|
103
180
|
return df
|
|
@@ -58,13 +58,23 @@ class Reader(AbstractReader):
|
|
|
58
58
|
print(f'\n\t\t\t Length mismatch in {file} data. Returning an empty DataFrame.')
|
|
59
59
|
return _df_out
|
|
60
60
|
|
|
61
|
-
# QC data
|
|
62
61
|
def _QC(self, _df):
|
|
62
|
+
MDL_sensitivity = {'B': .1, 'G': .1, 'R': .3}
|
|
63
|
+
|
|
64
|
+
_index = _df.index.copy()
|
|
65
|
+
|
|
63
66
|
# remove negative value
|
|
64
|
-
_df = _df.mask((_df <=
|
|
67
|
+
_df = _df.mask((_df <= 0) | (_df > 2000))
|
|
65
68
|
|
|
66
69
|
# total scattering is larger than back scattering
|
|
67
|
-
_df = _df[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])]
|
|
70
|
+
_df = _df.loc[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])]
|
|
71
|
+
|
|
72
|
+
# blue scattering is larger than green scattering, green scattering is larger than red scattering
|
|
73
|
+
# due to the nephelometer's Green PMT in FS is already aged, this QC may delete too many data
|
|
74
|
+
# _df = _df.loc[(_df['B'] > _df['G']) & (_df['G'] > _df['R'])]
|
|
75
|
+
|
|
76
|
+
# use IQR_QC
|
|
77
|
+
_df = self.time_aware_IQR_QC(_df)
|
|
68
78
|
|
|
69
|
-
#
|
|
70
|
-
return _df.
|
|
79
|
+
# make sure all columns have values, otherwise set to nan
|
|
80
|
+
return _df.dropna(how='any').reindex(_index)
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
|
|
1
|
+
import numpy as np
|
|
2
|
+
from pandas import to_datetime, read_csv, to_numeric
|
|
2
3
|
|
|
3
4
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
4
5
|
|
|
@@ -8,11 +9,18 @@ class Reader(AbstractReader):
|
|
|
8
9
|
|
|
9
10
|
def _raw_reader(self, file):
|
|
10
11
|
with open(file, 'r', encoding='utf-8', errors='ignore') as f:
|
|
11
|
-
_df = read_csv(f, skiprows=3,
|
|
12
|
+
_df = read_csv(f, skiprows=3).apply(to_numeric, errors='coerce')
|
|
12
13
|
|
|
13
14
|
_df['Start Date/Time'] = _df['Start Date/Time'].str.strip()
|
|
14
15
|
_df['time'] = to_datetime(_df['Start Date/Time'], format='%m/%d/%Y %I:%M:%S %p', errors='coerce')
|
|
16
|
+
|
|
17
|
+
if _df['time'].isna().any():
|
|
18
|
+
_df['time'] = to_datetime(_df['Start Date/Time'], format='%m/%d/%Y %H:%M:%S', errors='coerce')
|
|
19
|
+
|
|
15
20
|
_df = _df.set_index('time')
|
|
21
|
+
|
|
22
|
+
_df = _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
23
|
+
|
|
16
24
|
_df.index = _df.index.round('1h')
|
|
17
25
|
|
|
18
26
|
_df = _df.rename(columns={
|
|
@@ -34,6 +42,8 @@ class Reader(AbstractReader):
|
|
|
34
42
|
'OCPk2-ug C': 'OC2_raw',
|
|
35
43
|
'OCPk3-ug C': 'OC3_raw',
|
|
36
44
|
'OCPk4-ug C': 'OC4_raw',
|
|
45
|
+
'Pyrolized C ug': 'PC_raw',
|
|
46
|
+
|
|
37
47
|
'ECPk1-ug C': 'EC1_raw',
|
|
38
48
|
'ECPk2-ug C': 'EC2_raw',
|
|
39
49
|
'ECPk3-ug C': 'EC3_raw',
|
|
@@ -41,26 +51,40 @@ class Reader(AbstractReader):
|
|
|
41
51
|
'ECPk5-ug C': 'EC5_raw',
|
|
42
52
|
})
|
|
43
53
|
|
|
54
|
+
_df['OC1'] = _df['OC1_raw'] / _df['Sample_Volume']
|
|
55
|
+
_df['OC2'] = _df['OC2_raw'] / _df['Sample_Volume']
|
|
56
|
+
_df['OC3'] = _df['OC3_raw'] / _df['Sample_Volume']
|
|
57
|
+
_df['OC4'] = _df['OC4_raw'] / _df['Sample_Volume']
|
|
58
|
+
|
|
59
|
+
_df['PC'] = _df['Thermal_OC'] - _df['OC1'] - _df['OC2'] - _df['OC3'] - _df['OC4']
|
|
60
|
+
|
|
61
|
+
# _df['EC1'] = _df['EC1_raw'] / _df['Sample_Volume']
|
|
62
|
+
# _df['EC2'] = _df['EC2_raw'] / _df['Sample_Volume']
|
|
63
|
+
# _df['EC3'] = _df['EC3_raw'] / _df['Sample_Volume']
|
|
64
|
+
# _df['EC4'] = _df['EC4_raw'] / _df['Sample_Volume']
|
|
65
|
+
# _df['EC5'] = _df['EC5_raw'] / _df['Sample_Volume']
|
|
66
|
+
|
|
44
67
|
_df = _df[['Thermal_OC', 'Optical_OC', 'Thermal_EC', 'Optical_EC', 'TC', 'Sample_Volume',
|
|
45
|
-
'
|
|
46
|
-
'EC5_raw']]
|
|
68
|
+
'OC1', 'OC2', 'OC3', 'OC4', 'PC']]
|
|
47
69
|
|
|
48
70
|
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
49
71
|
|
|
50
72
|
# QC data
|
|
51
73
|
def _QC(self, _df):
|
|
52
|
-
|
|
74
|
+
MDL = {'Thermal_OC': 0.3,
|
|
75
|
+
'Optical_OC': 0.3,
|
|
76
|
+
'Thermal_EC': 0.015,
|
|
77
|
+
'Optical_EC': 0.015
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
_index = _df.index.copy()
|
|
53
81
|
|
|
54
|
-
_df = _df.
|
|
82
|
+
_df = _df.mask((_df <= -5) | (_df > 100))
|
|
55
83
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
'Optical_OC': 0.3,
|
|
59
|
-
'Thermal_EC': 0.015,
|
|
60
|
-
'Optical_EC': 0.015
|
|
61
|
-
}
|
|
84
|
+
for col, threshold in MDL.items():
|
|
85
|
+
_df.loc[_df[col] <= threshold, col] = np.nan
|
|
62
86
|
|
|
63
|
-
|
|
64
|
-
|
|
87
|
+
# use IQR_QC
|
|
88
|
+
_df = self.time_aware_IQR_QC(_df, time_window='1h')
|
|
65
89
|
|
|
66
|
-
return _df
|
|
90
|
+
return _df.dropna(subset=['Thermal_OC', 'Optical_OC']).reindex(_index)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from pandas import to_datetime, read_csv
|
|
1
|
+
from pandas import to_datetime, read_csv, Timedelta, to_numeric
|
|
2
2
|
|
|
3
3
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
4
4
|
|
|
@@ -8,7 +8,7 @@ class Reader(AbstractReader):
|
|
|
8
8
|
|
|
9
9
|
def _raw_reader(self, file):
|
|
10
10
|
with open(file, 'r', encoding='utf-8', errors='ignore') as f:
|
|
11
|
-
_df = read_csv(f, skiprows=3, index_col=False)
|
|
11
|
+
_df = read_csv(f, skiprows=3, index_col=False).apply(to_numeric, errors='coerce')
|
|
12
12
|
|
|
13
13
|
_df = _df.rename(columns={'Time Stamp': 'time',
|
|
14
14
|
'System status': 'status',
|
|
@@ -25,24 +25,28 @@ class Reader(AbstractReader):
|
|
|
25
25
|
|
|
26
26
|
_df = _df.set_index(to_datetime(_tm_idx, errors='coerce', format='%d - %m - %Y %X'))
|
|
27
27
|
|
|
28
|
-
_df = _df.where(_df['status'] <
|
|
28
|
+
_df = _df.where(_df['status'] < 1)
|
|
29
29
|
|
|
30
|
-
_df = _df[['PM_NV', 'PM_Total', 'noise'
|
|
30
|
+
_df = _df[['PM_NV', 'PM_Total', 'noise']]
|
|
31
31
|
|
|
32
32
|
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
33
33
|
|
|
34
34
|
# QC data
|
|
35
35
|
def _QC(self, _df):
|
|
36
|
-
|
|
37
|
-
_df_idx = _df.index.copy()
|
|
36
|
+
_index = _df.index.copy()
|
|
38
37
|
|
|
39
38
|
# remove negative value
|
|
40
|
-
_df = _df.where(_df.noise < 0.01)[['PM_NV', 'PM_Total']].mask((_df <= 0)
|
|
39
|
+
_df = _df.where(_df.noise < 0.01)[['PM_NV', 'PM_Total']].mask((_df <= 0))
|
|
41
40
|
|
|
42
41
|
# QC data in 1 hr
|
|
43
|
-
#
|
|
42
|
+
# use time_aware_IQR_QC
|
|
43
|
+
_df = self.time_aware_IQR_QC(_df, time_window='1h')
|
|
44
|
+
|
|
45
|
+
# remove data where size < 50% in 1-hr
|
|
46
|
+
points_per_hour = Timedelta('1h') / Timedelta(self.meta['freq'])
|
|
44
47
|
for _key in ['PM_Total', 'PM_NV']:
|
|
45
|
-
_size = _df[_key].dropna().resample('1h').size().reindex(
|
|
46
|
-
_df[_key] = _df[_key].mask(_size <
|
|
48
|
+
_size = _df[_key].dropna().resample('1h').size().reindex(_index).ffill()
|
|
49
|
+
_df[_key] = _df[_key].mask(_size < points_per_hour * 0.5)
|
|
47
50
|
|
|
48
|
-
|
|
51
|
+
# make sure all columns have values, otherwise set to nan
|
|
52
|
+
return _df.dropna(how='any').reindex(_index)
|
|
@@ -26,7 +26,7 @@ class Reader(AbstractReader):
|
|
|
26
26
|
if valid_keys:
|
|
27
27
|
return _df[valid_keys].loc[~_df.index.duplicated() & _df.index.notna()]
|
|
28
28
|
else:
|
|
29
|
-
self.logger.warning("沒有找到匹配的鍵。返回原始DataFrame
|
|
29
|
+
self.logger.warning("沒有找到匹配的鍵。返回原始DataFrame。")
|
|
30
30
|
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
31
31
|
|
|
32
32
|
def _QC(self, _df):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: AeroViz
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.8
|
|
4
4
|
Summary: Aerosol science
|
|
5
5
|
Home-page: https://github.com/Alex870521/AeroViz
|
|
6
6
|
Author: alex
|
|
@@ -28,7 +28,7 @@ Requires-Dist: rich ~=13.7.1
|
|
|
28
28
|
<img alt="Static Badge" src="https://img.shields.io/badge/python-3.12-blue?logo=python">
|
|
29
29
|
<img alt="Static Badge" src="https://img.shields.io/badge/License-MIT-yellow">
|
|
30
30
|
<img alt="Static Badge" src="https://img.shields.io/badge/github-updating-red?logo=github">
|
|
31
|
-
<img src="https://img.shields.io/badge/testing-green?logo=Pytest&logoColor=blue">
|
|
31
|
+
<img alt="Static Badge" src="https://img.shields.io/badge/testing-green?logo=Pytest&logoColor=blue">
|
|
32
32
|
|
|
33
33
|
</p>
|
|
34
34
|
|
|
@@ -56,41 +56,68 @@ Requires-Dist: rich ~=13.7.1
|
|
|
56
56
|
pip install AeroViz
|
|
57
57
|
```
|
|
58
58
|
|
|
59
|
-
|
|
59
|
+
For Windows users: Run `install_windows.bat`
|
|
60
60
|
|
|
61
|
-
|
|
62
|
-
|
|
61
|
+
For Linux and Mac users: Run `install_unix.bat`
|
|
62
|
+
|
|
63
|
+
## <div align="center">Quick Start</div>
|
|
63
64
|
|
|
65
|
+
```python
|
|
66
|
+
from datetime import datetime
|
|
67
|
+
from pathlib import Path
|
|
64
68
|
from AeroViz import RawDataReader, DataProcess, plot
|
|
69
|
+
|
|
70
|
+
# Read data from a supported instrument
|
|
71
|
+
data = RawDataReader('NEPH', Path('/path/to/data'), start=datetime(2024, 2, 1), end=datetime(2024, 4, 30))
|
|
72
|
+
|
|
73
|
+
# Create a visualization
|
|
74
|
+
plot.timeseries(data, y='scattering_coefficient')
|
|
65
75
|
```
|
|
66
76
|
|
|
67
|
-
|
|
77
|
+
For more detailed usage instructions, please refer to our [User Guide]().
|
|
78
|
+
|
|
79
|
+
## <div align="center"> RawDataReader
|
|
80
|
+
|
|
81
|
+
RawDataReader supports a wide range of aerosol instruments, including NEPH, SMPS, AE33, and many more. It handles
|
|
82
|
+
various file types and time resolutions, making data processing efficient and standardized.
|
|
83
|
+
|
|
84
|
+
For a detailed list of supported instruments, file types, and data columns, please refer to
|
|
85
|
+
our [RawDataReader Usage Guide](docs/user_guide/RawDataReader) in the `docs` folder.
|
|
86
|
+
|
|
87
|
+
### Key Features:
|
|
88
|
+
|
|
89
|
+
- Supports multiple aerosol instruments
|
|
90
|
+
- Applies customizable quality control measures
|
|
91
|
+
- Offers flexible data filtering and resampling options
|
|
92
|
+
- Enables easy data export to CSV format
|
|
68
93
|
|
|
69
|
-
|
|
70
|
-
> We are continuously working to support more instruments. Please check back for updates or contribute to our project on
|
|
71
|
-
> GitHub.
|
|
94
|
+
### Supported Instruments
|
|
72
95
|
|
|
73
96
|
The AeroViz project currently supports data from the following instruments:
|
|
74
97
|
|
|
75
|
-
-
|
|
76
|
-
-
|
|
77
|
-
-
|
|
78
|
-
-
|
|
79
|
-
-
|
|
80
|
-
-
|
|
81
|
-
-
|
|
82
|
-
-
|
|
83
|
-
-
|
|
84
|
-
-
|
|
85
|
-
-
|
|
86
|
-
-
|
|
87
|
-
-
|
|
88
|
-
|
|
89
|
-
|
|
98
|
+
- SMPS (Scanning Mobility Particle Sizer)
|
|
99
|
+
- APS (Aerodynamic Particle Sizer)
|
|
100
|
+
- GRIMM (GRIMM Aerosol Technik)
|
|
101
|
+
- TEOM (Continuous Ambient Particulate Monitor)
|
|
102
|
+
- NEPH (Nephelometer)
|
|
103
|
+
- Aurora (Nephelometer)
|
|
104
|
+
- AE33 (Aethalometer Model 33)
|
|
105
|
+
- AE43 (Aethalometer Model 43)
|
|
106
|
+
- BC1054 (Black Carbon Monitor 1054)
|
|
107
|
+
- MA350 (MicroAeth MA350)
|
|
108
|
+
- OCEC (Organic Carbon Elemental Carbon Analyzer)
|
|
109
|
+
- IGAC (In-situ Gas and Aerosol Compositions monitor)
|
|
110
|
+
- XRF (X-ray Fluorescence Spectrometer)
|
|
111
|
+
- VOC (Volatile Organic Compounds Monitor)
|
|
112
|
+
|
|
113
|
+
> **Note:** We are continuously working to support more instruments. Please check back for updates or contribute to our
|
|
114
|
+
> project on GitHub.
|
|
115
|
+
|
|
116
|
+
## <div align="center">DataProcess</div>
|
|
90
117
|
|
|
91
118
|
The AeroViz project currently supports the following processing methods:
|
|
92
119
|
|
|
93
|
-
- **Chemistry
|
|
120
|
+
- **Chemistry**:
|
|
94
121
|
- **Optical**
|
|
95
122
|
- **SizeDistr**
|
|
96
123
|
- **VOC**
|
|
@@ -103,15 +130,12 @@ For detailed documentation, please refer to the `docs` folder, which includes:
|
|
|
103
130
|
|
|
104
131
|
| Documentation | Description |
|
|
105
132
|
|--------------------------------------------|----------------------------|
|
|
106
|
-
| [User Guide](docs/user_guide
|
|
107
|
-
| [Developer Guide](docs/developer_guide.md) | Developer guidelines |
|
|
108
|
-
| [API Reference](docs/api_reference.md) | API documentation |
|
|
109
|
-
| [FAQ](docs/faq.md) | Frequently Asked Questions |
|
|
133
|
+
| [User Guide](docs/user_guide) | Basic usage instructions |
|
|
110
134
|
| [Changelog](docs/changelog.md) | List of changes |
|
|
111
135
|
|
|
112
136
|
</div>
|
|
113
137
|
|
|
114
|
-
## <div align="center">Related
|
|
138
|
+
## <div align="center">Related Source</div>
|
|
115
139
|
|
|
116
140
|
* #### [PyMieScatt](https://github.com/bsumlin/PyMieScatt.git)
|
|
117
141
|
* #### [py-smps](https://github.com/quant-aq/py-smps.git)
|