AeroViz 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of AeroViz might be problematic. Click here for more details.
- AeroViz/__init__.py +7 -5
- AeroViz/{config → data}/DEFAULT_DATA.csv +1 -1
- AeroViz/dataProcess/Chemistry/__init__.py +40 -40
- AeroViz/dataProcess/Chemistry/_calculate.py +15 -15
- AeroViz/dataProcess/Chemistry/_isoropia.py +72 -68
- AeroViz/dataProcess/Chemistry/_mass_volume.py +158 -161
- AeroViz/dataProcess/Chemistry/_ocec.py +109 -109
- AeroViz/dataProcess/Chemistry/_partition.py +19 -18
- AeroViz/dataProcess/Chemistry/_teom.py +9 -11
- AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
- AeroViz/dataProcess/Optical/Angstrom_exponent.py +20 -0
- AeroViz/dataProcess/Optical/_IMPROVE.py +40 -41
- AeroViz/dataProcess/Optical/__init__.py +29 -44
- AeroViz/dataProcess/Optical/_absorption.py +21 -47
- AeroViz/dataProcess/Optical/_extinction.py +31 -25
- AeroViz/dataProcess/Optical/_mie.py +5 -7
- AeroViz/dataProcess/Optical/_mie_sd.py +89 -90
- AeroViz/dataProcess/Optical/_scattering.py +19 -20
- AeroViz/dataProcess/SizeDistr/__init__.py +39 -39
- AeroViz/dataProcess/SizeDistr/__merge.py +159 -158
- AeroViz/dataProcess/SizeDistr/_merge.py +155 -154
- AeroViz/dataProcess/SizeDistr/_merge_v1.py +162 -161
- AeroViz/dataProcess/SizeDistr/_merge_v2.py +153 -152
- AeroViz/dataProcess/SizeDistr/_merge_v3.py +327 -327
- AeroViz/dataProcess/SizeDistr/_merge_v4.py +273 -275
- AeroViz/dataProcess/SizeDistr/_size_distr.py +51 -51
- AeroViz/dataProcess/VOC/__init__.py +9 -9
- AeroViz/dataProcess/VOC/_potential_par.py +53 -55
- AeroViz/dataProcess/__init__.py +28 -6
- AeroViz/dataProcess/core/__init__.py +59 -65
- AeroViz/plot/__init__.py +7 -2
- AeroViz/plot/bar.py +126 -0
- AeroViz/plot/box.py +69 -0
- AeroViz/plot/distribution/distribution.py +421 -427
- AeroViz/plot/meteorology/meteorology.py +240 -292
- AeroViz/plot/optical/__init__.py +0 -1
- AeroViz/plot/optical/optical.py +230 -230
- AeroViz/plot/pie.py +198 -0
- AeroViz/plot/regression.py +196 -0
- AeroViz/plot/scatter.py +165 -0
- AeroViz/plot/templates/__init__.py +2 -4
- AeroViz/plot/templates/ammonium_rich.py +34 -0
- AeroViz/plot/templates/contour.py +25 -25
- AeroViz/plot/templates/corr_matrix.py +86 -93
- AeroViz/plot/templates/diurnal_pattern.py +28 -26
- AeroViz/plot/templates/koschmieder.py +59 -123
- AeroViz/plot/templates/metal_heatmap.py +135 -37
- AeroViz/plot/timeseries/__init__.py +1 -0
- AeroViz/plot/timeseries/template.py +47 -0
- AeroViz/plot/timeseries/timeseries.py +324 -264
- AeroViz/plot/utils/__init__.py +2 -1
- AeroViz/plot/utils/_color.py +57 -57
- AeroViz/plot/utils/_unit.py +48 -48
- AeroViz/plot/utils/plt_utils.py +92 -0
- AeroViz/plot/utils/sklearn_utils.py +49 -0
- AeroViz/plot/utils/units.json +5 -0
- AeroViz/plot/violin.py +80 -0
- AeroViz/process/__init__.py +17 -17
- AeroViz/process/core/DataProc.py +9 -9
- AeroViz/process/core/SizeDist.py +81 -81
- AeroViz/process/method/PyMieScatt_update.py +488 -488
- AeroViz/process/method/mie_theory.py +231 -229
- AeroViz/process/method/prop.py +40 -40
- AeroViz/process/script/AbstractDistCalc.py +103 -103
- AeroViz/process/script/Chemical.py +168 -167
- AeroViz/process/script/IMPACT.py +40 -40
- AeroViz/process/script/IMPROVE.py +152 -152
- AeroViz/process/script/Others.py +45 -45
- AeroViz/process/script/PSD.py +26 -26
- AeroViz/process/script/PSD_dry.py +69 -70
- AeroViz/process/script/retrieve_RI.py +50 -51
- AeroViz/rawDataReader/__init__.py +53 -58
- AeroViz/rawDataReader/config/supported_instruments.py +155 -0
- AeroViz/rawDataReader/core/__init__.py +233 -356
- AeroViz/rawDataReader/script/AE33.py +17 -18
- AeroViz/rawDataReader/script/AE43.py +18 -21
- AeroViz/rawDataReader/script/APS_3321.py +30 -30
- AeroViz/rawDataReader/script/Aurora.py +23 -24
- AeroViz/rawDataReader/script/BC1054.py +36 -40
- AeroViz/rawDataReader/script/EPA_vertical.py +37 -9
- AeroViz/rawDataReader/script/GRIMM.py +16 -23
- AeroViz/rawDataReader/script/IGAC.py +90 -0
- AeroViz/rawDataReader/script/MA350.py +32 -39
- AeroViz/rawDataReader/script/Minion.py +103 -0
- AeroViz/rawDataReader/script/NEPH.py +69 -74
- AeroViz/rawDataReader/script/SMPS_TH.py +25 -25
- AeroViz/rawDataReader/script/SMPS_aim11.py +32 -32
- AeroViz/rawDataReader/script/SMPS_genr.py +31 -31
- AeroViz/rawDataReader/script/Sunset_OCEC.py +60 -0
- AeroViz/rawDataReader/script/TEOM.py +30 -28
- AeroViz/rawDataReader/script/Table.py +13 -14
- AeroViz/rawDataReader/script/VOC.py +26 -0
- AeroViz/rawDataReader/script/__init__.py +18 -20
- AeroViz/tools/database.py +64 -66
- AeroViz/tools/dataclassifier.py +106 -106
- AeroViz/tools/dataprinter.py +51 -51
- AeroViz/tools/datareader.py +38 -38
- {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/METADATA +5 -4
- AeroViz-0.1.4.dist-info/RECORD +112 -0
- AeroViz/plot/improve/__init__.py +0 -1
- AeroViz/plot/improve/improve.py +0 -240
- AeroViz/plot/optical/aethalometer.py +0 -77
- AeroViz/plot/templates/event_evolution.py +0 -65
- AeroViz/plot/templates/regression.py +0 -256
- AeroViz/plot/templates/scatter.py +0 -130
- AeroViz/plot/templates/templates.py +0 -398
- AeroViz/plot/utils/_decorator.py +0 -74
- AeroViz/rawDataReader/script/IGAC_TH.py +0 -104
- AeroViz/rawDataReader/script/IGAC_ZM.py +0 -90
- AeroViz/rawDataReader/script/OCEC_LCRES.py +0 -34
- AeroViz/rawDataReader/script/OCEC_RES.py +0 -28
- AeroViz/rawDataReader/script/VOC_TH.py +0 -30
- AeroViz/rawDataReader/script/VOC_ZM.py +0 -37
- AeroViz/rawDataReader/utils/__init__.py +0 -0
- AeroViz/rawDataReader/utils/config.py +0 -169
- AeroViz-0.1.3.dist-info/RECORD +0 -111
- /AeroViz/{config → data}/DEFAULT_PNSD_DATA.csv +0 -0
- /AeroViz/{config → rawDataReader/config}/__init__.py +0 -0
- {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/WHEEL +0 -0
- {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/top_level.txt +0 -0
|
@@ -4,31 +4,28 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
|
-
|
|
7
|
+
nam = 'AE43'
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
def _raw_reader(self, _file):
    """Read one AE43 CSV file and return BC1-BC7 for the file's latest setup.

    Only rows whose ``SetupID`` equals the ``SetupID`` of the last row are
    kept. When ``self.meta`` holds a non-empty ``'error_state'`` collection,
    rows whose ``Status`` is listed there are blanked to NaN (the rows stay).
    Rows with a duplicated or unparseable timestamp are dropped at the end.

    :param _file: path (or any object ``read_csv`` accepts) of the raw file.
    :return: DataFrame indexed by ``time`` with columns ``BC1`` ... ``BC7``.
    """
    from pandas import to_datetime  # local import: the file's import block is outside this view

    _bc_cols = ['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']

    _df = read_csv(_file)

    # Parse the timestamps explicitly: the dict form of ``parse_dates`` is
    # deprecated in recent pandas. Unparseable stamps become NaT here and are
    # removed by the final filter.
    _df['StartTime'] = to_datetime(_df['StartTime'], errors='coerce')
    _df = _df.set_index('StartTime').rename_axis('time')

    # keep only the rows recorded under the last SetupID of the file
    _last_id = _df['SetupID'].iloc[-1]
    _df = _df.loc[_df['SetupID'] == _last_id, _bc_cols + ['Status']].copy()

    # blank rows flagged with one of the configured error states
    _error_state = self.meta.get('error_state', False)
    if _error_state:
        _df = _df.where(~_df['Status'].isin(_error_state))

    _df = _df[_bc_cols]

    return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
26
24
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
25
|
+
# QC data
|
|
26
|
+
def _QC(self, _df):
    """Blank negative readings, run the shared QC per hour, then re-average.

    :param _df: time-indexed frame of numeric readings.
    :return: frame resampled to the frequency stored under ``self.meta['freq']``.
    """
    # negative values are treated as invalid and become NaN
    _df = _df.mask(_df < 0)

    # ``self.basic_QC`` is defined outside this view; it is applied to each
    # 1-hour window before averaging onto the configured output frequency
    return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
|
|
@@ -5,43 +5,43 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class Reader(AbstractReader):
|
|
8
|
-
|
|
8
|
+
nam = 'APS_3321'
|
|
9
9
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
10
|
+
def _raw_reader(self, _file):
    """Read one APS 3321 text export and return its size-bin columns.

    The first six lines of the file are skipped. ``Date`` and ``Start Time``
    are combined into the ``Time`` index, and the columns at positions 3..53
    of the remaining table are kept, relabelled with their header converted
    to a float rounded to 4 decimals. Rows with an unparseable or duplicated
    timestamp are dropped.

    :param _file: path of the raw file.
    :return: DataFrame indexed by ``Time`` with float-labelled bin columns.
    """
    with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
        _df = read_table(f, skiprows=6)

    # Build the timestamp ourselves: the dict form of ``parse_dates`` is
    # deprecated in recent pandas. Unparseable stamps become NaT.
    _stamp = _df.pop('Date').astype(str) + ' ' + _df.pop('Start Time').astype(str)
    _df = _df.set_index(to_datetime(_stamp, errors='coerce').rename('Time'))

    # size-bin columns (the original comment notes them as "542 ~ 1981")
    _key = list(_df.columns[3:54])
    _newkey = {_k: round(float(_k), 4) for _k in _key}
    _df = _df[_key].rename(columns=_newkey)

    # Boolean filtering instead of ``.loc[<labels>]``: label lookup returns
    # every match for each label, so duplicated timestamps multiplied rows.
    return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
28
28
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
29
|
+
# QC data
|
|
30
|
+
def _QC(self, _df):
    """Mask sparsely sampled hours and rows whose total concentration is too high.

    A per-row total is computed as the row sum over all bins multiplied by
    the mean spacing of the log of the column labels (the labels are read as
    floats). Two screens are then applied, each blanking whole rows to NaN:

    1. rows in clock hours holding fewer than 7 rows with a valid total;
    2. rows whose total is greater than 700.

    The input frame is not modified (the previous version added a temporary
    ``total`` column to the caller's frame).

    :param _df: time-indexed frame whose column labels are bin sizes.
    :return: frame with the same columns, invalid rows set to NaN.
    """
    # ``n`` is the numpy alias used by this file (import outside this view)
    _bin_width = n.diff(n.log(_df.keys().to_numpy(float))).mean()
    _total = _df.sum(axis=1, min_count=1) * _bin_width

    # Count valid rows per clock hour and broadcast the count onto every row
    # of that hour. The earlier resample/ffill form needed ``index.freq`` to
    # be set and left the tail of the last hour without a count, which
    # masked it.
    _hour_count = _total.groupby(_total.index.floor('1h')).transform('count')
    _sparse = _hour_count < 7
    _df = _df.mask(_sparse)
    _total = _total.mask(_sparse)

    # remove rows whose total concentration is greater than 700
    _df = _df.mask(_total > 700)

    # NOTE(review): the original kept an unconfirmed, disabled rule here
    # (mask bins >= 4.0 with concentration > 1); it was never executed.
    return _df
|
|
@@ -4,35 +4,34 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
|
-
|
|
7
|
+
nam = 'Aurora'
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
def _raw_reader(self, _file):
    """Read one Aurora CSV file and return its scattering channels and RH.

    Column headers are stripped of surrounding spaces and two header layouts
    are mapped onto the common names ``B G R BB BG BR``. Rows with a
    duplicated or unparseable timestamp are dropped.

    :param _file: path-like object exposing ``.open`` (e.g. ``pathlib.Path``).
    :return: DataFrame indexed by ``time`` with columns
        ``B, G, R, BB, BG, BR, RH``.
    """
    with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
        _df = read_csv(f, low_memory=False, index_col=0)

    _df.index = to_datetime(_df.index, errors='coerce')
    _df.index.name = 'time'

    _df.columns = _df.columns.str.strip(' ')

    # two known header layouts map onto the same short names
    _df = _df.rename(columns={
        '0°σspB': 'B', '0°σspG': 'G', '0°σspR': 'R',
        '90°σspB': 'BB', '90°σspG': 'BG', '90°σspR': 'BR',
        'Blue': 'B', 'Green': 'G', 'Red': 'R',
        'B_Blue': 'BB', 'B_Green': 'BG', 'B_Red': 'BR',
    })

    _df = _df[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']]

    return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
28
30
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
_df_std = _df_1hr.std()
|
|
34
|
-
_df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
|
|
31
|
+
# QC data
|
|
32
|
+
def _QC(self, _df):
    """Blank out-of-range readings, run the shared QC per hour, then re-average.

    :param _df: time-indexed frame of numeric readings.
    :return: frame resampled to the frequency stored under ``self.meta['freq']``.
    """
    # keep only values in (0, 2000]; the bound applies to every column passed in
    _df = _df.mask((_df <= 0) | (_df > 2000))

    # ``self.basic_QC`` is defined outside this view; it is applied to each
    # 1-hour window before averaging onto the configured output frequency
    return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
|
|
@@ -4,43 +4,39 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
return _df_1hr.mask(_df_lowb | _df_highb).copy()
|
|
45
|
-
|
|
46
|
-
return _df.resample('1h', group_keys=False).apply(_QC_func).resample('5min').mean()
|
|
7
|
+
nam = 'BC1054'
|
|
8
|
+
|
|
9
|
+
def _raw_reader(self, _file):
    """Read one BC1054 CSV file and return its ten BC channels.

    Spaces are removed from the column headers and ``BCn(ng/m3)`` headers
    are renamed to ``BCn``. When ``self.meta`` holds a non-empty
    ``'error_state'`` collection, rows whose ``Status`` is listed there are
    dropped. Rows with a duplicated or missing timestamp are dropped too.

    :param _file: path of the raw file.
    :return: DataFrame indexed by the first CSV column with columns
        ``BC1`` ... ``BC10``.
    """
    _bc_cols = [f'BC{_i}' for _i in range(1, 11)]

    with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
        _df = read_csv(f, parse_dates=True, index_col=0)

    _df.columns = _df.columns.str.replace(' ', '')
    _df = _df.rename(columns={f'{_name}(ng/m3)': _name for _name in _bc_cols})

    # drop rows whose Status is one of the configured error states
    _error_state = self.meta.get('error_state', False)
    if _error_state:
        _df = _df[~_df['Status'].isin(_error_state)]

    _df = _df[_bc_cols]

    return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
35
|
+
|
|
36
|
+
# QC data
|
|
37
|
+
def _QC(self, _df):
    """Blank negative BC readings, run the shared QC per hour, then re-average.

    :param _df: time-indexed frame containing ``BC1`` ... ``BC10``.
    :return: frame of those ten columns resampled to the frequency stored
        under ``self.meta['freq']``.
    """
    _bc_cols = [f'BC{_i}' for _i in range(1, 11)]

    # negative values are treated as invalid and become NaN
    _df = _df[_bc_cols]
    _df = _df.mask(_df < 0)

    # ``self.basic_QC`` is defined outside this view; it is applied to each
    # 1-hour window before averaging onto the configured output frequency
    return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
|
|
@@ -1,18 +1,46 @@
|
|
|
1
|
+
import numpy as np
|
|
1
2
|
from pandas import read_csv, to_numeric
|
|
2
3
|
|
|
3
4
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
class Reader(AbstractReader):
|
|
7
|
-
|
|
8
|
+
nam = 'EPA_vertical'
|
|
8
9
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
10
|
+
def _raw_reader(self, _file):
    """Read one hourly-value CSV export and return a fixed set of numeric columns.

    The column named ``Unnamed: 1`` is skipped, three meteorological headers
    are shortened (``AMB_TEMP`` -> ``AT``, ``WIND_SPEED`` -> ``WS``,
    ``WIND_DIREC`` -> ``WD``) and the result is laid out in a fixed column
    order. Columns absent from the file are added as all-NaN and reported
    through ``self.logger`` and ``print``. Every cell that is not a plain
    number becomes NaN.

    :param _file: path-like object exposing ``.open`` (e.g. ``pathlib.Path``).
    :return: DataFrame indexed by ``Time`` with the columns of ``desired_order``.
    """
    # works whether or not the export was produced with "valid values only"
    with _file.open('r', encoding='ascii', errors='ignore') as f:
        # the handle is already decoded, so no encoding options are needed here
        df = read_csv(f, index_col=0, parse_dates=True, usecols=lambda col: col != 'Unnamed: 1')

    df.index.name = 'Time'
    df = df.rename(columns={'AMB_TEMP': 'AT', 'WIND_SPEED': 'WS', 'WIND_DIREC': 'WD'})

    # fixed output column order
    desired_order = ['SO2', 'NO', 'NOx', 'NO2', 'CO', 'O3', 'THC', 'NMHC', 'CH4', 'PM10', 'PM2.5', 'WS', 'WD',
                     'AT', 'RH']

    missing_columns = [col for col in desired_order if col not in df.columns]
    if missing_columns:
        self.logger.info(f"{'=' * 60}")
        self.logger.info(f"Missing columns: {missing_columns}")
        self.logger.info(f"{'=' * 60}")
        print(f"Missing columns: {missing_columns}")

    # reindex both orders the columns and creates the missing ones as NaN
    df = df.reindex(columns=desired_order)

    # Values carrying an invalid-data flag (a '#', 'L' or 'O' suffix) are not
    # numeric, so the coercion below already turns them into NaN; the earlier
    # regex replacement to '*' did not change the outcome and was removed.
    return df.apply(to_numeric, errors='coerce')
|
|
44
|
+
|
|
45
|
+
def _QC(self, _df):
    """Return the frame unchanged; no quality control is applied by this reader.

    :param _df: frame produced by ``_raw_reader``.
    :return: the same object that was passed in.
    """
    return _df
|
|
@@ -4,32 +4,25 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
|
-
|
|
7
|
+
nam = 'GRIMM'
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
def _raw_reader(self, _file):
    """Read one GRIMM tab-separated export and return its scaled data columns.

    The header row is taken from line index 233 of the file. The first 11
    data columns are always discarded. For files whose name starts with
    ``A407ST`` everything from column position 128 on is discarded as well;
    for all other files the last 5 columns are discarded instead.

    :param _file: path-like object exposing ``.name`` (e.g. ``pathlib.Path``).
    :return: the remaining columns divided by 0.035, indexed by ``Time``;
        ``None`` (after printing a notice) when nothing is left.
    """
    _df = read_csv(_file, header=233, delimiter='\t', index_col=0, parse_dates=[0], encoding='ISO-8859-1',
                   dayfirst=True).rename_axis("Time")
    # make sure the index is datetime even if read_csv left it as text
    _df.index = to_datetime(_df.index, format="%d/%m/%Y %H:%M:%S", dayfirst=True)

    # positional slice instead of dropping by label: same columns survive,
    # with one code path for both file layouts
    _stop = 128 if _file.name.startswith("A407ST") else -5
    _df = _df.iloc[:, 11:_stop]

    if _df.empty:
        print(_file, "is empty")
        return None

    # NOTE(review): 0.035 is an undocumented scale factor - confirm its meaning
    return _df / 0.035
|
|
25
25
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
_df_ave = _df_1hr.mean()
|
|
30
|
-
_df_std = _df_1hr.std()
|
|
31
|
-
_df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
|
|
32
|
-
|
|
33
|
-
return _df_1hr.mask(_df_lowb | _df_highb).copy()
|
|
34
|
-
|
|
35
|
-
return _df.resample('5min').apply(_QC_func).resample('1h').mean()
|
|
26
|
+
def _QC(self, _df):
    """Run the shared QC on 1-hour windows, then average to the target frequency.

    :param _df: time-indexed frame of numeric readings.
    :return: frame resampled to the frequency stored under ``self.meta['freq']``.
    """
    # ``self.basic_QC`` is defined outside this view
    _hourly = _df.resample('1h').apply(self.basic_QC)
    return _hourly.resample(self.meta.get("freq")).mean()
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# read IGAC data (water-soluble ion concentrations and PM2.5) from a CSV file
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from pandas import read_csv, concat, to_numeric
|
|
6
|
+
|
|
7
|
+
from AeroViz.rawDataReader.core import AbstractReader
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Reader(AbstractReader):
|
|
11
|
+
nam = 'IGAC'
|
|
12
|
+
|
|
13
|
+
def _raw_reader(self, _file):
    """Read one IGAC CSV file into an all-numeric, time-indexed frame.

    ``'-'`` cells are read as missing and every remaining non-numeric cell
    is coerced to NaN. Column headers are stripped of surrounding spaces.
    Rows with a duplicated or missing timestamp are dropped.

    :param _file: path-like object exposing ``.open`` (e.g. ``pathlib.Path``).
    :return: numeric DataFrame indexed by ``time``.
    """
    with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
        _df = read_csv(f, parse_dates=True, index_col=0, na_values='-')

    _df = _df.apply(to_numeric, errors='coerce')

    _df.columns = _df.columns.str.strip(' ')
    _df.index.name = 'time'

    return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
22
|
+
|
|
23
|
+
def _QC(self, _df):
    """Screen the nine ion columns of an IGAC frame and return them.

    Steps, in order:

    1. blank rows whose ion sum exceeds ``PM2.5`` and drop rows lacking any
       of the main salts (``SO42-``, ``NO3-``, ``NH4+``);
    2. replace values below the per-ion detection limit by half that limit;
    3. blank values above the upper fence ``Q3 + 1.5 * IQR`` computed on
       log10 values, per ion;
    4. blank rows whose cation/anion sum ratio, or its inverse, is not
       positive or not below its own upper fence;
    5. blank main-salt values below the lower fence ``Q1 - 1.5 * IQR``
       computed on log10 values.

    A frame in which no row survives step 1 yields an all-NaN result; the
    previous version raised in that case because it concatenated zero fence
    rows.

    :param _df: time-indexed frame holding the ion columns and ``PM2.5``.
    :return: frame of the nine ion columns reindexed to ``_df.index``.
    """
    # detection limit per ion; the unit is not stated in this file
    _mdl = {
        'Na+': 0.06,
        'NH4+': 0.05,
        'K+': 0.05,
        'Mg2+': 0.12,
        'Ca2+': 0.07,
        'Cl-': 0.07,
        'NO2-': 0.05,
        'NO3-': 0.11,
        'SO42-': 0.08,
    }

    def _se_le(_df_, _log=False):
        # Lower/upper fences per column, returned as Series that broadcast
        # over rows (no per-row copies needed).
        _df_ = np.log10(_df_) if _log else _df_

        _df_qua = _df_.quantile([.25, .75])
        _df_q1, _df_q3 = _df_qua.loc[.25], _df_qua.loc[.75]
        _df_iqr = _df_q3 - _df_q1

        _se = _df_q1 - 1.5 * _df_iqr
        _le = _df_q3 + 1.5 * _df_iqr

        if _log:
            return 10 ** _se, 10 ** _le
        return _se, _le

    _cation = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+']
    _anion = ['Cl-', 'NO2-', 'NO3-', 'SO42-']
    _main = ['SO42-', 'NO3-', 'NH4+']

    _df_salt = _df[list(_mdl)].copy()
    _df_pm = _df['PM2.5']

    # the ion sum must not exceed PM2.5, and the main salts must all be present
    _df_salt = _df_salt.mask(_df_salt.sum(axis=1, min_count=1) > _df_pm).dropna(subset=_main).copy()

    # nothing left to build quantiles from
    if _df_salt.empty:
        return _df_salt.reindex(_df.index)

    # values below the detection limit are set to half the limit
    for _ion, _limit in _mdl.items():
        _col = _df_salt[_ion]
        _df_salt[_ion] = _col.mask(_col < _limit, _limit / 2)

    # every ion must stay at or below its upper fence (log scale)
    _se, _le = _se_le(_df_salt, _log=True)
    _df_salt = _df_salt.mask(_df_salt.gt(_le, axis=1))

    # cation/anion ratio and its inverse
    _rat_CA = (_df_salt[_cation].sum(axis=1) / _df_salt[_anion].sum(axis=1)).to_frame()
    _rat_AC = 1 / _rat_CA

    _se, _le = _se_le(_rat_CA)
    _cond_CA = _rat_CA.lt(_le, axis=1) & (_rat_CA > 0)

    _se, _le = _se_le(_rat_AC)
    _cond_AC = _rat_AC.lt(_le, axis=1) & (_rat_AC > 0)

    _df_salt = _df_salt.where((_cond_CA & _cond_AC)[0])

    # main salts must stay at or above their lower fence (log scale)
    _se, _le = _se_le(_df_salt[_main], _log=True)
    _df_salt[_main] = _df_salt[_main].mask(_df_salt[_main].lt(_se, axis=1))

    return _df_salt.reindex(_df.index)
|
|
@@ -4,42 +4,35 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
_df_ave = _df_1hr.mean()
|
|
40
|
-
_df_std = _df_1hr.std()
|
|
41
|
-
_df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
|
|
42
|
-
|
|
43
|
-
return _df_1hr.mask(_df_lowb | _df_highb).copy()
|
|
44
|
-
|
|
45
|
-
return _df.resample('1h', group_keys=False).apply(_QC_func).resample('5min').mean()
|
|
7
|
+
nam = 'MA350'
|
|
8
|
+
|
|
9
|
+
def _raw_reader(self, _file):
    """Read one MA350 CSV file and return its renamed BC and apportionment columns.

    ``Date / time local`` becomes the ``Time`` index and the instrument
    headers are mapped onto short names. Rows with a duplicated or missing
    timestamp are dropped.

    :param _file: path (or any object ``read_csv`` accepts) of the raw file.
    :return: DataFrame indexed by ``Time`` with columns ``BC1`` ... ``BC5``,
        ``BB mass``, ``FF mass``, ``Delta-C``, ``AAE`` and ``BB``.
    """
    _df = read_csv(_file, parse_dates=['Date / time local'], index_col='Date / time local')
    _df = _df.rename_axis("Time")

    _df = _df.rename(columns={
        'UV BCc': 'BC1',
        'Blue BCc': 'BC2',
        'Green BCc': 'BC3',
        'Red BCc': 'BC4',
        'IR BCc': 'BC5',
        'Biomass BCc (ng/m^3)': 'BB mass',
        'Fossil fuel BCc (ng/m^3)': 'FF mass',
        'Delta-C (ng/m^3)': 'Delta-C',
        'BB (%)': 'BB',
    })

    # NOTE(review): unlike the other aethalometer readers in this package,
    # no ``error_state`` / Status filtering is applied here (it was disabled).
    _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'Delta-C', 'AAE', 'BB']]

    return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
31
|
+
|
|
32
|
+
# QC data
|
|
33
|
+
def _QC(self, _df):
    """Blank negative readings, run the shared QC per hour, then re-average.

    Only ``BC1`` ... ``BC5``, ``BB mass``, ``FF mass``, ``AAE`` and ``BB``
    are processed; ``Delta-C`` (returned by ``_raw_reader``) is not part of
    the output.

    :param _df: time-indexed frame containing at least the columns above.
    :return: frame of those columns resampled to the frequency stored under
        ``self.meta['freq']``.
    """
    _cols = ['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'AAE', 'BB']

    # negative values are treated as invalid and become NaN
    _df = _df[_cols]
    _df = _df.mask(_df < 0)

    # ``self.basic_QC`` is defined outside this view; it is applied to each
    # 1-hour window before averaging onto the configured output frequency
    return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from pandas import read_csv, to_datetime, to_numeric
|
|
3
|
+
|
|
4
|
+
from AeroViz.rawDataReader.core import AbstractReader
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Reader(AbstractReader):
|
|
8
|
+
nam = 'Minion'
|
|
9
|
+
|
|
10
|
+
def _raw_reader(self, _file):
    """Read one Minion CSV file into a time-indexed frame.

    The first column becomes the ``time`` index (unparseable stamps turn
    into NaT), column headers are stripped of surrounding spaces, and rows
    with a duplicated or unparseable timestamp are dropped. Cell values are
    not converted here.

    :param _file: path-like object exposing ``.open`` (e.g. ``pathlib.Path``).
    :return: DataFrame indexed by ``time``.
    """
    with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
        _df = read_csv(f, low_memory=False, index_col=0)

    _df.index = to_datetime(_df.index, errors='coerce')
    _df.index.name = 'time'

    _df.columns = _df.columns.str.strip(' ')

    return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
20
|
+
|
|
21
|
+
def _QC(self, _df):
    """Apply the XRF and ion-balance screens, blank negatives, then re-average.

    :param _df: time-indexed frame of element and ion readings.
    :return: frame resampled to the frequency stored under ``self.meta['freq']``.
    """
    # element readings below their detection limit -> NaN
    _df = self.XRF_QAQC(_df)

    # ion rows failing the cation/anion balance -> NaN
    _df = self.ions_balance(_df)

    # negative values are treated as invalid and become NaN
    # NOTE(review): this comparison assumes every column is numeric - confirm
    _df = _df.mask(_df < 0)

    # ``self.basic_QC`` is defined outside this view; it is applied to each
    # 6-hour window before averaging onto the configured output frequency
    return _df.resample('6h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
|
|
33
|
+
|
|
34
|
+
# base on Xact 625i Minimum Decision Limit (MDL) for XRF in ng/m3, 60 min sample time
|
|
35
|
+
def XRF_QAQC(self, df):
    """Return a copy of ``df`` with element readings below their MDL set to NaN.

    Only columns named after an element listed in ``MDL`` are touched; their
    values are coerced to numbers first, so unparseable text also becomes
    NaN. The input frame is left unmodified.

    :param df: frame that may contain element columns (``'Al'``, ``'Si'``, ...).
    :return: screened copy of ``df``.
    """
    MDL = {
        'Al': 100, 'Si': 18, 'P': 5.2, 'S': 3.2,
        'Cl': 1.7, 'K': 1.2, 'Ca': 0.3, 'Ti': 1.6,
        'V': 0.12, 'Cr': 0.12, 'Mn': 0.14, 'Fe': 0.17,
        'Co': 0.14, 'Ni': 0.096, 'Cu': 0.079, 'Zn': 0.067,
        'Ga': 0.059, 'Ge': 0.056, 'As': 0.063, 'Se': 0.081,
        'Br': 0.1, 'Rb': 0.19, 'Sr': 0.22, 'Y': 0.28,
        'Zr': 0.33, 'Nb': 0.41, 'Mo': 0.48, 'Ag': 1.9,
        'Cd': 2.5, 'In': 3.1, 'Sn': 4.1, 'Sb': 5.2,
        'Te': 0.6, 'I': 0.49, 'Cs': 0.37, 'Ba': 0.39,
        'La': 0.36, 'Ce': 0.3, 'Pt': 0.12, 'Au': 0.1,
        'Hg': 0.12, 'Tl': 0.12, 'Pb': 0.13, 'Bi': 0.13
    }

    df = df.copy()

    # replace readings below the MDL with NaN
    for element, threshold in MDL.items():
        if element in df.columns:
            values = to_numeric(df[element], errors='coerce')
            df[element] = values.where(values >= threshold)

    self.logger.info(f"{'=' * 60}")
    self.logger.info("XRF QAQC summary:")
    self.logger.info("\t\ttransform values below MDL to NaN")
    self.logger.info(f"{'=' * 60}")

    return df
|
|
60
|
+
|
|
61
|
+
def ions_balance(self, df, tolerance=0.3):
    """Return a copy of ``df`` keeping ion rows whose cation/anion ratio is near 1.

    Each ion is divided by a fixed per-ion constant and the cation and anion
    results are summed per row (missing values count as 0). ``F-`` and
    ``PO43-`` do not enter the sums. A row is valid when the anion sum is
    non-zero and ``1 - tolerance <= cation / anion <= 1 + tolerance``; on
    every other row all eleven ion columns are set to NaN. The ion columns
    of the result hold the coerced numeric values. The share of valid rows
    is logged, with a warning below 70 %.

    :param df: frame containing the eleven ion columns listed in ``item``.
    :param tolerance: accepted deviation of the ratio from 1.
    :return: screened copy of ``df``; the input is left unmodified.
    """
    # the ions
    item = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+', 'F-', 'Cl-', 'NO2-', 'NO3-', 'PO43-', 'SO42-']
    cation_div = {'Na+': 23, 'NH4+': 18, 'K+': 39, 'Mg2+': 24 / 2, 'Ca2+': 40 / 2}
    anion_div = {'Cl-': 35.5, 'NO2-': 46, 'NO3-': 62, 'SO42-': 96 / 2}

    ions = df[item].apply(to_numeric, errors='coerce')

    # per-row sums; ``sum`` skips NaN, so neither total is ever NaN
    pos_mole = ions[list(cation_div)].div(list(cation_div.values())).sum(axis=1)
    neg_mole = ions[list(anion_div)].div(list(anion_div.values())).sum(axis=1)

    # a zero anion sum gives no usable ratio
    ratio = (pos_mole / neg_mole).where(neg_mole != 0)

    # NaN ratios compare False, so they are invalid as well
    valid_mask = ratio.between(1 - tolerance, 1 + tolerance)

    df = df.copy()
    df[item] = ions.where(valid_mask)

    # percentage of rows kept
    retained_percentage = valid_mask.mean() * 100

    self.logger.info(f"{'=' * 60}")
    self.logger.info("Ions balance summary:")
    self.logger.info(f"\t\tretain {round(retained_percentage, 0)}% data within tolerance {tolerance}")
    self.logger.info(f"{'=' * 60}")

    if retained_percentage < 70:
        self.logger.warning("Warning: The percentage of retained data is less than 70%")

    return df
|