AeroViz 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of AeroViz might be problematic. Click here for more details.

Files changed (102) hide show
  1. AeroViz/__init__.py +15 -0
  2. AeroViz/dataProcess/Chemistry/__init__.py +63 -0
  3. AeroViz/dataProcess/Chemistry/_calculate.py +27 -0
  4. AeroViz/dataProcess/Chemistry/_isoropia.py +99 -0
  5. AeroViz/dataProcess/Chemistry/_mass_volume.py +175 -0
  6. AeroViz/dataProcess/Chemistry/_ocec.py +184 -0
  7. AeroViz/dataProcess/Chemistry/_partition.py +29 -0
  8. AeroViz/dataProcess/Chemistry/_teom.py +16 -0
  9. AeroViz/dataProcess/Optical/_IMPROVE.py +61 -0
  10. AeroViz/dataProcess/Optical/__init__.py +62 -0
  11. AeroViz/dataProcess/Optical/_absorption.py +54 -0
  12. AeroViz/dataProcess/Optical/_extinction.py +36 -0
  13. AeroViz/dataProcess/Optical/_mie.py +16 -0
  14. AeroViz/dataProcess/Optical/_mie_sd.py +143 -0
  15. AeroViz/dataProcess/Optical/_scattering.py +30 -0
  16. AeroViz/dataProcess/SizeDistr/__init__.py +61 -0
  17. AeroViz/dataProcess/SizeDistr/__merge.py +250 -0
  18. AeroViz/dataProcess/SizeDistr/_merge.py +245 -0
  19. AeroViz/dataProcess/SizeDistr/_merge_v1.py +254 -0
  20. AeroViz/dataProcess/SizeDistr/_merge_v2.py +243 -0
  21. AeroViz/dataProcess/SizeDistr/_merge_v3.py +518 -0
  22. AeroViz/dataProcess/SizeDistr/_merge_v4.py +424 -0
  23. AeroViz/dataProcess/SizeDistr/_size_distr.py +93 -0
  24. AeroViz/dataProcess/VOC/__init__.py +19 -0
  25. AeroViz/dataProcess/VOC/_potential_par.py +76 -0
  26. AeroViz/dataProcess/__init__.py +11 -0
  27. AeroViz/dataProcess/core/__init__.py +92 -0
  28. AeroViz/plot/__init__.py +7 -0
  29. AeroViz/plot/distribution/__init__.py +1 -0
  30. AeroViz/plot/distribution/distribution.py +582 -0
  31. AeroViz/plot/improve/__init__.py +1 -0
  32. AeroViz/plot/improve/improve.py +240 -0
  33. AeroViz/plot/meteorology/__init__.py +1 -0
  34. AeroViz/plot/meteorology/meteorology.py +317 -0
  35. AeroViz/plot/optical/__init__.py +2 -0
  36. AeroViz/plot/optical/aethalometer.py +77 -0
  37. AeroViz/plot/optical/optical.py +388 -0
  38. AeroViz/plot/templates/__init__.py +8 -0
  39. AeroViz/plot/templates/contour.py +47 -0
  40. AeroViz/plot/templates/corr_matrix.py +108 -0
  41. AeroViz/plot/templates/diurnal_pattern.py +42 -0
  42. AeroViz/plot/templates/event_evolution.py +65 -0
  43. AeroViz/plot/templates/koschmieder.py +156 -0
  44. AeroViz/plot/templates/metal_heatmap.py +57 -0
  45. AeroViz/plot/templates/regression.py +256 -0
  46. AeroViz/plot/templates/scatter.py +130 -0
  47. AeroViz/plot/templates/templates.py +398 -0
  48. AeroViz/plot/timeseries/__init__.py +1 -0
  49. AeroViz/plot/timeseries/timeseries.py +317 -0
  50. AeroViz/plot/utils/__init__.py +3 -0
  51. AeroViz/plot/utils/_color.py +71 -0
  52. AeroViz/plot/utils/_decorator.py +74 -0
  53. AeroViz/plot/utils/_unit.py +55 -0
  54. AeroViz/process/__init__.py +31 -0
  55. AeroViz/process/core/DataProc.py +19 -0
  56. AeroViz/process/core/SizeDist.py +90 -0
  57. AeroViz/process/core/__init__.py +4 -0
  58. AeroViz/process/method/PyMieScatt_update.py +567 -0
  59. AeroViz/process/method/__init__.py +2 -0
  60. AeroViz/process/method/mie_theory.py +258 -0
  61. AeroViz/process/method/prop.py +62 -0
  62. AeroViz/process/script/AbstractDistCalc.py +143 -0
  63. AeroViz/process/script/Chemical.py +176 -0
  64. AeroViz/process/script/IMPACT.py +49 -0
  65. AeroViz/process/script/IMPROVE.py +161 -0
  66. AeroViz/process/script/Others.py +65 -0
  67. AeroViz/process/script/PSD.py +103 -0
  68. AeroViz/process/script/PSD_dry.py +94 -0
  69. AeroViz/process/script/__init__.py +5 -0
  70. AeroViz/process/script/retrieve_RI.py +70 -0
  71. AeroViz/rawDataReader/__init__.py +68 -0
  72. AeroViz/rawDataReader/core/__init__.py +397 -0
  73. AeroViz/rawDataReader/script/AE33.py +31 -0
  74. AeroViz/rawDataReader/script/AE43.py +34 -0
  75. AeroViz/rawDataReader/script/APS_3321.py +47 -0
  76. AeroViz/rawDataReader/script/Aurora.py +38 -0
  77. AeroViz/rawDataReader/script/BC1054.py +46 -0
  78. AeroViz/rawDataReader/script/EPA_vertical.py +18 -0
  79. AeroViz/rawDataReader/script/GRIMM.py +35 -0
  80. AeroViz/rawDataReader/script/IGAC_TH.py +104 -0
  81. AeroViz/rawDataReader/script/IGAC_ZM.py +90 -0
  82. AeroViz/rawDataReader/script/MA350.py +45 -0
  83. AeroViz/rawDataReader/script/NEPH.py +57 -0
  84. AeroViz/rawDataReader/script/OCEC_LCRES.py +34 -0
  85. AeroViz/rawDataReader/script/OCEC_RES.py +28 -0
  86. AeroViz/rawDataReader/script/SMPS_TH.py +41 -0
  87. AeroViz/rawDataReader/script/SMPS_aim11.py +51 -0
  88. AeroViz/rawDataReader/script/SMPS_genr.py +51 -0
  89. AeroViz/rawDataReader/script/TEOM.py +46 -0
  90. AeroViz/rawDataReader/script/Table.py +28 -0
  91. AeroViz/rawDataReader/script/VOC_TH.py +30 -0
  92. AeroViz/rawDataReader/script/VOC_ZM.py +37 -0
  93. AeroViz/rawDataReader/script/__init__.py +22 -0
  94. AeroViz/tools/__init__.py +3 -0
  95. AeroViz/tools/database.py +94 -0
  96. AeroViz/tools/dataclassifier.py +117 -0
  97. AeroViz/tools/datareader.py +66 -0
  98. AeroViz-0.1.0.dist-info/LICENSE +21 -0
  99. AeroViz-0.1.0.dist-info/METADATA +117 -0
  100. AeroViz-0.1.0.dist-info/RECORD +102 -0
  101. AeroViz-0.1.0.dist-info/WHEEL +5 -0
  102. AeroViz-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,104 @@
1
+ # read IGAC_TH ion-composition data (plus PM2.5) from a local CSV file
2
+
3
+
4
+ import numpy as np
5
+ from pandas import read_csv, concat, to_datetime
6
+
7
+ from AeroViz.rawDataReader.core import AbstractReader
8
+
9
+
10
class Reader(AbstractReader):
    """Reader for 'IGAC_TH' CSV files: raw parsing plus ion-concentration QC."""
    nam = 'IGAC_TH'

    def _raw_reader(self, _file):
        """Load one raw CSV file into a DataFrame indexed by time.

        Parameters
        ----------
        _file : object exposing ``.open()`` (e.g. ``pathlib.Path``)
            The raw file; its first column is parsed as the timestamp.

        Returns
        -------
        DataFrame
            Rows with an unparseable timestamp removed and, for repeated
            timestamps, only the first occurrence kept.
        """
        # a user-supplied 'data_freq' takes precedence over the configured frequency
        self.meta['freq'] = self._oth_set.get('data_freq') or self.meta['freq']

        with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
            _df = read_csv(f, low_memory=False, index_col=0)

        _df.index = to_datetime(_df.index, errors='coerce',
                                format=self._oth_set.get('date_format') or 'mixed')
        _df.index.name = 'time'

        _df.columns = _df.keys().str.strip(' ')

        # boolean masks instead of label-based .loc: selecting by label on a
        # non-unique index would multiply every duplicated row before the
        # de-duplication below
        _df = _df[_df.index.notna()]

        return _df[~_df.index.duplicated()]

    ## QC data
    def _QC(self, _df):
        """Quality-control the ion concentrations, month by month.

        Steps (all taken from the code below):
          1. rows whose ion sum exceeds PM2.5 are blanked, and rows missing
             any of SO42-, NO3-, NH4+ are dropped;
          2. values below the per-species limit in ``_mdl`` are replaced by
             half of that limit;
          3. per calendar month: values above the upper log-IQR fence are
             blanked, rows whose cation/anion ratio (or its inverse) is not
             positive and below its upper IQR fence are blanked, and main
             salts below the lower log-IQR fence are blanked.

        Parameters
        ----------
        _df : DataFrame
            Must contain the columns listed in ``_mdl`` and ``'PM2.5'``.

        Returns
        -------
        DataFrame
            The ion columns only, re-indexed onto ``_df.index``.
        """
        ## QC parameter, function (MDL SE LE)
        _mdl = {
            'Na+': 0.05,
            'NH4+': 0.05,
            'K+': 0.05,
            'Mg2+': 0.05,
            'Ca2+': 0.05,
            'Cl-': 0.05,
            'NO2-': 0.05,
            'NO3-': 0.05,
            'SO42-': 0.05,
        }
        _species = list(_mdl)

        def _se_le(_df_, _log=False):
            """Return (lower, upper) 1.5*IQR fences shaped like ``_df_``.

            With ``_log=True`` the fences are computed on log10 values and
            transformed back.
            """
            _vals = np.log10(_df_) if _log else _df_

            _qua = _vals.quantile([.25, .75])
            _q1, _q3 = _qua.loc[.25], _qua.loc[.75]
            _iqr = _q3 - _q1

            def _tile(_row):
                # repeat the single fence row for every observation; unlike
                # concat([...] * n) this also works when n == 0
                return _row.to_frame().T.iloc[[0] * len(_vals)].set_index(_vals.index)

            _se, _le = _tile(_q1 - 1.5 * _iqr), _tile(_q3 + 1.5 * _iqr)

            if _log:
                return 10 ** _se, 10 ** _le
            return _se, _le

        _cation = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+']
        _anion = ['Cl-', 'NO2-', 'NO3-', 'SO42-']
        _main = ['SO42-', 'NO3-', 'NH4+']

        _df_salt = _df[_species].copy()
        _df_pm = _df['PM2.5'].copy()

        ## lower than PM2.5
        ## conc. of main salt should be present at the same time (NH4+, SO42-, NO3-)
        _df_salt = _df_salt.mask(_df_salt.sum(axis=1, min_count=1) > _df_pm).dropna(subset=_main).copy()

        ## mdl
        for _key, _mdl_val in _mdl.items():
            _df_salt[_key] = _df_salt[_key].mask(_df_salt[_key] < _mdl_val, _mdl_val / 2)

        ## group by time (per month)
        _df_lst = []
        for _ky, _df_grp in _df_salt.groupby(_df_salt.index.strftime('%Y-%m')):
            _df_grp = _df_grp.copy()

            ## calculate SE LE
            ## salt < LE
            _se, _le = _se_le(_df_grp, _log=True)
            _df_grp = _df_grp.mask(_df_grp > _le).copy()

            ## C/A, A/C
            _rat_CA = (_df_grp[_cation].sum(axis=1) / _df_grp[_anion].sum(axis=1)).to_frame()
            _rat_AC = (1 / _rat_CA).copy()

            _se, _le = _se_le(_rat_CA)
            _cond_CA = (_rat_CA < _le) & (_rat_CA > 0)

            _se, _le = _se_le(_rat_AC)
            _cond_AC = (_rat_AC < _le) & (_rat_AC > 0)

            # the ratio frames have a single column labelled 0
            _df_grp = _df_grp.where((_cond_CA & _cond_AC)[0]).copy()

            ## conc. of main salt > SE
            _se, _le = _se_le(_df_grp[_main], _log=True)
            _df_grp[_main] = _df_grp[_main].mask(_df_grp[_main] < _se).copy()

            _df_lst.append(_df_grp)

        # concat([]) raises ValueError; when nothing survived the screening,
        # return an all-NaN frame with the expected columns instead
        if not _df_lst:
            return _df_salt.reindex(_df.index)

        return concat(_df_lst).reindex(_df.index)
@@ -0,0 +1,90 @@
1
+ # read IGAC_ZM ion-composition data (plus PM2.5) from a local CSV file
2
+
3
+
4
+ import numpy as np
5
+ from pandas import read_csv, concat, to_numeric
6
+
7
+ from AeroViz.rawDataReader.core import AbstractReader
8
+
9
+
10
class Reader(AbstractReader):
    """Reader for 'IGAC_ZM' CSV files: raw parsing plus ion-concentration QC."""
    nam = 'IGAC_ZM'

    def _raw_reader(self, _file):
        """Load one raw CSV file into a numeric DataFrame indexed by time.

        Parameters
        ----------
        _file : object exposing ``.open()`` (e.g. ``pathlib.Path``)
            The raw file; its first column is parsed as the timestamp and
            ``'-'`` is treated as missing.

        Returns
        -------
        DataFrame
            All columns coerced to numeric, rows with a missing timestamp
            removed and only the first row kept for repeated timestamps.
        """
        with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
            _df = read_csv(f, parse_dates=[0], index_col=[0], na_values=['-']).apply(to_numeric, errors='coerce')

        _df.columns = _df.keys().str.strip(' ')
        _df.index.name = 'time'

        # Each mask is computed on the frame it is applied to. The previous
        # chained form applied a duplicate-mask built from the original index
        # to an already filtered frame, which raises a length mismatch as soon
        # as a NaT or duplicated timestamp is present.
        _df = _df[_df.index.notna()]

        return _df[~_df.index.duplicated()]

    ## QC data
    def _QC(self, _df):
        """Quality-control the ion concentrations over the whole record.

        Steps (all taken from the code below):
          1. rows whose ion sum exceeds PM2.5 are blanked, and rows missing
             any of SO42-, NO3-, NH4+ are dropped;
          2. values below the per-species limit in ``_mdl`` (overridable via
             the ``'mdl'`` entry of ``self._oth_set``) are replaced by half of
             that limit;
          3. values above the upper log-IQR fence are blanked, rows whose
             cation/anion ratio (or its inverse) is not positive and below its
             upper IQR fence are blanked, and main salts below the lower
             log-IQR fence are blanked.

        Parameters
        ----------
        _df : DataFrame
            Must contain the columns listed in ``_mdl`` and ``'PM2.5'``.

        Returns
        -------
        DataFrame
            The ion columns only, re-indexed onto ``_df.index``.
        """
        ## QC parameter, function (MDL SE LE)
        _mdl = {
            'Na+': 0.06,
            'NH4+': 0.05,
            'K+': 0.05,
            'Mg2+': 0.12,
            'Ca2+': 0.07,
            'Cl-': 0.07,
            'NO2-': 0.05,
            'NO3-': 0.11,
            'SO42-': 0.08,
        }
        _mdl.update(self._oth_set.get('mdl', {}))
        _species = list(_mdl)

        def _se_le(_df_, _log=False):
            """Return (lower, upper) 1.5*IQR fences shaped like ``_df_``.

            With ``_log=True`` the fences are computed on log10 values and
            transformed back.
            """
            _vals = np.log10(_df_) if _log else _df_

            _qua = _vals.quantile([.25, .75])
            _q1, _q3 = _qua.loc[.25], _qua.loc[.75]
            _iqr = _q3 - _q1

            def _tile(_row):
                # repeat the single fence row for every observation; unlike
                # concat([...] * n) this also works when n == 0
                return _row.to_frame().T.iloc[[0] * len(_vals)].set_index(_vals.index)

            _se, _le = _tile(_q1 - 1.5 * _iqr), _tile(_q3 + 1.5 * _iqr)

            if _log:
                return 10 ** _se, 10 ** _le
            return _se, _le

        _cation = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+']
        _anion = ['Cl-', 'NO2-', 'NO3-', 'SO42-']
        _main = ['SO42-', 'NO3-', 'NH4+']

        _df_salt = _df[_species].copy()
        _df_pm = _df['PM2.5'].copy()

        ## lower than PM2.5
        ## conc. of main salt should be present at the same time (NH4+, SO42-, NO3-)
        _df_salt = _df_salt.mask(_df_salt.sum(axis=1, min_count=1) > _df_pm).dropna(subset=_main).copy()

        ## mdl
        for _key, _mdl_val in _mdl.items():
            _df_salt[_key] = _df_salt[_key].mask(_df_salt[_key] < _mdl_val, _mdl_val / 2)

        ## calculate SE LE
        ## salt < LE
        _se, _le = _se_le(_df_salt, _log=True)
        _df_salt = _df_salt.mask(_df_salt > _le).copy()

        ## C/A, A/C
        _rat_CA = (_df_salt[_cation].sum(axis=1) / _df_salt[_anion].sum(axis=1)).to_frame()
        _rat_AC = (1 / _rat_CA).copy()

        _se, _le = _se_le(_rat_CA)
        _cond_CA = (_rat_CA < _le) & (_rat_CA > 0)

        _se, _le = _se_le(_rat_AC)
        _cond_AC = (_rat_AC < _le) & (_rat_AC > 0)

        # the ratio frames have a single column labelled 0
        _df_salt = _df_salt.where((_cond_CA & _cond_AC)[0]).copy()

        ## conc. of main salt > SE
        _se, _le = _se_le(_df_salt[_main], _log=True)
        _df_salt[_main] = _df_salt[_main].mask(_df_salt[_main] < _se).copy()

        return _df_salt.reindex(_df.index)
@@ -0,0 +1,45 @@
1
+ from pandas import read_csv
2
+
3
+ from AeroViz.rawDataReader.core import AbstractReader
4
+
5
+
6
class Reader(AbstractReader):
    """Reader for 'MA350' CSV exports."""
    nam = 'MA350'

    def _raw_reader(self, _file):
        """Read one raw file and return the renamed BC / source-apportionment columns.

        The 'Date / time local' column is parsed as the index, which is then
        named "Time".
        """
        _stamp = 'Date / time local'
        _rename = {
            'UV BCc': 'BC1',
            'Blue BCc': 'BC2',
            'Green BCc': 'BC3',
            'Red BCc': 'BC4',
            'IR BCc': 'BC5',
            'Biomass BCc (ng/m^3)': 'BB mass',
            'Fossil fuel BCc (ng/m^3)': 'FF mass',
            'Delta-C (ng/m^3)': 'Delta-C',
            'AAE': 'AAE',
            'BB (%)': 'BB',
        }
        _out_cols = ['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'Delta-C', 'AAE', 'BB']

        _raw = read_csv(_file, parse_dates=[_stamp], index_col=_stamp)
        _raw = _raw.rename_axis("Time").rename(columns=_rename)

        # NOTE: rows are currently not filtered on the instrument 'Status' column
        return _raw[_out_cols]

    # QC data
    def _QC(self, _df):
        """Blank negative values and hourly outliers, then average to 5 min.

        Within every 1-hour window, values further than 1.5 standard
        deviations from the window mean are masked; the result is resampled
        to 5-minute means. 'Delta-C' is not part of the output.
        """
        _qc_cols = ['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'AAE', 'BB']

        # remove negative value
        _negative = _df < 0
        _df = _df[_qc_cols].mask(_negative)

        # QC data in 1 hr
        def _QC_func(_df_1hr):
            _centre = _df_1hr.mean()
            _spread = _df_1hr.std()
            _floor = _centre - _spread * 1.5
            _ceil = _centre + _spread * 1.5
            _outlier = (_df_1hr < _floor) | (_df_1hr > _ceil)

            return _df_1hr.mask(_outlier).copy()

        _hourly_qc = _df.resample('1h', group_keys=False).apply(_QC_func)
        return _hourly_qc.resample('5min').mean()
@@ -0,0 +1,57 @@
1
+ from pandas import to_datetime, read_csv
2
+
3
+ from AeroViz.rawDataReader.core import AbstractReader
4
+
5
+
6
class Reader(AbstractReader):
    """Reader for 'NEPH' raw files made of tagged records ('T', 'D', 'Y')."""
    nam = 'NEPH'

    def _raw_reader(self, _file):
        """Parse one raw file into a time-indexed frame of B, G, R, BB, BG, BR and RH.

        The file is read without a header into 11 positional columns and split
        by the record tag in column 0: 'T' rows give the timestamp, 'D' rows
        tagged 'NBXX' give the six data channels, 'Y' rows give RH (column 5)
        and the status word (column 9).

        Parameters
        ----------
        _file : object exposing ``.open()`` (e.g. ``pathlib.Path``)

        Returns
        -------
        DataFrame
            Columns ``['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']``; rows whose
            status is not 0 are set to NaN.
        """
        with _file.open('r', encoding='utf-8', errors='ignore') as f:
            _df = read_csv(f, header=None, names=range(11))

        _df_grp = _df.groupby(0)

        ## T : time
        _df_tm = _df_grp.get_group('T')[[1, 2, 3, 4, 5, 6]].astype(int)

        # build 'YYYYmmddHHMMSS' strings: year as-is, other fields zero-padded to 2 digits
        _stamp = _df_tm[1].astype(str)
        for _k in [2, 3, 4, 5, 6]:
            _stamp = _stamp + _df_tm[_k].map('{:02d}'.format)

        _idx_tm = to_datetime(_stamp, format='%Y%m%d%H%M%S')

        ## D : data
        ## col : 3~8 B G R BB BG BR
        ## 1e6
        # NOTE(review): the 1e6 factor is presumably a unit conversion - confirm
        _df_dt = _df_grp.get_group('D')[[1, 2, 3, 4, 5, 6, 7, 8]].set_index(_idx_tm)
        _df_out = (_df_dt.groupby(1).get_group('NBXX')[[3, 4, 5, 6, 7, 8]] * 1e6).reindex(_idx_tm)
        _df_out.columns = ['B', 'G', 'R', 'BB', 'BG', 'BR']
        _df_out.index.name = 'Time'

        ## Y : state
        ## col : 5 RH
        _df_st = _df_grp.get_group('Y')
        _df_out['RH'] = _df_st[5].values
        _df_out['status'] = _df_st[9].values

        # DataFrame.mask returns a new frame; the result has to be kept,
        # otherwise the status filter has no effect
        _df_out = _df_out.mask(_df_out['status'] != 0)  ## 0000 -> numeric to 0

        return _df_out[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']]

    ## QC data
    def _QC(self, _df):
        """Blank non-positive values and hourly outliers.

        Within every 1-hour window, values further than 1.5 standard
        deviations from the window mean are masked.
        """
        ## remove negative value
        _df = _df.mask(_df <= 0)

        ## QC data in 1 hr
        def _QC_func(_df_1hr):
            _df_ave = _df_1hr.mean()
            _df_std = _df_1hr.std()
            _df_lowb = _df_1hr < (_df_ave - _df_std * 1.5)
            _df_highb = _df_1hr > (_df_ave + _df_std * 1.5)

            return _df_1hr.mask(_df_lowb | _df_highb).copy()

        return _df.resample('1h', group_keys=False).apply(_QC_func)
@@ -0,0 +1,34 @@
1
+ from pandas import to_datetime, read_csv
2
+
3
+ from AeroViz.rawDataReader.core import AbstractReader
4
+
5
+
6
class Reader(AbstractReader):
    """Reader for 'OCEC_LCRES' CSV files (three header lines skipped)."""
    nam = 'OCEC_LCRES'

    def _raw_reader(self, _file):
        """Read one raw file and return the renamed OC/EC columns indexed by 'time'.

        'Start Date/Time' is parsed as the timestamp; rows with an unparseable
        timestamp are dropped and only the first row of each repeated
        timestamp is kept.
        """
        _col = {'Thermal/Optical OC (ugC/LCm^3)': 'Thermal_OC',
                'Thermal/Optical EC (ugC/LCm^3)': 'Thermal_EC',
                'OC=TC-BC (ugC/LCm^3)': 'Optical_OC',
                'BC (ugC/LCm^3)': 'Optical_EC',
                'Sample Volume Local Condition Actual m^3': 'Sample_Volume',
                'TC (ugC/LCm^3)': 'TC', }

        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
            _df = read_csv(f, skiprows=3)

        _tm_idx = to_datetime(_df['Start Date/Time'], errors='coerce')
        _df['time'] = _tm_idx

        _first_seen = ~_tm_idx.duplicated()
        _valid = _df.dropna(subset='time')
        _df = _valid.loc[_first_seen].set_index('time')

        return _df[list(_col)].rename(columns=_col)

    ## QC data
    def _QC(self, _df):
        """Blank OC values not above 0.3 and EC values not above 0.015.

        The passed frame is modified in place and also returned.
        """
        _limits = (
            (['Thermal_OC', 'Optical_OC'], 0.3),
            (['Thermal_EC', 'Optical_EC'], .015),
        )
        for _names, _floor in _limits:
            _part = _df[_names]
            _df[_names] = _part.where(_part > _floor).copy()

        return _df
@@ -0,0 +1,28 @@
1
+ from pandas import to_datetime, read_csv
2
+
3
+ from AeroViz.rawDataReader.core import AbstractReader
4
+
5
+
6
class Reader(AbstractReader):
    """Reader for 'OCEC_RES' CSV files (three header lines skipped)."""
    nam = 'OCEC_RES'

    def _raw_reader(self, _file):
        """Read one raw file and return OC1-OC4 and PC indexed by 'time'.

        'Start Date/Time' is parsed as the timestamp; rows with an unparseable
        timestamp are dropped and only the first row of each repeated
        timestamp is kept.
        """
        _col = {'OCPk1-ug C': 'OC1',
                'OCPk2-ug C': 'OC2',
                'OCPk3-ug C': 'OC3',
                'OCPk4-ug C': 'OC4',
                'Pyrolized C ug': 'PC', }

        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
            _df = read_csv(f, skiprows=3)

        _tm_idx = to_datetime(_df['Start Date/Time'], errors='coerce')
        _df['time'] = _tm_idx

        _first_seen = ~_tm_idx.duplicated()
        _valid = _df.dropna(subset='time')
        _df = _valid.loc[_first_seen].set_index('time')

        return _df[list(_col)].rename(columns=_col)

    ## QC data
    def _QC(self, _df):
        """Keep strictly positive values; everything else becomes NaN."""
        _positive = _df > 0
        return _df.where(_positive)
@@ -0,0 +1,41 @@
1
+ from pandas import to_datetime, read_table
2
+
3
+ from AeroViz.rawDataReader.core import AbstractReader
4
+
5
+
6
class Reader(AbstractReader):
    """Reader for 'SMPS_TH' tab-separated exports (18 header lines skipped)."""
    nam = 'SMPS_TH'

    def _raw_reader(self, _file):
        """Read one raw file and return the size-bin columns indexed by time.

        'Date' and 'Start Time' are combined into the 'Time' index. The size
        bins are the columns at positions ``[6:-26]``; their labels are
        converted to floats rounded to 4 decimals.

        Returns
        -------
        DataFrame
            One column per size bin; rows with an unparseable timestamp are
            removed.
        """
        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
            _df = read_table(f, skiprows=18, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time')

        # map the textual bin labels to rounded floats
        _newkey = {_k: round(float(_k), 4) for _k in _df.keys()[6:-26]}

        _df_idx = to_datetime(_df.index, errors='coerce')
        _df = _df[list(_newkey)].rename(_newkey, axis=1).set_index(_df_idx)

        # boolean mask instead of label-based .loc: selecting by label would
        # return every duplicated timestamp several times over
        return _df[_df.index.notna()]

    ## QC data
    def _QC(self, _df):
        """Screen the size distribution on data coverage and magnitude.

        A 'total' column (row sum times the mean spacing of the natural log of
        the bin labels) is added to the passed frame in place and used to:
          * blank timestamps whose hour holds fewer than 7 valid totals,
          * blank rows whose total is below 2000,
        after which bins labelled >= 400 are blanked where they exceed 4000.

        Returns
        -------
        DataFrame
            The screened frame without the 'total' column.
        """
        import numpy as np

        ## mask out the data size lower than 7
        _dlog = np.diff(np.log(_df.keys().to_numpy(float))).mean()
        _df['total'] = _df.sum(axis=1, min_count=1) * _dlog

        # NOTE(review): assumes the index carries a frequency (_df.index.freq) - confirm with caller
        _hourly_n = _df['total'].dropna().resample('1h').size()
        _df_size = _hourly_n.resample(_df.index.freq).ffill()
        _df = _df.mask(_df_size < 7)

        ## remove total conc. lower than 2000
        _df = _df.mask(_df['total'] < 2000)

        ## remove the bin over 400 nm which num. conc. larger than 4000
        # NOTE(review): [:-2] skips 'total' AND the last size bin - confirm this is intended
        _bins = _df.keys()[:-2]
        _df_remv_ky = _bins[_bins >= 400.]

        _df[_df_remv_ky] = _df[_df_remv_ky].mask(_df[_df_remv_ky] > 4000.)

        return _df[_df.keys()[:-1]]
@@ -0,0 +1,51 @@
1
+ from pandas import to_datetime, read_csv, to_numeric
2
+
3
+ from AeroViz.rawDataReader.core import AbstractReader
4
+
5
+
6
class Reader(AbstractReader):
    """Reader for 'SMPS_aim11' comma-separated exports."""
    nam = 'SMPS_aim11'

    def _raw_reader(self, _file):
        """Read one raw file and return the numeric size-bin columns indexed by time.

        Lines before the one whose first comma-separated field is
        'Scan Number' are skipped. 'DateTime Sample Start' is parsed with the
        format '%d/%m/%Y %X'. Only columns whose label converts to a number
        are kept.

        Returns
        -------
        DataFrame
            Numeric frame; rows with an unparseable timestamp are removed.
        """
        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:

            # count the preamble lines in front of the header row
            skiprows = 0
            for _line in f:

                if _line.split(',')[0] == 'Scan Number':
                    f.seek(0)
                    break

                skiprows += 1

            _df = read_csv(f, skiprows=skiprows)

        _tm_idx = to_datetime(_df['DateTime Sample Start'], format='%d/%m/%Y %X', errors='coerce')

        ## index
        # boolean mask instead of label-based .loc: selecting by label would
        # return every duplicated timestamp several times over
        _df = _df.set_index(_tm_idx)
        _df = _df[_df.index.notna()]

        ## keys
        _key = to_numeric(_df.keys(), errors='coerce')
        _df.columns = _key
        _df = _df.loc[:, ~_key.isna()]

        return _df.apply(to_numeric, errors='coerce')

    ## QC data
    def _QC(self, _df):
        """Screen the size distribution on data coverage and magnitude.

        A 'total' column (row sum times the mean spacing of the natural log of
        the bin labels) is added to the passed frame in place and used to:
          * blank timestamps whose hour holds fewer than 7 valid totals,
          * blank rows whose total is below 2000,
        after which bins labelled >= 400 are blanked where they exceed 4000.

        Returns
        -------
        DataFrame
            The screened frame without the 'total' column.
        """
        import numpy as np

        ## mask out the data size lower than 7
        _dlog = np.diff(np.log(_df.keys().to_numpy(float))).mean()
        _df['total'] = _df.sum(axis=1, min_count=1) * _dlog

        # NOTE(review): assumes the index carries a frequency (_df.index.freq) - confirm with caller
        _hourly_n = _df['total'].dropna().resample('1h').size()
        _df_size = _hourly_n.resample(_df.index.freq).ffill()
        _df = _df.mask(_df_size < 7)

        ## remove total conc. lower than 2000
        _df = _df.mask(_df['total'] < 2000)

        ## remove the bin over 400 nm which num. conc. larger than 4000
        # NOTE(review): [:-2] skips 'total' AND the last size bin - confirm this is intended
        _bins = _df.keys()[:-2]
        _df_remv_ky = _bins[_bins >= 400.]

        _df[_df_remv_ky] = _df[_df_remv_ky].mask(_df[_df_remv_ky] > 4000.)

        return _df[_df.keys()[:-1]]
@@ -0,0 +1,51 @@
1
+ from pandas import to_datetime, read_table, to_numeric
2
+
3
+ from AeroViz.rawDataReader.core import AbstractReader
4
+
5
+
6
class Reader(AbstractReader):
    """Reader for 'SMPS_genr' tab-separated exports."""
    nam = 'SMPS_genr'

    def _raw_reader(self, _file):
        """Read one raw file and return the numeric size-bin columns indexed by time.

        Lines before the one whose first tab-separated field is 'Sample #'
        are skipped. 'Date' and 'Start Time' are concatenated and parsed with
        the format '%m/%d/%y%X'. Only columns whose label converts to a
        number are kept.

        Returns
        -------
        DataFrame
            Numeric frame; rows with an unparseable timestamp are removed.
        """
        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:

            # count the preamble lines in front of the header row
            skiprows = 0
            for _line in f:

                if _line.split('\t')[0] == 'Sample #':
                    f.seek(0)
                    break

                skiprows += 1

            _df = read_table(f, skiprows=skiprows)

        _tm_idx = to_datetime(_df['Date'] + _df['Start Time'], format='%m/%d/%y%X', errors='coerce')

        ## index
        # boolean mask instead of label-based .loc: selecting by label would
        # return every duplicated timestamp several times over
        _df = _df.set_index(_tm_idx)
        _df = _df[_df.index.notna()]

        ## keys
        _key = to_numeric(_df.keys(), errors='coerce')
        _df.columns = _key
        _df = _df.loc[:, ~_key.isna()]

        return _df.apply(to_numeric, errors='coerce')

    ## QC data
    def _QC(self, _df):
        """Screen the size distribution on data coverage and magnitude.

        A 'total' column (row sum times the mean spacing of the natural log of
        the bin labels) is added to the passed frame in place and used to:
          * blank timestamps whose hour holds fewer than 7 valid totals,
          * blank rows whose total is below 2000,
        after which bins labelled >= 400 are blanked where they exceed 4000.

        Returns
        -------
        DataFrame
            The screened frame without the 'total' column.
        """
        import numpy as np

        ## mask out the data size lower than 7
        _dlog = np.diff(np.log(_df.keys().to_numpy(float))).mean()
        _df['total'] = _df.sum(axis=1, min_count=1) * _dlog

        # NOTE(review): assumes the index carries a frequency (_df.index.freq) - confirm with caller
        _hourly_n = _df['total'].dropna().resample('1h').size()
        _df_size = _hourly_n.resample(_df.index.freq).ffill()
        _df = _df.mask(_df_size < 7)

        ## remove total conc. lower than 2000
        _df = _df.mask(_df['total'] < 2000)

        ## remove the bin over 400 nm which num. conc. larger than 4000
        # NOTE(review): [:-2] skips 'total' AND the last size bin - confirm this is intended
        _bins = _df.keys()[:-2]
        _df_remv_ky = _bins[_bins >= 400.]

        _df[_df_remv_ky] = _df[_df_remv_ky].mask(_df[_df_remv_ky] > 4000.)

        return _df[_df.keys()[:-1]]
@@ -0,0 +1,46 @@
1
+ from pandas import to_datetime, read_csv
2
+
3
+ from AeroViz.rawDataReader.core import AbstractReader
4
+
5
+
6
class Reader(AbstractReader):
    """Reader for 'TEOM' CSV files (three header lines skipped)."""
    nam = 'TEOM'

    def _raw_reader(self, _file):
        """Read one raw file and return PM_NV, PM_Total and noise indexed by time.

        Chinese month names in the 'Time Stamp' column are replaced by their
        two-digit numbers before parsing with '%d - %m - %Y %X'. Rows whose
        'System status' is not below 1e-7 are set to NaN.
        """
        _rename = {'Time Stamp': 'time',
                   'System status': 'status',
                   'PM-2.5 base MC': 'PM_NV',
                   'PM-2.5 MC': 'PM_Total',
                   'PM-2.5 TEOM noise': 'noise', }

        # the two-character months (11, 12) come first so that they are
        # substituted before the single-character names they contain
        _month_num = {'十一月': '11', '十二月': '12', '一月': '01', '二月': '02', '三月': '03', '四月': '04',
                      '五月': '05', '六月': '06', '七月': '07', '八月': '08', '九月': '09', '十月': '10'}

        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
            _df = read_csv(f, skiprows=3, index_col=False)

        _df = _df.rename(columns=_rename)

        _stamp = _df['time']
        for _name, _digits in _month_num.items():
            _stamp = _stamp.str.replace(_name, _digits)

        _parsed = to_datetime(_stamp, errors='coerce', format='%d - %m - %Y %X')
        _df = _df.set_index(_parsed)

        _status_ok = _df['status'] < 1e-7
        _df = _df.where(_status_ok)

        return _df[['PM_NV', 'PM_Total', 'noise', ]]

    ## QC data
    def _QC(self, _df):
        """Keep low-noise, non-negative PM values with enough hourly coverage.

        Rows with noise not below 0.01 and negative values are blanked; then,
        for each PM column, timestamps whose hour holds fewer than 8 valid
        values are blanked. The result is re-indexed onto the input index.
        """
        _df_idx = _df.index.copy()

        ## remove negative value
        _negative = _df < 0
        _quiet = _df['noise'] < 0.01
        _df = _df.where(_quiet)[['PM_NV', 'PM_Total']].mask(_negative)

        ## QC data in 1 hr
        ## remove data where size < 8 in 1-hr
        for _key in ['PM_Total', 'PM_NV']:
            _hourly_n = _df[_key].dropna().resample('1h').size()
            _size = _hourly_n.reindex(_df_idx).ffill()
            _df[_key] = _df[_key].mask(_size < 8)

        return _df.reindex(_df_idx)
@@ -0,0 +1,28 @@
1
+ # read a generic time-indexed table from a local CSV file
2
+
3
+
4
+ from pandas import read_csv, to_datetime
5
+
6
+ from AeroViz.rawDataReader.core import AbstractReader
7
+
8
+
9
class Reader(AbstractReader):
    """Reader for generic 'Table' CSV files; no quality control is applied."""
    nam = 'Table'

    def _raw_reader(self, _file):
        """Load one raw CSV file into a DataFrame indexed by time.

        Parameters
        ----------
        _file : object exposing ``.open()`` (e.g. ``pathlib.Path``)
            The raw file; its first column is parsed as the timestamp.

        Returns
        -------
        DataFrame
            Rows with an unparseable timestamp removed and, for repeated
            timestamps, only the first occurrence kept.
        """
        # a user-supplied 'data_freq' takes precedence over the configured frequency
        self.meta['freq'] = self._oth_set.get('data_freq') or self.meta['freq']

        with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
            _df = read_csv(f, low_memory=False, index_col=0)

        _df.index = to_datetime(_df.index, errors='coerce',
                                format=self._oth_set.get('date_format') or 'mixed')
        _df.index.name = 'time'

        _df.columns = _df.keys().str.strip(' ')

        # boolean masks instead of label-based .loc: selecting by label on a
        # non-unique index would multiply every duplicated row before the
        # de-duplication below
        _df = _df[_df.index.notna()]

        return _df[~_df.index.duplicated()]

    def _QC(self, _df):
        """Return the data unchanged."""
        return _df
@@ -0,0 +1,30 @@
1
+ # read VOC_TH species concentrations from a local CSV file
2
+
3
+
4
+ from pandas import read_csv
5
+
6
+ from AeroViz.rawDataReader.core import AbstractReader
7
+
8
+
9
class Reader(AbstractReader):
    """Reader for 'VOC_TH' CSV files; no quality control is applied."""
    nam = 'VOC_TH'

    def _raw_reader(self, _file):
        """Load one raw CSV file and return the listed species columns.

        The first column is parsed as the timestamp; ``'-'`` and ``'N.D.'``
        are treated as missing.

        Parameters
        ----------
        _file : object exposing ``.open()`` (e.g. ``pathlib.Path``)

        Returns
        -------
        DataFrame
            The columns in ``_keys``; rows with a missing timestamp removed
            and only the first row kept for repeated timestamps.

        Raises
        ------
        KeyError
            If any species in ``_keys`` is absent from the file.
        """
        _keys = ['Isopentane', 'Hexane', '2-Methylhexane', '3-Methylhexane', '2-Methylheptane', '3-Methylheptane',
                 'Propene', '1.3-Butadiene', 'Isoprene', '1-Octene',
                 'Benzene', 'Toluene', 'Ethylbenzene', 'm.p-Xylene', 'o-Xylene', 'Iso-Propylbenzene', 'Styrene',
                 'n-Propylbenzene', '3.4-Ethyltoluene', '1.3.5-TMB', '2-Ethyltoluene', '1.2.4-TMB', '1.2.3-TMB',
                 'Acetaldehyde', 'Ethanol', 'Acetone', 'IPA', 'Ethyl Acetate', 'Butyl Acetate',
                 'VCM', 'TCE', 'PCE', '1.4-DCB', '1.2-DCB']

        with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
            _df = read_csv(f, parse_dates=[0], index_col=[0], na_values=['-', 'N.D.'])

        _df.columns = _df.keys().str.strip(' ')
        _df.index.name = 'time'

        # boolean masks instead of label-based .loc: selecting by label on a
        # non-unique index would multiply every duplicated row before the
        # de-duplication below
        _df = _df[_keys]
        _df = _df[_df.index.notna()]
        return _df[~_df.index.duplicated()]

    def _QC(self, _df):
        """Return the data unchanged."""
        return _df
@@ -0,0 +1,37 @@
1
+ # read VOC_ZM species concentrations from a local CSV file
2
+
3
+
4
+ from pandas import read_csv
5
+
6
+ from AeroViz.rawDataReader.core import AbstractReader
7
+
8
+
9
class Reader(AbstractReader):
    """Reader for 'VOC_ZM' CSV files; no quality control is applied."""
    nam = 'VOC_ZM'

    def _raw_reader(self, _file):
        """Load one raw CSV file and return the listed species columns.

        The first column is parsed as the timestamp; ``'-'`` is treated as
        missing.

        Parameters
        ----------
        _file : object exposing ``.open()`` (e.g. ``pathlib.Path``)

        Returns
        -------
        DataFrame
            The columns in ``_keys`` (each exactly once); rows with a missing
            timestamp removed and only the first row kept for repeated
            timestamps.

        Raises
        ------
        KeyError
            If any species in ``_keys`` is absent from the file.
        """
        # '2-Methylhexane' used to be listed twice, which produced a duplicated
        # output column; it is now listed once.
        # NOTE(review): the second entry may have been meant to name another
        # species - confirm against the instrument's column list.
        _keys = ['Ethane', 'Propane', 'Isobutane', 'n-Butane', 'Cyclopentane', 'Isopentane',
                 'n-Pentane', '2,2-Dimethylbutane', '2,3-Dimethylbutane', '2-Methylpentane',
                 '3-Methylpentane', 'n-Hexane', 'Methylcyclopentane', '2,4-Dimethylpentane',
                 'Cyclohexane', '2-Methylhexane', '3-Methylheptane',
                 '2,2,4-Trimethylpentane', 'n-Heptane', 'Methylcyclohexane',
                 '2,3,4-Trimethylpentane', '2-Methylheptane', '3-Methylhexane', 'n-Octane',
                 'n-Nonane', 'n-Decane', 'n-Undecane', 'Ethylene', 'Propylene', 't-2-Butene',
                 '1-Butene', 'cis-2-Butene', 't-2-Pentene', '1-Pentene', 'cis-2-Pentene',
                 'isoprene', 'Acetylene', 'Benzene', 'Toluene', 'Ethylbenzene', 'm,p-Xylene',
                 'Styrene', 'o-Xylene', 'Isopropylbenzene', 'n-Propylbenzene', 'm-Ethyltoluene',
                 'p-Ethyltoluene', '1,3,5-Trimethylbenzene', 'o-Ethyltoluene',
                 '1,2,4-Trimethylbenzene', '1,2,3-Trimethylbenzene', 'm-Diethylbenzene',
                 'p-Diethylbenzene']

        with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
            _df = read_csv(f, parse_dates=[0], index_col=[0], na_values=['-'])

        _df.columns = _df.keys().str.strip(' ')
        _df.index.name = 'time'

        # boolean masks instead of label-based .loc: selecting by label on a
        # non-unique index would multiply every duplicated row before the
        # de-duplication below
        _df = _df[_keys]
        _df = _df[_df.index.notna()]
        return _df[~_df.index.duplicated()]

    def _QC(self, _df):
        """Return the data unchanged."""
        return _df
@@ -0,0 +1,22 @@
1
# Names of the reader script modules exposed by this package.
__all__ = [
    'NEPH', 'Aurora', 'Table', 'EPA_vertical', 'APS_3321',
    'SMPS_TH', 'AE33', 'AE43', 'BC1054', 'MA350',
    'TEOM', 'OCEC_RES', 'OCEC_LCRES', 'IGAC_TH', 'IGAC_ZM',
    'VOC_TH', 'VOC_ZM', 'SMPS_genr', 'SMPS_aim11', 'GRIMM',
]
@@ -0,0 +1,3 @@
1
+ from .database import DataBase
2
+ from .dataclassifier import DataClassifier
3
+ from .datareader import DataReader