AeroViz 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of AeroViz might be problematic. Click here for more details.

Files changed (121) hide show
  1. AeroViz/__init__.py +7 -5
  2. AeroViz/{config → data}/DEFAULT_DATA.csv +1 -1
  3. AeroViz/dataProcess/Chemistry/__init__.py +40 -40
  4. AeroViz/dataProcess/Chemistry/_calculate.py +15 -15
  5. AeroViz/dataProcess/Chemistry/_isoropia.py +72 -68
  6. AeroViz/dataProcess/Chemistry/_mass_volume.py +158 -161
  7. AeroViz/dataProcess/Chemistry/_ocec.py +109 -109
  8. AeroViz/dataProcess/Chemistry/_partition.py +19 -18
  9. AeroViz/dataProcess/Chemistry/_teom.py +9 -11
  10. AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
  11. AeroViz/dataProcess/Optical/Angstrom_exponent.py +20 -0
  12. AeroViz/dataProcess/Optical/_IMPROVE.py +40 -41
  13. AeroViz/dataProcess/Optical/__init__.py +29 -44
  14. AeroViz/dataProcess/Optical/_absorption.py +21 -47
  15. AeroViz/dataProcess/Optical/_extinction.py +31 -25
  16. AeroViz/dataProcess/Optical/_mie.py +5 -7
  17. AeroViz/dataProcess/Optical/_mie_sd.py +89 -90
  18. AeroViz/dataProcess/Optical/_scattering.py +19 -20
  19. AeroViz/dataProcess/SizeDistr/__init__.py +39 -39
  20. AeroViz/dataProcess/SizeDistr/__merge.py +159 -158
  21. AeroViz/dataProcess/SizeDistr/_merge.py +155 -154
  22. AeroViz/dataProcess/SizeDistr/_merge_v1.py +162 -161
  23. AeroViz/dataProcess/SizeDistr/_merge_v2.py +153 -152
  24. AeroViz/dataProcess/SizeDistr/_merge_v3.py +327 -327
  25. AeroViz/dataProcess/SizeDistr/_merge_v4.py +273 -275
  26. AeroViz/dataProcess/SizeDistr/_size_distr.py +51 -51
  27. AeroViz/dataProcess/VOC/__init__.py +9 -9
  28. AeroViz/dataProcess/VOC/_potential_par.py +53 -55
  29. AeroViz/dataProcess/__init__.py +28 -6
  30. AeroViz/dataProcess/core/__init__.py +59 -65
  31. AeroViz/plot/__init__.py +7 -2
  32. AeroViz/plot/bar.py +126 -0
  33. AeroViz/plot/box.py +69 -0
  34. AeroViz/plot/distribution/distribution.py +421 -427
  35. AeroViz/plot/meteorology/meteorology.py +240 -292
  36. AeroViz/plot/optical/__init__.py +0 -1
  37. AeroViz/plot/optical/optical.py +230 -230
  38. AeroViz/plot/pie.py +198 -0
  39. AeroViz/plot/regression.py +196 -0
  40. AeroViz/plot/scatter.py +165 -0
  41. AeroViz/plot/templates/__init__.py +2 -4
  42. AeroViz/plot/templates/ammonium_rich.py +34 -0
  43. AeroViz/plot/templates/contour.py +25 -25
  44. AeroViz/plot/templates/corr_matrix.py +86 -93
  45. AeroViz/plot/templates/diurnal_pattern.py +28 -26
  46. AeroViz/plot/templates/koschmieder.py +59 -123
  47. AeroViz/plot/templates/metal_heatmap.py +135 -37
  48. AeroViz/plot/timeseries/__init__.py +1 -0
  49. AeroViz/plot/timeseries/template.py +47 -0
  50. AeroViz/plot/timeseries/timeseries.py +324 -264
  51. AeroViz/plot/utils/__init__.py +2 -1
  52. AeroViz/plot/utils/_color.py +57 -57
  53. AeroViz/plot/utils/_unit.py +48 -48
  54. AeroViz/plot/utils/plt_utils.py +92 -0
  55. AeroViz/plot/utils/sklearn_utils.py +49 -0
  56. AeroViz/plot/utils/units.json +5 -0
  57. AeroViz/plot/violin.py +80 -0
  58. AeroViz/process/__init__.py +17 -17
  59. AeroViz/process/core/DataProc.py +9 -9
  60. AeroViz/process/core/SizeDist.py +81 -81
  61. AeroViz/process/method/PyMieScatt_update.py +488 -488
  62. AeroViz/process/method/mie_theory.py +231 -229
  63. AeroViz/process/method/prop.py +40 -40
  64. AeroViz/process/script/AbstractDistCalc.py +103 -103
  65. AeroViz/process/script/Chemical.py +168 -167
  66. AeroViz/process/script/IMPACT.py +40 -40
  67. AeroViz/process/script/IMPROVE.py +152 -152
  68. AeroViz/process/script/Others.py +45 -45
  69. AeroViz/process/script/PSD.py +26 -26
  70. AeroViz/process/script/PSD_dry.py +69 -70
  71. AeroViz/process/script/retrieve_RI.py +50 -51
  72. AeroViz/rawDataReader/__init__.py +53 -58
  73. AeroViz/rawDataReader/config/supported_instruments.py +155 -0
  74. AeroViz/rawDataReader/core/__init__.py +233 -356
  75. AeroViz/rawDataReader/script/AE33.py +17 -18
  76. AeroViz/rawDataReader/script/AE43.py +18 -21
  77. AeroViz/rawDataReader/script/APS_3321.py +30 -30
  78. AeroViz/rawDataReader/script/Aurora.py +23 -24
  79. AeroViz/rawDataReader/script/BC1054.py +36 -40
  80. AeroViz/rawDataReader/script/EPA_vertical.py +37 -9
  81. AeroViz/rawDataReader/script/GRIMM.py +16 -23
  82. AeroViz/rawDataReader/script/IGAC.py +90 -0
  83. AeroViz/rawDataReader/script/MA350.py +32 -39
  84. AeroViz/rawDataReader/script/Minion.py +103 -0
  85. AeroViz/rawDataReader/script/NEPH.py +69 -74
  86. AeroViz/rawDataReader/script/SMPS_TH.py +25 -25
  87. AeroViz/rawDataReader/script/SMPS_aim11.py +32 -32
  88. AeroViz/rawDataReader/script/SMPS_genr.py +31 -31
  89. AeroViz/rawDataReader/script/Sunset_OCEC.py +60 -0
  90. AeroViz/rawDataReader/script/TEOM.py +30 -28
  91. AeroViz/rawDataReader/script/Table.py +13 -14
  92. AeroViz/rawDataReader/script/VOC.py +26 -0
  93. AeroViz/rawDataReader/script/__init__.py +18 -20
  94. AeroViz/tools/database.py +64 -66
  95. AeroViz/tools/dataclassifier.py +106 -106
  96. AeroViz/tools/dataprinter.py +51 -51
  97. AeroViz/tools/datareader.py +38 -38
  98. {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/METADATA +5 -4
  99. AeroViz-0.1.4.dist-info/RECORD +112 -0
  100. AeroViz/plot/improve/__init__.py +0 -1
  101. AeroViz/plot/improve/improve.py +0 -240
  102. AeroViz/plot/optical/aethalometer.py +0 -77
  103. AeroViz/plot/templates/event_evolution.py +0 -65
  104. AeroViz/plot/templates/regression.py +0 -256
  105. AeroViz/plot/templates/scatter.py +0 -130
  106. AeroViz/plot/templates/templates.py +0 -398
  107. AeroViz/plot/utils/_decorator.py +0 -74
  108. AeroViz/rawDataReader/script/IGAC_TH.py +0 -104
  109. AeroViz/rawDataReader/script/IGAC_ZM.py +0 -90
  110. AeroViz/rawDataReader/script/OCEC_LCRES.py +0 -34
  111. AeroViz/rawDataReader/script/OCEC_RES.py +0 -28
  112. AeroViz/rawDataReader/script/VOC_TH.py +0 -30
  113. AeroViz/rawDataReader/script/VOC_ZM.py +0 -37
  114. AeroViz/rawDataReader/utils/__init__.py +0 -0
  115. AeroViz/rawDataReader/utils/config.py +0 -169
  116. AeroViz-0.1.3.dist-info/RECORD +0 -111
  117. /AeroViz/{config → data}/DEFAULT_PNSD_DATA.csv +0 -0
  118. /AeroViz/{config → rawDataReader/config}/__init__.py +0 -0
  119. {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/LICENSE +0 -0
  120. {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/WHEEL +0 -0
  121. {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/top_level.txt +0 -0
@@ -4,31 +4,28 @@ from AeroViz.rawDataReader.core import AbstractReader
4
4
 
5
5
 
6
6
  class Reader(AbstractReader):
7
- nam = 'AE43'
7
+ nam = 'AE43'
8
8
 
9
- def _raw_reader(self, _file):
10
- _df = read_csv(_file, parse_dates={'time': ['StartTime']}, index_col='time')
11
- _df_id = _df['SetupID'].iloc[-1]
9
+ def _raw_reader(self, _file):
10
+ _df = read_csv(_file, parse_dates={'time': ['StartTime']}, index_col='time')
11
+ _df_id = _df['SetupID'].iloc[-1]
12
12
 
13
- # get last SetupID data
14
- _df = _df.groupby('SetupID').get_group(_df_id)[
15
- ['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'Status']].copy()
13
+ # get last SetupID data
14
+ _df = _df.groupby('SetupID').get_group(_df_id)[
15
+ ['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'Status']].copy()
16
16
 
17
- # remove data without Status=0
18
- _df = _df.where(_df['Status'] == 0).copy()
17
+ # remove data without Status=0, 128 (Not much filter tape), 256 (Not much filter tape)
18
+ if self.meta.get('error_state', False):
19
+ _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()
19
20
 
20
- return _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']]
21
+ _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']]
21
22
 
22
- # QC data
23
- def _QC(self, _df):
24
- # remove negative value
25
- _df = _df.mask((_df < 0).copy())
23
+ return _df.loc[~_df.index.duplicated() & _df.index.notna()]
26
24
 
27
- # QC data in 5 min
28
- def _QC_func(df):
29
- _df_ave, _df_std = df.mean(), df.std()
30
- _df_lowb, _df_highb = df < (_df_ave - _df_std * 1.5), df > (_df_ave + _df_std * 1.5)
25
+ # QC data
26
+ def _QC(self, _df):
27
+ # remove negative value
28
+ _df = _df.mask((_df < 0).copy())
31
29
 
32
- return df.mask(_df_lowb | _df_highb).copy()
33
-
34
- return _df.resample('5min').apply(_QC_func).resample('1h').mean()
30
+ # QC data in 1h
31
+ return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
@@ -5,43 +5,43 @@ from AeroViz.rawDataReader.core import AbstractReader
5
5
 
6
6
 
7
7
  class Reader(AbstractReader):
8
- nam = 'APS_3321'
8
+ nam = 'APS_3321'
9
9
 
10
- def _raw_reader(self, _file):
11
- with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
12
- _df = read_table(f, skiprows=6, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time')
13
- _key = list(_df.keys()[3:54]) ## 542 ~ 1981
10
+ def _raw_reader(self, _file):
11
+ with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
12
+ _df = read_table(f, skiprows=6, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time')
13
+ _key = list(_df.keys()[3:54]) ## 542 ~ 1981
14
14
 
15
- ## create new keys
16
- _newkey = {}
17
- for _k in _key:
18
- _newkey[_k] = float(_k).__round__(4)
19
- # _newkey['Mode(m)'] = 'mode'
15
+ # create new keys
16
+ _newkey = {}
17
+ for _k in _key:
18
+ _newkey[_k] = float(_k).__round__(4)
19
+ # _newkey['Mode(m)'] = 'mode'
20
20
 
21
- ## get new dataframe
22
- _df = _df[_newkey.keys()].rename(_newkey, axis=1)
23
- # _df['total'] = _df[list(_newkey.values())[:-1]].sum(axis=1)*(n.diff(n.log(_df.keys()[:-1].to_numpy(float))).mean()).copy()
21
+ # get new dataframe
22
+ _df = _df[_newkey.keys()].rename(_newkey, axis=1)
23
+ # df['total'] = _df[list(_newkey.values())[:-1]].sum(axis=1)*(n.diff(n.log(_df.keys()[:-1].to_numpy(float))).mean()).copy()
24
24
 
25
- _df_idx = to_datetime(_df.index, errors='coerce')
25
+ _df_idx = to_datetime(_df.index, errors='coerce')
26
26
 
27
- return _df.set_index(_df_idx).loc[_df_idx.dropna()]
27
+ return _df.set_index(_df_idx).loc[_df_idx.dropna()]
28
28
 
29
- ## QC data
30
- def _QC(self, _df):
31
- ## mask out the data size lower than 7
32
- _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
33
- _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
34
- _df = _df.mask(_df_size < 7)
29
+ # QC data
30
+ def _QC(self, _df):
31
+ # mask out the data size lower than 7
32
+ _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
33
+ _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
34
+ _df = _df.mask(_df_size < 7)
35
35
 
36
- ## remove total conc. lower than 700
37
- _df = _df.mask(_df['total'] > 700)
36
+ # remove total conc. lower than 700
37
+ _df = _df.mask(_df['total'] > 700)
38
38
 
39
- # not confirmed
40
- """
41
- ## remove the bin over 4000 nm which num. conc. larger than 1
42
- # _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2]>=4.]
39
+ # not confirmed
40
+ """
41
+ ## remove the bin over 4000 nm which num. conc. larger than 1
42
+ # _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2]>=4.]
43
43
 
44
- # _df_1hr[_df_remv_ky] = _df_1hr[_df_remv_ky].copy().mask(_df_1hr[_df_remv_ky]>1.)
45
- # """
44
+ # _df_1hr[_df_remv_ky] = _df_1hr[_df_remv_ky].copy().mask(_df_1hr[_df_remv_ky]>1.)
45
+ # """
46
46
 
47
- return _df[_df.keys()[:-1]]
47
+ return _df[_df.keys()[:-1]]
@@ -4,35 +4,34 @@ from AeroViz.rawDataReader.core import AbstractReader
4
4
 
5
5
 
6
6
  class Reader(AbstractReader):
7
- nam = 'Aurora'
7
+ nam = 'Aurora'
8
8
 
9
- def _raw_reader(self, _file):
10
- with (_file).open('r', encoding='utf-8-sig', errors='ignore') as f:
11
- _df = read_csv(f, low_memory=False, index_col=0)
9
+ def _raw_reader(self, _file):
10
+ with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
11
+ _df = read_csv(f, low_memory=False, index_col=0)
12
12
 
13
- _df.index = to_datetime(_df.index, errors='coerce', format=self._oth_set.get('date_format') or 'mixed')
14
- _df.index.name = 'time'
13
+ _df.index = to_datetime(_df.index, errors='coerce')
14
+ _df.index.name = 'time'
15
15
 
16
- _df.columns = _df.keys().str.strip(' ')
16
+ _df.columns = _df.keys().str.strip(' ')
17
17
 
18
- _df = _df.loc[
19
- _df.index.dropna(), ['0°σspB', '0°σspG', '0°σspR', '90°σspB', '90°σspG', '90°σspR', 'RH']].copy()
20
- _df.columns = ['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']
18
+ # consider another csv format
19
+ _df = _df.rename(columns={
20
+ '0°σspB': 'B', '0°σspG': 'G', '0°σspR': 'R',
21
+ '90°σspB': 'BB', '90°σspG': 'BG', '90°σspR': 'BR',
22
+ 'Blue': 'B', 'Green': 'G', 'Red': 'R',
23
+ 'B_Blue': 'BB', 'B_Green': 'BG', 'B_Red': 'BR',
24
+ 'RH': 'RH'
25
+ })
21
26
 
22
- return _df
27
+ _df = _df[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']]
23
28
 
24
- ## QC data
25
- def _QC(self, _df):
26
- ## remove negative value
27
- _df = _df.mask((_df <= 0).copy())
29
+ return _df.loc[~_df.index.duplicated() & _df.index.notna()]
28
30
 
29
- ## call by _QC function
30
- ## QC data in 1 hr
31
- def _QC_func(_df_1hr):
32
- _df_ave = _df_1hr.mean()
33
- _df_std = _df_1hr.std()
34
- _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
31
+ # QC data
32
+ def _QC(self, _df):
33
+ # remove negative value
34
+ _df = _df.mask((_df <= 0) | (_df > 2000)).copy()
35
35
 
36
- return _df_1hr.mask(_df_lowb | _df_highb).copy()
37
-
38
- return _df.resample('1h', group_keys=False).apply(_QC_func)
36
+ # QC data in 1h
37
+ return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
@@ -4,43 +4,39 @@ from AeroViz.rawDataReader.core import AbstractReader
4
4
 
5
5
 
6
6
  class Reader(AbstractReader):
7
- nam = 'BC1054'
8
-
9
- def _raw_reader(self, _file):
10
- with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
11
- _df = read_csv(f, parse_dates=['Time'], index_col='Time')
12
-
13
- _df = _df.rename(columns={
14
- 'BC1(ng/m3)': 'BC1',
15
- 'BC2(ng/m3)': 'BC2',
16
- 'BC3(ng/m3)': 'BC3',
17
- 'BC4(ng/m3)': 'BC4',
18
- 'BC5(ng/m3)': 'BC5',
19
- 'BC6(ng/m3)': 'BC6',
20
- 'BC7(ng/m3)': 'BC7',
21
- 'BC8(ng/m3)': 'BC8',
22
- 'BC9(ng/m3)': 'BC9',
23
- 'BC10(ng/m3)': 'BC10'
24
- })
25
-
26
- # remove data without Status=32 (Automatic Tape Advance), 65536 (Tape Move)
27
- # if not self._oth_set.get('ignore_err', False):
28
- # _df = _df.where((_df['Status'] != 32) | (_df['Status'] != 65536)).copy()
29
-
30
- return _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10', 'Status']]
31
-
32
- # QC data
33
- def _QC(self, _df):
34
- # remove negative value
35
- _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']].mask((_df < 0).copy())
36
-
37
- # call by _QC function
38
- # QC data in 1 hr
39
- def _QC_func(_df_1hr):
40
- _df_ave = _df_1hr.mean()
41
- _df_std = _df_1hr.std()
42
- _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
43
-
44
- return _df_1hr.mask(_df_lowb | _df_highb).copy()
45
-
46
- return _df.resample('1h', group_keys=False).apply(_QC_func).resample('5min').mean()
7
+ nam = 'BC1054'
8
+
9
+ def _raw_reader(self, _file):
10
+ with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
11
+ _df = read_csv(f, parse_dates=True, index_col=0)
12
+
13
+ _df.columns = _df.columns.str.replace(' ', '')
14
+
15
+ _df = _df.rename(columns={
16
+ 'BC1(ng/m3)': 'BC1',
17
+ 'BC2(ng/m3)': 'BC2',
18
+ 'BC3(ng/m3)': 'BC3',
19
+ 'BC4(ng/m3)': 'BC4',
20
+ 'BC5(ng/m3)': 'BC5',
21
+ 'BC6(ng/m3)': 'BC6',
22
+ 'BC7(ng/m3)': 'BC7',
23
+ 'BC8(ng/m3)': 'BC8',
24
+ 'BC9(ng/m3)': 'BC9',
25
+ 'BC10(ng/m3)': 'BC10'
26
+ })
27
+
28
+ # remove data without Status=1, 8, 16, 32 (Automatic Tape Advance), 65536 (Tape Move)
29
+ if self.meta.get('error_state', False):
30
+ _df = _df[~_df['Status'].isin(self.meta.get('error_state'))]
31
+
32
+ _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']]
33
+
34
+ return _df.loc[~_df.index.duplicated() & _df.index.notna()]
35
+
36
+ # QC data
37
+ def _QC(self, _df):
38
+ # remove negative value
39
+ _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']].mask((_df < 0).copy())
40
+
41
+ # QC data in 1h
42
+ return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
@@ -1,18 +1,46 @@
1
+ import numpy as np
1
2
  from pandas import read_csv, to_numeric
2
3
 
3
4
  from AeroViz.rawDataReader.core import AbstractReader
4
5
 
5
6
 
6
7
  class Reader(AbstractReader):
7
- nam = 'EPA_vertical'
8
+ nam = 'EPA_vertical'
8
9
 
9
- def _raw_reader(self, _file):
10
- with _file.open('r', encoding='big5', errors='ignore') as f:
11
- _df = read_csv(f, names=['time', 'station', 'comp', 'data', None], skiprows=1, na_values=['-'],
12
- parse_dates=['time'], index_col='time')
13
- _df['data'] = to_numeric(_df['data'], errors='coerce')
10
+ def _raw_reader(self, _file):
11
+ with _file.open('r', encoding='ascii', errors='ignore') as f:
12
+ # 有、無輸出有效值都可以
13
+ # read 查詢小時值(測項).csv
14
+ df = read_csv(f, encoding='ascii', encoding_errors='ignore', index_col=0, parse_dates=True,
15
+ usecols=lambda col: col != 'Unnamed: 1')
14
16
 
15
- _df_piv = _df.pivot_table(values='data', columns='comp', index='time')
16
- _df_piv.index.name = 'time'
17
+ df.index.name = 'Time'
18
+ df.rename(columns={'AMB_TEMP': 'AT', 'WIND_SPEED': 'WS', 'WIND_DIREC': 'WD'}, inplace=True)
17
19
 
18
- return _df_piv
20
+ # 欄位排序
21
+ desired_order = ['SO2', 'NO', 'NOx', 'NO2', 'CO', 'O3', 'THC', 'NMHC', 'CH4', 'PM10', 'PM2.5', 'WS', 'WD',
22
+ 'AT', 'RH']
23
+
24
+ missing_columns = []
25
+
26
+ for col in desired_order:
27
+ if col not in df.columns:
28
+ df[col] = np.nan
29
+ missing_columns.append(col)
30
+
31
+ if missing_columns:
32
+ self.logger.info(f"{'=' * 60}")
33
+ self.logger.info(f"Missing columns: {missing_columns}")
34
+ self.logger.info(f"{'=' * 60}")
35
+ print(f"Missing columns: {missing_columns}")
36
+
37
+ df = df[desired_order]
38
+
39
+ # 如果沒有將無效值拿掉就輸出 請將包含 #、L、O 的字串替換成 *
40
+ df.replace(to_replace=r'\d*[#LO]\b', value='*', regex=True, inplace=True)
41
+ df = df.apply(to_numeric, errors='coerce')
42
+
43
+ return df
44
+
45
+ def _QC(self, _df):
46
+ return _df
@@ -4,32 +4,25 @@ from AeroViz.rawDataReader.core import AbstractReader
4
4
 
5
5
 
6
6
  class Reader(AbstractReader):
7
- nam = 'GRIMM'
7
+ nam = 'GRIMM'
8
8
 
9
- def _raw_reader(self, _file):
9
+ def _raw_reader(self, _file):
10
10
 
11
- _df = read_csv(_file, header=233, delimiter='\t', index_col=0, parse_dates=[0], encoding='ISO-8859-1',
12
- dayfirst=True).rename_axis("Time")
13
- _df.index = to_datetime(_df.index, format="%d/%m/%Y %H:%M:%S", dayfirst=True)
11
+ _df = read_csv(_file, header=233, delimiter='\t', index_col=0, parse_dates=[0], encoding='ISO-8859-1',
12
+ dayfirst=True).rename_axis("Time")
13
+ _df.index = to_datetime(_df.index, format="%d/%m/%Y %H:%M:%S", dayfirst=True)
14
14
 
15
- if _file.name.startswith("A407ST"):
16
- _df.drop(_df.columns[0:11].tolist() + _df.columns[128:].tolist(), axis=1, inplace=True)
17
- else:
18
- _df.drop(_df.columns[0:11].tolist() + _df.columns[-5:].tolist(), axis=1, inplace=True)
15
+ if _file.name.startswith("A407ST"):
16
+ _df.drop(_df.columns[0:11].tolist() + _df.columns[128:].tolist(), axis=1, inplace=True)
17
+ else:
18
+ _df.drop(_df.columns[0:11].tolist() + _df.columns[-5:].tolist(), axis=1, inplace=True)
19
19
 
20
- if _df.empty:
21
- print(_file, "is empty")
22
- return None
20
+ if _df.empty:
21
+ print(_file, "is empty")
22
+ return None
23
23
 
24
- return _df / 0.035
24
+ return _df / 0.035
25
25
 
26
- def _QC(self, _df):
27
- # QC data in 1 hr
28
- def _QC_func(_df_1hr):
29
- _df_ave = _df_1hr.mean()
30
- _df_std = _df_1hr.std()
31
- _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
32
-
33
- return _df_1hr.mask(_df_lowb | _df_highb).copy()
34
-
35
- return _df.resample('5min').apply(_QC_func).resample('1h').mean()
26
+ def _QC(self, _df):
27
+ # QC data in 1h
28
+ return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
@@ -0,0 +1,90 @@
1
+ # read IGAC data (water-soluble ion concentrations and PM2.5) from a CSV file
2
+
3
+
4
+ import numpy as np
5
+ from pandas import read_csv, concat, to_numeric
6
+
7
+ from AeroViz.rawDataReader.core import AbstractReader
8
+
9
+
10
+ class Reader(AbstractReader):
11
+ nam = 'IGAC'
12
+
13
+ def _raw_reader(self, _file):
14
+
15
+ with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
16
+ _df = read_csv(f, parse_dates=True, index_col=0, na_values='-').apply(to_numeric, errors='coerce')
17
+
18
+ _df.columns = _df.keys().str.strip(' ')
19
+ _df.index.name = 'time'
20
+
21
+ return _df.loc[~_df.index.duplicated() & _df.index.notna()]
22
+
23
+ def _QC(self, _df):
24
+
25
+ # QC parameter, function (MDL SE LE)
26
+ _mdl = {
27
+ 'Na+': 0.06,
28
+ 'NH4+': 0.05,
29
+ 'K+': 0.05,
30
+ 'Mg2+': 0.12,
31
+ 'Ca2+': 0.07,
32
+ 'Cl-': 0.07,
33
+ 'NO2-': 0.05,
34
+ 'NO3-': 0.11,
35
+ 'SO42-': 0.08,
36
+ }
37
+
38
+ # _mdl.update(self._oth_set.get('mdl', {}))
39
+
40
+ def _se_le(_df_, _log=False):
41
+ _df_ = np.log10(_df_) if _log else _df_
42
+
43
+ _df_qua = _df_.quantile([.25, .75])
44
+ _df_q1, _df_q3 = _df_qua.loc[.25].copy(), _df_qua.loc[.75].copy()
45
+ _df_iqr = _df_q3 - _df_q1
46
+
47
+ _se = concat([_df_q1 - 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index)
48
+ _le = concat([_df_q3 + 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index)
49
+
50
+ if _log:
51
+ return 10 ** _se, 10 ** _le
52
+ return _se, _le
53
+
54
+ _cation, _anion, _main = (['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'],
55
+ ['Cl-', 'NO2-', 'NO3-', 'SO42-', ],
56
+ ['SO42-', 'NO3-', 'NH4+'])
57
+
58
+ _df_salt = _df[_mdl.keys()].copy()
59
+ _df_pm = _df['PM2.5'].copy()
60
+
61
+ # lower than PM2.5
62
+ # conc. of main salt should be present at the same time (NH4+, SO42-, NO3-)
63
+ _df_salt = _df_salt.mask(_df_salt.sum(axis=1, min_count=1) > _df_pm).dropna(subset=_main).copy()
64
+
65
+ # mdl
66
+ for (_key, _df_col), _mdl_val in zip(_df_salt.items(), _mdl.values()):
67
+ _df_salt[_key] = _df_col.mask(_df_col < _mdl_val, _mdl_val / 2)
68
+
69
+ # calculate SE LE
70
+ # salt < LE
71
+ _se, _le = _se_le(_df_salt, _log=True)
72
+ _df_salt = _df_salt.mask(_df_salt > _le).copy()
73
+
74
+ # C/A, A/C
75
+ _rat_CA = (_df_salt[_cation].sum(axis=1) / _df_salt[_anion].sum(axis=1)).to_frame()
76
+ _rat_AC = (1 / _rat_CA).copy()
77
+
78
+ _se, _le = _se_le(_rat_CA, )
79
+ _cond_CA = (_rat_CA < _le) & (_rat_CA > 0)
80
+
81
+ _se, _le = _se_le(_rat_AC, )
82
+ _cond_AC = (_rat_AC < _le) & (_rat_AC > 0)
83
+
84
+ _df_salt = _df_salt.where((_cond_CA * _cond_AC)[0]).copy()
85
+
86
+ # conc. of main salt > SE
87
+ _se, _le = _se_le(_df_salt[_main], _log=True)
88
+ _df_salt[_main] = _df_salt[_main].mask(_df_salt[_main] < _se).copy()
89
+
90
+ return _df_salt.reindex(_df.index)
@@ -4,42 +4,35 @@ from AeroViz.rawDataReader.core import AbstractReader
4
4
 
5
5
 
6
6
  class Reader(AbstractReader):
7
- nam = 'MA350'
8
-
9
- def _raw_reader(self, _file):
10
- _df = read_csv(_file, parse_dates=['Date / time local'], index_col='Date / time local').rename_axis("Time")
11
-
12
- _df = _df.rename(columns={
13
- 'UV BCc': 'BC1',
14
- 'Blue BCc': 'BC2',
15
- 'Green BCc': 'BC3',
16
- 'Red BCc': 'BC4',
17
- 'IR BCc': 'BC5',
18
- 'Biomass BCc (ng/m^3)': 'BB mass',
19
- 'Fossil fuel BCc (ng/m^3)': 'FF mass',
20
- 'Delta-C (ng/m^3)': 'Delta-C',
21
- 'AAE': 'AAE',
22
- 'BB (%)': 'BB',
23
- })
24
-
25
- # remove data without Status=32 (Automatic Tape Advance), 65536 (Tape Move)
26
- # if not self._oth_set.get('ignore_err', False):
27
- # _df = _df.where((_df['Status'] != 32) | (_df['Status'] != 65536)).copy()
28
-
29
- return _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'Delta-C', 'AAE', 'BB']]
30
-
31
- # QC data
32
- def _QC(self, _df):
33
- # remove negative value
34
- _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'AAE', 'BB']].mask((_df < 0).copy())
35
-
36
- # call by _QC function
37
- # QC data in 1 hr
38
- def _QC_func(_df_1hr):
39
- _df_ave = _df_1hr.mean()
40
- _df_std = _df_1hr.std()
41
- _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
42
-
43
- return _df_1hr.mask(_df_lowb | _df_highb).copy()
44
-
45
- return _df.resample('1h', group_keys=False).apply(_QC_func).resample('5min').mean()
7
+ nam = 'MA350'
8
+
9
+ def _raw_reader(self, _file):
10
+ _df = read_csv(_file, parse_dates=['Date / time local'], index_col='Date / time local').rename_axis("Time")
11
+
12
+ _df = _df.rename(columns={
13
+ 'UV BCc': 'BC1',
14
+ 'Blue BCc': 'BC2',
15
+ 'Green BCc': 'BC3',
16
+ 'Red BCc': 'BC4',
17
+ 'IR BCc': 'BC5',
18
+ 'Biomass BCc (ng/m^3)': 'BB mass',
19
+ 'Fossil fuel BCc (ng/m^3)': 'FF mass',
20
+ 'Delta-C (ng/m^3)': 'Delta-C',
21
+ 'AAE': 'AAE',
22
+ 'BB (%)': 'BB',
23
+ })
24
+
25
+ # if self.meta.get('error_state', False):
26
+ # _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()
27
+
28
+ _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'Delta-C', 'AAE', 'BB']]
29
+
30
+ return _df.loc[~_df.index.duplicated() & _df.index.notna()]
31
+
32
+ # QC data
33
+ def _QC(self, _df):
34
+ # remove negative value
35
+ _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'AAE', 'BB']].mask((_df < 0).copy())
36
+
37
+ # QC data in 1h
38
+ return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
@@ -0,0 +1,103 @@
1
+ import numpy as np
2
+ from pandas import read_csv, to_datetime, to_numeric
3
+
4
+ from AeroViz.rawDataReader.core import AbstractReader
5
+
6
+
7
+ class Reader(AbstractReader):
8
+ nam = 'Minion'
9
+
10
+ def _raw_reader(self, _file):
11
+ with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
12
+ _df = read_csv(f, low_memory=False, index_col=0)
13
+
14
+ _df.index = to_datetime(_df.index, errors='coerce')
15
+ _df.index.name = 'time'
16
+
17
+ _df.columns = _df.keys().str.strip(' ')
18
+
19
+ return _df.loc[~_df.index.duplicated() & _df.index.notna()]
20
+
21
def _QC(self, _df):
    """Run the Minion quality-control chain and return the resampled result.

    The frame is passed through ``XRF_QAQC`` and ``ions_balance``, negative
    values are blanked, ``self.basic_QC`` is applied to each 6-hour window,
    and the outcome is averaged at the frequency stored under
    ``self.meta['freq']``.
    """
    # element MDL screening, then ion-balance screening
    screened = self.ions_balance(self.XRF_QAQC(_df))

    # remove negative value
    is_negative = screened < 0
    screened = screened.mask(is_negative)

    # QC data in 6h
    windowed = screened.resample('6h').apply(self.basic_QC)
    target_freq = self.meta.get("freq")
    return windowed.resample(target_freq).mean()
33
+
34
+ # base on Xact 625i Minimum Decision Limit (MDL) for XRF in ng/m3, 60 min sample time
35
+ def XRF_QAQC(self, df):
36
+ MDL = {
37
+ 'Al': 100, 'Si': 18, 'P': 5.2, 'S': 3.2,
38
+ 'Cl': 1.7, 'K': 1.2, 'Ca': 0.3, 'Ti': 1.6,
39
+ 'V': 0.12, 'Cr': 0.12, 'Mn': 0.14, 'Fe': 0.17,
40
+ 'Co': 0.14, 'Ni': 0.096, 'Cu': 0.079, 'Zn': 0.067,
41
+ 'Ga': 0.059, 'Ge': 0.056, 'As': 0.063, 'Se': 0.081,
42
+ 'Br': 0.1, 'Rb': 0.19, 'Sr': 0.22, 'Y': 0.28,
43
+ 'Zr': 0.33, 'Nb': 0.41, 'Mo': 0.48, 'Ag': 1.9,
44
+ 'Cd': 2.5, 'In': 3.1, 'Sn': 4.1, 'Sb': 5.2,
45
+ 'Te': 0.6, 'I': 0.49, 'Cs': 0.37, 'Ba': 0.39,
46
+ 'La': 0.36, 'Ce': 0.3, 'Pt': 0.12, 'Au': 0.1,
47
+ 'Hg': 0.12, 'Tl': 0.12, 'Pb': 0.13, 'Bi': 0.13
48
+ }
49
+ # 將小於 MDL 值的數據替換為 NaN
50
+ for element, threshold in MDL.items():
51
+ if element in df.columns:
52
+ df[element] = df[element].where(df[element] >= threshold, np.nan)
53
+
54
+ self.logger.info(f"{'=' * 60}")
55
+ self.logger.info(f"XRF QAQC summary:")
56
+ self.logger.info("\t\ttransform values below MDL to NaN")
57
+ self.logger.info(f"{'=' * 60}")
58
+
59
+ return df
60
+
61
def ions_balance(self, df, tolerance=0.3):
    """Blank the ion columns of rows whose cation/anion balance is off.

    For every row the cation and anion concentrations are divided by the
    per-ion weights below and summed; the row is kept when the
    cation/anion ratio lies within ``1 +/- tolerance`` (inclusive).

    Parameters
    ----------
    df : DataFrame
        Must contain all columns listed in ``item``. Values are converted
        with ``to_numeric(errors='coerce')`` for the calculation only.
    tolerance : float, default 0.3
        Accepted relative deviation of the ratio from 1.

    Returns
    -------
    DataFrame
        A copy of ``df`` in which all ``item`` columns are NaN for rows that
        fail the check (including rows with no cation or no anion data).
        The input frame is left untouched.

    Raises
    ------
    KeyError
        If any column in ``item`` is missing from ``df``.
    """
    # columns blanked when a row fails the check
    # NOTE(review): 'F-' and 'PO43-' are blanked but do not enter the anion
    # sum below - confirm this is intended.
    item = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+', 'F-', 'Cl-', 'NO2-', 'NO3-', 'PO43-', 'SO42-']

    # divisor per ion: molar mass, halved for the doubly charged ions
    cation_weight = {'Na+': 23, 'NH4+': 18, 'K+': 39, 'Mg2+': 24 / 2, 'Ca2+': 40 / 2}
    anion_weight = {'Cl-': 35.5, 'NO2-': 46, 'NO3-': 62, 'SO42-': 96 / 2}

    _df = df[item].apply(to_numeric, errors='coerce')

    # min_count=1 keeps the sum NaN when a row has no data at all, so the
    # notna() checks below actually exclude such rows
    cation_sum = _df[list(cation_weight)].div(list(cation_weight.values())).sum(axis=1, min_count=1)
    anion_sum = _df[list(anion_weight)].div(list(anion_weight.values())).sum(axis=1, min_count=1)

    # avoid division by zero: a zero anion sum yields a NaN ratio
    ratio = cation_sum / anion_sum.where(anion_sum != 0)

    lower_bound, upper_bound = 1 - tolerance, 1 + tolerance

    # keep a row only when its ratio is inside the bounds and both sums exist
    valid_mask = ratio.between(lower_bound, upper_bound) & cation_sum.notna() & anion_sum.notna()

    # blank the ion columns of failing rows on a copy, not on the caller's frame
    df = df.copy()
    df.loc[~valid_mask, item] = np.nan

    # share of rows retained; an empty frame has nothing to report on
    n_rows = len(df)
    retained_percentage = (int(valid_mask.sum()) / n_rows) * 100 if n_rows else 0.0

    self.logger.info(f"{'=' * 60}")
    self.logger.info("Ions balance summary:")
    self.logger.info(f"\t\tretain {round(retained_percentage, 0)}% data within tolerance {tolerance}")
    self.logger.info(f"{'=' * 60}")

    if n_rows and retained_percentage < 70:
        self.logger.warning("Warning: The percentage of retained data is less than 70%")

    return df