AeroViz 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of AeroViz might be problematic.

Files changed (121)
  1. AeroViz/__init__.py +7 -5
  2. AeroViz/{config → data}/DEFAULT_DATA.csv +1 -1
  3. AeroViz/dataProcess/Chemistry/__init__.py +40 -40
  4. AeroViz/dataProcess/Chemistry/_calculate.py +15 -15
  5. AeroViz/dataProcess/Chemistry/_isoropia.py +72 -68
  6. AeroViz/dataProcess/Chemistry/_mass_volume.py +158 -161
  7. AeroViz/dataProcess/Chemistry/_ocec.py +109 -109
  8. AeroViz/dataProcess/Chemistry/_partition.py +19 -18
  9. AeroViz/dataProcess/Chemistry/_teom.py +9 -11
  10. AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
  11. AeroViz/dataProcess/Optical/Angstrom_exponent.py +20 -0
  12. AeroViz/dataProcess/Optical/_IMPROVE.py +40 -41
  13. AeroViz/dataProcess/Optical/__init__.py +29 -44
  14. AeroViz/dataProcess/Optical/_absorption.py +21 -47
  15. AeroViz/dataProcess/Optical/_extinction.py +31 -25
  16. AeroViz/dataProcess/Optical/_mie.py +5 -7
  17. AeroViz/dataProcess/Optical/_mie_sd.py +89 -90
  18. AeroViz/dataProcess/Optical/_scattering.py +19 -20
  19. AeroViz/dataProcess/SizeDistr/__init__.py +39 -39
  20. AeroViz/dataProcess/SizeDistr/__merge.py +159 -158
  21. AeroViz/dataProcess/SizeDistr/_merge.py +155 -154
  22. AeroViz/dataProcess/SizeDistr/_merge_v1.py +162 -161
  23. AeroViz/dataProcess/SizeDistr/_merge_v2.py +153 -152
  24. AeroViz/dataProcess/SizeDistr/_merge_v3.py +327 -327
  25. AeroViz/dataProcess/SizeDistr/_merge_v4.py +273 -275
  26. AeroViz/dataProcess/SizeDistr/_size_distr.py +51 -51
  27. AeroViz/dataProcess/VOC/__init__.py +9 -9
  28. AeroViz/dataProcess/VOC/_potential_par.py +53 -55
  29. AeroViz/dataProcess/__init__.py +28 -6
  30. AeroViz/dataProcess/core/__init__.py +59 -65
  31. AeroViz/plot/__init__.py +7 -2
  32. AeroViz/plot/bar.py +126 -0
  33. AeroViz/plot/box.py +69 -0
  34. AeroViz/plot/distribution/distribution.py +421 -427
  35. AeroViz/plot/meteorology/meteorology.py +240 -292
  36. AeroViz/plot/optical/__init__.py +0 -1
  37. AeroViz/plot/optical/optical.py +230 -230
  38. AeroViz/plot/pie.py +198 -0
  39. AeroViz/plot/regression.py +196 -0
  40. AeroViz/plot/scatter.py +165 -0
  41. AeroViz/plot/templates/__init__.py +2 -4
  42. AeroViz/plot/templates/ammonium_rich.py +34 -0
  43. AeroViz/plot/templates/contour.py +25 -25
  44. AeroViz/plot/templates/corr_matrix.py +86 -93
  45. AeroViz/plot/templates/diurnal_pattern.py +28 -26
  46. AeroViz/plot/templates/koschmieder.py +59 -123
  47. AeroViz/plot/templates/metal_heatmap.py +135 -37
  48. AeroViz/plot/timeseries/__init__.py +1 -0
  49. AeroViz/plot/timeseries/template.py +47 -0
  50. AeroViz/plot/timeseries/timeseries.py +324 -264
  51. AeroViz/plot/utils/__init__.py +2 -1
  52. AeroViz/plot/utils/_color.py +57 -57
  53. AeroViz/plot/utils/_unit.py +48 -48
  54. AeroViz/plot/utils/plt_utils.py +92 -0
  55. AeroViz/plot/utils/sklearn_utils.py +49 -0
  56. AeroViz/plot/utils/units.json +5 -0
  57. AeroViz/plot/violin.py +80 -0
  58. AeroViz/process/__init__.py +17 -17
  59. AeroViz/process/core/DataProc.py +9 -9
  60. AeroViz/process/core/SizeDist.py +81 -81
  61. AeroViz/process/method/PyMieScatt_update.py +488 -488
  62. AeroViz/process/method/mie_theory.py +231 -229
  63. AeroViz/process/method/prop.py +40 -40
  64. AeroViz/process/script/AbstractDistCalc.py +103 -103
  65. AeroViz/process/script/Chemical.py +168 -167
  66. AeroViz/process/script/IMPACT.py +40 -40
  67. AeroViz/process/script/IMPROVE.py +152 -152
  68. AeroViz/process/script/Others.py +45 -45
  69. AeroViz/process/script/PSD.py +26 -26
  70. AeroViz/process/script/PSD_dry.py +69 -70
  71. AeroViz/process/script/retrieve_RI.py +50 -51
  72. AeroViz/rawDataReader/__init__.py +53 -58
  73. AeroViz/rawDataReader/config/supported_instruments.py +155 -0
  74. AeroViz/rawDataReader/core/__init__.py +233 -356
  75. AeroViz/rawDataReader/script/AE33.py +17 -18
  76. AeroViz/rawDataReader/script/AE43.py +18 -21
  77. AeroViz/rawDataReader/script/APS_3321.py +30 -30
  78. AeroViz/rawDataReader/script/Aurora.py +23 -24
  79. AeroViz/rawDataReader/script/BC1054.py +36 -40
  80. AeroViz/rawDataReader/script/EPA_vertical.py +37 -9
  81. AeroViz/rawDataReader/script/GRIMM.py +16 -23
  82. AeroViz/rawDataReader/script/IGAC.py +90 -0
  83. AeroViz/rawDataReader/script/MA350.py +32 -39
  84. AeroViz/rawDataReader/script/Minion.py +103 -0
  85. AeroViz/rawDataReader/script/NEPH.py +69 -74
  86. AeroViz/rawDataReader/script/SMPS_TH.py +25 -25
  87. AeroViz/rawDataReader/script/SMPS_aim11.py +32 -32
  88. AeroViz/rawDataReader/script/SMPS_genr.py +31 -31
  89. AeroViz/rawDataReader/script/Sunset_OCEC.py +60 -0
  90. AeroViz/rawDataReader/script/TEOM.py +30 -28
  91. AeroViz/rawDataReader/script/Table.py +13 -14
  92. AeroViz/rawDataReader/script/VOC.py +26 -0
  93. AeroViz/rawDataReader/script/__init__.py +18 -20
  94. AeroViz/tools/database.py +64 -66
  95. AeroViz/tools/dataclassifier.py +106 -106
  96. AeroViz/tools/dataprinter.py +51 -51
  97. AeroViz/tools/datareader.py +38 -38
  98. {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/METADATA +5 -4
  99. AeroViz-0.1.4.dist-info/RECORD +112 -0
  100. AeroViz/plot/improve/__init__.py +0 -1
  101. AeroViz/plot/improve/improve.py +0 -240
  102. AeroViz/plot/optical/aethalometer.py +0 -77
  103. AeroViz/plot/templates/event_evolution.py +0 -65
  104. AeroViz/plot/templates/regression.py +0 -256
  105. AeroViz/plot/templates/scatter.py +0 -130
  106. AeroViz/plot/templates/templates.py +0 -398
  107. AeroViz/plot/utils/_decorator.py +0 -74
  108. AeroViz/rawDataReader/script/IGAC_TH.py +0 -104
  109. AeroViz/rawDataReader/script/IGAC_ZM.py +0 -90
  110. AeroViz/rawDataReader/script/OCEC_LCRES.py +0 -34
  111. AeroViz/rawDataReader/script/OCEC_RES.py +0 -28
  112. AeroViz/rawDataReader/script/VOC_TH.py +0 -30
  113. AeroViz/rawDataReader/script/VOC_ZM.py +0 -37
  114. AeroViz/rawDataReader/utils/__init__.py +0 -0
  115. AeroViz/rawDataReader/utils/config.py +0 -169
  116. AeroViz-0.1.3.dist-info/RECORD +0 -111
  117. /AeroViz/{config → data}/DEFAULT_PNSD_DATA.csv +0 -0
  118. /AeroViz/{config → rawDataReader/config}/__init__.py +0 -0
  119. {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/LICENSE +0 -0
  120. {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/WHEEL +0 -0
  121. {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/top_level.txt +0 -0

AeroViz/rawDataReader/script/NEPH.py
@@ -4,77 +4,72 @@ from AeroViz.rawDataReader.core import AbstractReader


 class Reader(AbstractReader):
-    nam = 'NEPH'
-
-    def _raw_reader(self, _file):
-        with _file.open('r', encoding='utf-8', errors='ignore') as f:
-            _df = read_csv(f, header=None, names=range(11))
-
-            _df_grp = _df.groupby(0)
-
-            # T : time
-            _df_tm = _df_grp.get_group('T')[[1, 2, 3, 4, 5, 6]].astype(int)
-
-            for _k in [2, 3, 4, 5, 6]:
-                _df_tm[_k] = _df_tm[_k].astype(int).map('{:02d}'.format).copy()
-            _df_tm = _df_tm.astype(str)
-
-            _idx_tm = to_datetime((_df_tm[1] + _df_tm[2] + _df_tm[3] + _df_tm[4] + _df_tm[5] + _df_tm[6]),
-                                  format='%Y%m%d%H%M%S')
-
-            # D : data
-            # col : 3~8 B G R BB BG BR
-            # 1e6
-            try:
-                _df_dt = _df_grp.get_group('D')[[1, 2, 3, 4, 5, 6, 7, 8]].set_index(_idx_tm)
-                _df_out = (_df_dt.groupby(1).get_group('NBXX')[[3, 4, 5, 6, 7, 8]] * 1e6).reindex(_idx_tm)
-                _df_out.columns = ['B', 'G', 'R', 'BB', 'BG', 'BR']
-                _df_out.index.name = 'Time'
-
-                # Y : state
-                # col : 5 RH
-                _df_st = _df_grp.get_group('Y')
-                _df_out['RH'] = _df_st[5].values
-                _df_out['status'] = _df_st[9].values
-
-                _df_out.mask(_df_out['status'] != 0)  # 0000 -> numeric to 0
-
-                return _df_out[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']]
-
-            except ValueError:
-                group_sizes = _df_grp.size()
-                print(group_sizes)
-                # Define the valid groups
-                valid_groups = {'B', 'G', 'R', 'D', 'T', 'Y', 'Z'}
-
-                # Find the rows where the value in the first column is not in valid_groups
-                invalid_indices = _df[~_df[0].isin(valid_groups)].index
-
-                # Print the invalid indices and their corresponding values
-                invalid_values = _df.loc[invalid_indices, 0]
-                print("Invalid values and their indices:")
-                for idx, value in zip(invalid_indices, invalid_values):
-                    print(f"Index: {idx}, Value: {value}")
-
-                # If there's a length mismatch, return an empty DataFrame with the same index and column names
-                columns = ['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']
-                _df_out = DataFrame(index=_idx_tm, columns=columns)
-                _df_out.index.name = 'Time'
-                print(f'\n\t\t\t Length mismatch in {_file} data. Returning an empty DataFrame.')
-                return _df_out
-
-    # QC data
-    def _QC(self, _df):
-        # remove negative value
-        _df = _df.mask((_df <= 0).copy())
-
-        # call by _QC function
-        # QC data in 1 hr
-        def _QC_func(_df_1hr):
-            _df_ave = _df_1hr.mean()
-            _df_std = _df_1hr.std()
-            _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
-
-            return _df_1hr.mask(_df_lowb | _df_highb).copy()
-
-        return _df.resample('1h', group_keys=False).apply(_QC_func)
+    nam = 'NEPH'
+
+    def _raw_reader(self, _file):
+        with _file.open('r', encoding='utf-8', errors='ignore') as f:
+            _df = read_csv(f, header=None, names=range(11))
+
+            _df_grp = _df.groupby(0)
+
+            # T : time
+            _idx_tm = to_datetime(
+                _df_grp.get_group('T')[[1, 2, 3, 4, 5, 6]]
+                .map(lambda x: f"{int(x):02d}")
+                .agg(''.join, axis=1),
+                format='%Y%m%d%H%M%S'
+            )
+
+            # D : data
+            # col : 3~8 B G R BB BG BR
+            # 1e6
+            try:
+                _df_dt = _df_grp.get_group('D')[[1, 2, 3, 4, 5, 6, 7, 8]].set_index(_idx_tm)
+
+                try:
+                    _df_out = (_df_dt.groupby(1).get_group('NBXX')[[3, 4, 5, 6, 7, 8]] * 1e6).reindex(_idx_tm)
+                except KeyError:
+                    _df_out = (_df_dt.groupby(1).get_group('NTXX')[[3, 4, 5, 6, 7, 8]] * 1e6).reindex(_idx_tm)
+
+                _df_out.columns = ['B', 'G', 'R', 'BB', 'BG', 'BR']
+                _df_out.index.name = 'Time'
+
+                # Y : state
+                # col : 5 RH
+                _df_st = _df_grp.get_group('Y')
+                _df_out['RH'] = _df_st[5].values
+                _df_out['status'] = _df_st[9].values
+
+                _df_out.mask(_df_out['status'] != 0)  # 0000 -> numeric to 0
+
+                _df = _df_out[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']]
+
+                return _df.loc[~_df.index.duplicated() & _df.index.notna()]
+
+            except ValueError:
+                group_sizes = _df_grp.size()
+                print(group_sizes)
+
+                # Define valid groups and find invalid indices
+                valid_groups = {'B', 'G', 'R', 'D', 'T', 'Y', 'Z'}
+                invalid_indices = _df[~_df[0].isin(valid_groups)].index
+
+                # Print invalid indices and values
+                print("Invalid values and their indices:")
+                for idx in invalid_indices:
+                    print(f"Index: {idx}, Value: {_df.at[idx, 0]}")
+
+                # Return an empty DataFrame with specified columns if there's a length mismatch
+                columns = ['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']
+                _df_out = DataFrame(index=_idx_tm, columns=columns)
+                _df_out.index.name = 'Time'
+                print(f'\n\t\t\t Length mismatch in {_file} data. Returning an empty DataFrame.')
+                return _df_out
+
+    # QC data
+    def _QC(self, _df):
+        # remove negative value
+        _df = _df.mask((_df <= 5).copy())
+
+        # QC data in 1h
+        return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
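
For context, the rewritten 'T'-row handling above builds the datetime index in one chained expression instead of the old per-column zero-padding loop. A minimal sketch of the same pattern on an invented two-row frame (DataFrame.map is elementwise and needs pandas >= 2.1; older pandas used applymap):

from pandas import DataFrame, to_datetime

# toy stand-in for _df_grp.get_group('T')[[1, 2, 3, 4, 5, 6]]:
# year, month, day, hour, minute, second as separate integer columns
_df_tm = DataFrame({1: [2024, 2024], 2: [3, 3], 3: [7, 7],
                    4: [0, 1], 5: [5, 5], 6: [0, 30]})

idx = to_datetime(
    _df_tm.map(lambda x: f"{int(x):02d}")   # zero-pad each field as text
          .agg(''.join, axis=1),            # join the six fields row-wise
    format='%Y%m%d%H%M%S'
)
# -> 2024-03-07 00:05:00 and 2024-03-07 01:05:30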

AeroViz/rawDataReader/script/SMPS_TH.py
@@ -4,38 +4,38 @@ from AeroViz.rawDataReader.core import AbstractReader


 class Reader(AbstractReader):
-    nam = 'SMPS_TH'
+    nam = 'SMPS_TH'

-    def _raw_reader(self, _file):
-        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
-            _df = read_table(f, skiprows=18, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time')
-            _key = list(_df.keys()[6:-26])
+    def _raw_reader(self, _file):
+        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
+            _df = read_table(f, skiprows=18, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time')
+            _key = list(_df.keys()[6:-26])

-            _newkey = {}
-            for _k in _key:
-                _newkey[_k] = float(_k).__round__(4)
+            _newkey = {}
+            for _k in _key:
+                _newkey[_k] = float(_k).__round__(4)

-            # _newkey['Total Conc.(#/cm)'] = 'total'
-            # _newkey['Mode(nm)'] = 'mode'
+            # _newkey['Total Conc.(#/cm)'] = 'total'
+            # _newkey['Mode(nm)'] = 'mode'

-            _df_idx = to_datetime(_df.index, errors='coerce')
-            return _df[_newkey.keys()].rename(_newkey, axis=1).set_index(_df_idx).loc[_df_idx.dropna()]
+            _df_idx = to_datetime(_df.index, errors='coerce')
+            return _df[_newkey.keys()].rename(_newkey, axis=1).set_index(_df_idx).loc[_df_idx.dropna()]

-    ## QC data
-    def _QC(self, _df):
-        import numpy as n
+    # QC data
+    def _QC(self, _df):
+        import numpy as n

-        ## mask out the data size lower than 7
-        _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
-        _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
-        _df = _df.mask(_df_size < 7)
+        # mask out the data size lower than 7
+        _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
+        _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
+        _df = _df.mask(_df_size < 7)

-        ## remove total conc. lower than 2000
-        _df = _df.mask(_df['total'] < 2000)
+        # remove total conc. lower than 2000
+        _df = _df.mask(_df['total'] < 2000)

-        ## remove the bin over 400 nm which num. conc. larger than 4000
-        _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]
+        # remove the bin over 400 nm which num. conc. larger than 4000
+        _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]

-        _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)
+        _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)

-        return _df[_df.keys()[:-1]]
+        return _df[_df.keys()[:-1]]
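
The SMPS `_QC` above (shared in essentially the same form by SMPS_aim11 and SMPS_genr below) integrates the dN/dlogDp bins into a 'total' number concentration and then masks rows below 2000 #/cm3. A self-contained sketch of that integration step, with invented bin diameters and counts:

import numpy as np
import pandas as pd

bins = [11.8, 12.2, 12.6, 13.1]                   # bin mid-diameters (nm), invented
idx = pd.date_range('2024-03-07', periods=3, freq='6min')
df = pd.DataFrame([[2.0e4, 2.2e4, 2.1e4, 1.9e4],
                   [9.0e3, 8.5e3, 8.0e3, 7.0e3],
                   [2.4e4, 2.5e4, 2.3e4, 2.2e4]], index=idx, columns=bins)

# total ~= sum(dN/dlogDp) * mean logarithmic bin width (natural log, as in _QC)
dlogdp = np.diff(np.log(np.asarray(bins, float))).mean()
df['total'] = df[bins].sum(axis=1, min_count=1) * dlogdp

# rows whose integrated total falls below 2000 #/cm3 are masked wholesale
df = df.mask(df['total'] < 2000)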

AeroViz/rawDataReader/script/SMPS_aim11.py
@@ -4,48 +4,48 @@ from AeroViz.rawDataReader.core import AbstractReader


 class Reader(AbstractReader):
-    nam = 'SMPS_aim11'
+    nam = 'SMPS_aim11'

-    def _raw_reader(self, _file):
-        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
+    def _raw_reader(self, _file):
+        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:

-            skiprows = 0
-            for _line in f:
+            skiprows = 0
+            for _line in f:

-                if _line.split(',')[0] == 'Scan Number':
-                    f.seek(0)
-                    break
+                if _line.split(',')[0] == 'Scan Number':
+                    f.seek(0)
+                    break

-                skiprows += 1
-            # breakpoint()
-            _df = read_csv(f, skiprows=skiprows)
-            _tm_idx = to_datetime(_df['DateTime Sample Start'], format='%d/%m/%Y %X', errors='coerce')
+                skiprows += 1
+            # breakpoint()
+            _df = read_csv(f, skiprows=skiprows)
+            _tm_idx = to_datetime(_df['DateTime Sample Start'], format='%d/%m/%Y %X', errors='coerce')

-            ## index
-            _df = _df.set_index(_tm_idx).loc[_tm_idx.dropna()]
+            # index
+            _df = _df.set_index(_tm_idx).loc[_tm_idx.dropna()]

-            ## keys
-            _key = to_numeric(_df.keys(), errors='coerce')
-            _df.columns = _key
-            _df = _df.loc[:, ~_key.isna()]
+            # keys
+            _key = to_numeric(_df.keys(), errors='coerce')
+            _df.columns = _key
+            _df = _df.loc[:, ~_key.isna()]

-            return _df.apply(to_numeric, errors='coerce')
+            return _df.apply(to_numeric, errors='coerce')

-    ## QC data
-    def _QC(self, _df):
-        import numpy as n
+    # QC data
+    def _QC(self, _df):
+        import numpy as n

-        ## mask out the data size lower than 7
-        _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
-        _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
-        _df = _df.mask(_df_size < 7)
+        # mask out the data size lower than 7
+        _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
+        _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
+        _df = _df.mask(_df_size < 7)

-        ## remove total conc. lower than 2000
-        _df = _df.mask(_df['total'] < 2000)
+        # remove total conc. lower than 2000
+        _df = _df.mask(_df['total'] < 2000)

-        ## remove the bin over 400 nm which num. conc. larger than 4000
-        _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]
+        # remove the bin over 400 nm which num. conc. larger than 4000
+        _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]

-        _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)
+        _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)

-        return _df[_df.keys()[:-1]]
+        return _df[_df.keys()[:-1]]

AeroViz/rawDataReader/script/SMPS_genr.py
@@ -4,48 +4,48 @@ from AeroViz.rawDataReader.core import AbstractReader


 class Reader(AbstractReader):
-    nam = 'SMPS_genr'
+    nam = 'SMPS_genr'

-    def _raw_reader(self, _file):
-        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
+    def _raw_reader(self, _file):
+        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:

-            skiprows = 0
-            for _line in f:
+            skiprows = 0
+            for _line in f:

-                if _line.split('\t')[0] == 'Sample #':
-                    f.seek(0)
-                    break
+                if _line.split('\t')[0] == 'Sample #':
+                    f.seek(0)
+                    break

-                skiprows += 1
+                skiprows += 1

-            _df = read_table(f, skiprows=skiprows)
-            _tm_idx = to_datetime(_df['Date'] + _df['Start Time'], format='%m/%d/%y%X', errors='coerce')
+            _df = read_table(f, skiprows=skiprows)
+            _tm_idx = to_datetime(_df['Date'] + _df['Start Time'], format='%m/%d/%y%X', errors='coerce')

-            ## index
-            _df = _df.set_index(_tm_idx).loc[_tm_idx.dropna()]
+            # index
+            _df = _df.set_index(_tm_idx).loc[_tm_idx.dropna()]

-            ## keys
-            _key = to_numeric(_df.keys(), errors='coerce')
-            _df.columns = _key
-            _df = _df.loc[:, ~_key.isna()]
+            # keys
+            _key = to_numeric(_df.keys(), errors='coerce')
+            _df.columns = _key
+            _df = _df.loc[:, ~_key.isna()]

-            return _df.apply(to_numeric, errors='coerce')
+            return _df.apply(to_numeric, errors='coerce')

-    ## QC data
-    def _QC(self, _df):
-        import numpy as n
+    # QC data
+    def _QC(self, _df):
+        import numpy as n

-        ## mask out the data size lower than 7
-        _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
-        _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
-        _df = _df.mask(_df_size < 7)
+        # mask out the data size lower than 7
+        _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
+        _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
+        _df = _df.mask(_df_size < 7)

-        ## remove total conc. lower than 2000
-        _df = _df.mask(_df['total'] < 2000)
+        # remove total conc. lower than 2000
+        _df = _df.mask(_df['total'] < 2000)

-        ## remove the bin over 400 nm which num. conc. larger than 4000
-        _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]
+        # remove the bin over 400 nm which num. conc. larger than 4000
+        _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]

-        _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)
+        _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)

-        return _df[_df.keys()[:-1]]
+        return _df[_df.keys()[:-1]]

AeroViz/rawDataReader/script/Sunset_OCEC.py (new file)
@@ -0,0 +1,60 @@
+from pandas import to_datetime, read_csv
+
+from AeroViz.rawDataReader.core import AbstractReader
+
+
+class Reader(AbstractReader):
+    nam = 'Sunset_OCEC'
+
+    def _raw_reader(self, _file):
+        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
+            _df = read_csv(f, skiprows=3)
+
+            _df['Start Date/Time'] = _df['Start Date/Time'].str.strip()
+            _df['time'] = to_datetime(_df['Start Date/Time'], format='%m/%d/%Y %I:%M:%S %p', errors='coerce')
+            _df = _df.set_index('time')
+
+            _df = _df.rename(columns={
+                'Thermal/Optical OC (ugC/LCm^3)': 'Thermal_OC',
+                'OC ugC/m^3 (Thermal/Optical)': 'Thermal_OC',
+
+                'Thermal/Optical EC (ugC/LCm^3)': 'Thermal_EC',
+                'EC ugC/m^3 (Thermal/Optical)': 'Thermal_EC',
+
+                'OC=TC-BC (ugC/LCm^3)': 'Optical_OC',
+                'OC by diff ugC (TC-OptEC)': 'Optical_OC',
+
+                'BC (ugC/LCm^3)': 'Optical_EC',
+                'OptEC ugC/m^3': 'Optical_EC',
+
+                'Sample Volume Local Condition Actual m^3': 'Sample_Volume',
+                'TC (ugC/LCm^3)': 'TC',
+                'TC ugC/m^3': 'TC',
+                'OCPk1-ug C': 'OC1',
+                'OCPk2-ug C': 'OC2',
+                'OCPk3-ug C': 'OC3',
+                'OCPk4-ug C': 'OC4',
+                'Pyrolized C ug': 'PC'
+            })
+
+            _df = _df[['Thermal_OC', 'Optical_OC', 'Thermal_EC', 'Optical_EC', 'TC', 'OC1', 'OC2', 'OC3', 'OC4']]
+
+            return _df.loc[~_df.index.duplicated() & _df.index.notna()]
+
+    # QC data
+    def _QC(self, _df):
+        import numpy as np
+
+        _df = _df.where(_df > 0)
+
+        thresholds = {
+            'Thermal_OC': 0.3,
+            'Optical_OC': 0.3,
+            'Thermal_EC': 0.015,
+            'Optical_EC': 0.015
+        }
+
+        for col, thresh in thresholds.items():
+            _df.loc[_df[col] <= thresh, col] = np.nan
+
+        return _df
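
The `_QC` in this new reader screens each carbon species against a fixed lower bound after dropping non-positive values. A small sketch of the same column-wise filter, using invented readings:

import numpy as np
import pandas as pd

df = pd.DataFrame({'Thermal_OC': [2.10, 0.20, 5.40],
                   'Optical_OC': [1.80, 0.10, 4.90],
                   'Thermal_EC': [0.40, 0.01, 0.90],
                   'Optical_EC': [0.35, 0.01, 0.80]})

thresholds = {'Thermal_OC': 0.3, 'Optical_OC': 0.3,
              'Thermal_EC': 0.015, 'Optical_EC': 0.015}

df = df.where(df > 0)                        # non-positive values -> NaN
for col, thresh in thresholds.items():
    df.loc[df[col] <= thresh, col] = np.nan  # values at or below the bound -> NaN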

AeroViz/rawDataReader/script/TEOM.py
@@ -4,43 +4,45 @@ from AeroViz.rawDataReader.core import AbstractReader


 class Reader(AbstractReader):
-    nam = 'TEOM'
+    nam = 'TEOM'

-    def _raw_reader(self, _file):
-        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
-            _df = read_csv(f, skiprows=3, index_col=False)
+    def _raw_reader(self, _file):
+        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
+            _df = read_csv(f, skiprows=3, index_col=False)

-            _df = _df.rename(columns={'Time Stamp': 'time',
-                                      'System status': 'status',
-                                      'PM-2.5 base MC': 'PM_NV',
-                                      'PM-2.5 MC': 'PM_Total',
-                                      'PM-2.5 TEOM noise': 'noise', })
+            _df = _df.rename(columns={'Time Stamp': 'time',
+                                      'System status': 'status',
+                                      'PM-2.5 base MC': 'PM_NV',
+                                      'PM-2.5 MC': 'PM_Total',
+                                      'PM-2.5 TEOM noise': 'noise', })

-            _time_replace = {'十一月': '11', '十二月': '12', '一月': '01', '二月': '02', '三月': '03', '四月': '04',
-                             '五月': '05', '六月': '06', '七月': '07', '八月': '08', '九月': '09', '十月': '10'}
+            _time_replace = {'十一月': '11', '十二月': '12', '一月': '01', '二月': '02', '三月': '03', '四月': '04',
+                             '五月': '05', '六月': '06', '七月': '07', '八月': '08', '九月': '09', '十月': '10'}

-            _tm_idx = _df.time
-            for _ori, _rpl in _time_replace.items():
-                _tm_idx = _tm_idx.str.replace(_ori, _rpl)
+            _tm_idx = _df.time
+            for _ori, _rpl in _time_replace.items():
+                _tm_idx = _tm_idx.str.replace(_ori, _rpl)

-            _df = _df.set_index(to_datetime(_tm_idx, errors='coerce', format='%d - %m - %Y %X'))
+            _df = _df.set_index(to_datetime(_tm_idx, errors='coerce', format='%d - %m - %Y %X'))

-            _df = _df.where(_df['status'] < 1e-7)
+            _df = _df.where(_df['status'] < 1e-7)

-            return _df[['PM_NV', 'PM_Total', 'noise', ]]
+            _df = _df[['PM_NV', 'PM_Total', 'noise', ]]

-    ## QC data
-    def _QC(self, _df):
+            return _df.loc[~_df.index.duplicated() & _df.index.notna()]

-        _df_idx = _df.index.copy()
+    # QC data
+    def _QC(self, _df):

-        ## remove negative value
-        _df = _df.where(_df.noise < 0.01)[['PM_NV', 'PM_Total']].mask((_df < 0).copy())
+        _df_idx = _df.index.copy()

-        ## QC data in 1 hr
-        ## remove data where size < 8 in 1-hr
-        for _key in ['PM_Total', 'PM_NV']:
-            _size = _df[_key].dropna().resample('1h').size().reindex(_df_idx).ffill().copy()
-            _df[_key] = _df[_key].mask(_size < 8)
+        # remove negative value
+        _df = _df.where(_df.noise < 0.01)[['PM_NV', 'PM_Total']].mask((_df <= 0).copy())

-        return _df.reindex(_df_idx)
+        # QC data in 1 hr
+        # remove data where size < 8 in 1-hr
+        for _key in ['PM_Total', 'PM_NV']:
+            _size = _df[_key].dropna().resample('1h').size().reindex(_df_idx).ffill().copy()
+            _df[_key] = _df[_key].mask(_size < 8)
+
+        return _df.reindex(_df_idx)
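
The TEOM timestamp handling above carries over unchanged: Chinese month names are replaced with their numeric form before parsing, with the two-character months 十一月/十二月 listed first so they are not clobbered by the 一月/二月 substitutions. A sketch on an invented sample string:

from pandas import Series, to_datetime

_time_replace = {'十一月': '11', '十二月': '12', '一月': '01', '二月': '02',
                 '三月': '03', '四月': '04', '五月': '05', '六月': '06',
                 '七月': '07', '八月': '08', '九月': '09', '十月': '10'}

tm = Series(['07 - 三月 - 2024 14:30:00'])      # invented TEOM 'Time Stamp' value
for _ori, _rpl in _time_replace.items():       # dicts keep insertion order (3.7+)
    tm = tm.str.replace(_ori, _rpl)            # -> '07 - 03 - 2024 14:30:00'

idx = to_datetime(tm, errors='coerce', format='%d - %m - %Y %X')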

AeroViz/rawDataReader/script/Table.py
@@ -1,28 +1,27 @@
 # read meteorological data from google sheet

-
 from pandas import read_csv, to_datetime

 from AeroViz.rawDataReader.core import AbstractReader


 class Reader(AbstractReader):
-    nam = 'Table'
-
-    def _raw_reader(self, _file):
-        self.meta['freq'] = self._oth_set.get('data_freq') or self.meta['freq']
+    nam = 'Table'

-        with (_file).open('r', encoding='utf-8-sig', errors='ignore') as f:
-            _df = read_csv(f, low_memory=False, index_col=0)
+    def _raw_reader(self, _file):
+        with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
+            _df = read_csv(f, low_memory=False, index_col=0)

-            _df.index = to_datetime(_df.index, errors='coerce', format=self._oth_set.get('date_format') or 'mixed')
-            _df.index.name = 'time'
+            _df.index = to_datetime(_df.index, errors='coerce', format=self._oth_set.get('date_format') or 'mixed')
+            _df.index.name = 'time'

-            _df.columns = _df.keys().str.strip(' ')
+            _df.columns = _df.keys().str.strip(' ')

-            _df = _df.loc[_df.index.dropna()].copy()
+            return _df.loc[~_df.index.duplicated() & _df.index.notna()]

-            return _df.loc[~_df.index.duplicated()]
+    def _QC(self, _df):
+        # remove negative value
+        _df = _df.mask((_df < 0).copy())

-    def _QC(self, _df):
-        return _df
+        # QC data in 6h
+        return _df.resample('6h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
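
Table._QC now defers to `self.basic_QC`, which lives in the reader core (AeroViz/rawDataReader/core/__init__.py) and is not shown in this diff. Purely as an illustration of the resample-then-screen-then-average pattern, the stand-in below mimics the mean ± 1.5 σ clip that the old NEPH `_QC` used, grouping into 6 h windows via pd.Grouper for clarity:

import numpy as np
import pandas as pd

def clip_outliers(block):
    # keep values inside mean +/- 1.5 * std of the block (stand-in, not basic_QC)
    ave, std = block.mean(), block.std()
    return block.mask((block < ave - 1.5 * std) | (block > ave + 1.5 * std))

idx = pd.date_range('2024-03-07', periods=24, freq='1h')
df = pd.DataFrame({'PM2.5': np.r_[np.full(23, 20.0), 500.0]}, index=idx)

# screen each 6 h block, then average back onto the target output frequency
out = (df.groupby(pd.Grouper(freq='6h'), group_keys=False)
         .apply(clip_outliers)
         .resample('1h')
         .mean())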

AeroViz/rawDataReader/script/VOC.py (new file)
@@ -0,0 +1,26 @@
+
+from pandas import read_csv
+
+from AeroViz.rawDataReader.core import AbstractReader
+
+
+class Reader(AbstractReader):
+    nam = 'VOC'
+
+    def _raw_reader(self, _file):
+        with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
+            _df = read_csv(f, parse_dates=[0], index_col=[0], na_values=('-', 'N.D.'))
+
+            _df.columns = _df.keys().str.strip(' ')
+            _df.index.name = 'time'
+
+            try:
+                _df = _df[self.meta["key"]].loc[_df.index.dropna()]
+
+            except KeyError:
+                _df = _df[self.meta["key_2"]].loc[_df.index.dropna()]
+
+            return _df.loc[~_df.index.duplicated() & _df.index.notna()]
+
+    def _QC(self, _df):
+        return _df
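
The return line above shows the index clean-up this release appends to most `_raw_reader` implementations (VOC, Table, TEOM, NEPH, Sunset_OCEC): duplicated timestamps and rows whose timestamp failed to parse are dropped in one step. A toy sketch:

import pandas as pd

idx = pd.to_datetime(['2024-03-07 00:00', '2024-03-07 00:00',
                      None, '2024-03-07 01:00'])
df = pd.DataFrame({'NOx': [12.0, 12.0, 5.0, 9.0]}, index=idx)   # invented values

clean = df.loc[~df.index.duplicated() & df.index.notna()]
# keeps the first 00:00 row and the 01:00 row; drops the duplicate and the NaT row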

AeroViz/rawDataReader/script/__init__.py
@@ -1,22 +1,20 @@
 __all__ = [
-    'NEPH',
-    'Aurora',
-    'Table',
-    'EPA_vertical',
-    'APS_3321',
-    'SMPS_TH',
-    'AE33',
-    'AE43',
-    'BC1054',
-    'MA350',
-    'TEOM',
-    'OCEC_RES',
-    'OCEC_LCRES',
-    'IGAC_TH',
-    'IGAC_ZM',
-    'VOC_TH',
-    'VOC_ZM',
-    'SMPS_genr',
-    'SMPS_aim11',
-    'GRIMM'
+    'NEPH',
+    'Aurora',
+    'SMPS_TH',
+    'SMPS_genr',
+    'SMPS_aim11',
+    'APS_3321',
+    'GRIMM',
+    'AE33',
+    'AE43',
+    'BC1054',
+    'MA350',
+    'TEOM',
+    'Sunset_OCEC',
+    'IGAC',
+    'VOC',
+    'Table',
+    'EPA_vertical',
+    'Minion'
 ]