AeroViz 0.1.3__py3-none-any.whl → 0.1.3b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of AeroViz might be problematic.

Files changed (107)
  1. AeroViz/__init__.py +4 -4
  2. AeroViz/dataProcess/Chemistry/__init__.py +38 -38
  3. AeroViz/dataProcess/Chemistry/_calculate.py +15 -15
  4. AeroViz/dataProcess/Chemistry/_isoropia.py +69 -68
  5. AeroViz/dataProcess/Chemistry/_mass_volume.py +158 -158
  6. AeroViz/dataProcess/Chemistry/_ocec.py +109 -109
  7. AeroViz/dataProcess/Chemistry/_partition.py +19 -18
  8. AeroViz/dataProcess/Chemistry/_teom.py +8 -11
  9. AeroViz/dataProcess/Optical/_IMPROVE.py +40 -39
  10. AeroViz/dataProcess/Optical/__init__.py +35 -35
  11. AeroViz/dataProcess/Optical/_absorption.py +35 -35
  12. AeroViz/dataProcess/Optical/_extinction.py +25 -24
  13. AeroViz/dataProcess/Optical/_mie.py +5 -6
  14. AeroViz/dataProcess/Optical/_mie_sd.py +89 -90
  15. AeroViz/dataProcess/Optical/_scattering.py +16 -16
  16. AeroViz/dataProcess/SizeDistr/__init__.py +37 -37
  17. AeroViz/dataProcess/SizeDistr/__merge.py +159 -158
  18. AeroViz/dataProcess/SizeDistr/_merge.py +155 -154
  19. AeroViz/dataProcess/SizeDistr/_merge_v1.py +162 -161
  20. AeroViz/dataProcess/SizeDistr/_merge_v2.py +153 -152
  21. AeroViz/dataProcess/SizeDistr/_merge_v3.py +326 -326
  22. AeroViz/dataProcess/SizeDistr/_merge_v4.py +272 -274
  23. AeroViz/dataProcess/SizeDistr/_size_distr.py +51 -51
  24. AeroViz/dataProcess/VOC/__init__.py +7 -7
  25. AeroViz/dataProcess/VOC/_potential_par.py +53 -55
  26. AeroViz/dataProcess/__init__.py +4 -4
  27. AeroViz/dataProcess/core/__init__.py +59 -58
  28. AeroViz/plot/__init__.py +6 -1
  29. AeroViz/plot/bar.py +126 -0
  30. AeroViz/plot/box.py +68 -0
  31. AeroViz/plot/distribution/distribution.py +421 -427
  32. AeroViz/plot/meteorology/meteorology.py +240 -292
  33. AeroViz/plot/optical/__init__.py +0 -1
  34. AeroViz/plot/optical/optical.py +230 -230
  35. AeroViz/plot/pie.py +198 -0
  36. AeroViz/plot/regression.py +210 -0
  37. AeroViz/plot/scatter.py +99 -0
  38. AeroViz/plot/templates/__init__.py +0 -3
  39. AeroViz/plot/templates/contour.py +25 -25
  40. AeroViz/plot/templates/corr_matrix.py +86 -93
  41. AeroViz/plot/templates/diurnal_pattern.py +24 -24
  42. AeroViz/plot/templates/koschmieder.py +106 -106
  43. AeroViz/plot/templates/metal_heatmap.py +34 -34
  44. AeroViz/plot/timeseries/timeseries.py +53 -60
  45. AeroViz/plot/utils/__init__.py +2 -1
  46. AeroViz/plot/utils/_color.py +57 -57
  47. AeroViz/plot/utils/_unit.py +48 -48
  48. AeroViz/plot/utils/plt_utils.py +92 -0
  49. AeroViz/plot/utils/sklearn_utils.py +49 -0
  50. AeroViz/plot/violin.py +79 -0
  51. AeroViz/process/__init__.py +15 -15
  52. AeroViz/process/core/DataProc.py +9 -9
  53. AeroViz/process/core/SizeDist.py +81 -81
  54. AeroViz/process/method/PyMieScatt_update.py +488 -488
  55. AeroViz/process/method/mie_theory.py +231 -229
  56. AeroViz/process/method/prop.py +40 -40
  57. AeroViz/process/script/AbstractDistCalc.py +103 -103
  58. AeroViz/process/script/Chemical.py +166 -166
  59. AeroViz/process/script/IMPACT.py +40 -40
  60. AeroViz/process/script/IMPROVE.py +152 -152
  61. AeroViz/process/script/Others.py +45 -45
  62. AeroViz/process/script/PSD.py +26 -26
  63. AeroViz/process/script/PSD_dry.py +69 -70
  64. AeroViz/process/script/retrieve_RI.py +50 -51
  65. AeroViz/rawDataReader/__init__.py +57 -57
  66. AeroViz/rawDataReader/core/__init__.py +328 -326
  67. AeroViz/rawDataReader/script/AE33.py +18 -18
  68. AeroViz/rawDataReader/script/AE43.py +20 -20
  69. AeroViz/rawDataReader/script/APS_3321.py +30 -30
  70. AeroViz/rawDataReader/script/Aurora.py +23 -23
  71. AeroViz/rawDataReader/script/BC1054.py +40 -40
  72. AeroViz/rawDataReader/script/EPA_vertical.py +9 -9
  73. AeroViz/rawDataReader/script/GRIMM.py +21 -21
  74. AeroViz/rawDataReader/script/IGAC_TH.py +67 -67
  75. AeroViz/rawDataReader/script/IGAC_ZM.py +59 -59
  76. AeroViz/rawDataReader/script/MA350.py +39 -39
  77. AeroViz/rawDataReader/script/NEPH.py +74 -74
  78. AeroViz/rawDataReader/script/OCEC_LCRES.py +21 -21
  79. AeroViz/rawDataReader/script/OCEC_RES.py +16 -16
  80. AeroViz/rawDataReader/script/SMPS_TH.py +25 -25
  81. AeroViz/rawDataReader/script/SMPS_aim11.py +32 -32
  82. AeroViz/rawDataReader/script/SMPS_genr.py +31 -31
  83. AeroViz/rawDataReader/script/TEOM.py +28 -28
  84. AeroViz/rawDataReader/script/Table.py +12 -12
  85. AeroViz/rawDataReader/script/VOC_TH.py +16 -16
  86. AeroViz/rawDataReader/script/VOC_ZM.py +28 -28
  87. AeroViz/rawDataReader/script/__init__.py +20 -20
  88. AeroViz/rawDataReader/utils/config.py +161 -161
  89. AeroViz/tools/database.py +65 -65
  90. AeroViz/tools/dataclassifier.py +106 -106
  91. AeroViz/tools/dataprinter.py +51 -51
  92. AeroViz/tools/datareader.py +38 -38
  93. {AeroViz-0.1.3.dist-info → AeroViz-0.1.3b0.dist-info}/METADATA +5 -4
  94. AeroViz-0.1.3b0.dist-info/RECORD +110 -0
  95. AeroViz/config/__init__.py +0 -0
  96. AeroViz/plot/improve/__init__.py +0 -1
  97. AeroViz/plot/improve/improve.py +0 -240
  98. AeroViz/plot/optical/aethalometer.py +0 -77
  99. AeroViz/plot/templates/event_evolution.py +0 -65
  100. AeroViz/plot/templates/regression.py +0 -256
  101. AeroViz/plot/templates/scatter.py +0 -130
  102. AeroViz/plot/templates/templates.py +0 -398
  103. AeroViz/plot/utils/_decorator.py +0 -74
  104. AeroViz-0.1.3.dist-info/RECORD +0 -111
  105. {AeroViz-0.1.3.dist-info → AeroViz-0.1.3b0.dist-info}/LICENSE +0 -0
  106. {AeroViz-0.1.3.dist-info → AeroViz-0.1.3b0.dist-info}/WHEEL +0 -0
  107. {AeroViz-0.1.3.dist-info → AeroViz-0.1.3b0.dist-info}/top_level.txt +0 -0

AeroViz/rawDataReader/script/IGAC_ZM.py
@@ -8,83 +8,83 @@ from AeroViz.rawDataReader.core import AbstractReader
(The removed and added lines in this hunk differ only in indentation; the resulting code is shown once.)

class Reader(AbstractReader):
    nam = 'IGAC_ZM'

    def _raw_reader(self, _file):

        with (_file).open('r', encoding='utf-8-sig', errors='ignore') as f:
            _df = read_csv(f, parse_dates=[0], index_col=[0], na_values=['-']).apply(to_numeric, errors='coerce')

        _df.columns = _df.keys().str.strip(' ')
        _df.index.name = 'time'

        return _df.loc[_df.index.dropna()].loc[~_df.index.duplicated()]

    ## QC data
    def _QC(self, _df):

        ## QC parameter, function (MDL SE LE)
        _mdl = {
            'Na+': 0.06,
            'NH4+': 0.05,
            'K+': 0.05,
            'Mg2+': 0.12,
            'Ca2+': 0.07,
            'Cl-': 0.07,
            'NO2-': 0.05,
            'NO3-': 0.11,
            'SO42-': 0.08,
        }
        _mdl.update(self._oth_set.get('mdl', {}))

        def _se_le(_df_, _log=False):
            _df_ = np.log10(_df_) if _log else _df_

            _df_qua = _df_.quantile([.25, .75])
            _df_q1, _df_q3 = _df_qua.loc[.25].copy(), _df_qua.loc[.75].copy()
            _df_iqr = _df_q3 - _df_q1

            _se = concat([_df_q1 - 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index)
            _le = concat([_df_q3 + 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index)

            if _log:
                return 10 ** _se, 10 ** _le
            return _se, _le

        _cation, _anion, _main = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'], ['Cl-', 'NO2-', 'NO3-', 'SO42-'], ['SO42-', 'NO3-', 'NH4+']

        _df_salt = _df[_mdl.keys()].copy()
        _df_pm = _df['PM2.5'].copy()

        ## lower than PM2.5
        ## conc. of main salt should be present at the same time (NH4+, SO42-, NO3-)
        _df_salt = _df_salt.mask(_df_salt.sum(axis=1, min_count=1) > _df_pm).dropna(subset=_main).copy()

        ## mdl
        for (_key, _df_col), _mdl_val in zip(_df_salt.items(), _mdl.values()):
            _df_salt[_key] = _df_col.mask(_df_col < _mdl_val, _mdl_val / 2)

        ## calculate SE LE
        ## salt < LE
        _se, _le = _se_le(_df_salt, _log=True)
        _df_salt = _df_salt.mask(_df_salt > _le).copy()

        ## C/A, A/C
        _rat_CA = (_df_salt[_cation].sum(axis=1) / _df_salt[_anion].sum(axis=1)).to_frame()
        _rat_AC = (1 / _rat_CA).copy()

        _se, _le = _se_le(_rat_CA)
        _cond_CA = (_rat_CA < _le) & (_rat_CA > 0)

        _se, _le = _se_le(_rat_AC)
        _cond_AC = (_rat_AC < _le) & (_rat_AC > 0)

        _df_salt = _df_salt.where((_cond_CA * _cond_AC)[0]).copy()

        ## conc. of main salt > SE
        _se, _le = _se_le(_df_salt[_main], _log=True)
        _df_salt[_main] = _df_salt[_main].mask(_df_salt[_main] < _se).copy()

        return _df_salt.reindex(_df.index)
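
The `_se_le` helper above computes per-species lower and upper fences (SE and LE) from the interquartile range, optionally in log10 space. Below is a minimal standalone sketch of the same fence logic, assuming pandas/NumPy; the `iqr_bounds` name, the ion column, and the values are invented for illustration and are not part of AeroViz.

import numpy as np
import pandas as pd

def iqr_bounds(df, log=False):
    # Lower/upper fences at Q1 - 1.5*IQR and Q3 + 1.5*IQR per column,
    # optionally computed on log10-transformed data (as _se_le does with _log=True).
    x = np.log10(df) if log else df
    q1, q3 = x.quantile(.25), x.quantile(.75)
    iqr = q3 - q1
    se, le = q1 - 1.5 * iqr, q3 + 1.5 * iqr
    return (10 ** se, 10 ** le) if log else (se, le)

# toy data: one ion column with an obvious outlier
conc = pd.DataFrame({'SO42-': [1.2, 1.5, 1.1, 1.4, 30.0]})
se, le = iqr_bounds(conc, log=True)
cleaned = conc.mask(conc > le)  # the 30.0 value exceeds the upper fence (LE) and is masked

Unlike `_se_le`, this sketch relies on pandas' column alignment instead of tiling the fences into full DataFrames with `concat`; the masking result is the same.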

AeroViz/rawDataReader/script/MA350.py
@@ -4,42 +4,42 @@ from AeroViz.rawDataReader.core import AbstractReader
(Whitespace-only re-indentation; resulting code shown once.)

class Reader(AbstractReader):
    nam = 'MA350'

    def _raw_reader(self, _file):
        _df = read_csv(_file, parse_dates=['Date / time local'], index_col='Date / time local').rename_axis("Time")

        _df = _df.rename(columns={
            'UV BCc': 'BC1',
            'Blue BCc': 'BC2',
            'Green BCc': 'BC3',
            'Red BCc': 'BC4',
            'IR BCc': 'BC5',
            'Biomass BCc (ng/m^3)': 'BB mass',
            'Fossil fuel BCc (ng/m^3)': 'FF mass',
            'Delta-C (ng/m^3)': 'Delta-C',
            'AAE': 'AAE',
            'BB (%)': 'BB',
        })

        # remove data without Status=32 (Automatic Tape Advance), 65536 (Tape Move)
        # if not self._oth_set.get('ignore_err', False):
        #     _df = _df.where((_df['Status'] != 32) | (_df['Status'] != 65536)).copy()

        return _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'Delta-C', 'AAE', 'BB']]

    # QC data
    def _QC(self, _df):
        # remove negative value
        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'AAE', 'BB']].mask((_df < 0).copy())

        # call by _QC function
        # QC data in 1 hr
        def _QC_func(_df_1hr):
            _df_ave = _df_1hr.mean()
            _df_std = _df_1hr.std()
            _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)

            return _df_1hr.mask(_df_lowb | _df_highb).copy()

        return _df.resample('1h', group_keys=False).apply(_QC_func).resample('5min').mean()
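
The MA350 `_QC` step masks points that fall more than 1.5 standard deviations from each hour's mean, then averages back to 5-minute resolution. A rough sketch of that hourly mean ± 1.5 σ filter, assuming pandas; `hourly_sigma_filter`, the BC5 values, and the timestamps are made up for illustration.

import pandas as pd

def hourly_sigma_filter(df, n_sigma=1.5):
    # Within each 1-hour window, mask points outside mean +/- n_sigma * std.
    def _filter(block):
        mu, sd = block.mean(), block.std()
        return block.mask((block < mu - n_sigma * sd) | (block > mu + n_sigma * sd))
    return df.resample('1h', group_keys=False).apply(_filter)

idx = pd.date_range('2024-01-01', periods=12, freq='5min')
bc = pd.DataFrame({'BC5': [800, 820, 810, 790, 805, 5000,
                           815, 800, 795, 810, 805, 800]}, index=idx)
qc = hourly_sigma_filter(bc)       # the 5000 ng/m^3 spike falls outside the fence and is masked
hourly = qc.resample('1h').mean()  # aggregate the cleaned series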

AeroViz/rawDataReader/script/NEPH.py
@@ -4,77 +4,77 @@ from AeroViz.rawDataReader.core import AbstractReader
(Whitespace-only re-indentation; resulting code shown once.)

class Reader(AbstractReader):
    nam = 'NEPH'

    def _raw_reader(self, _file):
        with _file.open('r', encoding='utf-8', errors='ignore') as f:
            _df = read_csv(f, header=None, names=range(11))

        _df_grp = _df.groupby(0)

        # T : time
        _df_tm = _df_grp.get_group('T')[[1, 2, 3, 4, 5, 6]].astype(int)

        for _k in [2, 3, 4, 5, 6]:
            _df_tm[_k] = _df_tm[_k].astype(int).map('{:02d}'.format).copy()
        _df_tm = _df_tm.astype(str)

        _idx_tm = to_datetime((_df_tm[1] + _df_tm[2] + _df_tm[3] + _df_tm[4] + _df_tm[5] + _df_tm[6]),
                              format='%Y%m%d%H%M%S')

        # D : data
        # col : 3~8 B G R BB BG BR
        # 1e6
        try:
            _df_dt = _df_grp.get_group('D')[[1, 2, 3, 4, 5, 6, 7, 8]].set_index(_idx_tm)
            _df_out = (_df_dt.groupby(1).get_group('NBXX')[[3, 4, 5, 6, 7, 8]] * 1e6).reindex(_idx_tm)
            _df_out.columns = ['B', 'G', 'R', 'BB', 'BG', 'BR']
            _df_out.index.name = 'Time'

            # Y : state
            # col : 5 RH
            _df_st = _df_grp.get_group('Y')
            _df_out['RH'] = _df_st[5].values
            _df_out['status'] = _df_st[9].values

            _df_out.mask(_df_out['status'] != 0)  # 0000 -> numeric to 0

            return _df_out[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']]

        except ValueError:
            group_sizes = _df_grp.size()
            print(group_sizes)
            # Define the valid groups
            valid_groups = {'B', 'G', 'R', 'D', 'T', 'Y', 'Z'}

            # Find the rows where the value in the first column is not in valid_groups
            invalid_indices = _df[~_df[0].isin(valid_groups)].index

            # Print the invalid indices and their corresponding values
            invalid_values = _df.loc[invalid_indices, 0]
            print("Invalid values and their indices:")
            for idx, value in zip(invalid_indices, invalid_values):
                print(f"Index: {idx}, Value: {value}")

            # If there's a length mismatch, return an empty DataFrame with the same index and column names
            columns = ['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']
            _df_out = DataFrame(index=_idx_tm, columns=columns)
            _df_out.index.name = 'Time'
            print(f'\n\t\t\t Length mismatch in {_file} data. Returning an empty DataFrame.')
            return _df_out

    # QC data
    def _QC(self, _df):
        # remove negative value
        _df = _df.mask((_df <= 0).copy())

        # call by _QC function
        # QC data in 1 hr
        def _QC_func(_df_1hr):
            _df_ave = _df_1hr.mean()
            _df_std = _df_1hr.std()
            _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)

            return _df_1hr.mask(_df_lowb | _df_highb).copy()

        return _df.resample('1h', group_keys=False).apply(_QC_func)
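
In the NEPH reader, the 'T' records carry the timestamp as six integer columns (year, month, day, hour, minute, second) that are zero-padded, concatenated, and parsed with `to_datetime`. A small sketch of that assembly step, assuming pandas and using invented values:

import pandas as pd

# hypothetical 'T' rows: year, month, day, hour, minute, second as integers
t_rows = pd.DataFrame({1: [2024, 2024], 2: [3, 3], 3: [7, 7],
                       4: [9, 9], 5: [0, 5], 6: [12, 12]})

# zero-pad every component except the year, then join the strings and parse
for col in [2, 3, 4, 5, 6]:
    t_rows[col] = t_rows[col].astype(int).map('{:02d}'.format)
t_rows = t_rows.astype(str)

idx = pd.to_datetime(t_rows[1] + t_rows[2] + t_rows[3] + t_rows[4] + t_rows[5] + t_rows[6],
                     format='%Y%m%d%H%M%S')
# two timestamps result: 2024-03-07 09:00:12 and 2024-03-07 09:05:12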

AeroViz/rawDataReader/script/OCEC_LCRES.py
@@ -4,31 +4,31 @@ from AeroViz.rawDataReader.core import AbstractReader
(Whitespace-only re-indentation; resulting code shown once.)

class Reader(AbstractReader):
    nam = 'OCEC_LCRES'

    def _raw_reader(self, _file):
        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
            _df = read_csv(f, skiprows=3)

        _col = {'Thermal/Optical OC (ugC/LCm^3)': 'Thermal_OC',
                'Thermal/Optical EC (ugC/LCm^3)': 'Thermal_EC',
                'OC=TC-BC (ugC/LCm^3)': 'Optical_OC',
                'BC (ugC/LCm^3)': 'Optical_EC',
                'Sample Volume Local Condition Actual m^3': 'Sample_Volume',
                'TC (ugC/LCm^3)': 'TC'}

        _tm_idx = to_datetime(_df['Start Date/Time'], errors='coerce')
        _df['time'] = _tm_idx

        _df = _df.dropna(subset='time').loc[~_tm_idx.duplicated()].set_index('time')

        return _df[_col.keys()].rename(columns=_col)

    ## QC data
    def _QC(self, _df):
        _df[['Thermal_OC', 'Optical_OC']] = _df[['Thermal_OC', 'Optical_OC']].where(
            _df[['Thermal_OC', 'Optical_OC']] > 0.3).copy()
        _df[['Thermal_EC', 'Optical_EC']] = _df[['Thermal_EC', 'Optical_EC']].where(
            _df[['Thermal_EC', 'Optical_EC']] > .015).copy()

        return _df

AeroViz/rawDataReader/script/OCEC_RES.py
@@ -4,25 +4,25 @@ from AeroViz.rawDataReader.core import AbstractReader
(Whitespace-only re-indentation; resulting code shown once.)

class Reader(AbstractReader):
    nam = 'OCEC_RES'

    def _raw_reader(self, _file):
        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
            _df = read_csv(f, skiprows=3)

        _col = {'OCPk1-ug C': 'OC1',
                'OCPk2-ug C': 'OC2',
                'OCPk3-ug C': 'OC3',
                'OCPk4-ug C': 'OC4',
                'Pyrolized C ug': 'PC'}

        _tm_idx = to_datetime(_df['Start Date/Time'], errors='coerce')
        _df['time'] = _tm_idx

        _df = _df.dropna(subset='time').loc[~_tm_idx.duplicated()].set_index('time')

        return _df[_col.keys()].rename(columns=_col)

    ## QC data
    def _QC(self, _df):
        return _df.where(_df > 0)

AeroViz/rawDataReader/script/SMPS_TH.py
@@ -4,38 +4,38 @@ from AeroViz.rawDataReader.core import AbstractReader
(Whitespace-only re-indentation; resulting code shown once.)

class Reader(AbstractReader):
    nam = 'SMPS_TH'

    def _raw_reader(self, _file):
        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
            _df = read_table(f, skiprows=18, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time')
            _key = list(_df.keys()[6:-26])

        _newkey = {}
        for _k in _key:
            _newkey[_k] = float(_k).__round__(4)

        # _newkey['Total Conc.(#/cm)'] = 'total'
        # _newkey['Mode(nm)'] = 'mode'

        _df_idx = to_datetime(_df.index, errors='coerce')
        return _df[_newkey.keys()].rename(_newkey, axis=1).set_index(_df_idx).loc[_df_idx.dropna()]

    ## QC data
    def _QC(self, _df):
        import numpy as n

        ## mask out the data size lower than 7
        _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
        _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
        _df = _df.mask(_df_size < 7)

        ## remove total conc. lower than 2000
        _df = _df.mask(_df['total'] < 2000)

        ## remove the bin over 400 nm which num. conc. larger than 4000
        _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]

        _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)

        return _df[_df.keys()[:-1]]
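
The 'total' column in the SMPS QC is an approximate integral of the size distribution: the per-bin dN/dlogDp values are summed and multiplied by the mean logarithmic bin width (natural log, mirroring `n.diff(n.log(...))` above). A toy illustration, assuming pandas/NumPy; the three diameter bins and values are hypothetical.

import numpy as np
import pandas as pd

# hypothetical dN/dlogDp values (#/cm^3) in three diameter bins (nm)
psd = pd.DataFrame({11.8: [1500.0], 12.2: [1800.0], 12.6: [1700.0]})

dlogdp = np.diff(np.log(psd.columns.to_numpy(float))).mean()  # mean log bin width
total = psd.sum(axis=1, min_count=1) * dlogdp                 # approximate total number concentration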

AeroViz/rawDataReader/script/SMPS_aim11.py
@@ -4,48 +4,48 @@ from AeroViz.rawDataReader.core import AbstractReader
(Whitespace-only re-indentation; resulting code shown once.)

class Reader(AbstractReader):
    nam = 'SMPS_aim11'

    def _raw_reader(self, _file):
        with open(_file, 'r', encoding='utf-8', errors='ignore') as f:

            skiprows = 0
            for _line in f:

                if _line.split(',')[0] == 'Scan Number':
                    f.seek(0)
                    break

                skiprows += 1
            # breakpoint()
            _df = read_csv(f, skiprows=skiprows)
            _tm_idx = to_datetime(_df['DateTime Sample Start'], format='%d/%m/%Y %X', errors='coerce')

        ## index
        _df = _df.set_index(_tm_idx).loc[_tm_idx.dropna()]

        ## keys
        _key = to_numeric(_df.keys(), errors='coerce')
        _df.columns = _key
        _df = _df.loc[:, ~_key.isna()]

        return _df.apply(to_numeric, errors='coerce')

    ## QC data
    def _QC(self, _df):
        import numpy as n

        ## mask out the data size lower than 7
        _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
        _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
        _df = _df.mask(_df_size < 7)

        ## remove total conc. lower than 2000
        _df = _df.mask(_df['total'] < 2000)

        ## remove the bin over 400 nm which num. conc. larger than 4000
        _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]

        _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)

        return _df[_df.keys()[:-1]]
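
SMPS_aim11 exports do not place the header at a fixed row, so the reader counts preamble lines until it meets the 'Scan Number' row, rewinds, and passes that count as `skiprows`. A standalone sketch of the same scan, assuming pandas; the wrapper name and file path are hypothetical and not part of AeroViz.

from pathlib import Path
from pandas import read_csv

def read_aim11_export(path):
    # Count preamble lines until the row whose first field is 'Scan Number',
    # then rewind and let read_csv skip exactly that many lines.
    with Path(path).open('r', encoding='utf-8', errors='ignore') as f:
        skiprows = 0
        for line in f:
            if line.split(',')[0] == 'Scan Number':
                f.seek(0)
                break
            skiprows += 1
        return read_csv(f, skiprows=skiprows)

# df = read_aim11_export('smps_aim11_export.csv')  # hypothetical file name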