AeroViz 0.1.2__py3-none-any.whl → 0.1.3b0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of AeroViz might be problematic. Click here for more details.

Files changed (112)
  1. AeroViz/__init__.py +4 -4
  2. AeroViz/config/DEFAULT_DATA.csv +1417 -0
  3. AeroViz/config/DEFAULT_PNSD_DATA.csv +1417 -0
  4. AeroViz/dataProcess/Chemistry/__init__.py +38 -38
  5. AeroViz/dataProcess/Chemistry/_calculate.py +15 -15
  6. AeroViz/dataProcess/Chemistry/_isoropia.py +69 -68
  7. AeroViz/dataProcess/Chemistry/_mass_volume.py +158 -158
  8. AeroViz/dataProcess/Chemistry/_ocec.py +109 -109
  9. AeroViz/dataProcess/Chemistry/_partition.py +19 -18
  10. AeroViz/dataProcess/Chemistry/_teom.py +8 -11
  11. AeroViz/dataProcess/Optical/_IMPROVE.py +40 -39
  12. AeroViz/dataProcess/Optical/__init__.py +35 -35
  13. AeroViz/dataProcess/Optical/_absorption.py +35 -35
  14. AeroViz/dataProcess/Optical/_extinction.py +25 -24
  15. AeroViz/dataProcess/Optical/_mie.py +5 -6
  16. AeroViz/dataProcess/Optical/_mie_sd.py +89 -90
  17. AeroViz/dataProcess/Optical/_scattering.py +16 -16
  18. AeroViz/dataProcess/SizeDistr/__init__.py +37 -37
  19. AeroViz/dataProcess/SizeDistr/__merge.py +159 -158
  20. AeroViz/dataProcess/SizeDistr/_merge.py +155 -154
  21. AeroViz/dataProcess/SizeDistr/_merge_v1.py +162 -161
  22. AeroViz/dataProcess/SizeDistr/_merge_v2.py +153 -152
  23. AeroViz/dataProcess/SizeDistr/_merge_v3.py +326 -326
  24. AeroViz/dataProcess/SizeDistr/_merge_v4.py +272 -274
  25. AeroViz/dataProcess/SizeDistr/_size_distr.py +51 -51
  26. AeroViz/dataProcess/VOC/__init__.py +7 -7
  27. AeroViz/dataProcess/VOC/_potential_par.py +53 -55
  28. AeroViz/dataProcess/VOC/voc_par.json +464 -0
  29. AeroViz/dataProcess/__init__.py +4 -4
  30. AeroViz/dataProcess/core/__init__.py +59 -58
  31. AeroViz/plot/__init__.py +6 -1
  32. AeroViz/plot/bar.py +126 -0
  33. AeroViz/plot/box.py +68 -0
  34. AeroViz/plot/distribution/distribution.py +421 -427
  35. AeroViz/plot/meteorology/meteorology.py +240 -292
  36. AeroViz/plot/optical/__init__.py +0 -1
  37. AeroViz/plot/optical/optical.py +230 -230
  38. AeroViz/plot/pie.py +198 -0
  39. AeroViz/plot/regression.py +210 -0
  40. AeroViz/plot/scatter.py +99 -0
  41. AeroViz/plot/templates/__init__.py +0 -3
  42. AeroViz/plot/templates/contour.py +25 -25
  43. AeroViz/plot/templates/corr_matrix.py +86 -93
  44. AeroViz/plot/templates/diurnal_pattern.py +24 -24
  45. AeroViz/plot/templates/koschmieder.py +106 -106
  46. AeroViz/plot/templates/metal_heatmap.py +34 -34
  47. AeroViz/plot/timeseries/timeseries.py +53 -60
  48. AeroViz/plot/utils/__init__.py +2 -1
  49. AeroViz/plot/utils/_color.py +57 -57
  50. AeroViz/plot/utils/_unit.py +48 -48
  51. AeroViz/plot/utils/fRH.json +390 -0
  52. AeroViz/plot/utils/plt_utils.py +92 -0
  53. AeroViz/plot/utils/sklearn_utils.py +49 -0
  54. AeroViz/plot/utils/units.json +84 -0
  55. AeroViz/plot/violin.py +79 -0
  56. AeroViz/process/__init__.py +15 -15
  57. AeroViz/process/core/DataProc.py +9 -9
  58. AeroViz/process/core/SizeDist.py +81 -81
  59. AeroViz/process/method/PyMieScatt_update.py +488 -488
  60. AeroViz/process/method/mie_theory.py +231 -229
  61. AeroViz/process/method/prop.py +40 -40
  62. AeroViz/process/script/AbstractDistCalc.py +103 -103
  63. AeroViz/process/script/Chemical.py +166 -166
  64. AeroViz/process/script/IMPACT.py +40 -40
  65. AeroViz/process/script/IMPROVE.py +152 -152
  66. AeroViz/process/script/Others.py +45 -45
  67. AeroViz/process/script/PSD.py +26 -26
  68. AeroViz/process/script/PSD_dry.py +69 -70
  69. AeroViz/process/script/retrieve_RI.py +50 -51
  70. AeroViz/rawDataReader/__init__.py +57 -57
  71. AeroViz/rawDataReader/core/__init__.py +328 -326
  72. AeroViz/rawDataReader/script/AE33.py +18 -18
  73. AeroViz/rawDataReader/script/AE43.py +20 -20
  74. AeroViz/rawDataReader/script/APS_3321.py +30 -30
  75. AeroViz/rawDataReader/script/Aurora.py +23 -23
  76. AeroViz/rawDataReader/script/BC1054.py +40 -40
  77. AeroViz/rawDataReader/script/EPA_vertical.py +9 -9
  78. AeroViz/rawDataReader/script/GRIMM.py +21 -21
  79. AeroViz/rawDataReader/script/IGAC_TH.py +67 -67
  80. AeroViz/rawDataReader/script/IGAC_ZM.py +59 -59
  81. AeroViz/rawDataReader/script/MA350.py +39 -39
  82. AeroViz/rawDataReader/script/NEPH.py +74 -74
  83. AeroViz/rawDataReader/script/OCEC_LCRES.py +21 -21
  84. AeroViz/rawDataReader/script/OCEC_RES.py +16 -16
  85. AeroViz/rawDataReader/script/SMPS_TH.py +25 -25
  86. AeroViz/rawDataReader/script/SMPS_aim11.py +32 -32
  87. AeroViz/rawDataReader/script/SMPS_genr.py +31 -31
  88. AeroViz/rawDataReader/script/TEOM.py +28 -28
  89. AeroViz/rawDataReader/script/Table.py +12 -12
  90. AeroViz/rawDataReader/script/VOC_TH.py +16 -16
  91. AeroViz/rawDataReader/script/VOC_ZM.py +28 -28
  92. AeroViz/rawDataReader/script/__init__.py +20 -20
  93. AeroViz/rawDataReader/utils/config.py +161 -161
  94. AeroViz/tools/database.py +65 -65
  95. AeroViz/tools/dataclassifier.py +106 -106
  96. AeroViz/tools/dataprinter.py +51 -51
  97. AeroViz/tools/datareader.py +38 -38
  98. {AeroViz-0.1.2.dist-info → AeroViz-0.1.3b0.dist-info}/METADATA +5 -4
  99. AeroViz-0.1.3b0.dist-info/RECORD +110 -0
  100. AeroViz/config/__init__.py +0 -0
  101. AeroViz/plot/improve/__init__.py +0 -1
  102. AeroViz/plot/improve/improve.py +0 -240
  103. AeroViz/plot/optical/aethalometer.py +0 -77
  104. AeroViz/plot/templates/event_evolution.py +0 -65
  105. AeroViz/plot/templates/regression.py +0 -256
  106. AeroViz/plot/templates/scatter.py +0 -130
  107. AeroViz/plot/templates/templates.py +0 -398
  108. AeroViz/plot/utils/_decorator.py +0 -74
  109. AeroViz-0.1.2.dist-info/RECORD +0 -106
  110. {AeroViz-0.1.2.dist-info → AeroViz-0.1.3b0.dist-info}/LICENSE +0 -0
  111. {AeroViz-0.1.2.dist-info → AeroViz-0.1.3b0.dist-info}/WHEEL +0 -0
  112. {AeroViz-0.1.2.dist-info → AeroViz-0.1.3b0.dist-info}/top_level.txt +0 -0
@@ -8,83 +8,83 @@ from AeroViz.rawDataReader.core import AbstractReader
8
8
 
9
9
 
10
10
  class Reader(AbstractReader):
11
- nam = 'IGAC_ZM'
11
+ nam = 'IGAC_ZM'
12
12
 
13
- def _raw_reader(self, _file):
13
+ def _raw_reader(self, _file):
14
14
 
15
- with (_file).open('r', encoding='utf-8-sig', errors='ignore') as f:
16
- _df = read_csv(f, parse_dates=[0], index_col=[0], na_values=['-']).apply(to_numeric, errors='coerce')
15
+ with (_file).open('r', encoding='utf-8-sig', errors='ignore') as f:
16
+ _df = read_csv(f, parse_dates=[0], index_col=[0], na_values=['-']).apply(to_numeric, errors='coerce')
17
17
 
18
- _df.columns = _df.keys().str.strip(' ')
19
- _df.index.name = 'time'
18
+ _df.columns = _df.keys().str.strip(' ')
19
+ _df.index.name = 'time'
20
20
 
21
- return _df.loc[_df.index.dropna()].loc[~_df.index.duplicated()]
21
+ return _df.loc[_df.index.dropna()].loc[~_df.index.duplicated()]
22
22
 
23
- ## QC data
24
- def _QC(self, _df):
23
+ ## QC data
24
+ def _QC(self, _df):
25
25
 
26
- ## QC parameter, function (MDL SE LE)
27
- _mdl = {
28
- 'Na+': 0.06,
29
- 'NH4+': 0.05,
30
- 'K+': 0.05,
31
- 'Mg2+': 0.12,
32
- 'Ca2+': 0.07,
33
- 'Cl-': 0.07,
34
- 'NO2-': 0.05,
35
- 'NO3-': 0.11,
36
- 'SO42-': 0.08,
37
- }
38
- _mdl.update(self._oth_set.get('mdl', {}))
26
+ ## QC parameter, function (MDL SE LE)
27
+ _mdl = {
28
+ 'Na+': 0.06,
29
+ 'NH4+': 0.05,
30
+ 'K+': 0.05,
31
+ 'Mg2+': 0.12,
32
+ 'Ca2+': 0.07,
33
+ 'Cl-': 0.07,
34
+ 'NO2-': 0.05,
35
+ 'NO3-': 0.11,
36
+ 'SO42-': 0.08,
37
+ }
38
+ _mdl.update(self._oth_set.get('mdl', {}))
39
39
 
40
- def _se_le(_df_, _log=False):
41
- _df_ = np.log10(_df_) if _log else _df_
40
+ def _se_le(_df_, _log=False):
41
+ _df_ = np.log10(_df_) if _log else _df_
42
42
 
43
- _df_qua = _df_.quantile([.25, .75])
44
- _df_q1, _df_q3 = _df_qua.loc[.25].copy(), _df_qua.loc[.75].copy()
45
- _df_iqr = _df_q3 - _df_q1
43
+ _df_qua = _df_.quantile([.25, .75])
44
+ _df_q1, _df_q3 = _df_qua.loc[.25].copy(), _df_qua.loc[.75].copy()
45
+ _df_iqr = _df_q3 - _df_q1
46
46
 
47
- _se = concat([_df_q1 - 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index)
48
- _le = concat([_df_q3 + 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index)
47
+ _se = concat([_df_q1 - 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index)
48
+ _le = concat([_df_q3 + 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index)
49
49
 
50
- if _log:
51
- return 10 ** _se, 10 ** _le
52
- return _se, _le
50
+ if _log:
51
+ return 10 ** _se, 10 ** _le
52
+ return _se, _le
53
53
 
54
- _cation, _anion, _main = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'], ['Cl-', 'NO2-', 'NO3-', 'SO42-', ], ['SO42-',
55
- 'NO3-',
56
- 'NH4+']
54
+ _cation, _anion, _main = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'], ['Cl-', 'NO2-', 'NO3-', 'SO42-', ], ['SO42-',
55
+ 'NO3-',
56
+ 'NH4+']
57
57
 
58
- _df_salt = _df[_mdl.keys()].copy()
59
- _df_pm = _df['PM2.5'].copy()
58
+ _df_salt = _df[_mdl.keys()].copy()
59
+ _df_pm = _df['PM2.5'].copy()
60
60
 
61
- ## lower than PM2.5
62
- ## conc. of main salt should be present at the same time (NH4+, SO42-, NO3-)
63
- _df_salt = _df_salt.mask(_df_salt.sum(axis=1, min_count=1) > _df_pm).dropna(subset=_main).copy()
61
+ ## lower than PM2.5
62
+ ## conc. of main salt should be present at the same time (NH4+, SO42-, NO3-)
63
+ _df_salt = _df_salt.mask(_df_salt.sum(axis=1, min_count=1) > _df_pm).dropna(subset=_main).copy()
64
64
 
65
- ## mdl
66
- for (_key, _df_col), _mdl_val in zip(_df_salt.items(), _mdl.values()):
67
- _df_salt[_key] = _df_col.mask(_df_col < _mdl_val, _mdl_val / 2)
65
+ ## mdl
66
+ for (_key, _df_col), _mdl_val in zip(_df_salt.items(), _mdl.values()):
67
+ _df_salt[_key] = _df_col.mask(_df_col < _mdl_val, _mdl_val / 2)
68
68
 
69
- ## calculate SE LE
70
- ## salt < LE
71
- _se, _le = _se_le(_df_salt, _log=True)
72
- _df_salt = _df_salt.mask(_df_salt > _le).copy()
69
+ ## calculate SE LE
70
+ ## salt < LE
71
+ _se, _le = _se_le(_df_salt, _log=True)
72
+ _df_salt = _df_salt.mask(_df_salt > _le).copy()
73
73
 
74
- ## C/A, A/C
75
- _rat_CA = (_df_salt[_cation].sum(axis=1) / _df_salt[_anion].sum(axis=1)).to_frame()
76
- _rat_AC = (1 / _rat_CA).copy()
74
+ ## C/A, A/C
75
+ _rat_CA = (_df_salt[_cation].sum(axis=1) / _df_salt[_anion].sum(axis=1)).to_frame()
76
+ _rat_AC = (1 / _rat_CA).copy()
77
77
 
78
- _se, _le = _se_le(_rat_CA, )
79
- _cond_CA = (_rat_CA < _le) & (_rat_CA > 0)
78
+ _se, _le = _se_le(_rat_CA, )
79
+ _cond_CA = (_rat_CA < _le) & (_rat_CA > 0)
80
80
 
81
- _se, _le = _se_le(_rat_AC, )
82
- _cond_AC = (_rat_AC < _le) & (_rat_AC > 0)
81
+ _se, _le = _se_le(_rat_AC, )
82
+ _cond_AC = (_rat_AC < _le) & (_rat_AC > 0)
83
83
 
84
- _df_salt = _df_salt.where((_cond_CA * _cond_AC)[0]).copy()
84
+ _df_salt = _df_salt.where((_cond_CA * _cond_AC)[0]).copy()
85
85
 
86
- ## conc. of main salt > SE
87
- _se, _le = _se_le(_df_salt[_main], _log=True)
88
- _df_salt[_main] = _df_salt[_main].mask(_df_salt[_main] < _se).copy()
86
+ ## conc. of main salt > SE
87
+ _se, _le = _se_le(_df_salt[_main], _log=True)
88
+ _df_salt[_main] = _df_salt[_main].mask(_df_salt[_main] < _se).copy()
89
89
 
90
- return _df_salt.reindex(_df.index)
90
+ return _df_salt.reindex(_df.index)
@@ -4,42 +4,42 @@ from AeroViz.rawDataReader.core import AbstractReader
4
4
 
5
5
 
6
6
  class Reader(AbstractReader):
7
- nam = 'MA350'
8
-
9
- def _raw_reader(self, _file):
10
- _df = read_csv(_file, parse_dates=['Date / time local'], index_col='Date / time local').rename_axis("Time")
11
-
12
- _df = _df.rename(columns={
13
- 'UV BCc': 'BC1',
14
- 'Blue BCc': 'BC2',
15
- 'Green BCc': 'BC3',
16
- 'Red BCc': 'BC4',
17
- 'IR BCc': 'BC5',
18
- 'Biomass BCc (ng/m^3)': 'BB mass',
19
- 'Fossil fuel BCc (ng/m^3)': 'FF mass',
20
- 'Delta-C (ng/m^3)': 'Delta-C',
21
- 'AAE': 'AAE',
22
- 'BB (%)': 'BB',
23
- })
24
-
25
- # remove data without Status=32 (Automatic Tape Advance), 65536 (Tape Move)
26
- # if not self._oth_set.get('ignore_err', False):
27
- # _df = _df.where((_df['Status'] != 32) | (_df['Status'] != 65536)).copy()
28
-
29
- return _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'Delta-C', 'AAE', 'BB']]
30
-
31
- # QC data
32
- def _QC(self, _df):
33
- # remove negative value
34
- _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'AAE', 'BB']].mask((_df < 0).copy())
35
-
36
- # call by _QC function
37
- # QC data in 1 hr
38
- def _QC_func(_df_1hr):
39
- _df_ave = _df_1hr.mean()
40
- _df_std = _df_1hr.std()
41
- _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
42
-
43
- return _df_1hr.mask(_df_lowb | _df_highb).copy()
44
-
45
- return _df.resample('1h', group_keys=False).apply(_QC_func).resample('5min').mean()
7
+ nam = 'MA350'
8
+
9
+ def _raw_reader(self, _file):
10
+ _df = read_csv(_file, parse_dates=['Date / time local'], index_col='Date / time local').rename_axis("Time")
11
+
12
+ _df = _df.rename(columns={
13
+ 'UV BCc': 'BC1',
14
+ 'Blue BCc': 'BC2',
15
+ 'Green BCc': 'BC3',
16
+ 'Red BCc': 'BC4',
17
+ 'IR BCc': 'BC5',
18
+ 'Biomass BCc (ng/m^3)': 'BB mass',
19
+ 'Fossil fuel BCc (ng/m^3)': 'FF mass',
20
+ 'Delta-C (ng/m^3)': 'Delta-C',
21
+ 'AAE': 'AAE',
22
+ 'BB (%)': 'BB',
23
+ })
24
+
25
+ # remove data without Status=32 (Automatic Tape Advance), 65536 (Tape Move)
26
+ # if not self._oth_set.get('ignore_err', False):
27
+ # _df = _df.where((_df['Status'] != 32) | (_df['Status'] != 65536)).copy()
28
+
29
+ return _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'Delta-C', 'AAE', 'BB']]
30
+
31
+ # QC data
32
+ def _QC(self, _df):
33
+ # remove negative value
34
+ _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'AAE', 'BB']].mask((_df < 0).copy())
35
+
36
+ # call by _QC function
37
+ # QC data in 1 hr
38
+ def _QC_func(_df_1hr):
39
+ _df_ave = _df_1hr.mean()
40
+ _df_std = _df_1hr.std()
41
+ _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
42
+
43
+ return _df_1hr.mask(_df_lowb | _df_highb).copy()
44
+
45
+ return _df.resample('1h', group_keys=False).apply(_QC_func).resample('5min').mean()
@@ -4,77 +4,77 @@ from AeroViz.rawDataReader.core import AbstractReader
4
4
 
5
5
 
6
6
  class Reader(AbstractReader):
7
- nam = 'NEPH'
8
-
9
- def _raw_reader(self, _file):
10
- with _file.open('r', encoding='utf-8', errors='ignore') as f:
11
- _df = read_csv(f, header=None, names=range(11))
12
-
13
- _df_grp = _df.groupby(0)
14
-
15
- # T : time
16
- _df_tm = _df_grp.get_group('T')[[1, 2, 3, 4, 5, 6]].astype(int)
17
-
18
- for _k in [2, 3, 4, 5, 6]:
19
- _df_tm[_k] = _df_tm[_k].astype(int).map('{:02d}'.format).copy()
20
- _df_tm = _df_tm.astype(str)
21
-
22
- _idx_tm = to_datetime((_df_tm[1] + _df_tm[2] + _df_tm[3] + _df_tm[4] + _df_tm[5] + _df_tm[6]),
23
- format='%Y%m%d%H%M%S')
24
-
25
- # D : data
26
- # col : 3~8 B G R BB BG BR
27
- # 1e6
28
- try:
29
- _df_dt = _df_grp.get_group('D')[[1, 2, 3, 4, 5, 6, 7, 8]].set_index(_idx_tm)
30
- _df_out = (_df_dt.groupby(1).get_group('NBXX')[[3, 4, 5, 6, 7, 8]] * 1e6).reindex(_idx_tm)
31
- _df_out.columns = ['B', 'G', 'R', 'BB', 'BG', 'BR']
32
- _df_out.index.name = 'Time'
33
-
34
- # Y : state
35
- # col : 5 RH
36
- _df_st = _df_grp.get_group('Y')
37
- _df_out['RH'] = _df_st[5].values
38
- _df_out['status'] = _df_st[9].values
39
-
40
- _df_out.mask(_df_out['status'] != 0) # 0000 -> numeric to 0
41
-
42
- return _df_out[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']]
43
-
44
- except ValueError:
45
- group_sizes = _df_grp.size()
46
- print(group_sizes)
47
- # Define the valid groups
48
- valid_groups = {'B', 'G', 'R', 'D', 'T', 'Y', 'Z'}
49
-
50
- # Find the rows where the value in the first column is not in valid_groups
51
- invalid_indices = _df[~_df[0].isin(valid_groups)].index
52
-
53
- # Print the invalid indices and their corresponding values
54
- invalid_values = _df.loc[invalid_indices, 0]
55
- print("Invalid values and their indices:")
56
- for idx, value in zip(invalid_indices, invalid_values):
57
- print(f"Index: {idx}, Value: {value}")
58
-
59
- # If there's a length mismatch, return an empty DataFrame with the same index and column names
60
- columns = ['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']
61
- _df_out = DataFrame(index=_idx_tm, columns=columns)
62
- _df_out.index.name = 'Time'
63
- print(f'\n\t\t\t Length mismatch in {_file} data. Returning an empty DataFrame.')
64
- return _df_out
65
-
66
- # QC data
67
- def _QC(self, _df):
68
- # remove negative value
69
- _df = _df.mask((_df <= 0).copy())
70
-
71
- # call by _QC function
72
- # QC data in 1 hr
73
- def _QC_func(_df_1hr):
74
- _df_ave = _df_1hr.mean()
75
- _df_std = _df_1hr.std()
76
- _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
77
-
78
- return _df_1hr.mask(_df_lowb | _df_highb).copy()
79
-
80
- return _df.resample('1h', group_keys=False).apply(_QC_func)
7
+ nam = 'NEPH'
8
+
9
+ def _raw_reader(self, _file):
10
+ with _file.open('r', encoding='utf-8', errors='ignore') as f:
11
+ _df = read_csv(f, header=None, names=range(11))
12
+
13
+ _df_grp = _df.groupby(0)
14
+
15
+ # T : time
16
+ _df_tm = _df_grp.get_group('T')[[1, 2, 3, 4, 5, 6]].astype(int)
17
+
18
+ for _k in [2, 3, 4, 5, 6]:
19
+ _df_tm[_k] = _df_tm[_k].astype(int).map('{:02d}'.format).copy()
20
+ _df_tm = _df_tm.astype(str)
21
+
22
+ _idx_tm = to_datetime((_df_tm[1] + _df_tm[2] + _df_tm[3] + _df_tm[4] + _df_tm[5] + _df_tm[6]),
23
+ format='%Y%m%d%H%M%S')
24
+
25
+ # D : data
26
+ # col : 3~8 B G R BB BG BR
27
+ # 1e6
28
+ try:
29
+ _df_dt = _df_grp.get_group('D')[[1, 2, 3, 4, 5, 6, 7, 8]].set_index(_idx_tm)
30
+ _df_out = (_df_dt.groupby(1).get_group('NBXX')[[3, 4, 5, 6, 7, 8]] * 1e6).reindex(_idx_tm)
31
+ _df_out.columns = ['B', 'G', 'R', 'BB', 'BG', 'BR']
32
+ _df_out.index.name = 'Time'
33
+
34
+ # Y : state
35
+ # col : 5 RH
36
+ _df_st = _df_grp.get_group('Y')
37
+ _df_out['RH'] = _df_st[5].values
38
+ _df_out['status'] = _df_st[9].values
39
+
40
+ _df_out.mask(_df_out['status'] != 0) # 0000 -> numeric to 0
41
+
42
+ return _df_out[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']]
43
+
44
+ except ValueError:
45
+ group_sizes = _df_grp.size()
46
+ print(group_sizes)
47
+ # Define the valid groups
48
+ valid_groups = {'B', 'G', 'R', 'D', 'T', 'Y', 'Z'}
49
+
50
+ # Find the rows where the value in the first column is not in valid_groups
51
+ invalid_indices = _df[~_df[0].isin(valid_groups)].index
52
+
53
+ # Print the invalid indices and their corresponding values
54
+ invalid_values = _df.loc[invalid_indices, 0]
55
+ print("Invalid values and their indices:")
56
+ for idx, value in zip(invalid_indices, invalid_values):
57
+ print(f"Index: {idx}, Value: {value}")
58
+
59
+ # If there's a length mismatch, return an empty DataFrame with the same index and column names
60
+ columns = ['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']
61
+ _df_out = DataFrame(index=_idx_tm, columns=columns)
62
+ _df_out.index.name = 'Time'
63
+ print(f'\n\t\t\t Length mismatch in {_file} data. Returning an empty DataFrame.')
64
+ return _df_out
65
+
66
+ # QC data
67
+ def _QC(self, _df):
68
+ # remove negative value
69
+ _df = _df.mask((_df <= 0).copy())
70
+
71
+ # call by _QC function
72
+ # QC data in 1 hr
73
+ def _QC_func(_df_1hr):
74
+ _df_ave = _df_1hr.mean()
75
+ _df_std = _df_1hr.std()
76
+ _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
77
+
78
+ return _df_1hr.mask(_df_lowb | _df_highb).copy()
79
+
80
+ return _df.resample('1h', group_keys=False).apply(_QC_func)
@@ -4,31 +4,31 @@ from AeroViz.rawDataReader.core import AbstractReader
4
4
 
5
5
 
6
6
  class Reader(AbstractReader):
7
- nam = 'OCEC_LCRES'
7
+ nam = 'OCEC_LCRES'
8
8
 
9
- def _raw_reader(self, _file):
10
- with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
11
- _df = read_csv(f, skiprows=3)
9
+ def _raw_reader(self, _file):
10
+ with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
11
+ _df = read_csv(f, skiprows=3)
12
12
 
13
- _col = {'Thermal/Optical OC (ugC/LCm^3)': 'Thermal_OC',
14
- 'Thermal/Optical EC (ugC/LCm^3)': 'Thermal_EC',
15
- 'OC=TC-BC (ugC/LCm^3)': 'Optical_OC',
16
- 'BC (ugC/LCm^3)': 'Optical_EC',
17
- 'Sample Volume Local Condition Actual m^3': 'Sample_Volume',
18
- 'TC (ugC/LCm^3)': 'TC', }
13
+ _col = {'Thermal/Optical OC (ugC/LCm^3)': 'Thermal_OC',
14
+ 'Thermal/Optical EC (ugC/LCm^3)': 'Thermal_EC',
15
+ 'OC=TC-BC (ugC/LCm^3)': 'Optical_OC',
16
+ 'BC (ugC/LCm^3)': 'Optical_EC',
17
+ 'Sample Volume Local Condition Actual m^3': 'Sample_Volume',
18
+ 'TC (ugC/LCm^3)': 'TC', }
19
19
 
20
- _tm_idx = to_datetime(_df['Start Date/Time'], errors='coerce')
21
- _df['time'] = _tm_idx
20
+ _tm_idx = to_datetime(_df['Start Date/Time'], errors='coerce')
21
+ _df['time'] = _tm_idx
22
22
 
23
- _df = _df.dropna(subset='time').loc[~_tm_idx.duplicated()].set_index('time')
23
+ _df = _df.dropna(subset='time').loc[~_tm_idx.duplicated()].set_index('time')
24
24
 
25
- return _df[_col.keys()].rename(columns=_col)
25
+ return _df[_col.keys()].rename(columns=_col)
26
26
 
27
- ## QC data
28
- def _QC(self, _df):
29
- _df[['Thermal_OC', 'Optical_OC']] = _df[['Thermal_OC', 'Optical_OC']].where(
30
- _df[['Thermal_OC', 'Optical_OC']] > 0.3).copy()
31
- _df[['Thermal_EC', 'Optical_EC']] = _df[['Thermal_EC', 'Optical_EC']].where(
32
- _df[['Thermal_EC', 'Optical_EC']] > .015).copy()
27
+ ## QC data
28
+ def _QC(self, _df):
29
+ _df[['Thermal_OC', 'Optical_OC']] = _df[['Thermal_OC', 'Optical_OC']].where(
30
+ _df[['Thermal_OC', 'Optical_OC']] > 0.3).copy()
31
+ _df[['Thermal_EC', 'Optical_EC']] = _df[['Thermal_EC', 'Optical_EC']].where(
32
+ _df[['Thermal_EC', 'Optical_EC']] > .015).copy()
33
33
 
34
- return _df
34
+ return _df
@@ -4,25 +4,25 @@ from AeroViz.rawDataReader.core import AbstractReader
4
4
 
5
5
 
6
6
  class Reader(AbstractReader):
7
- nam = 'OCEC_RES'
7
+ nam = 'OCEC_RES'
8
8
 
9
- def _raw_reader(self, _file):
10
- with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
11
- _df = read_csv(f, skiprows=3)
9
+ def _raw_reader(self, _file):
10
+ with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
11
+ _df = read_csv(f, skiprows=3)
12
12
 
13
- _col = {'OCPk1-ug C': 'OC1',
14
- 'OCPk2-ug C': 'OC2',
15
- 'OCPk3-ug C': 'OC3',
16
- 'OCPk4-ug C': 'OC4',
17
- 'Pyrolized C ug': 'PC', }
13
+ _col = {'OCPk1-ug C': 'OC1',
14
+ 'OCPk2-ug C': 'OC2',
15
+ 'OCPk3-ug C': 'OC3',
16
+ 'OCPk4-ug C': 'OC4',
17
+ 'Pyrolized C ug': 'PC', }
18
18
 
19
- _tm_idx = to_datetime(_df['Start Date/Time'], errors='coerce')
20
- _df['time'] = _tm_idx
19
+ _tm_idx = to_datetime(_df['Start Date/Time'], errors='coerce')
20
+ _df['time'] = _tm_idx
21
21
 
22
- _df = _df.dropna(subset='time').loc[~_tm_idx.duplicated()].set_index('time')
22
+ _df = _df.dropna(subset='time').loc[~_tm_idx.duplicated()].set_index('time')
23
23
 
24
- return _df[_col.keys()].rename(columns=_col)
24
+ return _df[_col.keys()].rename(columns=_col)
25
25
 
26
- ## QC data
27
- def _QC(self, _df):
28
- return _df.where(_df > 0)
26
+ ## QC data
27
+ def _QC(self, _df):
28
+ return _df.where(_df > 0)
@@ -4,38 +4,38 @@ from AeroViz.rawDataReader.core import AbstractReader
4
4
 
5
5
 
6
6
  class Reader(AbstractReader):
7
- nam = 'SMPS_TH'
7
+ nam = 'SMPS_TH'
8
8
 
9
- def _raw_reader(self, _file):
10
- with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
11
- _df = read_table(f, skiprows=18, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time')
12
- _key = list(_df.keys()[6:-26])
9
+ def _raw_reader(self, _file):
10
+ with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
11
+ _df = read_table(f, skiprows=18, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time')
12
+ _key = list(_df.keys()[6:-26])
13
13
 
14
- _newkey = {}
15
- for _k in _key:
16
- _newkey[_k] = float(_k).__round__(4)
14
+ _newkey = {}
15
+ for _k in _key:
16
+ _newkey[_k] = float(_k).__round__(4)
17
17
 
18
- # _newkey['Total Conc.(#/cm)'] = 'total'
19
- # _newkey['Mode(nm)'] = 'mode'
18
+ # _newkey['Total Conc.(#/cm)'] = 'total'
19
+ # _newkey['Mode(nm)'] = 'mode'
20
20
 
21
- _df_idx = to_datetime(_df.index, errors='coerce')
22
- return _df[_newkey.keys()].rename(_newkey, axis=1).set_index(_df_idx).loc[_df_idx.dropna()]
21
+ _df_idx = to_datetime(_df.index, errors='coerce')
22
+ return _df[_newkey.keys()].rename(_newkey, axis=1).set_index(_df_idx).loc[_df_idx.dropna()]
23
23
 
24
- ## QC data
25
- def _QC(self, _df):
26
- import numpy as n
24
+ ## QC data
25
+ def _QC(self, _df):
26
+ import numpy as n
27
27
 
28
- ## mask out the data size lower than 7
29
- _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
30
- _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
31
- _df = _df.mask(_df_size < 7)
28
+ ## mask out the data size lower than 7
29
+ _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
30
+ _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
31
+ _df = _df.mask(_df_size < 7)
32
32
 
33
- ## remove total conc. lower than 2000
34
- _df = _df.mask(_df['total'] < 2000)
33
+ ## remove total conc. lower than 2000
34
+ _df = _df.mask(_df['total'] < 2000)
35
35
 
36
- ## remove the bin over 400 nm which num. conc. larger than 4000
37
- _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]
36
+ ## remove the bin over 400 nm which num. conc. larger than 4000
37
+ _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]
38
38
 
39
- _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)
39
+ _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)
40
40
 
41
- return _df[_df.keys()[:-1]]
41
+ return _df[_df.keys()[:-1]]
@@ -4,48 +4,48 @@ from AeroViz.rawDataReader.core import AbstractReader
4
4
 
5
5
 
6
6
  class Reader(AbstractReader):
7
- nam = 'SMPS_aim11'
7
+ nam = 'SMPS_aim11'
8
8
 
9
- def _raw_reader(self, _file):
10
- with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
9
+ def _raw_reader(self, _file):
10
+ with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
11
11
 
12
- skiprows = 0
13
- for _line in f:
12
+ skiprows = 0
13
+ for _line in f:
14
14
 
15
- if _line.split(',')[0] == 'Scan Number':
16
- f.seek(0)
17
- break
15
+ if _line.split(',')[0] == 'Scan Number':
16
+ f.seek(0)
17
+ break
18
18
 
19
- skiprows += 1
20
- # breakpoint()
21
- _df = read_csv(f, skiprows=skiprows)
22
- _tm_idx = to_datetime(_df['DateTime Sample Start'], format='%d/%m/%Y %X', errors='coerce')
19
+ skiprows += 1
20
+ # breakpoint()
21
+ _df = read_csv(f, skiprows=skiprows)
22
+ _tm_idx = to_datetime(_df['DateTime Sample Start'], format='%d/%m/%Y %X', errors='coerce')
23
23
 
24
- ## index
25
- _df = _df.set_index(_tm_idx).loc[_tm_idx.dropna()]
24
+ ## index
25
+ _df = _df.set_index(_tm_idx).loc[_tm_idx.dropna()]
26
26
 
27
- ## keys
28
- _key = to_numeric(_df.keys(), errors='coerce')
29
- _df.columns = _key
30
- _df = _df.loc[:, ~_key.isna()]
27
+ ## keys
28
+ _key = to_numeric(_df.keys(), errors='coerce')
29
+ _df.columns = _key
30
+ _df = _df.loc[:, ~_key.isna()]
31
31
 
32
- return _df.apply(to_numeric, errors='coerce')
32
+ return _df.apply(to_numeric, errors='coerce')
33
33
 
34
- ## QC data
35
- def _QC(self, _df):
36
- import numpy as n
34
+ ## QC data
35
+ def _QC(self, _df):
36
+ import numpy as n
37
37
 
38
- ## mask out the data size lower than 7
39
- _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
40
- _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
41
- _df = _df.mask(_df_size < 7)
38
+ ## mask out the data size lower than 7
39
+ _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
40
+ _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
41
+ _df = _df.mask(_df_size < 7)
42
42
 
43
- ## remove total conc. lower than 2000
44
- _df = _df.mask(_df['total'] < 2000)
43
+ ## remove total conc. lower than 2000
44
+ _df = _df.mask(_df['total'] < 2000)
45
45
 
46
- ## remove the bin over 400 nm which num. conc. larger than 4000
47
- _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]
46
+ ## remove the bin over 400 nm which num. conc. larger than 4000
47
+ _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]
48
48
 
49
- _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)
49
+ _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)
50
50
 
51
- return _df[_df.keys()[:-1]]
51
+ return _df[_df.keys()[:-1]]