AeroViz 0.1.3b0__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of AeroViz might be problematic; see the registry's advisory page for more details.

Files changed (81)
  1. AeroViz/__init__.py +5 -3
  2. AeroViz/{config → data}/DEFAULT_DATA.csv +1 -1
  3. AeroViz/dataProcess/Chemistry/__init__.py +7 -7
  4. AeroViz/dataProcess/Chemistry/_isoropia.py +5 -2
  5. AeroViz/dataProcess/Chemistry/_mass_volume.py +15 -18
  6. AeroViz/dataProcess/Chemistry/_ocec.py +2 -2
  7. AeroViz/dataProcess/Chemistry/_teom.py +2 -1
  8. AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
  9. AeroViz/dataProcess/Optical/Angstrom_exponent.py +20 -0
  10. AeroViz/dataProcess/Optical/_IMPROVE.py +13 -15
  11. AeroViz/dataProcess/Optical/__init__.py +15 -30
  12. AeroViz/dataProcess/Optical/_absorption.py +21 -47
  13. AeroViz/dataProcess/Optical/_extinction.py +20 -15
  14. AeroViz/dataProcess/Optical/_mie.py +0 -1
  15. AeroViz/dataProcess/Optical/_scattering.py +19 -20
  16. AeroViz/dataProcess/SizeDistr/__init__.py +7 -7
  17. AeroViz/dataProcess/SizeDistr/_merge.py +2 -2
  18. AeroViz/dataProcess/SizeDistr/_merge_v1.py +2 -2
  19. AeroViz/dataProcess/SizeDistr/_merge_v2.py +2 -2
  20. AeroViz/dataProcess/SizeDistr/_merge_v3.py +1 -1
  21. AeroViz/dataProcess/SizeDistr/_merge_v4.py +1 -1
  22. AeroViz/dataProcess/VOC/__init__.py +3 -3
  23. AeroViz/dataProcess/__init__.py +28 -6
  24. AeroViz/dataProcess/core/__init__.py +10 -17
  25. AeroViz/plot/__init__.py +1 -1
  26. AeroViz/plot/box.py +2 -1
  27. AeroViz/plot/optical/optical.py +4 -4
  28. AeroViz/plot/regression.py +25 -39
  29. AeroViz/plot/scatter.py +68 -2
  30. AeroViz/plot/templates/__init__.py +2 -1
  31. AeroViz/plot/templates/ammonium_rich.py +34 -0
  32. AeroViz/plot/templates/diurnal_pattern.py +11 -9
  33. AeroViz/plot/templates/koschmieder.py +51 -115
  34. AeroViz/plot/templates/metal_heatmap.py +115 -17
  35. AeroViz/plot/timeseries/__init__.py +1 -0
  36. AeroViz/plot/timeseries/template.py +47 -0
  37. AeroViz/plot/timeseries/timeseries.py +275 -208
  38. AeroViz/plot/utils/plt_utils.py +2 -2
  39. AeroViz/plot/utils/units.json +5 -0
  40. AeroViz/plot/violin.py +9 -8
  41. AeroViz/process/__init__.py +2 -2
  42. AeroViz/process/script/AbstractDistCalc.py +1 -1
  43. AeroViz/process/script/Chemical.py +5 -4
  44. AeroViz/process/script/Others.py +1 -1
  45. AeroViz/rawDataReader/__init__.py +17 -22
  46. AeroViz/rawDataReader/{utils/config.py → config/supported_instruments.py} +38 -52
  47. AeroViz/rawDataReader/core/__init__.py +104 -229
  48. AeroViz/rawDataReader/script/AE33.py +10 -11
  49. AeroViz/rawDataReader/script/AE43.py +8 -11
  50. AeroViz/rawDataReader/script/APS_3321.py +6 -6
  51. AeroViz/rawDataReader/script/Aurora.py +18 -19
  52. AeroViz/rawDataReader/script/BC1054.py +11 -15
  53. AeroViz/rawDataReader/script/EPA_vertical.py +35 -7
  54. AeroViz/rawDataReader/script/GRIMM.py +2 -9
  55. AeroViz/rawDataReader/script/{IGAC_ZM.py → IGAC.py} +17 -17
  56. AeroViz/rawDataReader/script/MA350.py +7 -14
  57. AeroViz/rawDataReader/script/Minion.py +103 -0
  58. AeroViz/rawDataReader/script/NEPH.py +24 -29
  59. AeroViz/rawDataReader/script/SMPS_TH.py +4 -4
  60. AeroViz/rawDataReader/script/SMPS_aim11.py +6 -6
  61. AeroViz/rawDataReader/script/SMPS_genr.py +6 -6
  62. AeroViz/rawDataReader/script/Sunset_OCEC.py +60 -0
  63. AeroViz/rawDataReader/script/TEOM.py +8 -6
  64. AeroViz/rawDataReader/script/Table.py +7 -8
  65. AeroViz/rawDataReader/script/VOC.py +26 -0
  66. AeroViz/rawDataReader/script/__init__.py +10 -12
  67. AeroViz/tools/database.py +7 -9
  68. AeroViz/tools/datareader.py +3 -3
  69. {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.4.dist-info}/METADATA +1 -1
  70. AeroViz-0.1.4.dist-info/RECORD +112 -0
  71. AeroViz/rawDataReader/script/IGAC_TH.py +0 -104
  72. AeroViz/rawDataReader/script/OCEC_LCRES.py +0 -34
  73. AeroViz/rawDataReader/script/OCEC_RES.py +0 -28
  74. AeroViz/rawDataReader/script/VOC_TH.py +0 -30
  75. AeroViz/rawDataReader/script/VOC_ZM.py +0 -37
  76. AeroViz-0.1.3b0.dist-info/RECORD +0 -110
  77. /AeroViz/{config → data}/DEFAULT_PNSD_DATA.csv +0 -0
  78. /AeroViz/rawDataReader/{utils → config}/__init__.py +0 -0
  79. {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.4.dist-info}/LICENSE +0 -0
  80. {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.4.dist-info}/WHEEL +0 -0
  81. {AeroViz-0.1.3b0.dist-info → AeroViz-0.1.4.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,4 @@
1
+ import numpy as np
1
2
  from pandas import read_csv, to_numeric
2
3
 
3
4
  from AeroViz.rawDataReader.core import AbstractReader
@@ -7,12 +8,39 @@ class Reader(AbstractReader):
7
8
  nam = 'EPA_vertical'
8
9
 
9
10
  def _raw_reader(self, _file):
10
- with _file.open('r', encoding='big5', errors='ignore') as f:
11
- _df = read_csv(f, names=['time', 'station', 'comp', 'data', None], skiprows=1, na_values=['-'],
12
- parse_dates=['time'], index_col='time')
13
- _df['data'] = to_numeric(_df['data'], errors='coerce')
11
+ with _file.open('r', encoding='ascii', errors='ignore') as f:
12
+ # 有、無輸出有效值都可以
13
+ # read 查詢小時值(測項).csv
14
+ df = read_csv(f, encoding='ascii', encoding_errors='ignore', index_col=0, parse_dates=True,
15
+ usecols=lambda col: col != 'Unnamed: 1')
14
16
 
15
- _df_piv = _df.pivot_table(values='data', columns='comp', index='time')
16
- _df_piv.index.name = 'time'
17
+ df.index.name = 'Time'
18
+ df.rename(columns={'AMB_TEMP': 'AT', 'WIND_SPEED': 'WS', 'WIND_DIREC': 'WD'}, inplace=True)
17
19
 
18
- return _df_piv
20
+ # 欄位排序
21
+ desired_order = ['SO2', 'NO', 'NOx', 'NO2', 'CO', 'O3', 'THC', 'NMHC', 'CH4', 'PM10', 'PM2.5', 'WS', 'WD',
22
+ 'AT', 'RH']
23
+
24
+ missing_columns = []
25
+
26
+ for col in desired_order:
27
+ if col not in df.columns:
28
+ df[col] = np.nan
29
+ missing_columns.append(col)
30
+
31
+ if missing_columns:
32
+ self.logger.info(f"{'=' * 60}")
33
+ self.logger.info(f"Missing columns: {missing_columns}")
34
+ self.logger.info(f"{'=' * 60}")
35
+ print(f"Missing columns: {missing_columns}")
36
+
37
+ df = df[desired_order]
38
+
39
+ # 如果沒有將無效值拿掉就輸出 請將包含 #、L、O 的字串替換成 *
40
+ df.replace(to_replace=r'\d*[#LO]\b', value='*', regex=True, inplace=True)
41
+ df = df.apply(to_numeric, errors='coerce')
42
+
43
+ return df
44
+
45
+ def _QC(self, _df):
46
+ return _df
@@ -24,12 +24,5 @@ class Reader(AbstractReader):
24
24
  return _df / 0.035
25
25
 
26
26
  def _QC(self, _df):
27
- # QC data in 1 hr
28
- def _QC_func(_df_1hr):
29
- _df_ave = _df_1hr.mean()
30
- _df_std = _df_1hr.std()
31
- _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
32
-
33
- return _df_1hr.mask(_df_lowb | _df_highb).copy()
34
-
35
- return _df.resample('5min').apply(_QC_func).resample('1h').mean()
27
+ # QC data in 1h
28
+ return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
@@ -8,22 +8,21 @@ from AeroViz.rawDataReader.core import AbstractReader
8
8
 
9
9
 
10
10
  class Reader(AbstractReader):
11
- nam = 'IGAC_ZM'
11
+ nam = 'IGAC'
12
12
 
13
13
  def _raw_reader(self, _file):
14
14
 
15
- with (_file).open('r', encoding='utf-8-sig', errors='ignore') as f:
16
- _df = read_csv(f, parse_dates=[0], index_col=[0], na_values=['-']).apply(to_numeric, errors='coerce')
15
+ with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
16
+ _df = read_csv(f, parse_dates=True, index_col=0, na_values='-').apply(to_numeric, errors='coerce')
17
17
 
18
18
  _df.columns = _df.keys().str.strip(' ')
19
19
  _df.index.name = 'time'
20
20
 
21
- return _df.loc[_df.index.dropna()].loc[~_df.index.duplicated()]
21
+ return _df.loc[~_df.index.duplicated() & _df.index.notna()]
22
22
 
23
- ## QC data
24
23
  def _QC(self, _df):
25
24
 
26
- ## QC parameter, function (MDL SE LE)
25
+ # QC parameter, function (MDL SE LE)
27
26
  _mdl = {
28
27
  'Na+': 0.06,
29
28
  'NH4+': 0.05,
@@ -35,7 +34,8 @@ class Reader(AbstractReader):
35
34
  'NO3-': 0.11,
36
35
  'SO42-': 0.08,
37
36
  }
38
- _mdl.update(self._oth_set.get('mdl', {}))
37
+
38
+ # _mdl.update(self._oth_set.get('mdl', {}))
39
39
 
40
40
  def _se_le(_df_, _log=False):
41
41
  _df_ = np.log10(_df_) if _log else _df_
@@ -51,27 +51,27 @@ class Reader(AbstractReader):
51
51
  return 10 ** _se, 10 ** _le
52
52
  return _se, _le
53
53
 
54
- _cation, _anion, _main = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'], ['Cl-', 'NO2-', 'NO3-', 'SO42-', ], ['SO42-',
55
- 'NO3-',
56
- 'NH4+']
54
+ _cation, _anion, _main = (['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'],
55
+ ['Cl-', 'NO2-', 'NO3-', 'SO42-', ],
56
+ ['SO42-', 'NO3-', 'NH4+'])
57
57
 
58
58
  _df_salt = _df[_mdl.keys()].copy()
59
59
  _df_pm = _df['PM2.5'].copy()
60
60
 
61
- ## lower than PM2.5
62
- ## conc. of main salt should be present at the same time (NH4+, SO42-, NO3-)
61
+ # lower than PM2.5
62
+ # conc. of main salt should be present at the same time (NH4+, SO42-, NO3-)
63
63
  _df_salt = _df_salt.mask(_df_salt.sum(axis=1, min_count=1) > _df_pm).dropna(subset=_main).copy()
64
64
 
65
- ## mdl
65
+ # mdl
66
66
  for (_key, _df_col), _mdl_val in zip(_df_salt.items(), _mdl.values()):
67
67
  _df_salt[_key] = _df_col.mask(_df_col < _mdl_val, _mdl_val / 2)
68
68
 
69
- ## calculate SE LE
70
- ## salt < LE
69
+ # calculate SE LE
70
+ # salt < LE
71
71
  _se, _le = _se_le(_df_salt, _log=True)
72
72
  _df_salt = _df_salt.mask(_df_salt > _le).copy()
73
73
 
74
- ## C/A, A/C
74
+ # C/A, A/C
75
75
  _rat_CA = (_df_salt[_cation].sum(axis=1) / _df_salt[_anion].sum(axis=1)).to_frame()
76
76
  _rat_AC = (1 / _rat_CA).copy()
77
77
 
@@ -83,7 +83,7 @@ class Reader(AbstractReader):
83
83
 
84
84
  _df_salt = _df_salt.where((_cond_CA * _cond_AC)[0]).copy()
85
85
 
86
- ## conc. of main salt > SE
86
+ # conc. of main salt > SE
87
87
  _se, _le = _se_le(_df_salt[_main], _log=True)
88
88
  _df_salt[_main] = _df_salt[_main].mask(_df_salt[_main] < _se).copy()
89
89
 
@@ -22,24 +22,17 @@ class Reader(AbstractReader):
22
22
  'BB (%)': 'BB',
23
23
  })
24
24
 
25
- # remove data without Status=32 (Automatic Tape Advance), 65536 (Tape Move)
26
- # if not self._oth_set.get('ignore_err', False):
27
- # _df = _df.where((_df['Status'] != 32) | (_df['Status'] != 65536)).copy()
25
+ # if self.meta.get('error_state', False):
26
+ # _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()
28
27
 
29
- return _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'Delta-C', 'AAE', 'BB']]
28
+ _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'Delta-C', 'AAE', 'BB']]
29
+
30
+ return _df.loc[~_df.index.duplicated() & _df.index.notna()]
30
31
 
31
32
  # QC data
32
33
  def _QC(self, _df):
33
34
  # remove negative value
34
35
  _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'AAE', 'BB']].mask((_df < 0).copy())
35
36
 
36
- # call by _QC function
37
- # QC data in 1 hr
38
- def _QC_func(_df_1hr):
39
- _df_ave = _df_1hr.mean()
40
- _df_std = _df_1hr.std()
41
- _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
42
-
43
- return _df_1hr.mask(_df_lowb | _df_highb).copy()
44
-
45
- return _df.resample('1h', group_keys=False).apply(_QC_func).resample('5min').mean()
37
+ # QC data in 1h
38
+ return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
@@ -0,0 +1,103 @@
1
+ import numpy as np
2
+ from pandas import read_csv, to_datetime, to_numeric
3
+
4
+ from AeroViz.rawDataReader.core import AbstractReader
5
+
6
+
7
+ class Reader(AbstractReader):
8
+ nam = 'Minion'
9
+
10
+ def _raw_reader(self, _file):
11
+ with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
12
+ _df = read_csv(f, low_memory=False, index_col=0)
13
+
14
+ _df.index = to_datetime(_df.index, errors='coerce')
15
+ _df.index.name = 'time'
16
+
17
+ _df.columns = _df.keys().str.strip(' ')
18
+
19
+ return _df.loc[~_df.index.duplicated() & _df.index.notna()]
20
+
21
+ def _QC(self, _df):
22
+ # XRF QAQC
23
+ _df = self.XRF_QAQC(_df)
24
+
25
+ # ions balance
26
+ _df = self.ions_balance(_df)
27
+
28
+ # remove negative value
29
+ _df = _df.mask((_df < 0).copy())
30
+
31
+ # QC data in 6h
32
+ return _df.resample('6h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
33
+
34
+ # base on Xact 625i Minimum Decision Limit (MDL) for XRF in ng/m3, 60 min sample time
35
+ def XRF_QAQC(self, df):
36
+ MDL = {
37
+ 'Al': 100, 'Si': 18, 'P': 5.2, 'S': 3.2,
38
+ 'Cl': 1.7, 'K': 1.2, 'Ca': 0.3, 'Ti': 1.6,
39
+ 'V': 0.12, 'Cr': 0.12, 'Mn': 0.14, 'Fe': 0.17,
40
+ 'Co': 0.14, 'Ni': 0.096, 'Cu': 0.079, 'Zn': 0.067,
41
+ 'Ga': 0.059, 'Ge': 0.056, 'As': 0.063, 'Se': 0.081,
42
+ 'Br': 0.1, 'Rb': 0.19, 'Sr': 0.22, 'Y': 0.28,
43
+ 'Zr': 0.33, 'Nb': 0.41, 'Mo': 0.48, 'Ag': 1.9,
44
+ 'Cd': 2.5, 'In': 3.1, 'Sn': 4.1, 'Sb': 5.2,
45
+ 'Te': 0.6, 'I': 0.49, 'Cs': 0.37, 'Ba': 0.39,
46
+ 'La': 0.36, 'Ce': 0.3, 'Pt': 0.12, 'Au': 0.1,
47
+ 'Hg': 0.12, 'Tl': 0.12, 'Pb': 0.13, 'Bi': 0.13
48
+ }
49
+ # 將小於 MDL 值的數據替換為 NaN
50
+ for element, threshold in MDL.items():
51
+ if element in df.columns:
52
+ df[element] = df[element].where(df[element] >= threshold, np.nan)
53
+
54
+ self.logger.info(f"{'=' * 60}")
55
+ self.logger.info(f"XRF QAQC summary:")
56
+ self.logger.info("\t\ttransform values below MDL to NaN")
57
+ self.logger.info(f"{'=' * 60}")
58
+
59
+ return df
60
+
61
+ def ions_balance(self, df, tolerance=0.3):
62
+ """
63
+ Calculate the balance of ions in the system
64
+ """
65
+ # Define the ions
66
+ item = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+', 'F-', 'Cl-', 'NO2-', 'NO3-', 'PO43-', 'SO42-']
67
+
68
+ # Calculate the balance
69
+ _df = df[item].copy()
70
+ _df = _df.apply(lambda x: to_numeric(x, errors='coerce'))
71
+ _df['+_mole'] = _df[['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+']].div([23, 18, 39, (24 / 2), (40 / 2)]).sum(axis=1,
72
+ skipna=True)
73
+ _df['-_mole'] = _df[['Cl-', 'NO2-', 'NO3-', 'SO42-']].div([35.5, 46, 62, (96 / 2)]).sum(axis=1, skipna=True)
74
+
75
+ # Avoid division by zero
76
+ _df['ratio'] = np.where(_df['-_mole'] != 0, _df['+_mole'] / _df['-_mole'], np.nan)
77
+
78
+ # Calculate bounds
79
+ lower_bound, upper_bound = 1 - tolerance, 1 + tolerance
80
+
81
+ # 根据ratio决定是否保留原始数据
82
+ valid_mask = (
83
+ (_df['ratio'] <= upper_bound) &
84
+ (_df['ratio'] >= lower_bound) &
85
+ ~np.isnan(_df['+_mole']) &
86
+ ~np.isnan(_df['-_mole'])
87
+ )
88
+
89
+ # 保留数据或将不符合条件的行设为NaN
90
+ df.loc[~valid_mask, item] = np.nan
91
+
92
+ # 计算保留的数据的百分比
93
+ retained_percentage = (valid_mask.sum() / len(df)) * 100
94
+
95
+ self.logger.info(f"{'=' * 60}")
96
+ self.logger.info(f"Ions balance summary:")
97
+ self.logger.info(f"\t\tretain {retained_percentage.__round__(0)}% data within tolerance {tolerance}")
98
+ self.logger.info(f"{'=' * 60}")
99
+
100
+ if retained_percentage < 70:
101
+ self.logger.warning("Warning: The percentage of retained data is less than 70%")
102
+
103
+ return df
@@ -13,21 +13,24 @@ class Reader(AbstractReader):
13
13
  _df_grp = _df.groupby(0)
14
14
 
15
15
  # T : time
16
- _df_tm = _df_grp.get_group('T')[[1, 2, 3, 4, 5, 6]].astype(int)
17
-
18
- for _k in [2, 3, 4, 5, 6]:
19
- _df_tm[_k] = _df_tm[_k].astype(int).map('{:02d}'.format).copy()
20
- _df_tm = _df_tm.astype(str)
21
-
22
- _idx_tm = to_datetime((_df_tm[1] + _df_tm[2] + _df_tm[3] + _df_tm[4] + _df_tm[5] + _df_tm[6]),
23
- format='%Y%m%d%H%M%S')
16
+ _idx_tm = to_datetime(
17
+ _df_grp.get_group('T')[[1, 2, 3, 4, 5, 6]]
18
+ .map(lambda x: f"{int(x):02d}")
19
+ .agg(''.join, axis=1),
20
+ format='%Y%m%d%H%M%S'
21
+ )
24
22
 
25
23
  # D : data
26
24
  # col : 3~8 B G R BB BG BR
27
25
  # 1e6
28
26
  try:
29
27
  _df_dt = _df_grp.get_group('D')[[1, 2, 3, 4, 5, 6, 7, 8]].set_index(_idx_tm)
30
- _df_out = (_df_dt.groupby(1).get_group('NBXX')[[3, 4, 5, 6, 7, 8]] * 1e6).reindex(_idx_tm)
28
+
29
+ try:
30
+ _df_out = (_df_dt.groupby(1).get_group('NBXX')[[3, 4, 5, 6, 7, 8]] * 1e6).reindex(_idx_tm)
31
+ except KeyError:
32
+ _df_out = (_df_dt.groupby(1).get_group('NTXX')[[3, 4, 5, 6, 7, 8]] * 1e6).reindex(_idx_tm)
33
+
31
34
  _df_out.columns = ['B', 'G', 'R', 'BB', 'BG', 'BR']
32
35
  _df_out.index.name = 'Time'
33
36
 
@@ -39,24 +42,24 @@ class Reader(AbstractReader):
39
42
 
40
43
  _df_out.mask(_df_out['status'] != 0) # 0000 -> numeric to 0
41
44
 
42
- return _df_out[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']]
45
+ _df = _df_out[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']]
46
+
47
+ return _df.loc[~_df.index.duplicated() & _df.index.notna()]
43
48
 
44
49
  except ValueError:
45
50
  group_sizes = _df_grp.size()
46
51
  print(group_sizes)
47
- # Define the valid groups
48
- valid_groups = {'B', 'G', 'R', 'D', 'T', 'Y', 'Z'}
49
52
 
50
- # Find the rows where the value in the first column is not in valid_groups
53
+ # Define valid groups and find invalid indices
54
+ valid_groups = {'B', 'G', 'R', 'D', 'T', 'Y', 'Z'}
51
55
  invalid_indices = _df[~_df[0].isin(valid_groups)].index
52
56
 
53
- # Print the invalid indices and their corresponding values
54
- invalid_values = _df.loc[invalid_indices, 0]
57
+ # Print invalid indices and values
55
58
  print("Invalid values and their indices:")
56
- for idx, value in zip(invalid_indices, invalid_values):
57
- print(f"Index: {idx}, Value: {value}")
59
+ for idx in invalid_indices:
60
+ print(f"Index: {idx}, Value: {_df.at[idx, 0]}")
58
61
 
59
- # If there's a length mismatch, return an empty DataFrame with the same index and column names
62
+ # Return an empty DataFrame with specified columns if there's a length mismatch
60
63
  columns = ['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']
61
64
  _df_out = DataFrame(index=_idx_tm, columns=columns)
62
65
  _df_out.index.name = 'Time'
@@ -66,15 +69,7 @@ class Reader(AbstractReader):
66
69
  # QC data
67
70
  def _QC(self, _df):
68
71
  # remove negative value
69
- _df = _df.mask((_df <= 0).copy())
70
-
71
- # call by _QC function
72
- # QC data in 1 hr
73
- def _QC_func(_df_1hr):
74
- _df_ave = _df_1hr.mean()
75
- _df_std = _df_1hr.std()
76
- _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5)
77
-
78
- return _df_1hr.mask(_df_lowb | _df_highb).copy()
72
+ _df = _df.mask((_df <= 5).copy())
79
73
 
80
- return _df.resample('1h', group_keys=False).apply(_QC_func)
74
+ # QC data in 1h
75
+ return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
@@ -21,19 +21,19 @@ class Reader(AbstractReader):
21
21
  _df_idx = to_datetime(_df.index, errors='coerce')
22
22
  return _df[_newkey.keys()].rename(_newkey, axis=1).set_index(_df_idx).loc[_df_idx.dropna()]
23
23
 
24
- ## QC data
24
+ # QC data
25
25
  def _QC(self, _df):
26
26
  import numpy as n
27
27
 
28
- ## mask out the data size lower than 7
28
+ # mask out the data size lower than 7
29
29
  _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
30
30
  _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
31
31
  _df = _df.mask(_df_size < 7)
32
32
 
33
- ## remove total conc. lower than 2000
33
+ # remove total conc. lower than 2000
34
34
  _df = _df.mask(_df['total'] < 2000)
35
35
 
36
- ## remove the bin over 400 nm which num. conc. larger than 4000
36
+ # remove the bin over 400 nm which num. conc. larger than 4000
37
37
  _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]
38
38
 
39
39
  _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)
@@ -21,29 +21,29 @@ class Reader(AbstractReader):
21
21
  _df = read_csv(f, skiprows=skiprows)
22
22
  _tm_idx = to_datetime(_df['DateTime Sample Start'], format='%d/%m/%Y %X', errors='coerce')
23
23
 
24
- ## index
24
+ # index
25
25
  _df = _df.set_index(_tm_idx).loc[_tm_idx.dropna()]
26
26
 
27
- ## keys
27
+ # keys
28
28
  _key = to_numeric(_df.keys(), errors='coerce')
29
29
  _df.columns = _key
30
30
  _df = _df.loc[:, ~_key.isna()]
31
31
 
32
32
  return _df.apply(to_numeric, errors='coerce')
33
33
 
34
- ## QC data
34
+ # QC data
35
35
  def _QC(self, _df):
36
36
  import numpy as n
37
37
 
38
- ## mask out the data size lower than 7
38
+ # mask out the data size lower than 7
39
39
  _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
40
40
  _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
41
41
  _df = _df.mask(_df_size < 7)
42
42
 
43
- ## remove total conc. lower than 2000
43
+ # remove total conc. lower than 2000
44
44
  _df = _df.mask(_df['total'] < 2000)
45
45
 
46
- ## remove the bin over 400 nm which num. conc. larger than 4000
46
+ # remove the bin over 400 nm which num. conc. larger than 4000
47
47
  _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]
48
48
 
49
49
  _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)
@@ -21,29 +21,29 @@ class Reader(AbstractReader):
21
21
  _df = read_table(f, skiprows=skiprows)
22
22
  _tm_idx = to_datetime(_df['Date'] + _df['Start Time'], format='%m/%d/%y%X', errors='coerce')
23
23
 
24
- ## index
24
+ # index
25
25
  _df = _df.set_index(_tm_idx).loc[_tm_idx.dropna()]
26
26
 
27
- ## keys
27
+ # keys
28
28
  _key = to_numeric(_df.keys(), errors='coerce')
29
29
  _df.columns = _key
30
30
  _df = _df.loc[:, ~_key.isna()]
31
31
 
32
32
  return _df.apply(to_numeric, errors='coerce')
33
33
 
34
- ## QC data
34
+ # QC data
35
35
  def _QC(self, _df):
36
36
  import numpy as n
37
37
 
38
- ## mask out the data size lower than 7
38
+ # mask out the data size lower than 7
39
39
  _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean()
40
40
  _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
41
41
  _df = _df.mask(_df_size < 7)
42
42
 
43
- ## remove total conc. lower than 2000
43
+ # remove total conc. lower than 2000
44
44
  _df = _df.mask(_df['total'] < 2000)
45
45
 
46
- ## remove the bin over 400 nm which num. conc. larger than 4000
46
+ # remove the bin over 400 nm which num. conc. larger than 4000
47
47
  _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]
48
48
 
49
49
  _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)
@@ -0,0 +1,60 @@
1
+ from pandas import to_datetime, read_csv
2
+
3
+ from AeroViz.rawDataReader.core import AbstractReader
4
+
5
+
6
+ class Reader(AbstractReader):
7
+ nam = 'Sunset_OCEC'
8
+
9
+ def _raw_reader(self, _file):
10
+ with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
11
+ _df = read_csv(f, skiprows=3)
12
+
13
+ _df['Start Date/Time'] = _df['Start Date/Time'].str.strip()
14
+ _df['time'] = to_datetime(_df['Start Date/Time'], format='%m/%d/%Y %I:%M:%S %p', errors='coerce')
15
+ _df = _df.set_index('time')
16
+
17
+ _df = _df.rename(columns={
18
+ 'Thermal/Optical OC (ugC/LCm^3)': 'Thermal_OC',
19
+ 'OC ugC/m^3 (Thermal/Optical)': 'Thermal_OC',
20
+
21
+ 'Thermal/Optical EC (ugC/LCm^3)': 'Thermal_EC',
22
+ 'EC ugC/m^3 (Thermal/Optical)': 'Thermal_EC',
23
+
24
+ 'OC=TC-BC (ugC/LCm^3)': 'Optical_OC',
25
+ 'OC by diff ugC (TC-OptEC)': 'Optical_OC',
26
+
27
+ 'BC (ugC/LCm^3)': 'Optical_EC',
28
+ 'OptEC ugC/m^3': 'Optical_EC',
29
+
30
+ 'Sample Volume Local Condition Actual m^3': 'Sample_Volume',
31
+ 'TC (ugC/LCm^3)': 'TC',
32
+ 'TC ugC/m^3': 'TC',
33
+ 'OCPk1-ug C': 'OC1',
34
+ 'OCPk2-ug C': 'OC2',
35
+ 'OCPk3-ug C': 'OC3',
36
+ 'OCPk4-ug C': 'OC4',
37
+ 'Pyrolized C ug': 'PC'
38
+ })
39
+
40
+ _df = _df[['Thermal_OC', 'Optical_OC', 'Thermal_EC', 'Optical_EC', 'TC', 'OC1', 'OC2', 'OC3', 'OC4']]
41
+
42
+ return _df.loc[~_df.index.duplicated() & _df.index.notna()]
43
+
44
+ # QC data
45
+ def _QC(self, _df):
46
+ import numpy as np
47
+
48
+ _df = _df.where(_df > 0)
49
+
50
+ thresholds = {
51
+ 'Thermal_OC': 0.3,
52
+ 'Optical_OC': 0.3,
53
+ 'Thermal_EC': 0.015,
54
+ 'Optical_EC': 0.015
55
+ }
56
+
57
+ for col, thresh in thresholds.items():
58
+ _df.loc[_df[col] <= thresh, col] = np.nan
59
+
60
+ return _df
@@ -27,18 +27,20 @@ class Reader(AbstractReader):
27
27
 
28
28
  _df = _df.where(_df['status'] < 1e-7)
29
29
 
30
- return _df[['PM_NV', 'PM_Total', 'noise', ]]
30
+ _df = _df[['PM_NV', 'PM_Total', 'noise', ]]
31
31
 
32
- ## QC data
32
+ return _df.loc[~_df.index.duplicated() & _df.index.notna()]
33
+
34
+ # QC data
33
35
  def _QC(self, _df):
34
36
 
35
37
  _df_idx = _df.index.copy()
36
38
 
37
- ## remove negative value
38
- _df = _df.where(_df.noise < 0.01)[['PM_NV', 'PM_Total']].mask((_df < 0).copy())
39
+ # remove negative value
40
+ _df = _df.where(_df.noise < 0.01)[['PM_NV', 'PM_Total']].mask((_df <= 0).copy())
39
41
 
40
- ## QC data in 1 hr
41
- ## remove data where size < 8 in 1-hr
42
+ # QC data in 1 hr
43
+ # remove data where size < 8 in 1-hr
42
44
  for _key in ['PM_Total', 'PM_NV']:
43
45
  _size = _df[_key].dropna().resample('1h').size().reindex(_df_idx).ffill().copy()
44
46
  _df[_key] = _df[_key].mask(_size < 8)
@@ -1,6 +1,5 @@
1
1
  # read meteorological data from google sheet
2
2
 
3
-
4
3
  from pandas import read_csv, to_datetime
5
4
 
6
5
  from AeroViz.rawDataReader.core import AbstractReader
@@ -10,9 +9,7 @@ class Reader(AbstractReader):
10
9
  nam = 'Table'
11
10
 
12
11
  def _raw_reader(self, _file):
13
- self.meta['freq'] = self._oth_set.get('data_freq') or self.meta['freq']
14
-
15
- with (_file).open('r', encoding='utf-8-sig', errors='ignore') as f:
12
+ with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
16
13
  _df = read_csv(f, low_memory=False, index_col=0)
17
14
 
18
15
  _df.index = to_datetime(_df.index, errors='coerce', format=self._oth_set.get('date_format') or 'mixed')
@@ -20,9 +17,11 @@ class Reader(AbstractReader):
20
17
 
21
18
  _df.columns = _df.keys().str.strip(' ')
22
19
 
23
- _df = _df.loc[_df.index.dropna()].copy()
24
-
25
- return _df.loc[~_df.index.duplicated()]
20
+ return _df.loc[~_df.index.duplicated() & _df.index.notna()]
26
21
 
27
22
  def _QC(self, _df):
28
- return _df
23
+ # remove negative value
24
+ _df = _df.mask((_df < 0).copy())
25
+
26
+ # QC data in 6h
27
+ return _df.resample('6h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
@@ -0,0 +1,26 @@
1
+
2
+ from pandas import read_csv
3
+
4
+ from AeroViz.rawDataReader.core import AbstractReader
5
+
6
+
7
+ class Reader(AbstractReader):
8
+ nam = 'VOC'
9
+
10
+ def _raw_reader(self, _file):
11
+ with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
12
+ _df = read_csv(f, parse_dates=[0], index_col=[0], na_values=('-', 'N.D.'))
13
+
14
+ _df.columns = _df.keys().str.strip(' ')
15
+ _df.index.name = 'time'
16
+
17
+ try:
18
+ _df = _df[self.meta["key"]].loc[_df.index.dropna()]
19
+
20
+ except KeyError:
21
+ _df = _df[self.meta["key_2"]].loc[_df.index.dropna()]
22
+
23
+ return _df.loc[~_df.index.duplicated() & _df.index.notna()]
24
+
25
+ def _QC(self, _df):
26
+ return _df
@@ -1,22 +1,20 @@
1
1
  __all__ = [
2
2
  'NEPH',
3
3
  'Aurora',
4
- 'Table',
5
- 'EPA_vertical',
6
- 'APS_3321',
7
4
  'SMPS_TH',
5
+ 'SMPS_genr',
6
+ 'SMPS_aim11',
7
+ 'APS_3321',
8
+ 'GRIMM',
8
9
  'AE33',
9
10
  'AE43',
10
11
  'BC1054',
11
12
  'MA350',
12
13
  'TEOM',
13
- 'OCEC_RES',
14
- 'OCEC_LCRES',
15
- 'IGAC_TH',
16
- 'IGAC_ZM',
17
- 'VOC_TH',
18
- 'VOC_ZM',
19
- 'SMPS_genr',
20
- 'SMPS_aim11',
21
- 'GRIMM'
14
+ 'Sunset_OCEC',
15
+ 'IGAC',
16
+ 'VOC',
17
+ 'Table',
18
+ 'EPA_vertical',
19
+ 'Minion'
22
20
  ]