AeroViz 0.1.9.0__tar.gz → 0.1.9.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (113)
  1. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/Chemistry/_ocec.py +5 -5
  2. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/SizeDistr/_size_distr.py +5 -1
  3. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/hysplit/hysplit.py +11 -3
  4. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/__init__.py +1 -1
  5. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/config/supported_instruments.py +31 -41
  6. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/core/__init__.py +28 -96
  7. aeroviz-0.1.9.2/AeroViz/rawDataReader/core/qc.py +184 -0
  8. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/script/AE33.py +2 -2
  9. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/script/AE43.py +2 -2
  10. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/script/Aurora.py +2 -2
  11. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/script/BC1054.py +3 -2
  12. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/script/EPA.py +6 -4
  13. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/script/GRIMM.py +0 -1
  14. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/script/IGAC.py +3 -1
  15. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/script/MA350.py +3 -2
  16. aeroviz-0.1.9.2/AeroViz/rawDataReader/script/Minion.py +214 -0
  17. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/script/NEPH.py +2 -2
  18. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/script/OCEC.py +4 -2
  19. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/script/TEOM.py +2 -2
  20. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz.egg-info/PKG-INFO +4 -1
  21. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz.egg-info/SOURCES.txt +4 -3
  22. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz.egg-info/requires.txt +3 -0
  23. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/PKG-INFO +4 -1
  24. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/setup.py +18 -8
  25. aeroviz-0.1.9.2/tests/test_RawDataReader.py +118 -0
  26. aeroviz-0.1.9.0/test/test_aeroviz_imports.py → aeroviz-0.1.9.2/tests/test_aeroviz_import.py +1 -0
  27. aeroviz-0.1.9.0/AeroViz/rawDataReader/script/Minion.py +0 -180
  28. aeroviz-0.1.9.0/test/test_plot.py +0 -133
  29. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/__init__.py +0 -0
  30. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/data/DEFAULT_DATA.csv +0 -0
  31. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/data/DEFAULT_PNSD_DATA.csv +0 -0
  32. aeroviz-0.1.9.0/AeroViz/data/240228_00.txt → aeroviz-0.1.9.2/AeroViz/data/hysplit_example_data.txt +0 -0
  33. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/Chemistry/__init__.py +0 -0
  34. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/Chemistry/_calculate.py +0 -0
  35. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/Chemistry/_isoropia.py +0 -0
  36. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/Chemistry/_mass_volume.py +0 -0
  37. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/Chemistry/_partition.py +0 -0
  38. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/Chemistry/_teom.py +0 -0
  39. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/Chemistry/isrpia.cnf +0 -0
  40. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
  41. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/Optical/Angstrom_exponent.py +0 -0
  42. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/Optical/_IMPROVE.py +0 -0
  43. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/Optical/__init__.py +0 -0
  44. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/Optical/_absorption.py +0 -0
  45. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/Optical/_extinction.py +0 -0
  46. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/Optical/_mie.py +0 -0
  47. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/Optical/_mie_sd.py +0 -0
  48. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/Optical/_scattering.py +0 -0
  49. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/Optical/fRH.pkl +0 -0
  50. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/SizeDistr/__init__.py +0 -0
  51. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/SizeDistr/__merge.py +0 -0
  52. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/SizeDistr/_merge.py +0 -0
  53. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/SizeDistr/_merge_v1.py +0 -0
  54. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/SizeDistr/_merge_v2.py +0 -0
  55. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/SizeDistr/_merge_v3.py +0 -0
  56. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/SizeDistr/_merge_v4.py +0 -0
  57. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/VOC/__init__.py +0 -0
  58. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/VOC/_potential_par.py +0 -0
  59. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/VOC/support_voc.json +0 -0
  60. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/__init__.py +0 -0
  61. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/dataProcess/core/__init__.py +0 -0
  62. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/__init__.py +0 -0
  63. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/bar.py +0 -0
  64. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/box.py +0 -0
  65. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/distribution/__init__.py +0 -0
  66. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/distribution/distribution.py +0 -0
  67. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/hysplit/__init__.py +0 -0
  68. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/meteorology/__init__.py +0 -0
  69. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/meteorology/meteorology.py +0 -0
  70. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/optical/PyMieScatt_update.py +0 -0
  71. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/optical/__init__.py +0 -0
  72. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/optical/mie_theory.py +0 -0
  73. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/optical/optical.py +0 -0
  74. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/pie.py +0 -0
  75. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/radar.py +0 -0
  76. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/regression.py +0 -0
  77. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/scatter.py +0 -0
  78. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/templates/__init__.py +0 -0
  79. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/templates/ammonium_rich.py +0 -0
  80. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/templates/contour.py +0 -0
  81. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/templates/corr_matrix.py +0 -0
  82. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/templates/diurnal_pattern.py +0 -0
  83. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/templates/koschmieder.py +0 -0
  84. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/templates/metal_heatmap.py +0 -0
  85. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/timeseries/__init__.py +0 -0
  86. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/timeseries/template.py +0 -0
  87. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/timeseries/timeseries.py +0 -0
  88. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/utils/__init__.py +0 -0
  89. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/utils/_color.py +0 -0
  90. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/utils/_unit.py +0 -0
  91. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/utils/fRH.json +0 -0
  92. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/utils/plt_utils.py +0 -0
  93. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/utils/sklearn_utils.py +0 -0
  94. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/utils/units.json +0 -0
  95. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/plot/violin.py +0 -0
  96. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/config/__init__.py +0 -0
  97. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/script/APS_3321.py +0 -0
  98. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/script/SMPS.py +0 -0
  99. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/script/VOC.py +0 -0
  100. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/script/XRF.py +0 -0
  101. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/rawDataReader/script/__init__.py +0 -0
  102. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/tools/__init__.py +0 -0
  103. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/tools/database.py +0 -0
  104. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/tools/dataclassifier.py +0 -0
  105. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/tools/dataprinter.py +0 -0
  106. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz/tools/datareader.py +0 -0
  107. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz.egg-info/dependency_links.txt +0 -0
  108. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/AeroViz.egg-info/top_level.txt +0 -0
  109. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/LICENSE +0 -0
  110. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/MANIFEST.in +0 -0
  111. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/README.md +0 -0
  112. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/requirements.txt +0 -0
  113. {aeroviz-0.1.9.0 → aeroviz-0.1.9.2}/setup.cfg +0 -0

--- aeroviz-0.1.9.0/AeroViz/dataProcess/Chemistry/_ocec.py
+++ aeroviz-0.1.9.2/AeroViz/dataProcess/Chemistry/_ocec.py
@@ -114,8 +114,7 @@ def _basic(_lcres, _mass, _ocec_ratio, _ocec_ratio_month, _hr_lim, _range, _wiso
     _out = {}

     # OC1, OC2, OC3, OC4, PC
-    _df_bsc = _lcres[['OC1_raw', 'OC2_raw', 'OC3_raw', 'OC4_raw']] / _lcres['Sample_Volume'].to_frame().values.copy()
-    _df_bsc.rename(columns={'OC1_raw': 'OC1', 'OC2_raw': 'OC2', 'OC3_raw': 'OC3', 'OC4_raw': 'OC4'}, inplace=True)
+    _df_bsc = _lcres[['OC1', 'OC2', 'OC3', 'OC4', 'PC']].copy()

     # SOC, POC, OC/EC
     if _ocec_ratio is not None:
@@ -144,7 +143,8 @@ def _basic(_lcres, _mass, _ocec_ratio, _ocec_ratio_month, _hr_lim, _range, _wiso
     _df_ratio = DataFrame(index=_df_bsc.index)

     for _ky, _val in _df_bsc.items():
-        if 'OC/EC' in _ky: continue
+        if 'OC/EC' in _ky:
+            continue
         _df_ratio[f'{_ky}/Thermal_OC'] = _val / _lcres['Thermal_OC']
         _df_ratio[f'{_ky}/Optical_OC'] = _val / _lcres['Optical_OC']

@@ -159,14 +159,14 @@ def _basic(_lcres, _mass, _ocec_ratio, _ocec_ratio_month, _hr_lim, _range, _wiso
     _df_ratio[f'Optical_EC/PM'] = _lcres['Optical_EC'] / _mass

     # ratio status
-    _df_bsc = concat((_lcres, _df_bsc.copy()), axis=1)
+    _df_bsc = concat((_lcres.loc[:, :'Sample_Volume'], _df_bsc.copy()), axis=1)

     for _ky, _df in _df_ratio.items():
         _df_bsc[f'{_ky}_status'] = 'Normal'
         _df_bsc[f'{_ky}_status'] = _df_bsc[f'{_ky}_status'].mask(_df > 1, 'Warning')

     # out
-    _out['ratio'] = _df_ratio
     _out['basic'] = _df_bsc
+    _out['ratio'] = _df_ratio

     return _out

--- aeroviz-0.1.9.0/AeroViz/dataProcess/SizeDistr/_size_distr.py
+++ aeroviz-0.1.9.2/AeroViz/dataProcess/SizeDistr/_size_distr.py
@@ -69,7 +69,11 @@ def _basic(df, hybrid, unit, bin_rg, input_type):

        df_oth[f'total_{_tp_nam}_{_md_nam}'], df_oth[f'GMD_{_tp_nam}_{_md_nam}'], df_oth[
            f'GSD_{_tp_nam}_{_md_nam}'] = _geometric_prop(_dia, _dt)
-       df_oth[f'mode_{_tp_nam}_{_md_nam}'] = _dt.idxmax(axis=1)
+
+       mask = _dt.notna().any(axis=1)
+
+       df_oth.loc[mask, f'mode_{_tp_nam}_{_md_nam}'] = _dt.loc[mask].idxmax(axis=1)
+       df_oth.loc[~mask, f'mode_{_tp_nam}_{_md_nam}'] = n.nan

    ## out
    out_dic['other'] = df_oth
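
The mask above guards the mode calculation: calling `idxmax(axis=1)` on a row where every size bin is NaN raises or degrades depending on the pandas version, so the release computes the mode diameter only for rows with at least one valid bin. A minimal sketch of the pattern, with invented bin columns:

    import numpy as np
    import pandas as pd

    # Invented number-size-distribution slice: rows are timestamps, columns are bin diameters (nm)
    _dt = pd.DataFrame({11.8: [1.0, np.nan], 20.5: [3.0, np.nan], 35.9: [2.0, np.nan]},
                       index=pd.date_range('2024-02-28', periods=2, freq='h'))

    mask = _dt.notna().any(axis=1)             # rows with at least one valid bin

    mode = pd.Series(np.nan, index=_dt.index)
    mode[mask] = _dt.loc[mask].idxmax(axis=1)  # diameter of the peak bin
    print(mode)                                # 20.5 for the first hour, NaN for the all-NaN hour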

--- aeroviz-0.1.9.0/AeroViz/plot/hysplit/hysplit.py
+++ aeroviz-0.1.9.2/AeroViz/plot/hysplit/hysplit.py
@@ -7,21 +7,29 @@ import pandas as pd

 from AeroViz.plot.utils import set_figure

-# TODO: Hybrid Single-Particle Lagrangian Integrated Trajectory (HYSPLIT) model
+# Hybrid Single-Particle Lagrangian Integrated Trajectory (HYSPLIT) model


 __all__ = ['hysplit']

 # Set the default file path
-DEFAULT_FILE = Path(__file__).parent.parent.parent / 'data' / '240228_00.txt'
+DEFAULT_FILE = Path(__file__).parent.parent.parent / 'data' / 'hysplit_example_data.txt'


 def read_hysplit_data(file: Path):
     data = pd.read_csv(file, skiprows=8, sep=r'\s+', names=range(0, 12), engine='python')
     data = data.reset_index(drop=False)
-    data.columns = ['category', 'name', 'abc', 'year', 'month', 'hour', 'min', 'cont', 'backward', 'lat', 'lon',
+    data.columns = ['category', 'name', 'year', 'month', 'day', 'hour', 'minute', 'count', 'backward', 'lat', 'lon',
                     'height', 'pressure']

+    time_cols = ['year', 'month', 'day', 'hour', 'minute']
+
+    data['time'] = pd.to_datetime(data[time_cols].astype(str).agg(''.join, axis=1), format='%y%m%d%H%M')
+
+    data = data.drop(columns=time_cols)
+
+    data = data[['time'] + [col for col in data.columns if col != 'time']]
+
     return data

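The new read_hysplit_data assembles a timestamp by string-concatenating the five time columns and parsing with format='%y%m%d%H%M', which expects every field to be two digits wide. A hedged sketch of the same assembly that zero-pads each field first (the padding is added here for illustration and is not part of the release):

    import pandas as pd

    # Invented endpoint fields, as whitespace-separated integers would be read
    data = pd.DataFrame({'year': [24], 'month': [2], 'day': [28], 'hour': [0], 'minute': [0]})

    time_cols = ['year', 'month', 'day', 'hour', 'minute']

    # Zero-pad to two digits so the joined string always matches '%y%m%d%H%M'
    joined = data[time_cols].astype(str).apply(lambda s: s.str.zfill(2)).agg(''.join, axis=1)
    data['time'] = pd.to_datetime(joined, format='%y%m%d%H%M')
    print(data['time'].iloc[0])  # 2024-02-28 00:00:00
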
--- aeroviz-0.1.9.0/AeroViz/rawDataReader/__init__.py
+++ aeroviz-0.1.9.2/AeroViz/rawDataReader/__init__.py
@@ -74,7 +74,7 @@ def RawDataReader(instrument_name: str,
     if not isinstance(path, Path):
         path = Path(path)
     if not path.exists() or not path.is_dir():
-        raise ValueError(f"The specified path '{path}' does not exist or is not a directory.")
+        raise FileNotFoundError(f"The specified path '{path}' does not exist or is not a directory.")

     # Validate the QC frequency
     if qc_freq is not None:
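
With this change, a missing or non-directory data path now surfaces as the standard FileNotFoundError instead of a generic ValueError. A minimal caller sketch, assuming RawDataReader is re-exported at the package top level, with placeholder arguments:

    from pathlib import Path

    from AeroViz import RawDataReader  # import location assumed

    try:
        data = RawDataReader('AE33', Path('/data/ae33'))  # hypothetical instrument and path
    except FileNotFoundError as err:
        print(f'Create or fix the data directory first: {err}')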

--- aeroviz-0.1.9.0/AeroViz/rawDataReader/config/supported_instruments.py
+++ aeroviz-0.1.9.2/AeroViz/rawDataReader/config/supported_instruments.py
@@ -75,6 +75,8 @@ meta = {
             "Thermal EC": ["Thermal_EC"],
             "Optical OC": ["Optical_OC"],
             "Optical EC": ["Optical_EC"],
+            "Thermal OC & EC": ["Thermal_OC", "Thermal_EC"],
+            "Optical OC & EC": ["Optical_OC", "Optical_EC"],
         },
     },

@@ -93,53 +95,41 @@ meta = {
             "SO42-": ["SO42-"],
             "Main Salt (NH4+, NO3-, SO42-)": ["NO3-", "SO42-", "NH4+"],
         },
+        # https://www.yangyao-env.com/web/product/product_in2.jsp?pd_id=PD1640151884502
+
+        # HF: 0.08, F-: 0.08, PO43-: None is not measured
+        "MDL": {
+            'HF': None, 'HCl': 0.05, 'HNO2': 0.01, 'HNO3': 0.05, 'G-SO2': 0.05, 'NH3': 0.1,
+            'Na+': 0.05, 'NH4+': 0.08, 'K+': 0.08, 'Mg2+': 0.05, 'Ca2+': 0.05,
+            'F-': None, 'Cl-': 0.05, 'NO2-': 0.05, 'NO3-': 0.01, 'PO43-': None, 'SO42-': 0.05,
+        },
+
+        "MR": {
+            'HF': 200, 'HCl': 200, 'HNO2': 200, 'HNO3': 200, 'G-SO2': 200, 'NH3': 300,
+            'Na+': 300, 'NH4+': 300, 'K+': 300, 'Mg2+': 300, 'Ca2+': 300,
+            'F-': 300, 'Cl-': 300, 'NO2-': 300, 'NO3-': 300, 'PO43-': None, 'SO42-': 300,
+        }
     },

     "XRF": {
         "pattern": ["*.csv"],
         "freq": "1h",
         "deter_key": {
-            "Al": ["Al"],
-            "Si": ["Si"],
-            "P": ["P"],
-            "S": ["S"],
-            "Cl": ["Cl"],
-            "K": ["K"],
-            "Ca": ["Ca"],
-            "Ti": ["Ti"],
-            "V": ["V"],
-            "Cr": ["Cr"],
-            "Mn": ["Mn"],
-            "Fe": ["Fe"],
-            "Ni": ["Ni"],
-            "Cu": ["Cu"],
-            "Zn": ["Zn"],
-            "As": ["As"],
-            "Se": ["Se"],
-            "Br": ["Br"],
-            "Rb": ["Rb"],
-            "Sr": ["Sr"],
-            "Y": ["Y"],
-            "Zr": ["Zr"],
-            "Mo": ["Mo"],
-            "Ag": ["Ag"],
-            "Cd": ["Cd"],
-            "In": ["In"],
-            "Sn": ["Sn"],
-            "Sb": ["Sb"],
-            "Te": ["Te"],
-            "Cs": ["Cs"],
-            "Ba": ["Ba"],
-            "La": ["La"],
-            "Ce": ["Ce"],
-            "W": ["W"],
-            "Pt": ["Pt"],
-            "Au": ["Au"],
-            "Hg": ["Hg"],
-            "Tl": ["Tl"],
-            "Pb": ["Pb"],
-            "Bi": ["Bi"],
+            "Several trace element (Al, Si, Ti, V, Cr, Mn, Fe)": ["Al", "Si", "Ti", "V", "Cr", "Mn", "Fe"],
+
         },
+        # base on Xact 625i Minimum Decision Limit (MDL) for XRF in ng/m3, 60 min sample time
+        "MDL": {
+            'Al': 100, 'Si': 18, 'P': 5.2, 'S': 3.2, 'Cl': 1.7,
+            'K': 1.2, 'Ca': 0.3, 'Ti': 1.6, 'V': 0.12, 'Cr': 0.12,
+            'Mn': 0.14, 'Fe': 0.17, 'Co': 0.14, 'Ni': 0.096, 'Cu': 0.079,
+            'Zn': 0.067, 'Ga': 0.059, 'Ge': 0.056, 'As': 0.063, 'Se': 0.081,
+            'Br': 0.1, 'Rb': 0.19, 'Sr': 0.22, 'Y': 0.28, 'Zr': 0.33,
+            'Nb': 0.41, 'Mo': 0.48, 'Pd': 2.2, 'Ag': 1.9, 'Cd': 2.5,
+            'In': 3.1, 'Sn': 4.1, 'Sb': 5.2, 'Te': 0.6, 'Cs': 0.37,
+            'Ba': 0.39, 'La': 0.36, 'Ce': 0.3, 'W': 0.0001, 'Pt': 0.12,
+            'Au': 0.1, 'Hg': 0.12, 'Tl': 0.12, 'Pb': 0.13, 'Bi': 0.13
+        }
     },

     "VOC": {
@@ -174,7 +164,7 @@ meta = {
         "freq": "1h",
         "deter_key": {
             "Main Salt (Na+, NH4+, Cl-, NO3-, SO42-)": ["Na+", "NH4+", "Cl-", "NO3-", "SO42-"],
-            "XRF (Al, Ti, V, Cr, Mn, Fe)": ["Al", "Ti", "V", "Cr", "Mn", "Fe"],
+            "Several trace element (Al, Ti, V, Cr, Mn, Fe)": ["Al", "Ti", "V", "Cr", "Mn", "Fe"],
         },
     },
 }

--- aeroviz-0.1.9.0/AeroViz/rawDataReader/core/__init__.py
+++ aeroviz-0.1.9.2/AeroViz/rawDataReader/core/__init__.py
@@ -7,11 +7,12 @@ from typing import Optional

 import numpy as np
 import pandas as pd
-from pandas import DataFrame, concat, read_pickle
+from pandas import DataFrame, concat, read_pickle, to_numeric
 from rich.console import Console
 from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, TaskProgressColumn

 from AeroViz.rawDataReader.config.supported_instruments import meta
+from AeroViz.rawDataReader.core.qc import DataQualityControl

 __all__ = ['AbstractReader']

@@ -75,18 +76,20 @@ class AbstractReader(ABC):

     @abstractmethod
     def _QC(self, df: DataFrame) -> DataFrame:
-        return self.n_sigma_QC(df)
+        return df

     def _setup_logger(self) -> logging.Logger:
         logger = logging.getLogger(self.nam)
         logger.setLevel(logging.INFO)

         for handler in logger.handlers[:]:
+            handler.close()
             logger.removeHandler(handler)

         handler = logging.FileHandler(self.path / f'{self.nam}.log')
         handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))
         logger.addHandler(handler)
+
         return logger

     def _rate_calculate(self, raw_data, qc_data) -> None:
@@ -94,18 +97,25 @@ class AbstractReader(ABC):
         period_size = len(raw_data.resample('1h').mean().index)

         for _nam, _key in self.meta['deter_key'].items():
-            _key, _drop_how = (qc_data.keys(), 'all') if _key is ['all'] else (_key, 'any')
+            _columns_key, _drop_how = (qc_data.keys(), 'all') if _key == ['all'] else (_key, 'any')

-            sample_size = len(raw_data[_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
-            qc_size = len(qc_data[_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
+            sample_size = len(raw_data[_columns_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
+            qc_size = len(qc_data[_columns_key].resample('1h').mean().copy().dropna(how=_drop_how).index)

             # validate rate calculation
-            if period_size < sample_size or sample_size < qc_size or period_size == 0 or sample_size == 0:
-                raise ValueError(f"Invalid sample sizes: period={period_size}, sample={sample_size}, QC={qc_size}")
+            if period_size == 0 or sample_size == 0 or qc_size == 0:
+                print(f'\t\t\033[91m No data for this period... skipping\033[0m')
+                continue
+
+            if period_size < sample_size or sample_size < qc_size:
+                print(
+                    f'\t\tInvalid size relationship: period={period_size}, sample={sample_size}, QC={qc_size}... skipping')
+                continue

-            _acq_rate = round((sample_size / period_size) * 100, 1)
-            _yid_rate = round((qc_size / sample_size) * 100, 1)
-            _OEE_rate = round((qc_size / period_size) * 100, 1)
+            else:
+                _acq_rate = round((sample_size / period_size) * 100, 1)
+                _yid_rate = round((qc_size / sample_size) * 100, 1)
+                _OEE_rate = round((qc_size / period_size) * 100, 1)

             self.logger.info(f'{_nam}:')
             self.logger.info(f"\tAcquisition rate: {_acq_rate}%")
@@ -114,8 +124,8 @@ class AbstractReader(ABC):
             self.logger.info(f"{'=' * 60}")

             print(f'\n\t{_nam} : ')
-            print(f'\t\tacquisition rate | yield rate | OEE rate :'
-                  f' \033[91m{_acq_rate}% | {_yid_rate}% -> {_OEE_rate}%\033[0m')
+            print(f'\t\tacquisition rate | yield rate -> OEE rate : '
+                  f'\033[91m{_acq_rate}% | {_yid_rate}% -> {_OEE_rate}%\033[0m')

         if self.meta['deter_key'] is not None:
             # use qc_freq to calculate each period rate
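
The three rates reported here are related: the acquisition rate is hourly samples received over hours in the period, the yield rate is QC-surviving samples over samples received, and OEE is their product, QC-surviving samples over the whole period. A quick numeric check using the formulas from this hunk, with invented sizes:

    # e.g. a 30-day month of hourly data
    period_size, sample_size, qc_size = 720, 648, 583

    _acq_rate = round((sample_size / period_size) * 100, 1)  # 90.0
    _yid_rate = round((qc_size / sample_size) * 100, 1)      # 90.0
    _OEE_rate = round((qc_size / period_size) * 100, 1)      # 81.0, i.e. 90% of 90%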
@@ -163,9 +173,7 @@ class AbstractReader(ABC):
         new_index = pd.date_range(user_start or df_start, user_end or df_end, freq=freq, name='time')

         # Process data: convert to numeric, resample, and reindex
-        return (_df.apply(pd.to_numeric, errors='coerce')
-                .resample(freq).mean()
-                .reindex(new_index))
+        return _df.reindex(new_index)

     def _outlier_process(self, _df):
         outlier_file = self.path / 'outlier.json'
@@ -235,8 +243,8 @@ class AbstractReader(ABC):

         raw_data = concat(df_list, axis=0).groupby(level=0).first()

-        raw_data = self._timeIndex_process(raw_data)
-        qc_data = self._QC(raw_data)
+        raw_data = self._timeIndex_process(raw_data).apply(to_numeric, errors='coerce').copy(deep=True)
+        qc_data = self._QC(raw_data).apply(to_numeric, errors='coerce').copy(deep=True)

         return raw_data, qc_data

@@ -279,6 +287,8 @@ class AbstractReader(ABC):
         self.logger.info(f"{'-' * 60}")

         if self.rate:
+            _f_raw = _f_raw.apply(to_numeric, errors='coerce')
+            _f_qc = _f_qc.apply(to_numeric, errors='coerce')
             self._rate_calculate(_f_raw, _f_qc)

         return _f_qc if self.qc else _f_raw
@@ -297,84 +307,6 @@ class AbstractReader(ABC):

         return df[new_order]

-    @staticmethod
-    def n_sigma_QC(df: pd.DataFrame, std_range: int = 5) -> pd.DataFrame:
-        # Ensure the input is a DataFrame
-        df = df.to_frame() if isinstance(df, pd.Series) else df
-
-        df_ave = df.mean()
-        df_std = df.std()
-
-        lower_bound = df < (df_ave - df_std * std_range)
-        upper_bound = df > (df_ave + df_std * std_range)
-
-        return df.mask(lower_bound | upper_bound)
-
-    @staticmethod
-    def IQR_QC(df: pd.DataFrame, log_dist=False) -> pd.DataFrame:
-        # Ensure the input is a DataFrame
-        df = df.to_frame() if isinstance(df, pd.Series) else df
-
-        df_transformed = np.log10(df) if log_dist else df
-
-        _df_q1 = df_transformed.quantile(0.25)
-        _df_q3 = df_transformed.quantile(0.75)
-
-        _df_iqr = _df_q3 - _df_q1
-
-        # Calculate lower and upper bounds
-        lower_bound = df_transformed < (_df_q1 - 1.5 * _df_iqr)
-        upper_bound = df_transformed > (_df_q3 + 1.5 * _df_iqr)
-
-        # Apply the filter to the original dataframe
-        return df.mask(lower_bound | upper_bound)
-
-    @staticmethod
-    def rolling_IQR_QC(df: pd.DataFrame, window_size=24, log_dist=False) -> pd.DataFrame:
-        df = df.to_frame() if isinstance(df, pd.Series) else df
-        df_transformed = np.log10(df) if log_dist else df
-
-        def iqr_filter(x):
-            q1, q3 = x.quantile(0.25), x.quantile(0.75)
-            iqr = q3 - q1
-            lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr
-            return (x >= lower) & (x <= upper)
-
-        mask = df_transformed.rolling(window=window_size, center=True, min_periods=1).apply(iqr_filter)
-        return df.where(mask, np.nan)
-
     @staticmethod
     def time_aware_IQR_QC(df: pd.DataFrame, time_window='1D', log_dist=False) -> pd.DataFrame:
-        df = df.to_frame() if isinstance(df, pd.Series) else df
-        df_transformed = np.log10(df) if log_dist else df
-
-        def iqr_filter(group):
-            q1, q3 = group.quantile(0.25), group.quantile(0.75)
-            iqr = q3 - q1
-            lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr
-            return (group >= lower) & (group <= upper)
-
-        mask = df_transformed.groupby(pd.Grouper(freq=time_window)).transform(iqr_filter)
-        return df.where(mask, np.nan)
-
-    @staticmethod
-    def mad_iqr_hybrid_QC(df: pd.DataFrame, mad_threshold=3.5, log_dist=False) -> pd.DataFrame:
-        df = df.to_frame() if isinstance(df, pd.Series) else df
-        df_transformed = np.log10(df) if log_dist else df
-
-        # IQR method
-        q1, q3 = df_transformed.quantile(0.25), df_transformed.quantile(0.75)
-        iqr = q3 - q1
-        iqr_lower, iqr_upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr
-
-        # MAD method
-        median = df_transformed.median()
-        mad = (df_transformed - median).abs().median()
-        mad_lower, mad_upper = median - mad_threshold * mad, median + mad_threshold * mad
-
-        # Combine the two methods
-        lower = np.maximum(iqr_lower, mad_lower)
-        upper = np.minimum(iqr_upper, mad_upper)
-
-        mask = (df_transformed >= lower) & (df_transformed <= upper)
-        return df.where(mask, np.nan)
+        return DataQualityControl().time_aware_iqr(df, time_window=time_window, log_dist=log_dist)

--- /dev/null
+++ aeroviz-0.1.9.2/AeroViz/rawDataReader/core/qc.py
@@ -0,0 +1,184 @@
+import numpy as np
+import pandas as pd
+
+
+class DataQualityControl:
+    """A class providing various methods for data quality control and outlier detection"""
+
+    @staticmethod
+    def _ensure_dataframe(df: pd.DataFrame | pd.Series) -> pd.DataFrame:
+        """Ensure input data is in DataFrame format"""
+        return df.to_frame() if isinstance(df, pd.Series) else df
+
+    @staticmethod
+    def _transform_if_log(df: pd.DataFrame, log_dist: bool) -> pd.DataFrame:
+        """Transform data to log scale if required"""
+        return np.log10(df) if log_dist else df
+
+    @classmethod
+    def n_sigma(cls, df: pd.DataFrame, std_range: int = 5) -> pd.DataFrame:
+        """
+        Detect outliers using n-sigma method
+
+        Parameters
+        ----------
+        df : pd.DataFrame
+            Input data
+        std_range : int, default=5
+            Number of standard deviations to use as threshold
+
+        Returns
+        -------
+        pd.DataFrame
+            Cleaned DataFrame with outliers masked as NaN
+        """
+        df = cls._ensure_dataframe(df)
+        df_ave = df.mean()
+        df_std = df.std()
+
+        lower_bound = df < (df_ave - df_std * std_range)
+        upper_bound = df > (df_ave + df_std * std_range)
+
+        return df.mask(lower_bound | upper_bound)
+
+    @classmethod
+    def iqr(cls, df: pd.DataFrame, log_dist: bool = False) -> pd.DataFrame:
+        """
+        Detect outliers using Interquartile Range (IQR) method
+
+        Parameters
+        ----------
+        df : pd.DataFrame
+            Input data
+        log_dist : bool, default=False
+            Whether to apply log transformation to data
+
+        Returns
+        -------
+        pd.DataFrame
+            Cleaned DataFrame with outliers masked as NaN
+        """
+        df = cls._ensure_dataframe(df)
+        df_transformed = cls._transform_if_log(df, log_dist)
+
+        q1 = df_transformed.quantile(0.25)
+        q3 = df_transformed.quantile(0.75)
+        iqr = q3 - q1
+
+        lower_bound = df_transformed < (q1 - 1.5 * iqr)
+        upper_bound = df_transformed > (q3 + 1.5 * iqr)
+
+        return df.mask(lower_bound | upper_bound)
+
+    @classmethod
+    def rolling_iqr(cls, df: pd.DataFrame, window_size: int = 24,
+                    log_dist: bool = False) -> pd.DataFrame:
+        """
+        Detect outliers using rolling window IQR method
+
+        Parameters
+        ----------
+        df : pd.DataFrame
+            Input data
+        window_size : int, default=24
+            Size of the rolling window
+        log_dist : bool, default=False
+            Whether to apply log transformation to data
+
+        Returns
+        -------
+        pd.DataFrame
+            Cleaned DataFrame with outliers masked as NaN
+        """
+        df = cls._ensure_dataframe(df)
+        df_transformed = cls._transform_if_log(df, log_dist)
+
+        def iqr_filter(x):
+            q1, q3 = x.quantile(0.25), x.quantile(0.75)
+            iqr = q3 - q1
+            lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr
+            return (x >= lower) & (x <= upper)
+
+        mask = df_transformed.rolling(
+            window=window_size,
+            center=True,
+            min_periods=1
+        ).apply(iqr_filter)
+
+        return df.where(mask, np.nan)
+
+    @classmethod
+    def time_aware_iqr(cls, df: pd.DataFrame, time_window: str = '1D',
+                       log_dist: bool = False) -> pd.DataFrame:
+        """
+        Detect outliers using time-aware IQR method
+
+        Parameters
+        ----------
+        df : pd.DataFrame
+            Input data
+        time_window : str, default='1D'
+            Time window size (e.g., '1D' for one day)
+        log_dist : bool, default=False
+            Whether to apply log transformation to data
+
+        Returns
+        -------
+        pd.DataFrame
+            Cleaned DataFrame with outliers masked as NaN
+        """
+        df = cls._ensure_dataframe(df)
+        df_transformed = cls._transform_if_log(df, log_dist)
+
+        def iqr_filter(group):
+            q1, q3 = group.quantile(0.25), group.quantile(0.75)
+            iqr = q3 - q1
+            lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr
+            return (group >= lower) & (group <= upper)
+
+        mask = df_transformed.groupby(
+            pd.Grouper(freq=time_window)
+        ).transform(iqr_filter)
+
+        return df.where(mask, np.nan)
+
+    @classmethod
+    def mad_iqr_hybrid(cls, df: pd.DataFrame, mad_threshold: float = 3.5,
+                       log_dist: bool = False) -> pd.DataFrame:
+        """
+        Detect outliers using a hybrid of MAD and IQR methods
+
+        Parameters
+        ----------
+        df : pd.DataFrame
+            Input data
+        mad_threshold : float, default=3.5
+            Threshold for MAD method
+        log_dist : bool, default=False
+            Whether to apply log transformation to data
+
+        Returns
+        -------
+        pd.DataFrame
+            Cleaned DataFrame with outliers masked as NaN
+        """
+        df = cls._ensure_dataframe(df)
+        df_transformed = cls._transform_if_log(df, log_dist)
+
+        # IQR method
+        q1, q3 = df_transformed.quantile(0.25), df_transformed.quantile(0.75)
+        iqr = q3 - q1
+        iqr_lower, iqr_upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr
+
+        # MAD method
+        median = df_transformed.median()
+        mad = (df_transformed - median).abs().median()
+        mad_lower = median - mad_threshold * mad
+        mad_upper = median + mad_threshold * mad
+
+        # Combine both methods
+        lower = np.maximum(iqr_lower, mad_lower)
+        upper = np.minimum(iqr_upper, mad_upper)
+
+        mask = (df_transformed >= lower) & (df_transformed <= upper)
+        return df.where(mask, np.nan)
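
DataQualityControl collects the outlier filters that previously lived as static methods on AbstractReader (compare the removals above); every method accepts a DataFrame or Series and returns a copy with outliers masked as NaN. A short usage sketch against these signatures, with invented data:

    import numpy as np
    import pandas as pd

    from AeroViz.rawDataReader.core.qc import DataQualityControl

    rng = np.random.default_rng(42)
    df = pd.DataFrame({'BC': rng.lognormal(1.0, 0.4, 240)},
                      index=pd.date_range('2024-02-01', periods=240, freq='h'))
    df.iloc[100] = 500.0  # inject a spike

    # Day-by-day IQR filter on the log-transformed values
    cleaned = DataQualityControl.time_aware_iqr(df, time_window='1D', log_dist=True)
    print(df['BC'].max(), cleaned['BC'].max())  # the spike is masked to NaN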

--- aeroviz-0.1.9.0/AeroViz/rawDataReader/script/AE33.py
+++ aeroviz-0.1.9.2/AeroViz/rawDataReader/script/AE33.py
@@ -11,14 +11,14 @@ class Reader(AbstractReader):
            self.logger.info(f'\t {file} may not be a whole daily data. Make sure the file is correct.')

        _df = read_table(file, parse_dates={'time': [0, 1]}, index_col='time',
-                        delimiter=r'\s+', skiprows=5, usecols=range(67)).apply(to_numeric, errors='coerce')
+                        delimiter=r'\s+', skiprows=5, usecols=range(67))
        _df.columns = _df.columns.str.strip(';')

        # remove data without Status=0, 128 (Not much filter tape), 256 (Not much filter tape)
        if self.meta.get('error_state', False):
            _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()

-        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']]
+        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']].apply(to_numeric, errors='coerce')

        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

--- aeroviz-0.1.9.0/AeroViz/rawDataReader/script/AE43.py
+++ aeroviz-0.1.9.2/AeroViz/rawDataReader/script/AE43.py
@@ -7,7 +7,7 @@ class Reader(AbstractReader):
    nam = 'AE43'

    def _raw_reader(self, file):
-        _df = read_csv(file, parse_dates={'time': ['StartTime']}, index_col='time').apply(to_numeric, errors='coerce')
+        _df = read_csv(file, parse_dates={'time': ['StartTime']}, index_col='time')
        _df_id = _df['SetupID'].iloc[-1]

        # get last SetupID data
@@ -18,7 +18,7 @@ class Reader(AbstractReader):
        if self.meta.get('error_state', False):
            _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy()

-        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']]
+        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']].apply(to_numeric, errors='coerce')

        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

--- aeroviz-0.1.9.0/AeroViz/rawDataReader/script/Aurora.py
+++ aeroviz-0.1.9.2/AeroViz/rawDataReader/script/Aurora.py
@@ -8,7 +8,7 @@ class Reader(AbstractReader):

    def _raw_reader(self, file):
        with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
-            _df = read_csv(f, low_memory=False, index_col=0).apply(to_numeric, errors='coerce')
+            _df = read_csv(f, low_memory=False, index_col=0)

        _df.index = to_datetime(_df.index, errors='coerce')
        _df.index.name = 'time'
@@ -24,7 +24,7 @@ class Reader(AbstractReader):
            'RH': 'RH'
        })

-        _df = _df[['B', 'G', 'R', 'BB', 'BG', 'BR']]
+        _df = _df[['B', 'G', 'R', 'BB', 'BG', 'BR']].apply(to_numeric, errors='coerce')

        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

--- aeroviz-0.1.9.0/AeroViz/rawDataReader/script/BC1054.py
+++ aeroviz-0.1.9.2/AeroViz/rawDataReader/script/BC1054.py
@@ -8,7 +8,7 @@ class Reader(AbstractReader):

    def _raw_reader(self, file):
        with open(file, 'r', encoding='utf-8', errors='ignore') as f:
-            _df = read_csv(f, parse_dates=True, index_col=0).apply(to_numeric, errors='coerce')
+            _df = read_csv(f, parse_dates=True, index_col=0)

        _df.columns = _df.columns.str.replace(' ', '')

@@ -29,7 +29,8 @@ class Reader(AbstractReader):
        if self.meta.get('error_state', False):
            _df = _df[~_df['Status'].isin(self.meta.get('error_state'))]

-        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']]
+        _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']].apply(to_numeric,
+                                                                                                 errors='coerce')

        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

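A recurring change across the instrument readers above (AE33, AE43, Aurora, BC1054) moves `.apply(to_numeric, errors='coerce')` from the initial read to after the error-state filtering and channel selection, so only the retained measurement columns are coerced and the rest of the frame is left as read. A minimal sketch of the new ordering, with invented values:

    import pandas as pd
    from pandas import to_numeric

    _df = pd.DataFrame({'Status': [0, 128, 0], 'BC1': ['1.2', '3.4', 'bad']})

    # Filter on the raw Status values first, then coerce only the kept channels
    _df = _df.where(~_df['Status'].isin([128])).copy()
    _df = _df[['BC1']].apply(to_numeric, errors='coerce')  # 'bad' becomes NaN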