AeroViz 0.1.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. AeroViz/__init__.py +13 -0
  2. AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
  3. AeroViz/data/DEFAULT_DATA.csv +1417 -0
  4. AeroViz/data/DEFAULT_PNSD_DATA.csv +1417 -0
  5. AeroViz/data/hysplit_example_data.txt +101 -0
  6. AeroViz/dataProcess/Chemistry/__init__.py +149 -0
  7. AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
  8. AeroViz/dataProcess/Chemistry/_calculate.py +557 -0
  9. AeroViz/dataProcess/Chemistry/_isoropia.py +150 -0
  10. AeroViz/dataProcess/Chemistry/_mass_volume.py +487 -0
  11. AeroViz/dataProcess/Chemistry/_ocec.py +172 -0
  12. AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
  13. AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
  14. AeroViz/dataProcess/Optical/PyMieScatt_update.py +577 -0
  15. AeroViz/dataProcess/Optical/_IMPROVE.py +452 -0
  16. AeroViz/dataProcess/Optical/__init__.py +281 -0
  17. AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
  18. AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
  19. AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
  20. AeroViz/dataProcess/Optical/_derived.py +518 -0
  21. AeroViz/dataProcess/Optical/_extinction.py +123 -0
  22. AeroViz/dataProcess/Optical/_mie_sd.py +912 -0
  23. AeroViz/dataProcess/Optical/_retrieve_RI.py +243 -0
  24. AeroViz/dataProcess/Optical/coefficient.py +72 -0
  25. AeroViz/dataProcess/Optical/fRH.pkl +0 -0
  26. AeroViz/dataProcess/Optical/mie_theory.py +260 -0
  27. AeroViz/dataProcess/README.md +271 -0
  28. AeroViz/dataProcess/SizeDistr/__init__.py +245 -0
  29. AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
  30. AeroViz/dataProcess/SizeDistr/__pycache__/_size_dist.cpython-312.pyc +0 -0
  31. AeroViz/dataProcess/SizeDistr/_size_dist.py +810 -0
  32. AeroViz/dataProcess/SizeDistr/merge/README.md +93 -0
  33. AeroViz/dataProcess/SizeDistr/merge/__init__.py +20 -0
  34. AeroViz/dataProcess/SizeDistr/merge/_merge_v0.py +251 -0
  35. AeroViz/dataProcess/SizeDistr/merge/_merge_v0_1.py +246 -0
  36. AeroViz/dataProcess/SizeDistr/merge/_merge_v1.py +255 -0
  37. AeroViz/dataProcess/SizeDistr/merge/_merge_v2.py +244 -0
  38. AeroViz/dataProcess/SizeDistr/merge/_merge_v3.py +518 -0
  39. AeroViz/dataProcess/SizeDistr/merge/_merge_v4.py +422 -0
  40. AeroViz/dataProcess/SizeDistr/prop.py +62 -0
  41. AeroViz/dataProcess/VOC/__init__.py +14 -0
  42. AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
  43. AeroViz/dataProcess/VOC/_potential_par.py +108 -0
  44. AeroViz/dataProcess/VOC/support_voc.json +446 -0
  45. AeroViz/dataProcess/__init__.py +66 -0
  46. AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
  47. AeroViz/dataProcess/core/__init__.py +272 -0
  48. AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
  49. AeroViz/mcp_server.py +352 -0
  50. AeroViz/plot/__init__.py +13 -0
  51. AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
  52. AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
  53. AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
  54. AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
  55. AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
  56. AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
  57. AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
  58. AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
  59. AeroViz/plot/bar.py +126 -0
  60. AeroViz/plot/box.py +69 -0
  61. AeroViz/plot/distribution/__init__.py +1 -0
  62. AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
  63. AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
  64. AeroViz/plot/distribution/distribution.py +576 -0
  65. AeroViz/plot/meteorology/CBPF.py +295 -0
  66. AeroViz/plot/meteorology/__init__.py +3 -0
  67. AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
  68. AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
  69. AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
  70. AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
  71. AeroViz/plot/meteorology/hysplit.py +93 -0
  72. AeroViz/plot/meteorology/wind_rose.py +77 -0
  73. AeroViz/plot/optical/__init__.py +1 -0
  74. AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
  75. AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
  76. AeroViz/plot/optical/optical.py +388 -0
  77. AeroViz/plot/pie.py +210 -0
  78. AeroViz/plot/radar.py +184 -0
  79. AeroViz/plot/regression.py +200 -0
  80. AeroViz/plot/scatter.py +174 -0
  81. AeroViz/plot/templates/__init__.py +6 -0
  82. AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
  83. AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
  84. AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
  85. AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
  86. AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
  87. AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
  88. AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
  89. AeroViz/plot/templates/ammonium_rich.py +34 -0
  90. AeroViz/plot/templates/contour.py +47 -0
  91. AeroViz/plot/templates/corr_matrix.py +267 -0
  92. AeroViz/plot/templates/diurnal_pattern.py +61 -0
  93. AeroViz/plot/templates/koschmieder.py +95 -0
  94. AeroViz/plot/templates/metal_heatmap.py +164 -0
  95. AeroViz/plot/timeseries/__init__.py +2 -0
  96. AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
  97. AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
  98. AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
  99. AeroViz/plot/timeseries/template.py +47 -0
  100. AeroViz/plot/timeseries/timeseries.py +446 -0
  101. AeroViz/plot/utils/__init__.py +4 -0
  102. AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  103. AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
  104. AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
  105. AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
  106. AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
  107. AeroViz/plot/utils/_color.py +71 -0
  108. AeroViz/plot/utils/_unit.py +55 -0
  109. AeroViz/plot/utils/fRH.json +390 -0
  110. AeroViz/plot/utils/plt_utils.py +92 -0
  111. AeroViz/plot/utils/sklearn_utils.py +49 -0
  112. AeroViz/plot/utils/units.json +89 -0
  113. AeroViz/plot/violin.py +80 -0
  114. AeroViz/rawDataReader/FLOW.md +138 -0
  115. AeroViz/rawDataReader/__init__.py +220 -0
  116. AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
  117. AeroViz/rawDataReader/config/__init__.py +0 -0
  118. AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
  119. AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
  120. AeroViz/rawDataReader/config/supported_instruments.py +135 -0
  121. AeroViz/rawDataReader/core/__init__.py +658 -0
  122. AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
  123. AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
  124. AeroViz/rawDataReader/core/__pycache__/pre_process.cpython-312.pyc +0 -0
  125. AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
  126. AeroViz/rawDataReader/core/__pycache__/report.cpython-312.pyc +0 -0
  127. AeroViz/rawDataReader/core/logger.py +171 -0
  128. AeroViz/rawDataReader/core/pre_process.py +308 -0
  129. AeroViz/rawDataReader/core/qc.py +961 -0
  130. AeroViz/rawDataReader/core/report.py +579 -0
  131. AeroViz/rawDataReader/script/AE33.py +173 -0
  132. AeroViz/rawDataReader/script/AE43.py +151 -0
  133. AeroViz/rawDataReader/script/APS.py +339 -0
  134. AeroViz/rawDataReader/script/Aurora.py +191 -0
  135. AeroViz/rawDataReader/script/BAM1020.py +90 -0
  136. AeroViz/rawDataReader/script/BC1054.py +161 -0
  137. AeroViz/rawDataReader/script/EPA.py +79 -0
  138. AeroViz/rawDataReader/script/GRIMM.py +68 -0
  139. AeroViz/rawDataReader/script/IGAC.py +140 -0
  140. AeroViz/rawDataReader/script/MA350.py +179 -0
  141. AeroViz/rawDataReader/script/Minion.py +218 -0
  142. AeroViz/rawDataReader/script/NEPH.py +199 -0
  143. AeroViz/rawDataReader/script/OCEC.py +173 -0
  144. AeroViz/rawDataReader/script/Q-ACSM.py +12 -0
  145. AeroViz/rawDataReader/script/SMPS.py +389 -0
  146. AeroViz/rawDataReader/script/TEOM.py +181 -0
  147. AeroViz/rawDataReader/script/VOC.py +106 -0
  148. AeroViz/rawDataReader/script/Xact.py +244 -0
  149. AeroViz/rawDataReader/script/__init__.py +28 -0
  150. AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
  151. AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
  152. AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
  153. AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
  154. AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
  155. AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
  156. AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
  157. AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
  158. AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
  159. AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
  160. AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
  161. AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
  162. AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
  163. AeroViz/rawDataReader/script/__pycache__/Q-ACSM.cpython-312.pyc +0 -0
  164. AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
  165. AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
  166. AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
  167. AeroViz/rawDataReader/script/__pycache__/Xact.cpython-312.pyc +0 -0
  168. AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
  169. AeroViz/tools/__init__.py +2 -0
  170. AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  171. AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
  172. AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
  173. AeroViz/tools/database.py +95 -0
  174. AeroViz/tools/dataclassifier.py +117 -0
  175. AeroViz/tools/dataprinter.py +58 -0
  176. aeroviz-0.1.21.dist-info/METADATA +294 -0
  177. aeroviz-0.1.21.dist-info/RECORD +180 -0
  178. aeroviz-0.1.21.dist-info/WHEEL +5 -0
  179. aeroviz-0.1.21.dist-info/licenses/LICENSE +21 -0
  180. aeroviz-0.1.21.dist-info/top_level.txt +1 -0
@@ -0,0 +1,140 @@
1
+ # Reader for IGAC (In-situ Gas and Aerosol Composition) monitor data files
2
+
3
+
4
+ from pandas import read_csv, to_numeric, Series
5
+
6
+ from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder
7
+
8
+
9
class Reader(AbstractReader):
    """IGAC (In-situ Gas and Aerosol Composition) Monitor Data Reader

    This class handles the reading and parsing of IGAC monitor data files,
    which provide real-time measurements of water-soluble inorganic ions in
    particulate matter.

    See full documentation at docs/source/instruments/IGAC.md for detailed information
    on supported formats and QC procedures.
    """
    nam = 'IGAC'

    # =========================================================================
    # Column Definitions
    # =========================================================================
    CATION_COLUMNS = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+']
    ANION_COLUMNS = ['Cl-', 'NO2-', 'NO3-', 'PO43-', 'SO42-']
    MAIN_IONS = ['SO42-', 'NO3-', 'NH4+']

    # =========================================================================
    # Detection Limits (MDL) in ug/m3
    # =========================================================================
    # NOTE: 'PO43-' has no entry here. _QC selects its ion columns from these
    # keys, so 'PO43-' is never part of the QC'd output.
    MDL = {
        'Na+': 0.06,
        'NH4+': 0.05,
        'K+': 0.05,
        'Mg2+': 0.12,
        'Ca2+': 0.07,
        'Cl-': 0.07,
        'NO2-': 0.05,
        'NO3-': 0.11,
        'SO42-': 0.08,
    }

    def _raw_reader(self, file):
        """
        Read and parse raw IGAC monitor data files.

        Parameters
        ----------
        file : Path or str
            Path to the IGAC data file.

        Returns
        -------
        pandas.DataFrame
            Processed IGAC data with datetime index and ion concentration columns.
            Rows with a duplicated or missing index value are dropped.
        """
        # The built-in open() accepts both str and Path, matching the documented
        # parameter type ('file.open' only worked for Path objects).
        with open(file, 'r', encoding='utf-8-sig', errors='ignore') as f:
            _df = read_csv(f, parse_dates=True, index_col=0, na_values='-')

        # Column headers may carry padding spaces.
        _df.columns = _df.keys().str.strip(' ')
        _df.index.name = 'time'

        # Anything that is not a number becomes NaN.
        _df = _df.apply(to_numeric, errors='coerce')

        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

    def _QC(self, _df):
        """
        Perform quality control on IGAC ion composition data.

        QC Rules Applied
        ----------------
        1. Mass Closure : Total ion mass > PM2.5 mass
        2. Missing Main : Main ions (NH4+, SO42-, NO3-) not present
        3. Below MDL    : Ion concentration below detection limit
        4. Ion Balance  : Cation/Anion ratio outside valid range

        Parameters
        ----------
        _df : pandas.DataFrame
            Raw data as returned by ``_raw_reader``.

        Returns
        -------
        pandas.DataFrame
            The ion columns (those with an MDL entry) after ``QCFlagBuilder.apply``,
            reindexed to the input index.
        """
        _index = _df.index.copy()

        # Get ion columns that exist in the data
        ion_columns = [col for col in self.MDL if col in _df.columns]
        df_qc = _df[ion_columns].copy()

        # Per-column detection limits, used by the vectorised "Below MDL" rule.
        mdl_limits = Series(self.MDL, dtype=float)

        # Calculate total ion mass for mass closure check
        total_ions = df_qc.sum(axis=1, min_count=1)
        # Without a PM2.5 column the closure check can never trigger (inf).
        pm25 = _df['PM2.5'] if 'PM2.5' in _df.columns else Series(float('inf'), index=_df.index)

        # Calculate cation/anion ratio for ion balance check
        cation_cols = [c for c in self.CATION_COLUMNS if c in df_qc.columns]
        anion_cols = [c for c in self.ANION_COLUMNS if c in df_qc.columns]
        cation_sum = df_qc[cation_cols].sum(axis=1, min_count=1) if cation_cols else Series(0, index=df_qc.index)
        anion_sum = df_qc[anion_cols].sum(axis=1, min_count=1) if anion_cols else Series(1, index=df_qc.index)
        # A zero anion sum would divide by zero; turn it into NaN instead.
        ca_ratio = cation_sum / anion_sum.replace(0, float('nan'))

        # Calculate IQR bounds for ion balance
        q1, q3 = ca_ratio.quantile(0.25), ca_ratio.quantile(0.75)
        iqr = q3 - q1
        ca_lower, ca_upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr

        def missing_main(df):
            """Flag rows where any main ion is NaN (only if all main-ion columns exist)."""
            if all(c in df.columns for c in self.MAIN_IONS):
                return df[self.MAIN_IONS].isna().any(axis=1)
            return Series(False, index=df.index)

        def below_mdl(df):
            """Flag rows where any ion is below its MDL; NaN cells compare False."""
            cols = [col for col in ion_columns if col in df.columns]
            return df[cols].lt(mdl_limits[cols], axis=1).any(axis=1)

        # Build QC rules declaratively.
        # "Mass Closure" and "Ion Balance" ignore the frame passed in and use
        # the series precomputed above.
        qc = QCFlagBuilder()
        qc.add_rules([
            QCRule(
                name='Mass Closure',
                condition=lambda df: total_ions > pm25,
                description='Total ion mass exceeds PM2.5 mass'
            ),
            QCRule(
                name='Missing Main',
                condition=missing_main,
                description='Missing main ions (NH4+, SO42-, NO3-)'
            ),
            QCRule(
                name='Below MDL',
                condition=below_mdl,
                description='Ion concentration below detection limit'
            ),
            QCRule(
                name='Ion Balance',
                condition=lambda df: (ca_ratio < ca_lower) | (ca_ratio > ca_upper) | ca_ratio.isna(),
                description='Cation/Anion ratio outside valid range'
            ),
        ])

        # Apply all QC rules and get flagged DataFrame
        df_qc = qc.apply(df_qc)

        # Log QC summary
        summary = qc.get_summary(df_qc)
        self.logger.info(f"{self.nam} QC Summary:")
        for _, row in summary.iterrows():
            self.logger.info(f" {row['Rule']}: {row['Count']} ({row['Percentage']})")

        return df_qc.reindex(_index)
@@ -0,0 +1,179 @@
1
+ from pandas import read_csv, to_numeric, concat, Series
2
+
3
+ from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder
4
+ from AeroViz.rawDataReader.core.pre_process import _absCoe
5
+
6
+
7
class Reader(AbstractReader):
    """MA350 Aethalometer Data Reader

    A specialized reader for MA350 Aethalometer data files, which measure
    black carbon at multiple wavelengths and provide source apportionment.

    See full documentation at docs/source/instruments/MA350.md for detailed information
    on supported formats and QC procedures.
    """
    nam = 'MA350'

    # =========================================================================
    # Column Definitions
    # =========================================================================
    BC_COLUMNS = ['BC1', 'BC2', 'BC3', 'BC4', 'BC5']
    ABS_COLUMNS = ['abs_375', 'abs_470', 'abs_528', 'abs_625', 'abs_880']
    CAL_COLUMNS = ['abs_550', 'AAE', 'eBC']

    # =========================================================================
    # QC Thresholds
    # =========================================================================
    MIN_BC = 0  # Minimum BC concentration (ng/m³)
    MAX_BC = 20000  # Maximum BC concentration (ng/m³)
    MIN_AAE = 0.7  # Minimum valid AAE (absolute value)
    MAX_AAE = 2.0  # Maximum valid AAE (absolute value)

    # =========================================================================
    # Status Error Codes (bitwise flags)
    # =========================================================================
    ERROR_STATES = [
        1,  # Power Failure
        2,  # Start up
        4,  # Tape advance
        16,  # Optical saturation
        32,  # Sample timing error
        128,  # Flow unstable
        256,  # Pump drive limit
        2048,  # System busy
        8192,  # Tape jam
        16384,  # Tape at end
        32768,  # Tape not ready
        65536,  # Tape transport not ready
        262144,  # Invalid date/time
        524288,  # Tape error
    ]

    def _raw_reader(self, file):
        """
        Read and parse raw MA350 Aethalometer data files.

        Parameters
        ----------
        file : Path or str
            Path to the MA350 data file.

        Returns
        -------
        pandas.DataFrame
            Processed MA350 data with datetime index and standardized black carbon
            and source apportionment columns. Rows with a duplicated or missing
            index value are dropped.

        Raises
        ------
        KeyError
            If any of the expected columns is absent after renaming.
        """
        time_col = 'Date / time local'
        _df = read_csv(file, parse_dates=[time_col], index_col=time_col).rename_axis("Time")

        # Map the instrument's column headers onto the names used downstream.
        _df = _df.rename(columns={
            'UV BCc': 'BC1',
            'Blue BCc': 'BC2',
            'Green BCc': 'BC3',
            'Red BCc': 'BC4',
            'IR BCc': 'BC5',
            'Biomass BCc  (ng/m^3)': 'BB mass',
            'Fossil fuel BCc  (ng/m^3)': 'FF mass',
            'Delta-C  (ng/m^3)': 'Delta-C',
            'AAE': 'AAE_ref',
            'BB (%)': 'BB',
        })

        # Keep only the columns of interest; non-numeric cells become NaN.
        wanted = self.BC_COLUMNS + ['BB mass', 'FF mass', 'Delta-C', 'AAE_ref', 'BB', 'Status']
        _df = _df[wanted].apply(to_numeric, errors='coerce')

        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

    def _QC(self, _df):
        """
        Perform quality control on MA350 Aethalometer raw data.

        QC Rules Applied (raw data only)
        ---------------------------------
        1. Status Error  : Invalid instrument status codes
        2. Invalid BC    : BC concentration outside 0-20000 ng/m³
        3. Insufficient  : Less than 50% hourly data completeness

        Note: AAE validation is done in _process() after calculation.

        The QC summary is stored on ``self._qc_summary`` so that ``_process``
        can log it together with the AAE check.
        """
        _index = _df.index.copy()
        df_qc = _df.copy()

        # Build QC rules declaratively
        qc = QCFlagBuilder()
        qc.add_rules([
            QCRule(
                name='Status Error',
                condition=lambda df: self.QC_control().filter_error_status(df, self.ERROR_STATES),
                description='Invalid instrument status code detected'
            ),
            QCRule(
                name='Invalid BC',
                # NOTE: '<=' means a value equal to MIN_BC is flagged as well.
                condition=lambda df: ((df[self.BC_COLUMNS] <= self.MIN_BC) |
                                      (df[self.BC_COLUMNS] > self.MAX_BC)).any(axis=1),
                description=f'BC concentration outside valid range {self.MIN_BC}-{self.MAX_BC} ng/m³'
            ),
            QCRule(
                name='Insufficient',
                condition=lambda df: self.QC_control().hourly_completeness_QC(
                    df[self.BC_COLUMNS], freq=self.meta['freq']
                ),
                description='Less than 50% hourly data completeness'
            ),
        ])

        # Apply all QC rules and get flagged DataFrame
        df_qc = qc.apply(df_qc)

        # Store QC summary for combined output in _process()
        self._qc_summary = qc.get_summary(df_qc)

        return df_qc.reindex(_index)

    def _process(self, _df):
        """
        Calculate absorption coefficients and validate derived parameters.

        Processing Steps
        ----------------
        1. Calculate absorption coefficients at each wavelength
        2. Calculate AAE (Absorption Ångström Exponent)
        3. Calculate eBC (equivalent Black Carbon)
        4. Validate AAE range and update QC_Flag

        Parameters
        ----------
        _df : pandas.DataFrame
            QC'd data containing the BC columns plus 'Status' and 'QC_Flag'.

        Returns
        -------
        pandas.DataFrame
            BC, absorption and calculated columns followed by 'QC_Flag',
            reindexed to the input index.
        """
        _index = _df.index.copy()

        # Calculate absorption coefficients, AAE, and eBC
        _df_cal = _absCoe(_df[self.BC_COLUMNS], instru=self.nam, specified_band=[550])

        # Combine with Status and QC_Flag
        df_out = concat([_df_cal, _df[['Status', 'QC_Flag']]], axis=1)

        # Validate AAE and update QC_Flag. The sign of AAE is flipped before
        # comparing with the (positive) thresholds; NaN AAE is not flagged.
        negated_aae = -df_out['AAE']
        invalid_aae = (negated_aae < self.MIN_AAE) | (negated_aae > self.MAX_AAE)
        df_out = self.update_qc_flag(df_out, invalid_aae, 'Invalid AAE')

        # Log combined QC summary with calculated info
        qc_summary = getattr(self, '_qc_summary', None)
        if qc_summary is not None:
            from pandas import DataFrame

            total = len(df_out)
            invalid_count = invalid_aae.sum()
            # Guard the empty-frame case, which would otherwise log 'nan%'.
            invalid_pct = invalid_count / total * 100 if total else 0.0

            invalid_aae_row = DataFrame([{
                'Rule': 'Invalid AAE',
                'Count': invalid_count,
                'Percentage': f'{invalid_pct:.1f}%',
                'Description': f'AAE outside valid range {self.MIN_AAE}-{self.MAX_AAE}'
            }])
            # Insert before Valid row (last row)
            summary = concat([qc_summary.iloc[:-1], invalid_aae_row, qc_summary.iloc[-1:]], ignore_index=True)
            self.logger.info(f"{self.nam} QC Summary:")
            for _, row in summary.iterrows():
                self.logger.info(f" {row['Rule']}: {row['Count']} ({row['Percentage']})")

        # Reorder columns
        all_data_cols = self.BC_COLUMNS + self.ABS_COLUMNS + self.CAL_COLUMNS
        return df_out[all_data_cols + ['QC_Flag']].reindex(_index)
@@ -0,0 +1,218 @@
1
+ from typing import Literal
2
+
3
+ import numpy as np
4
+ import pandas
5
+ from pandas import DataFrame, read_excel
6
+
7
+ from AeroViz.rawDataReader.config.supported_instruments import meta
8
+ from AeroViz.rawDataReader.core import AbstractReader
9
+
10
+ pandas.set_option("future.no_silent_downcasting", True)
11
+
12
+ desired_order1 = ['SO2', 'NO', 'NOx', 'NO2', 'CO', 'O3', 'THC', 'NMHC',
13
+ 'CH4', 'PM10', 'PM2.5', 'WS', 'WD', 'AT', 'RH']
14
+
15
+ desired_order2 = ['Benzene', 'Toluene', 'EthylBenzene', 'm/p-Xylene', 'o-Xylene']
16
+
17
+ MDL_NUMBER = -999
18
+
19
+
20
class Reader(AbstractReader):
    """Reader for 'Minion' Excel workbooks.

    The workbook is read with ``pandas.read_excel``; its first data row is
    treated as a units row. Columns are ordered as: ``desired_order1``,
    ``desired_order2``, then the XRF and IGAC species listed under
    ``meta['XRF']['MDL']`` and ``meta['IGAC']['MDL']``.
    """
    nam = 'Minion'

    # Naming of the processing stages (translated from the original note):
    # Nanzi August data (Ministry of Environment) (air quality, heavy metals
    # and aerosol availability) -> Nanzi August data_level1 -> NZ_minion_XXXX
    def _raw_reader(self, file):
        """
        Read one workbook, normalise special cell values and write a Level1 CSV.

        Parameters
        ----------
        file : Path
            Workbook path. ``file.parent`` and ``file.stem`` are used to build
            the ``Level1/<stem>_Level1.csv`` output path.

        Returns
        -------
        pandas.DataFrame
            Reordered data without rows whose index is duplicated or missing.
            Cells may still hold the marker strings '*' and '-'.

        Side effects
        ------------
        Stores the units row on ``self.units`` and writes the Level1 CSV
        (before the duplicated/missing-index rows are dropped).
        """
        df = read_excel(file, index_col=0, parse_dates=True)
        df.index.name = 'Time'

        # Rename columns, stripping surrounding whitespace.
        df = df.rename(columns=lambda x: x.strip())

        # Keep the units row ...
        self.units = df.iloc[0].copy()

        # ... and drop it from the data.
        df = df.iloc[1:]

        # Replace special cell values with markers / the MDL sentinel.
        df = df.replace({'維護校正': '*', np.nan: '-', 'Nodata': '-', '0L': MDL_NUMBER})
        # Numbers suffixed with 'L' are replaced by the MDL sentinel.
        df = df.replace(to_replace=r'\d*\.?\d*[L]\b', value=MDL_NUMBER, regex=True)

        # Zero readings become the MDL sentinel in every column except 'WD'.
        for col in [col for col in df.columns if col != 'WD']:
            df[col] = df[col].replace({0: MDL_NUMBER})

        # replace to numeric for estimating qc rate
        df = df.replace({'_': MDL_NUMBER})

        XRF_col = list(meta.get('XRF').get('MDL').keys())
        IGAC_col = list(meta.get('IGAC').get('MDL').keys())

        # Reorder the columns.
        df = self.reorder_dataframe_columns(df, [desired_order1, desired_order2, XRF_col, IGAC_col])

        # save Level1 data
        output_folder = file.parent / 'Level1'
        output_folder.mkdir(parents=True, exist_ok=True)
        df.to_csv(output_folder / f'{file.stem}_Level1.csv')

        return df.loc[~df.index.duplicated() & df.index.notna()]

    def _QC(self, _df):
        """
        Apply MDL handling, outlier filtering and a mass-closure check.

        Steps
        -----
        1. IGAC and XRF columns go through ``IGAC_QAQC`` / ``XRF_QAQC``.
        2. Remaining MDL sentinel values are set to NaN.
        3. ``desired_order1`` columns except 'WD' go through
           ``QC_control().time_aware_rolling_iqr``.
        4. Rows where ions + elements are not below 2 x PM2.5 (including rows
           with NaN PM2.5) get their IGAC and XRF columns set to NaN.

        Parameters
        ----------
        _df : pandas.DataFrame
            Data as returned by ``_raw_reader``.

        Returns
        -------
        pandas.DataFrame
        """
        IGAC_col = list(meta.get('IGAC').get('MDL'))
        XRF_col = list(meta.get('XRF').get('MDL'))

        # Work on a copy so the column assignments below do not modify the
        # caller's frame (which is a row-filtered slice from _raw_reader).
        _df = _df.copy()

        # IGAC MDL QC
        _df[IGAC_col] = self.IGAC_QAQC(_df[IGAC_col])

        # XRF MDL QC
        _df[XRF_col] = self.XRF_QAQC(_df[XRF_col])

        # Drop any remaining MDL sentinel values.
        _df = _df.mask(_df == MDL_NUMBER, np.nan)

        col = [col for col in desired_order1 if col != 'WD']
        _df[col] = self.QC_control().time_aware_rolling_iqr(_df[col])

        # Mass closure: ions (NO2- is not included here) plus XRF elements.
        ions_mass = _df[['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+', 'Cl-', 'NO3-', 'SO42-']].sum(axis=1)
        element_mass = _df[XRF_col].sum(axis=1)

        estimated_mass = ions_mass + element_mass

        valid_mask = 2 * _df['PM2.5'] > estimated_mass

        _df.loc[~valid_mask, IGAC_col + XRF_col] = np.nan

        return _df

    def mdlReplace_timeAware_qc(self, df: DataFrame, MDL: dict, MDL_replace) -> DataFrame:
        """
        Mark below-MDL values, run the time-aware IQR QC, then fill the marks.

        Parameters
        ----------
        df : pd.DataFrame
            Input data; cells equal to ``MDL_NUMBER`` count as below MDL.
        MDL : dict
            Column name -> detection limit. A value of ``None`` means "no
            limit known"; sentinel cells in that column become NaN.
        MDL_replace : {'nan', '0.5 * MDL'}
            '0.5 * MDL' fills below-MDL cells with half the limit; any other
            value fills them with NaN.

        Returns
        -------
        pd.DataFrame
            QC'd frame with below-MDL cells replaced.
        """

        def _limit(name):
            # Unknown or None limits never flag a value as below MDL.
            value = MDL.get(name)
            return float('-inf') if value is None else value

        # Step 1: Track MDL positions and values below threshold
        mdl_mask = (df.eq(MDL_NUMBER) |
                    df.apply(lambda x: x < _limit(x.name)))

        # Step 2: Convert all values below MDL to MDL_NUMBER (-999)
        df_mdl = df.mask(mdl_mask, MDL_NUMBER)

        # Step 3: Apply time_aware_IQR_QC (excluding MDL_NUMBER values)
        # NOTE(review): _QC uses self.QC_control().time_aware_rolling_iqr instead;
        # confirm that self.time_aware_IQR_QC still exists on the base class.
        df_qc = self.time_aware_IQR_QC(df_mdl.mask(df_mdl == MDL_NUMBER))

        # Step 4: Handle values below MDL according to specified method
        if MDL_replace == '0.5 * MDL':
            for column, threshold in MDL.items():
                # MDL may list species that are not in this frame; indexing
                # them would raise KeyError.
                if column not in df.columns:
                    continue
                below = df_mdl[column] == MDL_NUMBER
                df_qc.loc[below, column] = np.nan if threshold is None else 0.5 * threshold
        else:  # 'nan'
            df_qc = df_qc.mask(df_mdl == MDL_NUMBER, np.nan)

        return df_qc

    def XRF_QAQC(self,
                 df: DataFrame,
                 MDL_replace: Literal['nan', '0.5 * MDL'] = '0.5 * MDL'
                 ) -> DataFrame:
        """
        Perform Quality Assurance and Quality Control for XRF data

        Parameters
        ----------
        df : pd.DataFrame
            Input dataframe with XRF data; must contain 'Al' and 'Fe'.
        MDL_replace : {'nan', '0.5 * MDL'}, default='0.5 * MDL'
            Method to handle values below MDL:
            - 'nan': Replace with NaN
            - '0.5 * MDL': Replace with half of MDL value

        Returns
        -------
        pd.DataFrame
            Processed dataframe with QC applied and MDL values handled
        """
        MDL = meta.get('XRF').get('MDL')

        df = self.mdlReplace_timeAware_qc(df, MDL, MDL_replace)

        # Convert units ng/m3 -> ug/m3. The trigger is a heuristic: both the
        # Al and Fe maxima exceed 10.
        # NOTE(review): this also divides the 0.5 * MDL fill values by 1000 —
        # confirm that is intended.
        if df.Al.max() > 10 and df.Fe.max() > 10:
            columns_to_convert = [col for col in MDL if col in df.columns]
            df[columns_to_convert] = df[columns_to_convert].div(1000)

        self.logger.info("")
        self.logger.info(f"XRF QAQC: values below MDL -> {MDL_replace}")

        return df

    def IGAC_QAQC(self,
                  df: DataFrame,
                  MDL_replace: Literal['nan', '0.5 * MDL'] = '0.5 * MDL',
                  tolerance: float = 1
                  ) -> DataFrame:
        """
        Perform Quality Assurance and Quality Control for IGAC data

        Parameters
        ----------
        df : pd.DataFrame
            Input dataframe with IGAC data
        MDL_replace : {'nan', '0.5 * MDL'}, default='0.5 * MDL'
            Method to handle values below MDL:
            - 'nan': Replace with NaN
            - '0.5 * MDL': Replace with half of MDL value
        tolerance : float, default=1
            Rows are kept when the cation/anion ratio lies within
            ``1 ± tolerance``.

        Returns
        -------
        pd.DataFrame
            Processed dataframe with QC applied and MDL values handled;
            rows failing the ion balance are set entirely to NaN.
        """
        MDL = meta.get('IGAC').get('MDL')

        df = self.mdlReplace_timeAware_qc(df, MDL, MDL_replace)

        # Define the ions
        _df = df.copy()
        _cation = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+']
        _anion = ['Cl-', 'NO2-', 'NO3-', 'SO42-']

        # Divide each ion by its per-charge weight (presumably molar mass
        # divided by charge) and sum per row.
        _df['+_mole'] = _df[_cation].div([23, 18, 39, (24 / 2), (40 / 2)]).sum(axis=1, skipna=True)
        _df['-_mole'] = _df[_anion].div([35.5, 46, 62, (96 / 2)]).sum(axis=1, skipna=True)

        # Avoid division by zero
        _df['ratio'] = np.where(_df['-_mole'] != 0, _df['+_mole'] / _df['-_mole'], np.nan)

        # Calculate bounds
        lower_bound, upper_bound = 1 - tolerance, 1 + tolerance

        # Decide from the ratio whether to keep the original data.
        valid_mask = ((_df['ratio'] <= upper_bound) & (_df['ratio'] >= lower_bound) &
                      ~np.isnan(_df['+_mole']) & ~np.isnan(_df['-_mole']))

        # Rows that fail the check are set to NaN.
        df.loc[~valid_mask] = np.nan

        # Percentage of rows retained.
        retained_percentage = (valid_mask.sum() / len(df)) * 100

        self.logger.info("")
        self.logger.info(f"Ions balance: {round(retained_percentage, 0)}% within tolerance (±{tolerance})")

        if retained_percentage < 70:
            self.logger.warning("Warning: retained data < 70%")

        return df