AeroViz-0.1.21-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. AeroViz/__init__.py +13 -0
  2. AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
  3. AeroViz/data/DEFAULT_DATA.csv +1417 -0
  4. AeroViz/data/DEFAULT_PNSD_DATA.csv +1417 -0
  5. AeroViz/data/hysplit_example_data.txt +101 -0
  6. AeroViz/dataProcess/Chemistry/__init__.py +149 -0
  7. AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
  8. AeroViz/dataProcess/Chemistry/_calculate.py +557 -0
  9. AeroViz/dataProcess/Chemistry/_isoropia.py +150 -0
  10. AeroViz/dataProcess/Chemistry/_mass_volume.py +487 -0
  11. AeroViz/dataProcess/Chemistry/_ocec.py +172 -0
  12. AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
  13. AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
  14. AeroViz/dataProcess/Optical/PyMieScatt_update.py +577 -0
  15. AeroViz/dataProcess/Optical/_IMPROVE.py +452 -0
  16. AeroViz/dataProcess/Optical/__init__.py +281 -0
  17. AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
  18. AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
  19. AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
  20. AeroViz/dataProcess/Optical/_derived.py +518 -0
  21. AeroViz/dataProcess/Optical/_extinction.py +123 -0
  22. AeroViz/dataProcess/Optical/_mie_sd.py +912 -0
  23. AeroViz/dataProcess/Optical/_retrieve_RI.py +243 -0
  24. AeroViz/dataProcess/Optical/coefficient.py +72 -0
  25. AeroViz/dataProcess/Optical/fRH.pkl +0 -0
  26. AeroViz/dataProcess/Optical/mie_theory.py +260 -0
  27. AeroViz/dataProcess/README.md +271 -0
  28. AeroViz/dataProcess/SizeDistr/__init__.py +245 -0
  29. AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
  30. AeroViz/dataProcess/SizeDistr/__pycache__/_size_dist.cpython-312.pyc +0 -0
  31. AeroViz/dataProcess/SizeDistr/_size_dist.py +810 -0
  32. AeroViz/dataProcess/SizeDistr/merge/README.md +93 -0
  33. AeroViz/dataProcess/SizeDistr/merge/__init__.py +20 -0
  34. AeroViz/dataProcess/SizeDistr/merge/_merge_v0.py +251 -0
  35. AeroViz/dataProcess/SizeDistr/merge/_merge_v0_1.py +246 -0
  36. AeroViz/dataProcess/SizeDistr/merge/_merge_v1.py +255 -0
  37. AeroViz/dataProcess/SizeDistr/merge/_merge_v2.py +244 -0
  38. AeroViz/dataProcess/SizeDistr/merge/_merge_v3.py +518 -0
  39. AeroViz/dataProcess/SizeDistr/merge/_merge_v4.py +422 -0
  40. AeroViz/dataProcess/SizeDistr/prop.py +62 -0
  41. AeroViz/dataProcess/VOC/__init__.py +14 -0
  42. AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
  43. AeroViz/dataProcess/VOC/_potential_par.py +108 -0
  44. AeroViz/dataProcess/VOC/support_voc.json +446 -0
  45. AeroViz/dataProcess/__init__.py +66 -0
  46. AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
  47. AeroViz/dataProcess/core/__init__.py +272 -0
  48. AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
  49. AeroViz/mcp_server.py +352 -0
  50. AeroViz/plot/__init__.py +13 -0
  51. AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
  52. AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
  53. AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
  54. AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
  55. AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
  56. AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
  57. AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
  58. AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
  59. AeroViz/plot/bar.py +126 -0
  60. AeroViz/plot/box.py +69 -0
  61. AeroViz/plot/distribution/__init__.py +1 -0
  62. AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
  63. AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
  64. AeroViz/plot/distribution/distribution.py +576 -0
  65. AeroViz/plot/meteorology/CBPF.py +295 -0
  66. AeroViz/plot/meteorology/__init__.py +3 -0
  67. AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
  68. AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
  69. AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
  70. AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
  71. AeroViz/plot/meteorology/hysplit.py +93 -0
  72. AeroViz/plot/meteorology/wind_rose.py +77 -0
  73. AeroViz/plot/optical/__init__.py +1 -0
  74. AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
  75. AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
  76. AeroViz/plot/optical/optical.py +388 -0
  77. AeroViz/plot/pie.py +210 -0
  78. AeroViz/plot/radar.py +184 -0
  79. AeroViz/plot/regression.py +200 -0
  80. AeroViz/plot/scatter.py +174 -0
  81. AeroViz/plot/templates/__init__.py +6 -0
  82. AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
  83. AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
  84. AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
  85. AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
  86. AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
  87. AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
  88. AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
  89. AeroViz/plot/templates/ammonium_rich.py +34 -0
  90. AeroViz/plot/templates/contour.py +47 -0
  91. AeroViz/plot/templates/corr_matrix.py +267 -0
  92. AeroViz/plot/templates/diurnal_pattern.py +61 -0
  93. AeroViz/plot/templates/koschmieder.py +95 -0
  94. AeroViz/plot/templates/metal_heatmap.py +164 -0
  95. AeroViz/plot/timeseries/__init__.py +2 -0
  96. AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
  97. AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
  98. AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
  99. AeroViz/plot/timeseries/template.py +47 -0
  100. AeroViz/plot/timeseries/timeseries.py +446 -0
  101. AeroViz/plot/utils/__init__.py +4 -0
  102. AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  103. AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
  104. AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
  105. AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
  106. AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
  107. AeroViz/plot/utils/_color.py +71 -0
  108. AeroViz/plot/utils/_unit.py +55 -0
  109. AeroViz/plot/utils/fRH.json +390 -0
  110. AeroViz/plot/utils/plt_utils.py +92 -0
  111. AeroViz/plot/utils/sklearn_utils.py +49 -0
  112. AeroViz/plot/utils/units.json +89 -0
  113. AeroViz/plot/violin.py +80 -0
  114. AeroViz/rawDataReader/FLOW.md +138 -0
  115. AeroViz/rawDataReader/__init__.py +220 -0
  116. AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
  117. AeroViz/rawDataReader/config/__init__.py +0 -0
  118. AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
  119. AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
  120. AeroViz/rawDataReader/config/supported_instruments.py +135 -0
  121. AeroViz/rawDataReader/core/__init__.py +658 -0
  122. AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
  123. AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
  124. AeroViz/rawDataReader/core/__pycache__/pre_process.cpython-312.pyc +0 -0
  125. AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
  126. AeroViz/rawDataReader/core/__pycache__/report.cpython-312.pyc +0 -0
  127. AeroViz/rawDataReader/core/logger.py +171 -0
  128. AeroViz/rawDataReader/core/pre_process.py +308 -0
  129. AeroViz/rawDataReader/core/qc.py +961 -0
  130. AeroViz/rawDataReader/core/report.py +579 -0
  131. AeroViz/rawDataReader/script/AE33.py +173 -0
  132. AeroViz/rawDataReader/script/AE43.py +151 -0
  133. AeroViz/rawDataReader/script/APS.py +339 -0
  134. AeroViz/rawDataReader/script/Aurora.py +191 -0
  135. AeroViz/rawDataReader/script/BAM1020.py +90 -0
  136. AeroViz/rawDataReader/script/BC1054.py +161 -0
  137. AeroViz/rawDataReader/script/EPA.py +79 -0
  138. AeroViz/rawDataReader/script/GRIMM.py +68 -0
  139. AeroViz/rawDataReader/script/IGAC.py +140 -0
  140. AeroViz/rawDataReader/script/MA350.py +179 -0
  141. AeroViz/rawDataReader/script/Minion.py +218 -0
  142. AeroViz/rawDataReader/script/NEPH.py +199 -0
  143. AeroViz/rawDataReader/script/OCEC.py +173 -0
  144. AeroViz/rawDataReader/script/Q-ACSM.py +12 -0
  145. AeroViz/rawDataReader/script/SMPS.py +389 -0
  146. AeroViz/rawDataReader/script/TEOM.py +181 -0
  147. AeroViz/rawDataReader/script/VOC.py +106 -0
  148. AeroViz/rawDataReader/script/Xact.py +244 -0
  149. AeroViz/rawDataReader/script/__init__.py +28 -0
  150. AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
  151. AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
  152. AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
  153. AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
  154. AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
  155. AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
  156. AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
  157. AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
  158. AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
  159. AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
  160. AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
  161. AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
  162. AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
  163. AeroViz/rawDataReader/script/__pycache__/Q-ACSM.cpython-312.pyc +0 -0
  164. AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
  165. AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
  166. AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
  167. AeroViz/rawDataReader/script/__pycache__/Xact.cpython-312.pyc +0 -0
  168. AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
  169. AeroViz/tools/__init__.py +2 -0
  170. AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  171. AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
  172. AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
  173. AeroViz/tools/database.py +95 -0
  174. AeroViz/tools/dataclassifier.py +117 -0
  175. AeroViz/tools/dataprinter.py +58 -0
  176. aeroviz-0.1.21.dist-info/METADATA +294 -0
  177. aeroviz-0.1.21.dist-info/RECORD +180 -0
  178. aeroviz-0.1.21.dist-info/WHEEL +5 -0
  179. aeroviz-0.1.21.dist-info/licenses/LICENSE +21 -0
  180. aeroviz-0.1.21.dist-info/top_level.txt +1 -0
AeroViz/rawDataReader/script/AE33.py
@@ -0,0 +1,173 @@
+ from pandas import read_table, to_numeric, concat
+
+ from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder
+ from AeroViz.rawDataReader.core.pre_process import _absCoe
+
+
+ class Reader(AbstractReader):
+     """AE33 Aethalometer Data Reader.
+
+     A specialized reader for AE33 Aethalometer data files, which measure black carbon
+     concentrations at seven wavelengths.
+
+     See full documentation at docs/source/instruments/AE33.md for detailed information
+     on supported formats and QC procedures.
+     """
+     nam = 'AE33'
+
+     # =========================================================================
+     # Column Definitions
+     # =========================================================================
+     BC_COLUMNS = ['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']
+     ABS_COLUMNS = ['abs_370', 'abs_470', 'abs_520', 'abs_590', 'abs_660', 'abs_880', 'abs_950']
+     CAL_COLUMNS = ['abs_550', 'AAE', 'eBC']
+     BB_COLUMN = 'BB(%)'  # Biomass Burning percentage from source apportionment
+
+     # =========================================================================
+     # QC Thresholds
+     # =========================================================================
+     MIN_BC = 0  # Minimum BC concentration (ng/m³)
+     MAX_BC = 20000  # Maximum BC concentration (ng/m³)
+     MIN_AAE = 0.7  # Minimum valid AAE (absolute value)
+     MAX_AAE = 2.0  # Maximum valid AAE (absolute value)
+
+     # =========================================================================
+     # Status Error Codes (bitwise flags)
+     # =========================================================================
+     # Note: 128 and 256 are tape low warnings, not errors - data is still valid
+     # 384 (128+256) removed to avoid flagging tape warnings as errors
+     ERROR_STATES = [
+         1,  # Tape advance (tape advance, fast calibration, warm-up)
+         2,  # First measurement – obtaining ATN0
+         3,  # Stopped
+         4,  # Flow low/high by more than 0.5 LPM
+         16,  # Calibrating LED
+         32,  # Calibration error (at least one channel OK)
+         1024,  # Stability test
+         2048,  # Clean air test
+         4096,  # Optical test
+     ]
+
+     def _raw_reader(self, file):
+         """Read and parse raw AE33 Aethalometer data files."""
+         if file.stat().st_size / 1024 < 550:
+             self.logger.warning(f'{file.name} may not be a whole daily data.')
+
+         _df = read_table(file, parse_dates={'time': [0, 1]}, index_col='time',
+                          delimiter=r'\s+', skiprows=5, usecols=range(67))
+         _df.columns = _df.columns.str.strip(';')
+
+         # Select BC columns, Status, and BB(%) if available
+         cols_to_read = self.BC_COLUMNS + ['Status']
+         if self.BB_COLUMN in _df.columns:
+             cols_to_read.append(self.BB_COLUMN)
+
+         _df = _df[cols_to_read].apply(to_numeric, errors='coerce')
+
+         return _df.loc[~_df.index.duplicated() & _df.index.notna()]
+
+     def _QC(self, _df):
+         """
+         Perform quality control on AE33 Aethalometer raw data.
+
+         QC Rules Applied (raw data only)
+         ---------------------------------
+         1. Status Error : Invalid instrument status codes
+         2. Invalid BC   : BC concentration outside 0-20000 ng/m³
+         3. Insufficient : Less than 50% hourly data completeness
+
+         Note: AAE validation is done in _process() after calculation.
+         """
+         _index = _df.index.copy()
+         df_qc = _df.copy()
+
+         # Build QC rules declaratively
+         qc = QCFlagBuilder()
+         qc.add_rules([
+             QCRule(
+                 name='Status Error',
+                 condition=lambda df: self.QC_control().filter_error_status(df, self.ERROR_STATES),
+                 description='Invalid instrument status code detected'
+             ),
+             QCRule(
+                 name='Invalid BC',
+                 condition=lambda df: ((df[self.BC_COLUMNS] <= self.MIN_BC) |
+                                       (df[self.BC_COLUMNS] > self.MAX_BC)).any(axis=1),
+                 description=f'BC concentration outside valid range {self.MIN_BC}-{self.MAX_BC} ng/m³'
+             ),
+             QCRule(
+                 name='Insufficient',
+                 condition=lambda df: self.QC_control().hourly_completeness_QC(
+                     df[self.BC_COLUMNS], freq=self.meta['freq']
+                 ),
+                 description='Less than 50% hourly data completeness'
+             ),
+         ])
+
+         # Apply all QC rules and get flagged DataFrame
+         df_qc = qc.apply(df_qc)
+
+         # Store QC summary for combined output in _process()
+         self._qc_summary = qc.get_summary(df_qc)
+
+         return df_qc.reindex(_index)
+
+     def _process(self, _df):
+         """
+         Calculate absorption coefficients and validate derived parameters.
+
+         Processing Steps
+         ----------------
+         1. Calculate absorption coefficients at each wavelength
+         2. Calculate AAE (Absorption Ångström Exponent)
+         3. Calculate eBC (equivalent Black Carbon)
+         4. Validate AAE range and update QC_Flag
+
+         Parameters
+         ----------
+         _df : pd.DataFrame
+             Quality-controlled DataFrame with BC columns and QC_Flag
+
+         Returns
+         -------
+         pd.DataFrame
+             DataFrame with absorption coefficients, AAE, eBC, and updated QC_Flag
+         """
+         _index = _df.index.copy()
+
+         # Calculate absorption coefficients, AAE, and eBC
+         _df_cal = _absCoe(_df[self.BC_COLUMNS], instru=self.nam, specified_band=[550])
+
+         # Combine with Status, BB(%), and QC_Flag
+         extra_cols = ['Status', 'QC_Flag']
+         if self.BB_COLUMN in _df.columns:
+             extra_cols.insert(0, self.BB_COLUMN)
+         df_out = concat([_df_cal, _df[extra_cols]], axis=1)
+
+         # Validate AAE and update QC_Flag
+         # AAE is stored as negative value, so we check -AAE
+         invalid_aae = (-df_out['AAE'] < self.MIN_AAE) | (-df_out['AAE'] > self.MAX_AAE)
+         df_out = self.update_qc_flag(df_out, invalid_aae, 'Invalid AAE')
+
+         # Log combined QC summary with calculated info
+         if hasattr(self, '_qc_summary') and self._qc_summary is not None:
+             import pandas as pd
+             # Add Invalid AAE row before Valid row
+             total = len(df_out)
+             invalid_aae_row = pd.DataFrame([{
+                 'Rule': 'Invalid AAE',
+                 'Count': invalid_aae.sum(),
+                 'Percentage': f'{invalid_aae.sum() / total * 100:.1f}%',
+                 'Description': f'AAE outside valid range {self.MIN_AAE}-{self.MAX_AAE}'
+             }])
+             # Insert before Valid row (last row)
+             summary = pd.concat([self._qc_summary.iloc[:-1], invalid_aae_row, self._qc_summary.iloc[-1:]], ignore_index=True)
+             self.logger.info(f"{self.nam} QC Summary:")
+             for _, row in summary.iterrows():
+                 self.logger.info(f"  {row['Rule']}: {row['Count']} ({row['Percentage']})")
+
+         # Reorder columns
+         all_data_cols = self.BC_COLUMNS + self.ABS_COLUMNS + self.CAL_COLUMNS
+         if self.BB_COLUMN in df_out.columns:
+             all_data_cols.append(self.BB_COLUMN)
+         return df_out[all_data_cols + ['QC_Flag']].reindex(_index)
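
The _process() step above turns the seven BC channels into spectral absorption coefficients and an Absorption Ångström Exponent (AAE), then range-checks AAE against 0.7-2.0. The packaged conversion lives in _absCoe() (pre_process.py); the sketch below is not that implementation, only an illustration of the underlying relations, with the per-wavelength mass absorption cross-sections (MAC) left as an input rather than hard-coded, and with AAE returned as a positive exponent (the reader itself notes that _absCoe() stores AAE with a negative sign, hence the -AAE range check).

    # Illustrative sketch only, not AeroViz's _absCoe(): absorption from BC via an
    # assumed per-wavelength MAC, and AAE from the power law sigma_abs ~ lambda**(-AAE).
    import numpy as np
    import pandas as pd

    WAVELENGTHS_NM = np.array([370, 470, 520, 590, 660, 880, 950])  # AE33 channels

    def absorption_from_bc(bc_ng_m3: pd.DataFrame, mac_m2_g: np.ndarray) -> pd.DataFrame:
        """BC mass (ng/m³) per channel -> absorption coefficient (Mm⁻¹), given MAC (m²/g)."""
        # ng/m³ × m²/g = 1e-9 m⁻¹ = 1e-3 Mm⁻¹
        values = bc_ng_m3.to_numpy(dtype=float) * mac_m2_g * 1e-3
        return pd.DataFrame(values, index=bc_ng_m3.index,
                            columns=[f'abs_{w}' for w in WAVELENGTHS_NM])

    def aae(abs_coe: pd.DataFrame) -> pd.Series:
        """Per-row AAE from a log-log least-squares fit across the seven wavelengths."""
        slope, _ = np.polyfit(np.log(WAVELENGTHS_NM), np.log(abs_coe.to_numpy(dtype=float)).T, 1)
        return pd.Series(-slope, index=abs_coe.index)  # fitted slope is -AAE
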
AeroViz/rawDataReader/script/AE43.py
@@ -0,0 +1,151 @@
+ from pandas import read_csv, to_numeric, concat
+
+ from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder
+ from AeroViz.rawDataReader.core.pre_process import _absCoe
+
+
+ class Reader(AbstractReader):
+     """AE43 Aethalometer Data Reader
+
+     A specialized reader for AE43 Aethalometer data files, which measure black carbon
+     concentrations at seven wavelengths.
+
+     See full documentation at docs/source/instruments/AE43.md for detailed information
+     on supported formats and QC procedures.
+     """
+     nam = 'AE43'
+
+     # =========================================================================
+     # Column Definitions
+     # =========================================================================
+     BC_COLUMNS = ['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']
+     ABS_COLUMNS = ['abs_370', 'abs_470', 'abs_520', 'abs_590', 'abs_660', 'abs_880', 'abs_950']
+     CAL_COLUMNS = ['abs_550', 'AAE', 'eBC']
+
+     # =========================================================================
+     # QC Thresholds
+     # =========================================================================
+     MIN_BC = 0  # Minimum BC concentration (ng/m³)
+     MAX_BC = 20000  # Maximum BC concentration (ng/m³)
+     MIN_AAE = 0.7  # Minimum valid AAE (absolute value)
+     MAX_AAE = 2.0  # Maximum valid AAE (absolute value)
+
+     # =========================================================================
+     # Status Error Codes (bitwise flags) - Same as AE33
+     # =========================================================================
+     ERROR_STATES = [
+         1,  # Tape advance (tape advance, fast calibration, warm-up)
+         2,  # First measurement – obtaining ATN0
+         3,  # Stopped
+         4,  # Flow low/high by more than 0.5 LPM
+         16,  # Calibrating LED
+         32,  # Calibration error (at least one channel OK)
+         384,  # Tape error (tape not moving, end of tape)
+         1024,  # Stability test
+         2048,  # Clean air test
+         4096,  # Optical test
+     ]
+
+     def _raw_reader(self, file):
+         """Read and parse raw AE43 Aethalometer data files."""
+         _df = read_csv(file, parse_dates={'time': ['StartTime']}, index_col='time')
+         _df_id = _df['SetupID'].iloc[-1]
+
+         # Get last SetupID data (including Status column)
+         _df = _df.groupby('SetupID').get_group(_df_id)[self.BC_COLUMNS + ['Status']].copy()
+         _df = _df.apply(to_numeric, errors='coerce')
+
+         return _df.loc[~_df.index.duplicated() & _df.index.notna()]
+
+     def _QC(self, _df):
+         """
+         Perform quality control on AE43 Aethalometer raw data.
+
+         QC Rules Applied (raw data only)
+         ---------------------------------
+         1. Status Error : Invalid instrument status codes
+         2. Invalid BC   : BC concentration outside 0-20000 ng/m³
+         3. Insufficient : Less than 50% hourly data completeness
+
+         Note: AAE validation is done in _process() after calculation.
+         """
+         _index = _df.index.copy()
+         df_qc = _df.copy()
+
+         # Build QC rules declaratively
+         qc = QCFlagBuilder()
+         qc.add_rules([
+             QCRule(
+                 name='Status Error',
+                 condition=lambda df: self.QC_control().filter_error_status(df, self.ERROR_STATES),
+                 description='Invalid instrument status code detected'
+             ),
+             QCRule(
+                 name='Invalid BC',
+                 condition=lambda df: ((df[self.BC_COLUMNS] <= self.MIN_BC) |
+                                       (df[self.BC_COLUMNS] > self.MAX_BC)).any(axis=1),
+                 description=f'BC concentration outside valid range {self.MIN_BC}-{self.MAX_BC} ng/m³'
+             ),
+             QCRule(
+                 name='Insufficient',
+                 condition=lambda df: self.QC_control().hourly_completeness_QC(
+                     df[self.BC_COLUMNS], freq=self.meta['freq']
+                 ),
+                 description='Less than 50% hourly data completeness'
+             ),
+         ])
+
+         # Apply all QC rules and get flagged DataFrame
+         df_qc = qc.apply(df_qc)
+
+         # Store QC summary for combined output in _process()
+         self._qc_summary = qc.get_summary(df_qc)
+
+         return df_qc.reindex(_index)
+
+     def _process(self, _df):
+         """
+         Calculate absorption coefficients and validate derived parameters.
+
+         Processing Steps
+         ----------------
+         1. Calculate absorption coefficients at each wavelength
+         2. Calculate AAE (Absorption Ångström Exponent)
+         3. Calculate eBC (equivalent Black Carbon)
+         4. Validate AAE range and update QC_Flag
+
+         Note: AE43 uses AE33 coefficients for absorption calculation.
+         """
+         _index = _df.index.copy()
+
+         # Calculate absorption coefficients, AAE, and eBC
+         # Note: AE43 uses AE33 coefficients
+         _df_cal = _absCoe(_df[self.BC_COLUMNS], instru='AE33', specified_band=[550])
+
+         # Combine with Status and QC_Flag
+         df_out = concat([_df_cal, _df[['Status', 'QC_Flag']]], axis=1)
+
+         # Validate AAE and update QC_Flag
+         invalid_aae = (-df_out['AAE'] < self.MIN_AAE) | (-df_out['AAE'] > self.MAX_AAE)
+         df_out = self.update_qc_flag(df_out, invalid_aae, 'Invalid AAE')
+
+         # Log combined QC summary with calculated info
+         if hasattr(self, '_qc_summary') and self._qc_summary is not None:
+             import pandas as pd
+             # Add Invalid AAE row before Valid row
+             total = len(df_out)
+             invalid_aae_row = pd.DataFrame([{
+                 'Rule': 'Invalid AAE',
+                 'Count': invalid_aae.sum(),
+                 'Percentage': f'{invalid_aae.sum() / total * 100:.1f}%',
+                 'Description': f'AAE outside valid range {self.MIN_AAE}-{self.MAX_AAE}'
+             }])
+             # Insert before Valid row (last row)
+             summary = pd.concat([self._qc_summary.iloc[:-1], invalid_aae_row, self._qc_summary.iloc[-1:]], ignore_index=True)
+             self.logger.info(f"{self.nam} QC Summary:")
+             for _, row in summary.iterrows():
+                 self.logger.info(f"  {row['Rule']}: {row['Count']} ({row['Percentage']})")
+
+         # Reorder columns
+         all_data_cols = self.BC_COLUMNS + self.ABS_COLUMNS + self.CAL_COLUMNS
+         return df_out[all_data_cols + ['QC_Flag']].reindex(_index)
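
Both aethalometer readers express their QC as data (a list of QCRule objects handed to a QCFlagBuilder) rather than as inline boolean masks, which is what makes the per-rule summary logging possible. The stand-in classes below are a minimal sketch of that declarative pattern, not the QCRule/QCFlagBuilder shipped in AeroViz.rawDataReader.core; in particular, the "first failing rule wins" flag precedence and the summary layout are assumptions made only for illustration.

    # Standalone sketch of the declarative QC-flagging pattern used above.
    # Rule and FlagBuilder are illustrative stand-ins, not the AeroViz classes.
    from dataclasses import dataclass, field
    from typing import Callable, List
    import pandas as pd

    @dataclass
    class Rule:
        name: str
        condition: Callable[[pd.DataFrame], pd.Series]  # True where the row fails
        description: str = ''

    @dataclass
    class FlagBuilder:
        rules: List[Rule] = field(default_factory=list)

        def apply(self, df: pd.DataFrame) -> pd.DataFrame:
            out = df.copy()
            out['QC_Flag'] = 'Valid'
            for rule in self.rules:
                failed = rule.condition(df).fillna(False)
                # Assumed precedence: the first failing rule keeps its flag
                out.loc[failed & (out['QC_Flag'] == 'Valid'), 'QC_Flag'] = rule.name
            return out

        def summary(self, flagged: pd.DataFrame) -> pd.DataFrame:
            counts = flagged['QC_Flag'].value_counts()
            return pd.DataFrame({'Rule': counts.index, 'Count': counts.values,
                                 'Percentage': (counts / len(flagged) * 100).round(1).astype(str).values + '%'})

    # Usage: builder = FlagBuilder([Rule('Invalid BC', lambda d: d['BC6'] > 20000)])
    #        flagged = builder.apply(df); print(builder.summary(flagged))
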
AeroViz/rawDataReader/script/APS.py
@@ -0,0 +1,339 @@
+ import numpy as np
+ from pandas import to_datetime, read_table, Series, DataFrame, concat
+
+ from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder
+
+
+ class Reader(AbstractReader):
+     """APS (Aerodynamic Particle Sizer) Data Reader
+
+     A specialized reader for APS data files, which measure particle size distributions
+     in the range of 0.542-19.81 μm (aerodynamic diameter).
+
+     See full documentation at docs/source/instruments/APS.md for detailed information
+     on supported formats and QC procedures.
+     """
+     nam = 'APS'
+
+     # =========================================================================
+     # QC Thresholds
+     # =========================================================================
+     MIN_HOURLY_COUNT = 5  # Minimum measurements per hour
+     MIN_TOTAL_CONC = 1  # Minimum total concentration (#/cm³)
+     MAX_TOTAL_CONC = 700  # Maximum total concentration (#/cm³)
+
+     # Status Flags column name
+     STATUS_COLUMN = 'Status Flags'
+     # All zeros status means no error
+     STATUS_OK = '0000 0000 0000 0000'
+
+     # APS Status Flag bit definitions (from TSI RF command)
+     # Format: bit_position: description
+     ERROR_STATES = {
+         0: 'Laser fault',
+         1: 'Total Flow out of range',
+         2: 'Sheath Flow out of range',
+         3: 'Excessive sample concentration',
+         4: 'Accumulator clipped',
+         5: 'Autocal failed',
+         6: 'Internal temperature < 10°C',
+         7: 'Internal temperature > 40°C',
+         8: 'Detector voltage out of range',
+         # 9: Reserved (unused)
+     }
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self._distributions = None  # Store distributions for separate file output
+
+     def __call__(self, start, end, mean_freq='1h'):
+         """
+         Process APS data and save size distributions to separate files.
+
+         Overrides AbstractReader.__call__ to add distribution file saving
+         and filter out size bins from main output.
+
+         Parameters
+         ----------
+         start : datetime
+             Start time for data processing
+         end : datetime
+             End time for data processing
+         mean_freq : str, default='1h'
+             Frequency for resampling the data
+
+         Returns
+         -------
+         pd.DataFrame
+             Processed and resampled data (statistics only, no size bins)
+         """
+         # Call parent __call__ for standard processing
+         result = super().__call__(start, end, mean_freq)
+
+         # Save distributions to separate files
+         self._save_distributions(mean_freq)
+
+         # Filter out size bins from main output, keep only statistics
+         stat_cols = [col for col in result.columns if not isinstance(col, (int, float))]
+         result_stats = result[stat_cols]
+
+         # Re-save filtered output to CSV
+         result_stats.to_csv(self.csv_out)
+
+         return result_stats
+
+     def _raw_reader(self, file):
+         """Read and parse raw APS data files.
+
+         Handles files with multiple concatenated headers (when multiple APS export
+         files are merged into one). Header rows are identified and filtered out.
+         """
+         with open(file, 'r', encoding='utf-8', errors='ignore') as f:
+             try:
+                 # Try normal reading first
+                 _df_full = read_table(f, skiprows=6, parse_dates={'Time': ['Date', 'Start Time']},
+                                       date_format='%m/%d/%y %H:%M:%S', low_memory=False).set_index('Time')
+             except:
+                 # File is transposed, re-read
+                 f.seek(0)
+                 raw_df = read_table(f, skiprows=6, low_memory=False, index_col='Sample #')
+                 _df_full = raw_df.T
+                 _df_full.columns.name = None
+
+                 if 'Date' in _df_full.columns and 'Start Time' in _df_full.columns:
+                     datetime_str = _df_full['Date'] + ' ' + _df_full['Start Time']
+                     df_idx = to_datetime(datetime_str, format='%m/%d/%y %H:%M:%S', errors='coerce')
+                     _df_full.index = df_idx
+                     _df_full.index.name = 'Time'
+                     _df_full.drop('Date', axis=1, inplace=True)
+
+         # Index is already datetime from try/except block above
+         # Filter out invalid timestamps (NaT from embedded headers)
+         _df_full = _df_full.loc[_df_full.index.notna()]
+         # Remove duplicate indices (keep first occurrence)
+         dup_mask = _df_full.index.duplicated(keep=False)
+         if dup_mask.any():
+             print(f"File: {file.name} - Duplicated indices: {_df_full.index[dup_mask].unique().tolist()}")
+             _df_full = _df_full[~_df_full.index.duplicated(keep='first')]
+
+         # Now extract size bins (0.542-19.81 μm, columns 3 to 54)
+         _df = _df_full.iloc[:, 3:54].rename(columns=lambda x: round(float(x), 4))
+
+         # Include Status Flags column in _df (will be processed by core together)
+         if self.STATUS_COLUMN in _df_full.columns:
+             _df[self.STATUS_COLUMN] = _df_full[self.STATUS_COLUMN].astype(str).str.strip()
+
+         return _df
+
+     def _QC(self, _df):
+         """
+         Perform quality control on APS data.
+
+         QC Rules Applied
+         ----------------
+         1. Status Error        : Non-zero status flags indicate instrument error
+         2. Insufficient        : Less than 5 measurements per hour
+         3. Invalid Number Conc : Total number concentration outside valid range (1-700 #/cm³)
+         """
+         _df = _df.copy()
+         _index = _df.index.copy()
+
+         # Filter to numeric columns only (exclude Status Flags)
+         numeric_cols = [col for col in _df.columns if isinstance(col, (int, float))]
+         df_numeric = _df[numeric_cols]
+
+         # Calculate total concentration
+         dlogDp = np.diff(np.log(df_numeric.columns.to_numpy(float))).mean()
+         total_conc = df_numeric.sum(axis=1, min_count=1) * dlogDp
+
+         # Calculate hourly data counts
+         hourly_counts = (total_conc
+                          .dropna()
+                          .resample('h')
+                          .size()
+                          .resample('6min')
+                          .ffill()
+                          .reindex(df_numeric.index, method='ffill', tolerance='6min'))
+
+         # Build QC rules declaratively
+         qc = QCFlagBuilder()
+
+         qc.add_rules([
+             QCRule(
+                 name='Status Error',
+                 condition=lambda df: self.QC_control().filter_error_status(
+                     _df, status_column=self.STATUS_COLUMN, status_type='binary_string'
+                 ),
+                 description='Non-zero status flags indicate instrument error'
+             ),
+             QCRule(
+                 name='Insufficient',
+                 condition=lambda df: Series(hourly_counts < self.MIN_HOURLY_COUNT, index=df.index).fillna(True),
+                 description=f'Less than {self.MIN_HOURLY_COUNT} measurements per hour'
+             ),
+             QCRule(
+                 name='Invalid Number Conc',
+                 condition=lambda df, tc=total_conc: Series(
+                     (tc < self.MIN_TOTAL_CONC) | (tc > self.MAX_TOTAL_CONC),
+                     index=df.index
+                 ).fillna(True),
+                 description=f'Total number concentration outside valid range ({self.MIN_TOTAL_CONC}-{self.MAX_TOTAL_CONC} #/cm³)'
+             ),
+         ])
+
+         # Apply all QC rules
+         df_qc = qc.apply(_df)
+
+         # Store QC summary for combined output in _process()
+         self._qc_summary = qc.get_summary(df_qc)
+
+         return df_qc.reindex(_index)
+
+     def _process(self, _df):
+         """
+         Calculate size distribution statistics from QC'd APS data.
+
+         Processing Steps
+         ----------------
+         1. Calculate dlogDp from bin diameters
+         2. Calculate number, surface, volume distributions (all in dX/dlogDp)
+         3. Calculate total, GMD, GSD, mode for each weighting
+         4. Calculate totals for size cutoffs: 1μm, 2.5μm, all
+         5. Store distributions for separate file output
+
+         Size Cutoffs (APS range: 0.542-19.81 μm)
+         -----------------------------------------
+         - 1μm: particles smaller than 1 μm
+         - 2.5μm: particles smaller than 2.5 μm
+         - all: full size range
+
+         Parameters
+         ----------
+         _df : pd.DataFrame
+             Quality-controlled DataFrame with size bin columns and QC_Flag
+
+         Returns
+         -------
+         pd.DataFrame
+             Original size bins (dN/dlogDp) + calculated statistics + QC_Flag
+         """
+         _index = _df.index.copy()
+
+         # Separate QC_Flag from size bins
+         qc_flag = _df['QC_Flag'].copy() if 'QC_Flag' in _df.columns else Series('Valid', index=_df.index)
+
+         # Get numeric columns (size bins)
+         bin_cols = [col for col in _df.columns if isinstance(col, (int, float))]
+         df_bins = _df[bin_cols].copy()  # This is dN/dlogDp
+         dp = np.array(bin_cols, dtype=float)  # in μm
+
+         # Input is already dN/dlogDp, calculate dS/dlogDp and dV/dlogDp
+         dN_dlogDp = df_bins.copy()
+         dS_dlogDp = dN_dlogDp * np.pi * dp ** 2  # Surface area distribution (μm²·cm⁻³)
+         dV_dlogDp = dN_dlogDp * np.pi * (dp ** 3) / 6  # Volume distribution (μm³·cm⁻³)
+
+         # Store distributions for separate file output (with QC_Flag)
+         self._distributions = {
+             'dNdlogDp': concat([dN_dlogDp, qc_flag], axis=1),
+             'dSdlogDp': concat([dS_dlogDp, qc_flag], axis=1),
+             'dVdlogDp': concat([dV_dlogDp, qc_flag], axis=1),
+         }
+
+         # For statistics calculation, convert to absolute values (dX = dX/dlogDp * dlogDp)
+         dlogDp = np.diff(np.log10(dp))
+         dlogDp = np.append(dlogDp, dlogDp[-1])  # Extend to match length
+         dN = dN_dlogDp * dlogDp  # Number concentration
+         dS = dS_dlogDp * dlogDp  # Surface area
+         dV = dV_dlogDp * dlogDp  # Volume
+
+         # Calculate statistics
+         stats = DataFrame(index=_df.index)
+
+         # Size cutoffs in μm (APS bins are in μm)
+         SIZE_CUTOFFS = {
+             '1um': 1.0,  # 1 μm
+             '2.5um': 2.5,  # 2.5 μm
+             'all': np.inf  # All particles
+         }
+
+         # Calculate for each weighting type and size cutoff
+         for weight_name, dist in [('num', dN), ('surf', dS), ('vol', dV)]:
+             for cutoff_name, cutoff_um in SIZE_CUTOFFS.items():
+                 # Filter bins for this cutoff
+                 mask_bins = dp < cutoff_um
+                 if not mask_bins.any():
+                     continue
+
+                 dp_cut = dp[mask_bins]
+                 dist_cut = dist.iloc[:, mask_bins]
+
+                 # Calculate total
+                 total = dist_cut.sum(axis=1, min_count=1)
+                 stats[f'total_{weight_name}_{cutoff_name}'] = total
+
+                 # Calculate GMD and GSD only for 'all' cutoff
+                 if cutoff_name == 'all':
+                     total_valid = total.where(total > 0)
+
+                     # GMD calculation (in log space)
+                     log_dp = np.log(dp_cut)
+                     gmd_log = (dist_cut * log_dp).sum(axis=1) / total_valid
+
+                     # GSD calculation
+                     dp_mesh, gmd_mesh = np.meshgrid(log_dp, gmd_log)
+                     gsd_log = np.sqrt(((dp_mesh - gmd_mesh) ** 2 * dist_cut.values).sum(axis=1) / total_valid)
+
+                     stats[f'GMD_{weight_name}'] = np.exp(gmd_log)
+                     stats[f'GSD_{weight_name}'] = np.exp(gsd_log)
+
+                     # Calculate mode (diameter with maximum concentration)
+                     mask = dist_cut.notna().any(axis=1)
+                     stats.loc[mask, f'mode_{weight_name}'] = dist_cut.loc[mask].idxmax(axis=1)
+
+         # Combine: size bins + statistics + QC_Flag
+         # (bins are kept for rate calculation, filtered out when saving to CSV)
+         df_out = concat([df_bins, stats, qc_flag], axis=1)
+
+         # Log QC summary
+         if hasattr(self, '_qc_summary') and self._qc_summary is not None:
+             self.logger.info(f"{self.nam} QC Summary:")
+             for _, row in self._qc_summary.iterrows():
+                 self.logger.info(f"  {row['Rule']}: {row['Count']} ({row['Percentage']})")
+
+         return df_out.reindex(_index)
+
+     def _save_distributions(self, mean_freq: str = '1h') -> None:
+         """
+         Save size distributions to separate CSV files.
+
+         Output Files
+         ------------
+         - output_aps_dNdlogDp.csv : Number distribution (dN/dlogDp)
+         - output_aps_dSdlogDp.csv : Surface distribution (dS/dlogDp)
+         - output_aps_dVdlogDp.csv : Volume distribution (dV/dlogDp)
+
+         Parameters
+         ----------
+         mean_freq : str, default='1h'
+             Frequency for resampling the data
+         """
+         if not hasattr(self, '_distributions') or self._distributions is None:
+             self.logger.warning("No distributions to save. Run _process() first.")
+             return
+
+         output_folder = self.csv_out.parent
+         self.logger.info("")
+
+         for dist_name, dist_df in self._distributions.items():
+             # Process QC_Flag: set invalid rows to NaN
+             if 'QC_Flag' in dist_df.columns:
+                 invalid_mask = dist_df['QC_Flag'] != 'Valid'
+                 numeric_cols = [c for c in dist_df.columns if c != 'QC_Flag']
+                 dist_df.loc[invalid_mask, numeric_cols] = np.nan
+                 dist_df = dist_df.drop(columns=['QC_Flag'])
+
+             # Resample and save
+             dist_resampled = dist_df.resample(mean_freq).mean().round(4)
+             output_path = output_folder / f'output_{self.nam.lower()}_{dist_name}.csv'
+             dist_resampled.to_csv(output_path)
+             self.logger.info(f"Saved: {output_path.name}")
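
For reference, the weighted statistics produced by _process() follow the standard log-space definitions: N_total = Σ nᵢ, GMD = exp(Σ nᵢ ln dᵢ / N_total), and GSD = exp(sqrt(Σ nᵢ (ln dᵢ - ln GMD)² / N_total)), where nᵢ is the per-bin concentration obtained by multiplying dN/dlogDp by the bin width dlogDp. A minimal standalone restatement of the number-weighted case, using illustrative bin values (not actual APS channel midpoints), is:

    # Standalone restatement of the statistics computed in _process() above.
    # dp and dndlogdp values are illustrative, not real APS data.
    import numpy as np

    dp = np.array([0.542, 0.583, 0.626, 0.673, 0.723])     # bin midpoints, μm (illustrative)
    dndlogdp = np.array([120.0, 150.0, 90.0, 60.0, 30.0])  # dN/dlogDp, #/cm³ (illustrative)

    dlogdp = np.diff(np.log10(dp))
    dlogdp = np.append(dlogdp, dlogdp[-1])  # extend so every bin has a width, as in _process()
    dn = dndlogdp * dlogdp                  # absolute number concentration per bin

    total = dn.sum()
    gmd = np.exp((dn * np.log(dp)).sum() / total)                                 # geometric mean diameter
    gsd = np.exp(np.sqrt((dn * (np.log(dp) - np.log(gmd)) ** 2).sum() / total))   # geometric standard deviation
    mode = dp[np.argmax(dn)]                # bin with the highest concentration (idxmax in _process())

    print(f'N_total={total:.1f} #/cm³, GMD={gmd:.3f} μm, GSD={gsd:.3f}, mode={mode:.3f} μm')
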