AeroViz 0.1.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. AeroViz/__init__.py +13 -0
  2. AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
  3. AeroViz/data/DEFAULT_DATA.csv +1417 -0
  4. AeroViz/data/DEFAULT_PNSD_DATA.csv +1417 -0
  5. AeroViz/data/hysplit_example_data.txt +101 -0
  6. AeroViz/dataProcess/Chemistry/__init__.py +149 -0
  7. AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
  8. AeroViz/dataProcess/Chemistry/_calculate.py +557 -0
  9. AeroViz/dataProcess/Chemistry/_isoropia.py +150 -0
  10. AeroViz/dataProcess/Chemistry/_mass_volume.py +487 -0
  11. AeroViz/dataProcess/Chemistry/_ocec.py +172 -0
  12. AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
  13. AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
  14. AeroViz/dataProcess/Optical/PyMieScatt_update.py +577 -0
  15. AeroViz/dataProcess/Optical/_IMPROVE.py +452 -0
  16. AeroViz/dataProcess/Optical/__init__.py +281 -0
  17. AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
  18. AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
  19. AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
  20. AeroViz/dataProcess/Optical/_derived.py +518 -0
  21. AeroViz/dataProcess/Optical/_extinction.py +123 -0
  22. AeroViz/dataProcess/Optical/_mie_sd.py +912 -0
  23. AeroViz/dataProcess/Optical/_retrieve_RI.py +243 -0
  24. AeroViz/dataProcess/Optical/coefficient.py +72 -0
  25. AeroViz/dataProcess/Optical/fRH.pkl +0 -0
  26. AeroViz/dataProcess/Optical/mie_theory.py +260 -0
  27. AeroViz/dataProcess/README.md +271 -0
  28. AeroViz/dataProcess/SizeDistr/__init__.py +245 -0
  29. AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
  30. AeroViz/dataProcess/SizeDistr/__pycache__/_size_dist.cpython-312.pyc +0 -0
  31. AeroViz/dataProcess/SizeDistr/_size_dist.py +810 -0
  32. AeroViz/dataProcess/SizeDistr/merge/README.md +93 -0
  33. AeroViz/dataProcess/SizeDistr/merge/__init__.py +20 -0
  34. AeroViz/dataProcess/SizeDistr/merge/_merge_v0.py +251 -0
  35. AeroViz/dataProcess/SizeDistr/merge/_merge_v0_1.py +246 -0
  36. AeroViz/dataProcess/SizeDistr/merge/_merge_v1.py +255 -0
  37. AeroViz/dataProcess/SizeDistr/merge/_merge_v2.py +244 -0
  38. AeroViz/dataProcess/SizeDistr/merge/_merge_v3.py +518 -0
  39. AeroViz/dataProcess/SizeDistr/merge/_merge_v4.py +422 -0
  40. AeroViz/dataProcess/SizeDistr/prop.py +62 -0
  41. AeroViz/dataProcess/VOC/__init__.py +14 -0
  42. AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
  43. AeroViz/dataProcess/VOC/_potential_par.py +108 -0
  44. AeroViz/dataProcess/VOC/support_voc.json +446 -0
  45. AeroViz/dataProcess/__init__.py +66 -0
  46. AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
  47. AeroViz/dataProcess/core/__init__.py +272 -0
  48. AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
  49. AeroViz/mcp_server.py +352 -0
  50. AeroViz/plot/__init__.py +13 -0
  51. AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
  52. AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
  53. AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
  54. AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
  55. AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
  56. AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
  57. AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
  58. AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
  59. AeroViz/plot/bar.py +126 -0
  60. AeroViz/plot/box.py +69 -0
  61. AeroViz/plot/distribution/__init__.py +1 -0
  62. AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
  63. AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
  64. AeroViz/plot/distribution/distribution.py +576 -0
  65. AeroViz/plot/meteorology/CBPF.py +295 -0
  66. AeroViz/plot/meteorology/__init__.py +3 -0
  67. AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
  68. AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
  69. AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
  70. AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
  71. AeroViz/plot/meteorology/hysplit.py +93 -0
  72. AeroViz/plot/meteorology/wind_rose.py +77 -0
  73. AeroViz/plot/optical/__init__.py +1 -0
  74. AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
  75. AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
  76. AeroViz/plot/optical/optical.py +388 -0
  77. AeroViz/plot/pie.py +210 -0
  78. AeroViz/plot/radar.py +184 -0
  79. AeroViz/plot/regression.py +200 -0
  80. AeroViz/plot/scatter.py +174 -0
  81. AeroViz/plot/templates/__init__.py +6 -0
  82. AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
  83. AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
  84. AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
  85. AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
  86. AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
  87. AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
  88. AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
  89. AeroViz/plot/templates/ammonium_rich.py +34 -0
  90. AeroViz/plot/templates/contour.py +47 -0
  91. AeroViz/plot/templates/corr_matrix.py +267 -0
  92. AeroViz/plot/templates/diurnal_pattern.py +61 -0
  93. AeroViz/plot/templates/koschmieder.py +95 -0
  94. AeroViz/plot/templates/metal_heatmap.py +164 -0
  95. AeroViz/plot/timeseries/__init__.py +2 -0
  96. AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
  97. AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
  98. AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
  99. AeroViz/plot/timeseries/template.py +47 -0
  100. AeroViz/plot/timeseries/timeseries.py +446 -0
  101. AeroViz/plot/utils/__init__.py +4 -0
  102. AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  103. AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
  104. AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
  105. AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
  106. AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
  107. AeroViz/plot/utils/_color.py +71 -0
  108. AeroViz/plot/utils/_unit.py +55 -0
  109. AeroViz/plot/utils/fRH.json +390 -0
  110. AeroViz/plot/utils/plt_utils.py +92 -0
  111. AeroViz/plot/utils/sklearn_utils.py +49 -0
  112. AeroViz/plot/utils/units.json +89 -0
  113. AeroViz/plot/violin.py +80 -0
  114. AeroViz/rawDataReader/FLOW.md +138 -0
  115. AeroViz/rawDataReader/__init__.py +220 -0
  116. AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
  117. AeroViz/rawDataReader/config/__init__.py +0 -0
  118. AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
  119. AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
  120. AeroViz/rawDataReader/config/supported_instruments.py +135 -0
  121. AeroViz/rawDataReader/core/__init__.py +658 -0
  122. AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
  123. AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
  124. AeroViz/rawDataReader/core/__pycache__/pre_process.cpython-312.pyc +0 -0
  125. AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
  126. AeroViz/rawDataReader/core/__pycache__/report.cpython-312.pyc +0 -0
  127. AeroViz/rawDataReader/core/logger.py +171 -0
  128. AeroViz/rawDataReader/core/pre_process.py +308 -0
  129. AeroViz/rawDataReader/core/qc.py +961 -0
  130. AeroViz/rawDataReader/core/report.py +579 -0
  131. AeroViz/rawDataReader/script/AE33.py +173 -0
  132. AeroViz/rawDataReader/script/AE43.py +151 -0
  133. AeroViz/rawDataReader/script/APS.py +339 -0
  134. AeroViz/rawDataReader/script/Aurora.py +191 -0
  135. AeroViz/rawDataReader/script/BAM1020.py +90 -0
  136. AeroViz/rawDataReader/script/BC1054.py +161 -0
  137. AeroViz/rawDataReader/script/EPA.py +79 -0
  138. AeroViz/rawDataReader/script/GRIMM.py +68 -0
  139. AeroViz/rawDataReader/script/IGAC.py +140 -0
  140. AeroViz/rawDataReader/script/MA350.py +179 -0
  141. AeroViz/rawDataReader/script/Minion.py +218 -0
  142. AeroViz/rawDataReader/script/NEPH.py +199 -0
  143. AeroViz/rawDataReader/script/OCEC.py +173 -0
  144. AeroViz/rawDataReader/script/Q-ACSM.py +12 -0
  145. AeroViz/rawDataReader/script/SMPS.py +389 -0
  146. AeroViz/rawDataReader/script/TEOM.py +181 -0
  147. AeroViz/rawDataReader/script/VOC.py +106 -0
  148. AeroViz/rawDataReader/script/Xact.py +244 -0
  149. AeroViz/rawDataReader/script/__init__.py +28 -0
  150. AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
  151. AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
  152. AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
  153. AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
  154. AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
  155. AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
  156. AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
  157. AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
  158. AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
  159. AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
  160. AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
  161. AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
  162. AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
  163. AeroViz/rawDataReader/script/__pycache__/Q-ACSM.cpython-312.pyc +0 -0
  164. AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
  165. AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
  166. AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
  167. AeroViz/rawDataReader/script/__pycache__/Xact.cpython-312.pyc +0 -0
  168. AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
  169. AeroViz/tools/__init__.py +2 -0
  170. AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  171. AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
  172. AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
  173. AeroViz/tools/database.py +95 -0
  174. AeroViz/tools/dataclassifier.py +117 -0
  175. AeroViz/tools/dataprinter.py +58 -0
  176. aeroviz-0.1.21.dist-info/METADATA +294 -0
  177. aeroviz-0.1.21.dist-info/RECORD +180 -0
  178. aeroviz-0.1.21.dist-info/WHEEL +5 -0
  179. aeroviz-0.1.21.dist-info/licenses/LICENSE +21 -0
  180. aeroviz-0.1.21.dist-info/top_level.txt +1 -0
@@ -0,0 +1,191 @@
1
+ import pandas as pd
2
+ from pandas import Series, concat
3
+
4
+ from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder
5
+ from AeroViz.rawDataReader.core.pre_process import _scaCoe
6
+
7
+
8
class Reader(AbstractReader):
    """Aurora Integrating Nephelometer Data Reader

    A specialized reader for Aurora nephelometer data files, which measure aerosol light
    scattering properties at multiple wavelengths.

    See full documentation at docs/source/instruments/Aurora.md for detailed information
    on supported formats and QC procedures.
    """
    nam = 'Aurora'

    # =========================================================================
    # Column Definitions
    # =========================================================================
    SCAT_COLUMNS = ['B', 'G', 'R', 'BB', 'BG', 'BR']
    CAL_COLUMNS = ['sca_550', 'SAE']

    # =========================================================================
    # QC Thresholds
    # =========================================================================
    MIN_SCAT_VALUE = 0  # Minimum scattering coefficient (Mm^-1)
    MAX_SCAT_VALUE = 2000  # Maximum scattering coefficient (Mm^-1)

    # =========================================================================
    # Status Flag
    # =========================================================================
    STATUS_COLUMN = 'Status'  # Name the status column is stored under in the reader output
    STATUS_COLUMNS = ['Status', 'status', 'Error', 'error', 'Flag', 'flag']  # Candidate names, first match wins
    STATUS_OK = 0  # Status code 0 means normal operation

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _raw_reader(self, file):
        """
        Read and parse raw Aurora nephelometer data files.

        Parameters
        ----------
        file : Path or str
            Path to the Aurora data file.

        Returns
        -------
        pandas.DataFrame
            Aurora data with a datetime index named ``time``, the numeric
            scattering columns listed in ``SCAT_COLUMNS`` and, when one of
            ``STATUS_COLUMNS`` is present in the file, an integer
            ``STATUS_COLUMN`` column. Rows with a duplicated or unparsable
            timestamp are dropped.
        """
        _df = pd.read_csv(file, low_memory=False, index_col=0)

        # Unparsable timestamps become NaT and are filtered out below
        _df.index = pd.to_datetime(_df.index, errors='coerce')
        _df.index.name = 'time'

        _df.columns = _df.keys().str.strip(' ')

        # Map the header names of the alternative CSV layouts onto the standard names
        _df = _df.rename(columns={
            '0°σspB': 'B',
            '0°σspG': 'G',
            '0°σspR': 'R',
            '90°σspB': 'BB',
            '90°σspG': 'BG',
            '90°σspR': 'BR',
            'Blue': 'B',
            'Green': 'G',
            'Red': 'R',
            'B_Blue': 'BB',
            'B_Green': 'BG',
            'B_Red': 'BR',
        })

        # Check for a status column (first matching candidate name, if any)
        status_col_name = next(
            (col_name for col_name in self.STATUS_COLUMNS if col_name in _df.columns),
            None,
        )

        # Use the class-level column list so reader, QC and processing stay in sync
        _df_out = _df[self.SCAT_COLUMNS].apply(pd.to_numeric, errors='coerce')

        # Carry the status column along under a single standard name
        if status_col_name is not None:
            _df_out[self.STATUS_COLUMN] = pd.to_numeric(_df[status_col_name], errors='coerce').astype('Int64')

        _df_out = _df_out.loc[~_df_out.index.duplicated() & _df_out.index.notna()]

        return _df_out

    def _QC(self, _df):
        """
        Perform quality control on Aurora nephelometer raw data.

        QC Rules Applied (raw data only)
        ---------------------------------
        1. Status Error      : Non-zero status code indicates instrument error
        2. No Data           : All scattering columns are NaN
        3. Invalid Scat Value: Scattering coefficient outside 0-2000 Mm^-1
        4. Invalid Scat Rel. : Wavelength dependence violation (B < G < R)
        5. Insufficient      : Less than 50% hourly data completeness

        Note: SAE calculation is done in _process() after QC.

        Parameters
        ----------
        _df : pandas.DataFrame
            Output of ``_raw_reader``.

        Returns
        -------
        pandas.DataFrame
            The frame returned by ``QCFlagBuilder.apply``, reindexed to the
            index of the input.
        """
        _index = _df.index.copy()
        df_qc = _df.copy()

        # Identify rows with all data missing (handled separately)
        all_missing_mask = df_qc[self.SCAT_COLUMNS].isna().all(axis=1)

        # Build QC rules declaratively
        qc = QCFlagBuilder()

        qc.add_rules([
            QCRule(
                name='Status Error',
                # Evaluate on the frame handed to the rule, like every other
                # rule does, instead of closing over the outer ``_df``.
                condition=lambda df: self.QC_control().filter_error_status(
                    df, status_column=self.STATUS_COLUMN, status_type='numeric', ok_value=self.STATUS_OK
                ),
                description=f'Status code is not {self.STATUS_OK} (non-zero indicates error)'
            ),
            QCRule(
                name='No Data',
                condition=lambda df: Series(all_missing_mask, index=df.index),
                description='All scattering columns are NaN'
            ),
            QCRule(
                name='Invalid Scat Value',
                # Note: a value exactly equal to MIN_SCAT_VALUE is flagged as well (<=)
                condition=lambda df: ((df[self.SCAT_COLUMNS] <= self.MIN_SCAT_VALUE) |
                                      (df[self.SCAT_COLUMNS] > self.MAX_SCAT_VALUE)).any(axis=1),
                description=f'Scattering coefficient outside {self.MIN_SCAT_VALUE}-{self.MAX_SCAT_VALUE} Mm^-1'
            ),
            QCRule(
                name='Invalid Scat Rel',
                # Flags rows where scattering strictly increases from blue to red
                condition=lambda df: (df['B'] < df['G']) & (df['G'] < df['R']),
                description='Wavelength dependence violation (Blue < Green < Red)'
            ),
            QCRule(
                name='Insufficient',
                condition=lambda df: self.QC_control().hourly_completeness_QC(
                    df[self.SCAT_COLUMNS], freq=self.meta['freq']
                ),
                description='Less than 50% hourly data completeness'
            ),
        ])

        # Apply all QC rules and get flagged DataFrame
        df_qc = qc.apply(df_qc)

        # Store QC summary for combined output in _process()
        self._qc_summary = qc.get_summary(df_qc)

        return df_qc.reindex(_index)

    def _process(self, _df):
        """
        Calculate scattering coefficients and SAE.

        Processing Steps
        ----------------
        1. Calculate scattering coefficient at 550nm
        2. Calculate SAE (Scattering Ångström Exponent)

        Parameters
        ----------
        _df : pd.DataFrame
            Quality-controlled DataFrame with scattering columns and QC_Flag

        Returns
        -------
        pd.DataFrame
            Output of ``_scaCoe`` combined with the ``QC_Flag`` column,
            reindexed to the index of the input.
        """
        _index = _df.index.copy()

        # Calculate SAE and scattering at 550nm
        _df_cal = _scaCoe(_df[self.SCAT_COLUMNS], instru=self.nam, specified_band=[550])

        # Combine with QC_Flag
        df_out = concat([_df_cal, _df[['QC_Flag']]], axis=1)

        # Log the QC summary stored by _QC(), if _QC() ran on this instance
        if hasattr(self, '_qc_summary') and self._qc_summary is not None:
            self.logger.info(f"{self.nam} QC Summary:")
            for _, row in self._qc_summary.iterrows():
                self.logger.info(f" {row['Rule']}: {row['Count']} ({row['Percentage']})")

        return df_out.reindex(_index)
@@ -0,0 +1,90 @@
1
+ from pandas import read_csv, to_numeric, NA
2
+
3
+ from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder
4
+
5
+
6
class Reader(AbstractReader):
    """BAM1020 (Beta Attenuation Monitor) Data Reader

    Reads BAM1020 data files, which report PM2.5 mass concentration measured
    with beta attenuation technology.

    See full documentation at docs/source/instruments/BAM1020.md for detailed information
    on supported formats and QC procedures.
    """
    nam = 'BAM1020'

    # =========================================================================
    # QC Thresholds
    # =========================================================================
    MIN_CONC = 0  # Minimum PM2.5 concentration (ug/m3)
    MAX_CONC = 500  # Maximum PM2.5 concentration (ug/m3)

    def _raw_reader(self, file):
        """
        Load one raw BAM1020 file and extract the concentration series.

        Parameters
        ----------
        file : Path or str
            Path to the BAM1020 data file.

        Returns
        -------
        pandas.DataFrame
            Single-column (``Conc``) frame in ug/m3 with a datetime index;
            rows with a duplicated or missing timestamp are dropped.
        """
        conc_col = 'Conc'

        # Only the first 21 columns of the file are read
        raw = read_csv(file, parse_dates=True, index_col=0, usecols=range(0, 21))
        raw = raw.rename(columns={'Conc (mg/m3)': conc_col})

        # Readings exactly equal to 1 are blanked out before numeric conversion
        raw[conc_col] = raw[conc_col].replace(1, NA)

        # Coerce to numbers, then convert the unit from mg/m3 to ug/m3
        conc = raw[[conc_col]].apply(to_numeric, errors='coerce') * 1000

        valid_rows = ~conc.index.duplicated() & conc.index.notna()
        return conc.loc[valid_rows]

    def _QC(self, _df):
        """
        Flag questionable BAM1020 concentration values.

        QC Rules Applied
        ----------------
        1. Invalid Conc : Concentration outside valid range (0-500 ug/m3)
        2. Spike        : Sudden value change (vectorized spike detection)

        Parameters
        ----------
        _df : pandas.DataFrame
            Output of ``_raw_reader``.

        Returns
        -------
        pandas.DataFrame
            ``Conc`` and ``QC_Flag`` columns, reindexed to the input index.
        """
        original_index = _df.index.copy()

        def _out_of_range(df):
            # A value equal to MIN_CONC counts as invalid (<=)
            return (df['Conc'] <= self.MIN_CONC) | (df['Conc'] > self.MAX_CONC)

        def _spike(df):
            return self.QC_control().spike_detection(df[['Conc']], max_change_rate=3.0)

        rules = [
            QCRule(
                name='Invalid Conc',
                condition=_out_of_range,
                description=f'Concentration outside valid range ({self.MIN_CONC}-{self.MAX_CONC} ug/m3)',
            ),
            QCRule(
                name='Spike',
                condition=_spike,
                description='Sudden unreasonable value change detected',
            ),
        ]

        builder = QCFlagBuilder()
        builder.add_rules(rules)

        # Evaluate every rule on a copy so the caller's frame is left untouched
        flagged = builder.apply(_df.copy())

        # Report how many rows each rule caught
        self.logger.info(f"{self.nam} QC Summary:")
        for _, entry in builder.get_summary(flagged).iterrows():
            self.logger.info(f" {entry['Rule']}: {entry['Count']} ({entry['Percentage']})")

        return flagged[['Conc', 'QC_Flag']].reindex(original_index)
@@ -0,0 +1,161 @@
1
+ from pandas import read_csv, to_numeric, concat
2
+
3
+ from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder
4
+ from AeroViz.rawDataReader.core.pre_process import _absCoe
5
+
6
+
7
class Reader(AbstractReader):
    """BC1054 Black Carbon Monitor Data Reader

    Reads BC1054 data files, which report black carbon concentrations
    derived from light absorption at 10 wavelengths.

    See full documentation at docs/source/instruments/BC1054.md for detailed information
    on supported formats and QC procedures.
    """
    nam = 'BC1054'

    # =========================================================================
    # Column Definitions
    # =========================================================================
    BC_COLUMNS = ['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']
    ABS_COLUMNS = ['abs_370', 'abs_430', 'abs_470', 'abs_525', 'abs_565',
                   'abs_590', 'abs_660', 'abs_700', 'abs_880', 'abs_950']
    CAL_COLUMNS = ['abs_550', 'AAE', 'eBC']

    # =========================================================================
    # QC Thresholds
    # =========================================================================
    MIN_BC = 0  # Minimum BC concentration (ng/m³)
    MAX_BC = 20000  # Maximum BC concentration (ng/m³)
    MIN_AAE = 0.7  # Minimum valid AAE (absolute value)
    MAX_AAE = 2.0  # Maximum valid AAE (absolute value)

    # =========================================================================
    # Status Error Codes (bitwise flags)
    # =========================================================================
    ERROR_STATES = [
        1,  # Power Failure
        2,  # Digital Sensor Link Failure
        4,  # Tape Move Failure
        8,  # Maintenance
        16,  # Flow Failure
        32,  # Automatic Tape Advance
        64,  # Detector Failure
        256,  # Sensor Range
        512,  # Nozzle Move Failure
        1024,  # SPI Link Failure
        2048,  # Calibration Audit
        65536,  # Tape Move
    ]

    def _raw_reader(self, file):
        """Load one raw BC1054 file and return its numeric BC and Status columns.

        Undecodable bytes in the file are ignored while reading. Rows with a
        duplicated or missing timestamp are dropped.
        """
        with open(file, 'r', encoding='utf-8', errors='ignore') as handle:
            raw = read_csv(handle, parse_dates=True, index_col=0)

        # Headers come with embedded spaces; strip them before renaming
        raw.columns = raw.columns.str.replace(' ', '')

        # 'BC1(ng/m3)' -> 'BC1', ..., 'BC10(ng/m3)' -> 'BC10'
        unit_suffixed = {f'{col}(ng/m3)': col for col in self.BC_COLUMNS}
        raw = raw.rename(columns=unit_suffixed)

        numeric = raw[self.BC_COLUMNS + ['Status']].apply(to_numeric, errors='coerce')

        valid_rows = ~numeric.index.duplicated() & numeric.index.notna()
        return numeric.loc[valid_rows]

    def _QC(self, _df):
        """
        Flag questionable BC1054 raw data.

        QC Rules Applied (raw data only)
        ---------------------------------
        1. Duplicate    : Consecutive duplicate rows removed
        2. Status Error : Invalid instrument status codes
        3. Invalid BC   : BC concentration outside 0-20000 ng/m³
        4. Insufficient : Less than 50% hourly data completeness

        Note: AAE validation is done in _process() after calculation.
        """
        original_index = _df.index.copy()

        # A row is dropped when it is identical to the row before or after it
        same_as_previous = _df.eq(_df.shift()).all(axis=1)
        same_as_next = _df.eq(_df.shift(-1)).all(axis=1)
        deduplicated = _df[~(same_as_previous | same_as_next)].copy()

        def _status_error(df):
            return self.QC_control().filter_error_status(df, self.ERROR_STATES)

        def _invalid_bc(df):
            # A value equal to MIN_BC counts as invalid (<=)
            channels = df[self.BC_COLUMNS]
            return ((channels <= self.MIN_BC) | (channels > self.MAX_BC)).any(axis=1)

        def _insufficient(df):
            return self.QC_control().hourly_completeness_QC(
                df[self.BC_COLUMNS], freq=self.meta['freq']
            )

        builder = QCFlagBuilder()
        builder.add_rules([
            QCRule(
                name='Status Error',
                condition=_status_error,
                description='Invalid instrument status code detected',
            ),
            QCRule(
                name='Invalid BC',
                condition=_invalid_bc,
                description=f'BC concentration outside valid range {self.MIN_BC}-{self.MAX_BC} ng/m³',
            ),
            QCRule(
                name='Insufficient',
                condition=_insufficient,
                description='Less than 50% hourly data completeness',
            ),
        ])

        flagged = builder.apply(deduplicated)

        # Kept on the instance so _process() can log one combined summary
        self._qc_summary = builder.get_summary(flagged)

        # Reindexing restores the rows removed as duplicates (filled with NaN)
        return flagged.reindex(original_index)

    def _process(self, _df):
        """
        Derive absorption parameters and validate them.

        Processing Steps
        ----------------
        1. Calculate absorption coefficients at each wavelength
        2. Calculate AAE (Absorption Ångström Exponent)
        3. Calculate eBC (equivalent Black Carbon)
        4. Validate AAE range and update QC_Flag
        """
        original_index = _df.index.copy()

        # Absorption coefficients, AAE and eBC come from the shared helper
        derived = _absCoe(_df[self.BC_COLUMNS], instru=self.nam, specified_band=[550])

        # Carry Status and QC_Flag over next to the derived columns
        df_out = concat([derived, _df[['Status', 'QC_Flag']]], axis=1)

        # The sign-flipped AAE is what is compared against the valid range
        negated_aae = -df_out['AAE']
        invalid_aae = (negated_aae < self.MIN_AAE) | (negated_aae > self.MAX_AAE)
        df_out = self.update_qc_flag(df_out, invalid_aae, 'Invalid AAE')

        # Log the summary from _QC() with an extra 'Invalid AAE' row
        if hasattr(self, '_qc_summary') and self._qc_summary is not None:
            from pandas import DataFrame

            n_invalid = invalid_aae.sum()
            total = len(df_out)
            aae_row = DataFrame([{
                'Rule': 'Invalid AAE',
                'Count': n_invalid,
                'Percentage': f'{n_invalid / total * 100:.1f}%',
                'Description': f'AAE outside valid range {self.MIN_AAE}-{self.MAX_AAE}'
            }])

            # The new row goes just before the last row of the stored summary
            leading_rows = self._qc_summary.iloc[:-1]
            final_row = self._qc_summary.iloc[-1:]
            summary = concat([leading_rows, aae_row, final_row], ignore_index=True)

            self.logger.info(f"{self.nam} QC Summary:")
            for _, entry in summary.iterrows():
                self.logger.info(f" {entry['Rule']}: {entry['Count']} ({entry['Percentage']})")

        # Fixed output order: BC channels, absorption channels, derived values, flag
        ordered_columns = self.BC_COLUMNS + self.ABS_COLUMNS + self.CAL_COLUMNS + ['QC_Flag']
        return df_out[ordered_columns].reindex(original_index)
@@ -0,0 +1,79 @@
1
+ import numpy as np
2
+ from pandas import read_csv, to_numeric
3
+
4
+ from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder
5
+
6
+ desired_order1 = ['SO2', 'NO', 'NOx', 'NO2', 'CO', 'O3', 'THC', 'NMHC',
7
+ 'CH4', 'PM10', 'PM2.5', 'PM1', 'WS', 'WD', 'AT', 'RH']
8
+
9
+ desired_order2 = ['Benzene', 'Toluene', 'EthylBenzene', 'm/p-Xylene', 'o-Xylene']
10
+
11
+
12
class Reader(AbstractReader):
    """EPA Environmental Data Reader

    A specialized reader for EPA air quality monitoring data files.

    See full documentation at docs/source/instruments/EPA.md for detailed information
    on supported formats and QC procedures.
    """
    nam = 'EPA'

    def _raw_reader(self, file):
        """
        Read and parse a raw EPA hourly-value export.

        Handles the '查詢小時值(測項).csv' and '查詢小時值(直式).csv' exports,
        whether or not they were exported with invalid values already removed.

        Parameters
        ----------
        file : Path or str
            Path to the EPA data file (read as big5, undecodable bytes ignored).

        Returns
        -------
        pandas.DataFrame
            Numeric data indexed by ``Time``, with columns ordered by
            ``reorder_dataframe_columns``; non-numeric cells become NaN.

        Raises
        ------
        ValueError
            If the file contains data from more than one station.
        """
        df = read_csv(file, encoding='big5', encoding_errors='ignore', index_col=0, parse_dates=True,
                      on_bad_lines='skip')

        # Check that the station column exists BEFORE using it; grouping on a
        # missing column would raise KeyError and make the guard unreachable.
        if '測站' in df.columns:
            if df['測站'].nunique() > 1:
                raise ValueError(f"Multiple stations found in the file: {df['測站'].unique()}")
            df = df.drop(columns=['測站'])

        # Long-format export: one row per (time, item) -> one column per item
        if '測項' in df.columns:
            df = df.pivot(columns='測項', values='資料')

        df.rename(columns={'AMB_TEMP': 'AT', 'WIND_SPEED': 'WS', 'WIND_DIREC': 'WD'}, inplace=True)
        df.index.name = 'Time'

        # If the file was exported with invalid values still present, replace
        # values carrying a '#' or 'L' marker with '#' or '_'; the numeric
        # coercion below then turns them into NaN.
        df = df.replace(to_replace=r'\d*\.?\d*[#]\b', value='#', regex=True)
        df = df.replace(to_replace=r'\d*\.?\d*[L]\b', value='_', regex=True)

        # Order the columns
        return self.reorder_dataframe_columns(df, [desired_order1]).apply(to_numeric, errors='coerce')

    def _QC(self, _df):
        """
        Perform quality control on EPA data.

        QC Rules Applied
        ----------------
        1. Negative : Any measurement value < 0

        Parameters
        ----------
        _df : pandas.DataFrame
            Output of ``_raw_reader``.

        Returns
        -------
        pandas.DataFrame
            The frame returned by ``QCFlagBuilder.apply``, reindexed to the
            index of the input.
        """
        _index = _df.index.copy()
        df_qc = _df.copy()

        # Get numeric columns for negative value check
        numeric_cols = df_qc.select_dtypes(include=[np.number]).columns.tolist()

        # Build QC rules declaratively
        qc = QCFlagBuilder()
        qc.add_rules([
            QCRule(
                name='Negative',
                # With no numeric columns at all there is nothing to flag
                condition=lambda df: (df[numeric_cols] < 0).any(axis=1) if numeric_cols else False,
                description='Measurement value is negative (< 0)'
            ),
        ])

        # Apply all QC rules and get flagged DataFrame
        df_qc = qc.apply(df_qc)

        # Log QC summary
        summary = qc.get_summary(df_qc)
        self.logger.info(f"{self.nam} QC Summary:")
        for _, row in summary.iterrows():
            self.logger.info(f" {row['Rule']}: {row['Count']} ({row['Percentage']})")

        return df_qc.reindex(_index)
@@ -0,0 +1,68 @@
1
+ from pandas import to_datetime, read_csv
2
+
3
+ from AeroViz.rawDataReader.core import AbstractReader
4
+
5
+
6
class Reader(AbstractReader):
    """ GRIMM Aerosol Spectrometer Data Reader

    A specialized reader for GRIMM data files, which measure particle size distributions
    in the range of 0.25-32 μm.

    See full documentation at docs/source/instruments/GRIMM.md for detailed information
    on supported formats and QC procedures.
    """
    nam = 'GRIMM'

    def _raw_reader(self, file):
        """
        Read and parse raw GRIMM data files.

        Parameters
        ----------
        file : Path or str
            Path to the GRIMM data file.

        Returns
        -------
        pandas.DataFrame or None
            Processed GRIMM data with datetime index and size channels as columns.
            Returns None if the file is empty.
        """
        # Local import: accept plain strings as the docstring promises, since
        # a str has no ``.name`` attribute.
        from pathlib import Path

        file_name = Path(file).name

        # The data table starts at line 233 of the tab-separated file
        _df = read_csv(file, header=233, delimiter='\t', index_col=0, parse_dates=[0], encoding='ISO-8859-1',
                       dayfirst=True).rename_axis("Time")
        _df.index = to_datetime(_df.index, format="%d/%m/%Y %H:%M:%S", dayfirst=True)

        # Drop the leading 11 columns plus the trailing columns; files whose
        # name starts with "A407ST" use a different trailing cut-off.
        if file_name.startswith("A407ST"):
            _df.drop(_df.columns[0:11].tolist() + _df.columns[128:].tolist(), axis=1, inplace=True)
        else:
            _df.drop(_df.columns[0:11].tolist() + _df.columns[-5:].tolist(), axis=1, inplace=True)

        if _df.empty:
            print(file, "is empty")
            return None

        # NOTE(review): 0.035 is an undocumented scaling constant — confirm its meaning
        return _df / 0.035

    def _QC(self, _df):
        """
        Perform quality control on GRIMM data.

        Parameters
        ----------
        _df : pandas.DataFrame
            Raw GRIMM data with datetime index and size channels as columns.

        Returns
        -------
        pandas.DataFrame
            The input data unchanged.

        Notes
        -----
        No QC filters are currently applied. Future implementations could include:
        1. Value range checks for each size channel
        2. Total concentration consistency checks
        3. Time-based outlier detection
        """
        return _df