aeroviz-0.1.21-py3-none-any.whl

Files changed (180)
  1. AeroViz/__init__.py +13 -0
  2. AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
  3. AeroViz/data/DEFAULT_DATA.csv +1417 -0
  4. AeroViz/data/DEFAULT_PNSD_DATA.csv +1417 -0
  5. AeroViz/data/hysplit_example_data.txt +101 -0
  6. AeroViz/dataProcess/Chemistry/__init__.py +149 -0
  7. AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
  8. AeroViz/dataProcess/Chemistry/_calculate.py +557 -0
  9. AeroViz/dataProcess/Chemistry/_isoropia.py +150 -0
  10. AeroViz/dataProcess/Chemistry/_mass_volume.py +487 -0
  11. AeroViz/dataProcess/Chemistry/_ocec.py +172 -0
  12. AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
  13. AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
  14. AeroViz/dataProcess/Optical/PyMieScatt_update.py +577 -0
  15. AeroViz/dataProcess/Optical/_IMPROVE.py +452 -0
  16. AeroViz/dataProcess/Optical/__init__.py +281 -0
  17. AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
  18. AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
  19. AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
  20. AeroViz/dataProcess/Optical/_derived.py +518 -0
  21. AeroViz/dataProcess/Optical/_extinction.py +123 -0
  22. AeroViz/dataProcess/Optical/_mie_sd.py +912 -0
  23. AeroViz/dataProcess/Optical/_retrieve_RI.py +243 -0
  24. AeroViz/dataProcess/Optical/coefficient.py +72 -0
  25. AeroViz/dataProcess/Optical/fRH.pkl +0 -0
  26. AeroViz/dataProcess/Optical/mie_theory.py +260 -0
  27. AeroViz/dataProcess/README.md +271 -0
  28. AeroViz/dataProcess/SizeDistr/__init__.py +245 -0
  29. AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
  30. AeroViz/dataProcess/SizeDistr/__pycache__/_size_dist.cpython-312.pyc +0 -0
  31. AeroViz/dataProcess/SizeDistr/_size_dist.py +810 -0
  32. AeroViz/dataProcess/SizeDistr/merge/README.md +93 -0
  33. AeroViz/dataProcess/SizeDistr/merge/__init__.py +20 -0
  34. AeroViz/dataProcess/SizeDistr/merge/_merge_v0.py +251 -0
  35. AeroViz/dataProcess/SizeDistr/merge/_merge_v0_1.py +246 -0
  36. AeroViz/dataProcess/SizeDistr/merge/_merge_v1.py +255 -0
  37. AeroViz/dataProcess/SizeDistr/merge/_merge_v2.py +244 -0
  38. AeroViz/dataProcess/SizeDistr/merge/_merge_v3.py +518 -0
  39. AeroViz/dataProcess/SizeDistr/merge/_merge_v4.py +422 -0
  40. AeroViz/dataProcess/SizeDistr/prop.py +62 -0
  41. AeroViz/dataProcess/VOC/__init__.py +14 -0
  42. AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
  43. AeroViz/dataProcess/VOC/_potential_par.py +108 -0
  44. AeroViz/dataProcess/VOC/support_voc.json +446 -0
  45. AeroViz/dataProcess/__init__.py +66 -0
  46. AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
  47. AeroViz/dataProcess/core/__init__.py +272 -0
  48. AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
  49. AeroViz/mcp_server.py +352 -0
  50. AeroViz/plot/__init__.py +13 -0
  51. AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
  52. AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
  53. AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
  54. AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
  55. AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
  56. AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
  57. AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
  58. AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
  59. AeroViz/plot/bar.py +126 -0
  60. AeroViz/plot/box.py +69 -0
  61. AeroViz/plot/distribution/__init__.py +1 -0
  62. AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
  63. AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
  64. AeroViz/plot/distribution/distribution.py +576 -0
  65. AeroViz/plot/meteorology/CBPF.py +295 -0
  66. AeroViz/plot/meteorology/__init__.py +3 -0
  67. AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
  68. AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
  69. AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
  70. AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
  71. AeroViz/plot/meteorology/hysplit.py +93 -0
  72. AeroViz/plot/meteorology/wind_rose.py +77 -0
  73. AeroViz/plot/optical/__init__.py +1 -0
  74. AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
  75. AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
  76. AeroViz/plot/optical/optical.py +388 -0
  77. AeroViz/plot/pie.py +210 -0
  78. AeroViz/plot/radar.py +184 -0
  79. AeroViz/plot/regression.py +200 -0
  80. AeroViz/plot/scatter.py +174 -0
  81. AeroViz/plot/templates/__init__.py +6 -0
  82. AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
  83. AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
  84. AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
  85. AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
  86. AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
  87. AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
  88. AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
  89. AeroViz/plot/templates/ammonium_rich.py +34 -0
  90. AeroViz/plot/templates/contour.py +47 -0
  91. AeroViz/plot/templates/corr_matrix.py +267 -0
  92. AeroViz/plot/templates/diurnal_pattern.py +61 -0
  93. AeroViz/plot/templates/koschmieder.py +95 -0
  94. AeroViz/plot/templates/metal_heatmap.py +164 -0
  95. AeroViz/plot/timeseries/__init__.py +2 -0
  96. AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
  97. AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
  98. AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
  99. AeroViz/plot/timeseries/template.py +47 -0
  100. AeroViz/plot/timeseries/timeseries.py +446 -0
  101. AeroViz/plot/utils/__init__.py +4 -0
  102. AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  103. AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
  104. AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
  105. AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
  106. AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
  107. AeroViz/plot/utils/_color.py +71 -0
  108. AeroViz/plot/utils/_unit.py +55 -0
  109. AeroViz/plot/utils/fRH.json +390 -0
  110. AeroViz/plot/utils/plt_utils.py +92 -0
  111. AeroViz/plot/utils/sklearn_utils.py +49 -0
  112. AeroViz/plot/utils/units.json +89 -0
  113. AeroViz/plot/violin.py +80 -0
  114. AeroViz/rawDataReader/FLOW.md +138 -0
  115. AeroViz/rawDataReader/__init__.py +220 -0
  116. AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
  117. AeroViz/rawDataReader/config/__init__.py +0 -0
  118. AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
  119. AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
  120. AeroViz/rawDataReader/config/supported_instruments.py +135 -0
  121. AeroViz/rawDataReader/core/__init__.py +658 -0
  122. AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
  123. AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
  124. AeroViz/rawDataReader/core/__pycache__/pre_process.cpython-312.pyc +0 -0
  125. AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
  126. AeroViz/rawDataReader/core/__pycache__/report.cpython-312.pyc +0 -0
  127. AeroViz/rawDataReader/core/logger.py +171 -0
  128. AeroViz/rawDataReader/core/pre_process.py +308 -0
  129. AeroViz/rawDataReader/core/qc.py +961 -0
  130. AeroViz/rawDataReader/core/report.py +579 -0
  131. AeroViz/rawDataReader/script/AE33.py +173 -0
  132. AeroViz/rawDataReader/script/AE43.py +151 -0
  133. AeroViz/rawDataReader/script/APS.py +339 -0
  134. AeroViz/rawDataReader/script/Aurora.py +191 -0
  135. AeroViz/rawDataReader/script/BAM1020.py +90 -0
  136. AeroViz/rawDataReader/script/BC1054.py +161 -0
  137. AeroViz/rawDataReader/script/EPA.py +79 -0
  138. AeroViz/rawDataReader/script/GRIMM.py +68 -0
  139. AeroViz/rawDataReader/script/IGAC.py +140 -0
  140. AeroViz/rawDataReader/script/MA350.py +179 -0
  141. AeroViz/rawDataReader/script/Minion.py +218 -0
  142. AeroViz/rawDataReader/script/NEPH.py +199 -0
  143. AeroViz/rawDataReader/script/OCEC.py +173 -0
  144. AeroViz/rawDataReader/script/Q-ACSM.py +12 -0
  145. AeroViz/rawDataReader/script/SMPS.py +389 -0
  146. AeroViz/rawDataReader/script/TEOM.py +181 -0
  147. AeroViz/rawDataReader/script/VOC.py +106 -0
  148. AeroViz/rawDataReader/script/Xact.py +244 -0
  149. AeroViz/rawDataReader/script/__init__.py +28 -0
  150. AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
  151. AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
  152. AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
  153. AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
  154. AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
  155. AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
  156. AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
  157. AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
  158. AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
  159. AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
  160. AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
  161. AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
  162. AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
  163. AeroViz/rawDataReader/script/__pycache__/Q-ACSM.cpython-312.pyc +0 -0
  164. AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
  165. AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
  166. AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
  167. AeroViz/rawDataReader/script/__pycache__/Xact.cpython-312.pyc +0 -0
  168. AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
  169. AeroViz/tools/__init__.py +2 -0
  170. AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  171. AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
  172. AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
  173. AeroViz/tools/database.py +95 -0
  174. AeroViz/tools/dataclassifier.py +117 -0
  175. AeroViz/tools/dataprinter.py +58 -0
  176. aeroviz-0.1.21.dist-info/METADATA +294 -0
  177. aeroviz-0.1.21.dist-info/RECORD +180 -0
  178. aeroviz-0.1.21.dist-info/WHEEL +5 -0
  179. aeroviz-0.1.21.dist-info/licenses/LICENSE +21 -0
  180. aeroviz-0.1.21.dist-info/top_level.txt +1 -0
AeroViz/rawDataReader/script/Xact.py
@@ -0,0 +1,244 @@
+ from pandas import read_csv, to_datetime, to_numeric
+
+ from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder
+
+
+ class Reader(AbstractReader):
+     """Xact 625i XRF Analyzer Data Reader
+
+     A specialized reader for Xact 625i continuous XRF analyzer data files,
+     which measure elemental composition of particulate matter.
+     """
+     nam = 'Xact'
+
+     # Element symbols with atomic numbers (extracted from column headers)
+     ELEMENTS = [
+         'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe',
+         'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo',
+         'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Cs', 'Ba', 'La', 'Ce',
+         'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu',
+         'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Th', 'Pa', 'U'
+     ]
+
+     # Environmental/status columns to keep
+     ENV_COLUMNS = [
+         'AT', 'SAMPLE_T', 'BP', 'TAPE', 'FLOW_25', 'FLOW_ACT', 'FLOW_STD', 'VOLUME',
+         'TUBE_T', 'ENCLOSURE_T', 'FILAMENT_V', 'SDD_T', 'DPP_T', 'RH',
+         'WIND', 'WIND_DIR', 'SAMPLE_TIME', 'ALARM', 'SAMPLE_TYPE'
+     ]
+
+     # =========================================================================
+     # Alarm Code Definitions
+     # =========================================================================
+     # Error codes (100-110) - indicate instrument malfunction, invalidate data
+     ERROR_CODES = {
+         100: 'Xray Voltage Error',
+         101: 'Xray Current Error',
+         102: 'Tube Temperature Error',
+         103: 'Enclosure Temperature Error',
+         104: 'Tape Error',
+         105: 'Pump Error',
+         106: 'Filter Wheel Error',
+         107: 'Dynamic Rod Error',
+         108: 'Nozzle Error',
+         109: 'Energy Calibration Error',
+         110: 'Software Error',
+     }
+
+     # Warning codes (200-203) - indicate upscale warnings
+     WARNING_CODES = {
+         200: 'Upscale Cr Warning',
+         201: 'Upscale Pb Warning',
+         202: 'Upscale Cd Warning',
+         203: 'Upscale Nb Warning',
+     }
+
+     # =========================================================================
+     # QC Thresholds
+     # =========================================================================
+     MIN_VALUE = 0
+     MAX_VALUE = 100000  # ng/m3
+
+     # Internal standard (Nb) QC parameters
+     INTERNAL_STD_ELEMENT = 'Nb'
+     INTERNAL_STD_TOLERANCE = 0.20  # ±20% from median
+
+     def _raw_reader(self, file):
+         """Read and parse raw Xact 625i XRF data files."""
+         with open(file, 'r', encoding='utf-8', errors='ignore') as f:
+             f.readline()  # skip row 0 (element names)
+             headers = f.readline().strip().split(',')
+             headers.append('_extra_')  # data has one extra field at end
+             _df = read_csv(f, names=headers, on_bad_lines='skip')
+
+         # Parse time column
+         _df['time'] = to_datetime(_df['TIME'], format='%m/%d/%Y %H:%M:%S', errors='coerce')
+         _df = _df.set_index('time')
+         _df = _df.loc[~_df.index.duplicated() & _df.index.notna()]
+
+         # Filter out calibration samples BEFORE rounding to avoid losing valid 00:30 samples
+         # Xact does daily QA checks at midnight (00:00-00:30), SAMPLE_TYPE: 1=normal, 2=calibration
+         if 'Sample Type' in _df.columns:
+             _df = _df[_df['Sample Type'] == 1]
+
+         _df.index = _df.index.round('1h')
+
+         # Rename environmental/status columns
+         rename_map = {
+             'AT (C)': 'AT',
+             'SAMPLE (C)': 'SAMPLE_T',
+             'BP (mmHg)': 'BP',
+             'TAPE (mmHg)': 'TAPE',
+             'FLOW 25 (slpm)': 'FLOW_25',
+             'FLOW ACT (lpm)': 'FLOW_ACT',
+             'FLOW STD (slpm)': 'FLOW_STD',
+             'VOLUME (L)': 'VOLUME',
+             'TUBE (C)': 'TUBE_T',
+             'ENCLOSURE (C)': 'ENCLOSURE_T',
+             'FILAMENT (V)': 'FILAMENT_V',
+             'SDD (C)': 'SDD_T',
+             'DPP (C)': 'DPP_T',
+             'RH (%)': 'RH',
+             'WIND (m/s)': 'WIND',
+             'WIND DIR (deg)': 'WIND_DIR',
+             'SAMPLE TIME (min)': 'SAMPLE_TIME',
+             'ALARM': 'ALARM',
+             'Sample Type': 'SAMPLE_TYPE'
+         }
+
+         # Build element column rename map
+         for col in _df.columns:
+             for elem in self.ELEMENTS:
+                 # Match pattern like "Mg 12 (ng/m3)" or " K 19 (ng/m3)" for concentration
+                 if f'{elem} ' in col and '(ng/m3)' in col and 'uncert' not in col.lower():
+                     rename_map[col] = elem
+                 # Match pattern like "Al Uncert (ng/m3)" or "Mg uncert (ng/m3)" for uncertainty
+                 elif f'{elem} ' in col and 'uncert' in col.lower():
+                     rename_map[col] = f'{elem}_uncert'
+
+         _df = _df.rename(columns=rename_map)
+
+         # Select columns to keep (elements + uncertainties + environmental)
+         keep_cols = []
+         for elem in self.ELEMENTS:
+             if elem in _df.columns:
+                 keep_cols.append(elem)
+             if f'{elem}_uncert' in _df.columns:
+                 keep_cols.append(f'{elem}_uncert')
+         for env_col in self.ENV_COLUMNS:
+             if env_col in _df.columns:
+                 keep_cols.append(env_col)
+
+         _df = _df[[col for col in keep_cols if col in _df.columns]]
+         _df = _df.apply(to_numeric, errors='coerce')
+
+         return _df.loc[~_df.index.duplicated() & _df.index.notna()]
+
+     def _QC(self, _df):
+         """Perform quality control on Xact XRF data.
+
+         QC Rules Applied
+         ----------------
+         1. Calibration Mode   : SAMPLE_TYPE != 1 indicates zero calibration
+         2. Instrument Error   : ALARM code 100-110 indicates instrument error
+         3. Upscale Warning    : ALARM code 200-203 indicates upscale warning
+         4. Invalid Value      : Element concentration outside valid range (0-100000 ng/m3)
+         5. Internal Std Drift : Nb internal standard deviates ±20% from median
+         """
+         _index = _df.index.copy()
+         df_qc = _df.copy()
+
+         # Get element columns (exclude uncertainty and environmental columns)
+         element_cols = [col for col in df_qc.columns if col in self.ELEMENTS]
+         uncert_cols = [f'{elem}_uncert' for elem in element_cols if f'{elem}_uncert' in df_qc.columns]
+
+         # Build QC rules declaratively
+         qc = QCFlagBuilder()
+
+         # Add Calibration Mode rule (SAMPLE_TYPE: 1=normal sampling, 2=zero calibration)
+         # Note: Most calibration samples are already filtered in _raw_reader, this catches any remaining
+         if 'SAMPLE_TYPE' in df_qc.columns:
+             qc.add_rules([
+                 QCRule(
+                     name='Calibration Mode',
+                     condition=lambda df: (df['SAMPLE_TYPE'] != 1) & df['SAMPLE_TYPE'].notna(),
+                     description='Instrument in calibration mode (SAMPLE_TYPE != 1)'
+                 ),
+             ])
+
+         # Add Instrument Error rule (ALARM codes 100-110)
+         if 'ALARM' in df_qc.columns:
+             qc.add_rules([
+                 QCRule(
+                     name='Instrument Error',
+                     condition=lambda df: df['ALARM'].isin(list(self.ERROR_CODES.keys())),
+                     description='Instrument error detected (ALARM code 100-110)'
+                 ),
+                 QCRule(
+                     name='Upscale Warning',
+                     condition=lambda df: df['ALARM'].isin(list(self.WARNING_CODES.keys())),
+                     description='Upscale warning detected (ALARM code 200-203)'
+                 ),
+             ])
+
+         # Add Invalid Value rule
+         if element_cols:
+             qc.add_rules([
+                 QCRule(
+                     name='Invalid Value',
+                     condition=lambda df, cols=element_cols: (
+                         (df[cols] < self.MIN_VALUE) | (df[cols] > self.MAX_VALUE)
+                     ).any(axis=1),
+                     description=f'Concentration outside valid range ({self.MIN_VALUE}-{self.MAX_VALUE} ng/m3)'
+                 ),
+             ])
+
+         # Add Internal Standard Drift rule (Nb)
+         if self.INTERNAL_STD_ELEMENT in df_qc.columns:
+             nb_median = df_qc[self.INTERNAL_STD_ELEMENT].median()
+             lower_bound = nb_median * (1 - self.INTERNAL_STD_TOLERANCE)
+             upper_bound = nb_median * (1 + self.INTERNAL_STD_TOLERANCE)
+             qc.add_rules([
+                 QCRule(
+                     name='Internal Std Drift',
+                     condition=lambda df, lb=lower_bound, ub=upper_bound: (
+                         (df[self.INTERNAL_STD_ELEMENT] < lb) | (df[self.INTERNAL_STD_ELEMENT] > ub)
+                     ),
+                     description=f'{self.INTERNAL_STD_ELEMENT} internal standard outside '
+                                 f'±{int(self.INTERNAL_STD_TOLERANCE * 100)}% of median ({nb_median:.2f} ng/m³)'
+                 ),
+             ])
+
+         # Apply all QC rules and get flagged DataFrame
+         df_qc = qc.apply(df_qc)
+
+         # Log QC summary
+         summary = qc.get_summary(df_qc)
+         self.logger.info(f"{self.nam} QC Summary:")
+         for _, row in summary.iterrows():
+             self.logger.info(f"  {row['Rule']}: {row['Count']} ({row['Percentage']})")
+
+         # Get output columns: elements + uncertainties + environmental + QC_Flag
+         output_cols = element_cols + uncert_cols + [c for c in self.ENV_COLUMNS if c in df_qc.columns] + ['QC_Flag']
+         return df_qc[[c for c in output_cols if c in df_qc.columns]].reindex(_index)
+
+     def decode_alarm(self, alarm_code):
+         """Decode ALARM code to human-readable message.
+
+         Parameters
+         ----------
+         alarm_code : int
+             The ALARM code from the Xact data
+
+         Returns
+         -------
+         str
+             Human-readable description of the alarm
+         """
+         if alarm_code == 0:
+             return 'Normal'
+         elif alarm_code in self.ERROR_CODES:
+             return self.ERROR_CODES[alarm_code]
+         elif alarm_code in self.WARNING_CODES:
+             return self.WARNING_CODES[alarm_code]
+         else:
+             return f'Unknown Alarm ({alarm_code})'
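
For reference, a minimal standalone sketch of how the alarm tables above decode in practice. The tables are abbreviated copies of the class attributes, the sample codes are hypothetical, and the logic mirrors Reader.decode_alarm rather than importing it from AeroViz.

import pandas as pd

# Abbreviated copies of Reader.ERROR_CODES / Reader.WARNING_CODES above
ERROR_CODES = {100: 'Xray Voltage Error', 104: 'Tape Error', 110: 'Software Error'}
WARNING_CODES = {200: 'Upscale Cr Warning', 203: 'Upscale Nb Warning'}

def decode_alarm(alarm_code):
    # Same decision order as Reader.decode_alarm: normal, error, warning, unknown
    if alarm_code == 0:
        return 'Normal'
    if alarm_code in ERROR_CODES:
        return ERROR_CODES[alarm_code]
    if alarm_code in WARNING_CODES:
        return WARNING_CODES[alarm_code]
    return f'Unknown Alarm ({alarm_code})'

# Hypothetical hourly ALARM values as they might appear in an Xact file
alarms = pd.Series([0, 104, 200, 999])
print(alarms.map(decode_alarm).tolist())
# -> ['Normal', 'Tape Error', 'Upscale Cr Warning', 'Unknown Alarm (999)']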
AeroViz/rawDataReader/script/__init__.py
@@ -0,0 +1,28 @@
+ # Auto-import all instrument reader modules dynamically
+ # New instruments only need to:
+ # 1. Add a new .py file in this directory
+ # 2. Add an entry to the supported_instruments.py meta dict
+
+ import importlib
+ from pathlib import Path
+
+ # Get all .py files in this directory (excluding __init__.py and private modules)
+ _script_dir = Path(__file__).parent
+ _module_files = [
+     f.stem for f in _script_dir.glob('*.py')
+     if f.stem != '__init__' and not f.stem.startswith('_')
+ ]
+
+ # Dynamically import each module
+ __all__ = []
+ for _module_name in _module_files:
+     try:
+         _module = importlib.import_module(f'.{_module_name}', package=__name__)
+         globals()[_module_name] = _module
+         __all__.append(_module_name)
+     except ImportError:
+         # Skip modules that fail to import (e.g., missing dependencies)
+         pass
+
+ # Clean up temporary variables (only names that are guaranteed to be bound;
+ # the loop variables may be unbound if the directory holds no modules)
+ del _script_dir, _module_files, importlib, Path
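
From the caller's side, the effect of this auto-import is that every reader module becomes an attribute of the package. A short usage sketch, assuming each module's optional dependencies are installed and that each script exposes a Reader class, as Xact.py above does:

from AeroViz.rawDataReader import script

print(sorted(script.__all__))    # e.g. ['AE33', 'AE43', 'APS', ...]; failed imports are absent
reader_cls = script.Xact.Reader  # the Reader subclass defined in Xact.py above
print(reader_cls.nam)            # 'Xact'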
AeroViz/tools/__init__.py
@@ -0,0 +1,2 @@
+ from .database import DataBase
+ from .dataclassifier import DataClassifier
AeroViz/tools/database.py
@@ -0,0 +1,95 @@
+ from io import StringIO
+ from pathlib import Path
+ from typing import Literal
+
+ from pandas import read_csv, DataFrame
+
+
+ def load_default_chemical_data():
+     # The following data is from the chemical composition of real atmospheric particles.
+     #
+     # The six main chemical components that comprise PM2.5 are listed in the data.
+     # Here, we test the radar charts to see if we can clearly identify how the
+     # chemical components vary between the whole sampling period and the three
+     # pollutant scenarios:
+     #
+     # 1) Whole sampling period (Total)
+     # 2) Clean period (Clean)
+     # 3) Transition period (Transition)
+     # 4) Event period (Event)
+
+     data = {
+         'Sulfate': [0.01, 0.34, 0.02, 0.71],
+         'Nitrate': [0.88, 0.13, 0.34, 0.13],
+         'OC': [0.07, 0.95, 0.04, 0.05],
+         'EC': [0.20, 0.02, 0.85, 0.19],
+         'Soil': [0.20, 0.10, 0.07, 0.01],
+         'SS': [0.20, 0.10, 0.07, 0.01]
+     }
+
+     return DataFrame(data, index=['Total', 'Clean', 'Transition', 'Event'])
+
+
+ def load_dataset_by_url(dataset_name: Literal["Tunghai", "Taipei"] = "Tunghai") -> DataFrame:
+     import requests
+     dataset_uris = {
+         "Tunghai": "https://raw.githubusercontent.com/alex870521/DataPlot/main/DataPlot/config/default_data.csv"
+     }
+
+     # Ensure the dataset name is valid
+     if dataset_name not in dataset_uris:
+         raise ValueError(f"Dataset {dataset_name} is not supported.")
+
+     url = dataset_uris[dataset_name]
+
+     # Make a request to the URL
+     response = requests.get(url)
+
+     if response.status_code == 200:
+         return read_csv(StringIO(response.text), na_values=('E', 'F', '-', '_', '#', '*'),
+                         index_col=0, parse_dates=True, low_memory=False)
+     else:
+         print(f"Failed to download file: {response.status_code}")
+         print(response.text)  # Print the response text for debugging
+         return DataFrame()  # Return an empty DataFrame in case of failure
+
+
+ def load_dataset_local(dataset_name: Literal["Tunghai", "Taipei", "PNSD"] = "Tunghai") -> DataFrame:
+     base_dir = Path(__file__).resolve().parent.parent
+     config_dir = base_dir / 'data'
+
+     dataset_paths = {
+         "Tunghai": config_dir / 'DEFAULT_DATA.csv',
+         "Taipei": config_dir / 'DEFAULT_DATA.csv',
+         "PNSD": config_dir / 'DEFAULT_PNSD_DATA.csv'
+     }
+
+     if dataset_name not in dataset_paths:
+         raise ValueError(f"Dataset {dataset_name} is not supported.")
+
+     file_path = dataset_paths[dataset_name]
+
+     if not file_path.exists():
+         raise FileNotFoundError(f"The file {file_path} does not exist.")
+
+     return read_csv(file_path, na_values=('E', 'F', '-', '_', '#', '*'), index_col=0,
+                     parse_dates=True, low_memory=False)
+
+
+ class DataBase:
+     def __new__(cls, file_path: Path | str = None, load_data: bool = False, load_PSD: bool = False):
+         print('Loading:\033[96m Default Data\033[0m')
+         if file_path is not None:
+             file_path = Path(file_path)
+             if file_path.exists():
+                 return read_csv(file_path, na_values=('E', 'F', '-', '_', '#', '*'), index_col=0,
+                                 parse_dates=True, low_memory=False)
+
+         if load_data ^ load_PSD:
+             return load_dataset_local("Tunghai") if load_data else load_dataset_local("PNSD")
+         else:
+             raise ValueError("Exactly one of 'load_data' or 'load_PSD' must be True.")
+
+
+ if __name__ == '__main__':
+     df = DataBase(load_data=True)
AeroViz/tools/dataclassifier.py
@@ -0,0 +1,117 @@
+ from datetime import datetime
+ from typing import Literal, Sequence
+
+ import pandas as pd
+ from pandas import concat, DataFrame, Series
+
+
+ class Classifier:
+     Seasons = {'2020-Summer': (datetime(2020, 9, 4), datetime(2020, 9, 21, 23)),
+                '2020-Autumn': (datetime(2020, 9, 22), datetime(2020, 12, 29, 23)),
+                '2020-Winter': (datetime(2020, 12, 30), datetime(2021, 3, 25, 23)),
+                '2021-Spring': (datetime(2021, 3, 26), datetime(2021, 5, 6, 23))}
+
+     # '2021-Summer': (datetime(2021, 5, 7), datetime(2021, 10, 16, 23))
+     # '2021-Autumn': (datetime(2021, 10, 17), datetime(2021, 12, 31, 23))
+
+     @classmethod
+     def classify(cls, df) -> DataFrame:
+         df = cls.classify_by_diurnal(df)
+         df = cls.classify_by_state(df)
+         df = cls.classify_by_season(df)
+         df = cls.classify_by_season_state(df)
+
+         return df
+
+     @classmethod
+     def classify_by_diurnal(cls, df):
+         df['Hour'] = df.index.hour
+         df['Diurnal'] = df['Hour'].apply(cls.map_diurnal)
+         return df
+
+     @classmethod
+     def classify_by_state(cls, df):
+         df['State'] = df.apply(cls.map_state, axis=1, clean_bound=df.Extinction.quantile(0.2),
+                                event_bound=df.Extinction.quantile(0.8))
+         return df
+
+     @classmethod
+     def classify_by_season(cls, df):
+         for season, (season_start, season_end) in cls.Seasons.items():
+             df.loc[season_start:season_end, 'Season'] = season
+         return df
+
+     @classmethod
+     def classify_by_season_state(cls, df):
+         # Assign only the rows of each season, so later seasons do not overwrite earlier ones
+         for _grp, _df in df.groupby('Season'):
+             df.loc[_df.index, 'Season_State'] = _df.apply(cls.map_state, axis=1,
+                                                           clean_bound=_df.Extinction.quantile(0.2),
+                                                           event_bound=_df.Extinction.quantile(0.8))
+         return df
+
+     @staticmethod
+     def map_diurnal(hour):
+         return 'Day' if 7 <= hour <= 18 else 'Night'
+
+     @staticmethod
+     def map_state(row, clean_bound, event_bound):
+         if row['Extinction'] >= event_bound:
+             return 'Event'
+         elif row['Extinction'] < clean_bound:
+             return 'Clean'
+         else:
+             return 'Transition'
+
+
+ class DataClassifier(Classifier):
+     """
+     Notes
+     -----
+     Group the data first, then return the mean and standard deviation of each group.
+     If 'by' is not a column of the DataFrame, the support DataFrame or Series is
+     classified with the default rules to supply the grouping labels.
+     """
+
+     def __new__(cls,
+                 df: DataFrame,
+                 by: Literal["Hour", "State", "Season", "Season_State"] | str,
+                 df_support: DataFrame | Series = None,
+                 cut_bins: Sequence = None,
+                 qcut: int = None,
+                 labels: list[str] = None
+                 ) -> tuple[DataFrame, DataFrame]:
+         group = cls._group_data(df, by, df_support, cut_bins, qcut, labels)
+         return cls._compute_statistics(df, group)
+
+     @staticmethod
+     def _group_data(df, by, df_support, cut_bins, qcut, labels):
+         if by not in df.columns:
+             if df_support is None:
+                 raise KeyError(f"Column '{by}' does not exist in the DataFrame. "
+                                f"Please provide a support DataFrame or Series to help classify.")
+             else:
+                 df = concat([df, Classifier.classify(df_support.copy())[by]], axis=1)
+
+         if cut_bins is not None:
+             # Default labels are the midpoints of the bins (assumes a numeric array-like)
+             df[f'{by}_cut'] = pd.cut(df.loc[:, f'{by}'], cut_bins,
+                                      labels=labels or (cut_bins + (cut_bins[1] - cut_bins[0]) / 2)[:-1])
+             return df.groupby(f'{by}_cut', observed=False)
+
+         elif qcut is not None:
+             df[f'{by}_qcut'] = pd.qcut(df.loc[:, f'{by}'], q=qcut, labels=labels)
+             return df.groupby(f'{by}_qcut', observed=False)
+
+         else:
+             if by == 'State':
+                 return df.groupby(by)
+
+             elif by == 'Season':
+                 return df.groupby(pd.Categorical(df['Season'],
+                                                  categories=['2020-Summer', '2020-Autumn',
+                                                              '2020-Winter', '2021-Spring']),
+                                   observed=False)
+             else:
+                 return df.groupby(by, observed=False)
+
+     @staticmethod
+     def _compute_statistics(df, group):
+         mean_df = group.mean(numeric_only=True)
+         mean_df.loc['Total'] = df.mean(numeric_only=True)
+
+         std_df = group.std(numeric_only=True)
+         std_df.loc['Total'] = df.std(numeric_only=True)
+
+         return mean_df, std_df
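
A self-contained sketch of the support-frame path: 'State' is not a column of df, so df_support is classified with the default Extinction-quantile rules to supply the labels. All data here is random and hypothetical; note that DataClassifier returns a (mean, std) tuple of DataFrames, not an instance.

import numpy as np
import pandas as pd
from AeroViz.tools import DataClassifier

rng = np.random.default_rng(0)
idx = pd.date_range('2020-09-04', periods=240, freq='h')  # falls inside the 2020-Summer window
support = pd.DataFrame({'Extinction': rng.uniform(10, 400, len(idx))}, index=idx)
df = pd.DataFrame({'PM25': rng.uniform(5, 80, len(idx))}, index=idx)

# 'State' is absent from df, so the support frame is classified to provide it
mean_df, std_df = DataClassifier(df, by='State', df_support=support)
print(mean_df['PM25'])  # rows: Clean / Event / Transition / Total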
AeroViz/tools/dataprinter.py
@@ -0,0 +1,58 @@
+ from datetime import datetime
+
+ from pandas import DataFrame, Timestamp
+ from tabulate import tabulate
+
+
+ def data_table(df: DataFrame,
+                items: list[str] | str = None,
+                times: list[datetime | Timestamp | str] = None,
+                ):
+     """
+     Cut the DataFrame at the given times and print the mean and standard deviation
+     of the specified items for each resulting period.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         The DataFrame to be processed. It should have a DateTime index.
+     items : list[str] | str
+         The columns of the DataFrame to be processed. It can be a list of column
+         names or a single column name, e.g. ['NO', 'NO2', 'NOx'].
+     times : list[datetime | Timestamp | str] | str
+         The cut points for the time periods. Each entry can be a datetime, a
+         Timestamp, or a time string in the format 'YYYY-MM-DD', e.g.
+         ['2024-03-21', '2024-04-30'].
+
+     Returns
+     -------
+     None
+         This function doesn't return any value. It prints out a table showing the
+         mean and standard deviation of the specified items for each time period.
+     """
+     items = [items] if isinstance(items, str) else items
+     times = [times] if isinstance(times, str) else times
+     times = list(map(Timestamp, times))
+
+     times.sort()
+
+     results = []
+     periods = []
+     for i in range(len(times) + 1):
+         if i == 0:
+             df_period = df.loc[df.index <= times[i], items]
+             period_label = f'Before {times[i].date()}'
+         elif i == len(times):
+             df_period = df.loc[df.index > times[i - 1], items]
+             period_label = f'After {times[i - 1].date()}'
+         else:
+             df_period = df.loc[(df.index > times[i - 1]) & (df.index <= times[i]), items]
+             period_label = f'{times[i - 1].date()} to {times[i].date()}'
+
+         mean, std = df_period.mean().round(2).to_numpy(), df_period.std().round(2).to_numpy()
+
+         results.append([f'{m} ± {s}' for m, s in zip(mean, std)])
+         periods.append(period_label)
+
+     result = DataFrame(results, columns=items, index=periods)
+
+     print(tabulate(result, headers='keys', tablefmt='fancy_grid'))
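
A minimal sketch of calling data_table. The pollutant columns and cut dates are hypothetical, the DataFrame must carry a DatetimeIndex for the period slicing to work, and tabulate must be installed:

import numpy as np
import pandas as pd
from AeroViz.tools.dataprinter import data_table

rng = np.random.default_rng(42)
idx = pd.date_range('2024-03-01', '2024-05-31', freq='h')
df = pd.DataFrame({'NO': rng.uniform(1, 20, len(idx)),
                   'NO2': rng.uniform(5, 40, len(idx))}, index=idx)

# Prints a fancy_grid table of mean ± std for three periods:
# before 2024-03-21, 2024-03-21 to 2024-04-30, and after 2024-04-30
data_table(df, items=['NO', 'NO2'], times=['2024-03-21', '2024-04-30'])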