AeroViz 0.1.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. AeroViz/__init__.py +13 -0
  2. AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
  3. AeroViz/data/DEFAULT_DATA.csv +1417 -0
  4. AeroViz/data/DEFAULT_PNSD_DATA.csv +1417 -0
  5. AeroViz/data/hysplit_example_data.txt +101 -0
  6. AeroViz/dataProcess/Chemistry/__init__.py +149 -0
  7. AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
  8. AeroViz/dataProcess/Chemistry/_calculate.py +557 -0
  9. AeroViz/dataProcess/Chemistry/_isoropia.py +150 -0
  10. AeroViz/dataProcess/Chemistry/_mass_volume.py +487 -0
  11. AeroViz/dataProcess/Chemistry/_ocec.py +172 -0
  12. AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
  13. AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
  14. AeroViz/dataProcess/Optical/PyMieScatt_update.py +577 -0
  15. AeroViz/dataProcess/Optical/_IMPROVE.py +452 -0
  16. AeroViz/dataProcess/Optical/__init__.py +281 -0
  17. AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
  18. AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
  19. AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
  20. AeroViz/dataProcess/Optical/_derived.py +518 -0
  21. AeroViz/dataProcess/Optical/_extinction.py +123 -0
  22. AeroViz/dataProcess/Optical/_mie_sd.py +912 -0
  23. AeroViz/dataProcess/Optical/_retrieve_RI.py +243 -0
  24. AeroViz/dataProcess/Optical/coefficient.py +72 -0
  25. AeroViz/dataProcess/Optical/fRH.pkl +0 -0
  26. AeroViz/dataProcess/Optical/mie_theory.py +260 -0
  27. AeroViz/dataProcess/README.md +271 -0
  28. AeroViz/dataProcess/SizeDistr/__init__.py +245 -0
  29. AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
  30. AeroViz/dataProcess/SizeDistr/__pycache__/_size_dist.cpython-312.pyc +0 -0
  31. AeroViz/dataProcess/SizeDistr/_size_dist.py +810 -0
  32. AeroViz/dataProcess/SizeDistr/merge/README.md +93 -0
  33. AeroViz/dataProcess/SizeDistr/merge/__init__.py +20 -0
  34. AeroViz/dataProcess/SizeDistr/merge/_merge_v0.py +251 -0
  35. AeroViz/dataProcess/SizeDistr/merge/_merge_v0_1.py +246 -0
  36. AeroViz/dataProcess/SizeDistr/merge/_merge_v1.py +255 -0
  37. AeroViz/dataProcess/SizeDistr/merge/_merge_v2.py +244 -0
  38. AeroViz/dataProcess/SizeDistr/merge/_merge_v3.py +518 -0
  39. AeroViz/dataProcess/SizeDistr/merge/_merge_v4.py +422 -0
  40. AeroViz/dataProcess/SizeDistr/prop.py +62 -0
  41. AeroViz/dataProcess/VOC/__init__.py +14 -0
  42. AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
  43. AeroViz/dataProcess/VOC/_potential_par.py +108 -0
  44. AeroViz/dataProcess/VOC/support_voc.json +446 -0
  45. AeroViz/dataProcess/__init__.py +66 -0
  46. AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
  47. AeroViz/dataProcess/core/__init__.py +272 -0
  48. AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
  49. AeroViz/mcp_server.py +352 -0
  50. AeroViz/plot/__init__.py +13 -0
  51. AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
  52. AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
  53. AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
  54. AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
  55. AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
  56. AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
  57. AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
  58. AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
  59. AeroViz/plot/bar.py +126 -0
  60. AeroViz/plot/box.py +69 -0
  61. AeroViz/plot/distribution/__init__.py +1 -0
  62. AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
  63. AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
  64. AeroViz/plot/distribution/distribution.py +576 -0
  65. AeroViz/plot/meteorology/CBPF.py +295 -0
  66. AeroViz/plot/meteorology/__init__.py +3 -0
  67. AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
  68. AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
  69. AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
  70. AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
  71. AeroViz/plot/meteorology/hysplit.py +93 -0
  72. AeroViz/plot/meteorology/wind_rose.py +77 -0
  73. AeroViz/plot/optical/__init__.py +1 -0
  74. AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
  75. AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
  76. AeroViz/plot/optical/optical.py +388 -0
  77. AeroViz/plot/pie.py +210 -0
  78. AeroViz/plot/radar.py +184 -0
  79. AeroViz/plot/regression.py +200 -0
  80. AeroViz/plot/scatter.py +174 -0
  81. AeroViz/plot/templates/__init__.py +6 -0
  82. AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
  83. AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
  84. AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
  85. AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
  86. AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
  87. AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
  88. AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
  89. AeroViz/plot/templates/ammonium_rich.py +34 -0
  90. AeroViz/plot/templates/contour.py +47 -0
  91. AeroViz/plot/templates/corr_matrix.py +267 -0
  92. AeroViz/plot/templates/diurnal_pattern.py +61 -0
  93. AeroViz/plot/templates/koschmieder.py +95 -0
  94. AeroViz/plot/templates/metal_heatmap.py +164 -0
  95. AeroViz/plot/timeseries/__init__.py +2 -0
  96. AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
  97. AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
  98. AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
  99. AeroViz/plot/timeseries/template.py +47 -0
  100. AeroViz/plot/timeseries/timeseries.py +446 -0
  101. AeroViz/plot/utils/__init__.py +4 -0
  102. AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  103. AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
  104. AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
  105. AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
  106. AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
  107. AeroViz/plot/utils/_color.py +71 -0
  108. AeroViz/plot/utils/_unit.py +55 -0
  109. AeroViz/plot/utils/fRH.json +390 -0
  110. AeroViz/plot/utils/plt_utils.py +92 -0
  111. AeroViz/plot/utils/sklearn_utils.py +49 -0
  112. AeroViz/plot/utils/units.json +89 -0
  113. AeroViz/plot/violin.py +80 -0
  114. AeroViz/rawDataReader/FLOW.md +138 -0
  115. AeroViz/rawDataReader/__init__.py +220 -0
  116. AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
  117. AeroViz/rawDataReader/config/__init__.py +0 -0
  118. AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
  119. AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
  120. AeroViz/rawDataReader/config/supported_instruments.py +135 -0
  121. AeroViz/rawDataReader/core/__init__.py +658 -0
  122. AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
  123. AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
  124. AeroViz/rawDataReader/core/__pycache__/pre_process.cpython-312.pyc +0 -0
  125. AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
  126. AeroViz/rawDataReader/core/__pycache__/report.cpython-312.pyc +0 -0
  127. AeroViz/rawDataReader/core/logger.py +171 -0
  128. AeroViz/rawDataReader/core/pre_process.py +308 -0
  129. AeroViz/rawDataReader/core/qc.py +961 -0
  130. AeroViz/rawDataReader/core/report.py +579 -0
  131. AeroViz/rawDataReader/script/AE33.py +173 -0
  132. AeroViz/rawDataReader/script/AE43.py +151 -0
  133. AeroViz/rawDataReader/script/APS.py +339 -0
  134. AeroViz/rawDataReader/script/Aurora.py +191 -0
  135. AeroViz/rawDataReader/script/BAM1020.py +90 -0
  136. AeroViz/rawDataReader/script/BC1054.py +161 -0
  137. AeroViz/rawDataReader/script/EPA.py +79 -0
  138. AeroViz/rawDataReader/script/GRIMM.py +68 -0
  139. AeroViz/rawDataReader/script/IGAC.py +140 -0
  140. AeroViz/rawDataReader/script/MA350.py +179 -0
  141. AeroViz/rawDataReader/script/Minion.py +218 -0
  142. AeroViz/rawDataReader/script/NEPH.py +199 -0
  143. AeroViz/rawDataReader/script/OCEC.py +173 -0
  144. AeroViz/rawDataReader/script/Q-ACSM.py +12 -0
  145. AeroViz/rawDataReader/script/SMPS.py +389 -0
  146. AeroViz/rawDataReader/script/TEOM.py +181 -0
  147. AeroViz/rawDataReader/script/VOC.py +106 -0
  148. AeroViz/rawDataReader/script/Xact.py +244 -0
  149. AeroViz/rawDataReader/script/__init__.py +28 -0
  150. AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
  151. AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
  152. AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
  153. AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
  154. AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
  155. AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
  156. AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
  157. AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
  158. AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
  159. AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
  160. AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
  161. AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
  162. AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
  163. AeroViz/rawDataReader/script/__pycache__/Q-ACSM.cpython-312.pyc +0 -0
  164. AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
  165. AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
  166. AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
  167. AeroViz/rawDataReader/script/__pycache__/Xact.cpython-312.pyc +0 -0
  168. AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
  169. AeroViz/tools/__init__.py +2 -0
  170. AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  171. AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
  172. AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
  173. AeroViz/tools/database.py +95 -0
  174. AeroViz/tools/dataclassifier.py +117 -0
  175. AeroViz/tools/dataprinter.py +58 -0
  176. aeroviz-0.1.21.dist-info/METADATA +294 -0
  177. aeroviz-0.1.21.dist-info/RECORD +180 -0
  178. aeroviz-0.1.21.dist-info/WHEEL +5 -0
  179. aeroviz-0.1.21.dist-info/licenses/LICENSE +21 -0
  180. aeroviz-0.1.21.dist-info/top_level.txt +1 -0
@@ -0,0 +1,658 @@
1
+ import json
2
+ from abc import ABC, abstractmethod
3
+ from contextlib import contextmanager
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import Generator
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+ from rich.console import Console
11
+ from rich.progress import Progress, TextColumn, BarColumn, SpinnerColumn, TaskProgressColumn
12
+
13
+ from AeroViz.rawDataReader.config.supported_instruments import meta
14
+ from AeroViz.rawDataReader.core.logger import ReaderLogger
15
+ from AeroViz.rawDataReader.core.qc import QualityControl, QCRule, QCFlagBuilder
16
+ from AeroViz.rawDataReader.core.report import calculate_rates, process_rates_report, process_timeline_report, print_timeline_visual
17
+
18
# Public names exported by this module: the reader base class plus the QC
# helpers that are re-exported for instrument-specific reader scripts.
__all__ = [
    'AbstractReader',
    'QCRule',
    'QCFlagBuilder',
]
19
+
20
+
21
+ class AbstractReader(ABC):
22
+ """
23
+ Abstract class for reading raw data from different instruments.
24
+
25
+ This class serves as a base class for reading raw data from various instruments. Each instrument
26
+ should have a separate class that inherits from this class and implements the abstract methods.
27
+ The abstract methods are `_raw_reader` and `_QC`.
28
+
29
+ The class handles file management, including reading from and writing to pickle files, and
30
+ implements quality control measures. It can process data in both batch and streaming modes.
31
+
32
+ Attributes
33
+ ----------
34
+ nam : str
35
+ Name identifier for the reader class
36
+ path : Path
37
+ Path to the raw data files
38
+ meta : dict
39
+ Metadata configuration for the instrument
40
+ logger : ReaderLogger
41
+ Custom logger instance for the reader
42
+ reset : bool
43
+ Flag to indicate whether to reset existing processed data
44
+ append : bool
45
+ Flag to indicate whether to append new data to existing processed data
46
+ qc : bool or str
47
+ Quality control settings
48
+ qc_freq : str or None
49
+ Frequency for quality control calculations
50
+ """
51
+
52
+ nam = 'AbstractReader'
53
+
54
+ def __init__(self,
55
+ path: Path | str,
56
+ reset: bool | str = False,
57
+ qc: bool | str = True,
58
+ **kwargs):
59
+ """
60
+ Initialize the AbstractReader.
61
+
62
+ Parameters
63
+ ----------
64
+ path : Path or str
65
+ Path to the directory containing raw data files
66
+ reset : bool or str, default=False
67
+ If True, forces re-reading of raw data
68
+ If 'append', appends new data to existing processed data
69
+ qc : bool or str, default=True
70
+ If True, performs quality control
71
+ If str, specifies the frequency for QC calculations
72
+ **kwargs : dict
73
+ Additional keyword arguments:
74
+ log_level : str
75
+ Logging level for the reader
76
+ suppress_warnings : bool
77
+ If True, suppresses warning messages
78
+
79
+ Notes
80
+ -----
81
+ Creates necessary output directories and initializes logging system.
82
+ Sets up paths for pickle files, CSV files, and report outputs.
83
+ """
84
+ self.path = Path(path)
85
+ self.meta = meta[self.nam]
86
+ output_folder = self.path / f'{self.nam.lower()}_outputs'
87
+ output_folder.mkdir(parents=True, exist_ok=True)
88
+
89
+ self.logger = ReaderLogger(
90
+ self.nam, output_folder,
91
+ kwargs.get('log_level').upper() if not kwargs.get('suppress_warnings') else 'ERROR')
92
+
93
+ self.reset = reset is True
94
+ self.append = reset == 'append'
95
+ self.qc = qc # if qc, then calculate rate
96
+ self.qc_freq = qc if isinstance(qc, str) else None
97
+ self.kwargs = kwargs
98
+
99
+ self.pkl_nam = output_folder / f'_read_{self.nam.lower()}_qc.pkl'
100
+ self.csv_nam = output_folder / f'_read_{self.nam.lower()}_qc.csv'
101
+ self.pkl_nam_raw = output_folder / f'_read_{self.nam.lower()}_raw.pkl'
102
+ self.csv_nam_raw = output_folder / f'_read_{self.nam.lower()}_raw.csv'
103
+ self.csv_out = output_folder / f'output_{self.nam.lower()}.csv'
104
+ self.report_out = output_folder / 'report.json'
105
+
106
+ def __call__(self,
107
+ start: datetime,
108
+ end: datetime,
109
+ mean_freq: str = '1h',
110
+ ) -> pd.DataFrame:
111
+ """
112
+ Process data for a specified time range.
113
+
114
+ Parameters
115
+ ----------
116
+ start : datetime
117
+ Start time for data processing
118
+ end : datetime
119
+ End time for data processing
120
+ mean_freq : str, default='1h'
121
+ Frequency for resampling the data
122
+
123
+ Returns
124
+ -------
125
+ pd.DataFrame
126
+ Processed and resampled data for the specified time range
127
+
128
+ Notes
129
+ -----
130
+ The processed data is also saved to a CSV file.
131
+ """
132
+
133
+ _f_raw, _f_qc = self._run(start, end)
134
+
135
+ if not self.qc: return _f_raw
136
+
137
+ # Extract QC_Flag before processing
138
+ qc_flag = _f_qc['QC_Flag'].copy() if 'QC_Flag' in _f_qc else None
139
+
140
+ # Process QC_Flag
141
+ if 'QC_Flag' in _f_qc:
142
+ # Set rows with QC_Flag != "Valid" to NaN while preserving index
143
+ invalid_mask = _f_qc['QC_Flag'] != 'Valid'
144
+ if invalid_mask.any():
145
+ # Get all numeric columns (excluding QC_Flag column)
146
+ numeric_columns = [col for col in _f_qc.columns if col != 'QC_Flag']
147
+ # Set invalid data to NaN
148
+ _f_qc.loc[invalid_mask, numeric_columns] = np.nan
149
+
150
+ # Drop QC_Flag column
151
+ _f_qc.drop(columns=['QC_Flag'], inplace=True)
152
+
153
+ # Generate data acquisition and quality rate report (instrument time resolution)
154
+ self._generate_report(_f_raw.apply(pd.to_numeric, errors='coerce'),
155
+ _f_qc.apply(pd.to_numeric, errors='coerce'),
156
+ qc_flag=qc_flag)
157
+
158
+ _f_qc = _f_qc.resample(mean_freq).mean().__round__(4)
159
+
160
+ _f_qc.to_csv(self.csv_out)
161
+
162
+ # Generate timeline data (hourly values)
163
+ report_dict = process_timeline_report(self.report_dict, _f_qc)
164
+
165
+ # Write report
166
+ with open(self.report_out, 'w') as f:
167
+ json.dump(report_dict, f, indent=4)
168
+
169
+ return _f_qc
170
+
171
+ @abstractmethod
172
+ def _raw_reader(self, file):
173
+ """
174
+ Abstract method to read raw data files.
175
+
176
+ Parameters
177
+ ----------
178
+ file : Path or str
179
+ Path to the raw data file
180
+
181
+ Returns
182
+ -------
183
+ pd.DataFrame
184
+ Raw data read from the file
185
+
186
+ Notes
187
+ -----
188
+ Must be implemented by child classes to handle specific file formats.
189
+ """
190
+ pass
191
+
192
+ @abstractmethod
193
+ def _QC(self, df: pd.DataFrame) -> pd.DataFrame:
194
+ """
195
+ Abstract method for quality control processing.
196
+
197
+ Parameters
198
+ ----------
199
+ df : pd.DataFrame
200
+ Input DataFrame containing raw data
201
+
202
+ Returns
203
+ -------
204
+ pd.DataFrame
205
+ Quality controlled data with QC_Flag column
206
+
207
+ Notes
208
+ -----
209
+ Must be implemented by child classes to handle instrument-specific QC.
210
+ This method should only check raw data quality (status, range, completeness).
211
+ Derived parameter validation should be done in _process().
212
+ """
213
+ return df
214
+
215
+ def _process(self, df: pd.DataFrame) -> pd.DataFrame:
216
+ """
217
+ Process data to calculate derived parameters.
218
+
219
+ This method is called after _QC() to calculate instrument-specific
220
+ derived parameters (e.g., absorption coefficients, AAE, SAE).
221
+
222
+ Parameters
223
+ ----------
224
+ df : pd.DataFrame
225
+ Quality-controlled DataFrame with QC_Flag column
226
+
227
+ Returns
228
+ -------
229
+ pd.DataFrame
230
+ DataFrame with derived parameters added and QC_Flag updated
231
+
232
+ Notes
233
+ -----
234
+ Default implementation returns the input unchanged.
235
+ Override in child classes to implement instrument-specific processing.
236
+
237
+ The method should:
238
+ 1. Skip calculation for rows where QC_Flag != 'Valid' (optional optimization)
239
+ 2. Calculate derived parameters
240
+ 3. Validate derived parameters and update QC_Flag if invalid
241
+ """
242
+ return df
243
+
244
+ def _generate_report(self, raw_data, qc_data, qc_flag=None) -> None:
245
+ """
246
+ Calculate and log data quality rates for different time periods.
247
+
248
+ Parameters
249
+ ----------
250
+ raw_data : pd.DataFrame
251
+ Raw data before quality control
252
+ qc_data : pd.DataFrame
253
+ Data after quality control
254
+ qc_flag : pd.Series, optional
255
+ QC flag series indicating validity of each row
256
+
257
+ Notes
258
+ -----
259
+ Calculates rates for specified QC frequency if set.
260
+ Updates the quality report with calculated rates.
261
+ """
262
+ if qc_flag is not None:
263
+ # Add blank line before rate section
264
+ self.logger.info("")
265
+
266
+ if self.qc_freq is not None:
267
+ raw_data_grouped = raw_data.groupby(pd.Grouper(freq=self.qc_freq))
268
+ qc_flag_grouped = qc_flag.groupby(pd.Grouper(freq=self.qc_freq))
269
+
270
+ for (month, _sub_raw_data), (_, _sub_qc_flag) in zip(raw_data_grouped, qc_flag_grouped):
271
+ self.logger.info(
272
+ f"{self.logger.BLUE}Period: {_sub_raw_data.index[0].strftime('%Y-%m-%d')} ~ "
273
+ f"{_sub_raw_data.index[-1].strftime('%Y-%m-%d')}{self.logger.RESET}")
274
+
275
+ calculate_rates(self.logger, _sub_raw_data, _sub_qc_flag, with_log=True)
276
+ else:
277
+ calculate_rates(self.logger, raw_data, qc_flag, with_log=True)
278
+
279
+ # 使用 Grouper 對數據按週和月進行分組
280
+ current_time = datetime.now()
281
+
282
+ # 按週分組 (使用星期一作為每週的開始)
283
+ weekly_raw_groups = raw_data.groupby(pd.Grouper(freq='W-MON', label="left", closed="left"))
284
+ weekly_flag_groups = qc_flag.groupby(pd.Grouper(freq='W-MON', label="left", closed="left"))
285
+
286
+ # 按月分組 (使用月初作為每月的開始)
287
+ monthly_raw_groups = raw_data.groupby(pd.Grouper(freq='MS'))
288
+ monthly_flag_groups = qc_flag.groupby(pd.Grouper(freq='MS'))
289
+
290
+ # 報告基本資訊
291
+ report_dict = {
292
+ 'startDate': qc_data.index.min().strftime('%Y/%m/%d %H:%M'),
293
+ 'endDate': qc_data.index.max().strftime('%Y/%m/%d %H:%M'),
294
+ "report_time": current_time.strftime('%Y-%m-%d %H:%M:%S'),
295
+ "instrument_id": f"{self.path.name[:2]}_{self.nam}",
296
+ "instrument": self.nam,
297
+ }
298
+
299
+ # 生成報告資料
300
+ self.report_dict = process_rates_report(
301
+ self.logger, report_dict,
302
+ weekly_raw_groups, monthly_raw_groups,
303
+ weekly_flag_groups, monthly_flag_groups
304
+ )
305
+
306
+ def _timeIndex_process(self, _df, user_start=None, user_end=None, append_df=None):
307
+ """
308
+ Process time index of the DataFrame.
309
+
310
+ Parameters
311
+ ----------
312
+ _df : pd.DataFrame
313
+ Input DataFrame to process
314
+ user_start : datetime, optional
315
+ User-specified start time
316
+ user_end : datetime, optional
317
+ User-specified end time
318
+ append_df : pd.DataFrame, optional
319
+ DataFrame to append to
320
+
321
+ Returns
322
+ -------
323
+ pd.DataFrame
324
+ DataFrame with processed time index
325
+
326
+ Notes
327
+ -----
328
+ Handles time range filtering and data appending.
329
+ """
330
+ # Round timestamps and remove duplicates
331
+ _df = _df.groupby(_df.index.floor('1min')).first()
332
+
333
+ # Determine frequency
334
+ freq = _df.index.inferred_freq or self.meta['freq']
335
+
336
+ # Append new data if provided
337
+ if append_df is not None:
338
+ append_df.index = append_df.index.round('1min')
339
+ _df = pd.concat([append_df.dropna(how='all'), _df.dropna(how='all')])
340
+ _df = _df.loc[~_df.index.duplicated()]
341
+
342
+ # Determine time range
343
+ df_start, df_end = _df.index.sort_values()[[0, -1]]
344
+
345
+ # Create new time index
346
+ new_index = pd.date_range(user_start or df_start, user_end or df_end, freq=freq, name='time')
347
+
348
+ # Process data: convert to numeric, resample, and reindex with controlled tolerance
349
+ if freq in ['1min', 'min', 'T']:
350
+ # For minute-level data, use smaller tolerance, e.g., 30 seconds
351
+ return _df.reindex(new_index, method='nearest', tolerance='30s')
352
+ elif freq in ['1h', 'h', 'H']:
353
+ # For hourly data, use 30 minutes as tolerance
354
+ # This way 08:20 matches to 08:00, but not to 09:00
355
+ return _df.reindex(new_index, method='nearest', tolerance='30min')
356
+ else:
357
+ # For other frequencies, set tolerance to half the frequency
358
+ if isinstance(freq, str) and freq[-1].isalpha():
359
+ # If freq format is 'number+unit', e.g., '2h', '3min'
360
+ try:
361
+ num = int(freq[:-1])
362
+ unit = freq[-1]
363
+ half_freq = f"{num // 2}{unit}" if num > 1 else f"30{'min' if unit == 'h' else 's'}"
364
+ return _df.reindex(new_index, method='nearest', tolerance=half_freq)
365
+ except ValueError:
366
+ # Cannot parse freq, use default value
367
+ return _df.reindex(new_index, method='nearest', tolerance=freq)
368
+ else:
369
+ return _df.reindex(new_index, method='nearest', tolerance=freq)
370
+
371
+ def _outlier_process(self, _df):
372
+ """
373
+ Process outliers in the data.
374
+
375
+ Parameters
376
+ ----------
377
+ _df : pd.DataFrame
378
+ Input DataFrame containing potential outliers
379
+
380
+ Returns
381
+ -------
382
+ pd.DataFrame
383
+ DataFrame with outliers processed
384
+
385
+ Notes
386
+ -----
387
+ Implementation depends on specific instrument requirements.
388
+ """
389
+ outlier_file = self.path / 'outlier.json'
390
+
391
+ if not outlier_file.exists():
392
+ return _df
393
+
394
+ with outlier_file.open('r', encoding='utf-8', errors='ignore') as f:
395
+ outliers = json.load(f)
396
+
397
+ for _st, _ed in outliers.values():
398
+ _df.loc[_st:_ed] = np.nan
399
+
400
+ return _df
401
+
402
+ def _save_data(self, raw_data: pd.DataFrame, qc_data: pd.DataFrame) -> None:
403
+ """
404
+ Save processed data to files.
405
+
406
+ Parameters
407
+ ----------
408
+ raw_data : pd.DataFrame
409
+ Raw data to save
410
+ qc_data : pd.DataFrame
411
+ Quality controlled data to save
412
+
413
+ Notes
414
+ -----
415
+ Saves data in both pickle and CSV formats.
416
+ """
417
+ try:
418
+ raw_data.to_pickle(self.pkl_nam_raw)
419
+ raw_data.to_csv(self.csv_nam_raw)
420
+ qc_data.to_pickle(self.pkl_nam)
421
+ qc_data.to_csv(self.csv_nam)
422
+
423
+ except Exception as e:
424
+ raise IOError(f"Error saving data. {e}")
425
+
426
+ @contextmanager
427
+ def progress_reading(self, files: list) -> Generator:
428
+ """
429
+ Context manager for tracking file reading progress.
430
+
431
+ Parameters
432
+ ----------
433
+ files : list
434
+ List of files to process
435
+
436
+ Yields
437
+ ------
438
+ Progress
439
+ Progress bar object for tracking
440
+
441
+ Notes
442
+ -----
443
+ Uses rich library for progress display.
444
+ """
445
+ # Create message temporary storage and replace logger method
446
+ logs = {level: [] for level in ['info', 'warning', 'error']}
447
+ original = {level: getattr(self.logger, level) for level in logs}
448
+
449
+ for level, msgs in logs.items():
450
+ setattr(self.logger, level, msgs.append)
451
+
452
+ try:
453
+ with Progress(
454
+ SpinnerColumn(finished_text="✓"),
455
+ BarColumn(bar_width=25, complete_style="green", finished_style="bright_green"),
456
+ TaskProgressColumn(style="bold", text_format="[bright_green]{task.percentage:>3.0f}%"),
457
+ TextColumn("{task.description}", style="bold blue"),
458
+ TextColumn("{task.fields[filename]}", style="bold blue"),
459
+ console=Console(force_terminal=True, color_system="auto", width=120),
460
+ expand=False
461
+ ) as progress:
462
+ task = progress.add_task(f"Reading {self.nam} files:", total=len(files), filename="")
463
+ yield progress, task
464
+ finally:
465
+ # Restore logger method and output message
466
+ for level, msgs in logs.items():
467
+ setattr(self.logger, level, original[level])
468
+ for msg in msgs:
469
+ original[level](msg)
470
+
471
+ def _read_raw_files(self) -> tuple[pd.DataFrame | None, pd.DataFrame | None]:
472
+ """
473
+ Read and process raw data files.
474
+
475
+ Returns
476
+ -------
477
+ tuple[pd.DataFrame | None, pd.DataFrame | None]
478
+ Tuple containing:
479
+ - Raw data DataFrame or None
480
+ - Quality controlled DataFrame or None
481
+
482
+ Notes
483
+ -----
484
+ Handles file reading and initial processing.
485
+ """
486
+ files = [f
487
+ for file_pattern in self.meta['pattern']
488
+ for pattern in {file_pattern.lower(), file_pattern.upper(), file_pattern}
489
+ for f in self.path.glob(pattern)
490
+ if f.name not in [self.csv_out.name, self.csv_nam.name, self.csv_nam_raw.name, f'{self.nam}.log']]
491
+
492
+ if not files:
493
+ raise FileNotFoundError(f"No files in '{self.path}' could be read. Please check the current path.")
494
+
495
+ df_list = []
496
+
497
+ # Context manager for progress bar display
498
+ with self.progress_reading(files) as (progress, task):
499
+ for file in files:
500
+ progress.update(task, advance=1, filename=file.name)
501
+ try:
502
+ if (df := self._raw_reader(file)) is not None and not df.empty:
503
+ df_list.append(df)
504
+ else:
505
+ self.logger.debug(f"File {file.name} produced an empty DataFrame or None.")
506
+
507
+ except Exception as e:
508
+ self.logger.error(f"Error reading {file.name}: {e}")
509
+
510
+ if not df_list:
511
+ raise ValueError(f"\033[41m\033[97mAll files were either empty or failed to read.\033[0m")
512
+
513
+ raw_data = pd.concat(df_list, axis=0).groupby(level=0).first()
514
+
515
+ if self.nam in ['SMPS', 'APS', 'GRIMM']:
516
+ raw_data = raw_data.sort_index(axis=1, key=lambda x: x.astype(float))
517
+
518
+ raw_data = self._timeIndex_process(raw_data)
519
+
520
+ raw_data = raw_data.apply(pd.to_numeric, errors='coerce').copy(deep=True)
521
+
522
+ # Perform QC processing (raw data quality checks only)
523
+ qc_data = self._QC(raw_data.copy(deep=True))
524
+
525
+ # Perform processing (calculate derived parameters + validate)
526
+ qc_data = self._process(qc_data)
527
+
528
+ # Only convert numeric columns to numeric, preserve QC_Flag column string values
529
+ if 'QC_Flag' in qc_data.columns:
530
+ numeric_columns = qc_data.select_dtypes(exclude=['object', 'string']).columns
531
+ qc_data[numeric_columns] = qc_data[numeric_columns].apply(pd.to_numeric, errors='coerce')
532
+ else:
533
+ qc_data = qc_data.apply(pd.to_numeric, errors='coerce')
534
+
535
+ # Make a deep copy to ensure data integrity
536
+ qc_data_copy = qc_data.copy(deep=True)
537
+
538
+ return raw_data, qc_data_copy
539
+
540
+ def _run(self, user_start, user_end):
541
+ """
542
+ Main execution method for data processing.
543
+
544
+ Parameters
545
+ ----------
546
+ user_start : datetime
547
+ Start time for processing
548
+ user_end : datetime
549
+ End time for processing
550
+
551
+ Returns
552
+ -------
553
+ pd.DataFrame
554
+ Processed data for the specified time range
555
+
556
+ Notes
557
+ -----
558
+ Coordinates the entire data processing workflow.
559
+ """
560
+ # read pickle if pickle file exists and 'reset=False' or process raw data or append new data
561
+ if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and not self.reset:
562
+ self.logger.info_box(f"Reading {self.nam} PICKLE from {user_start} to {user_end}")
563
+
564
+ _f_raw_done, _f_qc_done = pd.read_pickle(self.pkl_nam_raw), pd.read_pickle(self.pkl_nam)
565
+
566
+ if self.append:
567
+ self.logger.info_box(f"Appending New data from {user_start} to {user_end}")
568
+
569
+ _f_raw_new, _f_qc_new = self._read_raw_files()
570
+ _f_raw = self._timeIndex_process(_f_raw_done, append_df=_f_raw_new)
571
+ _f_qc = self._timeIndex_process(_f_qc_done, append_df=_f_qc_new)
572
+
573
+ else:
574
+ _f_raw, _f_qc = _f_raw_done, _f_qc_done
575
+
576
+ return _f_raw, _f_qc
577
+
578
+ else:
579
+ self.logger.info_box(f"Reading {self.nam} RAW DATA from {user_start} to {user_end}")
580
+
581
+ _f_raw, _f_qc = self._read_raw_files()
582
+
583
+ # process time index
584
+ _f_raw = self._timeIndex_process(_f_raw, user_start, user_end)
585
+ _f_qc = self._timeIndex_process(_f_qc, user_start, user_end)
586
+
587
+ # process outlier
588
+ _f_qc = self._outlier_process(_f_qc)
589
+
590
+ # save
591
+ self._save_data(_f_raw, _f_qc)
592
+
593
+ return _f_raw, _f_qc
594
+
595
+ @staticmethod
596
+ def reorder_dataframe_columns(df, order_lists: list[list], keep_others: bool = False):
597
+ """
598
+ Reorder DataFrame columns according to specified lists.
599
+
600
+ Parameters
601
+ ----------
602
+ df : pd.DataFrame
603
+ Input DataFrame
604
+ order_lists : list[list]
605
+ Lists specifying column order
606
+ keep_others : bool, default=False
607
+ If True, keeps unspecified columns at the end
608
+
609
+ Returns
610
+ -------
611
+ pd.DataFrame
612
+ DataFrame with reordered columns
613
+ """
614
+ new_order = []
615
+
616
+ for order in order_lists:
617
+ # Only add column that exist in the DataFrame and do not add them repeatedly
618
+ new_order.extend([col for col in order if col in df.columns and col not in new_order])
619
+
620
+ if keep_others:
621
+ # Add all original fields not in the new order list, keeping their original order
622
+ new_order.extend([col for col in df.columns if col not in new_order])
623
+
624
+ return df[new_order]
625
+
626
+ @staticmethod
627
+ def QC_control():
628
+ return QualityControl()
629
+
630
+ @staticmethod
631
+ def update_qc_flag(df: pd.DataFrame, mask: pd.Series, flag_name: str) -> pd.DataFrame:
632
+ """
633
+ Update QC_Flag column for rows matching the mask.
634
+
635
+ Parameters
636
+ ----------
637
+ df : pd.DataFrame
638
+ DataFrame with QC_Flag column
639
+ mask : pd.Series
640
+ Boolean mask indicating rows to flag
641
+ flag_name : str
642
+ Name of the flag to add
643
+
644
+ Returns
645
+ -------
646
+ pd.DataFrame
647
+ DataFrame with updated QC_Flag column
648
+ """
649
+ if 'QC_Flag' not in df.columns:
650
+ df['QC_Flag'] = 'Valid'
651
+
652
+ # For rows that are already Valid, set to flag_name
653
+ # For rows that already have flags, append the new flag
654
+ valid_mask = df['QC_Flag'] == 'Valid'
655
+ df.loc[mask & valid_mask, 'QC_Flag'] = flag_name
656
+ df.loc[mask & ~valid_mask, 'QC_Flag'] = df.loc[mask & ~valid_mask, 'QC_Flag'] + ', ' + flag_name
657
+
658
+ return df