AeroViz 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their public registries.

Potentially problematic release.


This version of AeroViz might be problematic. Click here for more details.

Files changed (87)
  1. AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
  2. AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
  3. AeroViz/dataProcess/Optical/_IMPROVE.py +1 -1
  4. AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
  5. AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
  6. AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
  7. AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
  8. AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
  9. AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
  10. AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
  11. AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
  12. AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
  13. AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
  14. AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
  15. AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
  16. AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
  17. AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
  18. AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
  19. AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
  20. AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
  21. AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
  22. AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
  23. AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
  24. AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
  25. AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
  26. AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
  27. AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
  28. AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
  29. AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
  30. AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
  31. AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
  32. AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
  33. AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
  34. AeroViz/plot/templates/koschmieder.py +1 -1
  35. AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
  36. AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
  37. AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
  38. AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  39. AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
  40. AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
  41. AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
  42. AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
  43. AeroViz/rawDataReader/__init__.py +121 -56
  44. AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
  45. AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
  46. AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
  47. AeroViz/rawDataReader/config/supported_instruments.py +7 -4
  48. AeroViz/rawDataReader/core/__init__.py +42 -42
  49. AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
  50. AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
  51. AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
  52. AeroViz/rawDataReader/core/logger.py +6 -2
  53. AeroViz/rawDataReader/core/qc.py +1 -1
  54. AeroViz/rawDataReader/script/AE33.py +1 -1
  55. AeroViz/rawDataReader/script/APS.py +13 -9
  56. AeroViz/rawDataReader/script/BAM1020.py +35 -0
  57. AeroViz/rawDataReader/script/NEPH.py +6 -10
  58. AeroViz/rawDataReader/script/OCEC.py +2 -2
  59. AeroViz/rawDataReader/script/SMPS.py +36 -16
  60. AeroViz/rawDataReader/script/TEOM.py +15 -3
  61. AeroViz/rawDataReader/script/__init__.py +1 -0
  62. AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
  63. AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
  64. AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
  65. AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
  66. AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
  67. AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
  68. AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
  69. AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
  70. AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
  71. AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
  72. AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
  73. AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
  74. AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
  75. AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
  76. AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
  77. AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
  78. AeroViz/rawDataReader/script/__pycache__/XRF.cpython-312.pyc +0 -0
  79. AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
  80. AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  81. AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
  82. AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
  83. {AeroViz-0.1.12.dist-info → AeroViz-0.1.14.dist-info}/METADATA +7 -6
  84. {AeroViz-0.1.12.dist-info → AeroViz-0.1.14.dist-info}/RECORD +87 -85
  85. {AeroViz-0.1.12.dist-info → AeroViz-0.1.14.dist-info}/LICENSE +0 -0
  86. {AeroViz-0.1.12.dist-info → AeroViz-0.1.14.dist-info}/WHEEL +0 -0
  87. {AeroViz-0.1.12.dist-info → AeroViz-0.1.14.dist-info}/top_level.txt +0 -0
@@ -41,7 +41,7 @@ def _revised(_df_mass, _df_RH):
41
41
 
42
42
  _df['AS'] = 2.2 * _frhs * _df_mass['S_AS'] + 4.8 * _frhl * _df_mass['L_AS']
43
43
  _df['AN'] = 2.4 * _frhs * _df_mass['S_AN'] + 5.1 * _frhl * _df_mass['L_AN']
44
- _df['OM'] = 2.8 * _df_mass['S_OM'] + 6.1 * _frhl * _df_mass['L_OM']
44
+ _df['OM'] = 2.8 * _df_mass['S_OM'] + 6.1 * _df_mass['L_OM']
45
45
  _df['Soil'] = _df_mass['Soil']
46
46
  _df['SS'] = 1.7 * _frhss * _df_mass['SS']
47
47
  _df['EC'] = 10 * _df_mass['EC']
@@ -89,7 +89,7 @@ def koschmieder(df: pd.DataFrame,
89
89
  )
90
90
 
91
91
  plt.xticks(ticks=np.array(range(0, 31, 5)), labels=np.array(range(0, 31, 5)))
92
-
92
+ fig.savefig('koschmieder.png', dpi=600)
93
93
  plt.show()
94
94
 
95
95
  return fig, ax
@@ -1,6 +1,6 @@
1
1
  from datetime import datetime
2
2
  from pathlib import Path
3
- from typing import Any
3
+ from typing import Literal
4
4
 
5
5
  from pandas import Grouper, Timedelta
6
6
 
@@ -10,66 +10,111 @@ from AeroViz.rawDataReader.script import *
10
10
  __all__ = ['RawDataReader']
11
11
 
12
12
  SUPPORTED_INSTRUMENTS = [
13
- NEPH, Aurora, SMPS, GRIMM, APS, AE33, AE43, BC1054,
14
- MA350, TEOM, OCEC, IGAC, VOC, EPA, Minion
13
+ NEPH, Aurora, SMPS, APS, GRIMM, AE33, AE43, BC1054,
14
+ MA350, BAM1020, TEOM, OCEC, IGAC, VOC, EPA, Minion
15
15
  ]
16
16
 
17
+ SIZE_RANGE_INSTRUMENTS = ['SMPS', 'APS', 'GRIMM']
17
18
 
18
- def RawDataReader(instrument_name: str,
19
+
20
+ def RawDataReader(instrument: str,
19
21
  path: Path | str,
20
22
  reset: bool = False,
21
23
  qc: bool | str = True,
22
- qc_freq: str | None = None,
23
- rate: bool = True,
24
- append_data: bool = False,
25
24
  start: datetime = None,
26
25
  end: datetime = None,
27
26
  mean_freq: str = '1h',
28
- csv_out: bool = True,
29
- **kwargs: Any):
27
+ size_range: tuple[float, float] | None = None,
28
+ suppress_warnings: bool = False,
29
+ log_level: Literal['DEBUG', 'INFO', 'WARNING', 'ERROR'] = 'INFO',
30
+ **kwargs):
30
31
  """
31
32
  Factory function to instantiate the appropriate reader module for a given instrument and
32
33
  return the processed data over the specified time range.
33
34
 
34
- :param instrument_name: The name of the instrument for which to read data. Must be a valid key in the `meta` dictionary.
35
- :param path: The directory where raw data files for the instrument are stored.
36
- :param reset: If True, reset the state and reprocess the data from scratch.
37
- :param qc: If True, apply quality control (QC) to the raw data.
38
- :param qc_freq: Frequency at which to perform QC. Must be one of 'W', 'M', 'Q', 'Y' for weekly, monthly, quarterly, or yearly.
39
- :param rate: If True, calculate rates from the data.
40
- :param append_data: If True, append new data to the existing dataset instead of overwriting it.
41
- :param start: Start time for filtering the data. If None, no start time filtering will be applied.
42
- :param end: End time for filtering the data. If None, no end time filtering will be applied.
43
- :param mean_freq: Resampling frequency for averaging the data. Example: '1h' for hourly mean.
44
- :param csv_out: If True, output the processed data as a CSV file.
45
-
46
- :return: An instance of the reader module corresponding to the specified instrument, which processes the data and returns it in a usable format.
47
-
48
- :raises ValueError: If the `instrument_name` provided is not a valid key in the `meta` dictionary.
49
- :raises ValueError: If the specified path does not exist or is not a directory.
50
- :raises ValueError: If the QC frequency is invalid.
51
- :raises ValueError: If start and end times are not both provided or are invalid.
52
- :raises ValueError: If the mean_freq is not a valid frequency string.
53
-
54
- :Example:
55
-
56
- To read and process data for the BC1054 instrument:
57
-
35
+ Parameters
36
+ ----------
37
+ instrument : str
38
+ The instrument name for which to read data, must be a valid key in the meta dictionary
39
+
40
+ path : Path or str
41
+ The directory where raw data files for the instrument are stored
42
+
43
+ reset : bool or str
44
+ Data processing control mode:
45
+ False (default) - Use existing processed data if available
46
+ True - Force reprocess all data from raw files
47
+ 'append' - Add new data to existing processed data
48
+
49
+ qc : bool or str
50
+ Quality control and rate calculation mode:
51
+ True (default) - Apply QC and calculate overall rates
52
+ False - Skip QC and return raw data only
53
+ str - Calculate rates at specified intervals:
54
+ 'W' - Weekly rates
55
+ 'MS' - Month start rates
56
+ 'QS' - Quarter start rates
57
+ 'YS' - Year start rates
58
+ Can add number prefix (e.g., '2MS' for bi-monthly)
59
+
60
+ start : datetime
61
+ Start time for filtering the data
62
+
63
+ end : datetime
64
+ End time for filtering the data
65
+
66
+ mean_freq : str
67
+ Resampling frequency for averaging the data (e.g., '1h' for hourly mean)
68
+
69
+ size_range : tuple[float, float], optional
70
+ Size range in nanometers (min_size, max_size) for SMPS/APS data filtering
71
+
72
+ suppress_warnings : bool, optional
73
+ Whether to suppress warning messages (default: False)
74
+
75
+ log_level : {'DEBUG', 'INFO', 'WARNING', 'ERROR'}
76
+ Logging level (default: 'INFO')
77
+
78
+ **kwargs
79
+ Additional arguments to pass to the reader module
80
+
81
+ Returns
82
+ -------
83
+ pd.DataFrame
84
+ Processed data with specified QC and time range
85
+
86
+ Raises
87
+ ------
88
+ ValueError
89
+ If instrument name is invalid
90
+ If path does not exist
91
+ If QC frequency is invalid
92
+ If time range is invalid
93
+ If mean_freq format is invalid
94
+
95
+ Examples
96
+ --------
58
97
  >>> from pathlib import Path
59
98
  >>> from datetime import datetime
99
+ >>> from AeroViz import RawDataReader
60
100
  >>>
61
- >>> data = RawDataReader(
62
- ... instrument_name='BC1054',
63
- ... path=Path('/path/to/data'),
64
- ... start=datetime(2024, 2, 1),
65
- ... end=datetime(2024, 7, 31))
101
+ >>> df_ae33 = RawDataReader(
102
+ ... instrument='AE33',
103
+ ... path=Path('/path/to/your/data/folder'),
104
+ ... reset=True,
105
+ ... qc='1MS',
106
+ ... start=datetime(2024, 1, 1),
107
+ ... end=datetime(2024, 6, 30),
108
+ ... mean_freq='1h',
109
+ ... )
66
110
  """
111
+
67
112
  # Mapping of instrument names to their respective classes
68
113
  instrument_class_map = {cls.__name__.split('.')[-1]: cls for cls in SUPPORTED_INSTRUMENTS}
69
114
 
70
115
  # Check if the instrument name is in the map
71
- if instrument_name not in meta.keys():
72
- raise ValueError(f"Instrument name '{instrument_name}' is not valid. \nMust be one of: {list(meta.keys())}")
116
+ if instrument not in meta.keys():
117
+ raise ValueError(f"Instrument name '{instrument}' is not valid. \nMust be one of: {list(meta.keys())}")
73
118
 
74
119
  # Check if path exists and is a directory
75
120
  if not isinstance(path, Path):
@@ -78,22 +123,21 @@ def RawDataReader(instrument_name: str,
78
123
  raise FileNotFoundError(f"The specified path '{path}' does not exist or is not a directory.")
79
124
 
80
125
  # Validate the QC frequency
81
- if qc_freq is not None:
126
+ if isinstance(qc, str):
82
127
  try:
83
- Grouper(freq=qc_freq)
84
- except ValueError as e:
85
- raise ValueError(f"Invalid frequency: {qc_freq}. Error: {str(e)}")
86
- except TypeError as e:
87
- raise ValueError(f"Invalid frequency type: {qc_freq}. Frequency should be a string.")
88
-
89
- if start and end:
90
- if end.hour == 0 and end.minute == 0 and end.second == 0:
91
- end = end.replace(hour=23, minute=59, second=59)
92
- else:
128
+ Grouper(freq=qc)
129
+ except (ValueError, TypeError):
130
+ raise ValueError(f"Invalid frequency: {qc}. Must be one of: "
131
+ f"W (week), MS (month start), QS (quarter start), YS (year start)")
132
+
133
+ # Verify input times
134
+ if not (start and end):
93
135
  raise ValueError("Both start and end times must be provided.")
94
136
  if end <= start:
95
137
  raise ValueError(f"Invalid time range: start {start} is after end {end}")
96
138
 
139
+ end = end.replace(hour=23, minute=59, second=59) if end.hour == 0 and end.minute == 0 else end
140
+
97
141
  # Verify that mean_freq format
98
142
  try:
99
143
  Timedelta(mean_freq)
@@ -101,19 +145,40 @@ def RawDataReader(instrument_name: str,
101
145
  raise ValueError(
102
146
  f"Invalid mean_freq: '{mean_freq}'. It should be a valid frequency string (e.g., '1h', '30min', '1D').")
103
147
 
148
+ # Validate size range
149
+ if size_range is not None:
150
+ if instrument not in SIZE_RANGE_INSTRUMENTS:
151
+ raise ValueError(f"Size range filtering is only supported for {SIZE_RANGE_INSTRUMENTS}")
152
+
153
+ min_size, max_size = size_range
154
+ if not isinstance(min_size, (int, float)) or not isinstance(max_size, (int, float)):
155
+ raise ValueError("Size range values must be numeric")
156
+ if min_size >= max_size:
157
+ raise ValueError("Minimum size must be less than maximum size")
158
+
159
+ if instrument == 'SMPS':
160
+ if not (1 <= min_size <= 1000) or not (1 <= max_size <= 1000):
161
+ raise ValueError("SMPS size range must be between 1 and 1000 nm")
162
+ elif instrument == 'APS':
163
+ if not (500 <= min_size <= 20000) or not (500 <= max_size <= 20000):
164
+ raise ValueError("APS size range must be between 500 and 20000 nm")
165
+
166
+ kwargs.update({'size_range': size_range})
167
+
168
+ kwargs.update({
169
+ 'suppress_warnings': suppress_warnings,
170
+ 'log_level': log_level
171
+ })
172
+
104
173
  # Instantiate the class and return the instance
105
- reader_module = instrument_class_map[instrument_name].Reader(
174
+ reader_module = instrument_class_map[instrument].Reader(
106
175
  path=path,
107
176
  reset=reset,
108
177
  qc=qc,
109
- qc_freq=qc_freq,
110
- rate=rate,
111
- append_data=append_data,
112
178
  **kwargs
113
179
  )
114
180
  return reader_module(
115
181
  start=start,
116
182
  end=end,
117
183
  mean_freq=mean_freq,
118
- csv_out=csv_out,
119
184
  )
@@ -58,6 +58,13 @@ meta = {
58
58
  "deter_key": {"BC Mass Conc. (880 nm)": ["BC5"]},
59
59
  },
60
60
 
61
+ "BAM1020": {
62
+ "pattern": ["*.csv"],
63
+ "freq": "1h",
64
+ "deter_key": {
65
+ "Mass Conc.": ["Conc"]},
66
+ },
67
+
61
68
  "TEOM": {
62
69
  "pattern": ["*.csv"],
63
70
  "freq": "6min",
@@ -71,10 +78,6 @@ meta = {
71
78
  "pattern": ["*LCRes.csv"],
72
79
  "freq": "1h",
73
80
  "deter_key": {
74
- "Thermal OC": ["Thermal_OC"],
75
- "Thermal EC": ["Thermal_EC"],
76
- "Optical OC": ["Optical_OC"],
77
- "Optical EC": ["Optical_EC"],
78
81
  "Thermal OC & EC": ["Thermal_OC", "Thermal_EC"],
79
82
  "Optical OC & EC": ["Optical_OC", "Optical_EC"],
80
83
  },
@@ -3,17 +3,16 @@ from abc import ABC, abstractmethod
3
3
  from contextlib import contextmanager
4
4
  from datetime import datetime
5
5
  from pathlib import Path
6
- from typing import Optional, Generator
6
+ from typing import Generator
7
7
 
8
8
  import numpy as np
9
9
  import pandas as pd
10
- from pandas import DataFrame, concat, read_pickle, to_numeric
11
10
  from rich.console import Console
12
11
  from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, TaskProgressColumn
13
12
 
14
13
  from AeroViz.rawDataReader.config.supported_instruments import meta
15
14
  from AeroViz.rawDataReader.core.logger import ReaderLogger
16
- from AeroViz.rawDataReader.core.qc import DataQualityControl
15
+ from AeroViz.rawDataReader.core.qc import QualityControl
17
16
 
18
17
  __all__ = ['AbstractReader']
19
18
 
@@ -32,45 +31,43 @@ class AbstractReader(ABC):
32
31
 
33
32
  def __init__(self,
34
33
  path: Path | str,
35
- reset: bool = False,
36
- qc: bool = True,
37
- qc_freq: Optional[str] = None,
38
- rate: bool = True,
39
- append_data: bool = False,
34
+ reset: bool | str = False,
35
+ qc: bool | str = True,
40
36
  **kwargs):
41
37
 
42
38
  self.path = Path(path)
43
39
  self.meta = meta[self.nam]
44
- self.logger = ReaderLogger(self.nam, self.path)
40
+ output_folder = self.path / f'{self.nam.lower()}_outputs'
41
+ output_folder.mkdir(parents=True, exist_ok=True)
45
42
 
46
- self.reset = reset
47
- self.qc = qc
48
- self.qc_freq = qc_freq
49
- self.rate = rate
50
- self.append = append_data and reset
43
+ self.logger = ReaderLogger(
44
+ self.nam, output_folder,
45
+ kwargs.get('log_level').upper() if not kwargs.get('suppress_warnings') else 'ERROR')
51
46
 
52
- self.pkl_nam = self.path / f'_read_{self.nam.lower()}.pkl'
53
- self.csv_nam = self.path / f'_read_{self.nam.lower()}.csv'
54
- self.pkl_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.pkl'
55
- self.csv_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.csv'
56
- self.csv_out = self.path / f'output_{self.nam.lower()}.csv'
47
+ self.reset = reset is True
48
+ self.append = reset == 'append'
49
+ self.qc = qc # if qc, then calculate rate
50
+ self.qc_freq = qc if isinstance(qc, str) else None
51
+ self.kwargs = kwargs
57
52
 
58
- self.size_range = kwargs.get('size_range', (11.8, 593.5))
53
+ self.pkl_nam = output_folder / f'_read_{self.nam.lower()}.pkl'
54
+ self.csv_nam = output_folder / f'_read_{self.nam.lower()}.csv'
55
+ self.pkl_nam_raw = output_folder / f'_read_{self.nam.lower()}_raw.pkl'
56
+ self.csv_nam_raw = output_folder / f'_read_{self.nam.lower()}_raw.csv'
57
+ self.csv_out = output_folder / f'output_{self.nam.lower()}.csv'
59
58
 
60
59
  def __call__(self,
61
60
  start: datetime,
62
61
  end: datetime,
63
62
  mean_freq: str = '1h',
64
- csv_out: bool = True,
65
- ) -> DataFrame:
63
+ ) -> pd.DataFrame:
66
64
 
67
65
  data = self._run(start, end)
68
66
 
69
67
  if data is not None:
70
- if mean_freq:
71
- data = data.resample(mean_freq).mean()
72
- if csv_out:
73
- data.to_csv(self.csv_out)
68
+ data = data.resample(mean_freq).mean()
69
+
70
+ data.to_csv(self.csv_out)
74
71
 
75
72
  return data
76
73
 
@@ -79,7 +76,7 @@ class AbstractReader(ABC):
79
76
  pass
80
77
 
81
78
  @abstractmethod
82
- def _QC(self, df: DataFrame) -> DataFrame:
79
+ def _QC(self, df: pd.DataFrame) -> pd.DataFrame:
83
80
  return df
84
81
 
85
82
  def _rate_calculate(self, raw_data, qc_data) -> None:
@@ -182,7 +179,7 @@ class AbstractReader(ABC):
182
179
 
183
180
  return _df
184
181
 
185
- def _save_data(self, raw_data: DataFrame, qc_data: DataFrame) -> None:
182
+ def _save_data(self, raw_data: pd.DataFrame, qc_data: pd.DataFrame) -> None:
186
183
  try:
187
184
  raw_data.to_pickle(self.pkl_nam_raw)
188
185
  raw_data.to_csv(self.csv_nam_raw)
@@ -222,7 +219,7 @@ class AbstractReader(ABC):
222
219
  for msg in msgs:
223
220
  original[level](msg)
224
221
 
225
- def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
222
+ def _read_raw_files(self) -> tuple[pd.DataFrame | None, pd.DataFrame | None]:
226
223
  files = [f
227
224
  for file_pattern in self.meta['pattern']
228
225
  for pattern in {file_pattern.lower(), file_pattern.upper(), file_pattern}
@@ -242,7 +239,7 @@ class AbstractReader(ABC):
242
239
  if (df := self._raw_reader(file)) is not None and not df.empty:
243
240
  df_list.append(df)
244
241
  else:
245
- self.logger.warning(f"\tFile {file.name} produced an empty DataFrame or None.")
242
+ self.logger.debug(f"\tFile {file.name} produced an empty DataFrame or None.")
246
243
 
247
244
  except Exception as e:
248
245
  self.logger.error(f"Error reading {file.name}: {e}")
@@ -250,13 +247,15 @@ class AbstractReader(ABC):
250
247
  if not df_list:
251
248
  raise ValueError(f"\033[41m\033[97mAll files were either empty or failed to read.\033[0m")
252
249
 
253
- raw_data = concat(df_list, axis=0).groupby(level=0).first()
250
+ raw_data = pd.concat(df_list, axis=0).groupby(level=0).first()
254
251
 
255
- if self.nam == 'SMPS':
252
+ if self.nam in ['SMPS', 'APS', 'GRIMM']:
256
253
  raw_data = raw_data.sort_index(axis=1, key=lambda x: x.astype(float))
257
254
 
258
- raw_data = self._timeIndex_process(raw_data).apply(to_numeric, errors='coerce').copy(deep=True)
259
- qc_data = self._QC(raw_data).apply(to_numeric, errors='coerce').copy(deep=True)
255
+ raw_data = self._timeIndex_process(raw_data)
256
+
257
+ raw_data = raw_data.apply(pd.to_numeric, errors='coerce').copy(deep=True)
258
+ qc_data = self._QC(raw_data).apply(pd.to_numeric, errors='coerce').copy(deep=True)
260
259
 
261
260
  return raw_data, qc_data
262
261
 
@@ -265,7 +264,7 @@ class AbstractReader(ABC):
265
264
  if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and not self.reset:
266
265
  self.logger.info_box(f"Reading {self.nam} PICKLE from {user_start} to {user_end}", color_part="PICKLE")
267
266
 
268
- _f_raw_done, _f_qc_done = read_pickle(self.pkl_nam_raw), read_pickle(self.pkl_nam)
267
+ _f_raw_done, _f_qc_done = pd.read_pickle(self.pkl_nam_raw), pd.read_pickle(self.pkl_nam)
269
268
 
270
269
  if self.append:
271
270
  self.logger.info_box(f"Appending New data from {user_start} to {user_end}", color_part="New data")
@@ -292,25 +291,26 @@ class AbstractReader(ABC):
292
291
  # save
293
292
  self._save_data(_f_raw, _f_qc)
294
293
 
295
- if self.rate:
296
- self._rate_calculate(_f_raw.apply(to_numeric, errors='coerce'), _f_qc.apply(to_numeric, errors='coerce'))
294
+ if self.qc:
295
+ self._rate_calculate(_f_raw.apply(pd.to_numeric, errors='coerce'),
296
+ _f_qc.apply(pd.to_numeric, errors='coerce'))
297
297
 
298
298
  return _f_qc if self.qc else _f_raw
299
299
 
300
300
  @staticmethod
301
- def reorder_dataframe_columns(df, order_lists, others_col=False):
301
+ def reorder_dataframe_columns(df, order_lists: list[list], keep_others: bool = False):
302
302
  new_order = []
303
303
 
304
304
  for order in order_lists:
305
- # 只添加存在於DataFrame中的欄位,且不重複添加
305
+ # Only add column that exist in the DataFrame and do not add them repeatedly
306
306
  new_order.extend([col for col in order if col in df.columns and col not in new_order])
307
307
 
308
- if others_col:
309
- # 添加所有不在新順序列表中的原始欄位,保持它們的原始順序
308
+ if keep_others:
309
+ # Add all original fields not in the new order list, keeping their original order
310
310
  new_order.extend([col for col in df.columns if col not in new_order])
311
311
 
312
312
  return df[new_order]
313
313
 
314
314
  @staticmethod
315
315
  def time_aware_IQR_QC(df: pd.DataFrame, time_window='1D', log_dist=False) -> pd.DataFrame:
316
- return DataQualityControl().time_aware_iqr(df, time_window=time_window, log_dist=log_dist)
316
+ return QualityControl().time_aware_iqr(df, time_window=time_window, log_dist=log_dist)
@@ -8,9 +8,10 @@ from pathlib import Path
8
8
 
9
9
 
10
10
  class ReaderLogger:
11
- def __init__(self, name: str, log_path: Path):
11
+ def __init__(self, name: str, log_path: Path, log_level: str = 'WARNING'):
12
12
  self.name = name
13
13
  self.log_path = log_path
14
+ self._log_level = getattr(logging, log_level)
14
15
 
15
16
  # 檢查是否支持顏色輸出
16
17
  self.color_support = self._check_color_support()
@@ -92,7 +93,7 @@ class ReaderLogger:
92
93
  def _setup_logger(self) -> logging.Logger:
93
94
  """設置logger"""
94
95
  logger = logging.getLogger(self.name)
95
- logger.setLevel(logging.INFO)
96
+ logger.setLevel(self._log_level)
96
97
 
97
98
  # 移除現有的 handlers
98
99
  for handler in logger.handlers[:]:
@@ -135,6 +136,9 @@ class ReaderLogger:
135
136
  text = text.encode('ascii', 'replace').decode('ascii')
136
137
  return text
137
138
 
139
+ def debug(self, msg: str):
140
+ self.logger.debug(self._safe_print(msg))
141
+
138
142
  def info(self, msg: str):
139
143
  self.logger.info(self._safe_print(msg))
140
144
 
@@ -2,7 +2,7 @@ import numpy as np
2
2
  import pandas as pd
3
3
 
4
4
 
5
- class DataQualityControl:
5
+ class QualityControl:
6
6
  """A class providing various methods for data quality control and outlier detection"""
7
7
 
8
8
  @staticmethod
@@ -8,7 +8,7 @@ class Reader(AbstractReader):
8
8
 
9
9
  def _raw_reader(self, file):
10
10
  if file.stat().st_size / 1024 < 550:
11
- self.logger.info(f'\t {file.name} may not be a whole daily data. Make sure the file is correct.')
11
+ self.logger.warning(f'\t {file.name} may not be a whole daily data. Make sure the file is correct.')
12
12
 
13
13
  _df = read_table(file, parse_dates={'time': [0, 1]}, index_col='time',
14
14
  delimiter=r'\s+', skiprows=5, usecols=range(67))