AeroViz 0.1.9.4__py3-none-any.whl → 0.1.9.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of AeroViz might be problematic. (The original page linked to further details here; the link target was not preserved in this text export.)

Files changed (96)
  1. AeroViz/__init__.py +1 -2
  2. AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
  3. AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
  4. AeroViz/{plot/optical → dataProcess/Optical}/PyMieScatt_update.py +4 -11
  5. AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
  6. AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
  7. AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
  8. AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
  9. AeroViz/dataProcess/SizeDistr/prop.py +62 -0
  10. AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
  11. AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
  12. AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
  13. AeroViz/plot/__init__.py +0 -1
  14. AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
  15. AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
  16. AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
  17. AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
  18. AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
  19. AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
  20. AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
  21. AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
  22. AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
  23. AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
  24. AeroViz/plot/meteorology/{meteorology.py → CBPF.py} +150 -122
  25. AeroViz/plot/meteorology/__init__.py +3 -1
  26. AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
  27. AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
  28. AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
  29. AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
  30. AeroViz/plot/meteorology/wind_rose.py +77 -0
  31. AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
  32. AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
  33. AeroViz/plot/optical/optical.py +2 -3
  34. AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
  35. AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
  36. AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
  37. AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
  38. AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
  39. AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
  40. AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
  41. AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
  42. AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
  43. AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
  44. AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  45. AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
  46. AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
  47. AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
  48. AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
  49. AeroViz/plot/utils/plt_utils.py +1 -1
  50. AeroViz/rawDataReader/__init__.py +4 -2
  51. AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
  52. AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
  53. AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
  54. AeroViz/rawDataReader/core/__init__.py +71 -72
  55. AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
  56. AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
  57. AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
  58. AeroViz/rawDataReader/core/logger.py +78 -0
  59. AeroViz/rawDataReader/script/Aurora.py +1 -1
  60. AeroViz/rawDataReader/script/EPA.py +1 -1
  61. AeroViz/rawDataReader/script/Minion.py +4 -3
  62. AeroViz/rawDataReader/script/NEPH.py +1 -1
  63. AeroViz/rawDataReader/script/OCEC.py +1 -1
  64. AeroViz/rawDataReader/script/SMPS.py +9 -7
  65. AeroViz/rawDataReader/script/TEOM.py +1 -1
  66. AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
  67. AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
  68. AeroViz/rawDataReader/script/__pycache__/APS_3321.cpython-312.pyc +0 -0
  69. AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
  70. AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
  71. AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
  72. AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
  73. AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
  74. AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
  75. AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
  76. AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
  77. AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
  78. AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
  79. AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
  80. AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
  81. AeroViz/rawDataReader/script/__pycache__/XRF.cpython-312.pyc +0 -0
  82. AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
  83. AeroViz/tools/__init__.py +0 -1
  84. AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  85. AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
  86. AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
  87. {AeroViz-0.1.9.4.dist-info → AeroViz-0.1.9.6.dist-info}/METADATA +40 -78
  88. AeroViz-0.1.9.6.dist-info/RECORD +169 -0
  89. {AeroViz-0.1.9.4.dist-info → AeroViz-0.1.9.6.dist-info}/WHEEL +1 -1
  90. AeroViz/plot/hysplit/__init__.py +0 -1
  91. AeroViz/tools/datareader.py +0 -66
  92. AeroViz-0.1.9.4.dist-info/RECORD +0 -102
  93. AeroViz/{plot/optical → dataProcess/Optical}/mie_theory.py +0 -0
  94. AeroViz/plot/{hysplit → meteorology}/hysplit.py +0 -0
  95. {AeroViz-0.1.9.4.dist-info → AeroViz-0.1.9.6.dist-info}/LICENSE +0 -0
  96. {AeroViz-0.1.9.4.dist-info → AeroViz-0.1.9.6.dist-info}/top_level.txt +0 -0
AeroViz/rawDataReader/__init__.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from datetime import datetime
2
2
  from pathlib import Path
3
+ from typing import Any
3
4
 
4
5
  from pandas import Grouper, Timedelta
5
6
 
@@ -25,7 +26,7 @@ def RawDataReader(instrument_name: str,
25
26
  end: datetime = None,
26
27
  mean_freq: str = '1h',
27
28
  csv_out: bool = True,
28
- ):
29
+ **kwargs: Any):
29
30
  """
30
31
  Factory function to instantiate the appropriate reader module for a given instrument and
31
32
  return the processed data over the specified time range.
@@ -107,7 +108,8 @@ def RawDataReader(instrument_name: str,
107
108
  qc=qc,
108
109
  qc_freq=qc_freq,
109
110
  rate=rate,
110
- append_data=append_data
111
+ append_data=append_data,
112
+ **kwargs
111
113
  )
112
114
  return reader_module(
113
115
  start=start,
AeroViz/rawDataReader/core/__init__.py CHANGED
@@ -1,9 +1,9 @@
1
1
  import json
2
- import logging
3
2
  from abc import ABC, abstractmethod
3
+ from contextlib import contextmanager
4
4
  from datetime import datetime
5
5
  from pathlib import Path
6
- from typing import Optional
6
+ from typing import Optional, Generator
7
7
 
8
8
  import numpy as np
9
9
  import pandas as pd
@@ -12,6 +12,7 @@ from rich.console import Console
12
12
  from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, TaskProgressColumn
13
13
 
14
14
  from AeroViz.rawDataReader.config.supported_instruments import meta
15
+ from AeroViz.rawDataReader.core.logger import ReaderLogger
15
16
  from AeroViz.rawDataReader.core.qc import DataQualityControl
16
17
 
17
18
  __all__ = ['AbstractReader']
@@ -35,11 +36,12 @@ class AbstractReader(ABC):
35
36
  qc: bool = True,
36
37
  qc_freq: Optional[str] = None,
37
38
  rate: bool = True,
38
- append_data: bool = False):
39
+ append_data: bool = False,
40
+ **kwargs):
39
41
 
40
42
  self.path = Path(path)
41
43
  self.meta = meta[self.nam]
42
- self.logger = self._setup_logger()
44
+ self.logger = ReaderLogger(self.nam, self.path)
43
45
 
44
46
  self.reset = reset
45
47
  self.qc = qc
@@ -53,6 +55,8 @@ class AbstractReader(ABC):
53
55
  self.csv_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.csv'
54
56
  self.csv_out = self.path / f'output_{self.nam.lower()}.csv'
55
57
 
58
+ self.size_range = kwargs.get('size_range', (11.8, 593.5))
59
+
56
60
  def __call__(self,
57
61
  start: datetime,
58
62
  end: datetime,
@@ -78,20 +82,6 @@ class AbstractReader(ABC):
78
82
  def _QC(self, df: DataFrame) -> DataFrame:
79
83
  return df
80
84
 
81
- def _setup_logger(self) -> logging.Logger:
82
- logger = logging.getLogger(self.nam)
83
- logger.setLevel(logging.INFO)
84
-
85
- for handler in logger.handlers[:]:
86
- handler.close()
87
- logger.removeHandler(handler)
88
-
89
- handler = logging.FileHandler(self.path / f'{self.nam}.log')
90
- handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))
91
- logger.addHandler(handler)
92
-
93
- return logger
94
-
95
85
  def _rate_calculate(self, raw_data, qc_data) -> None:
96
86
  def __base_rate(raw_data, qc_data):
97
87
  period_size = len(raw_data.resample('1h').mean().index)
@@ -104,28 +94,27 @@ class AbstractReader(ABC):
104
94
 
105
95
  # validate rate calculation
106
96
  if period_size == 0 or sample_size == 0 or qc_size == 0:
107
- print(f'\t\t\033[91m No data for this period... skipping\033[0m')
97
+ self.logger.warning(f'\t\t No data for this period... skip')
108
98
  continue
109
-
110
- if period_size < sample_size or sample_size < qc_size:
111
- print(
112
- f'\t\tInvalid size relationship: period={period_size}, sample={sample_size}, QC={qc_size}... skipping')
99
+ if period_size < sample_size:
100
+ self.logger.warning(f'\t\tError: Sample({sample_size}) > Period({period_size})... skip')
101
+ continue
102
+ if sample_size < qc_size:
103
+ self.logger.warning(f'\t\tError: QC({qc_size}) > Sample({sample_size})... skip')
113
104
  continue
114
105
 
115
106
  else:
116
- _acq_rate = round((sample_size / period_size) * 100, 1)
117
- _yid_rate = round((qc_size / sample_size) * 100, 1)
118
- _OEE_rate = round((qc_size / period_size) * 100, 1)
119
-
120
- self.logger.info(f'{_nam}:')
121
- self.logger.info(f"\tAcquisition rate: {_acq_rate}%")
122
- self.logger.info(f'\tYield rate: {_yid_rate}%')
123
- self.logger.info(f'\tOEE rate: {_OEE_rate}%')
124
- self.logger.info(f"{'=' * 60}")
125
-
126
- print(f'\n\t{_nam} : ')
127
- print(f'\t\tacquisition rate | yield rate -> OEE rate : '
128
- f'\033[91m{_acq_rate}% | {_yid_rate}% -> {_OEE_rate}%\033[0m')
107
+ _sample_rate = round((sample_size / period_size) * 100, 1)
108
+ _valid_rate = round((qc_size / sample_size) * 100, 1)
109
+ _total_rate = round((qc_size / period_size) * 100, 1)
110
+
111
+ self.logger.info(f"\t\t{self.logger.CYAN}▶ {_nam}{self.logger.RESET}")
112
+ self.logger.info(
113
+ f"\t\t\t├─ {'Sample Rate':15}: {self.logger.BLUE}{_sample_rate:>6.1f}%{self.logger.RESET}")
114
+ self.logger.info(
115
+ f"\t\t\t├─ {'Valid Rate':15}: {self.logger.BLUE}{_valid_rate:>6.1f}%{self.logger.RESET}")
116
+ self.logger.info(
117
+ f"\t\t\t└─ {'Total Rate':15}: {self.logger.BLUE}{_total_rate:>6.1f}%{self.logger.RESET}")
129
118
 
130
119
  if self.meta['deter_key'] is not None:
131
120
  # use qc_freq to calculate each period rate
@@ -135,9 +124,8 @@ class AbstractReader(ABC):
135
124
 
136
125
  for (month, _sub_raw_data), (_, _sub_qc_data) in zip(raw_data_grouped, qc_data_grouped):
137
126
  self.logger.info(
138
- f"\tProcessing: {_sub_raw_data.index[0].strftime('%F')} to {_sub_raw_data.index[-1].strftime('%F')}")
139
- print(
140
- f"\n\tProcessing: {_sub_raw_data.index[0].strftime('%F')} to {_sub_raw_data.index[-1].strftime('%F')}")
127
+ f"\t{self.logger.BLUE} Processing: {_sub_raw_data.index[0].strftime('%F')}"
128
+ f" to {_sub_raw_data.index[-1].strftime('%F')}{self.logger.RESET}")
141
129
 
142
130
  __base_rate(_sub_raw_data, _sub_qc_data)
143
131
 
@@ -201,6 +189,34 @@ class AbstractReader(ABC):
201
189
  except Exception as e:
202
190
  raise IOError(f"Error saving data. {e}")
203
191
 
192
+ @contextmanager
193
+ def progress_reading(self, files: list) -> Generator:
194
+ # Create message temporary storage and replace logger method
195
+ logs = {level: [] for level in ['info', 'warning', 'error']}
196
+ original = {level: getattr(self.logger, level) for level in logs}
197
+
198
+ for level, msgs in logs.items():
199
+ setattr(self.logger, level, msgs.append)
200
+
201
+ try:
202
+ with Progress(
203
+ TextColumn("[bold blue]{task.description}", style="bold blue"),
204
+ BarColumn(bar_width=25, complete_style="green", finished_style="bright_green"),
205
+ TaskProgressColumn(),
206
+ TimeRemainingColumn(),
207
+ TextColumn("{task.fields[filename]}", style="yellow"),
208
+ console=Console(force_terminal=True, color_system="auto", width=120),
209
+ expand=False
210
+ ) as progress:
211
+ task = progress.add_task(f"▶ Reading {self.nam} files", total=len(files), filename="")
212
+ yield progress, task
213
+ finally:
214
+ # Restore logger method and output message
215
+ for level, msgs in logs.items():
216
+ setattr(self.logger, level, original[level])
217
+ for msg in msgs:
218
+ original[level](msg)
219
+
204
220
  def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
205
221
  files = [f
206
222
  for file_pattern in self.meta['pattern']
@@ -212,37 +228,28 @@ class AbstractReader(ABC):
212
228
  raise FileNotFoundError(f"No files in '{self.path}' could be read. Please check the current path.")
213
229
 
214
230
  df_list = []
215
- with Progress(
216
- TextColumn("[bold blue]{task.description}", style="bold blue"),
217
- BarColumn(bar_width=18, complete_style="green", finished_style="bright_green"),
218
- TaskProgressColumn(),
219
- TimeRemainingColumn(),
220
- TextColumn("{task.fields[filename]}", style="yellow"),
221
- console=Console(force_terminal=True, color_system="auto"),
222
- expand=False
223
- ) as progress:
224
- task = progress.add_task(f"Reading {self.nam} files", total=len(files), filename="")
231
+
232
+ # Context manager for progress bar display
233
+ with self.progress_reading(files) as (progress, task):
225
234
  for file in files:
226
235
  progress.update(task, advance=1, filename=file.name)
227
236
  try:
228
- df = self._raw_reader(file)
229
-
230
- if df is not None and not df.empty:
237
+ if (df := self._raw_reader(file)) is not None and not df.empty:
231
238
  df_list.append(df)
232
239
  else:
233
- self.logger.warning(f"File {file.name} produced an empty DataFrame or None.")
234
-
235
- except pd.errors.ParserError as e:
236
- self.logger.error(f"Error tokenizing data: {e}")
240
+ self.logger.warning(f"\tFile {file.name} produced an empty DataFrame or None.")
237
241
 
238
242
  except Exception as e:
239
243
  self.logger.error(f"Error reading {file.name}: {e}")
240
244
 
241
245
  if not df_list:
242
- raise ValueError("All files were either empty or failed to read.")
246
+ raise ValueError(f"\033[41m\033[97mAll files were either empty or failed to read.\033[0m")
243
247
 
244
248
  raw_data = concat(df_list, axis=0).groupby(level=0).first()
245
249
 
250
+ if self.nam == 'SMPS':
251
+ raw_data = raw_data.sort_index(axis=1, key=lambda x: x.astype(float))
252
+
246
253
  raw_data = self._timeIndex_process(raw_data).apply(to_numeric, errors='coerce').copy(deep=True)
247
254
  qc_data = self._QC(raw_data).apply(to_numeric, errors='coerce').copy(deep=True)
248
255
 
@@ -251,29 +258,28 @@ class AbstractReader(ABC):
251
258
  def _run(self, user_start, user_end):
252
259
  # read pickle if pickle file exists and 'reset=False' or process raw data or append new data
253
260
  if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and not self.reset:
254
- print(f"\n{datetime.now().strftime('%m/%d %X')} : Reading {self.nam} \033[96mPICKLE\033[0m "
255
- f"from {user_start} to {user_end}\n")
261
+ self.logger.info_box(f"Reading {self.nam} PICKLE from {user_start} to {user_end}", color_part="PICKLE")
256
262
 
257
263
  _f_raw_done, _f_qc_done = read_pickle(self.pkl_nam_raw), read_pickle(self.pkl_nam)
258
264
 
259
265
  if self.append:
260
- print(f"Appending new data from {user_start} to {user_end}")
266
+ self.logger.info_box(f"Appending New data from {user_start} to {user_end}", color_part="New data")
267
+
261
268
  _f_raw_new, _f_qc_new = self._read_raw_files()
262
269
  _f_raw = self._timeIndex_process(_f_raw_done, append_df=_f_raw_new)
263
270
  _f_qc = self._timeIndex_process(_f_qc_done, append_df=_f_qc_new)
271
+
264
272
  else:
265
273
  _f_raw, _f_qc = _f_raw_done, _f_qc_done
274
+
266
275
  return _f_qc if self.qc else _f_raw
267
276
 
268
277
  else:
269
- print(f"\n{datetime.now().strftime('%m/%d %X')} : Reading {self.nam} \033[96mRAW DATA\033[0m "
270
- f"from {user_start} to {user_end}\n")
278
+ self.logger.info_box(f"Reading {self.nam} RAW DATA from {user_start} to {user_end}", color_part="RAW DATA")
271
279
 
272
280
  _f_raw, _f_qc = self._read_raw_files()
273
281
 
274
282
  # process time index
275
- data_start, data_end = _f_raw.index.sort_values()[[0, -1]]
276
-
277
283
  _f_raw = self._timeIndex_process(_f_raw, user_start, user_end)
278
284
  _f_qc = self._timeIndex_process(_f_qc, user_start, user_end)
279
285
  _f_qc = self._outlier_process(_f_qc)
@@ -281,15 +287,8 @@ class AbstractReader(ABC):
281
287
  # save
282
288
  self._save_data(_f_raw, _f_qc)
283
289
 
284
- self.logger.info(f"{'=' * 60}")
285
- self.logger.info(f"Raw data time : {data_start} to {data_end}")
286
- self.logger.info(f"Output time : {user_start} to {user_end}")
287
- self.logger.info(f"{'-' * 60}")
288
-
289
290
  if self.rate:
290
- _f_raw = _f_raw.apply(to_numeric, errors='coerce')
291
- _f_qc = _f_qc.apply(to_numeric, errors='coerce')
292
- self._rate_calculate(_f_raw, _f_qc)
291
+ self._rate_calculate(_f_raw.apply(to_numeric, errors='coerce'), _f_qc.apply(to_numeric, errors='coerce'))
293
292
 
294
293
  return _f_qc if self.qc else _f_raw
295
294
 
AeroViz/rawDataReader/core/logger.py ADDED
@@ -0,0 +1,78 @@
1
+ import logging
2
+ import re
3
+ import sys
4
+ from pathlib import Path
5
+
6
+
7
+ class ReaderLogger:
8
+ def __init__(self, name: str, log_path: Path):
9
+ self.name = name
10
+ self.log_path = log_path
11
+
12
+ # ANSI color codes
13
+ self.CYAN = '\033[96m'
14
+ self.BLUE = '\033[94m'
15
+ self.GREEN = '\033[92m'
16
+ self.YELLOW = '\033[93m'
17
+ self.RED = '\033[91m'
18
+ self.RESET = '\033[0m'
19
+
20
+ self.logger = self._setup_logger()
21
+
22
+ def _setup_logger(self) -> logging.Logger:
23
+ logger = logging.getLogger(self.name)
24
+ logger.setLevel(logging.INFO)
25
+
26
+ # Remove existing handlers
27
+ for handler in logger.handlers[:]:
28
+ handler.close()
29
+ logger.removeHandler(handler)
30
+
31
+ # clean ANSI formatter (for log file)
32
+ class CleanFormatter(logging.Formatter):
33
+ def format(self, record):
34
+ formatted_msg = super().format(record)
35
+ return re.sub(r'\033\[[0-9;]*m', '', formatted_msg)
36
+
37
+ # Set up handlers
38
+ file_handler = logging.FileHandler(self.log_path / f'{self.name}.log')
39
+ file_handler.setFormatter(CleanFormatter('%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))
40
+
41
+ console_handler = logging.StreamHandler(sys.stdout)
42
+ console_handler.setFormatter(logging.Formatter('%(message)s'))
43
+
44
+ logger.addHandler(file_handler)
45
+ logger.addHandler(console_handler)
46
+
47
+ return logger
48
+
49
+ def info(self, msg: str):
50
+ self.logger.info(msg)
51
+
52
+ def warning(self, msg: str):
53
+ self.logger.warning(msg)
54
+
55
+ def error(self, msg: str):
56
+ self.logger.error(msg)
57
+
58
+ def info_box(self, text: str, color_part: str = None, width: int = 80):
59
+ """
60
+ Create a boxed message with optional colored text
61
+
62
+ Args:
63
+ text: Base text format (e.g., "Reading {} RAW DATA from {} to {}")
64
+ color_part: Part of text to be colored (e.g., "RAW DATA")
65
+ width: Box width
66
+ """
67
+ display_text = text.replace(color_part, " " * len(color_part)) if color_part else text
68
+
69
+ left_padding = " " * ((width - len(display_text)) // 2)
70
+ right_padding = " " * (width - len(display_text) - len(left_padding))
71
+
72
+ content = text.replace(color_part, f"{self.CYAN}{color_part}{self.RESET}") if color_part else text
73
+
74
+ __content__ = f"{left_padding}{content}{right_padding}"
75
+
76
+ self.info(f"╔{'═' * width}╗")
77
+ self.info(f"║{__content__}║")
78
+ self.info(f"╚{'═' * width}╝")
AeroViz/rawDataReader/script/Aurora.py CHANGED
@@ -38,7 +38,7 @@ class Reader(AbstractReader):
38
38
  _df = _df.loc[(_df['B'] > _df['G']) & (_df['G'] > _df['R'])]
39
39
 
40
40
  # use IQR_QC
41
- _df = self.time_aware_IQR_QC(_df)
41
+ _df = self.time_aware_IQR_QC(_df, time_window='1h')
42
42
 
43
43
  # make sure all columns have values, otherwise set to nan
44
44
  return _df.dropna(how='any').reindex(_index)
AeroViz/rawDataReader/script/EPA.py CHANGED
@@ -18,7 +18,7 @@ class Reader(AbstractReader):
18
18
  on_bad_lines='skip')
19
19
 
20
20
  if len(df.groupby('測站')) > 1:
21
- raise ValueError(f'Multiple stations found in the file: {df['測站'].unique()}')
21
+ raise ValueError(f"Multiple stations found in the file: {df['測站'].unique()}")
22
22
  else:
23
23
  if '測站' in df.columns:
24
24
  df.drop(columns=['測站'], inplace=True)
AeroViz/rawDataReader/script/Minion.py CHANGED
@@ -149,7 +149,7 @@ class Reader(AbstractReader):
149
149
  columns_to_convert = [col for col in MDL.keys() if col in df.columns]
150
150
  df[columns_to_convert] = df[columns_to_convert].div(1000)
151
151
 
152
- self.logger.info(f"XRF QAQC summary: transform values below MDL to {MDL_replace}")
152
+ self.logger.info(f"\t{'XRF QAQC summary':21}: transform values below MDL to {MDL_replace}")
153
153
 
154
154
  return df
155
155
 
@@ -206,9 +206,10 @@ class Reader(AbstractReader):
206
206
  # 計算保留的数據的百分比
207
207
  retained_percentage = (valid_mask.sum() / len(df)) * 100
208
208
 
209
- self.logger.info(f"Ions balance summary: {retained_percentage.__round__(0)}% within tolerance ± {tolerance}")
209
+ self.logger.info(
210
+ f"\t{'Ions balance summary':21}: {retained_percentage.__round__(0)}% within tolerance ± {tolerance}")
210
211
 
211
212
  if retained_percentage < 70:
212
- self.logger.warning("Warning: The percentage of retained data is less than 70%")
213
+ self.logger.warning("\tWarning: The percentage of retained data is less than 70%")
213
214
 
214
215
  return df
AeroViz/rawDataReader/script/NEPH.py CHANGED
@@ -74,7 +74,7 @@ class Reader(AbstractReader):
74
74
  # _df = _df.loc[(_df['B'] > _df['G']) & (_df['G'] > _df['R'])]
75
75
 
76
76
  # use IQR_QC
77
- _df = self.time_aware_IQR_QC(_df)
77
+ _df = self.time_aware_IQR_QC(_df, time_window='1h')
78
78
 
79
79
  # make sure all columns have values, otherwise set to nan
80
80
  return _df.dropna(how='any').reindex(_index)
AeroViz/rawDataReader/script/OCEC.py CHANGED
@@ -87,6 +87,6 @@ class Reader(AbstractReader):
87
87
  _df.loc[_df[col] <= threshold, col] = np.nan
88
88
 
89
89
  # use IQR_QC
90
- _df = self.time_aware_IQR_QC(_df, time_window='1h')
90
+ _df = self.time_aware_IQR_QC(_df)
91
91
 
92
92
  return _df.dropna(subset=['Thermal_OC', 'Optical_OC']).reindex(_index)
AeroViz/rawDataReader/script/SMPS.py CHANGED
@@ -1,7 +1,7 @@
1
1
  import csv
2
2
 
3
3
  import numpy as np
4
- from pandas import to_datetime, to_numeric, read_csv, isna
4
+ from pandas import to_datetime, to_numeric, read_csv
5
5
 
6
6
  from AeroViz.rawDataReader.core import AbstractReader
7
7
 
@@ -40,7 +40,7 @@ class Reader(AbstractReader):
40
40
 
41
41
  for date_format in date_formats:
42
42
  _time_index = parse_date(_df, date_format)
43
- if not isna(_time_index).all():
43
+ if not _time_index.isna().all():
44
44
  break
45
45
  else:
46
46
  raise ValueError("Unable to parse dates with given formats")
@@ -56,14 +56,17 @@ class Reader(AbstractReader):
56
56
  _df_smps.columns = _df_smps.columns.astype(float)
57
57
  _df_smps = _df_smps.loc[_df_smps.index.dropna().copy()]
58
58
 
59
- if _df_smps.columns[0] != 11.8:
60
- print(f'file_name: {file.name}')
61
- return None
59
+ if _df_smps.columns[0] != self.size_range[0] or _df_smps.columns[-1] != self.size_range[1]:
60
+ self.logger.info(f'\tSMPS file: {file.name} is not match the default size range {self.size_range}, '
61
+ f'it is ({_df_smps.columns[0]}, {_df_smps.columns[-1]})')
62
62
 
63
63
  return _df_smps.apply(to_numeric, errors='coerce')
64
64
 
65
65
  # QC data
66
66
  def _QC(self, _df):
67
+ size_range_mask = (_df.columns.astype(float) >= self.size_range[0]) & (
68
+ _df.columns.astype(float) <= self.size_range[1])
69
+ _df = _df.loc[:, size_range_mask]
67
70
 
68
71
  # mask out the data size lower than 7
69
72
  _df['total'] = _df.sum(axis=1, min_count=1) * (np.diff(np.log(_df.keys().to_numpy(float)))).mean()
@@ -74,8 +77,7 @@ class Reader(AbstractReader):
74
77
  _df = _df.mask(_df['total'] < 2000)
75
78
 
76
79
  # remove the bin over 400 nm which num. conc. larger than 4000
77
- _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]
78
-
80
+ _df_remv_ky = _df.keys()[:-1][_df.keys()[:-1] >= 400.]
79
81
  _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)
80
82
 
81
83
  return _df[_df.keys()[:-1]]
AeroViz/rawDataReader/script/TEOM.py CHANGED
@@ -40,7 +40,7 @@ class Reader(AbstractReader):
40
40
 
41
41
  # QC data in 1 hr
42
42
  # use time_aware_IQR_QC
43
- _df = self.time_aware_IQR_QC(_df, time_window='1h')
43
+ _df = self.time_aware_IQR_QC(_df, time_window='6h')
44
44
 
45
45
  # remove data where size < 50% in 1-hr
46
46
  points_per_hour = Timedelta('1h') / Timedelta(self.meta['freq'])
AeroViz/tools/__init__.py CHANGED
@@ -1,3 +1,2 @@
1
1
  from .database import DataBase
2
2
  from .dataclassifier import DataClassifier
3
- from .datareader import DataReader