aeroviz-0.1.21-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. AeroViz/__init__.py +13 -0
  2. AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
  3. AeroViz/data/DEFAULT_DATA.csv +1417 -0
  4. AeroViz/data/DEFAULT_PNSD_DATA.csv +1417 -0
  5. AeroViz/data/hysplit_example_data.txt +101 -0
  6. AeroViz/dataProcess/Chemistry/__init__.py +149 -0
  7. AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
  8. AeroViz/dataProcess/Chemistry/_calculate.py +557 -0
  9. AeroViz/dataProcess/Chemistry/_isoropia.py +150 -0
  10. AeroViz/dataProcess/Chemistry/_mass_volume.py +487 -0
  11. AeroViz/dataProcess/Chemistry/_ocec.py +172 -0
  12. AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
  13. AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
  14. AeroViz/dataProcess/Optical/PyMieScatt_update.py +577 -0
  15. AeroViz/dataProcess/Optical/_IMPROVE.py +452 -0
  16. AeroViz/dataProcess/Optical/__init__.py +281 -0
  17. AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
  18. AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
  19. AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
  20. AeroViz/dataProcess/Optical/_derived.py +518 -0
  21. AeroViz/dataProcess/Optical/_extinction.py +123 -0
  22. AeroViz/dataProcess/Optical/_mie_sd.py +912 -0
  23. AeroViz/dataProcess/Optical/_retrieve_RI.py +243 -0
  24. AeroViz/dataProcess/Optical/coefficient.py +72 -0
  25. AeroViz/dataProcess/Optical/fRH.pkl +0 -0
  26. AeroViz/dataProcess/Optical/mie_theory.py +260 -0
  27. AeroViz/dataProcess/README.md +271 -0
  28. AeroViz/dataProcess/SizeDistr/__init__.py +245 -0
  29. AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
  30. AeroViz/dataProcess/SizeDistr/__pycache__/_size_dist.cpython-312.pyc +0 -0
  31. AeroViz/dataProcess/SizeDistr/_size_dist.py +810 -0
  32. AeroViz/dataProcess/SizeDistr/merge/README.md +93 -0
  33. AeroViz/dataProcess/SizeDistr/merge/__init__.py +20 -0
  34. AeroViz/dataProcess/SizeDistr/merge/_merge_v0.py +251 -0
  35. AeroViz/dataProcess/SizeDistr/merge/_merge_v0_1.py +246 -0
  36. AeroViz/dataProcess/SizeDistr/merge/_merge_v1.py +255 -0
  37. AeroViz/dataProcess/SizeDistr/merge/_merge_v2.py +244 -0
  38. AeroViz/dataProcess/SizeDistr/merge/_merge_v3.py +518 -0
  39. AeroViz/dataProcess/SizeDistr/merge/_merge_v4.py +422 -0
  40. AeroViz/dataProcess/SizeDistr/prop.py +62 -0
  41. AeroViz/dataProcess/VOC/__init__.py +14 -0
  42. AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
  43. AeroViz/dataProcess/VOC/_potential_par.py +108 -0
  44. AeroViz/dataProcess/VOC/support_voc.json +446 -0
  45. AeroViz/dataProcess/__init__.py +66 -0
  46. AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
  47. AeroViz/dataProcess/core/__init__.py +272 -0
  48. AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
  49. AeroViz/mcp_server.py +352 -0
  50. AeroViz/plot/__init__.py +13 -0
  51. AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
  52. AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
  53. AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
  54. AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
  55. AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
  56. AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
  57. AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
  58. AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
  59. AeroViz/plot/bar.py +126 -0
  60. AeroViz/plot/box.py +69 -0
  61. AeroViz/plot/distribution/__init__.py +1 -0
  62. AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
  63. AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
  64. AeroViz/plot/distribution/distribution.py +576 -0
  65. AeroViz/plot/meteorology/CBPF.py +295 -0
  66. AeroViz/plot/meteorology/__init__.py +3 -0
  67. AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
  68. AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
  69. AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
  70. AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
  71. AeroViz/plot/meteorology/hysplit.py +93 -0
  72. AeroViz/plot/meteorology/wind_rose.py +77 -0
  73. AeroViz/plot/optical/__init__.py +1 -0
  74. AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
  75. AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
  76. AeroViz/plot/optical/optical.py +388 -0
  77. AeroViz/plot/pie.py +210 -0
  78. AeroViz/plot/radar.py +184 -0
  79. AeroViz/plot/regression.py +200 -0
  80. AeroViz/plot/scatter.py +174 -0
  81. AeroViz/plot/templates/__init__.py +6 -0
  82. AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
  83. AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
  84. AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
  85. AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
  86. AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
  87. AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
  88. AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
  89. AeroViz/plot/templates/ammonium_rich.py +34 -0
  90. AeroViz/plot/templates/contour.py +47 -0
  91. AeroViz/plot/templates/corr_matrix.py +267 -0
  92. AeroViz/plot/templates/diurnal_pattern.py +61 -0
  93. AeroViz/plot/templates/koschmieder.py +95 -0
  94. AeroViz/plot/templates/metal_heatmap.py +164 -0
  95. AeroViz/plot/timeseries/__init__.py +2 -0
  96. AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
  97. AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
  98. AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
  99. AeroViz/plot/timeseries/template.py +47 -0
  100. AeroViz/plot/timeseries/timeseries.py +446 -0
  101. AeroViz/plot/utils/__init__.py +4 -0
  102. AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  103. AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
  104. AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
  105. AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
  106. AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
  107. AeroViz/plot/utils/_color.py +71 -0
  108. AeroViz/plot/utils/_unit.py +55 -0
  109. AeroViz/plot/utils/fRH.json +390 -0
  110. AeroViz/plot/utils/plt_utils.py +92 -0
  111. AeroViz/plot/utils/sklearn_utils.py +49 -0
  112. AeroViz/plot/utils/units.json +89 -0
  113. AeroViz/plot/violin.py +80 -0
  114. AeroViz/rawDataReader/FLOW.md +138 -0
  115. AeroViz/rawDataReader/__init__.py +220 -0
  116. AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
  117. AeroViz/rawDataReader/config/__init__.py +0 -0
  118. AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
  119. AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
  120. AeroViz/rawDataReader/config/supported_instruments.py +135 -0
  121. AeroViz/rawDataReader/core/__init__.py +658 -0
  122. AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
  123. AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
  124. AeroViz/rawDataReader/core/__pycache__/pre_process.cpython-312.pyc +0 -0
  125. AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
  126. AeroViz/rawDataReader/core/__pycache__/report.cpython-312.pyc +0 -0
  127. AeroViz/rawDataReader/core/logger.py +171 -0
  128. AeroViz/rawDataReader/core/pre_process.py +308 -0
  129. AeroViz/rawDataReader/core/qc.py +961 -0
  130. AeroViz/rawDataReader/core/report.py +579 -0
  131. AeroViz/rawDataReader/script/AE33.py +173 -0
  132. AeroViz/rawDataReader/script/AE43.py +151 -0
  133. AeroViz/rawDataReader/script/APS.py +339 -0
  134. AeroViz/rawDataReader/script/Aurora.py +191 -0
  135. AeroViz/rawDataReader/script/BAM1020.py +90 -0
  136. AeroViz/rawDataReader/script/BC1054.py +161 -0
  137. AeroViz/rawDataReader/script/EPA.py +79 -0
  138. AeroViz/rawDataReader/script/GRIMM.py +68 -0
  139. AeroViz/rawDataReader/script/IGAC.py +140 -0
  140. AeroViz/rawDataReader/script/MA350.py +179 -0
  141. AeroViz/rawDataReader/script/Minion.py +218 -0
  142. AeroViz/rawDataReader/script/NEPH.py +199 -0
  143. AeroViz/rawDataReader/script/OCEC.py +173 -0
  144. AeroViz/rawDataReader/script/Q-ACSM.py +12 -0
  145. AeroViz/rawDataReader/script/SMPS.py +389 -0
  146. AeroViz/rawDataReader/script/TEOM.py +181 -0
  147. AeroViz/rawDataReader/script/VOC.py +106 -0
  148. AeroViz/rawDataReader/script/Xact.py +244 -0
  149. AeroViz/rawDataReader/script/__init__.py +28 -0
  150. AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
  151. AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
  152. AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
  153. AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
  154. AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
  155. AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
  156. AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
  157. AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
  158. AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
  159. AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
  160. AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
  161. AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
  162. AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
  163. AeroViz/rawDataReader/script/__pycache__/Q-ACSM.cpython-312.pyc +0 -0
  164. AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
  165. AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
  166. AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
  167. AeroViz/rawDataReader/script/__pycache__/Xact.cpython-312.pyc +0 -0
  168. AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
  169. AeroViz/tools/__init__.py +2 -0
  170. AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  171. AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
  172. AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
  173. AeroViz/tools/database.py +95 -0
  174. AeroViz/tools/dataclassifier.py +117 -0
  175. AeroViz/tools/dataprinter.py +58 -0
  176. aeroviz-0.1.21.dist-info/METADATA +294 -0
  177. aeroviz-0.1.21.dist-info/RECORD +180 -0
  178. aeroviz-0.1.21.dist-info/WHEEL +5 -0
  179. aeroviz-0.1.21.dist-info/licenses/LICENSE +21 -0
  180. aeroviz-0.1.21.dist-info/top_level.txt +1 -0
AeroViz/rawDataReader/core/report.py
@@ -0,0 +1,579 @@
+ import os
+ from datetime import datetime, timedelta
+
+ import pandas as pd
+ from rich.console import Console
+ from rich.table import Table
+ from rich.text import Text
+ from rich.panel import Panel
+
+
+ def print_timeline_visual(timeline_data: list, start_date: str = None, end_date: str = None,
+                           width: int = 80, show_details: bool = True) -> None:
+     """
+     Print a visual representation of the timeline to the console.
+
+     Parameters
+     ----------
+     timeline_data : list
+         List of timeline entries with 'start', 'end', 'status', 'reason', 'duration' keys.
+     start_date : str, optional
+         Overall start date (format: 'YYYY/MM/DD HH:MM').
+     end_date : str, optional
+         Overall end date (format: 'YYYY/MM/DD HH:MM').
+     width : int, default 80
+         Width of the timeline bar in characters.
+     show_details : bool, default True
+         Whether to show detailed downtime information.
+
+     Notes
+     -----
+     Creates a visual bar showing operational (green) and down (red) periods,
+     followed by a summary table of downtime events.
+     """
+     if not timeline_data:
+         return
+
+     console = Console(force_terminal=True)
+
+     # Parse dates
+     def parse_date(date_str):
+         try:
+             return datetime.strptime(date_str, '%Y/%m/%d %H:%M')
+         except ValueError:
+             return datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S')
+
+     # Get time range
+     if start_date and end_date:
+         total_start = parse_date(start_date)
+         total_end = parse_date(end_date)
+     else:
+         all_times = []
+         for entry in timeline_data:
+             all_times.append(parse_date(entry['start']))
+             all_times.append(parse_date(entry['end']))
+         total_start = min(all_times)
+         total_end = max(all_times)
+
+     total_duration = (total_end - total_start).total_seconds()
+     if total_duration <= 0:
+         return
+
+     # Build visual bar
+     bar_chars = [' '] * width
+
+     for entry in timeline_data:
+         entry_start = parse_date(entry['start'])
+         entry_end = parse_date(entry['end'])
+
+         # Calculate positions
+         start_pos = int(((entry_start - total_start).total_seconds() / total_duration) * width)
+         end_pos = int(((entry_end - total_start).total_seconds() / total_duration) * width)
+
+         start_pos = max(0, min(width - 1, start_pos))
+         end_pos = max(0, min(width, end_pos))
+
+         char = '█' if entry['status'] == 'operational' else '░'
+         for i in range(start_pos, end_pos):
+             bar_chars[i] = char
+
+     # Create colored bar
+     bar_text = Text()
+     current_char = None
+     current_count = 0
+
+     for char in bar_chars:
+         if char == current_char:
+             current_count += 1
+         else:
+             if current_char is not None:
+                 color = 'green' if current_char == '█' else 'red'
+                 bar_text.append(current_char * current_count, style=color)
+             current_char = char
+             current_count = 1
+
+     if current_char is not None:
+         color = 'green' if current_char == '█' else 'red'
+         bar_text.append(current_char * current_count, style=color)
+
+     # Calculate statistics
+     operational_hours = 0
+     down_hours = 0
+     down_events = []
+
+     for entry in timeline_data:
+         entry_start = parse_date(entry['start'])
+         entry_end = parse_date(entry['end'])
+         duration_hours = (entry_end - entry_start).total_seconds() / 3600
+
+         if entry['status'] == 'operational':
+             operational_hours += duration_hours
+         else:
+             down_hours += duration_hours
+             down_events.append(entry)
+
+     total_hours = operational_hours + down_hours
+     uptime_pct = (operational_hours / total_hours * 100) if total_hours > 0 else 0
+
+     # Print header
+     console.print()
+     console.print("[bold cyan]Data Timeline[/bold cyan]")
+     console.print(f"[dim]{total_start.strftime('%Y/%m/%d')} → {total_end.strftime('%Y/%m/%d')}[/dim]")
+     console.print()
+
+     # Print bar with date markers
+     console.print(f"[dim]{total_start.strftime('%m/%d')}[/dim]", end="")
+     padding = width - 10
+     console.print(" " * (padding // 2), end="")
+     console.print(f"[dim]{total_end.strftime('%m/%d')}[/dim]")
+
+     # Print the bar
+     console.print("[", end="")
+     console.print(bar_text, end="")
+     console.print("]")
+
+     # Print legend
+     console.print()
+     console.print(Text("█ Operational", style="green"), end=" ")
+     console.print(Text("░ Down", style="red"))
+
+     # Print summary
+     console.print()
+     console.print("[bold]Summary:[/bold]")
+     console.print(f" • Uptime: [green]{uptime_pct:.1f}%[/green] ({operational_hours:.1f} hours)")
+     console.print(f" • Downtime: [red]{100 - uptime_pct:.1f}%[/red] ({down_hours:.1f} hours)")
+     console.print(f" • Down events: {len(down_events)}")
+
+     # Print detailed downtime table if requested
+     if show_details and down_events:
+         console.print()
+         table = Table(title="Downtime Events", show_header=True, header_style="bold magenta")
+         table.add_column("Start", style="dim")
+         table.add_column("End", style="dim")
+         table.add_column("Duration", justify="right")
+         table.add_column("Reason", style="yellow")
+
+         # Only show first 10 events to avoid clutter
+         display_events = down_events[:10]
+         for event in display_events:
+             table.add_row(
+                 event['start'],
+                 event['end'],
+                 event.get('duration', 'N/A'),
+                 event.get('reason', 'Unknown')
+             )
+
+         if len(down_events) > 10:
+             table.add_row("...", "...", "...", f"(+{len(down_events) - 10} more events)")
+
+         console.print(table)
+
+     console.print()
+
+
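# --- Editor's illustrative sketch (not part of the package) ---
# A minimal call to print_timeline_visual with hypothetical timeline entries;
# the dict keys follow the docstring above.
#
# timeline = [
#     {'start': '2024/01/01 00:00', 'end': '2024/01/03 12:00',
#      'status': 'operational', 'reason': None, 'duration': None},
#     {'start': '2024/01/03 12:00', 'end': '2024/01/04 00:00',
#      'status': 'down', 'reason': 'Power outage', 'duration': '12 hours'},
# ]
# print_timeline_visual(timeline, start_date='2024/01/01 00:00',
#                       end_date='2024/01/04 00:00')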
+ def process_timeline_report(report_dict: dict, df: pd.DataFrame, max_gap_hours: int = 2,
+                             logger=None, show_visual: bool = True) -> dict:
+     """
+     Process instrument data and generate timeline data showing operational status.
+
+     Parameters
+     ----------
+     report_dict : dict
+         Report dictionary containing instrument information and configuration.
+     df : pandas.DataFrame
+         Data frame containing instrument measurements with a datetime index or column.
+     max_gap_hours : int, default 2
+         Maximum allowed downtime (hours) before treating it as a significant data gap
+         rather than brief downtime.
+     logger : Logger, optional
+         Logger object to use for logging messages. If None, print statements are used.
+     show_visual : bool, default True
+         If True, prints a visual timeline to the console showing operational/down periods.
+
+     Returns
+     -------
+     dict
+         Updated report_dict with an added 'timeline' key containing status changes.
+         Timeline data includes operational periods and downtime periods with reasons.
+
+     Notes
+     -----
+     The function detects data gaps based on whether any data exists in each row.
+     Known issues are loaded from a YAML configuration file and matched against
+     detected downtime periods to provide specific reason information.
+
+     When show_visual=True, a graphical timeline is displayed in the console showing:
+     - Green blocks for operational periods
+     - Red blocks for downtime periods
+     - Summary statistics (uptime percentage, downtime hours, event count)
+     - Detailed table of downtime events with reasons
+     """
+
+     # Helper function for logging
+     def log_message(level: str, message: str) -> None:
+         if logger:
+             if level == "info":
+                 logger.info(message)
+             elif level == "warning":
+                 logger.warning(message)
+             elif level == "error":
+                 logger.error(message)
+         else:
+             print(message)
+
+     # Use the instrument ID from the report
+     instrument_id = report_dict.get('instrument_id')
+
+     # Look up known issues - use an environment variable or the default path
+     known_issues_file = os.environ.get(
+         'KNOWN_ISSUES_PATH',
+         '/Users/chanchihyu/DataCenter/Config/known_issues.yml'
+     )
+     try:
+         import yaml
+         with open(known_issues_file, 'r', encoding='utf-8') as f:
+             known_issues = yaml.safe_load(f)
+     except ImportError:
+         known_issues = {}
+     except FileNotFoundError:
+         # Silently ignore missing known issues file - it's optional
+         known_issues = {}
+     except Exception as e:
+         log_message("error", f"Error loading known issues: {e}")
+         known_issues = {}
+
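# --- Assumed known_issues.yml layout (editor's sketch) ---
# Inferred from how find_known_issue below reads known_issues[instrument_id]
# as a list of entries with 'start', 'end' and 'reason'; the instrument ID
# and values here are hypothetical examples.
#
# NEPH:
#   - start: '2024-01-10 08:00'
#     end: '2024-01-12 16:00'
#     reason: 'Pump maintenance'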
+     # Check whether the data is empty
+     if df.empty:
+         return report_dict
+
+     # Handle the time column
+     df = df.copy()
+     time_col = None
+
+     # If the index is a DatetimeIndex, reset it to a column
+     if isinstance(df.index, pd.DatetimeIndex):
+         df = df.reset_index()
+         time_col = df.columns[0]
+     else:
+         # Try to find the time column
+         time_col_candidates = ['time', 'Time', 'timestamp', 'Timestamp',
+                                'datetime', 'DateTime', 'date', 'Date']
+         for col in time_col_candidates:
+             if col in df.columns:
+                 time_col = col
+                 break
+
+     if not time_col:
+         return report_dict
+
+     # Ensure the time column is in datetime format
+     try:
+         df[time_col] = pd.to_datetime(df[time_col])
+     except Exception as e:
+         log_message("error", f"Time format conversion failed: {e}")
+         return report_dict
+
+     # Sort the data
+     df = df.sort_values(time_col)
+
+     # Determine status: check whether each row has any data (excluding the time column)
+     data_columns = [col for col in df.columns if col != time_col]
+     df['operational'] = df[data_columns].notna().any(axis=1)
+
+     # Track status changes
+     status_changes = []
+     current_status = None
+     current_start = None
+     previous_time = None
+     current_down_start = None
+
+     # Inner function: format a time interval
+     def format_duration(duration):
+         """Format a time interval as a human-readable string."""
+         hours = duration.total_seconds() / 3600
+         if hours < 1:
+             return f"{int(hours * 60)} minutes"
+         elif hours < 24:
+             return f"{int(hours)} hours"
+         else:
+             days = int(hours / 24)
+             remaining_hours = int(hours % 24)
+             return f"{days} days{' ' + str(remaining_hours) + ' hours' if remaining_hours > 0 else ''}"
+
+     # Inner function: look up known issues
+     def find_known_issue(start_time, end_time):
+         """Look up known issues overlapping the given time window."""
+         if not known_issues or not instrument_id or instrument_id not in known_issues:
+             return None
+
+         instrument_issues = known_issues.get(instrument_id, [])
+         start_time = pd.to_datetime(start_time)
+         end_time = pd.to_datetime(end_time)
+
+         for issue in instrument_issues:
+             try:
+                 issue_start = pd.to_datetime(issue['start'])
+                 issue_end = pd.to_datetime(issue['end'])
+
+                 # Check whether the time windows overlap
+                 if not (end_time <= issue_start or start_time >= issue_end):
+                     return issue['reason']
+             except Exception as e:
+                 log_message("error", f"Error processing issue: {e}")
+
+         return None
+
+     # Process each row
+     for i, row in df.iterrows():
+         time_str = row[time_col].strftime('%Y/%m/%d %H:%M')
+         status = 'operational' if row['operational'] else 'down'
+
+         # First record
+         if current_status is None:
+             current_status = status
+             current_start = time_str
+             previous_time = row[time_col]
+             if status == 'down':
+                 current_down_start = row[time_col]
+             continue
+
+         # Status change
+         if status != current_status:
+             # Operational -> down
+             if status == 'down':
+                 status_changes.append({
+                     'start': current_start,
+                     'end': time_str,
+                     'status': 'operational',
+                     'reason': None,
+                     'duration': None
+                 })
+                 current_start = time_str
+                 current_down_start = row[time_col]
+             # Down -> operational
+             else:
+                 down_duration = row[time_col] - current_down_start
+                 known_reason = find_known_issue(current_start, time_str)
+
+                 if known_reason:
+                     reason = known_reason
+                 elif down_duration <= pd.Timedelta(hours=max_gap_hours):
+                     reason = "Brief Downtime"
+                 else:
+                     reason = "Data Gap"
+
+                 status_changes.append({
+                     'start': current_start,
+                     'end': time_str,
+                     'status': 'down',
+                     'reason': reason,
+                     'duration': format_duration(down_duration)
+                 })
+                 current_start = time_str
+                 current_down_start = None
+
+             current_status = status
+
+         previous_time = row[time_col]
+
+     # Append the final period
+     if current_start is not None:
+         if current_status == 'down':
+             down_duration = previous_time - current_down_start
+             known_reason = find_known_issue(current_start, previous_time.strftime('%Y/%m/%d %H:%M'))
+
+             if known_reason:
+                 reason = known_reason
+             elif down_duration <= pd.Timedelta(hours=max_gap_hours):
+                 reason = "Brief Downtime"
+             else:
+                 reason = "Data Gap"
+
+             status_changes.append({
+                 'start': current_start,
+                 'end': previous_time.strftime('%Y/%m/%d %H:%M'),
+                 'status': 'down',
+                 'reason': reason,
+                 'duration': format_duration(down_duration)
+             })
+         else:
+             status_changes.append({
+                 'start': current_start,
+                 'end': previous_time.strftime('%Y/%m/%d %H:%M'),
+                 'status': current_status,
+                 'reason': None,
+                 'duration': None
+             })
+
+     # Add the result to the report
+     report_dict['timeline'] = status_changes
+
+     # Print the visual timeline if requested
+     if show_visual and status_changes:
+         print_timeline_visual(
+             status_changes,
+             start_date=report_dict.get('startDate'),
+             end_date=report_dict.get('endDate'),
+             show_details=True
+         )
+
+     return report_dict
+
+
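# --- Editor's illustrative sketch (not part of the package) ---
# The DataFrame and report fields are hypothetical, built only from the
# parameters documented above; a 5-hour gap with max_gap_hours=2 becomes
# one "Data Gap" entry.
#
# idx = pd.date_range('2024/01/01 00:00', periods=96, freq='1h')
# df = pd.DataFrame({'scattering': 1.0}, index=idx)
# df.iloc[40:45] = None
# report = {'instrument_id': 'NEPH',
#           'startDate': '2024/01/01 00:00', 'endDate': '2024/01/04 23:00'}
# report = process_timeline_report(report, df, max_gap_hours=2)
# report['timeline']  # list of operational/down periods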
+ def process_rates_report(logger, report_dict: dict,
+                          weekly_raw_groups, monthly_raw_groups,
+                          weekly_flag_groups, monthly_flag_groups) -> dict:
+     """
+     Generate acquisition and yield reports based on grouped data.
+
+     Parameters
+     ----------
+     logger : Logger
+         Logger object for outputting messages
+     report_dict : dict
+         Report dictionary to update with rate information
+     weekly_raw_groups : pandas.core.groupby.GroupBy
+         Raw data grouped by week
+     monthly_raw_groups : pandas.core.groupby.GroupBy
+         Raw data grouped by month
+     weekly_flag_groups : pandas.core.groupby.GroupBy
+         QC flag data grouped by week
+     monthly_flag_groups : pandas.core.groupby.GroupBy
+         QC flag data grouped by month
+
+     Returns
+     -------
+     dict
+         Updated report dictionary with weekly and monthly rate information
+
+     Notes
+     -----
+     The report contains acquisition rates (percentage of data acquired vs expected),
+     yield rates (percentage of data passing QC vs acquired), and
+     total rates (overall percentage of valid data) for each time period.
+     """
+     report = report_dict.copy()
+
+     # Ensure the report has the required structure
+     if "rates" not in report:
+         report["rates"] = {
+             "weekly": {},
+             "monthly": {}
+         }
+
+     # Process weekly data - use standard week boundaries
+     for week_start, week_raw_data in weekly_raw_groups:
+         # Get the matching QC flag data
+         week_qc_flag = None
+         if week_start in weekly_flag_groups.groups:
+             week_qc_flag = weekly_flag_groups.get_group(week_start)
+
+         if not week_raw_data.empty and week_qc_flag is not None:
+             # Compute the standard week end (Sunday 23:59:59)
+             week_end = week_start + pd.Timedelta(days=6, hours=23, minutes=59, seconds=59)
+
+             # Use the week start date as the key
+             period_key = week_start.strftime('%Y-%m-%d')
+
+             report["rates"]["weekly"][period_key] = {
+                 "start_time": week_start.strftime('%Y-%m-%d %H:%M:%S'),
+                 "end_time": week_end.strftime('%Y-%m-%d %H:%M:%S'),
+                 "rates": calculate_rates(logger, week_raw_data, week_qc_flag)
+             }
+
+     # Process monthly data - use standard month boundaries
+     for month_start, month_raw_data in monthly_raw_groups:
+         # Get the matching QC flag data
+         month_qc_flag = None
+         if month_start in monthly_flag_groups.groups:
+             month_qc_flag = monthly_flag_groups.get_group(month_start)
+
+         if not month_raw_data.empty and month_qc_flag is not None:
+             # Compute the standard month end (last day of the month, 23:59:59)
+             next_month_start = (month_start + pd.Timedelta(days=32)).replace(day=1)
+             month_end = next_month_start - pd.Timedelta(seconds=1)
+
+             # Use the month as the key
+             period_key = month_start.strftime('%Y-%m')
+
+             report["rates"]["monthly"][period_key] = {
+                 "start_time": month_start.strftime('%Y-%m-%d %H:%M:%S'),
+                 "end_time": month_end.strftime('%Y-%m-%d %H:%M:%S'),
+                 "rates": calculate_rates(logger, month_raw_data, month_qc_flag)
+             }
+
+     return report
+
+
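# --- Editor's illustrative grouping (not part of the package) ---
# `raw` and `flags` are hypothetical datetime-indexed objects; grouping by
# period start time is only one way to produce keys matching the strftime
# calls above (weekly keys are Monday 00:00, monthly keys are month starts).
#
# weekly_raw = raw.groupby(raw.index.to_period('W').start_time)
# monthly_raw = raw.groupby(raw.index.to_period('M').start_time)
# weekly_flags = flags.groupby(flags.index.to_period('W').start_time)
# monthly_flags = flags.groupby(flags.index.to_period('M').start_time)
# report = process_rates_report(logger, report, weekly_raw, monthly_raw,
#                               weekly_flags, monthly_flags)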
+ def calculate_rates(logger, raw_data: pd.DataFrame, qc_flag: pd.Series,
+                     with_log: bool = False, resample_freq: str = '1h') -> dict:
+     """
+     Calculate data quality rates using QC_Flag.
+
+     Parameters
+     ----------
+     logger : Logger
+         Logger to use for message output
+     raw_data : pd.DataFrame
+         Raw data before quality control
+     qc_flag : pd.Series
+         QC flag series indicating validity of each row ("Valid" or error type)
+     with_log : bool, default=False
+         If True, outputs calculation logs
+     resample_freq : str, default='1h'
+         Frequency for resampling data when calculating rates
+
+     Returns
+     -------
+     dict
+         Dictionary containing:
+         acquisition_rate : float
+             Percentage of data acquired vs expected (fraction of expected periods with data)
+         yield_rate : float
+             Percentage of data passing QC vs acquired (fraction of acquired periods passing QC)
+         total_rate : float
+             Overall percentage of valid data (fraction of expected periods with valid data)
+
+     Notes
+     -----
+     - Acquisition Rate: periods with data / expected periods
+     - Yield Rate: periods passed QC / periods with data
+     - Total Rate: periods passed QC / expected periods
+     """
+     if raw_data.empty or qc_flag is None:
+         return {'acquisition_rate': 0, 'yield_rate': 0, 'total_rate': 0}
+
+     # Expected number of periods (based on the resample frequency)
+     period_size = len(raw_data.resample(resample_freq).mean().index)
+
+     # Number of periods with data (periods where raw_data has at least one non-NaN value)
+     sample_size = len(raw_data.resample(resample_freq).mean().dropna(how='all').index)
+
+     # Count valid periods using QC_Flag
+     valid_mask = qc_flag == 'Valid'
+     # Resample: the fraction of 'Valid' samples within each period
+     valid_ratio_per_period = valid_mask.resample(resample_freq).mean()
+     # Only count periods where raw_data actually has data
+     has_data_mask = raw_data.resample(resample_freq).mean().notna().any(axis=1)
+     # A valid period has data and more than 50% of its samples passing QC
+     qc_size = ((valid_ratio_per_period > 0.5) & has_data_mask).sum()
+
+     # Guard against division by zero
+     if period_size == 0:
+         return {'acquisition_rate': 0, 'yield_rate': 0, 'total_rate': 0}
+
+     # Compute the rates
+     sample_rate = round((sample_size / period_size) * 100, 1)
+     valid_rate = round((qc_size / sample_size) * 100, 1) if sample_size > 0 else 0
+     total_rate = round((qc_size / period_size) * 100, 1)
+
+     if with_log:
+         logger.info(f"  Acquisition Rate : {logger.BLUE}{sample_rate:>5.1f}%{logger.RESET} ({sample_size}/{period_size} periods with data)")
+         logger.info(f"  Yield Rate       : {logger.BLUE}{valid_rate:>5.1f}%{logger.RESET} ({qc_size}/{sample_size} periods passed QC)")
+         logger.info(f"  Total Rate       : {logger.BLUE}{total_rate:>5.1f}%{logger.RESET} ({qc_size}/{period_size} valid periods)")
+
+     return {
+         'acquisition_rate': sample_rate,
+         'yield_rate': valid_rate,
+         'total_rate': total_rate
+     }
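
# --- Worked example (editor's sketch, not part of the package) ---
# Synthetic values chosen so the arithmetic of the three rates is visible;
# logger is unused because with_log defaults to False.
#
# import numpy as np
# idx = pd.date_range('2024-01-01', periods=168, freq='1h')   # one week, hourly
# raw = pd.DataFrame({'pm25': np.random.rand(168)}, index=idx)
# raw.iloc[150:] = np.nan                  # 18 expected periods acquire no data
# flags = pd.Series('Valid', index=idx)
# flags.iloc[140:150] = 'Out of Range'     # 10 acquired periods fail QC
#
# calculate_rates(None, raw, flags)
# # -> {'acquisition_rate': 89.3,   # 150/168 periods with data
# #     'yield_rate': 93.3,         # 140/150 acquired periods passed QC
# #     'total_rate': 83.3}         # 140/168 expected periods valid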