AeroViz-0.1.21-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AeroViz/__init__.py +13 -0
- AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/data/DEFAULT_DATA.csv +1417 -0
- AeroViz/data/DEFAULT_PNSD_DATA.csv +1417 -0
- AeroViz/data/hysplit_example_data.txt +101 -0
- AeroViz/dataProcess/Chemistry/__init__.py +149 -0
- AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Chemistry/_calculate.py +557 -0
- AeroViz/dataProcess/Chemistry/_isoropia.py +150 -0
- AeroViz/dataProcess/Chemistry/_mass_volume.py +487 -0
- AeroViz/dataProcess/Chemistry/_ocec.py +172 -0
- AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
- AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
- AeroViz/dataProcess/Optical/PyMieScatt_update.py +577 -0
- AeroViz/dataProcess/Optical/_IMPROVE.py +452 -0
- AeroViz/dataProcess/Optical/__init__.py +281 -0
- AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/_derived.py +518 -0
- AeroViz/dataProcess/Optical/_extinction.py +123 -0
- AeroViz/dataProcess/Optical/_mie_sd.py +912 -0
- AeroViz/dataProcess/Optical/_retrieve_RI.py +243 -0
- AeroViz/dataProcess/Optical/coefficient.py +72 -0
- AeroViz/dataProcess/Optical/fRH.pkl +0 -0
- AeroViz/dataProcess/Optical/mie_theory.py +260 -0
- AeroViz/dataProcess/README.md +271 -0
- AeroViz/dataProcess/SizeDistr/__init__.py +245 -0
- AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/SizeDistr/__pycache__/_size_dist.cpython-312.pyc +0 -0
- AeroViz/dataProcess/SizeDistr/_size_dist.py +810 -0
- AeroViz/dataProcess/SizeDistr/merge/README.md +93 -0
- AeroViz/dataProcess/SizeDistr/merge/__init__.py +20 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v0.py +251 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v0_1.py +246 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v1.py +255 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v2.py +244 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v3.py +518 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v4.py +422 -0
- AeroViz/dataProcess/SizeDistr/prop.py +62 -0
- AeroViz/dataProcess/VOC/__init__.py +14 -0
- AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/VOC/_potential_par.py +108 -0
- AeroViz/dataProcess/VOC/support_voc.json +446 -0
- AeroViz/dataProcess/__init__.py +66 -0
- AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/core/__init__.py +272 -0
- AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/mcp_server.py +352 -0
- AeroViz/plot/__init__.py +13 -0
- AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
- AeroViz/plot/bar.py +126 -0
- AeroViz/plot/box.py +69 -0
- AeroViz/plot/distribution/__init__.py +1 -0
- AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/distribution.py +576 -0
- AeroViz/plot/meteorology/CBPF.py +295 -0
- AeroViz/plot/meteorology/__init__.py +3 -0
- AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/hysplit.py +93 -0
- AeroViz/plot/meteorology/wind_rose.py +77 -0
- AeroViz/plot/optical/__init__.py +1 -0
- AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
- AeroViz/plot/optical/optical.py +388 -0
- AeroViz/plot/pie.py +210 -0
- AeroViz/plot/radar.py +184 -0
- AeroViz/plot/regression.py +200 -0
- AeroViz/plot/scatter.py +174 -0
- AeroViz/plot/templates/__init__.py +6 -0
- AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
- AeroViz/plot/templates/ammonium_rich.py +34 -0
- AeroViz/plot/templates/contour.py +47 -0
- AeroViz/plot/templates/corr_matrix.py +267 -0
- AeroViz/plot/templates/diurnal_pattern.py +61 -0
- AeroViz/plot/templates/koschmieder.py +95 -0
- AeroViz/plot/templates/metal_heatmap.py +164 -0
- AeroViz/plot/timeseries/__init__.py +2 -0
- AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/template.py +47 -0
- AeroViz/plot/timeseries/timeseries.py +446 -0
- AeroViz/plot/utils/__init__.py +4 -0
- AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
- AeroViz/plot/utils/_color.py +71 -0
- AeroViz/plot/utils/_unit.py +55 -0
- AeroViz/plot/utils/fRH.json +390 -0
- AeroViz/plot/utils/plt_utils.py +92 -0
- AeroViz/plot/utils/sklearn_utils.py +49 -0
- AeroViz/plot/utils/units.json +89 -0
- AeroViz/plot/violin.py +80 -0
- AeroViz/rawDataReader/FLOW.md +138 -0
- AeroViz/rawDataReader/__init__.py +220 -0
- AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__init__.py +0 -0
- AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/supported_instruments.py +135 -0
- AeroViz/rawDataReader/core/__init__.py +658 -0
- AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/pre_process.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/report.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/logger.py +171 -0
- AeroViz/rawDataReader/core/pre_process.py +308 -0
- AeroViz/rawDataReader/core/qc.py +961 -0
- AeroViz/rawDataReader/core/report.py +579 -0
- AeroViz/rawDataReader/script/AE33.py +173 -0
- AeroViz/rawDataReader/script/AE43.py +151 -0
- AeroViz/rawDataReader/script/APS.py +339 -0
- AeroViz/rawDataReader/script/Aurora.py +191 -0
- AeroViz/rawDataReader/script/BAM1020.py +90 -0
- AeroViz/rawDataReader/script/BC1054.py +161 -0
- AeroViz/rawDataReader/script/EPA.py +79 -0
- AeroViz/rawDataReader/script/GRIMM.py +68 -0
- AeroViz/rawDataReader/script/IGAC.py +140 -0
- AeroViz/rawDataReader/script/MA350.py +179 -0
- AeroViz/rawDataReader/script/Minion.py +218 -0
- AeroViz/rawDataReader/script/NEPH.py +199 -0
- AeroViz/rawDataReader/script/OCEC.py +173 -0
- AeroViz/rawDataReader/script/Q-ACSM.py +12 -0
- AeroViz/rawDataReader/script/SMPS.py +389 -0
- AeroViz/rawDataReader/script/TEOM.py +181 -0
- AeroViz/rawDataReader/script/VOC.py +106 -0
- AeroViz/rawDataReader/script/Xact.py +244 -0
- AeroViz/rawDataReader/script/__init__.py +28 -0
- AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Q-ACSM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Xact.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__init__.py +2 -0
- AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
- AeroViz/tools/database.py +95 -0
- AeroViz/tools/dataclassifier.py +117 -0
- AeroViz/tools/dataprinter.py +58 -0
- aeroviz-0.1.21.dist-info/METADATA +294 -0
- aeroviz-0.1.21.dist-info/RECORD +180 -0
- aeroviz-0.1.21.dist-info/WHEEL +5 -0
- aeroviz-0.1.21.dist-info/licenses/LICENSE +21 -0
- aeroviz-0.1.21.dist-info/top_level.txt +1 -0
AeroViz/rawDataReader/core/report.py
@@ -0,0 +1,579 @@
import os
from datetime import datetime, timedelta

import pandas as pd
from rich.console import Console
from rich.table import Table
from rich.text import Text
from rich.panel import Panel


def print_timeline_visual(timeline_data: list, start_date: str = None, end_date: str = None,
                          width: int = 80, show_details: bool = True) -> None:
    """
    Print a visual representation of the timeline to the console.

    Parameters
    ----------
    timeline_data : list
        List of timeline entries with 'start', 'end', 'status', 'reason', 'duration' keys.
    start_date : str, optional
        Overall start date (format: 'YYYY/MM/DD HH:MM').
    end_date : str, optional
        Overall end date (format: 'YYYY/MM/DD HH:MM').
    width : int, default 80
        Width of the timeline bar in characters.
    show_details : bool, default True
        Whether to show detailed downtime information.

    Notes
    -----
    Creates a visual bar showing operational (green) and down (red) periods,
    followed by a summary table of downtime events.
    """
    if not timeline_data:
        return

    console = Console(force_terminal=True)

    # Parse dates
    def parse_date(date_str):
        try:
            return datetime.strptime(date_str, '%Y/%m/%d %H:%M')
        except ValueError:
            return datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S')

    # Get time range
    if start_date and end_date:
        total_start = parse_date(start_date)
        total_end = parse_date(end_date)
    else:
        all_times = []
        for entry in timeline_data:
            all_times.append(parse_date(entry['start']))
            all_times.append(parse_date(entry['end']))
        total_start = min(all_times)
        total_end = max(all_times)

    total_duration = (total_end - total_start).total_seconds()
    if total_duration <= 0:
        return

    # Build visual bar
    bar_chars = [' '] * width

    for entry in timeline_data:
        entry_start = parse_date(entry['start'])
        entry_end = parse_date(entry['end'])

        # Calculate positions
        start_pos = int(((entry_start - total_start).total_seconds() / total_duration) * width)
        end_pos = int(((entry_end - total_start).total_seconds() / total_duration) * width)

        start_pos = max(0, min(width - 1, start_pos))
        end_pos = max(0, min(width, end_pos))

        char = '█' if entry['status'] == 'operational' else '░'
        for i in range(start_pos, end_pos):
            bar_chars[i] = char

    # Create colored bar
    bar_text = Text()
    current_char = None
    current_count = 0

    for char in bar_chars:
        if char == current_char:
            current_count += 1
        else:
            if current_char is not None:
                color = 'green' if current_char == '█' else 'red'
                bar_text.append(current_char * current_count, style=color)
            current_char = char
            current_count = 1

    if current_char is not None:
        color = 'green' if current_char == '█' else 'red'
        bar_text.append(current_char * current_count, style=color)

    # Calculate statistics
    operational_hours = 0
    down_hours = 0
    down_events = []

    for entry in timeline_data:
        entry_start = parse_date(entry['start'])
        entry_end = parse_date(entry['end'])
        duration_hours = (entry_end - entry_start).total_seconds() / 3600

        if entry['status'] == 'operational':
            operational_hours += duration_hours
        else:
            down_hours += duration_hours
            down_events.append(entry)

    total_hours = operational_hours + down_hours
    uptime_pct = (operational_hours / total_hours * 100) if total_hours > 0 else 0

    # Print header
    console.print()
    console.print(f"[bold cyan]Data Timeline[/bold cyan]")
    console.print(f"[dim]{total_start.strftime('%Y/%m/%d')} → {total_end.strftime('%Y/%m/%d')}[/dim]")
    console.print()

    # Print bar with date markers
    console.print(f"[dim]{total_start.strftime('%m/%d')}[/dim]", end="")
    padding = width - 10
    console.print(" " * (padding // 2), end="")
    console.print(f"[dim]{total_end.strftime('%m/%d')}[/dim]")

    # Print the bar
    console.print("[", end="")
    console.print(bar_text, end="")
    console.print("]")

    # Print legend
    console.print()
    console.print(Text("█ Operational", style="green"), end=" ")
    console.print(Text("░ Down", style="red"))

    # Print summary
    console.print()
    console.print(f"[bold]Summary:[/bold]")
    console.print(f" • Uptime: [green]{uptime_pct:.1f}%[/green] ({operational_hours:.1f} hours)")
    console.print(f" • Downtime: [red]{100 - uptime_pct:.1f}%[/red] ({down_hours:.1f} hours)")
    console.print(f" • Down events: {len(down_events)}")

    # Print detailed downtime table if requested
    if show_details and down_events:
        console.print()
        table = Table(title="Downtime Events", show_header=True, header_style="bold magenta")
        table.add_column("Start", style="dim")
        table.add_column("End", style="dim")
        table.add_column("Duration", justify="right")
        table.add_column("Reason", style="yellow")

        # Only show first 10 events to avoid clutter
        display_events = down_events[:10]
        for event in display_events:
            table.add_row(
                event['start'],
                event['end'],
                event.get('duration', 'N/A'),
                event.get('reason', 'Unknown')
            )

        if len(down_events) > 10:
            table.add_row("...", "...", "...", f"(+{len(down_events) - 10} more events)")

        console.print(table)

    console.print()

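# A minimal usage sketch for print_timeline_visual (not part of the packaged
# file; the entries below are hypothetical and only follow the key layout
# documented in the docstring above):
#
#     timeline = [
#         {'start': '2024/01/01 00:00', 'end': '2024/01/10 00:00',
#          'status': 'operational', 'reason': None, 'duration': None},
#         {'start': '2024/01/10 00:00', 'end': '2024/01/10 06:00',
#          'status': 'down', 'reason': 'Data Gap', 'duration': '6 hours'},
#         {'start': '2024/01/10 06:00', 'end': '2024/01/31 23:00',
#          'status': 'operational', 'reason': None, 'duration': None},
#     ]
#     print_timeline_visual(timeline, start_date='2024/01/01 00:00',
#                           end_date='2024/01/31 23:00')
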
def process_timeline_report(report_dict: dict, df: pd.DataFrame, max_gap_hours: int = 2,
                            logger=None, show_visual: bool = True) -> dict:
    """
    Process instrument data and generate timeline data showing operational status.

    Parameters
    ----------
    report_dict : dict
        Report dictionary containing instrument information and configuration.
    df : pandas.DataFrame
        Data frame containing instrument measurements with datetime index or column.
    max_gap_hours : int, default 2
        Maximum allowed downtime (hours) before considering it as a significant data gap
        rather than brief downtime.
    logger : Logger, optional
        Logger object to use for logging messages. If None, print statements are used.
    show_visual : bool, default True
        If True, prints a visual timeline to the console showing operational/down periods.

    Returns
    -------
    dict
        Updated report_dict with added 'timeline' key containing status changes.
        Timeline data includes operational periods and downtime periods with reasons.

    Notes
    -----
    The function detects data gaps based on whether any data exists in each row.
    Known issues are loaded from a YAML configuration file and matched against
    detected downtime periods to provide specific reason information.

    When show_visual=True, a graphical timeline is displayed in the console showing:
    - Green blocks for operational periods
    - Red blocks for downtime periods
    - Summary statistics (uptime percentage, downtime hours, event count)
    - Detailed table of downtime events with reasons
    """

    # Helper function for logging
    def log_message(level: str, message: str) -> None:
        if logger:
            if level == "info":
                logger.info(message)
            elif level == "warning":
                logger.warning(message)
            elif level == "error":
                logger.error(message)
        else:
            print(message)

    # Use the instrument ID from the report
    instrument_id = report_dict.get('instrument_id')

    # Look up known issues - use an environment variable or the default path
    known_issues_file = os.environ.get(
        'KNOWN_ISSUES_PATH',
        '/Users/chanchihyu/DataCenter/Config/known_issues.yml'
    )
    try:
        import yaml
        with open(known_issues_file, 'r', encoding='utf-8') as f:
            known_issues = yaml.safe_load(f)
    except ImportError:
        known_issues = {}
    except FileNotFoundError:
        # Silently ignore missing known issues file - it's optional
        known_issues = {}
    except Exception as e:
        log_message("error", f"Error loading known issues: {e}")
        known_issues = {}

    # Return early if the data frame is empty
    if df.empty:
        return report_dict

    # Handle the time column
    df = df.copy()
    time_col = None

    # If the index is a DatetimeIndex, reset it into a column
    if isinstance(df.index, pd.DatetimeIndex):
        df = df.reset_index()
        time_col = df.columns[0]
    else:
        # Try to find the time column by common names
        time_col_candidates = ['time', 'Time', 'timestamp', 'Timestamp',
                               'datetime', 'DateTime', 'date', 'Date']
        for col in time_col_candidates:
            if col in df.columns:
                time_col = col
                break

    if not time_col:
        return report_dict

    # Ensure the time column is in datetime format
    try:
        df[time_col] = pd.to_datetime(df[time_col])
    except Exception as e:
        log_message("error", f"Time format conversion failed: {e}")
        return report_dict

    # Sort the data chronologically
    df = df.sort_values(time_col)

    # Determine status: check whether each row has any data (excluding the time column)
    data_columns = [col for col in df.columns if col != time_col]
    df['operational'] = df[data_columns].notna().any(axis=1)

    # Track status changes
    status_changes = []
    current_status = None
    current_start = None
    previous_time = None
    current_down_start = None

    # Inner function: format a time interval
    def format_duration(duration):
        """Format a time interval as a human-readable string."""
        hours = duration.total_seconds() / 3600
        if hours < 1:
            return f"{int(hours * 60)} minutes"
        elif hours < 24:
            return f"{int(hours)} hours"
        else:
            days = int(hours / 24)
            remaining_hours = int(hours % 24)
            return f"{days} days{' ' + str(remaining_hours) + ' hours' if remaining_hours > 0 else ''}"

    # Inner function: look up known issues
    def find_known_issue(start_time, end_time):
        """Find a known issue that overlaps the given time window."""
        if not known_issues or not instrument_id or instrument_id not in known_issues:
            return None

        instrument_issues = known_issues.get(instrument_id, [])
        start_time = pd.to_datetime(start_time)
        end_time = pd.to_datetime(end_time)

        for issue in instrument_issues:
            try:
                issue_start = pd.to_datetime(issue['start'])
                issue_end = pd.to_datetime(issue['end'])

                # Check whether the two time windows overlap
                if not (end_time <= issue_start or start_time >= issue_end):
                    return issue['reason']
            except Exception as e:
                log_message("error", f"Error processing issue: {e}")

        return None

    # Walk through each row of data
    for i, row in df.iterrows():
        time_str = row[time_col].strftime('%Y/%m/%d %H:%M')
        status = 'operational' if row['operational'] else 'down'

        # First record
        if current_status is None:
            current_status = status
            current_start = time_str
            previous_time = row[time_col]
            if status == 'down':
                current_down_start = row[time_col]
            continue

        # Status change
        if status != current_status:
            # Operational -> down
            if status == 'down':
                status_changes.append({
                    'start': current_start,
                    'end': time_str,
                    'status': 'operational',
                    'reason': None,
                    'duration': None
                })
                current_start = time_str
                current_down_start = row[time_col]
            # Down -> operational
            else:
                down_duration = row[time_col] - current_down_start
                known_reason = find_known_issue(current_start, time_str)

                if known_reason:
                    reason = known_reason
                elif down_duration <= pd.Timedelta(hours=max_gap_hours):
                    reason = "Brief Downtime"
                else:
                    reason = "Data Gap"

                status_changes.append({
                    'start': current_start,
                    'end': time_str,
                    'status': 'down',
                    'reason': reason,
                    'duration': format_duration(down_duration)
                })
                current_start = time_str
                current_down_start = None

            current_status = status

        previous_time = row[time_col]

    # Append the final period
    if current_start is not None:
        if current_status == 'down':
            down_duration = previous_time - current_down_start
            known_reason = find_known_issue(current_start, previous_time.strftime('%Y/%m/%d %H:%M'))

            if known_reason:
                reason = known_reason
            elif down_duration <= pd.Timedelta(hours=max_gap_hours):
                reason = "Brief Downtime"
            else:
                reason = "Data Gap"

            status_changes.append({
                'start': current_start,
                'end': previous_time.strftime('%Y/%m/%d %H:%M'),
                'status': 'down',
                'reason': reason,
                'duration': format_duration(down_duration)
            })
        else:
            status_changes.append({
                'start': current_start,
                'end': previous_time.strftime('%Y/%m/%d %H:%M'),
                'status': current_status,
                'reason': None,
                'duration': None
            })

    # Add the result to the report
    report_dict['timeline'] = status_changes

    # Print visual timeline if requested
    if show_visual and status_changes:
        print_timeline_visual(
            status_changes,
            start_date=report_dict.get('startDate'),
            end_date=report_dict.get('endDate'),
            show_details=True
        )

    return report_dict

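# Sketch of the known_issues.yml layout that process_timeline_report expects,
# inferred from find_known_issue above (a mapping from instrument ID to a list
# of issues with 'start', 'end', and 'reason'); the entry shown is made up:
#
#     AE33:
#       - start: 2024/01/10 00:00
#         end: 2024/01/10 06:00
#         reason: Filter tape change
#
# Pointing the KNOWN_ISSUES_PATH environment variable at such a file replaces
# the hard-coded default path.
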
def process_rates_report(logger, report_dict: dict,
                         weekly_raw_groups, monthly_raw_groups,
                         weekly_flag_groups, monthly_flag_groups) -> dict:
    """
    Generate acquisition and yield reports based on grouped data.

    Parameters
    ----------
    logger : Logger
        Logger object for outputting messages
    report_dict : dict
        Report dictionary to update with rate information
    weekly_raw_groups : pandas.core.groupby.GroupBy
        Raw data grouped by week
    monthly_raw_groups : pandas.core.groupby.GroupBy
        Raw data grouped by month
    weekly_flag_groups : pandas.core.groupby.GroupBy
        QC flag data grouped by week
    monthly_flag_groups : pandas.core.groupby.GroupBy
        QC flag data grouped by month

    Returns
    -------
    dict
        Updated report dictionary with weekly and monthly rate information

    Notes
    -----
    The report contains acquisition rates (percentage of data acquired vs expected),
    yield rates (percentage of data passing QC vs acquired), and
    total rates (overall percentage of valid data) for each time period.
    """
    report = report_dict.copy()

    # Ensure the report has the required structure
    if "rates" not in report:
        report["rates"] = {
            "weekly": {},
            "monthly": {}
        }

    # Process weekly data using standard week time ranges
    for week_start, week_raw_data in weekly_raw_groups:
        # Get the matching QC flag data
        week_qc_flag = None
        if week_start in weekly_flag_groups.groups:
            week_qc_flag = weekly_flag_groups.get_group(week_start)

        if not week_raw_data.empty and week_qc_flag is not None:
            # Compute the standard week end time (Sunday 23:59:59)
            week_end = week_start + pd.Timedelta(days=6, hours=23, minutes=59, seconds=59)

            # Use the week's start date as the key
            period_key = week_start.strftime('%Y-%m-%d')

            report["rates"]["weekly"][period_key] = {
                "start_time": week_start.strftime('%Y-%m-%d %H:%M:%S'),
                "end_time": week_end.strftime('%Y-%m-%d %H:%M:%S'),
                "rates": calculate_rates(logger, week_raw_data, week_qc_flag)
            }

    # Process monthly data using standard month time ranges
    for month_start, month_raw_data in monthly_raw_groups:
        # Get the matching QC flag data
        month_qc_flag = None
        if month_start in monthly_flag_groups.groups:
            month_qc_flag = monthly_flag_groups.get_group(month_start)

        if not month_raw_data.empty and month_qc_flag is not None:
            # Compute the standard month end time (last day of month, 23:59:59)
            next_month_start = (month_start + pd.Timedelta(days=32)).replace(day=1)
            month_end = next_month_start - pd.Timedelta(seconds=1)

            # Use the month as the key
            period_key = month_start.strftime('%Y-%m')

            report["rates"]["monthly"][period_key] = {
                "start_time": month_start.strftime('%Y-%m-%d %H:%M:%S'),
                "end_time": month_end.strftime('%Y-%m-%d %H:%M:%S'),
                "rates": calculate_rates(logger, month_raw_data, month_qc_flag)
            }

    return report

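# Sketch of how the grouped inputs to process_rates_report could be built,
# assuming raw_df and flag_df share a DatetimeIndex (the grouping keys below
# are an illustration, not taken from the package; to_period('W').start_time
# yields the Monday 00:00 timestamp each week, matching the week_start + 6
# days arithmetic above):
#
#     weekly_raw_groups = raw_df.groupby(raw_df.index.to_period('W').start_time)
#     monthly_raw_groups = raw_df.groupby(raw_df.index.to_period('M').start_time)
#     weekly_flag_groups = flag_df.groupby(flag_df.index.to_period('W').start_time)
#     monthly_flag_groups = flag_df.groupby(flag_df.index.to_period('M').start_time)
#
#     report = process_rates_report(logger, report, weekly_raw_groups,
#                                   monthly_raw_groups, weekly_flag_groups,
#                                   monthly_flag_groups)
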
def calculate_rates(logger, raw_data: pd.DataFrame, qc_flag: pd.Series,
                    with_log: bool = False, resample_freq: str = '1h') -> dict:
    """
    Calculate data quality rates using QC_Flag.

    Parameters
    ----------
    logger : Logger
        Logger to use for message output
    raw_data : pd.DataFrame
        Raw data before quality control
    qc_flag : pd.Series
        QC flag series indicating validity of each row ("Valid" or error type)
    with_log : bool, default=False
        If True, outputs calculation logs
    resample_freq : str, default='1h'
        Frequency for resampling data when calculating rates

    Returns
    -------
    dict
        Dictionary containing:
        acquisition_rate : float
            Percentage of data acquired vs expected (share of expected periods that contain data)
        yield_rate : float
            Percentage of data passing QC vs acquired (share of acquired periods that pass QC)
        total_rate : float
            Overall percentage of valid data (share of expected periods that are valid)

    Notes
    -----
    - Acquisition Rate: periods with data / expected periods
    - Yield Rate: periods passed QC / periods with data
    - Total Rate: periods passed QC / expected periods
    """
    if raw_data.empty or qc_flag is None:
        return {'acquisition_rate': 0, 'yield_rate': 0, 'total_rate': 0}

    # Expected number of periods (based on the resample frequency)
    period_size = len(raw_data.resample(resample_freq).mean().index)

    # Number of periods with data (periods with at least one non-NaN value in raw_data)
    sample_size = len(raw_data.resample(resample_freq).mean().dropna(how='all').index)

    # Use QC_Flag to count valid periods
    valid_mask = qc_flag == 'Valid'
    # Resample: compute the fraction of Valid flags within each period
    valid_ratio_per_period = valid_mask.resample(resample_freq).mean()
    # Only count periods where raw_data actually has data
    has_data_mask = raw_data.resample(resample_freq).mean().notna().any(axis=1)
    # Valid period: the period has data and more than 50% of its flags pass QC
    qc_size = ((valid_ratio_per_period > 0.5) & has_data_mask).sum()

    # Guard against division by zero
    if period_size == 0:
        return {'acquisition_rate': 0, 'yield_rate': 0, 'total_rate': 0}

    # Compute the rates
    sample_rate = round((sample_size / period_size) * 100, 1)
    valid_rate = round((qc_size / sample_size) * 100, 1) if sample_size > 0 else 0
    total_rate = round((qc_size / period_size) * 100, 1)

    if with_log:
        logger.info(f" Acquisition Rate : {logger.BLUE}{sample_rate:>5.1f}%{logger.RESET} ({sample_size}/{period_size} periods with data)")
        logger.info(f" Yield Rate : {logger.BLUE}{valid_rate:>5.1f}%{logger.RESET} ({qc_size}/{sample_size} periods passed QC)")
        logger.info(f" Total Rate : {logger.BLUE}{total_rate:>5.1f}%{logger.RESET} ({qc_size}/{period_size} valid periods)")

    return {
        'acquisition_rate': sample_rate,
        'yield_rate': valid_rate,
        'total_rate': total_rate
    }
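
To make the three ratios in calculate_rates concrete, here is a small worked
example with hypothetical numbers: 10 expected hourly periods, 8 of them
containing data, and 6 of those passing QC. Multiplying acquisition_rate by
yield_rate recovers total_rate (exactly here; rounding can shift it slightly
in general):

    period_size, sample_size, qc_size = 10, 8, 6
    acquisition_rate = round(sample_size / period_size * 100, 1)  # 80.0
    yield_rate = round(qc_size / sample_size * 100, 1)            # 75.0
    total_rate = round(qc_size / period_size * 100, 1)            # 60.0
    assert acquisition_rate * yield_rate / 100 == total_rate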