fraclab-sdk 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- CHANGELOG.md +50 -0
- README.md +73 -7
- fraclab_sdk/__init__.py +3 -0
- fraclab_sdk/devkit/__init__.py +8 -0
- fraclab_sdk/devkit/validate.py +836 -75
- fraclab_sdk/specs/__init__.py +22 -0
- fraclab_sdk/specs/output.py +33 -0
- fraclab_sdk/version.py +5 -0
- fraclab_sdk/workbench/Home.py +162 -0
- fraclab_sdk/workbench/__init__.py +4 -0
- fraclab_sdk/workbench/__main__.py +48 -0
- fraclab_sdk/workbench/pages/1_Snapshots.py +577 -0
- fraclab_sdk/workbench/pages/2_Browse.py +513 -0
- fraclab_sdk/workbench/pages/3_Selection.py +464 -0
- fraclab_sdk/workbench/pages/4_Run.py +331 -0
- fraclab_sdk/workbench/pages/5_Results.py +298 -0
- fraclab_sdk/workbench/pages/6_Algorithm_Edit.py +116 -0
- fraclab_sdk/workbench/pages/7_Schema_Edit.py +160 -0
- fraclab_sdk/workbench/pages/8_Output_Edit.py +155 -0
- fraclab_sdk/workbench/pages/9_Export_Algorithm.py +386 -0
- fraclab_sdk/workbench/pages/__init__.py +1 -0
- fraclab_sdk/workbench/ui_styles.py +103 -0
- fraclab_sdk/workbench/utils.py +43 -0
- {fraclab_sdk-0.1.0.dist-info → fraclab_sdk-0.1.2.dist-info}/METADATA +77 -8
- {fraclab_sdk-0.1.0.dist-info → fraclab_sdk-0.1.2.dist-info}/RECORD +27 -8
- {fraclab_sdk-0.1.0.dist-info → fraclab_sdk-0.1.2.dist-info}/entry_points.txt +1 -0
- {fraclab_sdk-0.1.0.dist-info → fraclab_sdk-0.1.2.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,513 @@
|
|
|
1
|
+
"""Browse snapshot data page."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from typing import Any, Iterable
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
import streamlit as st
|
|
8
|
+
|
|
9
|
+
from fraclab_sdk.config import SDKConfig
|
|
10
|
+
from fraclab_sdk.snapshot import SnapshotLibrary
|
|
11
|
+
from fraclab_sdk.workbench import ui_styles
|
|
12
|
+
|
|
13
|
+
# --- Page bootstrap (Streamlit re-executes this module top-to-bottom on every rerun) ---

# NOTE: Streamlit requires set_page_config to be the first st.* call on a page.
st.set_page_config(page_title="Browse", page_icon="🔍", layout="wide", initial_sidebar_state="expanded")
st.title("Browse")

# Shared styling applied across all workbench pages.
ui_styles.apply_global_styles()

# --- Page-Specific CSS ---
# Injected once per rerun; styles the custom HTML table rendered by
# _render_static_table below and the pagination bar widgets.
st.markdown("""
<style>
/* Hide download button */
[data-testid="stDownloadButton"] {
    display: none !important;
}

/* Pagination button styling (override global) */
div[data-testid="stButton"] button {
    padding: 0.25rem 0.75rem !important;
    min-width: 40px !important;
}

/* Pagination ellipsis styling */
.pagination-ellipsis {
    text-align: center;
    line-height: 2.3rem;
    color: #888;
    font-weight: bold;
}

/* Custom static table styling (replacement for st.dataframe) */
.table-wrapper {
    max-height: 500px;
    overflow: auto;
    border: 1px solid #e6e9ef;
    border-radius: 0.25rem;
    margin-bottom: 1rem;
    background-color: white;
}

.custom-table {
    width: 100%;
    border-collapse: collapse;
    font-family: "Source Sans Pro", sans-serif;
    font-size: 14px;
    color: #31333F;
    user-select: none !important;
}

/* Sticky table header */
.custom-table th {
    position: sticky;
    top: 0;
    background-color: #f0f2f6;
    color: #31333F;
    z-index: 2;
    padding: 8px 12px;
    text-align: left;
    font-weight: 600;
    border-bottom: 2px solid #e6e9ef;
    white-space: nowrap;
}

/* Table cell styling */
.custom-table td {
    padding: 8px 12px;
    border-bottom: 1px solid #f0f2f6;
    white-space: nowrap;
    vertical-align: middle;
}

/* Zebra striping */
.custom-table tr:nth-child(even) {
    background-color: #f9f9fb;
}

/* Hover highlight */
.custom-table tr:hover {
    background-color: #f1f3f8;
}
</style>
""", unsafe_allow_html=True)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# --- Utils & Components ---
|
|
95
|
+
|
|
96
|
+
def _render_static_table(df: pd.DataFrame):
    """Render *df* as a static HTML table (replacement for st.dataframe).

    Features: no cell menus, full content display (no truncation),
    sticky headers, and the page's custom table styling (CSS injected
    at module top).
    """
    # Blank out NaN first so the rendered HTML does not show literal 'nan'.
    cleaned = df.fillna("")

    # No index column; escape=True guards against XSS by escaping any
    # HTML found in cell values.
    table_html = cleaned.to_html(index=False, classes="custom-table", border=0, escape=True)

    # Wrap in the scrollable container styled by the page CSS.
    st.markdown(f'<div class="table-wrapper">{table_html}</div>', unsafe_allow_html=True)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _read_ndjson_slice(path, start: int, limit: int) -> list[tuple[int, dict]]:
|
|
113
|
+
"""Read a slice of ndjson lines [start, start+limit)."""
|
|
114
|
+
results: list[tuple[int, dict]] = []
|
|
115
|
+
with path.open() as f:
|
|
116
|
+
for i, line in enumerate(f):
|
|
117
|
+
if i < start:
|
|
118
|
+
continue
|
|
119
|
+
if len(results) >= limit:
|
|
120
|
+
break
|
|
121
|
+
try:
|
|
122
|
+
results.append((i, json.loads(line)))
|
|
123
|
+
except Exception:
|
|
124
|
+
results.append((i, {"_error": "Failed to parse line", "raw": line.strip()}))
|
|
125
|
+
return results
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _render_pagination(current: int, total: int, key_prefix: str) -> int:
    """
    Render a compact, centered pagination bar.
    Updates session state and reruns if clicked.

    Args:
        current: Page to show when no state exists yet (1-based).
        total: Total number of pages.
        key_prefix: Namespace for the session-state key and widget keys,
            so several pagination bars can coexist on one page.

    Returns:
        The chosen page number (also persisted under
        ``st.session_state[f"{key_prefix}_current"]``).
    """
    # Seed session state on first render so the bar survives reruns.
    if f"{key_prefix}_current" not in st.session_state:
        st.session_state[f"{key_prefix}_current"] = current

    display_current = int(st.session_state.get(f"{key_prefix}_current", current))
    clicked = False

    def _page_buttons(cur: int) -> Iterable[int | str]:
        # Small page counts: show every page number.
        if total <= 9:
            return list(range(1, total + 1))
        # Otherwise: first two pages, a window around the current page,
        # and the last two pages, with "…" filling any gaps.
        window = [cur - 1, cur, cur + 1]
        window = [p for p in window if 1 <= p <= total]
        pages = [1, 2] + window + [total - 1, total]
        pages = sorted(set(pages))
        display = []
        last = None
        for p in pages:
            # Insert an ellipsis wherever consecutive entries skip pages.
            if last and p - last > 1:
                display.append("…")
            display.append(p)
            last = p
        return display

    buttons = list(_page_buttons(display_current))

    st.markdown("---")

    # Two extra slots for the prev/next arrows; flank with spacer columns
    # to keep the bar visually centered.
    num_slots = len(buttons) + 2
    spacer_ratio = 6 if num_slots < 6 else 1.5
    col_ratios = [spacer_ratio] + [1] * num_slots + [spacer_ratio]

    cols = st.columns(col_ratios, gap="small")
    action_cols = cols[1:-1]

    chosen = display_current

    # Previous-page arrow (disabled on the first page).
    if action_cols[0].button("‹", key=f"{key_prefix}_prev", disabled=display_current <= 1):
        chosen = max(1, display_current - 1)
        clicked = True

    # Numbered page buttons; start=1 skips the prev-arrow column.
    for idx, p in enumerate(buttons, start=1):
        if p == "…":
            action_cols[idx].markdown("<div class='pagination-ellipsis'>…</div>", unsafe_allow_html=True)
            continue

        if action_cols[idx].button(
            f"{p}",
            key=f"{key_prefix}_page_{p}",
            # Highlight the current page.
            type="primary" if p == display_current else "secondary",
        ):
            chosen = p
            clicked = True

    # Next-page arrow (disabled on the last page).
    if action_cols[-1].button("›", key=f"{key_prefix}_next", disabled=display_current >= total):
        chosen = min(total, display_current + 1)
        clicked = True

    st.session_state[f"{key_prefix}_current"] = chosen

    if clicked:
        # st.rerun landed in newer Streamlit versions; fall back to the
        # deprecated experimental name on older installs.
        try:
            st.rerun()
        except AttributeError:
            st.experimental_rerun()
    return chosen
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _detect_layout(dir_path) -> str | None:
|
|
200
|
+
if (dir_path / "object.ndjson").exists():
|
|
201
|
+
return "object_ndjson_lines"
|
|
202
|
+
if (dir_path / "parquet").exists():
|
|
203
|
+
return "frame_parquet_item_dirs"
|
|
204
|
+
return None
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def get_library():
    """Build a SnapshotLibrary backed by the default SDK configuration."""
    return SnapshotLibrary(SDKConfig())
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
# --- Main Logic ---

snapshot_lib = get_library()
snapshots = snapshot_lib.list_snapshots()

if not snapshots:
    st.info("No snapshots available. Import a snapshot first.")
    st.stop()  # halt this rerun; nothing below can render without a snapshot

# 1. Select Snapshot
snapshot_options = {s.snapshot_id: s for s in snapshots}
selected_id = st.selectbox(
    "Select Snapshot",
    options=list(snapshot_options.keys()),
    # Show the bundle id alongside the snapshot id in the dropdown.
    format_func=lambda x: f"{x} ({snapshot_options[x].bundle_id})",
)

if not selected_id:
    st.stop()

snapshot = snapshot_lib.get_snapshot(selected_id)

st.divider()
|
|
235
|
+
|
|
236
|
+
# 2. Select Dataset
st.subheader("Datasets")
# NOTE(review): get_datasets appears to return a list of dicts with at least
# "dataset_key", "item_count", "layout" and "resource_type" keys — confirm
# against the Snapshot implementation.
datasets = snapshot.get_datasets()

if not datasets:
    st.info("No datasets in this snapshot")
    st.stop()

dataset_options = {d["dataset_key"]: d for d in datasets}
selected_dataset_key = st.selectbox(
    "Select Dataset",
    options=list(dataset_options.keys()),
    # Show the item count next to each dataset key in the dropdown.
    format_func=lambda k: f"{k} ({dataset_options[k]['item_count']} items)",
)

if not selected_dataset_key:
    st.stop()

dataset_info = dataset_options[selected_dataset_key]

# Summary card for the chosen dataset.
with st.container(border=True):
    c1, c2, c3 = st.columns(3)
    c1.metric("Total Items", dataset_info["item_count"])
    c2.caption("Layout")
    c2.markdown(f"**{dataset_info['layout'] or 'N/A'}**")
    c3.caption("Resource Type")
    c3.markdown(f"**{dataset_info['resource_type'] or 'N/A'}**")

st.divider()
|
|
265
|
+
|
|
266
|
+
# 3. Items Explorer
st.subheader("Items Explorer")

# NOTE(review): the iteration below unpacks each element as
# (real_idx, item_obj), so get_items presumably returns index/item pairs —
# confirm against Snapshot.get_items.
items = snapshot.get_items(selected_dataset_key)
layout = dataset_info["layout"]

if not items:
    st.info("No items in this dataset")
else:
    # --- Pagination for Items ---
    items_per_page = 20
    total_items = len(items)
    # Ceiling division: number of pages needed to show all items.
    total_pages = (total_items + items_per_page - 1) // items_per_page

    page_key = f"items_page_{selected_dataset_key}"
    # Prefer the key written by _render_pagination; fall back to any legacy
    # value stored under the bare key, then to page 1.
    page = st.session_state.get(f"{page_key}_current", st.session_state.get(page_key, 1))

    start_idx = (page - 1) * items_per_page
    end_idx = min(start_idx + items_per_page, total_items)

    current_items_slice = items[start_idx:end_idx]

    # --- Prepare Data ---
    item_dicts = []
    if current_items_slice:
        for real_idx, item_obj in current_items_slice:
            try:
                # model_dump suggests pydantic v2 models; fall back to str()
                # for anything without that method.
                d = item_obj.model_dump(exclude_none=True)
                d["_index"] = real_idx
                item_dicts.append(d)
            except AttributeError:
                item_dicts.append({"_index": real_idx, "raw": str(item_obj)})

    # --- View Tabs ---
    tab_table, tab_cards = st.tabs(["📊 Table View", "📝 Detail Cards"])

    with tab_table:
        st.markdown(f"<small>Showing items {start_idx + 1}-{end_idx} of {total_items}</small>", unsafe_allow_html=True)

        if item_dicts:
            df = pd.DataFrame(item_dicts)

            # Reorder columns so "_index" comes first.
            cols = df.columns.tolist()
            if "_index" in cols:
                cols.insert(0, cols.pop(cols.index("_index")))
                df = df[cols]

            # Use the custom static HTML table instead of st.dataframe.
            _render_static_table(df)

        else:
            st.warning("No data to display.")

    with tab_cards:
        st.markdown(f"<small>Showing items {start_idx + 1}-{end_idx} of {total_items}</small>", unsafe_allow_html=True)
        for real_idx, item_obj in current_items_slice:
            with st.expander(f"**Item {real_idx}**", expanded=False):
                try:
                    json_str = json.dumps(item_obj.model_dump(exclude_none=True), indent=2, ensure_ascii=False)
                    st.code(json_str, language="json")
                except AttributeError:
                    st.text(str(item_obj))

                # Layout-specific drill-down actions per item.
                if layout == "object_ndjson_lines":
                    if st.button(f"Load Data #{real_idx}", key=f"btn_load_ndjson_{real_idx}_{selected_dataset_key}"):
                        try:
                            data = snapshot.read_object_line(selected_dataset_key, real_idx)
                            st.info("Data Content:")
                            st.code(json.dumps(data, indent=2, ensure_ascii=False), language="json")
                        except Exception as e:
                            st.error(f"Error: {e}")

                elif layout == "frame_parquet_item_dirs":
                    try:
                        files = snapshot.read_frame_parts(selected_dataset_key, real_idx)
                        if files:
                            st.markdown("**Parquet Files:**")
                            for f in files:
                                st.code(f.name, language="text")
                        else:
                            st.caption("No files found.")
                    except Exception as e:
                        st.error(f"Error: {e}")

    # Pagination bar (only when more than one page); _render_pagination
    # triggers a rerun on click, so the slice above is recomputed then.
    if total_pages > 1:
        center_cols = st.columns([1, 8, 1])
        with center_cols[1]:
            page = _render_pagination(page, total_pages, page_key)

st.divider()
|
|
357
|
+
|
|
358
|
+
# 4. Data Files Preview
st.subheader("Data Files (from data/)")

manifest = snapshot.manifest
manifest_ds = manifest.datasets.get(selected_dataset_key)
data_root = manifest.dataRoot or "data"
dataset_dir = snapshot.directory / data_root / selected_dataset_key

# Resolve the layout from (in priority order): the dataset-info value, the
# manifest entry, then on-disk detection.
resolved_layout = layout or (manifest_ds.layout if manifest_ds else None) or _detect_layout(dataset_dir)

if resolved_layout == "object_ndjson_lines":
    # --- NDJSON View ---
    ndjson_path = dataset_dir / "object.ndjson"
    if ndjson_path.exists():
        total_count = manifest_ds.count if manifest_ds else dataset_info["item_count"]
        st.caption(f"File: {ndjson_path} (count: {total_count})")

        page_size = 10
        # "or 1" guarantees at least one page even when total_count is 0.
        total_pages = (total_count + page_size - 1) // page_size or 1

        ndjson_page_key = f"ndjson_preview_page_{selected_dataset_key}"
        cp = st.session_state.get(f"{ndjson_page_key}_current", st.session_state.get(ndjson_page_key, 1))

        # Reset to page 1 if stale state points past the end.
        if cp > total_pages: cp = 1

        start = (cp - 1) * page_size
        limit = page_size

        st.text(f"Lines {start + 1}-{min(start + limit, total_count)}")

        lines_data = _read_ndjson_slice(ndjson_path, start, limit)
        for line_idx, obj in lines_data:
            with st.expander(f"Line {line_idx}", expanded=False):
                st.code(json.dumps(obj, indent=2, ensure_ascii=False), language="json")

        if total_pages > 1:
            center_cols = st.columns([1, 8, 1])
            with center_cols[1]:
                _render_pagination(cp, total_pages, ndjson_page_key)
    else:
        st.warning(f"File not found: {ndjson_path}")

elif resolved_layout == "frame_parquet_item_dirs":
    # --- Parquet View ---
    parquet_dir = dataset_dir / "parquet"
    # Fall back to scanning the dataset dir itself if no parquet/ subdir.
    search_dir = parquet_dir if parquet_dir.exists() else dataset_dir

    if search_dir.exists():
        st.caption(f"Searching Parquet in: {search_dir}")
        files = sorted(search_dir.rglob("*.parquet"))

        if not files:
            st.info("No parquet files found.")
        else:
            # --- File List Pagination ---
            page_size = 10
            total_pages = (len(files) + page_size - 1) // page_size or 1

            file_page_key = f"parquet_file_page_{selected_dataset_key}"
            cp_files = st.session_state.get(f"{file_page_key}_current", 1)

            # Reset to page 1 if stale state points past the end.
            if cp_files > total_pages: cp_files = 1

            start = (cp_files - 1) * page_size
            page_files = files[start : start + page_size]

            # Display File List
            with st.container(border=True):
                st.markdown(f"**Parquet Files (Page {cp_files}/{total_pages})**")
                for f in page_files:
                    st.text(f"📄 {f.relative_to(dataset_dir)}")

            if total_pages > 1:
                center_cols = st.columns([1, 8, 1])
                with center_cols[1]:
                    _render_pagination(cp_files, total_pages, file_page_key)

            st.divider()

            # --- File Selection Logic ---
            options = [f.relative_to(dataset_dir) for f in files]
            select_key = f"parquet_file_select_{selected_dataset_key}"

            selected_rel = st.selectbox(
                "Select file to preview content",
                options=options,
                key=select_key,
            )

            # --- Parquet Content Preview ---
            sample_path = dataset_dir / selected_rel

            st.markdown(f"#### Preview: `{sample_path.name}`")

            try:
                # Deferred import: pyarrow is optional; missing install is
                # reported via the ImportError handler below.
                import pyarrow.parquet as pq

                table = pq.read_table(sample_path)
                total_rows = table.num_rows

                if total_rows == 0:
                    st.warning("Empty file.")
                else:
                    row_page_size = 20
                    row_total_pages = (total_rows + row_page_size - 1) // row_page_size or 1

                    # Key includes the file path so each file keeps its own page.
                    preview_page_key = f"pq_view_{selected_dataset_key}_{str(selected_rel)}"

                    cp_row = st.session_state.get(f"{preview_page_key}_current", 1)
                    if cp_row > row_total_pages: cp_row = 1

                    start_r = (cp_row - 1) * row_page_size
                    # Zero-copy slice of just the rows for this page.
                    table_slice = table.slice(start_r, row_page_size)

                    cols = table_slice.column_names
                    data_dict = table_slice.to_pydict()
                    rows = [{col: data_dict[col][i] for col in cols} for i in range(table_slice.num_rows)]

                    df_rows = pd.DataFrame(rows)

                    # --- Timestamp display cleanup ---
                    # Round to whole seconds and drop timezone info so
                    # timestamps render compactly in the HTML table.
                    for col in df_rows.columns:
                        if pd.api.types.is_datetime64_any_dtype(df_rows[col]):
                            try:
                                df_rows[col] = df_rows[col].dt.round('1s')
                                if df_rows[col].dt.tz is not None:
                                    df_rows[col] = df_rows[col].dt.tz_localize(None)
                            except Exception:
                                pass  # best-effort: leave the column as-is

                    # Use the custom static HTML table instead of st.dataframe.
                    _render_static_table(df_rows)

                    if row_total_pages > 1:
                        st.caption(f"Page {cp_row} of {row_total_pages} ({total_rows} rows)")
                        center_cols = st.columns([1, 8, 1])
                        with center_cols[1]:
                            _render_pagination(cp_row, row_total_pages, preview_page_key)

            except ImportError:
                st.error("pyarrow not installed.")
            except Exception as e:
                st.error(f"Failed to read parquet: {e}")

    else:
        st.warning(f"Parquet directory not found: {search_dir}")

st.divider()
|
|
506
|
+
|
|
507
|
+
# DRS info
# Collapsible dump of the snapshot's Data Requirement Specification.
with st.expander("Show DRS (Data Requirement Specification)"):
    try:
        # model_dump suggests the DRS is a pydantic v2 model.
        drs = snapshot.drs
        st.code(json.dumps(drs.model_dump(exclude_none=True), indent=2, ensure_ascii=False), language="json")
    except Exception as e:
        st.error(f"Failed to load DRS: {e}")
|