parqv 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parqv/__init__.py +31 -0
- parqv/app.py +84 -102
- parqv/cli.py +112 -0
- parqv/core/__init__.py +31 -0
- parqv/core/config.py +26 -0
- parqv/core/file_utils.py +88 -0
- parqv/core/handler_factory.py +90 -0
- parqv/core/logging.py +46 -0
- parqv/data_sources/__init__.py +48 -0
- parqv/data_sources/base/__init__.py +28 -0
- parqv/data_sources/base/exceptions.py +38 -0
- parqv/{handlers/base_handler.py → data_sources/base/handler.py} +54 -25
- parqv/{handlers → data_sources/formats}/__init__.py +13 -5
- parqv/data_sources/formats/csv.py +460 -0
- parqv/{handlers → data_sources/formats}/json.py +68 -32
- parqv/{handlers → data_sources/formats}/parquet.py +67 -56
- parqv/views/__init__.py +38 -0
- parqv/views/base.py +98 -0
- parqv/views/components/__init__.py +13 -0
- parqv/views/components/enhanced_data_table.py +152 -0
- parqv/views/components/error_display.py +72 -0
- parqv/views/components/loading_display.py +44 -0
- parqv/views/data_view.py +119 -46
- parqv/views/metadata_view.py +57 -20
- parqv/views/schema_view.py +190 -200
- parqv/views/utils/__init__.py +19 -0
- parqv/views/utils/data_formatters.py +184 -0
- parqv/views/utils/stats_formatters.py +220 -0
- parqv/views/utils/visualization.py +204 -0
- {parqv-0.2.0.dist-info → parqv-0.3.0.dist-info}/METADATA +5 -6
- parqv-0.3.0.dist-info/RECORD +36 -0
- {parqv-0.2.0.dist-info → parqv-0.3.0.dist-info}/WHEEL +1 -1
- parqv-0.2.0.dist-info/RECORD +0 -17
- {parqv-0.2.0.dist-info → parqv-0.3.0.dist-info}/entry_points.txt +0 -0
- {parqv-0.2.0.dist-info → parqv-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {parqv-0.2.0.dist-info → parqv-0.3.0.dist-info}/top_level.txt +0 -0
parqv/views/schema_view.py
CHANGED
@@ -1,17 +1,21 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
"""
|
2
|
+
Schema view for displaying column schema and statistics.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import Dict, Any, Optional, List
|
3
6
|
|
4
7
|
from rich.text import Text
|
5
8
|
from textual.app import ComposeResult
|
6
|
-
from textual.containers import VerticalScroll, Container
|
9
|
+
from textual.containers import VerticalScroll, Container, Horizontal
|
7
10
|
from textual.reactive import var
|
8
11
|
from textual.widgets import Static, ListView, ListItem, Label, LoadingIndicator
|
9
12
|
|
10
|
-
|
13
|
+
from .base import BaseView
|
14
|
+
from .utils import format_stats_for_display
|
11
15
|
|
12
16
|
|
13
17
|
class ColumnListItem(ListItem):
|
14
|
-
"""A ListItem that stores the column name."""
|
18
|
+
"""A ListItem that stores the column name for schema display."""
|
15
19
|
|
16
20
|
def __init__(self, column_name: str) -> None:
|
17
21
|
# Ensure IDs are CSS-safe (replace spaces, etc.)
|
@@ -20,227 +24,213 @@ class ColumnListItem(ListItem):
|
|
20
24
|
self.column_name = column_name
|
21
25
|
|
22
26
|
|
23
|
-
|
24
|
-
"""
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
col_type = stats_data.get("type", "Unknown")
|
31
|
-
nullable_val = stats_data.get("nullable")
|
32
|
-
|
33
|
-
if nullable_val is True:
|
34
|
-
nullable_str = "Nullable"
|
35
|
-
elif nullable_val is False:
|
36
|
-
nullable_str = "Required"
|
37
|
-
else:
|
38
|
-
nullable_str = "Unknown Nullability"
|
39
|
-
lines.append(Text.assemble(("Column: ", "bold"), f"`{col_name}`"))
|
40
|
-
lines.append(Text.assemble(("Type: ", "bold"), f"{col_type} ({nullable_str})"))
|
41
|
-
lines.append("─" * (len(col_name) + len(col_type) + 20))
|
42
|
-
|
43
|
-
calc_error = stats_data.get("error")
|
44
|
-
if calc_error:
|
45
|
-
lines.append(Text("Calculation Error:", style="bold red"))
|
46
|
-
lines.append(f"```\n{calc_error}\n```")
|
47
|
-
lines.append("")
|
48
|
-
|
49
|
-
message = stats_data.get("message")
|
50
|
-
if message:
|
51
|
-
lines.append(Text(f"Info: {message}", style="italic cyan"))
|
52
|
-
lines.append("")
|
53
|
-
|
54
|
-
calculated = stats_data.get("calculated")
|
55
|
-
if calculated:
|
56
|
-
lines.append(Text("Calculated Statistics:", style="bold"))
|
57
|
-
keys_to_display = [
|
58
|
-
"Total Count", "Valid Count", "Null Count", "Null Percentage",
|
59
|
-
"Min", "Max", "Mean", "StdDev", "Variance",
|
60
|
-
"Distinct Count", "Min Length", "Max Length", "Avg Length",
|
61
|
-
"Value Counts"
|
62
|
-
]
|
63
|
-
found_stats = False
|
64
|
-
for key in keys_to_display:
|
65
|
-
if key in calculated:
|
66
|
-
found_stats = True
|
67
|
-
value = calculated[key]
|
68
|
-
if key == "Value Counts" and isinstance(value, dict):
|
69
|
-
lines.append(f" - {key}:")
|
70
|
-
for sub_key, sub_val in value.items():
|
71
|
-
sub_val_str = f"{sub_val:,}" if isinstance(sub_val, (int, float)) else str(sub_val)
|
72
|
-
lines.append(f" - {sub_key}: {sub_val_str}")
|
73
|
-
elif isinstance(value, (int, float)):
|
74
|
-
lines.append(f" - {key}: {value:,}")
|
75
|
-
else:
|
76
|
-
lines.append(f" - {key}: {value}")
|
77
|
-
if not found_stats and not calc_error:
|
78
|
-
lines.append(Text(" (No specific stats calculated for this type)", style="dim"))
|
79
|
-
return lines
|
27
|
+
class SchemaView(BaseView):
|
28
|
+
"""
|
29
|
+
View for displaying schema information and column statistics.
|
30
|
+
|
31
|
+
Shows a list of columns on the left and detailed statistics
|
32
|
+
for the selected column on the right.
|
33
|
+
"""
|
80
34
|
|
35
|
+
DEFAULT_STATS_MESSAGE = "Select a column from the list to view its statistics."
|
81
36
|
|
82
|
-
|
83
|
-
"""Displays a list of columns and the statistics for the selected column."""
|
84
|
-
DEFAULT_STATS_MESSAGE = "Select a column from the list above to view its statistics."
|
37
|
+
# Reactive variable for loading state
|
85
38
|
loading = var(False)
|
86
39
|
|
40
|
+
def __init__(self, **kwargs):
|
41
|
+
super().__init__(**kwargs)
|
42
|
+
self._columns_data: Optional[List[Dict[str, Any]]] = None
|
43
|
+
self._current_column: Optional[str] = None
|
44
|
+
|
87
45
|
def compose(self) -> ComposeResult:
|
88
|
-
"""
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
46
|
+
"""Compose the schema view layout."""
|
47
|
+
with Horizontal():
|
48
|
+
# Left side: Column list
|
49
|
+
with Container(id="column-list-container", classes="column-list"):
|
50
|
+
yield Static("Columns", classes="section-title")
|
51
|
+
yield ListView(id="column-list-view")
|
52
|
+
|
53
|
+
# Right side: Column statistics
|
54
|
+
with Container(id="stats-container", classes="column-stats"):
|
55
|
+
yield Static("Column Statistics", classes="section-title")
|
56
|
+
with VerticalScroll(id="schema-stats-scroll"):
|
57
|
+
yield Container(id="schema-stats-content")
|
58
|
+
yield LoadingIndicator(id="schema-loading-indicator")
|
59
|
+
|
60
|
+
def load_content(self) -> None:
|
61
|
+
"""Load schema content."""
|
62
|
+
if not self.check_handler_available():
|
63
|
+
return
|
64
|
+
|
65
|
+
try:
|
66
|
+
# Load column list
|
67
|
+
self._load_column_list()
|
68
|
+
|
69
|
+
# Display default message in stats area
|
70
|
+
self._display_default_message()
|
71
|
+
|
72
|
+
self.logger.info("Schema loaded successfully")
|
73
|
+
|
74
|
+
except Exception as e:
|
75
|
+
self.show_error("Failed to load schema", e)
|
76
|
+
|
77
|
+
def _load_column_list(self) -> None:
|
78
|
+
"""Load the list of columns from the data handler."""
|
79
|
+
try:
|
80
|
+
list_view = self.query_one("#column-list-view", ListView)
|
81
|
+
list_view.clear()
|
82
|
+
|
83
|
+
# Get schema data from handler
|
84
|
+
self._columns_data = self.handler.get_schema_data()
|
85
|
+
self.logger.debug(f"Received schema data: {self._columns_data}")
|
86
|
+
|
87
|
+
if self._columns_data is None:
|
88
|
+
self._show_list_error("Could not load schema data")
|
89
|
+
return
|
90
|
+
|
91
|
+
if not self._columns_data:
|
92
|
+
self._show_list_warning("Schema has no columns")
|
93
|
+
return
|
94
|
+
|
95
|
+
# Populate column list
|
96
|
+
column_count = 0
|
97
|
+
for col_info in self._columns_data:
|
98
|
+
column_name = col_info.get("name")
|
99
|
+
if column_name:
|
100
|
+
list_view.append(ColumnListItem(column_name))
|
101
|
+
column_count += 1
|
102
|
+
else:
|
103
|
+
self.logger.warning("Found column info without a 'name' key")
|
104
|
+
|
105
|
+
self.logger.info(f"Populated column list with {column_count} columns")
|
106
|
+
|
107
|
+
except Exception as e:
|
108
|
+
self.logger.exception("Error loading column list")
|
109
|
+
self._show_list_error(f"Error loading schema: {e}")
|
110
|
+
|
111
|
+
def _show_list_error(self, message: str) -> None:
|
112
|
+
"""Show error message in the column list."""
|
113
|
+
try:
|
114
|
+
list_view = self.query_one("#column-list-view", ListView)
|
115
|
+
list_view.clear()
|
116
|
+
list_view.append(ListItem(Label(f"[red]{message}[/red]")))
|
117
|
+
except Exception as e:
|
118
|
+
self.logger.error(f"Failed to show list error: {e}")
|
119
|
+
|
120
|
+
def _show_list_warning(self, message: str) -> None:
|
121
|
+
"""Show warning message in the column list."""
|
122
|
+
try:
|
123
|
+
list_view = self.query_one("#column-list-view", ListView)
|
124
|
+
list_view.clear()
|
125
|
+
list_view.append(ListItem(Label(f"[yellow]{message}[/yellow]")))
|
126
|
+
except Exception as e:
|
127
|
+
self.logger.error(f"Failed to show list warning: {e}")
|
128
|
+
|
129
|
+
def _display_default_message(self) -> None:
|
130
|
+
"""Display the initial message in the stats area."""
|
102
131
|
try:
|
103
132
|
stats_container = self.query_one("#schema-stats-content", Container)
|
104
133
|
stats_container.query("*").remove()
|
105
134
|
stats_container.mount(Static(self.DEFAULT_STATS_MESSAGE, classes="stats-line"))
|
106
|
-
stats_container.display = True
|
107
135
|
except Exception as e:
|
108
|
-
|
136
|
+
self.logger.error(f"Failed to display default stats message: {e}")
|
137
|
+
|
138
|
+
def on_list_view_selected(self, event: ListView.Selected) -> None:
|
139
|
+
"""Handle column selection from the list."""
|
140
|
+
if hasattr(event.item, 'column_name'):
|
141
|
+
column_name = event.item.column_name
|
142
|
+
self._current_column = column_name
|
143
|
+
self._load_column_stats(column_name)
|
144
|
+
else:
|
145
|
+
self.logger.warning("Selected item does not have column_name attribute")
|
146
|
+
|
147
|
+
def _load_column_stats(self, column_name: str) -> None:
|
148
|
+
"""
|
149
|
+
Load and display statistics for the selected column.
|
150
|
+
|
151
|
+
Args:
|
152
|
+
column_name: Name of the column to analyze
|
153
|
+
"""
|
154
|
+
if not self.handler:
|
155
|
+
self._show_stats_error("Data handler not available")
|
156
|
+
return
|
157
|
+
|
158
|
+
try:
|
159
|
+
# Set loading state
|
160
|
+
self.loading = True
|
109
161
|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
162
|
+
# Get column statistics
|
163
|
+
self.logger.debug(f"Loading stats for column: {column_name}")
|
164
|
+
raw_stats = self.handler.get_column_stats(column_name)
|
165
|
+
|
166
|
+
# Format stats for display
|
167
|
+
formatted_lines = format_stats_for_display(raw_stats)
|
168
|
+
|
169
|
+
# Display the formatted stats
|
170
|
+
self._display_column_stats(formatted_lines)
|
171
|
+
|
172
|
+
except Exception as e:
|
173
|
+
self.logger.exception(f"Error loading stats for column {column_name}")
|
174
|
+
self._show_stats_error(f"Failed to load statistics: {e}")
|
175
|
+
finally:
|
176
|
+
self.loading = False
|
114
177
|
|
178
|
+
def _display_column_stats(self, formatted_lines: List) -> None:
|
179
|
+
"""
|
180
|
+
Display formatted column statistics.
|
181
|
+
|
182
|
+
Args:
|
183
|
+
formatted_lines: List of formatted text lines to display
|
184
|
+
"""
|
115
185
|
try:
|
116
|
-
|
117
|
-
|
118
|
-
list_view.append(ListItem(Label("[red]Data handler not available.[/red]")))
|
119
|
-
return
|
186
|
+
stats_container = self.query_one("#schema-stats-content", Container)
|
187
|
+
stats_container.query("*").remove()
|
120
188
|
|
121
|
-
|
122
|
-
|
189
|
+
for line in formatted_lines:
|
190
|
+
if isinstance(line, Text):
|
191
|
+
stats_container.mount(Static(line, classes="stats-line"))
|
192
|
+
else:
|
193
|
+
stats_container.mount(Static(str(line), classes="stats-line"))
|
123
194
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
elif not schema_data:
|
128
|
-
log.warning("SchemaView: Schema has no columns.")
|
129
|
-
list_view.append(ListItem(Label("[yellow]Schema has no columns.[/yellow]")))
|
130
|
-
else:
|
131
|
-
column_count = 0
|
132
|
-
for col_info in schema_data:
|
133
|
-
column_name = col_info.get("name")
|
134
|
-
if column_name:
|
135
|
-
list_view.append(ColumnListItem(column_name))
|
136
|
-
column_count += 1
|
137
|
-
else:
|
138
|
-
log.warning("SchemaView: Found column info without a 'name' key.")
|
139
|
-
log.info(f"SchemaView: Populated column list with {column_count} columns.")
|
195
|
+
except Exception as e:
|
196
|
+
self.logger.error(f"Failed to display column stats: {e}")
|
197
|
+
self._show_stats_error("Failed to display statistics")
|
140
198
|
|
199
|
+
def _show_stats_error(self, message: str) -> None:
|
200
|
+
"""Show error message in the stats area."""
|
201
|
+
try:
|
202
|
+
stats_container = self.query_one("#schema-stats-content", Container)
|
203
|
+
stats_container.query("*").remove()
|
204
|
+
stats_container.mount(Static(f"[red]Error: {message}[/red]", classes="error-content"))
|
141
205
|
except Exception as e:
|
142
|
-
|
143
|
-
list_view.clear()
|
144
|
-
list_view.append(ListItem(Label(f"[red]Error loading schema: {e}[/red]")))
|
206
|
+
self.logger.error(f"Failed to show stats error: {e}")
|
145
207
|
|
146
208
|
def watch_loading(self, loading: bool) -> None:
|
147
209
|
"""React to changes in the loading state."""
|
148
210
|
try:
|
149
211
|
loading_indicator = self.query_one("#schema-loading-indicator", LoadingIndicator)
|
150
212
|
stats_scroll = self.query_one("#schema-stats-scroll", VerticalScroll)
|
151
|
-
loading_indicator.display = loading
|
152
|
-
stats_scroll.display = not loading
|
153
|
-
if loading:
|
154
|
-
stats_content = self.query_one("#schema-stats-content", Container)
|
155
|
-
stats_content.display = False
|
156
|
-
except Exception as e:
|
157
|
-
log.error(f"Error updating loading display: {e}")
|
158
|
-
|
159
|
-
async def _update_stats_display(self, lines: List[Union[str, Text]]) -> None:
|
160
|
-
"""Updates the statistics display area with formatted lines."""
|
161
|
-
try:
|
162
|
-
stats_content_container = self.query_one("#schema-stats-content", Container)
|
163
|
-
stats_scroll_container = self.query_one("#schema-stats-scroll", VerticalScroll)
|
164
|
-
await stats_content_container.query("*").remove()
|
165
213
|
|
166
|
-
if
|
167
|
-
|
214
|
+
if loading:
|
215
|
+
loading_indicator.display = True
|
216
|
+
stats_scroll.display = False
|
168
217
|
else:
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
css_class = "stats-line"
|
173
|
-
if isinstance(line, str) and line.startswith("```"):
|
174
|
-
content = line.strip()
|
175
|
-
if content.startswith("```json"):
|
176
|
-
content = content[7:]
|
177
|
-
elif content.startswith("```"):
|
178
|
-
content = content[3:]
|
179
|
-
if content.endswith("```"):
|
180
|
-
content = content[:-3]
|
181
|
-
content = content.strip()
|
182
|
-
css_class = "stats-code"
|
183
|
-
elif isinstance(line, Text):
|
184
|
-
style_str = str(line.style).lower()
|
185
|
-
if "red" in style_str:
|
186
|
-
css_class = "stats-error stats-line"
|
187
|
-
elif "yellow" in style_str:
|
188
|
-
css_class = "stats-warning stats-line"
|
189
|
-
elif "italic" in style_str:
|
190
|
-
css_class = "stats-info stats-line"
|
191
|
-
elif "bold" in style_str:
|
192
|
-
css_class = "stats-header stats-line"
|
193
|
-
new_widgets.append(Static(content, classes=css_class))
|
194
|
-
if new_widgets:
|
195
|
-
await stats_content_container.mount_all(new_widgets)
|
196
|
-
|
197
|
-
stats_content_container.display = True
|
198
|
-
stats_scroll_container.display = True
|
199
|
-
stats_scroll_container.scroll_home(animate=False)
|
218
|
+
loading_indicator.display = False
|
219
|
+
stats_scroll.display = True
|
220
|
+
|
200
221
|
except Exception as e:
|
201
|
-
|
202
|
-
try:
|
203
|
-
await stats_content_container.query("*").remove()
|
204
|
-
await stats_content_container.mount(Static(f"[red]Internal error displaying stats: {e}[/red]"))
|
205
|
-
stats_content_container.display = True
|
206
|
-
stats_scroll_container.display = True
|
207
|
-
except Exception:
|
208
|
-
pass
|
209
|
-
|
210
|
-
async def on_list_view_selected(self, event: ListView.Selected) -> None:
|
211
|
-
"""Handle column selection in the ListView."""
|
212
|
-
event.stop()
|
213
|
-
selected_item = event.item
|
214
|
-
|
215
|
-
if isinstance(selected_item, ColumnListItem):
|
216
|
-
column_name = selected_item.column_name
|
217
|
-
self.loading = True
|
222
|
+
self.logger.error(f"Error updating loading state: {e}")
|
218
223
|
|
219
|
-
|
220
|
-
|
224
|
+
def refresh_schema(self) -> None:
|
225
|
+
"""Refresh the schema display."""
|
226
|
+
self._current_column = None
|
227
|
+
self.clear_content()
|
228
|
+
self.load_content()
|
221
229
|
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
if stats_data.get("error"):
|
226
|
-
log.warning(f"Handler returned error for column '{column_name}': {stats_data['error']}")
|
227
|
-
error_markup = f"[red]Error getting stats: {stats_data['error']}[/]"
|
228
|
-
stats_data = {}
|
229
|
-
else:
|
230
|
-
error_markup = "[red]Error: Data handler not available.[/]"
|
231
|
-
log.error("SchemaView: Data handler not found on app.")
|
232
|
-
except Exception as e:
|
233
|
-
log.exception(f"Exception calculating stats for {column_name}")
|
234
|
-
error_markup = f"[red]Error loading stats for '{column_name}':\n{type(e).__name__}: {e}[/]"
|
235
|
-
|
236
|
-
if error_markup:
|
237
|
-
lines_to_render = [Text.from_markup(error_markup)]
|
238
|
-
else:
|
239
|
-
lines_to_render = format_stats_for_display(stats_data)
|
230
|
+
def get_current_column(self) -> Optional[str]:
|
231
|
+
"""Get the currently selected column name."""
|
232
|
+
return self._current_column
|
240
233
|
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
log.debug("Non-column item selected in ListView.")
|
245
|
-
await self._update_stats_display([])
|
246
|
-
self.loading = False
|
234
|
+
def get_columns_data(self) -> Optional[List[Dict[str, Any]]]:
|
235
|
+
"""Get the current columns data."""
|
236
|
+
return self._columns_data
|
@@ -0,0 +1,19 @@
|
|
1
|
+
"""
|
2
|
+
Utility functions for parqv views.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from .data_formatters import format_metadata_for_display, format_value_for_display
|
6
|
+
from .stats_formatters import format_stats_for_display, format_column_info
|
7
|
+
from .visualization import create_text_histogram, should_show_histogram
|
8
|
+
|
9
|
+
__all__ = [
|
10
|
+
# Data formatting
|
11
|
+
"format_metadata_for_display",
|
12
|
+
"format_value_for_display",
|
13
|
+
"format_stats_for_display",
|
14
|
+
"format_column_info",
|
15
|
+
|
16
|
+
# Visualization
|
17
|
+
"create_text_histogram",
|
18
|
+
"should_show_histogram",
|
19
|
+
]
|
@@ -0,0 +1,184 @@
|
|
1
|
+
"""
|
2
|
+
Data formatting utilities for parqv views.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import Any, Dict, Union
|
6
|
+
from rich.text import Text
|
7
|
+
|
8
|
+
|
9
|
+
def format_metadata_for_display(metadata: Dict[str, Any]) -> Dict[str, Any]:
    """
    Format a metadata dictionary for consistent display.

    Known fields (paths, row/column counts, sizes, ...) are rendered by a
    dedicated formatter; every other scalar goes through
    ``format_value_for_display``. Nested dictionaries are formatted
    recursively with the same rules.

    Args:
        metadata: Raw metadata dictionary from handler

    Returns:
        Formatted metadata dictionary ready for display. When ``metadata`` is
        empty/None, or carries an ``"error"`` key, a single-entry
        ``{"Error": ...}`` dictionary is returned instead.
    """
    if not metadata:
        return {"Error": "No metadata available"}

    # A handler-reported error replaces the whole metadata view.
    if "error" in metadata:
        return {"Error": metadata["error"]}

    # Per-field formatters for the known keys; looked up by exact key name.
    field_formatters = {
        "File Path": str,
        "Path": str,
        "Format": lambda x: str(x).upper(),
        "Total Rows": _format_number,
        "Total Columns": _format_number,
        "Columns": _format_number,
        "Size": _format_size_if_bytes,
        "Memory Usage": _format_size_if_bytes,
        "DuckDB View": lambda x: f"`{x}`" if x else "N/A",
    }

    # The top level follows exactly the same rules as nested groups, so the
    # recursive helper handles both instead of duplicating the loop here.
    return _format_nested_metadata(metadata, field_formatters)
|
51
|
+
|
52
|
+
|
53
|
+
def _format_nested_metadata(nested_dict: Dict[str, Any], field_formatters: Dict) -> Dict[str, Any]:
|
54
|
+
"""Format nested metadata dictionaries."""
|
55
|
+
formatted_nested = {}
|
56
|
+
|
57
|
+
for key, value in nested_dict.items():
|
58
|
+
if isinstance(value, dict):
|
59
|
+
# Handle further nesting if needed
|
60
|
+
formatted_nested[key] = _format_nested_metadata(value, field_formatters)
|
61
|
+
elif key in field_formatters:
|
62
|
+
formatted_nested[key] = field_formatters[key](value)
|
63
|
+
else:
|
64
|
+
formatted_nested[key] = format_value_for_display(value)
|
65
|
+
|
66
|
+
return formatted_nested
|
67
|
+
|
68
|
+
|
69
|
+
def format_value_for_display(value: Any) -> str:
    """
    Format a single value for display in the UI.

    Args:
        value: The value to format

    Returns:
        String representation suitable for display: "N/A" for None and for
        empty/whitespace-only strings, "Yes"/"No" for booleans, a
        thousands-separated number for ints and floats, the string itself for
        non-empty strings, and ``str(value)`` for anything else.
    """
    if value is None:
        return "N/A"

    # bool must be tested before int/float: bool is a subclass of int, so a
    # numeric check placed first would render True/False as "1"/"0".
    if isinstance(value, bool):
        return "Yes" if value else "No"

    if isinstance(value, (int, float)):
        return _format_number(value)

    if isinstance(value, str):
        # Empty or whitespace-only strings carry no information.
        return value if value.strip() else "N/A"

    # For other types, convert to string
    return str(value)
|
96
|
+
|
97
|
+
|
98
|
+
def _format_number(value: Union[str, int, float]) -> str:
|
99
|
+
"""
|
100
|
+
Format numbers with thousand separators.
|
101
|
+
|
102
|
+
Args:
|
103
|
+
value: Numeric value or string representation
|
104
|
+
|
105
|
+
Returns:
|
106
|
+
Formatted number string
|
107
|
+
"""
|
108
|
+
if isinstance(value, str):
|
109
|
+
# Try to extract number from string like "1,234" or "1234"
|
110
|
+
try:
|
111
|
+
# Remove existing commas and convert
|
112
|
+
clean_str = value.replace(",", "").strip()
|
113
|
+
if clean_str.isdigit():
|
114
|
+
return f"{int(clean_str):,}"
|
115
|
+
elif "." in clean_str:
|
116
|
+
return f"{float(clean_str):,.2f}"
|
117
|
+
else:
|
118
|
+
return value # Return as-is if not numeric
|
119
|
+
except (ValueError, AttributeError):
|
120
|
+
return value
|
121
|
+
|
122
|
+
if isinstance(value, int):
|
123
|
+
return f"{value:,}"
|
124
|
+
|
125
|
+
if isinstance(value, float):
|
126
|
+
return f"{value:,.2f}"
|
127
|
+
|
128
|
+
return str(value)
|
129
|
+
|
130
|
+
|
131
|
+
def _format_size_if_bytes(value: Union[str, int]) -> str:
|
132
|
+
"""
|
133
|
+
Format size values, detecting if they represent bytes.
|
134
|
+
|
135
|
+
Args:
|
136
|
+
value: Size value that might be in bytes
|
137
|
+
|
138
|
+
Returns:
|
139
|
+
Formatted size string
|
140
|
+
"""
|
141
|
+
if isinstance(value, str):
|
142
|
+
# If it already contains size units, return as-is
|
143
|
+
if any(unit in value.lower() for unit in ["kb", "mb", "gb", "tb", "bytes"]):
|
144
|
+
return value
|
145
|
+
|
146
|
+
# Try to parse as number and format as bytes
|
147
|
+
try:
|
148
|
+
clean_str = value.replace(",", "").strip()
|
149
|
+
if "bytes" in value.lower():
|
150
|
+
num_bytes = int(clean_str.split()[0])
|
151
|
+
return _format_bytes(num_bytes)
|
152
|
+
else:
|
153
|
+
return value
|
154
|
+
except (ValueError, IndexError):
|
155
|
+
return value
|
156
|
+
|
157
|
+
if isinstance(value, int):
|
158
|
+
# Assume it's bytes if it's a large integer
|
159
|
+
if value > 1024:
|
160
|
+
return _format_bytes(value)
|
161
|
+
else:
|
162
|
+
return f"{value:,}"
|
163
|
+
|
164
|
+
return str(value)
|
165
|
+
|
166
|
+
|
167
|
+
def _format_bytes(num_bytes: int) -> str:
|
168
|
+
"""
|
169
|
+
Format bytes into human-readable format.
|
170
|
+
|
171
|
+
Args:
|
172
|
+
num_bytes: Number of bytes
|
173
|
+
|
174
|
+
Returns:
|
175
|
+
Human-readable size string
|
176
|
+
"""
|
177
|
+
if num_bytes < 1024:
|
178
|
+
return f"{num_bytes:,} bytes"
|
179
|
+
elif num_bytes < 1024 ** 2:
|
180
|
+
return f"{num_bytes / 1024:.1f} KB"
|
181
|
+
elif num_bytes < 1024 ** 3:
|
182
|
+
return f"{num_bytes / 1024 ** 2:.1f} MB"
|
183
|
+
else:
|
184
|
+
return f"{num_bytes / 1024 ** 3:.1f} GB"
|