parqv 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. parqv/__init__.py +31 -0
  2. parqv/app.py +97 -78
  3. parqv/cli.py +112 -0
  4. parqv/core/__init__.py +31 -0
  5. parqv/core/config.py +25 -0
  6. parqv/core/file_utils.py +88 -0
  7. parqv/core/handler_factory.py +89 -0
  8. parqv/core/logging.py +46 -0
  9. parqv/data_sources/__init__.py +44 -0
  10. parqv/data_sources/base/__init__.py +28 -0
  11. parqv/data_sources/base/exceptions.py +38 -0
  12. parqv/data_sources/base/handler.py +143 -0
  13. parqv/data_sources/formats/__init__.py +16 -0
  14. parqv/data_sources/formats/json.py +449 -0
  15. parqv/data_sources/formats/parquet.py +624 -0
  16. parqv/views/__init__.py +38 -0
  17. parqv/views/base.py +98 -0
  18. parqv/views/components/__init__.py +13 -0
  19. parqv/views/components/enhanced_data_table.py +152 -0
  20. parqv/views/components/error_display.py +72 -0
  21. parqv/views/components/loading_display.py +44 -0
  22. parqv/views/data_view.py +119 -46
  23. parqv/views/metadata_view.py +57 -13
  24. parqv/views/schema_view.py +197 -148
  25. parqv/views/utils/__init__.py +13 -0
  26. parqv/views/utils/data_formatters.py +162 -0
  27. parqv/views/utils/stats_formatters.py +160 -0
  28. parqv-0.2.1.dist-info/METADATA +104 -0
  29. parqv-0.2.1.dist-info/RECORD +34 -0
  30. {parqv-0.1.0.dist-info → parqv-0.2.1.dist-info}/WHEEL +1 -1
  31. parqv/parquet_handler.py +0 -389
  32. parqv/views/row_group_view.py +0 -33
  33. parqv-0.1.0.dist-info/METADATA +0 -91
  34. parqv-0.1.0.dist-info/RECORD +0 -15
  35. {parqv-0.1.0.dist-info → parqv-0.2.1.dist-info}/entry_points.txt +0 -0
  36. {parqv-0.1.0.dist-info → parqv-0.2.1.dist-info}/licenses/LICENSE +0 -0
  37. {parqv-0.1.0.dist-info → parqv-0.2.1.dist-info}/top_level.txt +0 -0
@@ -1,187 +1,236 @@
1
- import json
2
- import logging
3
- from typing import Dict, Any, Optional, List, Union
1
+ """
2
+ Schema view for displaying column schema and statistics.
3
+ """
4
+
5
+ from typing import Dict, Any, Optional, List
4
6
 
5
7
  from rich.text import Text
6
8
  from textual.app import ComposeResult
7
- from textual.containers import VerticalScroll, Container
9
+ from textual.containers import VerticalScroll, Container, Horizontal
8
10
  from textual.reactive import var
9
11
  from textual.widgets import Static, ListView, ListItem, Label, LoadingIndicator
10
12
 
11
- log = logging.getLogger(__name__)
13
+ from .base import BaseView
14
+ from .utils import format_stats_for_display
12
15
 
13
16
 
14
17
  class ColumnListItem(ListItem):
18
+ """A ListItem that stores the column name for schema display."""
19
+
15
20
  def __init__(self, column_name: str) -> None:
16
- super().__init__(Label(column_name), name=column_name, id=f"col-item-{column_name.replace(' ', '_')}")
21
+ # Ensure IDs are CSS-safe (replace spaces, etc.)
22
+ safe_id_name = "".join(c if c.isalnum() else '_' for c in column_name)
23
+ super().__init__(Label(column_name), name=column_name, id=f"col-item-{safe_id_name}")
17
24
  self.column_name = column_name
18
25
 
19
26
 
20
- def format_stats_for_display(stats_data: Dict[str, Any]) -> List[Union[str, Text]]:
21
- if not stats_data:
22
- return [Text.from_markup("[red]No statistics data available.[/red]")]
23
-
24
- lines: List[Union[str, Text]] = []
25
- col_name = stats_data.get("column", "N/A")
26
- col_type = stats_data.get("type", "Unknown")
27
- nullable = stats_data.get("nullable", "Unknown")
28
-
29
- lines.append(Text.assemble(("Column: ", "bold"), f"`{col_name}`"))
30
- lines.append(Text.assemble(("Type: ", "bold"), f"{col_type} ({'Nullable' if nullable else 'Required'})"))
31
- lines.append("─" * (len(col_name) + len(col_type) + 20))
32
-
33
- calc_error = stats_data.get("error")
34
- if calc_error:
35
- lines.append(Text("Calculation Error:", style="bold red"))
36
- lines.append(f"```{calc_error}```")
37
-
38
- calculated = stats_data.get("calculated")
39
- if calculated:
40
- lines.append(Text("Calculated Statistics:", style="bold"))
41
- keys_to_display = [
42
- "Total Count", "Valid Count", "Null Count", "Null Percentage",
43
- "Min", "Max", "Mean", "StdDev", "Distinct Count", "Value Counts"
44
- ]
45
- for key in keys_to_display:
46
- if key in calculated:
47
- value = calculated[key]
48
- if isinstance(value, dict):
49
- lines.append(f" - {key}:")
50
- for sub_key, sub_val in value.items():
51
- lines.append(f" - {sub_key}: {sub_val:,}")
52
- else:
53
- lines.append(f" - {key}: {value}")
54
- lines.append("")
27
+ class SchemaView(BaseView):
28
+ """
29
+ View for displaying schema information and column statistics.
30
+
31
+ Shows a list of columns on the left and detailed statistics
32
+ for the selected column on the right.
33
+ """
55
34
 
56
- meta_stats = stats_data.get("basic_metadata_stats")
57
- if meta_stats:
58
- lines.append(Text("Stats from File Metadata (Per Row Group):", style="bold"))
59
- try:
60
- json_str = json.dumps(meta_stats, indent=2, default=str)
61
- lines.append(f"```json\n{json_str}\n```")
62
- except Exception as e:
63
- lines.append(f" (Error formatting metadata: {e})")
64
- lines.append("")
35
+ DEFAULT_STATS_MESSAGE = "Select a column from the list to view its statistics."
65
36
 
66
- meta_stats_error = stats_data.get("metadata_stats_error")
67
- if meta_stats_error:
68
- lines.append(Text(f"Metadata Stats Warning: {meta_stats_error}", style="yellow"))
37
+ # Reactive variable for loading state
38
+ loading = var(False)
69
39
 
70
- message = stats_data.get("message")
71
- if message and not calculated:
72
- lines.append(Text(message, style="italic"))
40
+ def __init__(self, **kwargs):
41
+ super().__init__(**kwargs)
42
+ self._columns_data: Optional[List[Dict[str, Any]]] = None
43
+ self._current_column: Optional[str] = None
73
44
 
74
- return lines
45
+ def compose(self) -> ComposeResult:
46
+ """Compose the schema view layout."""
47
+ with Horizontal():
48
+ # Left side: Column list
49
+ with Container(id="column-list-container", classes="column-list"):
50
+ yield Static("Columns", classes="section-title")
51
+ yield ListView(id="column-list-view")
52
+
53
+ # Right side: Column statistics
54
+ with Container(id="stats-container", classes="column-stats"):
55
+ yield Static("Column Statistics", classes="section-title")
56
+ with VerticalScroll(id="schema-stats-scroll"):
57
+ yield Container(id="schema-stats-content")
58
+ yield LoadingIndicator(id="schema-loading-indicator")
59
+
60
+ def load_content(self) -> None:
61
+ """Load schema content."""
62
+ if not self.check_handler_available():
63
+ return
75
64
 
65
+ try:
66
+ # Load column list
67
+ self._load_column_list()
76
68
 
77
- class SchemaView(VerticalScroll):
78
- DEFAULT_STATS_MESSAGE = "Select a column above to view statistics."
79
- loading = var(False)
69
+ # Display default message in stats area
70
+ self._display_default_message()
80
71
 
81
- def compose(self) -> ComposeResult:
82
- yield ListView(id="column-list-view")
83
- yield LoadingIndicator(id="schema-loading-indicator")
84
- yield Container(id="schema-stats-content")
72
+ self.logger.info("Schema loaded successfully")
85
73
 
86
- def on_mount(self) -> None:
87
- self.query_one("#schema-loading-indicator", LoadingIndicator).styles.display = "none"
88
- self.call_later(self.load_column_list)
89
- self.call_later(self._update_stats_display, [])
74
+ except Exception as e:
75
+ self.show_error("Failed to load schema", e)
90
76
 
91
- def load_column_list(self):
92
- list_view: Optional[ListView] = None
77
+ def _load_column_list(self) -> None:
78
+ """Load the list of columns from the data handler."""
93
79
  try:
94
- list_views = self.query("#column-list-view")
95
- if not list_views:
96
- log.error("ListView widget (#column-list-view) not found!")
80
+ list_view = self.query_one("#column-list-view", ListView)
81
+ list_view.clear()
82
+
83
+ # Get schema data from handler
84
+ self._columns_data = self.handler.get_schema_data()
85
+ self.logger.debug(f"Received schema data: {self._columns_data}")
86
+
87
+ if self._columns_data is None:
88
+ self._show_list_error("Could not load schema data")
97
89
  return
98
- list_view = list_views.first()
99
- log.debug("ListView widget found.")
100
90
 
101
- list_view.clear()
91
+ if not self._columns_data:
92
+ self._show_list_warning("Schema has no columns")
93
+ return
102
94
 
103
- if self.app.handler and self.app.handler.schema:
104
- column_names: List[str] = self.app.handler.schema.names
105
- if column_names:
106
- for name in column_names:
107
- list_view.append(ColumnListItem(name))
95
+ # Populate column list
96
+ column_count = 0
97
+ for col_info in self._columns_data:
98
+ column_name = col_info.get("name")
99
+ if column_name:
100
+ list_view.append(ColumnListItem(column_name))
101
+ column_count += 1
108
102
  else:
109
- log.warning("Schema has no columns.")
110
- list_view.append(ListItem(Label("[yellow]Schema has no columns.[/yellow]")))
111
- elif not self.app.handler:
112
- log.error("Parquet handler not available.")
113
- list_view.append(ListItem(Label("[red]Parquet handler not available.[/red]")))
114
- else:
115
- log.error("Parquet schema not available.")
116
- list_view.append(ListItem(Label("[red]Parquet schema not available.[/red]")))
103
+ self.logger.warning("Found column info without a 'name' key")
104
+
105
+ self.logger.info(f"Populated column list with {column_count} columns")
117
106
 
118
107
  except Exception as e:
119
- log.exception("Error loading column list in SchemaView:")
120
- if list_view:
121
- list_view.clear()
122
- list_view.append(ListItem(Label(f"[red]Error loading schema view: {e}[/red]")))
108
+ self.logger.exception("Error loading column list")
109
+ self._show_list_error(f"Error loading schema: {e}")
123
110
 
124
- def watch_loading(self, loading: bool) -> None:
125
- loading_indicator = self.query_one("#schema-loading-indicator", LoadingIndicator)
126
- stats_content = self.query_one("#schema-stats-content", Container)
127
- loading_indicator.styles.display = "block" if loading else "none"
128
- stats_content.styles.display = "none" if loading else "block"
111
+ def _show_list_error(self, message: str) -> None:
112
+ """Show error message in the column list."""
113
+ try:
114
+ list_view = self.query_one("#column-list-view", ListView)
115
+ list_view.clear()
116
+ list_view.append(ListItem(Label(f"[red]{message}[/red]")))
117
+ except Exception as e:
118
+ self.logger.error(f"Failed to show list error: {e}")
129
119
 
130
- async def _update_stats_display(self, lines: List[Union[str, Text]]) -> None:
120
+ def _show_list_warning(self, message: str) -> None:
121
+ """Show warning message in the column list."""
131
122
  try:
132
- content_area = self.query_one("#schema-stats-content", Container)
133
- await content_area.query("*").remove()
123
+ list_view = self.query_one("#column-list-view", ListView)
124
+ list_view.clear()
125
+ list_view.append(ListItem(Label(f"[yellow]{message}[/yellow]")))
126
+ except Exception as e:
127
+ self.logger.error(f"Failed to show list warning: {e}")
134
128
 
135
- if not lines:
136
- await content_area.mount(Static(self.DEFAULT_STATS_MESSAGE, classes="stats-line"))
137
- return
129
+ def _display_default_message(self) -> None:
130
+ """Display the initial message in the stats area."""
131
+ try:
132
+ stats_container = self.query_one("#schema-stats-content", Container)
133
+ stats_container.query("*").remove()
134
+ stats_container.mount(Static(self.DEFAULT_STATS_MESSAGE, classes="stats-line"))
135
+ except Exception as e:
136
+ self.logger.error(f"Failed to display default stats message: {e}")
137
+
138
+ def on_list_view_selected(self, event: ListView.Selected) -> None:
139
+ """Handle column selection from the list."""
140
+ if hasattr(event.item, 'column_name'):
141
+ column_name = event.item.column_name
142
+ self._current_column = column_name
143
+ self._load_column_stats(column_name)
144
+ else:
145
+ self.logger.warning("Selected item does not have column_name attribute")
146
+
147
+ def _load_column_stats(self, column_name: str) -> None:
148
+ """
149
+ Load and display statistics for the selected column.
150
+
151
+ Args:
152
+ column_name: Name of the column to analyze
153
+ """
154
+ if not self.handler:
155
+ self._show_stats_error("Data handler not available")
156
+ return
157
+
158
+ try:
159
+ # Set loading state
160
+ self.loading = True
138
161
 
139
- new_widgets: List[Static] = []
140
- for line in lines:
141
- content: Union[str, Text] = line
142
- css_class = "stats-line"
143
- if isinstance(line, str) and line.startswith("```"):
144
- content = line.strip("` \n")
145
- css_class = "stats-code"
146
- elif isinstance(line, Text) and ("red" in str(line.style) or "yellow" in str(line.style)):
147
- css_class = "stats-error stats-line"
162
+ # Get column statistics
163
+ self.logger.debug(f"Loading stats for column: {column_name}")
164
+ raw_stats = self.handler.get_column_stats(column_name)
148
165
 
149
- new_widgets.append(Static(content, classes=css_class))
166
+ # Format stats for display
167
+ formatted_lines = format_stats_for_display(raw_stats)
168
+
169
+ # Display the formatted stats
170
+ self._display_column_stats(formatted_lines)
150
171
 
151
- if new_widgets:
152
- await content_area.mount_all(new_widgets)
153
172
  except Exception as e:
154
- log.error(f"Error updating stats display: {e}", exc_info=True)
155
- try:
156
- await content_area.query("*").remove()
157
- await content_area.mount(Static(f"[red]Internal error displaying stats: {e}[/red]"))
158
- except Exception:
159
- pass
160
-
161
- async def on_list_view_selected(self, event: ListView.Selected) -> None:
162
- event.stop()
163
- selected_item = event.item
164
-
165
- if isinstance(selected_item, ColumnListItem):
166
- column_name = selected_item.column_name
167
- log.info(f"Column selected: {column_name}")
168
- self.loading = True
173
+ self.logger.exception(f"Error loading stats for column {column_name}")
174
+ self._show_stats_error(f"Failed to load statistics: {e}")
175
+ finally:
176
+ self.loading = False
169
177
 
170
- stats_data: Dict[str, Any] = {}
171
- error_str: Optional[str] = None
172
- try:
173
- if self.app.handler:
174
- stats_data = self.app.handler.get_column_stats(column_name)
178
+ def _display_column_stats(self, formatted_lines: List) -> None:
179
+ """
180
+ Display formatted column statistics.
181
+
182
+ Args:
183
+ formatted_lines: List of formatted text lines to display
184
+ """
185
+ try:
186
+ stats_container = self.query_one("#schema-stats-content", Container)
187
+ stats_container.query("*").remove()
188
+
189
+ for line in formatted_lines:
190
+ if isinstance(line, Text):
191
+ stats_container.mount(Static(line, classes="stats-line"))
175
192
  else:
176
- error_str = "[red]Error: Parquet handler not available.[/]"
177
- log.error("Parquet handler not found on app.")
178
- except Exception as e:
179
- log.exception(f"ERROR calculating stats for {column_name}")
180
- error_str = f"[red]Error loading stats for {column_name}:\n{type(e).__name__}: {e}[/]"
181
-
182
- lines_to_render = format_stats_for_display(stats_data) if not error_str else [Text.from_markup(error_str)]
183
- await self._update_stats_display(lines_to_render)
184
- self.loading = False
185
- else:
186
- await self._update_stats_display([])
187
- self.loading = False
193
+ stats_container.mount(Static(str(line), classes="stats-line"))
194
+
195
+ except Exception as e:
196
+ self.logger.error(f"Failed to display column stats: {e}")
197
+ self._show_stats_error("Failed to display statistics")
198
+
199
+ def _show_stats_error(self, message: str) -> None:
200
+ """Show error message in the stats area."""
201
+ try:
202
+ stats_container = self.query_one("#schema-stats-content", Container)
203
+ stats_container.query("*").remove()
204
+ stats_container.mount(Static(f"[red]Error: {message}[/red]", classes="error-content"))
205
+ except Exception as e:
206
+ self.logger.error(f"Failed to show stats error: {e}")
207
+
208
+ def watch_loading(self, loading: bool) -> None:
209
+ """React to changes in the loading state."""
210
+ try:
211
+ loading_indicator = self.query_one("#schema-loading-indicator", LoadingIndicator)
212
+ stats_scroll = self.query_one("#schema-stats-scroll", VerticalScroll)
213
+
214
+ if loading:
215
+ loading_indicator.display = True
216
+ stats_scroll.display = False
217
+ else:
218
+ loading_indicator.display = False
219
+ stats_scroll.display = True
220
+
221
+ except Exception as e:
222
+ self.logger.error(f"Error updating loading state: {e}")
223
+
224
+ def refresh_schema(self) -> None:
225
+ """Refresh the schema display."""
226
+ self._current_column = None
227
+ self.clear_content()
228
+ self.load_content()
229
+
230
+ def get_current_column(self) -> Optional[str]:
231
+ """Get the currently selected column name."""
232
+ return self._current_column
233
+
234
+ def get_columns_data(self) -> Optional[List[Dict[str, Any]]]:
235
+ """Get the current columns data."""
236
+ return self._columns_data
@@ -0,0 +1,13 @@
1
+ """
2
+ Utility functions for parqv views.
3
+ """
4
+
5
+ from .data_formatters import format_metadata_for_display, format_value_for_display
6
+ from .stats_formatters import format_stats_for_display, format_column_info
7
+
8
+ __all__ = [
9
+ "format_metadata_for_display",
10
+ "format_value_for_display",
11
+ "format_stats_for_display",
12
+ "format_column_info",
13
+ ]
@@ -0,0 +1,162 @@
1
+ """
2
+ Data formatting utilities for parqv views.
3
+ """
4
+
5
+ from typing import Any, Dict, Union
6
+ from rich.text import Text
7
+
8
+
9
+ def format_metadata_for_display(metadata: Dict[str, Any]) -> Dict[str, Any]:
10
+ """
11
+ Format metadata dictionary for consistent display.
12
+
13
+ Args:
14
+ metadata: Raw metadata dictionary from handler
15
+
16
+ Returns:
17
+ Formatted metadata dictionary ready for display
18
+ """
19
+ if not metadata:
20
+ return {"Error": "No metadata available"}
21
+
22
+ # Check for error in metadata
23
+ if "error" in metadata:
24
+ return {"Error": metadata["error"]}
25
+
26
+ formatted = {}
27
+
28
+ # Format specific known fields with better presentation
29
+ field_formatters = {
30
+ "File Path": lambda x: str(x),
31
+ "Format": lambda x: str(x).upper(),
32
+ "Total Rows": lambda x: _format_number(x),
33
+ "Columns": lambda x: _format_number(x),
34
+ "Size": lambda x: _format_size_if_bytes(x),
35
+ "DuckDB View": lambda x: f"`{x}`" if x else "N/A",
36
+ }
37
+
38
+ for key, value in metadata.items():
39
+ if key in field_formatters:
40
+ formatted[key] = field_formatters[key](value)
41
+ else:
42
+ formatted[key] = format_value_for_display(value)
43
+
44
+ return formatted
45
+
46
+
47
+ def format_value_for_display(value: Any) -> str:
48
+ """
49
+ Format a single value for display in the UI.
50
+
51
+ Args:
52
+ value: The value to format
53
+
54
+ Returns:
55
+ String representation suitable for display
56
+ """
57
+ if value is None:
58
+ return "N/A"
59
+
60
+ if isinstance(value, (int, float)):
61
+ return _format_number(value)
62
+
63
+ if isinstance(value, bool):
64
+ return "Yes" if value else "No"
65
+
66
+ if isinstance(value, str):
67
+ # Handle empty strings
68
+ if not value.strip():
69
+ return "N/A"
70
+ return value
71
+
72
+ # For other types, convert to string
73
+ return str(value)
74
+
75
+
76
+ def _format_number(value: Union[str, int, float]) -> str:
77
+ """
78
+ Format numbers with thousand separators.
79
+
80
+ Args:
81
+ value: Numeric value or string representation
82
+
83
+ Returns:
84
+ Formatted number string
85
+ """
86
+ if isinstance(value, str):
87
+ # Try to extract number from string like "1,234" or "1234"
88
+ try:
89
+ # Remove existing commas and convert
90
+ clean_str = value.replace(",", "").strip()
91
+ if clean_str.isdigit():
92
+ return f"{int(clean_str):,}"
93
+ elif "." in clean_str:
94
+ return f"{float(clean_str):,.2f}"
95
+ else:
96
+ return value # Return as-is if not numeric
97
+ except (ValueError, AttributeError):
98
+ return value
99
+
100
+ if isinstance(value, int):
101
+ return f"{value:,}"
102
+
103
+ if isinstance(value, float):
104
+ return f"{value:,.2f}"
105
+
106
+ return str(value)
107
+
108
+
109
+ def _format_size_if_bytes(value: Union[str, int]) -> str:
110
+ """
111
+ Format size values, detecting if they represent bytes.
112
+
113
+ Args:
114
+ value: Size value that might be in bytes
115
+
116
+ Returns:
117
+ Formatted size string
118
+ """
119
+ if isinstance(value, str):
120
+ # If it already contains size units, return as-is
121
+ if any(unit in value.lower() for unit in ["kb", "mb", "gb", "tb", "bytes"]):
122
+ return value
123
+
124
+ # Try to parse as number and format as bytes
125
+ try:
126
+ clean_str = value.replace(",", "").strip()
127
+ if "bytes" in value.lower():
128
+ num_bytes = int(clean_str.split()[0])
129
+ return _format_bytes(num_bytes)
130
+ else:
131
+ return value
132
+ except (ValueError, IndexError):
133
+ return value
134
+
135
+ if isinstance(value, int):
136
+ # Assume it's bytes if it's a large integer
137
+ if value > 1024:
138
+ return _format_bytes(value)
139
+ else:
140
+ return f"{value:,}"
141
+
142
+ return str(value)
143
+
144
+
145
+ def _format_bytes(num_bytes: int) -> str:
146
+ """
147
+ Format bytes into human-readable format.
148
+
149
+ Args:
150
+ num_bytes: Number of bytes
151
+
152
+ Returns:
153
+ Human-readable size string
154
+ """
155
+ if num_bytes < 1024:
156
+ return f"{num_bytes:,} bytes"
157
+ elif num_bytes < 1024 ** 2:
158
+ return f"{num_bytes / 1024:.1f} KB"
159
+ elif num_bytes < 1024 ** 3:
160
+ return f"{num_bytes / 1024 ** 2:.1f} MB"
161
+ else:
162
+ return f"{num_bytes / 1024 ** 3:.1f} GB"