parqv 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parqv/app.py +87 -50
- parqv/handlers/__init__.py +13 -0
- parqv/handlers/base_handler.py +114 -0
- parqv/handlers/json.py +450 -0
- parqv/handlers/parquet.py +640 -0
- parqv/views/metadata_view.py +11 -4
- parqv/views/schema_view.py +147 -88
- parqv-0.2.0.dist-info/METADATA +104 -0
- parqv-0.2.0.dist-info/RECORD +17 -0
- {parqv-0.1.0.dist-info → parqv-0.2.0.dist-info}/WHEEL +1 -1
- parqv/parquet_handler.py +0 -389
- parqv/views/row_group_view.py +0 -33
- parqv-0.1.0.dist-info/METADATA +0 -91
- parqv-0.1.0.dist-info/RECORD +0 -15
- {parqv-0.1.0.dist-info → parqv-0.2.0.dist-info}/entry_points.txt +0 -0
- {parqv-0.1.0.dist-info → parqv-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {parqv-0.1.0.dist-info → parqv-0.2.0.dist-info}/top_level.txt +0 -0
parqv/app.py
CHANGED
@@ -1,34 +1,40 @@
|
|
1
|
-
import sys
|
2
|
-
from pathlib import Path
|
3
1
|
import logging
|
2
|
+
import sys
|
4
3
|
from logging.handlers import RotatingFileHandler
|
5
|
-
from
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import Optional, Type
|
6
6
|
|
7
7
|
from textual.app import App, ComposeResult, Binding
|
8
8
|
from textual.containers import Container
|
9
9
|
from textual.widgets import Header, Footer, Static, Label, TabbedContent, TabPane
|
10
10
|
|
11
|
-
from .
|
11
|
+
from .handlers import (
|
12
|
+
DataHandler,
|
13
|
+
DataHandlerError,
|
14
|
+
ParquetHandler,
|
15
|
+
JsonHandler,
|
16
|
+
)
|
17
|
+
from .views.data_view import DataView
|
12
18
|
from .views.metadata_view import MetadataView
|
13
19
|
from .views.schema_view import SchemaView
|
14
|
-
from .views.data_view import DataView
|
15
|
-
from .views.row_group_view import RowGroupView
|
16
20
|
|
17
21
|
LOG_FILENAME = "parqv.log"
|
18
22
|
file_handler = RotatingFileHandler(
|
19
23
|
LOG_FILENAME, maxBytes=1024 * 1024 * 5, backupCount=3, encoding="utf-8"
|
20
24
|
)
|
21
25
|
logging.basicConfig(
|
22
|
-
level=logging.
|
26
|
+
level=logging.INFO,
|
23
27
|
format="%(asctime)s [%(levelname)-5.5s] %(name)s (%(filename)s:%(lineno)d) - %(message)s",
|
24
|
-
handlers=[file_handler],
|
28
|
+
handlers=[file_handler, logging.StreamHandler(sys.stdout)],
|
25
29
|
)
|
26
|
-
|
27
30
|
log = logging.getLogger(__name__)
|
28
31
|
|
32
|
+
AnyHandler = DataHandler
|
33
|
+
AnyHandlerError = DataHandlerError
|
34
|
+
|
29
35
|
|
30
36
|
class ParqV(App[None]):
|
31
|
-
"""A Textual app to visualize Parquet files."""
|
37
|
+
"""A Textual app to visualize Parquet or JSON files."""
|
32
38
|
|
33
39
|
CSS_PATH = "parqv.css"
|
34
40
|
BINDINGS = [
|
@@ -37,60 +43,89 @@ class ParqV(App[None]):
|
|
37
43
|
|
38
44
|
# App State
|
39
45
|
file_path: Optional[Path] = None
|
40
|
-
handler: Optional[
|
46
|
+
handler: Optional[AnyHandler] = None # Use ABC type hint
|
47
|
+
handler_type: Optional[str] = None # Keep for display ('parquet', 'json')
|
41
48
|
error_message: Optional[str] = None
|
42
49
|
|
43
50
|
def __init__(self, file_path_str: Optional[str] = None, *args, **kwargs):
    """Validate the input path and create the matching data handler.

    Nothing is raised to the caller: every failure is logged and stored in
    ``self.error_message`` instead.

    Args:
        file_path_str: Path of the file to open. A falsy value is recorded
            as an error.
        *args: Forwarded unchanged to the parent ``App`` initializer.
        **kwargs: Forwarded unchanged to the parent ``App`` initializer.
    """
    super().__init__(*args, **kwargs)

    if not file_path_str:
        self.error_message = "No file path provided."
        log.error(self.error_message)
        return

    self.file_path = Path(file_path_str)
    log.debug(f"Input file path: {self.file_path}")

    if not self.file_path.is_file():
        self.error_message = f"File not found or is not a regular file: {self.file_path}"
        log.error(self.error_message)
        return

    # Handler detection is driven purely by the (case-insensitive) extension.
    file_suffix = self.file_path.suffix.lower()
    if file_suffix == ".parquet":
        log.info("Detected '.parquet' extension, using ParquetHandler.")
        handler_class = ParquetHandler
        detected_type = "parquet"
    elif file_suffix in (".json", ".ndjson"):
        log.info(f"Detected '{file_suffix}' extension, using JsonHandler.")
        handler_class = JsonHandler
        detected_type = "json"
    else:
        self.error_message = f"Unsupported file extension: '{file_suffix}'. Only .parquet, .json, .ndjson are supported."
        log.error(self.error_message)
        return

    # A handler class is always selected at this point (the unsupported
    # branch returned above), so no further guard is needed.
    log.info(f"Attempting to initialize {detected_type.capitalize()} handler for: {self.file_path}")
    try:
        self.handler = handler_class(self.file_path)
        self.handler_type = detected_type
        log.info(f"{detected_type.capitalize()} handler initialized successfully.")
    except DataHandlerError as e:
        # Handler-specific failure: keep the handler's own message.
        self.error_message = f"Failed to initialize {detected_type} handler: {e}"
        log.error(self.error_message, exc_info=True)
    except Exception as e:
        # Anything else is unexpected; record it and log the traceback.
        self.error_message = f"An unexpected error occurred during {detected_type} handler initialization: {e}"
        log.exception(f"Unexpected error during {detected_type} handler initialization:")
|
61
97
|
|
62
98
|
def compose(self) -> ComposeResult:
    """Yield the widget tree: header, one body section, footer.

    The body is the error container when ``error_message`` is set, the
    tabbed Metadata / Schema / Data Preview layout when a handler exists,
    and a fallback "Initialization failed." container otherwise.
    """
    yield Header()

    if self.error_message:
        log.error(f"Displaying error message: {self.error_message}")
        error_title = Label("Error Loading File:", classes="error-title")
        error_body = Static(self.error_message, classes="error-content")
        yield Container(error_title, error_body, id="error-container")
    elif self.handler:
        log.debug(f"Composing main layout with TabbedContent for {self.handler_type} handler.")
        # (tab title, view class, view id, pane id) in display order.
        tab_specs = (
            ("Metadata", MetadataView, "metadata-view", "tab-metadata"),
            ("Schema", SchemaView, "schema-view", "tab-schema"),
            ("Data Preview", DataView, "data-view", "tab-data"),
        )
        with TabbedContent(id="main-tabs"):
            for tab_title, view_cls, view_id, pane_id in tab_specs:
                # Each view is instantiated right before its pane is yielded.
                yield TabPane(tab_title, view_cls(id=view_id), id=pane_id)
    else:
        log.error("Compose called but no handler and no error message. Initialization likely failed silently.")
        yield Container(Label("Initialization failed."), id="init-failed")

    yield Footer()
|
87
117
|
|
88
118
|
def on_mount(self) -> None:
|
89
119
|
log.debug("App mounted.")
|
90
120
|
try:
|
91
121
|
header = self.query_one(Header)
|
122
|
+
display_name = "N/A"
|
123
|
+
format_name = "Unknown"
|
92
124
|
if self.handler and self.file_path:
|
93
|
-
|
125
|
+
display_name = self.file_path.name
|
126
|
+
format_name = self.handler_type.capitalize() if self.handler_type else "Unknown"
|
127
|
+
header.title = f"parqv - {display_name}"
|
128
|
+
header.sub_title = f"Format: {format_name}"
|
94
129
|
elif self.error_message:
|
95
130
|
header.title = "parqv - Error"
|
96
131
|
else:
|
@@ -98,34 +133,36 @@ class ParqV(App[None]):
|
|
98
133
|
except Exception as e:
|
99
134
|
log.error(f"Failed to set header title: {e}")
|
100
135
|
|
101
|
-
|
102
136
|
def action_quit(self) -> None:
    """Close the active handler (best effort), then exit the app.

    A failure while closing the handler is logged and does not prevent
    the application from exiting.
    """
    log.info("Quit action triggered.")
    active_handler = self.handler
    if active_handler:
        try:
            active_handler.close()
        except Exception as cleanup_error:
            log.error(f"Error during handler cleanup: {cleanup_error}")
    self.exit()
|
105
144
|
|
106
145
|
|
107
146
|
# CLI Entry Point
|
108
147
|
def run_app():
    """CLI entry point: validate ``sys.argv`` and launch the ParqV app.

    Exits with status 1 when no path argument is given or when the path's
    extension is not ``.parquet``, ``.json`` or ``.ndjson`` (compared
    case-insensitively). Otherwise the app is constructed with the raw
    path string and run.
    """
    log.info("--- parqv (ABC Handler) started ---")
    if len(sys.argv) < 2:
        print("Usage: parqv <path_to_parquet_or_json_file>")
        log.error("No file path provided.")
        sys.exit(1)

    file_path_str = sys.argv[1]
    log.debug(f"File path from argument: {file_path_str}")

    # Reject unsupported extensions before the TUI is started.
    _path = Path(file_path_str)
    if _path.suffix.lower() not in ('.parquet', '.json', '.ndjson'):
        print(f"Error: Unsupported file type '{_path.suffix}'. Please provide a .parquet, .json, or .ndjson file.")
        log.error(f"Unsupported file type provided via CLI: {_path.suffix}")
        sys.exit(1)

    app = ParqV(file_path_str=file_path_str)
    app.run()
|
127
|
-
log.info("--- parqv finished ---")
|
128
165
|
|
129
166
|
|
130
167
|
if __name__ == "__main__":
|
131
|
-
run_app()
|
168
|
+
run_app()
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# src/parqv/handlers/__init__.py
|
2
|
+
from .base_handler import DataHandler, DataHandlerError
|
3
|
+
from .parquet import ParquetHandler, ParquetHandlerError
|
4
|
+
from .json import JsonHandler, JsonHandlerError
|
5
|
+
|
6
|
+
__all__ = [
|
7
|
+
"DataHandler",
|
8
|
+
"DataHandlerError",
|
9
|
+
"ParquetHandler",
|
10
|
+
"ParquetHandlerError",
|
11
|
+
"JsonHandler",
|
12
|
+
"JsonHandlerError",
|
13
|
+
]
|
@@ -0,0 +1,114 @@
|
|
1
|
+
import logging
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import Any, Dict, List, Optional
|
5
|
+
|
6
|
+
import pandas as pd
|
7
|
+
|
8
|
+
log = logging.getLogger(__name__)
|
9
|
+
|
10
|
+
|
11
|
+
class DataHandlerError(Exception):
    """Root of the exception hierarchy raised by data handlers."""
|
14
|
+
|
15
|
+
|
16
|
+
class DataHandler(ABC):
    """
    Common interface every file-format handler must implement.

    The ParqV application talks to data files only through this
    abstract base class, independent of the underlying format.
    """

    def __init__(self, file_path: Path):
        """
        Store the path of the data file.

        Subclasses are expected to open the file or prepare any
        resources they need in their own initializer.

        Args:
            file_path: Path to the data file.

        Raises:
            DataHandlerError: Expected from subclasses when initialization
                fails (e.g., file not found, format error).
        """
        self.file_path = file_path

    @abstractmethod
    def close(self) -> None:
        """
        Release any open resources (files, connections, etc.).

        Subclasses must provide the implementation.
        """

    @abstractmethod
    def get_metadata_summary(self) -> Dict[str, Any]:
        """
        Return summary metadata about the data source.

        Keys should be human-readable strings; values may be of any type.
        An 'error' key should be present if metadata retrieval fails.

        Returns:
            A metadata summary dictionary, or an error dictionary.
        """

    @abstractmethod
    def get_schema_data(self) -> Optional[List[Dict[str, str]]]:
        """
        Return the schema as one dictionary per column.

        Each dictionary should ideally carry the keys:
            'name' (str): Column name.
            'type' (str): Formatted data type string.
            'nullable' (Any): Nullability indicator (e.g., bool, str "YES"/"NO").

        Returns:
            The list of schema dictionaries, an empty list when there are
            no columns, or None when the schema could not be retrieved.
        """

    @abstractmethod
    def get_data_preview(self, num_rows: int = 50) -> Optional[pd.DataFrame]:
        """
        Fetch a preview of the data.

        Args:
            num_rows: Upper bound on the number of rows to fetch.

        Returns:
            A pandas DataFrame holding the preview, an empty DataFrame when
            there is no data, a DataFrame with an 'error' column on failure,
            or None on critical failure.
        """

    @abstractmethod
    def get_column_stats(self, column_name: str) -> Dict[str, Any]:
        """
        Compute statistics for a single column.

        The result should ideally contain keys such as:
            'column' (str): Column name.
            'type' (str): Formatted data type string.
            'nullable' (Any): Nullability indicator.
            'calculated' (Dict[str, Any]): Computed statistics.
            'error' (Optional[str]): Error message if calculation failed.
            'message' (Optional[str]): Informational message.

        Args:
            column_name: Name of the column to analyse.

        Returns:
            A dictionary with the column statistics or error information.
        """

    def _format_size(self, num_bytes: int) -> str:
        """Render a byte count as a human-readable string (bytes/KB/MB/GB)."""
        if num_bytes < 1024:
            return f"{num_bytes} bytes"
        # Each unit applies while the value is below the next power of 1024.
        for exponent, unit in ((1, "KB"), (2, "MB")):
            if num_bytes < 1024 ** (exponent + 1):
                return f"{num_bytes / 1024 ** exponent:.1f} {unit}"
        # Everything from 1 GiB upwards is reported in GB.
        return f"{num_bytes / 1024 ** 3:.1f} GB"
|