parqv 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
parqv/app.py CHANGED
@@ -1,34 +1,40 @@
1
- import sys
2
- from pathlib import Path
3
1
  import logging
2
+ import sys
4
3
  from logging.handlers import RotatingFileHandler
5
- from typing import Optional
4
+ from pathlib import Path
5
+ from typing import Optional, Type
6
6
 
7
7
  from textual.app import App, ComposeResult, Binding
8
8
  from textual.containers import Container
9
9
  from textual.widgets import Header, Footer, Static, Label, TabbedContent, TabPane
10
10
 
11
- from .parquet_handler import ParquetHandler, ParquetHandlerError
11
+ from .handlers import (
12
+ DataHandler,
13
+ DataHandlerError,
14
+ ParquetHandler,
15
+ JsonHandler,
16
+ )
17
+ from .views.data_view import DataView
12
18
  from .views.metadata_view import MetadataView
13
19
  from .views.schema_view import SchemaView
14
- from .views.data_view import DataView
15
- from .views.row_group_view import RowGroupView
16
20
 
17
21
  LOG_FILENAME = "parqv.log"
18
22
  file_handler = RotatingFileHandler(
19
23
  LOG_FILENAME, maxBytes=1024 * 1024 * 5, backupCount=3, encoding="utf-8"
20
24
  )
21
25
  logging.basicConfig(
22
- level=logging.DEBUG,
26
+ level=logging.INFO,
23
27
  format="%(asctime)s [%(levelname)-5.5s] %(name)s (%(filename)s:%(lineno)d) - %(message)s",
24
- handlers=[file_handler], # Log to file
28
+ handlers=[file_handler, logging.StreamHandler(sys.stdout)],
25
29
  )
26
-
27
30
  log = logging.getLogger(__name__)
28
31
 
32
+ AnyHandler = DataHandler
33
+ AnyHandlerError = DataHandlerError
34
+
29
35
 
30
36
  class ParqV(App[None]):
31
- """A Textual app to visualize Parquet files."""
37
+ """A Textual app to visualize Parquet or JSON files."""
32
38
 
33
39
  CSS_PATH = "parqv.css"
34
40
  BINDINGS = [
@@ -37,60 +43,89 @@ class ParqV(App[None]):
37
43
 
38
44
  # App State
39
45
  file_path: Optional[Path] = None
40
- handler: Optional[ParquetHandler] = None
46
+ handler: Optional[AnyHandler] = None # Use ABC type hint
47
+ handler_type: Optional[str] = None # Keep for display ('parquet', 'json')
41
48
  error_message: Optional[str] = None
42
49
 
43
50
  def __init__(self, file_path_str: Optional[str] = None, *args, **kwargs):
44
51
  super().__init__(*args, **kwargs)
45
- log.info("Initializing ParqVApp...")
46
- if file_path_str:
47
- self.file_path = Path(file_path_str)
48
- log.info(f"Attempting to load file: {self.file_path}")
52
+ if not file_path_str:
53
+ self.error_message = "No file path provided."
54
+ log.error(self.error_message)
55
+ return
56
+
57
+ self.file_path = Path(file_path_str)
58
+ log.debug(f"Input file path: {self.file_path}")
59
+
60
+ if not self.file_path.is_file():
61
+ self.error_message = f"File not found or is not a regular file: {self.file_path}"
62
+ log.error(self.error_message)
63
+ return
64
+
65
+ # Handler Detection
66
+ handler_class: Optional[Type[AnyHandler]] = None
67
+ handler_error_class: Type[AnyHandlerError] = DataHandlerError
68
+ detected_type = "unknown"
69
+ file_suffix = self.file_path.suffix.lower()
70
+
71
+ if file_suffix == ".parquet":
72
+ log.info("Detected '.parquet' extension, using ParquetHandler.")
73
+ handler_class = ParquetHandler
74
+ detected_type = "parquet"
75
+ elif file_suffix in [".json", ".ndjson"]:
76
+ log.info(f"Detected '{file_suffix}' extension, using JsonHandler.")
77
+ handler_class = JsonHandler
78
+ detected_type = "json"
79
+ else:
80
+ self.error_message = f"Unsupported file extension: '{file_suffix}'. Only .parquet, .json, .ndjson are supported."
81
+ log.error(self.error_message)
82
+ return
83
+
84
+ # Instantiate Handler
85
+ if handler_class:
86
+ log.info(f"Attempting to initialize {detected_type.capitalize()} handler for: {self.file_path}")
49
87
  try:
50
- # Initialize the Parquet handler on app start
51
- self.handler = ParquetHandler(self.file_path)
52
- log.info("Parquet handler initialized successfully.")
53
- except ParquetHandlerError as e:
54
- self.error_message = str(e)
55
- log.error(f"Failed to initialize handler: {e}", exc_info=True)
88
+ self.handler = handler_class(self.file_path)
89
+ self.handler_type = detected_type
90
+ log.info(f"{detected_type.capitalize()} handler initialized successfully.")
91
+ except DataHandlerError as e:
92
+ self.error_message = f"Failed to initialize {detected_type} handler: {e}"
93
+ log.error(self.error_message, exc_info=True)
56
94
  except Exception as e:
57
- self.error_message = (
58
- f"An unexpected error occurred during initialization: {e}"
59
- )
60
- log.exception("Unexpected error during app initialization:")
95
+ self.error_message = f"An unexpected error occurred during {detected_type} handler initialization: {e}"
96
+ log.exception(f"Unexpected error during {detected_type} handler initialization:")
61
97
 
62
98
  def compose(self) -> ComposeResult:
63
99
  yield Header()
64
-
65
100
  if self.error_message:
66
101
  log.error(f"Displaying error message: {self.error_message}")
67
102
  yield Container(
68
103
  Label("Error Loading File:", classes="error-title"),
69
104
  Static(self.error_message, classes="error-content"),
105
+ id="error-container"
70
106
  )
71
107
  elif self.handler:
72
- log.debug("Composing main layout with TabbedContent.")
108
+ log.debug(f"Composing main layout with TabbedContent for {self.handler_type} handler.")
73
109
  with TabbedContent(id="main-tabs"):
74
- with TabPane("Metadata", id="tab-metadata"):
75
- yield MetadataView(id="metadata-view")
76
- with TabPane("Schema", id="tab-schema"):
77
- yield SchemaView(id="schema-view")
78
- with TabPane("Data Preview", id="tab-data"):
79
- yield DataView(id="data-view")
80
- with TabPane("Row Groups", id="tab-rowgroups"):
81
- yield RowGroupView(id="rowgroup-view")
110
+ yield TabPane("Metadata", MetadataView(id="metadata-view"), id="tab-metadata")
111
+ yield TabPane("Schema", SchemaView(id="schema-view"), id="tab-schema")
112
+ yield TabPane("Data Preview", DataView(id="data-view"), id="tab-data")
82
113
  else:
83
- log.warning("No handler available, showing 'no file' message.")
84
- yield Container(Label("No file loaded or handler initialization failed."))
85
-
114
+ log.error("Compose called but no handler and no error message. Initialization likely failed silently.")
115
+ yield Container(Label("Initialization failed."), id="init-failed")
86
116
  yield Footer()
87
117
 
88
118
  def on_mount(self) -> None:
89
119
  log.debug("App mounted.")
90
120
  try:
91
121
  header = self.query_one(Header)
122
+ display_name = "N/A"
123
+ format_name = "Unknown"
92
124
  if self.handler and self.file_path:
93
- header.title = f"parqv - {self.file_path.name}"
125
+ display_name = self.file_path.name
126
+ format_name = self.handler_type.capitalize() if self.handler_type else "Unknown"
127
+ header.title = f"parqv - {display_name}"
128
+ header.sub_title = f"Format: {format_name}"
94
129
  elif self.error_message:
95
130
  header.title = "parqv - Error"
96
131
  else:
@@ -98,34 +133,36 @@ class ParqV(App[None]):
98
133
  except Exception as e:
99
134
  log.error(f"Failed to set header title: {e}")
100
135
 
101
-
102
136
  def action_quit(self) -> None:
103
137
  log.info("Quit action triggered.")
138
+ if self.handler:
139
+ try:
140
+ self.handler.close()
141
+ except Exception as e:
142
+ log.error(f"Error during handler cleanup: {e}")
104
143
  self.exit()
105
144
 
106
145
 
107
146
  # CLI Entry Point
108
147
  def run_app():
109
- log.info("--- parqv started ---")
148
+ log.info("--- parqv (ABC Handler) started ---")
110
149
  if len(sys.argv) < 2:
111
- print("Usage: parqv <path_to_parquet_file>")
150
+ print("Usage: parqv <path_to_parquet_or_json_file>")
112
151
  log.error("No file path provided.")
113
152
  sys.exit(1)
114
153
 
115
154
  file_path_str = sys.argv[1]
116
- file_path = Path(file_path_str)
117
- log.debug(f"File path from argument: {file_path}")
155
+ log.debug(f"File path from argument: {file_path_str}")
118
156
 
119
- # Basic file validation
120
- if not file_path.is_file():
121
- print(f"Error: Path is not a file or does not exist: {file_path}")
122
- log.error(f"Invalid file path provided: {file_path}")
157
+ _path = Path(file_path_str)
158
+ if not _path.suffix.lower() in ['.parquet', '.json', '.ndjson']:
159
+ print(f"Error: Unsupported file type '{_path.suffix}'. Please provide a .parquet, .json, or .ndjson file.")
160
+ log.error(f"Unsupported file type provided via CLI: {_path.suffix}")
123
161
  sys.exit(1)
124
162
 
125
163
  app = ParqV(file_path_str=file_path_str)
126
164
  app.run()
127
- log.info("--- parqv finished ---")
128
165
 
129
166
 
130
167
  if __name__ == "__main__":
131
- run_app()
168
+ run_app()
@@ -0,0 +1,13 @@
1
+ # src/parqv/handlers/__init__.py
2
+ from .base_handler import DataHandler, DataHandlerError
3
+ from .parquet import ParquetHandler, ParquetHandlerError
4
+ from .json import JsonHandler, JsonHandlerError
5
+
6
+ __all__ = [
7
+ "DataHandler",
8
+ "DataHandlerError",
9
+ "ParquetHandler",
10
+ "ParquetHandlerError",
11
+ "JsonHandler",
12
+ "JsonHandlerError",
13
+ ]
@@ -0,0 +1,114 @@
1
+ import logging
2
+ from abc import ABC, abstractmethod
3
+ from pathlib import Path
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ import pandas as pd
7
+
8
+ log = logging.getLogger(__name__)
9
+
10
+
class DataHandlerError(Exception):
    """Common base class for every error raised by a data handler."""
14
+
15
+
class DataHandler(ABC):
    """
    Abstract base class for data handlers.

    Defines the common interface the ParqV application uses to interact
    with different data file formats. Concrete handlers must implement
    every abstract method below.
    """

    def __init__(self, file_path: Path):
        """
        Store the path of the data file this handler works on.

        Subclasses should open the file or set up any required resources
        in their own ``__init__``.

        Args:
            file_path: Path to the data file.

        Raises:
            DataHandlerError: Expected from subclasses when initialization
                fails (e.g., file not found, format error). This base
                implementation only stores the path and raises nothing.
        """
        self.file_path = file_path

    @abstractmethod
    def close(self) -> None:
        """
        Close any open resources (files, connections, etc.).

        Must be implemented by subclasses.
        """

    @abstractmethod
    def get_metadata_summary(self) -> Dict[str, Any]:
        """
        Return summary metadata about the data source.

        Keys should be human-readable strings; values can be of various
        types. The dictionary should include an 'error' key if metadata
        retrieval fails.

        Returns:
            A dictionary with the metadata summary, or an error dictionary.
        """

    @abstractmethod
    def get_schema_data(self) -> Optional[List[Dict[str, Any]]]:
        """
        Return the schema as a list of dictionaries, one per column.

        Each dictionary should ideally contain the keys:
            'name' (str): Column name.
            'type' (str): Formatted data type string.
            'nullable' (Any): Indicator of nullability
                (e.g., bool, str "YES"/"NO").

        The values are typed ``Any`` because 'nullable' is not required
        to be a string.

        Returns:
            A list of schema dictionaries, an empty list if there are no
            columns, or None if schema retrieval failed.
        """

    @abstractmethod
    def get_data_preview(self, num_rows: int = 50) -> Optional[pd.DataFrame]:
        """
        Fetch a preview of the data.

        Args:
            num_rows: The maximum number of rows to fetch.

        Returns:
            A pandas DataFrame with preview data, an empty DataFrame if
            there is no data, a DataFrame with an 'error' column on
            failure, or None on critical failure.
        """

    @abstractmethod
    def get_column_stats(self, column_name: str) -> Dict[str, Any]:
        """
        Calculate and return statistics for a specific column.

        The returned dictionary should ideally contain keys like:
            'column' (str): Column name.
            'type' (str): Formatted data type string.
            'nullable' (Any): Nullability indicator.
            'calculated' (Dict[str, Any]): Dictionary of computed statistics.
            'error' (Optional[str]): Error message if calculation failed.
            'message' (Optional[str]): Informational message.

        Args:
            column_name: The name of the column.

        Returns:
            A dictionary containing column statistics or error information.
        """

    def _format_size(self, num_bytes: int) -> str:
        """
        Format a byte count as a human-readable string.

        Counts below 1024 are shown verbatim as "<n> bytes"; larger counts
        are shown with one decimal place in KB, MB or GB (1024-based).
        GB is the largest unit used.

        Args:
            num_bytes: Size in bytes.

        Returns:
            The formatted size, e.g. "512 bytes", "1.5 KB", "2.0 GB".
        """
        if num_bytes < 1024:
            return f"{num_bytes} bytes"

        size = float(num_bytes)
        for unit in ("KB", "MB"):
            size /= 1024
            # Compare the value as it will be displayed: a count just under
            # the next unit would otherwise render as "1024.0 KB"/"1024.0 MB".
            if round(size, 1) < 1024:
                return f"{size:.1f} {unit}"
        return f"{size / 1024:.1f} GB"