parqv 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parqv/__init__.py +31 -0
- parqv/app.py +84 -102
- parqv/cli.py +112 -0
- parqv/core/__init__.py +31 -0
- parqv/core/config.py +25 -0
- parqv/core/file_utils.py +88 -0
- parqv/core/handler_factory.py +89 -0
- parqv/core/logging.py +46 -0
- parqv/data_sources/__init__.py +44 -0
- parqv/data_sources/base/__init__.py +28 -0
- parqv/data_sources/base/exceptions.py +38 -0
- parqv/{handlers/base_handler.py → data_sources/base/handler.py} +54 -25
- parqv/{handlers → data_sources/formats}/__init__.py +8 -5
- parqv/{handlers → data_sources/formats}/json.py +31 -32
- parqv/{handlers → data_sources/formats}/parquet.py +40 -56
- parqv/views/__init__.py +38 -0
- parqv/views/base.py +98 -0
- parqv/views/components/__init__.py +13 -0
- parqv/views/components/enhanced_data_table.py +152 -0
- parqv/views/components/error_display.py +72 -0
- parqv/views/components/loading_display.py +44 -0
- parqv/views/data_view.py +119 -46
- parqv/views/metadata_view.py +57 -20
- parqv/views/schema_view.py +190 -200
- parqv/views/utils/__init__.py +13 -0
- parqv/views/utils/data_formatters.py +162 -0
- parqv/views/utils/stats_formatters.py +160 -0
- {parqv-0.2.0.dist-info → parqv-0.2.1.dist-info}/METADATA +2 -2
- parqv-0.2.1.dist-info/RECORD +34 -0
- {parqv-0.2.0.dist-info → parqv-0.2.1.dist-info}/WHEEL +1 -1
- parqv-0.2.0.dist-info/RECORD +0 -17
- {parqv-0.2.0.dist-info → parqv-0.2.1.dist-info}/entry_points.txt +0 -0
- {parqv-0.2.0.dist-info → parqv-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {parqv-0.2.0.dist-info → parqv-0.2.1.dist-info}/top_level.txt +0 -0
parqv/__init__.py
CHANGED
@@ -0,0 +1,31 @@
|
|
1
|
+
"""
|
2
|
+
parqv - A Textual application for visualizing Parquet and JSON files.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from .app import ParqV
|
6
|
+
from .cli import run_app
|
7
|
+
from .core import (
|
8
|
+
SUPPORTED_EXTENSIONS,
|
9
|
+
DEFAULT_PREVIEW_ROWS,
|
10
|
+
FileValidationError,
|
11
|
+
validate_and_detect_file,
|
12
|
+
HandlerFactory,
|
13
|
+
HandlerCreationError,
|
14
|
+
setup_logging,
|
15
|
+
get_logger
|
16
|
+
)
|
17
|
+
|
18
|
+
__version__ = "1.0.0"
|
19
|
+
|
20
|
+
__all__ = [
|
21
|
+
"ParqV",
|
22
|
+
"run_app",
|
23
|
+
"SUPPORTED_EXTENSIONS",
|
24
|
+
"DEFAULT_PREVIEW_ROWS",
|
25
|
+
"FileValidationError",
|
26
|
+
"validate_and_detect_file",
|
27
|
+
"HandlerFactory",
|
28
|
+
"HandlerCreationError",
|
29
|
+
"setup_logging",
|
30
|
+
"get_logger",
|
31
|
+
]
|
parqv/app.py
CHANGED
@@ -1,104 +1,77 @@
|
|
1
|
-
import logging
|
2
|
-
import sys
|
3
|
-
from logging.handlers import RotatingFileHandler
|
4
1
|
from pathlib import Path
|
5
|
-
from typing import Optional
|
2
|
+
from typing import Optional
|
6
3
|
|
7
4
|
from textual.app import App, ComposeResult, Binding
|
8
5
|
from textual.containers import Container
|
9
6
|
from textual.widgets import Header, Footer, Static, Label, TabbedContent, TabPane
|
10
7
|
|
11
|
-
from .
|
12
|
-
|
13
|
-
DataHandlerError,
|
14
|
-
ParquetHandler,
|
15
|
-
JsonHandler,
|
16
|
-
)
|
8
|
+
from .core import CSS_PATH, FileValidationError, validate_and_detect_file, HandlerFactory, HandlerCreationError, get_logger
|
9
|
+
from .data_sources import DataHandler
|
17
10
|
from .views.data_view import DataView
|
18
11
|
from .views.metadata_view import MetadataView
|
19
12
|
from .views.schema_view import SchemaView
|
20
13
|
|
21
|
-
|
22
|
-
file_handler = RotatingFileHandler(
|
23
|
-
LOG_FILENAME, maxBytes=1024 * 1024 * 5, backupCount=3, encoding="utf-8"
|
24
|
-
)
|
25
|
-
logging.basicConfig(
|
26
|
-
level=logging.INFO,
|
27
|
-
format="%(asctime)s [%(levelname)-5.5s] %(name)s (%(filename)s:%(lineno)d) - %(message)s",
|
28
|
-
handlers=[file_handler, logging.StreamHandler(sys.stdout)],
|
29
|
-
)
|
30
|
-
log = logging.getLogger(__name__)
|
31
|
-
|
32
|
-
AnyHandler = DataHandler
|
33
|
-
AnyHandlerError = DataHandlerError
|
14
|
+
log = get_logger(__name__)
|
34
15
|
|
35
16
|
|
36
17
|
class ParqV(App[None]):
|
37
18
|
"""A Textual app to visualize Parquet or JSON files."""
|
38
19
|
|
39
|
-
CSS_PATH =
|
20
|
+
CSS_PATH = CSS_PATH
|
40
21
|
BINDINGS = [
|
41
22
|
Binding("q", "quit", "Quit", priority=True),
|
42
23
|
]
|
43
24
|
|
44
|
-
# App State
|
45
|
-
file_path: Optional[Path] = None
|
46
|
-
handler: Optional[AnyHandler] = None # Use ABC type hint
|
47
|
-
handler_type: Optional[str] = None # Keep for display ('parquet', 'json')
|
48
|
-
error_message: Optional[str] = None
|
49
|
-
|
50
25
|
def __init__(self, file_path_str: Optional[str] = None, *args, **kwargs):
|
26
|
+
"""
|
27
|
+
Initialize the ParqV application.
|
28
|
+
|
29
|
+
Args:
|
30
|
+
file_path_str: Path to the file to visualize
|
31
|
+
*args, **kwargs: Additional arguments for the Textual App
|
32
|
+
"""
|
51
33
|
super().__init__(*args, **kwargs)
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
self.
|
58
|
-
|
59
|
-
|
60
|
-
if
|
61
|
-
self.
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
self.error_message =
|
81
|
-
log.error(
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
log.info(f"Attempting to initialize {detected_type.capitalize()} handler for: {self.file_path}")
|
87
|
-
try:
|
88
|
-
self.handler = handler_class(self.file_path)
|
89
|
-
self.handler_type = detected_type
|
90
|
-
log.info(f"{detected_type.capitalize()} handler initialized successfully.")
|
91
|
-
except DataHandlerError as e:
|
92
|
-
self.error_message = f"Failed to initialize {detected_type} handler: {e}"
|
93
|
-
log.error(self.error_message, exc_info=True)
|
94
|
-
except Exception as e:
|
95
|
-
self.error_message = f"An unexpected error occurred during {detected_type} handler initialization: {e}"
|
96
|
-
log.exception(f"Unexpected error during {detected_type} handler initialization:")
|
34
|
+
|
35
|
+
# Application state
|
36
|
+
self.file_path: Optional[Path] = None
|
37
|
+
self.handler: Optional[DataHandler] = None
|
38
|
+
self.handler_type: Optional[str] = None
|
39
|
+
self.error_message: Optional[str] = None
|
40
|
+
|
41
|
+
# Initialize with file if provided
|
42
|
+
if file_path_str:
|
43
|
+
self._initialize_file_handler(file_path_str)
|
44
|
+
|
45
|
+
def _initialize_file_handler(self, file_path_str: str) -> None:
|
46
|
+
"""
|
47
|
+
Initialize the file handler for the given file path.
|
48
|
+
|
49
|
+
Args:
|
50
|
+
file_path_str: Path to the file to process
|
51
|
+
"""
|
52
|
+
try:
|
53
|
+
# Validate file and detect type
|
54
|
+
self.file_path, self.handler_type = validate_and_detect_file(file_path_str)
|
55
|
+
|
56
|
+
# Create appropriate handler
|
57
|
+
self.handler = HandlerFactory.create_handler(self.file_path, self.handler_type)
|
58
|
+
|
59
|
+
log.info(f"Successfully initialized {self.handler_type} handler for: {self.file_path.name}")
|
60
|
+
|
61
|
+
except (FileValidationError, HandlerCreationError) as e:
|
62
|
+
self.error_message = str(e)
|
63
|
+
log.error(f"Failed to initialize handler: {e}")
|
64
|
+
|
65
|
+
except Exception as e:
|
66
|
+
self.error_message = f"An unexpected error occurred: {e}"
|
67
|
+
log.exception("Unexpected error during handler initialization")
|
97
68
|
|
98
69
|
def compose(self) -> ComposeResult:
|
70
|
+
"""Compose the UI layout."""
|
99
71
|
yield Header()
|
72
|
+
|
100
73
|
if self.error_message:
|
101
|
-
log.
|
74
|
+
log.debug(f"Displaying error message: {self.error_message}")
|
102
75
|
yield Container(
|
103
76
|
Label("Error Loading File:", classes="error-title"),
|
104
77
|
Static(self.error_message, classes="error-content"),
|
@@ -111,57 +84,66 @@ class ParqV(App[None]):
|
|
111
84
|
yield TabPane("Schema", SchemaView(id="schema-view"), id="tab-schema")
|
112
85
|
yield TabPane("Data Preview", DataView(id="data-view"), id="tab-data")
|
113
86
|
else:
|
114
|
-
log.
|
115
|
-
yield Container(
|
87
|
+
log.warning("No handler available and no error message set")
|
88
|
+
yield Container(
|
89
|
+
Label("No file loaded.", classes="error-title"),
|
90
|
+
Static("Please provide a valid file path.", classes="error-content"),
|
91
|
+
id="no-file-container"
|
92
|
+
)
|
93
|
+
|
116
94
|
yield Footer()
|
117
95
|
|
118
96
|
def on_mount(self) -> None:
|
97
|
+
"""Handle app mount event - set up header information."""
|
119
98
|
log.debug("App mounted.")
|
99
|
+
self._update_header()
|
100
|
+
|
101
|
+
def _update_header(self) -> None:
|
102
|
+
"""Update the header with file and format information."""
|
120
103
|
try:
|
121
104
|
header = self.query_one(Header)
|
122
|
-
|
123
|
-
|
124
|
-
if self.handler and self.file_path:
|
105
|
+
|
106
|
+
if self.handler and self.file_path and self.handler_type:
|
125
107
|
display_name = self.file_path.name
|
126
|
-
format_name = self.handler_type.capitalize()
|
108
|
+
format_name = self.handler_type.capitalize()
|
127
109
|
header.title = f"parqv - {display_name}"
|
128
110
|
header.sub_title = f"Format: {format_name}"
|
129
111
|
elif self.error_message:
|
130
112
|
header.title = "parqv - Error"
|
113
|
+
header.sub_title = "Failed to load file"
|
131
114
|
else:
|
132
115
|
header.title = "parqv"
|
116
|
+
header.sub_title = "File Viewer"
|
117
|
+
|
133
118
|
except Exception as e:
|
134
|
-
log.error(f"Failed to
|
119
|
+
log.error(f"Failed to update header: {e}")
|
135
120
|
|
136
121
|
def action_quit(self) -> None:
|
122
|
+
"""Handle quit action - cleanup and exit."""
|
137
123
|
log.info("Quit action triggered.")
|
124
|
+
self._cleanup()
|
125
|
+
self.exit()
|
126
|
+
|
127
|
+
def _cleanup(self) -> None:
|
128
|
+
"""Clean up resources before exit."""
|
138
129
|
if self.handler:
|
139
130
|
try:
|
140
131
|
self.handler.close()
|
132
|
+
log.info("Handler closed successfully.")
|
141
133
|
except Exception as e:
|
142
134
|
log.error(f"Error during handler cleanup: {e}")
|
143
|
-
self.exit()
|
144
135
|
|
145
136
|
|
146
|
-
# CLI
|
137
|
+
# For backward compatibility, keep the old CLI entry point
|
147
138
|
def run_app():
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
_path = Path(file_path_str)
|
158
|
-
if not _path.suffix.lower() in ['.parquet', '.json', '.ndjson']:
|
159
|
-
print(f"Error: Unsupported file type '{_path.suffix}'. Please provide a .parquet, .json, or .ndjson file.")
|
160
|
-
log.error(f"Unsupported file type provided via CLI: {_path.suffix}")
|
161
|
-
sys.exit(1)
|
162
|
-
|
163
|
-
app = ParqV(file_path_str=file_path_str)
|
164
|
-
app.run()
|
139
|
+
"""
|
140
|
+
Legacy CLI entry point for backward compatibility.
|
141
|
+
|
142
|
+
Note: New code should use parqv.cli.run_app() instead.
|
143
|
+
"""
|
144
|
+
from .cli import run_app as new_run_app
|
145
|
+
log.warning("Using legacy run_app(). Consider importing from parqv.cli instead.")
|
146
|
+
new_run_app()
|
165
147
|
|
166
148
|
|
167
149
|
if __name__ == "__main__":
|
parqv/cli.py
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
"""
|
2
|
+
Command Line Interface for parqv application.
|
3
|
+
"""
|
4
|
+
|
5
|
+
import sys
|
6
|
+
|
7
|
+
from .app import ParqV
|
8
|
+
from .core import SUPPORTED_EXTENSIONS, FileValidationError, validate_and_detect_file, setup_logging, get_logger
|
9
|
+
|
10
|
+
|
11
|
+
def _print_user_message(message: str, log_level: str = "info") -> None:
    """
    Echo a message on stderr and record it in the application log.

    Args:
        message: text to print and to log
        log_level: 'error' or 'warning' select that log level; any other
            value is logged at info level
    """
    logger = get_logger(__name__)

    print(message, file=sys.stderr)

    # Pick the logging method once, then emit.
    emit = (
        logger.error if log_level == "error"
        else logger.warning if log_level == "warning"
        else logger.info
    )
    emit(message)
|
29
|
+
|
30
|
+
|
31
|
+
def validate_cli_arguments() -> str:
    """
    Read the target file path from the command line.

    Returns:
        The first positional command line argument (sys.argv[1])

    Raises:
        SystemExit: With status 1, after printing usage help, when no
            argument was supplied
    """
    logger = get_logger(__name__)

    # Happy path first: an argument is present, hand it back untouched.
    if len(sys.argv) >= 2:
        cli_path = sys.argv[1]
        logger.debug(f"File path received from CLI: {cli_path}")
        return cli_path

    extension_list = ", ".join(SUPPORTED_EXTENSIONS.keys())

    _print_user_message("Usage: parqv <path_to_parquet_or_json_file>", "error")
    _print_user_message(f"Supported file types: {extension_list}", "info")

    logger.error("No file path provided via CLI arguments")
    sys.exit(1)
|
56
|
+
|
57
|
+
|
58
|
+
def run_app() -> None:
    """
    Main entry point for the parqv CLI application.

    Sets up logging, reads the file path from the command line, validates
    the path and file type up front, then creates and runs the Textual app.

    Exits with status 1 on a file validation error or any unexpected
    exception, and with status 0 when interrupted with Ctrl+C.
    """
    logger = setup_logging()
    logger.info("--- parqv CLI started ---")

    try:
        cli_path = validate_cli_arguments()

        # Early validation so problems are reported before the app starts.
        validated_path, detected_type = validate_and_detect_file(cli_path)
        logger.info(f"File validated successfully: {validated_path} (type: {detected_type})")

        logger.info("Starting parqv application...")
        ParqV(file_path_str=cli_path).run()

        logger.info("parqv application finished successfully")

    except FileValidationError as exc:
        logger.error(f"File validation failed: {exc}")

        extension_list = ", ".join(SUPPORTED_EXTENSIONS.keys())
        _print_user_message(f"Error: {exc}", "error")
        _print_user_message(
            f"Please provide a file with one of these extensions: {extension_list}",
            "info",
        )

        logger.error("Exiting due to file validation error")
        sys.exit(1)

    except KeyboardInterrupt:
        logger.info("Application interrupted by user (Ctrl+C)")
        _print_user_message("\nApplication interrupted by user.", "info")
        sys.exit(0)

    except Exception as exc:
        # Top-level boundary: record the traceback, tell the user, exit non-zero.
        logger.exception(f"Unexpected error in CLI: {exc}")
        _print_user_message(f"An unexpected error occurred: {exc}", "error")
        _print_user_message("Check the log file for more details.", "info")
        sys.exit(1)
|
109
|
+
|
110
|
+
|
111
|
+
if __name__ == "__main__":
|
112
|
+
run_app()
|
parqv/core/__init__.py
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
"""
|
2
|
+
Core modules for parqv application.
|
3
|
+
|
4
|
+
This package contains fundamental configuration, utilities, and factory classes.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from .config import SUPPORTED_EXTENSIONS, DEFAULT_PREVIEW_ROWS, CSS_PATH
|
8
|
+
from .logging import setup_logging, get_logger
|
9
|
+
from .file_utils import FileValidationError, validate_and_detect_file, validate_file_path, detect_file_type
|
10
|
+
from .handler_factory import HandlerFactory, HandlerCreationError
|
11
|
+
|
12
|
+
__all__ = [
|
13
|
+
# Configuration
|
14
|
+
"SUPPORTED_EXTENSIONS",
|
15
|
+
"DEFAULT_PREVIEW_ROWS",
|
16
|
+
"CSS_PATH",
|
17
|
+
|
18
|
+
# Logging
|
19
|
+
"setup_logging",
|
20
|
+
"get_logger",
|
21
|
+
|
22
|
+
# File utilities
|
23
|
+
"FileValidationError",
|
24
|
+
"validate_and_detect_file",
|
25
|
+
"validate_file_path",
|
26
|
+
"detect_file_type",
|
27
|
+
|
28
|
+
# Factory
|
29
|
+
"HandlerFactory",
|
30
|
+
"HandlerCreationError",
|
31
|
+
]
|
parqv/core/config.py
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
"""
|
2
|
+
Configuration constants and settings for parqv application.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import Dict, Type, List
|
6
|
+
from pathlib import Path
|
7
|
+
|
8
|
+
# File extensions and their corresponding handler types
|
9
|
+
# Maps each accepted file suffix (lower-case, with leading dot) to the
# handler type used to open it.
SUPPORTED_EXTENSIONS: Dict[str, str] = {
    ".parquet": "parquet",
    ".json": "json",
    ".ndjson": "json",
}

# Log file settings
LOG_FILENAME = "parqv.log"
LOG_MAX_BYTES = 5 * 1024 * 1024  # 5MB per log file before rotation
LOG_BACKUP_COUNT = 3
LOG_ENCODING = "utf-8"

# UI settings
DEFAULT_PREVIEW_ROWS = 50

# Stylesheet path (relative to the app module)
CSS_PATH = "parqv.css"
|
parqv/core/file_utils.py
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
"""
|
2
|
+
File utilities for parqv application.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from pathlib import Path
|
6
|
+
from typing import Optional, Tuple
|
7
|
+
|
8
|
+
from .config import SUPPORTED_EXTENSIONS
|
9
|
+
from .logging import get_logger
|
10
|
+
|
11
|
+
log = get_logger(__name__)
|
12
|
+
|
13
|
+
|
14
|
+
class FileValidationError(Exception):
    """Raised when a file path is missing, invalid, or of an unsupported type."""
|
17
|
+
|
18
|
+
|
19
|
+
def validate_file_path(file_path_str: Optional[str]) -> Path:
    """
    Validates the file path and expands a leading ``~``.

    The path is not made absolute and symlinks are not followed, so the
    returned path keeps the name and suffix the caller supplied.

    Args:
        file_path_str: String representation of the file path

    Returns:
        Path object (with ``~`` expanded) pointing at an existing regular file

    Raises:
        FileValidationError: If no path is given, the home directory needed
            for ``~`` expansion cannot be determined, or the path is not an
            existing regular file
    """
    if not file_path_str:
        raise FileValidationError("No file path provided.")

    try:
        # A quoted or programmatically supplied "~/data.parquet" is not
        # expanded by a shell, so expand it here before checking the file.
        file_path = Path(file_path_str).expanduser()
    except RuntimeError as e:
        # expanduser() raises RuntimeError when the home directory is unknown.
        raise FileValidationError(f"Cannot expand file path '{file_path_str}': {e}") from e

    log.debug(f"Validating file path: {file_path}")

    if not file_path.is_file():
        raise FileValidationError(f"File not found or is not a regular file: {file_path}")

    return file_path
|
42
|
+
|
43
|
+
|
44
|
+
def detect_file_type(file_path: Path) -> str:
    """
    Map a file's extension to the handler type that can read it.

    The suffix is compared case-insensitively against SUPPORTED_EXTENSIONS.

    Args:
        file_path: Path object representing the file

    Returns:
        The handler type registered for the suffix ('parquet' or 'json')

    Raises:
        FileValidationError: If the suffix is not in SUPPORTED_EXTENSIONS
    """
    suffix = file_path.suffix.lower()

    if suffix in SUPPORTED_EXTENSIONS:
        kind = SUPPORTED_EXTENSIONS[suffix]
        log.info(f"Detected '{suffix}' extension, type: {kind}")
        return kind

    allowed = ", ".join(SUPPORTED_EXTENSIONS.keys())
    raise FileValidationError(
        f"Unsupported file extension: '{suffix}'. "
        f"Only {allowed} are supported."
    )
|
70
|
+
|
71
|
+
|
72
|
+
def validate_and_detect_file(file_path_str: Optional[str]) -> Tuple[Path, str]:
    """
    Validate a file path and detect its type in one call.

    Args:
        file_path_str: String representation of the file path

    Returns:
        Tuple of (validated_path, detected_type)

    Raises:
        FileValidationError: If validation or type detection fails
    """
    validated_path = validate_file_path(file_path_str)
    return validated_path, detect_file_type(validated_path)
|
parqv/core/handler_factory.py
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
"""
|
2
|
+
Handler factory for creating appropriate data handlers based on file type.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from pathlib import Path
|
6
|
+
from typing import Optional
|
7
|
+
|
8
|
+
from ..data_sources import DataHandler, DataHandlerError, ParquetHandler, JsonHandler
|
9
|
+
from .logging import get_logger
|
10
|
+
|
11
|
+
log = get_logger(__name__)
|
12
|
+
|
13
|
+
|
14
|
+
class HandlerCreationError(Exception):
    """Raised when a data handler cannot be created for a file."""
|
17
|
+
|
18
|
+
|
19
|
+
class HandlerFactory:
    """Creates data handlers by looking up a handler type in a registry."""

    # Handler type name -> handler class. Extended by register_handler().
    _HANDLER_REGISTRY = {
        "parquet": ParquetHandler,
        "json": JsonHandler,
    }

    @classmethod
    def create_handler(cls, file_path: Path, handler_type: str) -> DataHandler:
        """
        Build the handler registered for ``handler_type`` and open ``file_path``.

        Args:
            file_path: Path to the data file
            handler_type: Registered handler type name ('parquet' or 'json'
                unless more were registered)

        Returns:
            An instance of the handler class registered for ``handler_type``

        Raises:
            HandlerCreationError: If the type is not registered, or the
                handler class raises while being constructed
        """
        registry = cls._HANDLER_REGISTRY

        if handler_type not in registry:
            known_types = ", ".join(registry.keys())
            raise HandlerCreationError(
                f"Unknown handler type: '{handler_type}'. "
                f"Available types: {known_types}"
            )

        handler_cls = registry[handler_type]
        label = handler_type.capitalize()

        log.info(f"Creating {label} handler for: {file_path}")

        try:
            instance = handler_cls(file_path)
            log.info(f"{label} handler created successfully.")
            return instance

        except DataHandlerError as exc:
            # Known handler failure: log briefly and wrap, keeping the cause.
            log.error(f"Failed to create {handler_type} handler: {exc}")
            raise HandlerCreationError(f"Failed to initialize {handler_type} handler: {exc}") from exc

        except Exception as exc:
            # Anything else is unexpected: log with traceback and wrap.
            log.exception(f"Unexpected error creating {handler_type} handler")
            raise HandlerCreationError(
                f"Unexpected error during {handler_type} handler creation: {exc}"
            ) from exc

    @classmethod
    def get_supported_types(cls) -> list[str]:
        """
        List the handler type names currently in the registry.

        Returns:
            List of registered handler type strings
        """
        return [*cls._HANDLER_REGISTRY.keys()]

    @classmethod
    def register_handler(cls, handler_type: str, handler_class: type[DataHandler]) -> None:
        """
        Add a handler class to the registry, replacing any existing entry.

        Args:
            handler_type: String identifier for the handler type
            handler_class: Class that implements DataHandler interface
        """
        log.info(f"Registering handler type '{handler_type}' with class {handler_class.__name__}")
        cls._HANDLER_REGISTRY[handler_type] = handler_class
|
parqv/core/logging.py
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
"""
|
2
|
+
Logging configuration for parqv application.
|
3
|
+
"""
|
4
|
+
|
5
|
+
import logging
|
6
|
+
import sys
|
7
|
+
from logging.handlers import RotatingFileHandler
|
8
|
+
|
9
|
+
from .config import LOG_FILENAME, LOG_MAX_BYTES, LOG_BACKUP_COUNT, LOG_ENCODING
|
10
|
+
|
11
|
+
|
12
|
+
def setup_logging() -> logging.Logger:
    """
    Sets up logging configuration for the parqv application.

    Configures the root logger at INFO level with a rotating file handler
    (LOG_FILENAME, rotated at LOG_MAX_BYTES, keeping LOG_BACKUP_COUNT
    backups) and a stdout stream handler, replacing any existing root
    configuration. If the log file cannot be opened (for example the working
    directory is read-only), logging falls back to stdout only instead of
    aborting start-up.

    Returns:
        The logger for this module (``logging.getLogger(__name__)``), whose
        records propagate to the root logger configured here.
    """
    handlers = []
    file_error = None

    try:
        handlers.append(
            RotatingFileHandler(
                LOG_FILENAME,
                maxBytes=LOG_MAX_BYTES,
                backupCount=LOG_BACKUP_COUNT,
                encoding=LOG_ENCODING
            )
        )
    except OSError as e:
        # Opening the log file failed; keep going with console logging only.
        file_error = e

    handlers.append(logging.StreamHandler(sys.stdout))

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)-5.5s] %(name)s (%(filename)s:%(lineno)d) - %(message)s",
        handlers=handlers,
        force=True  # Override any existing configuration
    )

    logger = logging.getLogger(__name__)
    if file_error is not None:
        # Reported only after basicConfig so the warning is actually emitted.
        logger.warning(
            "Could not open log file %r (%s); logging to stdout only.",
            LOG_FILENAME,
            file_error,
        )

    return logger
|
34
|
+
|
35
|
+
|
36
|
+
def get_logger(name: str) -> logging.Logger:
    """
    Return the standard-library logger registered under ``name``.

    Args:
        name: The name for the logger (typically __name__)

    Returns:
        The ``logging.Logger`` for that name.
    """
    logger = logging.getLogger(name)
    return logger
|