parqv 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. {parqv-0.2.0/src/parqv.egg-info → parqv-0.3.0}/PKG-INFO +5 -6
  2. {parqv-0.2.0 → parqv-0.3.0}/README.md +5 -6
  3. {parqv-0.2.0 → parqv-0.3.0}/pyproject.toml +1 -1
  4. parqv-0.3.0/src/parqv/__init__.py +31 -0
  5. parqv-0.3.0/src/parqv/app.py +150 -0
  6. parqv-0.3.0/src/parqv/cli.py +112 -0
  7. parqv-0.3.0/src/parqv/core/__init__.py +31 -0
  8. parqv-0.3.0/src/parqv/core/config.py +26 -0
  9. parqv-0.3.0/src/parqv/core/file_utils.py +88 -0
  10. parqv-0.3.0/src/parqv/core/handler_factory.py +90 -0
  11. parqv-0.3.0/src/parqv/core/logging.py +46 -0
  12. parqv-0.3.0/src/parqv/data_sources/__init__.py +48 -0
  13. parqv-0.3.0/src/parqv/data_sources/base/__init__.py +28 -0
  14. parqv-0.3.0/src/parqv/data_sources/base/exceptions.py +38 -0
  15. parqv-0.2.0/src/parqv/handlers/base_handler.py → parqv-0.3.0/src/parqv/data_sources/base/handler.py +54 -25
  16. {parqv-0.2.0/src/parqv/handlers → parqv-0.3.0/src/parqv/data_sources/formats}/__init__.py +13 -5
  17. parqv-0.3.0/src/parqv/data_sources/formats/csv.py +460 -0
  18. {parqv-0.2.0/src/parqv/handlers → parqv-0.3.0/src/parqv/data_sources/formats}/json.py +68 -32
  19. {parqv-0.2.0/src/parqv/handlers → parqv-0.3.0/src/parqv/data_sources/formats}/parquet.py +67 -56
  20. parqv-0.3.0/src/parqv/views/__init__.py +38 -0
  21. parqv-0.3.0/src/parqv/views/base.py +98 -0
  22. parqv-0.3.0/src/parqv/views/components/__init__.py +13 -0
  23. parqv-0.3.0/src/parqv/views/components/enhanced_data_table.py +152 -0
  24. parqv-0.3.0/src/parqv/views/components/error_display.py +72 -0
  25. parqv-0.3.0/src/parqv/views/components/loading_display.py +44 -0
  26. parqv-0.3.0/src/parqv/views/data_view.py +141 -0
  27. parqv-0.3.0/src/parqv/views/metadata_view.py +63 -0
  28. parqv-0.3.0/src/parqv/views/schema_view.py +236 -0
  29. parqv-0.3.0/src/parqv/views/utils/__init__.py +19 -0
  30. parqv-0.3.0/src/parqv/views/utils/data_formatters.py +184 -0
  31. parqv-0.3.0/src/parqv/views/utils/stats_formatters.py +220 -0
  32. parqv-0.3.0/src/parqv/views/utils/visualization.py +204 -0
  33. {parqv-0.2.0 → parqv-0.3.0/src/parqv.egg-info}/PKG-INFO +5 -6
  34. parqv-0.3.0/src/parqv.egg-info/SOURCES.txt +39 -0
  35. parqv-0.2.0/src/parqv/__init__.py +0 -0
  36. parqv-0.2.0/src/parqv/app.py +0 -168
  37. parqv-0.2.0/src/parqv/views/__init__.py +0 -0
  38. parqv-0.2.0/src/parqv/views/data_view.py +0 -68
  39. parqv-0.2.0/src/parqv/views/metadata_view.py +0 -26
  40. parqv-0.2.0/src/parqv/views/schema_view.py +0 -246
  41. parqv-0.2.0/src/parqv.egg-info/SOURCES.txt +0 -20
  42. {parqv-0.2.0 → parqv-0.3.0}/LICENSE +0 -0
  43. {parqv-0.2.0 → parqv-0.3.0}/setup.cfg +0 -0
  44. {parqv-0.2.0 → parqv-0.3.0}/src/parqv/parqv.css +0 -0
  45. {parqv-0.2.0 → parqv-0.3.0}/src/parqv.egg-info/dependency_links.txt +0 -0
  46. {parqv-0.2.0 → parqv-0.3.0}/src/parqv.egg-info/entry_points.txt +0 -0
  47. {parqv-0.2.0 → parqv-0.3.0}/src/parqv.egg-info/requires.txt +0 -0
  48. {parqv-0.2.0 → parqv-0.3.0}/src/parqv.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: parqv
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: An interactive Python TUI for visualizing, exploring, and analyzing files directly in your terminal.
5
5
  Author-email: Sangmin Yoon <sanspareilsmyn@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -23,14 +23,13 @@ Dynamic: license-file
23
23
 
24
24
  ---
25
25
 
26
- **Supported File Formats:** ✅ **Parquet** | ✅ **JSON** / **JSON Lines (ndjson)** | *(More planned!)*
26
+ **Supported File Formats:** ✅ **Parquet** | ✅ **JSON** / **JSON Lines (ndjson)** | ✅ **CSV / TSV** | *(More planned!)*
27
27
 
28
28
  ---
29
29
 
30
- **`parqv` is a Python-based interactive TUI (Text User Interface) tool designed to explore, analyze, and understand various data file formats directly within your terminal.** Initially supporting Parquet and JSON, `parqv` aims to provide a unified, visual experience for quick data inspection without leaving your console.
31
-
32
- ## 💻 Demo (Showing Parquet)
30
+ **`parqv` is a Python-based interactive TUI (Text User Interface) tool designed to explore, analyze, and understand various data file formats directly within your terminal.** `parqv` aims to provide a unified, visual experience for quick data inspection without leaving your console.
33
31
 
32
+ ## 💻 Demo
34
33
  ![parqv.gif](assets/parqv.gif)
35
34
  *(Demo shows Parquet features; UI adapts for other formats)*
36
35
 
@@ -47,7 +46,7 @@ Dynamic: license-file
47
46
  * **🔌 Extensible:** Designed with a handler interface to easily add support for more file formats in the future (like Arrow IPC, etc.).
48
47
 
49
48
  ## ✨ Features (TUI Mode)
50
- * **Multi-Format Support:** Currently supports **Parquet** (`.parquet`) and **JSON/JSON Lines** (`.json`, `.ndjson`). Run `parqv <your_file.{parquet,json,ndjson}>`.
49
+ * **Multi-Format Support:** Now supports **Parquet** (`.parquet`), **JSON/JSON Lines** (`.json`, `.ndjson`), and **CSV/TSV** (`.csv`, `.tsv`). Run `parqv <your_file.{parquet,json,ndjson,csv,tsv}>`.
51
50
  * **Metadata Panel:** Displays key file information (path, format, size, total rows, column count, etc.). *Fields may vary slightly depending on the file format.*
52
51
  * **Schema Explorer:**
53
52
  * Interactive list view of columns.
@@ -7,14 +7,13 @@
7
7
 
8
8
  ---
9
9
 
10
- **Supported File Formats:** ✅ **Parquet** | ✅ **JSON** / **JSON Lines (ndjson)** | *(More planned!)*
10
+ **Supported File Formats:** ✅ **Parquet** | ✅ **JSON** / **JSON Lines (ndjson)** | ✅ **CSV / TSV** | *(More planned!)*
11
11
 
12
12
  ---
13
13
 
14
- **`parqv` is a Python-based interactive TUI (Text User Interface) tool designed to explore, analyze, and understand various data file formats directly within your terminal.** Initially supporting Parquet and JSON, `parqv` aims to provide a unified, visual experience for quick data inspection without leaving your console.
15
-
16
- ## 💻 Demo (Showing Parquet)
14
+ **`parqv` is a Python-based interactive TUI (Text User Interface) tool designed to explore, analyze, and understand various data file formats directly within your terminal.** `parqv` aims to provide a unified, visual experience for quick data inspection without leaving your console.
17
15
 
16
+ ## 💻 Demo
18
17
  ![parqv.gif](assets/parqv.gif)
19
18
  *(Demo shows Parquet features; UI adapts for other formats)*
20
19
 
@@ -31,7 +30,7 @@
31
30
  * **🔌 Extensible:** Designed with a handler interface to easily add support for more file formats in the future (like Arrow IPC, etc.).
32
31
 
33
32
  ## ✨ Features (TUI Mode)
34
- * **Multi-Format Support:** Currently supports **Parquet** (`.parquet`) and **JSON/JSON Lines** (`.json`, `.ndjson`). Run `parqv <your_file.{parquet,json,ndjson}>`.
33
+ * **Multi-Format Support:** Now supports **Parquet** (`.parquet`), **JSON/JSON Lines** (`.json`, `.ndjson`), and **CSV/TSV** (`.csv`, `.tsv`). Run `parqv <your_file.{parquet,json,ndjson,csv,tsv}>`.
35
34
  * **Metadata Panel:** Displays key file information (path, format, size, total rows, column count, etc.). *Fields may vary slightly depending on the file format.*
36
35
  * **Schema Explorer:**
37
36
  * Interactive list view of columns.
@@ -85,4 +84,4 @@
85
84
 
86
85
  ## 📄 License
87
86
 
88
- Licensed under the Apache License, Version 2.0. See [LICENSE](LICENSE) for the full license text.
87
+ Licensed under the Apache License, Version 2.0. See [LICENSE](LICENSE) for the full license text.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "parqv"
7
- version = "0.2.0"
7
+ version = "0.3.0"
8
8
  description = "An interactive Python TUI for visualizing, exploring, and analyzing files directly in your terminal."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -0,0 +1,31 @@
1
+ """
2
+ parqv - A Textual application for visualizing Parquet and JSON files.
3
+ """
4
+
5
+ from .app import ParqV
6
+ from .cli import run_app
7
+ from .core import (
8
+ SUPPORTED_EXTENSIONS,
9
+ DEFAULT_PREVIEW_ROWS,
10
+ FileValidationError,
11
+ validate_and_detect_file,
12
+ HandlerFactory,
13
+ HandlerCreationError,
14
+ setup_logging,
15
+ get_logger
16
+ )
17
+
18
# Keep in sync with the `version` field in pyproject.toml (0.3.0 for this release).
__version__ = "0.3.0"
19
+
20
+ __all__ = [
21
+ "ParqV",
22
+ "run_app",
23
+ "SUPPORTED_EXTENSIONS",
24
+ "DEFAULT_PREVIEW_ROWS",
25
+ "FileValidationError",
26
+ "validate_and_detect_file",
27
+ "HandlerFactory",
28
+ "HandlerCreationError",
29
+ "setup_logging",
30
+ "get_logger",
31
+ ]
@@ -0,0 +1,150 @@
1
+ from pathlib import Path
2
+ from typing import Optional
3
+
4
+ from textual.app import App, ComposeResult, Binding
5
+ from textual.containers import Container
6
+ from textual.widgets import Header, Footer, Static, Label, TabbedContent, TabPane
7
+
8
+ from .core import CSS_PATH, FileValidationError, validate_and_detect_file, HandlerFactory, HandlerCreationError, get_logger
9
+ from .data_sources import DataHandler
10
+ from .views.data_view import DataView
11
+ from .views.metadata_view import MetadataView
12
+ from .views.schema_view import SchemaView
13
+
14
+ log = get_logger(__name__)
15
+
16
+
17
class ParqV(App[None]):
    """Textual application that displays a Parquet, JSON, or CSV file.

    The file is validated and its handler is created during ``__init__``.
    Failures are not raised; they are stored in ``error_message`` and
    rendered by ``compose`` in place of the tabbed views.
    """

    CSS_PATH = CSS_PATH
    BINDINGS = [
        Binding("q", "quit", "Quit", priority=True),
    ]

    def __init__(self, file_path_str: Optional[str] = None, *args, **kwargs):
        """
        Initialize the ParqV application.

        Args:
            file_path_str: Path to the file to visualize. When omitted (or
                empty) no handler is created and a "No file loaded." screen
                is shown.
            *args, **kwargs: Additional arguments for the Textual App
        """
        super().__init__(*args, **kwargs)

        # Application state; all of it stays None unless a file is loaded
        # (or, for error_message, unless loading fails).
        self.file_path: Optional[Path] = None
        self.handler: Optional[DataHandler] = None
        self.handler_type: Optional[str] = None
        self.error_message: Optional[str] = None

        # Initialize with file if provided
        if file_path_str:
            self._initialize_file_handler(file_path_str)

    def _initialize_file_handler(self, file_path_str: str) -> None:
        """
        Validate the file and create the matching data handler.

        Never raises: every failure is logged and recorded in
        ``self.error_message`` so that ``compose`` can display it.

        Args:
            file_path_str: Path to the file to process
        """
        try:
            # Validate file and detect type
            self.file_path, self.handler_type = validate_and_detect_file(file_path_str)

            # Create appropriate handler
            self.handler = HandlerFactory.create_handler(self.file_path, self.handler_type)

            log.info(f"Successfully initialized {self.handler_type} handler for: {self.file_path.name}")

        except (FileValidationError, HandlerCreationError) as e:
            # Expected failure modes: show the message as-is.
            self.error_message = str(e)
            log.error(f"Failed to initialize handler: {e}")

        except Exception as e:
            # Anything else is unexpected; keep the traceback in the log.
            self.error_message = f"An unexpected error occurred: {e}"
            log.exception("Unexpected error during handler initialization")

    def compose(self) -> ComposeResult:
        """Compose the UI layout: header, one of three bodies, footer."""
        yield Header()

        if self.error_message:
            log.debug(f"Displaying error message: {self.error_message}")
            yield Container(
                Label("Error Loading File:", classes="error-title"),
                Static(self.error_message, classes="error-content"),
                id="error-container"
            )
        elif self.handler:
            log.debug(f"Composing main layout with TabbedContent for {self.handler_type} handler.")
            with TabbedContent(id="main-tabs"):
                yield TabPane("Metadata", MetadataView(id="metadata-view"), id="tab-metadata")
                yield TabPane("Schema", SchemaView(id="schema-view"), id="tab-schema")
                yield TabPane("Data Preview", DataView(id="data-view"), id="tab-data")
        else:
            # Reached when the app was constructed without a file path.
            log.warning("No handler available and no error message set")
            yield Container(
                Label("No file loaded.", classes="error-title"),
                Static("Please provide a valid file path.", classes="error-content"),
                id="no-file-container"
            )

        yield Footer()

    def on_mount(self) -> None:
        """Handle app mount event - set up header information."""
        log.debug("App mounted.")
        self._update_header()

    def _update_header(self) -> None:
        """
        Update the title and sub-title shown in the header.

        The values are assigned on the App (``self.title`` /
        ``self.sub_title``), which is what Textual's ``Header`` widget
        displays. Failures are logged rather than raised so that a cosmetic
        problem cannot stop the app.
        """
        try:
            if self.handler and self.file_path and self.handler_type:
                display_name = self.file_path.name
                format_name = self.handler_type.capitalize()
                self.title = f"parqv - {display_name}"
                self.sub_title = f"Format: {format_name}"
            elif self.error_message:
                self.title = "parqv - Error"
                self.sub_title = "Failed to load file"
            else:
                self.title = "parqv"
                self.sub_title = "File Viewer"

        except Exception as e:
            log.error(f"Failed to update header: {e}")

    def action_quit(self) -> None:
        """Handle quit action - cleanup and exit."""
        log.info("Quit action triggered.")
        self._cleanup()
        self.exit()

    def _cleanup(self) -> None:
        """Close the data handler, if any; errors are logged, not raised."""
        if self.handler:
            try:
                self.handler.close()
                log.info("Handler closed successfully.")
            except Exception as e:
                log.error(f"Error during handler cleanup: {e}")
135
+
136
+
137
+ # For backward compatibility, keep the old CLI entry point
138
def run_app():
    """
    Backward-compatible shim around the CLI entry point.

    Logs a warning and then delegates to ``parqv.cli.run_app``. New code
    should call ``parqv.cli.run_app()`` directly.
    """
    # Imported lazily, inside the function, exactly as the delegate is only
    # needed when this legacy entry point is actually invoked.
    from . import cli as _cli
    log.warning("Using legacy run_app(). Consider importing from parqv.cli instead.")
    _cli.run_app()
147
+
148
+
149
+ if __name__ == "__main__":
150
+ run_app()
@@ -0,0 +1,112 @@
1
+ """
2
+ Command Line Interface for parqv application.
3
+ """
4
+
5
+ import sys
6
+
7
+ from .app import ParqV
8
+ from .core import SUPPORTED_EXTENSIONS, FileValidationError, validate_and_detect_file, setup_logging, get_logger
9
+
10
+
11
def _print_user_message(message: str, log_level: str = "info") -> None:
    """
    Echo a message on stderr and record it in the log.

    Args:
        message: text shown to the user and written to the log
        log_level: 'error' or 'warning' select that level; any other value
            (including the default 'info') is logged at info level
    """
    logger = get_logger(__name__)

    print(message, file=sys.stderr)

    # Pick the logging method first, then emit once.
    if log_level == "error":
        emit = logger.error
    elif log_level == "warning":
        emit = logger.warning
    else:
        emit = logger.info
    emit(message)
29
+
30
+
31
def validate_cli_arguments() -> str:
    """
    Return the file path given as the first command line argument.

    When no argument is present, a usage line and the list of supported
    extensions are printed to stderr (and logged), and the process exits
    with status 1. Arguments after the first one are ignored.

    Returns:
        The file path string taken from ``sys.argv[1]``

    Raises:
        SystemExit: If no file path argument was supplied
    """
    log = get_logger(__name__)

    if len(sys.argv) < 2:
        # Keep the usage line format-agnostic; the extension list below is
        # generated from SUPPORTED_EXTENSIONS and carries the specifics.
        usage_message = "Usage: parqv <path_to_data_file>"
        supported_message = f"Supported file types: {', '.join(SUPPORTED_EXTENSIONS)}"

        _print_user_message(usage_message, "error")
        _print_user_message(supported_message, "info")

        log.error("No file path provided via CLI arguments")
        sys.exit(1)

    file_path_str = sys.argv[1]
    log.debug(f"File path received from CLI: {file_path_str}")
    return file_path_str
56
+
57
+
58
def run_app() -> None:
    """
    Main entry point for the parqv CLI application.

    Steps, in order: configure logging, read the file path from the command
    line, validate the path and detect its type, then build and run the
    Textual app. Validation failures and unexpected errors exit with status
    1; a keyboard interrupt exits with status 0.
    """
    log = setup_logging()
    log.info("--- parqv CLI started ---")

    try:
        cli_path = validate_cli_arguments()

        # Early validation: report a bad path on the console before the
        # TUI is started.
        checked_path, detected_type = validate_and_detect_file(cli_path)
        log.info(f"File validated successfully: {checked_path} (type: {detected_type})")

        log.info("Starting parqv application...")
        application = ParqV(file_path_str=cli_path)
        application.run()

        log.info("parqv application finished successfully")

    except FileValidationError as e:
        log.error(f"File validation failed: {e}")

        extension_list = ', '.join(SUPPORTED_EXTENSIONS.keys())
        _print_user_message(f"Error: {e}", "error")
        _print_user_message(
            f"Please provide a file with one of these extensions: {extension_list}",
            "info",
        )

        log.error("Exiting due to file validation error")
        sys.exit(1)

    except KeyboardInterrupt:
        log.info("Application interrupted by user (Ctrl+C)")
        _print_user_message("\nApplication interrupted by user.", "info")
        sys.exit(0)

    except Exception as e:
        # Last-resort boundary: keep the traceback in the log, show a short
        # message to the user.
        log.exception(f"Unexpected error in CLI: {e}")
        _print_user_message(f"An unexpected error occurred: {e}", "error")
        _print_user_message("Check the log file for more details.", "info")
        sys.exit(1)
109
+
110
+
111
+ if __name__ == "__main__":
112
+ run_app()
@@ -0,0 +1,31 @@
1
+ """
2
+ Core modules for parqv application.
3
+
4
+ This package contains fundamental configuration, utilities, and factory classes.
5
+ """
6
+
7
+ from .config import SUPPORTED_EXTENSIONS, DEFAULT_PREVIEW_ROWS, CSS_PATH
8
+ from .logging import setup_logging, get_logger
9
+ from .file_utils import FileValidationError, validate_and_detect_file, validate_file_path, detect_file_type
10
+ from .handler_factory import HandlerFactory, HandlerCreationError
11
+
12
+ __all__ = [
13
+ # Configuration
14
+ "SUPPORTED_EXTENSIONS",
15
+ "DEFAULT_PREVIEW_ROWS",
16
+ "CSS_PATH",
17
+
18
+ # Logging
19
+ "setup_logging",
20
+ "get_logger",
21
+
22
+ # File utilities
23
+ "FileValidationError",
24
+ "validate_and_detect_file",
25
+ "validate_file_path",
26
+ "detect_file_type",
27
+
28
+ # Factory
29
+ "HandlerFactory",
30
+ "HandlerCreationError",
31
+ ]
@@ -0,0 +1,26 @@
1
+ """
2
+ Configuration constants and settings for parqv application.
3
+ """
4
+
5
+ from typing import Dict, Type, List
6
+ from pathlib import Path
7
+
8
# File extensions (lower-case, including the leading dot) and their
# corresponding handler types.
SUPPORTED_EXTENSIONS: Dict[str, str] = {
    ".parquet": "parquet",
    ".json": "json",
    ".ndjson": "json",
    ".csv": "csv",
    # The README for this release lists `.tsv` as supported; without this
    # entry such files are rejected as an unsupported extension.
    # NOTE(review): confirm the CSV handler copes with tab-delimited input.
    ".tsv": "csv",
}

# Application constants
LOG_FILENAME = "parqv.log"
LOG_MAX_BYTES = 1024 * 1024 * 5  # 5MB
LOG_BACKUP_COUNT = 3
LOG_ENCODING = "utf-8"

# UI Constants
DEFAULT_PREVIEW_ROWS = 50

# CSS Path (relative to the app module)
CSS_PATH = "parqv.css"
@@ -0,0 +1,88 @@
1
+ """
2
+ File utilities for parqv application.
3
+ """
4
+
5
+ from pathlib import Path
6
+ from typing import Optional, Tuple
7
+
8
+ from .config import SUPPORTED_EXTENSIONS
9
+ from .logging import get_logger
10
+
11
+ log = get_logger(__name__)
12
+
13
+
14
class FileValidationError(Exception):
    """Signals that a file path or file type failed validation."""
17
+
18
+
19
def validate_file_path(file_path_str: Optional[str]) -> Path:
    """
    Check that a path string points at an existing regular file.

    The path is wrapped in a ``Path`` as given; it is not made absolute or
    otherwise normalised.

    Args:
        file_path_str: String representation of the file path

    Returns:
        ``Path`` built from ``file_path_str``

    Raises:
        FileValidationError: If the string is empty/None or does not refer
            to a regular file
    """
    if not file_path_str:
        raise FileValidationError("No file path provided.")

    candidate = Path(file_path_str)
    log.debug(f"Validating file path: {candidate}")

    if candidate.is_file():
        return candidate

    raise FileValidationError(f"File not found or is not a regular file: {candidate}")
42
+
43
+
44
def detect_file_type(file_path: Path) -> str:
    """
    Map a file's extension to a handler type.

    The suffix is compared case-insensitively against the keys of
    ``SUPPORTED_EXTENSIONS``.

    Args:
        file_path: Path object representing the file

    Returns:
        The handler type string registered for the file's extension

    Raises:
        FileValidationError: If file extension is not supported
    """
    suffix = file_path.suffix.lower()
    handler_kind = SUPPORTED_EXTENSIONS.get(suffix)

    if handler_kind is None:
        listing = ", ".join(SUPPORTED_EXTENSIONS.keys())
        raise FileValidationError(
            f"Unsupported file extension: '{suffix}'. "
            f"Only {listing} are supported."
        )

    log.info(f"Detected '{suffix}' extension, type: {handler_kind}")
    return handler_kind
70
+
71
+
72
def validate_and_detect_file(file_path_str: Optional[str]) -> Tuple[Path, str]:
    """
    Validate a path string and detect the file type in one call.

    Runs ``validate_file_path`` first, then ``detect_file_type`` on its
    result.

    Args:
        file_path_str: String representation of the file path

    Returns:
        Tuple of (validated_path, detected_type)

    Raises:
        FileValidationError: If validation or type detection fails
    """
    validated = validate_file_path(file_path_str)
    return validated, detect_file_type(validated)
@@ -0,0 +1,90 @@
1
+ """
2
+ Handler factory for creating appropriate data handlers based on file type.
3
+ """
4
+
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
+ from ..data_sources import DataHandler, DataHandlerError, ParquetHandler, JsonHandler, CsvHandler
9
+ from .logging import get_logger
10
+
11
+ log = get_logger(__name__)
12
+
13
+
14
class HandlerCreationError(Exception):
    """Signals that a data handler could not be created."""
17
+
18
+
19
class HandlerFactory:
    """Creates data handlers from a handler-type string."""

    # Handler-type string -> handler class. Mutated by register_handler().
    _HANDLER_REGISTRY = {
        "parquet": ParquetHandler,
        "json": JsonHandler,
        "csv": CsvHandler,
    }

    @classmethod
    def create_handler(cls, file_path: Path, handler_type: str) -> DataHandler:
        """
        Instantiate the handler registered for ``handler_type``.

        Args:
            file_path: Path to the data file
            handler_type: Key into the handler registry (see
                ``get_supported_types``)

        Returns:
            An instance of the appropriate DataHandler subclass

        Raises:
            HandlerCreationError: If the type is unknown or the handler's
                constructor raises; the original exception is chained
        """
        registry = cls._HANDLER_REGISTRY

        if handler_type not in registry:
            known = ", ".join(registry.keys())
            raise HandlerCreationError(
                f"Unknown handler type: '{handler_type}'. "
                f"Available types: {known}"
            )

        factory = registry[handler_type]
        label = handler_type.capitalize()

        log.info(f"Creating {label} handler for: {file_path}")

        try:
            instance = factory(file_path)
            log.info(f"{label} handler created successfully.")
            return instance

        except DataHandlerError as e:
            # Failure reported by the handler itself.
            log.error(f"Failed to create {handler_type} handler: {e}")
            raise HandlerCreationError(f"Failed to initialize {handler_type} handler: {e}") from e

        except Exception as e:
            # Anything else: keep the traceback in the log and wrap it.
            log.exception(f"Unexpected error creating {handler_type} handler")
            raise HandlerCreationError(
                f"Unexpected error during {handler_type} handler creation: {e}"
            ) from e

    @classmethod
    def get_supported_types(cls) -> list[str]:
        """
        List the handler types currently in the registry.

        Returns:
            List of supported handler type strings
        """
        return [*cls._HANDLER_REGISTRY]

    @classmethod
    def register_handler(cls, handler_type: str, handler_class: type[DataHandler]) -> None:
        """
        Add (or replace) a registry entry, for extensibility.

        Args:
            handler_type: String identifier for the handler type
            handler_class: Class that implements DataHandler interface
        """
        log.info(f"Registering handler type '{handler_type}' with class {handler_class.__name__}")
        cls._HANDLER_REGISTRY[handler_type] = handler_class
@@ -0,0 +1,46 @@
1
+ """
2
+ Logging configuration for parqv application.
3
+ """
4
+
5
+ import logging
6
+ import sys
7
+ from logging.handlers import RotatingFileHandler
8
+
9
+ from .config import LOG_FILENAME, LOG_MAX_BYTES, LOG_BACKUP_COUNT, LOG_ENCODING
10
+
11
+
12
def setup_logging() -> logging.Logger:
    """
    Sets up logging configuration for the parqv application.

    Configures the root logger (replacing any existing configuration) at
    INFO level with a stdout stream handler and, when the log file can be
    opened, a rotating file handler. If the log file cannot be opened (for
    example the working directory is not writable), logging continues on
    stdout only and a warning is emitted instead of raising.

    Returns:
        The logger named after this module (not the root logger).
    """
    handlers = [logging.StreamHandler(sys.stdout)]
    file_error = None

    try:
        file_handler = RotatingFileHandler(
            LOG_FILENAME,
            maxBytes=LOG_MAX_BYTES,
            backupCount=LOG_BACKUP_COUNT,
            encoding=LOG_ENCODING
        )
    except OSError as exc:
        # The handler opens the file on construction; remember the failure
        # and report it once logging is configured.
        file_error = exc
    else:
        # Keep the file handler first, as in the original handler order.
        handlers.insert(0, file_handler)

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)-5.5s] %(name)s (%(filename)s:%(lineno)d) - %(message)s",
        handlers=handlers,
        force=True  # Override any existing configuration
    )

    logger = logging.getLogger(__name__)
    if file_error is not None:
        logger.warning(
            "Could not open log file %r (%s); logging to stdout only.",
            LOG_FILENAME,
            file_error,
        )
    return logger
34
+
35
+
36
def get_logger(name: str) -> logging.Logger:
    """
    Look up the logger registered under ``name``.

    Thin wrapper around ``logging.getLogger``.

    Args:
        name: The name for the logger (typically __name__)

    Returns:
        A logger instance.
    """
    logger = logging.getLogger(name)
    return logger