parqv 0.1.0-py3-none-any.whl → 0.2.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parqv/__init__.py +31 -0
- parqv/app.py +97 -78
- parqv/cli.py +112 -0
- parqv/core/__init__.py +31 -0
- parqv/core/config.py +25 -0
- parqv/core/file_utils.py +88 -0
- parqv/core/handler_factory.py +89 -0
- parqv/core/logging.py +46 -0
- parqv/data_sources/__init__.py +44 -0
- parqv/data_sources/base/__init__.py +28 -0
- parqv/data_sources/base/exceptions.py +38 -0
- parqv/data_sources/base/handler.py +143 -0
- parqv/data_sources/formats/__init__.py +16 -0
- parqv/data_sources/formats/json.py +449 -0
- parqv/data_sources/formats/parquet.py +624 -0
- parqv/views/__init__.py +38 -0
- parqv/views/base.py +98 -0
- parqv/views/components/__init__.py +13 -0
- parqv/views/components/enhanced_data_table.py +152 -0
- parqv/views/components/error_display.py +72 -0
- parqv/views/components/loading_display.py +44 -0
- parqv/views/data_view.py +119 -46
- parqv/views/metadata_view.py +57 -13
- parqv/views/schema_view.py +197 -148
- parqv/views/utils/__init__.py +13 -0
- parqv/views/utils/data_formatters.py +162 -0
- parqv/views/utils/stats_formatters.py +160 -0
- parqv-0.2.1.dist-info/METADATA +104 -0
- parqv-0.2.1.dist-info/RECORD +34 -0
- {parqv-0.1.0.dist-info → parqv-0.2.1.dist-info}/WHEEL +1 -1
- parqv/parquet_handler.py +0 -389
- parqv/views/row_group_view.py +0 -33
- parqv-0.1.0.dist-info/METADATA +0 -91
- parqv-0.1.0.dist-info/RECORD +0 -15
- {parqv-0.1.0.dist-info → parqv-0.2.1.dist-info}/entry_points.txt +0 -0
- {parqv-0.1.0.dist-info → parqv-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {parqv-0.1.0.dist-info → parqv-0.2.1.dist-info}/top_level.txt +0 -0
parqv/data_sources/__init__.py
@@ -0,0 +1,44 @@
+"""
+Data sources package for parqv application.
+
+This package provides adapters for various data file formats,
+offering a unified interface for data access.
+"""
+
+# Base classes and exceptions
+from .base import (
+    DataHandler,
+    DataHandlerError,
+    DataSourceError,
+    FileValidationError,
+    UnsupportedFormatError,
+    DataReadError,
+    SchemaError,
+    MetadataError,
+)
+
+# Format-specific handlers
+from .formats import (
+    ParquetHandler,
+    ParquetHandlerError,
+    JsonHandler,
+    JsonHandlerError,
+)
+
+__all__ = [
+    # Base interface and exceptions
+    "DataHandler",
+    "DataHandlerError",
+    "DataSourceError",
+    "FileValidationError",
+    "UnsupportedFormatError",
+    "DataReadError",
+    "SchemaError",
+    "MetadataError",
+
+    # Format handlers
+    "ParquetHandler",
+    "ParquetHandlerError",
+    "JsonHandler",
+    "JsonHandlerError",
+]
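With these re-exports, a caller only needs the parqv.data_sources namespace to reach both the concrete handlers and the shared error types. A minimal usage sketch, assuming ParquetHandler subclasses DataHandler and keeps the Path-based constructor shown in base/handler.py; "example.parquet" is a placeholder file, not something shipped with parqv:

from pathlib import Path

from parqv.data_sources import DataHandlerError, ParquetHandler, ParquetHandlerError

try:
    # Placeholder path; substitute any local Parquet file.
    with ParquetHandler(Path("example.parquet")) as handler:
        print(handler.get_metadata_summary())
except (DataHandlerError, ParquetHandlerError) as exc:
    print(f"could not open file: {exc}")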
parqv/data_sources/base/__init__.py
@@ -0,0 +1,28 @@
+"""
+Base classes and interfaces for data sources.
+"""
+
+from .handler import DataHandler
+from .exceptions import (
+    DataSourceError,
+    DataHandlerError,
+    FileValidationError,
+    UnsupportedFormatError,
+    DataReadError,
+    SchemaError,
+    MetadataError,
+)
+
+__all__ = [
+    # Base handler interface
+    "DataHandler",
+
+    # Exception classes
+    "DataSourceError",
+    "DataHandlerError",
+    "FileValidationError",
+    "UnsupportedFormatError",
+    "DataReadError",
+    "SchemaError",
+    "MetadataError",
+]
parqv/data_sources/base/exceptions.py
@@ -0,0 +1,38 @@
+"""
+Exception classes for data sources.
+"""
+
+
+class DataSourceError(Exception):
+    """Base exception for all data source errors."""
+    pass
+
+
+class DataHandlerError(DataSourceError):
+    """Base exception for all data handler errors."""
+    pass
+
+
+class FileValidationError(DataSourceError):
+    """Exception raised when file validation fails."""
+    pass
+
+
+class UnsupportedFormatError(DataSourceError):
+    """Exception raised when an unsupported file format is encountered."""
+    pass
+
+
+class DataReadError(DataSourceError):
+    """Exception raised when data reading fails."""
+    pass
+
+
+class SchemaError(DataSourceError):
+    """Exception raised when schema operations fail."""
+    pass
+
+
+class MetadataError(DataSourceError):
+    """Exception raised when metadata operations fail."""
+    pass
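Because every concrete error above derives from DataSourceError, callers can catch the base class to handle any data-source failure in one place, or catch a specific subclass first for finer-grained handling. A small illustrative sketch (the error message is made up):

from parqv.data_sources import DataReadError, DataSourceError

try:
    raise DataReadError("truncated record")  # stand-in for a real read failure
except DataSourceError as exc:
    # DataReadError is caught here because it subclasses DataSourceError.
    print(f"data source error: {exc}")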
parqv/data_sources/base/handler.py
@@ -0,0 +1,143 @@
+"""
+Base data handler interface for parqv data sources.
+"""
+
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import pandas as pd
+
+from ...core import get_logger
+
+
+class DataHandler(ABC):
+    """
+    Abstract Base Class for data handlers.
+
+    Defines the common interface required by the ParqV application
+    to interact with different data file formats.
+    """
+
+    def __init__(self, file_path: Path):
+        """
+        Initialize the handler with the file path.
+
+        Subclasses should open the file or set up necessary resources here.
+
+        Args:
+            file_path: Path to the data file.
+
+        Raises:
+            DataHandlerError: If initialization fails (e.g., file not found, format error).
+        """
+        self.file_path = file_path
+        self.logger = get_logger(f"{self.__class__.__module__}.{self.__class__.__name__}")
+
+    @abstractmethod
+    def close(self) -> None:
+        """
+        Close any open resources (files, connections, etc.).
+
+        Must be implemented by subclasses.
+        """
+        pass
+
+    @abstractmethod
+    def get_metadata_summary(self) -> Dict[str, Any]:
+        """
+        Get a dictionary containing summary metadata about the data source.
+
+        Keys should be human-readable strings. Values can be of various types.
+        Should include an 'error' key if metadata retrieval fails.
+
+        Returns:
+            A dictionary with metadata summary or an error dictionary.
+        """
+        pass
+
+    @abstractmethod
+    def get_schema_data(self) -> Optional[List[Dict[str, Any]]]:
+        """
+        Get the schema as a list of dictionaries.
+
+        Each dictionary should represent a column and ideally contain keys:
+        - 'name' (str): Column name.
+        - 'type' (str): Formatted data type string.
+        - 'nullable' (Any): Indicator of nullability (e.g., bool, str "YES"/"NO").
+
+        Returns:
+            A list of schema dictionaries, an empty list if no columns,
+            or None if schema retrieval failed.
+        """
+        pass
+
+    @abstractmethod
+    def get_data_preview(self, num_rows: int = 50) -> Optional[pd.DataFrame]:
+        """
+        Fetch a preview of the data.
+
+        Args:
+            num_rows: The maximum number of rows to fetch.
+
+        Returns:
+            A pandas DataFrame with preview data, an empty DataFrame if no data,
+            a DataFrame with an 'error' column on failure, or None on critical failure.
+        """
+        pass
+
+    @abstractmethod
+    def get_column_stats(self, column_name: str) -> Dict[str, Any]:
+        """
+        Calculate and return statistics for a specific column.
+
+        The returned dictionary should ideally contain keys like:
+        - 'column' (str): Column name.
+        - 'type' (str): Formatted data type string.
+        - 'nullable' (Any): Nullability indicator.
+        - 'calculated' (Dict[str, Any]): Dictionary of computed statistics.
+        - 'error' (Optional[str]): Error message if calculation failed.
+        - 'message' (Optional[str]): Informational message.
+
+        Args:
+            column_name: The name of the column.
+
+        Returns:
+            A dictionary containing column statistics or error information.
+        """
+        pass
+
+    def format_size(self, num_bytes: int) -> str:
+        """
+        Format bytes into a human-readable string.
+
+        Args:
+            num_bytes: Number of bytes to format
+
+        Returns:
+            Human-readable size string
+        """
+        if num_bytes < 1024:
+            return f"{num_bytes} bytes"
+        elif num_bytes < 1024 ** 2:
+            return f"{num_bytes / 1024:.1f} KB"
+        elif num_bytes < 1024 ** 3:
+            return f"{num_bytes / 1024 ** 2:.1f} MB"
+        else:
+            return f"{num_bytes / 1024 ** 3:.1f} GB"
+
+    def __enter__(self):
+        """Enter the runtime context related to this object."""
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Exit the runtime context related to this object, ensuring cleanup."""
+        self.close()
+
+    def __del__(self):
+        """Attempt to close the handler when the object is garbage collected (best effort)."""
+        try:
+            self.close()
+        except Exception:
+            # Ignore exceptions during garbage collection
+            pass
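A concrete handler only has to implement close(), get_metadata_summary(), get_schema_data(), get_data_preview(), and get_column_stats(); logging setup, format_size(), and context-manager support come from the base class. The sketch below is illustrative only: CsvHandler is a hypothetical format not present in parqv, the pandas-based logic is an assumption rather than how the real Parquet/JSON handlers work, and "example.csv" is a placeholder file.

from pathlib import Path
from typing import Any, Dict, List, Optional

import pandas as pd

from parqv.data_sources import DataHandler


class CsvHandler(DataHandler):
    """Hypothetical handler illustrating the DataHandler contract."""

    def __init__(self, file_path: Path):
        super().__init__(file_path)          # sets self.file_path and self.logger
        self._df = pd.read_csv(file_path)    # eager load; fine for a sketch

    def close(self) -> None:
        self._df = None

    def get_metadata_summary(self) -> Dict[str, Any]:
        return {"File path": str(self.file_path), "Total rows": len(self._df)}

    def get_schema_data(self) -> Optional[List[Dict[str, Any]]]:
        return [
            {"name": col, "type": str(dtype), "nullable": bool(self._df[col].isna().any())}
            for col, dtype in self._df.dtypes.items()
        ]

    def get_data_preview(self, num_rows: int = 50) -> Optional[pd.DataFrame]:
        return self._df.head(num_rows)

    def get_column_stats(self, column_name: str) -> Dict[str, Any]:
        series = self._df[column_name]
        return {
            "column": column_name,
            "type": str(series.dtype),
            "nullable": bool(series.isna().any()),
            "calculated": {"nulls": int(series.isna().sum()), "distinct": int(series.nunique())},
        }


# Context-manager support comes from DataHandler.__enter__/__exit__;
# "example.csv" is a placeholder path.
with CsvHandler(Path("example.csv")) as handler:
    print(handler.get_data_preview(5))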
parqv/data_sources/formats/__init__.py
@@ -0,0 +1,16 @@
+"""
+Format-specific data handlers for parqv.
+"""
+
+from .parquet import ParquetHandler, ParquetHandlerError
+from .json import JsonHandler, JsonHandlerError
+
+__all__ = [
+    # Parquet format
+    "ParquetHandler",
+    "ParquetHandlerError",
+
+    # JSON format
+    "JsonHandler",
+    "JsonHandlerError",
+]