parqv 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. parqv/__init__.py +31 -0
  2. parqv/app.py +97 -78
  3. parqv/cli.py +112 -0
  4. parqv/core/__init__.py +31 -0
  5. parqv/core/config.py +25 -0
  6. parqv/core/file_utils.py +88 -0
  7. parqv/core/handler_factory.py +89 -0
  8. parqv/core/logging.py +46 -0
  9. parqv/data_sources/__init__.py +44 -0
  10. parqv/data_sources/base/__init__.py +28 -0
  11. parqv/data_sources/base/exceptions.py +38 -0
  12. parqv/data_sources/base/handler.py +143 -0
  13. parqv/data_sources/formats/__init__.py +16 -0
  14. parqv/data_sources/formats/json.py +449 -0
  15. parqv/data_sources/formats/parquet.py +624 -0
  16. parqv/views/__init__.py +38 -0
  17. parqv/views/base.py +98 -0
  18. parqv/views/components/__init__.py +13 -0
  19. parqv/views/components/enhanced_data_table.py +152 -0
  20. parqv/views/components/error_display.py +72 -0
  21. parqv/views/components/loading_display.py +44 -0
  22. parqv/views/data_view.py +119 -46
  23. parqv/views/metadata_view.py +57 -13
  24. parqv/views/schema_view.py +197 -148
  25. parqv/views/utils/__init__.py +13 -0
  26. parqv/views/utils/data_formatters.py +162 -0
  27. parqv/views/utils/stats_formatters.py +160 -0
  28. parqv-0.2.1.dist-info/METADATA +104 -0
  29. parqv-0.2.1.dist-info/RECORD +34 -0
  30. {parqv-0.1.0.dist-info → parqv-0.2.1.dist-info}/WHEEL +1 -1
  31. parqv/parquet_handler.py +0 -389
  32. parqv/views/row_group_view.py +0 -33
  33. parqv-0.1.0.dist-info/METADATA +0 -91
  34. parqv-0.1.0.dist-info/RECORD +0 -15
  35. {parqv-0.1.0.dist-info → parqv-0.2.1.dist-info}/entry_points.txt +0 -0
  36. {parqv-0.1.0.dist-info → parqv-0.2.1.dist-info}/licenses/LICENSE +0 -0
  37. {parqv-0.1.0.dist-info → parqv-0.2.1.dist-info}/top_level.txt +0 -0
parqv/data_sources/__init__.py
@@ -0,0 +1,44 @@
+ """
+ Data sources package for parqv application.
+
+ This package provides adapters for various data file formats,
+ offering a unified interface for data access.
+ """
+
+ # Base classes and exceptions
+ from .base import (
+     DataHandler,
+     DataHandlerError,
+     DataSourceError,
+     FileValidationError,
+     UnsupportedFormatError,
+     DataReadError,
+     SchemaError,
+     MetadataError,
+ )
+
+ # Format-specific handlers
+ from .formats import (
+     ParquetHandler,
+     ParquetHandlerError,
+     JsonHandler,
+     JsonHandlerError,
+ )
+
+ __all__ = [
+     # Base interface and exceptions
+     "DataHandler",
+     "DataHandlerError",
+     "DataSourceError",
+     "FileValidationError",
+     "UnsupportedFormatError",
+     "DataReadError",
+     "SchemaError",
+     "MetadataError",
+
+     # Format handlers
+     "ParquetHandler",
+     "ParquetHandlerError",
+     "JsonHandler",
+     "JsonHandlerError",
+ ]
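
The new package __init__ gives callers a single import surface for handlers and exceptions. A minimal usage sketch, assuming a hypothetical file path and that ParquetHandler raises ParquetHandlerError when the file cannot be opened:

# Sketch only: "example.parquet" is a made-up path, and the error handling
# assumes ParquetHandler raises ParquetHandlerError on an unreadable file.
from pathlib import Path

from parqv.data_sources import ParquetHandler, ParquetHandlerError

try:
    # DataHandler subclasses support the context manager protocol
    # (see handler.py below), so resources are released on exit.
    with ParquetHandler(Path("example.parquet")) as handler:
        print(handler.get_metadata_summary())
        for column in handler.get_schema_data() or []:
            print(column["name"], column["type"])
except ParquetHandlerError as exc:
    print(f"Failed to open file: {exc}")
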
parqv/data_sources/base/__init__.py
@@ -0,0 +1,28 @@
+ """
+ Base classes and interfaces for data sources.
+ """
+
+ from .handler import DataHandler
+ from .exceptions import (
+     DataSourceError,
+     DataHandlerError,
+     FileValidationError,
+     UnsupportedFormatError,
+     DataReadError,
+     SchemaError,
+     MetadataError,
+ )
+
+ __all__ = [
+     # Base handler interface
+     "DataHandler",
+
+     # Exception classes
+     "DataSourceError",
+     "DataHandlerError",
+     "FileValidationError",
+     "UnsupportedFormatError",
+     "DataReadError",
+     "SchemaError",
+     "MetadataError",
+ ]
parqv/data_sources/base/exceptions.py
@@ -0,0 +1,38 @@
+ """
+ Exception classes for data sources.
+ """
+
+
+ class DataSourceError(Exception):
+     """Base exception for all data source errors."""
+     pass
+
+
+ class DataHandlerError(DataSourceError):
+     """Base exception for all data handler errors."""
+     pass
+
+
+ class FileValidationError(DataSourceError):
+     """Exception raised when file validation fails."""
+     pass
+
+
+ class UnsupportedFormatError(DataSourceError):
+     """Exception raised when an unsupported file format is encountered."""
+     pass
+
+
+ class DataReadError(DataSourceError):
+     """Exception raised when data reading fails."""
+     pass
+
+
+ class SchemaError(DataSourceError):
+     """Exception raised when schema operations fail."""
+     pass
+
+
+ class MetadataError(DataSourceError):
+     """Exception raised when metadata operations fail."""
+     pass
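
Every concrete exception in this module derives from DataSourceError, so callers can match the most specific class first and fall back to the shared base. A small illustrative example (not part of parqv itself):

# Illustrative only: shows narrow vs. broad handling of the hierarchy.
from parqv.data_sources.base.exceptions import (
    DataSourceError,
    FileValidationError,
    UnsupportedFormatError,
)

def describe(exc: DataSourceError) -> str:
    # Match the most specific classes first, then the shared base class.
    if isinstance(exc, FileValidationError):
        return f"validation failed: {exc}"
    if isinstance(exc, UnsupportedFormatError):
        return f"unsupported format: {exc}"
    return f"data source error: {exc}"

print(describe(FileValidationError("file is empty")))
print(describe(UnsupportedFormatError("no handler for '.xml'")))
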
parqv/data_sources/base/handler.py
@@ -0,0 +1,143 @@
+ """
+ Base data handler interface for parqv data sources.
+ """
+
+ from abc import ABC, abstractmethod
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional
+
+ import pandas as pd
+
+ from ...core import get_logger
+
+
+ class DataHandler(ABC):
+     """
+     Abstract Base Class for data handlers.
+
+     Defines the common interface required by the ParqV application
+     to interact with different data file formats.
+     """
+
+     def __init__(self, file_path: Path):
+         """
+         Initialize the handler with the file path.
+
+         Subclasses should open the file or set up necessary resources here.
+
+         Args:
+             file_path: Path to the data file.
+
+         Raises:
+             DataHandlerError: If initialization fails (e.g., file not found, format error).
+         """
+         self.file_path = file_path
+         self.logger = get_logger(f"{self.__class__.__module__}.{self.__class__.__name__}")
+
+     @abstractmethod
+     def close(self) -> None:
+         """
+         Close any open resources (files, connections, etc.).
+
+         Must be implemented by subclasses.
+         """
+         pass
+
+     @abstractmethod
+     def get_metadata_summary(self) -> Dict[str, Any]:
+         """
+         Get a dictionary containing summary metadata about the data source.
+
+         Keys should be human-readable strings. Values can be of various types.
+         Should include an 'error' key if metadata retrieval fails.
+
+         Returns:
+             A dictionary with metadata summary or an error dictionary.
+         """
+         pass
+
+     @abstractmethod
+     def get_schema_data(self) -> Optional[List[Dict[str, Any]]]:
+         """
+         Get the schema as a list of dictionaries.
+
+         Each dictionary should represent a column and ideally contain keys:
+         - 'name' (str): Column name.
+         - 'type' (str): Formatted data type string.
+         - 'nullable' (Any): Indicator of nullability (e.g., bool, str "YES"/"NO").
+
+         Returns:
+             A list of schema dictionaries, an empty list if no columns,
+             or None if schema retrieval failed.
+         """
+         pass
+
+     @abstractmethod
+     def get_data_preview(self, num_rows: int = 50) -> Optional[pd.DataFrame]:
+         """
+         Fetch a preview of the data.
+
+         Args:
+             num_rows: The maximum number of rows to fetch.
+
+         Returns:
+             A pandas DataFrame with preview data, an empty DataFrame if no data,
+             a DataFrame with an 'error' column on failure, or None on critical failure.
+         """
+         pass
+
+     @abstractmethod
+     def get_column_stats(self, column_name: str) -> Dict[str, Any]:
+         """
+         Calculate and return statistics for a specific column.
+
+         The returned dictionary should ideally contain keys like:
+         - 'column' (str): Column name.
+         - 'type' (str): Formatted data type string.
+         - 'nullable' (Any): Nullability indicator.
+         - 'calculated' (Dict[str, Any]): Dictionary of computed statistics.
+         - 'error' (Optional[str]): Error message if calculation failed.
+         - 'message' (Optional[str]): Informational message.
+
+         Args:
+             column_name: The name of the column.
+
+         Returns:
+             A dictionary containing column statistics or error information.
+         """
+         pass
+
+     def format_size(self, num_bytes: int) -> str:
+         """
+         Format bytes into a human-readable string.
+
+         Args:
+             num_bytes: Number of bytes to format
+
+         Returns:
+             Human-readable size string
+         """
+         if num_bytes < 1024:
+             return f"{num_bytes} bytes"
+         elif num_bytes < 1024 ** 2:
+             return f"{num_bytes / 1024:.1f} KB"
+         elif num_bytes < 1024 ** 3:
+             return f"{num_bytes / 1024 ** 2:.1f} MB"
+         else:
+             return f"{num_bytes / 1024 ** 3:.1f} GB"
+
+     def __enter__(self):
+         """Enter the runtime context related to this object."""
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         """Exit the runtime context related to this object, ensuring cleanup."""
+         self.close()
+
+     def __del__(self):
+         """Attempt to close the handler when the object is garbage collected (best effort)."""
+         try:
+             self.close()
+         except Exception:
+             # Ignore exceptions during garbage collection
+             pass
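
The five abstract methods above are the entire contract a format needs to satisfy; format_size, the context manager protocol, and the best-effort __del__ cleanup are inherited from the base class. A hypothetical CsvHandler (CSV support is an assumption for this sketch and not part of parqv 0.2.1, which ships Parquet and JSON handlers) shows what a conforming subclass could look like:

# Illustrative only: a hypothetical CsvHandler satisfying the DataHandler contract.
from pathlib import Path
from typing import Any, Dict, List, Optional

import pandas as pd

from parqv.data_sources.base import DataHandler, DataHandlerError

class CsvHandler(DataHandler):
    def __init__(self, file_path: Path):
        super().__init__(file_path)  # sets self.file_path and self.logger
        try:
            # Eagerly load the whole file; fine for a sketch, wasteful for big files.
            self._df = pd.read_csv(file_path)
        except Exception as exc:
            raise DataHandlerError(f"Could not read {file_path}: {exc}") from exc

    def close(self) -> None:
        # Nothing external to release: the data lives in memory.
        self._df = None

    def get_metadata_summary(self) -> Dict[str, Any]:
        return {
            "File": str(self.file_path),
            "Size": self.format_size(self.file_path.stat().st_size),
            "Rows": len(self._df),
            "Columns": len(self._df.columns),
        }

    def get_schema_data(self) -> Optional[List[Dict[str, Any]]]:
        return [
            {"name": name, "type": str(dtype), "nullable": bool(self._df[name].isna().any())}
            for name, dtype in self._df.dtypes.items()
        ]

    def get_data_preview(self, num_rows: int = 50) -> Optional[pd.DataFrame]:
        return self._df.head(num_rows)

    def get_column_stats(self, column_name: str) -> Dict[str, Any]:
        series = self._df[column_name]
        return {
            "column": column_name,
            "type": str(series.dtype),
            "nullable": bool(series.isna().any()),
            "calculated": {"nulls": int(series.isna().sum()), "distinct": int(series.nunique())},
        }

Because close() is the only required cleanup hook, eager loading keeps this example short; a real handler would more likely open the file lazily or stream it.
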
parqv/data_sources/formats/__init__.py
@@ -0,0 +1,16 @@
+ """
+ Format-specific data handlers for parqv.
+ """
+
+ from .parquet import ParquetHandler, ParquetHandlerError
+ from .json import JsonHandler, JsonHandlerError
+
+ __all__ = [
+     # Parquet format
+     "ParquetHandler",
+     "ParquetHandlerError",
+
+     # JSON format
+     "JsonHandler",
+     "JsonHandlerError",
+ ]
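
This module only re-exports the two shipped handlers; the logic that picks a handler for a given file lives in parqv/core/handler_factory.py, which is not part of this hunk. As a rough illustration of suffix-based dispatch (not the actual factory):

# Illustration only: choosing a handler by file suffix.
from pathlib import Path

from parqv.data_sources.base import UnsupportedFormatError
from parqv.data_sources.formats import JsonHandler, ParquetHandler

_HANDLERS = {
    ".parquet": ParquetHandler,
    ".json": JsonHandler,
    ".ndjson": JsonHandler,  # assumption: newline-delimited JSON goes to JsonHandler
}

def pick_handler(path: Path):
    try:
        handler_cls = _HANDLERS[path.suffix.lower()]
    except KeyError:
        raise UnsupportedFormatError(f"No handler registered for '{path.suffix}'")
    return handler_cls(path)

A dictionary registry is only one way to express the mapping; the real factory may also inspect file contents rather than relying on the suffix alone.
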