parqv: parqv-0.2.0-py3-none-any.whl → parqv-0.3.0-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. parqv/__init__.py +31 -0
  2. parqv/app.py +84 -102
  3. parqv/cli.py +112 -0
  4. parqv/core/__init__.py +31 -0
  5. parqv/core/config.py +26 -0
  6. parqv/core/file_utils.py +88 -0
  7. parqv/core/handler_factory.py +90 -0
  8. parqv/core/logging.py +46 -0
  9. parqv/data_sources/__init__.py +48 -0
  10. parqv/data_sources/base/__init__.py +28 -0
  11. parqv/data_sources/base/exceptions.py +38 -0
  12. parqv/{handlers/base_handler.py → data_sources/base/handler.py} +54 -25
  13. parqv/{handlers → data_sources/formats}/__init__.py +13 -5
  14. parqv/data_sources/formats/csv.py +460 -0
  15. parqv/{handlers → data_sources/formats}/json.py +68 -32
  16. parqv/{handlers → data_sources/formats}/parquet.py +67 -56
  17. parqv/views/__init__.py +38 -0
  18. parqv/views/base.py +98 -0
  19. parqv/views/components/__init__.py +13 -0
  20. parqv/views/components/enhanced_data_table.py +152 -0
  21. parqv/views/components/error_display.py +72 -0
  22. parqv/views/components/loading_display.py +44 -0
  23. parqv/views/data_view.py +119 -46
  24. parqv/views/metadata_view.py +57 -20
  25. parqv/views/schema_view.py +190 -200
  26. parqv/views/utils/__init__.py +19 -0
  27. parqv/views/utils/data_formatters.py +184 -0
  28. parqv/views/utils/stats_formatters.py +220 -0
  29. parqv/views/utils/visualization.py +204 -0
  30. {parqv-0.2.0.dist-info → parqv-0.3.0.dist-info}/METADATA +5 -6
  31. parqv-0.3.0.dist-info/RECORD +36 -0
  32. {parqv-0.2.0.dist-info → parqv-0.3.0.dist-info}/WHEEL +1 -1
  33. parqv-0.2.0.dist-info/RECORD +0 -17
  34. {parqv-0.2.0.dist-info → parqv-0.3.0.dist-info}/entry_points.txt +0 -0
  35. {parqv-0.2.0.dist-info → parqv-0.3.0.dist-info}/licenses/LICENSE +0 -0
  36. {parqv-0.2.0.dist-info → parqv-0.3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,48 @@
1
+ """
2
+ Data sources package for parqv application.
3
+
4
+ This package provides adapters for various data file formats,
5
+ offering a unified interface for data access.
6
+ """
7
+
8
+ # Base classes and exceptions
9
+ from .base import (
10
+ DataHandler,
11
+ DataHandlerError,
12
+ DataSourceError,
13
+ FileValidationError,
14
+ UnsupportedFormatError,
15
+ DataReadError,
16
+ SchemaError,
17
+ MetadataError,
18
+ )
19
+
20
+ # Format-specific handlers
21
+ from .formats import (
22
+ ParquetHandler,
23
+ ParquetHandlerError,
24
+ JsonHandler,
25
+ JsonHandlerError,
26
+ CsvHandler,
27
+ CsvHandlerError,
28
+ )
29
+
30
+ __all__ = [
31
+ # Base interface and exceptions
32
+ "DataHandler",
33
+ "DataHandlerError",
34
+ "DataSourceError",
35
+ "FileValidationError",
36
+ "UnsupportedFormatError",
37
+ "DataReadError",
38
+ "SchemaError",
39
+ "MetadataError",
40
+
41
+ # Format handlers
42
+ "ParquetHandler",
43
+ "ParquetHandlerError",
44
+ "JsonHandler",
45
+ "JsonHandlerError",
46
+ "CsvHandler",
47
+ "CsvHandlerError",
48
+ ]
@@ -0,0 +1,28 @@
1
+ """
2
+ Base classes and interfaces for data sources.
3
+ """
4
+
5
+ from .handler import DataHandler
6
+ from .exceptions import (
7
+ DataSourceError,
8
+ DataHandlerError,
9
+ FileValidationError,
10
+ UnsupportedFormatError,
11
+ DataReadError,
12
+ SchemaError,
13
+ MetadataError,
14
+ )
15
+
16
+ __all__ = [
17
+ # Base handler interface
18
+ "DataHandler",
19
+
20
+ # Exception classes
21
+ "DataSourceError",
22
+ "DataHandlerError",
23
+ "FileValidationError",
24
+ "UnsupportedFormatError",
25
+ "DataReadError",
26
+ "SchemaError",
27
+ "MetadataError",
28
+ ]
@@ -0,0 +1,38 @@
1
+ """
2
+ Exception classes for data sources.
3
+ """
4
+
5
+
6
+ class DataSourceError(Exception):
7
+ """Base exception for all data source errors."""
8
+ pass
9
+
10
+
11
+ class DataHandlerError(DataSourceError):
12
+ """Base exception for all data handler errors."""
13
+ pass
14
+
15
+
16
+ class FileValidationError(DataSourceError):
17
+ """Exception raised when file validation fails."""
18
+ pass
19
+
20
+
21
+ class UnsupportedFormatError(DataSourceError):
22
+ """Exception raised when an unsupported file format is encountered."""
23
+ pass
24
+
25
+
26
+ class DataReadError(DataSourceError):
27
+ """Exception raised when data reading fails."""
28
+ pass
29
+
30
+
31
+ class SchemaError(DataSourceError):
32
+ """Exception raised when schema operations fail."""
33
+ pass
34
+
35
+
36
+ class MetadataError(DataSourceError):
37
+ """Exception raised when metadata operations fail."""
38
+ pass
@@ -1,28 +1,28 @@
1
- import logging
1
+ """
2
+ Base data handler interface for parqv data sources.
3
+ """
4
+
2
5
  from abc import ABC, abstractmethod
3
6
  from pathlib import Path
4
7
  from typing import Any, Dict, List, Optional
5
8
 
6
9
  import pandas as pd
7
10
 
8
- log = logging.getLogger(__name__)
9
-
10
-
11
- class DataHandlerError(Exception):
12
- """Base exception for all data handler errors."""
13
- pass
11
+ from ...core import get_logger
14
12
 
15
13
 
16
14
  class DataHandler(ABC):
17
15
  """
18
16
  Abstract Base Class for data handlers.
17
+
19
18
  Defines the common interface required by the ParqV application
20
19
  to interact with different data file formats.
21
20
  """
22
21
 
23
22
  def __init__(self, file_path: Path):
24
23
  """
25
- Initializes the handler with the file path.
24
+ Initialize the handler with the file path.
25
+
26
26
  Subclasses should open the file or set up necessary resources here.
27
27
 
28
28
  Args:
@@ -32,11 +32,13 @@ class DataHandler(ABC):
32
32
  DataHandlerError: If initialization fails (e.g., file not found, format error).
33
33
  """
34
34
  self.file_path = file_path
35
+ self.logger = get_logger(f"{self.__class__.__module__}.{self.__class__.__name__}")
35
36
 
36
37
  @abstractmethod
37
38
  def close(self) -> None:
38
39
  """
39
- Closes any open resources (files, connections, etc.).
40
+ Close any open resources (files, connections, etc.).
41
+
40
42
  Must be implemented by subclasses.
41
43
  """
42
44
  pass
@@ -44,7 +46,8 @@ class DataHandler(ABC):
44
46
  @abstractmethod
45
47
  def get_metadata_summary(self) -> Dict[str, Any]:
46
48
  """
47
- Returns a dictionary containing summary metadata about the data source.
49
+ Get a dictionary containing summary metadata about the data source.
50
+
48
51
  Keys should be human-readable strings. Values can be of various types.
49
52
  Should include an 'error' key if metadata retrieval fails.
50
53
 
@@ -54,13 +57,14 @@ class DataHandler(ABC):
54
57
  pass
55
58
 
56
59
  @abstractmethod
57
- def get_schema_data(self) -> Optional[List[Dict[str, str]]]:
60
+ def get_schema_data(self) -> Optional[List[Dict[str, Any]]]:
58
61
  """
59
- Returns the schema as a list of dictionaries.
62
+ Get the schema as a list of dictionaries.
63
+
60
64
  Each dictionary should represent a column and ideally contain keys:
61
- 'name' (str): Column name.
62
- 'type' (str): Formatted data type string.
63
- 'nullable' (Any): Indicator of nullability (e.g., bool, str "YES"/"NO").
65
+ - 'name' (str): Column name.
66
+ - 'type' (str): Formatted data type string.
67
+ - 'nullable' (Any): Indicator of nullability (e.g., bool, str "YES"/"NO").
64
68
 
65
69
  Returns:
66
70
  A list of schema dictionaries, an empty list if no columns,
@@ -71,7 +75,7 @@ class DataHandler(ABC):
71
75
  @abstractmethod
72
76
  def get_data_preview(self, num_rows: int = 50) -> Optional[pd.DataFrame]:
73
77
  """
74
- Fetches a preview of the data.
78
+ Fetch a preview of the data.
75
79
 
76
80
  Args:
77
81
  num_rows: The maximum number of rows to fetch.
@@ -85,14 +89,15 @@ class DataHandler(ABC):
85
89
  @abstractmethod
86
90
  def get_column_stats(self, column_name: str) -> Dict[str, Any]:
87
91
  """
88
- Calculates and returns statistics for a specific column.
92
+ Calculate and return statistics for a specific column.
93
+
89
94
  The returned dictionary should ideally contain keys like:
90
- 'column' (str): Column name.
91
- 'type' (str): Formatted data type string.
92
- 'nullable' (Any): Nullability indicator.
93
- 'calculated' (Dict[str, Any]): Dictionary of computed statistics.
94
- 'error' (Optional[str]): Error message if calculation failed.
95
- 'message' (Optional[str]): Informational message.
95
+ - 'column' (str): Column name.
96
+ - 'type' (str): Formatted data type string.
97
+ - 'nullable' (Any): Nullability indicator.
98
+ - 'calculated' (Dict[str, Any]): Dictionary of computed statistics.
99
+ - 'error' (Optional[str]): Error message if calculation failed.
100
+ - 'message' (Optional[str]): Informational message.
96
101
 
97
102
  Args:
98
103
  column_name: The name of the column.
@@ -102,8 +107,16 @@ class DataHandler(ABC):
102
107
  """
103
108
  pass
104
109
 
105
- def _format_size(self, num_bytes: int) -> str:
106
- """Formats bytes into a human-readable string."""
110
+ def format_size(self, num_bytes: int) -> str:
111
+ """
112
+ Format bytes into a human-readable string.
113
+
114
+ Args:
115
+ num_bytes: Number of bytes to format
116
+
117
+ Returns:
118
+ Human-readable size string
119
+ """
107
120
  if num_bytes < 1024:
108
121
  return f"{num_bytes} bytes"
109
122
  elif num_bytes < 1024 ** 2:
@@ -112,3 +125,19 @@ class DataHandler(ABC):
112
125
  return f"{num_bytes / 1024 ** 2:.1f} MB"
113
126
  else:
114
127
  return f"{num_bytes / 1024 ** 3:.1f} GB"
128
+
129
+ def __enter__(self):
130
+ """Enter the runtime context related to this object."""
131
+ return self
132
+
133
+ def __exit__(self, exc_type, exc_val, exc_tb):
134
+ """Exit the runtime context related to this object, ensuring cleanup."""
135
+ self.close()
136
+
137
+ def __del__(self):
138
+ """Attempt to close the handler when the object is garbage collected (best effort)."""
139
+ try:
140
+ self.close()
141
+ except Exception:
142
+ # Ignore exceptions during garbage collection
143
+ pass
@@ -1,13 +1,21 @@
1
- # src/parqv/handlers/__init__.py
2
- from .base_handler import DataHandler, DataHandlerError
1
+ """
2
+ Format-specific data handlers for parqv.
3
+ """
4
+
3
5
  from .parquet import ParquetHandler, ParquetHandlerError
4
6
  from .json import JsonHandler, JsonHandlerError
7
+ from .csv import CsvHandler, CsvHandlerError
5
8
 
6
9
  __all__ = [
7
- "DataHandler",
8
- "DataHandlerError",
10
+ # Parquet format
9
11
  "ParquetHandler",
10
12
  "ParquetHandlerError",
13
+
14
+ # JSON format
11
15
  "JsonHandler",
12
16
  "JsonHandlerError",
13
- ]
17
+
18
+ # CSV format
19
+ "CsvHandler",
20
+ "CsvHandlerError",
21
+ ]