parqv 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parqv/__init__.py +31 -0
- parqv/app.py +84 -102
- parqv/cli.py +112 -0
- parqv/core/__init__.py +31 -0
- parqv/core/config.py +25 -0
- parqv/core/file_utils.py +88 -0
- parqv/core/handler_factory.py +89 -0
- parqv/core/logging.py +46 -0
- parqv/data_sources/__init__.py +44 -0
- parqv/data_sources/base/__init__.py +28 -0
- parqv/data_sources/base/exceptions.py +38 -0
- parqv/{handlers/base_handler.py → data_sources/base/handler.py} +54 -25
- parqv/{handlers → data_sources/formats}/__init__.py +8 -5
- parqv/{handlers → data_sources/formats}/json.py +31 -32
- parqv/{handlers → data_sources/formats}/parquet.py +40 -56
- parqv/views/__init__.py +38 -0
- parqv/views/base.py +98 -0
- parqv/views/components/__init__.py +13 -0
- parqv/views/components/enhanced_data_table.py +152 -0
- parqv/views/components/error_display.py +72 -0
- parqv/views/components/loading_display.py +44 -0
- parqv/views/data_view.py +119 -46
- parqv/views/metadata_view.py +57 -20
- parqv/views/schema_view.py +190 -200
- parqv/views/utils/__init__.py +13 -0
- parqv/views/utils/data_formatters.py +162 -0
- parqv/views/utils/stats_formatters.py +160 -0
- {parqv-0.2.0.dist-info → parqv-0.2.1.dist-info}/METADATA +2 -2
- parqv-0.2.1.dist-info/RECORD +34 -0
- {parqv-0.2.0.dist-info → parqv-0.2.1.dist-info}/WHEEL +1 -1
- parqv-0.2.0.dist-info/RECORD +0 -17
- {parqv-0.2.0.dist-info → parqv-0.2.1.dist-info}/entry_points.txt +0 -0
- {parqv-0.2.0.dist-info → parqv-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {parqv-0.2.0.dist-info → parqv-0.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,44 @@
|
|
1
|
+
"""
|
2
|
+
Data sources package for parqv application.
|
3
|
+
|
4
|
+
This package provides adapters for various data file formats,
|
5
|
+
offering a unified interface for data access.
|
6
|
+
"""
|
7
|
+
|
8
|
+
# Base classes and exceptions
|
9
|
+
from .base import (
|
10
|
+
DataHandler,
|
11
|
+
DataHandlerError,
|
12
|
+
DataSourceError,
|
13
|
+
FileValidationError,
|
14
|
+
UnsupportedFormatError,
|
15
|
+
DataReadError,
|
16
|
+
SchemaError,
|
17
|
+
MetadataError,
|
18
|
+
)
|
19
|
+
|
20
|
+
# Format-specific handlers
|
21
|
+
from .formats import (
|
22
|
+
ParquetHandler,
|
23
|
+
ParquetHandlerError,
|
24
|
+
JsonHandler,
|
25
|
+
JsonHandlerError,
|
26
|
+
)
|
27
|
+
|
28
|
+
__all__ = [
|
29
|
+
# Base interface and exceptions
|
30
|
+
"DataHandler",
|
31
|
+
"DataHandlerError",
|
32
|
+
"DataSourceError",
|
33
|
+
"FileValidationError",
|
34
|
+
"UnsupportedFormatError",
|
35
|
+
"DataReadError",
|
36
|
+
"SchemaError",
|
37
|
+
"MetadataError",
|
38
|
+
|
39
|
+
# Format handlers
|
40
|
+
"ParquetHandler",
|
41
|
+
"ParquetHandlerError",
|
42
|
+
"JsonHandler",
|
43
|
+
"JsonHandlerError",
|
44
|
+
]
|
@@ -0,0 +1,28 @@
|
|
1
|
+
"""
|
2
|
+
Base classes and interfaces for data sources.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from .handler import DataHandler
|
6
|
+
from .exceptions import (
|
7
|
+
DataSourceError,
|
8
|
+
DataHandlerError,
|
9
|
+
FileValidationError,
|
10
|
+
UnsupportedFormatError,
|
11
|
+
DataReadError,
|
12
|
+
SchemaError,
|
13
|
+
MetadataError,
|
14
|
+
)
|
15
|
+
|
16
|
+
__all__ = [
|
17
|
+
# Base handler interface
|
18
|
+
"DataHandler",
|
19
|
+
|
20
|
+
# Exception classes
|
21
|
+
"DataSourceError",
|
22
|
+
"DataHandlerError",
|
23
|
+
"FileValidationError",
|
24
|
+
"UnsupportedFormatError",
|
25
|
+
"DataReadError",
|
26
|
+
"SchemaError",
|
27
|
+
"MetadataError",
|
28
|
+
]
|
@@ -0,0 +1,38 @@
|
|
1
|
+
"""
|
2
|
+
Exception classes for data sources.
|
3
|
+
"""
|
4
|
+
|
5
|
+
|
6
|
+
class DataSourceError(Exception):
|
7
|
+
"""Base exception for all data source errors."""
|
8
|
+
pass
|
9
|
+
|
10
|
+
|
11
|
+
class DataHandlerError(DataSourceError):
|
12
|
+
"""Base exception for all data handler errors."""
|
13
|
+
pass
|
14
|
+
|
15
|
+
|
16
|
+
class FileValidationError(DataSourceError):
|
17
|
+
"""Exception raised when file validation fails."""
|
18
|
+
pass
|
19
|
+
|
20
|
+
|
21
|
+
class UnsupportedFormatError(DataSourceError):
|
22
|
+
"""Exception raised when an unsupported file format is encountered."""
|
23
|
+
pass
|
24
|
+
|
25
|
+
|
26
|
+
class DataReadError(DataSourceError):
|
27
|
+
"""Exception raised when data reading fails."""
|
28
|
+
pass
|
29
|
+
|
30
|
+
|
31
|
+
class SchemaError(DataSourceError):
|
32
|
+
"""Exception raised when schema operations fail."""
|
33
|
+
pass
|
34
|
+
|
35
|
+
|
36
|
+
class MetadataError(DataSourceError):
|
37
|
+
"""Exception raised when metadata operations fail."""
|
38
|
+
pass
|
@@ -1,28 +1,28 @@
|
|
1
|
-
|
1
|
+
"""
|
2
|
+
Base data handler interface for parqv data sources.
|
3
|
+
"""
|
4
|
+
|
2
5
|
from abc import ABC, abstractmethod
|
3
6
|
from pathlib import Path
|
4
7
|
from typing import Any, Dict, List, Optional
|
5
8
|
|
6
9
|
import pandas as pd
|
7
10
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
class DataHandlerError(Exception):
|
12
|
-
"""Base exception for all data handler errors."""
|
13
|
-
pass
|
11
|
+
from ...core import get_logger
|
14
12
|
|
15
13
|
|
16
14
|
class DataHandler(ABC):
|
17
15
|
"""
|
18
16
|
Abstract Base Class for data handlers.
|
17
|
+
|
19
18
|
Defines the common interface required by the ParqV application
|
20
19
|
to interact with different data file formats.
|
21
20
|
"""
|
22
21
|
|
23
22
|
def __init__(self, file_path: Path):
|
24
23
|
"""
|
25
|
-
|
24
|
+
Initialize the handler with the file path.
|
25
|
+
|
26
26
|
Subclasses should open the file or set up necessary resources here.
|
27
27
|
|
28
28
|
Args:
|
@@ -32,11 +32,13 @@ class DataHandler(ABC):
|
|
32
32
|
DataHandlerError: If initialization fails (e.g., file not found, format error).
|
33
33
|
"""
|
34
34
|
self.file_path = file_path
|
35
|
+
self.logger = get_logger(f"{self.__class__.__module__}.{self.__class__.__name__}")
|
35
36
|
|
36
37
|
@abstractmethod
|
37
38
|
def close(self) -> None:
|
38
39
|
"""
|
39
|
-
|
40
|
+
Close any open resources (files, connections, etc.).
|
41
|
+
|
40
42
|
Must be implemented by subclasses.
|
41
43
|
"""
|
42
44
|
pass
|
@@ -44,7 +46,8 @@ class DataHandler(ABC):
|
|
44
46
|
@abstractmethod
|
45
47
|
def get_metadata_summary(self) -> Dict[str, Any]:
|
46
48
|
"""
|
47
|
-
|
49
|
+
Get a dictionary containing summary metadata about the data source.
|
50
|
+
|
48
51
|
Keys should be human-readable strings. Values can be of various types.
|
49
52
|
Should include an 'error' key if metadata retrieval fails.
|
50
53
|
|
@@ -54,13 +57,14 @@ class DataHandler(ABC):
|
|
54
57
|
pass
|
55
58
|
|
56
59
|
@abstractmethod
|
57
|
-
def get_schema_data(self) -> Optional[List[Dict[str,
|
60
|
+
def get_schema_data(self) -> Optional[List[Dict[str, Any]]]:
|
58
61
|
"""
|
59
|
-
|
62
|
+
Get the schema as a list of dictionaries.
|
63
|
+
|
60
64
|
Each dictionary should represent a column and ideally contain keys:
|
61
|
-
'name' (str): Column name.
|
62
|
-
'type' (str): Formatted data type string.
|
63
|
-
'nullable' (Any): Indicator of nullability (e.g., bool, str "YES"/"NO").
|
65
|
+
- 'name' (str): Column name.
|
66
|
+
- 'type' (str): Formatted data type string.
|
67
|
+
- 'nullable' (Any): Indicator of nullability (e.g., bool, str "YES"/"NO").
|
64
68
|
|
65
69
|
Returns:
|
66
70
|
A list of schema dictionaries, an empty list if no columns,
|
@@ -71,7 +75,7 @@ class DataHandler(ABC):
|
|
71
75
|
@abstractmethod
|
72
76
|
def get_data_preview(self, num_rows: int = 50) -> Optional[pd.DataFrame]:
|
73
77
|
"""
|
74
|
-
|
78
|
+
Fetch a preview of the data.
|
75
79
|
|
76
80
|
Args:
|
77
81
|
num_rows: The maximum number of rows to fetch.
|
@@ -85,14 +89,15 @@ class DataHandler(ABC):
|
|
85
89
|
@abstractmethod
|
86
90
|
def get_column_stats(self, column_name: str) -> Dict[str, Any]:
|
87
91
|
"""
|
88
|
-
|
92
|
+
Calculate and return statistics for a specific column.
|
93
|
+
|
89
94
|
The returned dictionary should ideally contain keys like:
|
90
|
-
'column' (str): Column name.
|
91
|
-
'type' (str): Formatted data type string.
|
92
|
-
'nullable' (Any): Nullability indicator.
|
93
|
-
'calculated' (Dict[str, Any]): Dictionary of computed statistics.
|
94
|
-
'error' (Optional[str]): Error message if calculation failed.
|
95
|
-
'message' (Optional[str]): Informational message.
|
95
|
+
- 'column' (str): Column name.
|
96
|
+
- 'type' (str): Formatted data type string.
|
97
|
+
- 'nullable' (Any): Nullability indicator.
|
98
|
+
- 'calculated' (Dict[str, Any]): Dictionary of computed statistics.
|
99
|
+
- 'error' (Optional[str]): Error message if calculation failed.
|
100
|
+
- 'message' (Optional[str]): Informational message.
|
96
101
|
|
97
102
|
Args:
|
98
103
|
column_name: The name of the column.
|
@@ -102,8 +107,16 @@ class DataHandler(ABC):
|
|
102
107
|
"""
|
103
108
|
pass
|
104
109
|
|
105
|
-
def
|
106
|
-
"""
|
110
|
+
def format_size(self, num_bytes: int) -> str:
|
111
|
+
"""
|
112
|
+
Format bytes into a human-readable string.
|
113
|
+
|
114
|
+
Args:
|
115
|
+
num_bytes: Number of bytes to format
|
116
|
+
|
117
|
+
Returns:
|
118
|
+
Human-readable size string
|
119
|
+
"""
|
107
120
|
if num_bytes < 1024:
|
108
121
|
return f"{num_bytes} bytes"
|
109
122
|
elif num_bytes < 1024 ** 2:
|
@@ -112,3 +125,19 @@ class DataHandler(ABC):
|
|
112
125
|
return f"{num_bytes / 1024 ** 2:.1f} MB"
|
113
126
|
else:
|
114
127
|
return f"{num_bytes / 1024 ** 3:.1f} GB"
|
128
|
+
|
129
|
+
def __enter__(self):
|
130
|
+
"""Enter the runtime context related to this object."""
|
131
|
+
return self
|
132
|
+
|
133
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
134
|
+
"""Exit the runtime context related to this object, ensuring cleanup."""
|
135
|
+
self.close()
|
136
|
+
|
137
|
+
def __del__(self):
|
138
|
+
"""Attempt to close the handler when the object is garbage collected (best effort)."""
|
139
|
+
try:
|
140
|
+
self.close()
|
141
|
+
except Exception:
|
142
|
+
# Ignore exceptions during garbage collection
|
143
|
+
pass
|
@@ -1,13 +1,16 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
"""
|
2
|
+
Format-specific data handlers for parqv.
|
3
|
+
"""
|
4
|
+
|
3
5
|
from .parquet import ParquetHandler, ParquetHandlerError
|
4
6
|
from .json import JsonHandler, JsonHandlerError
|
5
7
|
|
6
8
|
__all__ = [
|
7
|
-
|
8
|
-
"DataHandlerError",
|
9
|
+
# Parquet format
|
9
10
|
"ParquetHandler",
|
10
11
|
"ParquetHandlerError",
|
12
|
+
|
13
|
+
# JSON format
|
11
14
|
"JsonHandler",
|
12
15
|
"JsonHandlerError",
|
13
|
-
]
|
16
|
+
]
|
@@ -1,13 +1,10 @@
|
|
1
|
-
import logging
|
2
1
|
from pathlib import Path
|
3
2
|
from typing import Any, Dict, List, Optional, Tuple
|
4
3
|
|
5
4
|
import duckdb
|
6
5
|
import pandas as pd
|
7
6
|
|
8
|
-
from
|
9
|
-
|
10
|
-
log = logging.getLogger(__name__)
|
7
|
+
from ..base import DataHandler, DataHandlerError
|
11
8
|
|
12
9
|
|
13
10
|
class JsonHandlerError(DataHandlerError):
|
@@ -38,6 +35,8 @@ class JsonHandler(DataHandler):
|
|
38
35
|
JsonHandlerError: If the file doesn't exist, isn't a file, or if
|
39
36
|
initialization fails (e.g., DuckDB connection, view creation).
|
40
37
|
"""
|
38
|
+
super().__init__(file_path)
|
39
|
+
|
41
40
|
self.file_path = self._validate_file_path(file_path)
|
42
41
|
self._db_conn: Optional[duckdb.DuckDBPyConnection] = None
|
43
42
|
self._view_name: str = self.DEFAULT_VIEW_NAME
|
@@ -48,9 +47,9 @@ class JsonHandler(DataHandler):
|
|
48
47
|
self._connect_db()
|
49
48
|
self._create_duckdb_view()
|
50
49
|
self._load_metadata()
|
51
|
-
|
50
|
+
self.logger.info(f"JsonHandler initialized successfully for: {self.file_path}")
|
52
51
|
except Exception as e:
|
53
|
-
|
52
|
+
self.logger.exception(f"Error during JsonHandler initialization for {self.file_path}")
|
54
53
|
self.close()
|
55
54
|
if isinstance(e, JsonHandlerError):
|
56
55
|
raise
|
@@ -67,9 +66,9 @@ class JsonHandler(DataHandler):
|
|
67
66
|
"""Establishes a connection to an in-memory DuckDB database."""
|
68
67
|
try:
|
69
68
|
self._db_conn = duckdb.connect(database=':memory:', read_only=False)
|
70
|
-
|
69
|
+
self.logger.debug("DuckDB in-memory connection established.")
|
71
70
|
except Exception as e:
|
72
|
-
|
71
|
+
self.logger.exception("Failed to initialize DuckDB connection.")
|
73
72
|
raise JsonHandlerError(f"DuckDB connection failed: {e}") from e
|
74
73
|
|
75
74
|
def _create_duckdb_view(self):
|
@@ -83,9 +82,9 @@ class JsonHandler(DataHandler):
|
|
83
82
|
|
84
83
|
try:
|
85
84
|
self._db_conn.sql(load_query)
|
86
|
-
|
85
|
+
self.logger.debug(f"DuckDB view '{self._view_name}' created for file '{file_path_str}'.")
|
87
86
|
except duckdb.Error as db_err:
|
88
|
-
|
87
|
+
self.logger.exception(f"DuckDB Error creating view '{self._view_name}' from '{file_path_str}': {db_err}")
|
89
88
|
if "Could not open file" in str(db_err):
|
90
89
|
raise JsonHandlerError(
|
91
90
|
f"DuckDB could not open file: {file_path_str}. Check permissions or path. Error: {db_err}") from db_err
|
@@ -95,7 +94,7 @@ class JsonHandler(DataHandler):
|
|
95
94
|
else:
|
96
95
|
raise JsonHandlerError(f"DuckDB failed create view for JSON file: {db_err}") from db_err
|
97
96
|
except Exception as e:
|
98
|
-
|
97
|
+
self.logger.exception(f"Unexpected error creating DuckDB view '{self._view_name}'.")
|
99
98
|
raise JsonHandlerError(f"Failed to create DuckDB view: {e}") from e
|
100
99
|
|
101
100
|
def _load_metadata(self):
|
@@ -108,27 +107,27 @@ class JsonHandler(DataHandler):
|
|
108
107
|
describe_query = f"DESCRIBE \"{self._view_name}\";"
|
109
108
|
schema_result = self._db_conn.sql(describe_query).fetchall()
|
110
109
|
self._schema = self._parse_schema(schema_result)
|
111
|
-
|
110
|
+
self.logger.debug(f"Schema fetched for view '{self._view_name}': {len(self._schema)} columns.")
|
112
111
|
|
113
112
|
# Fetch Row Count
|
114
113
|
count_query = f"SELECT COUNT(*) FROM \"{self._view_name}\";"
|
115
114
|
count_result = self._db_conn.sql(count_query).fetchone()
|
116
115
|
self._row_count = count_result[0] if count_result else 0
|
117
|
-
|
116
|
+
self.logger.debug(f"Row count fetched for view '{self._view_name}': {self._row_count}")
|
118
117
|
|
119
118
|
except duckdb.Error as db_err:
|
120
|
-
|
119
|
+
self.logger.exception(f"DuckDB Error fetching metadata for view '{self._view_name}': {db_err}")
|
121
120
|
self._schema = None
|
122
121
|
self._row_count = None
|
123
122
|
except Exception as e:
|
124
|
-
|
123
|
+
self.logger.exception(f"Unexpected error fetching metadata for view '{self._view_name}'")
|
125
124
|
self._schema = None
|
126
125
|
self._row_count = None
|
127
126
|
|
128
127
|
def _parse_schema(self, describe_output: List[Tuple]) -> List[Dict[str, Any]]:
|
129
128
|
"""Parses the output of DuckDB's DESCRIBE query."""
|
130
129
|
if not describe_output:
|
131
|
-
|
130
|
+
self.logger.warning(f"DESCRIBE query for view '{self._view_name}' returned no schema info.")
|
132
131
|
return []
|
133
132
|
|
134
133
|
parsed_schema = []
|
@@ -141,7 +140,7 @@ class JsonHandler(DataHandler):
|
|
141
140
|
is_nullable = null_str.upper() == 'YES'
|
142
141
|
parsed_schema.append({"name": name, "type": type_str, "nullable": is_nullable})
|
143
142
|
else:
|
144
|
-
|
143
|
+
self.logger.warning(f"Unexpected format in DESCRIBE output row: {row}")
|
145
144
|
return parsed_schema
|
146
145
|
|
147
146
|
def get_schema_data(self) -> Optional[List[Dict[str, Any]]]:
|
@@ -153,7 +152,7 @@ class JsonHandler(DataHandler):
|
|
153
152
|
or None if schema couldn't be fetched.
|
154
153
|
"""
|
155
154
|
if self._schema is None:
|
156
|
-
|
155
|
+
self.logger.warning("Schema is unavailable. It might not have been fetched successfully.")
|
157
156
|
return self._schema
|
158
157
|
|
159
158
|
def get_metadata_summary(self) -> Dict[str, Any]:
|
@@ -184,7 +183,7 @@ class JsonHandler(DataHandler):
|
|
184
183
|
try:
|
185
184
|
summary["Size"] = f"{self.file_path.stat().st_size:,} bytes"
|
186
185
|
except Exception as e:
|
187
|
-
|
186
|
+
self.logger.warning(f"Could not get file size for {self.file_path}: {e}")
|
188
187
|
summary["Size"] = "N/A"
|
189
188
|
|
190
189
|
return summary
|
@@ -202,13 +201,13 @@ class JsonHandler(DataHandler):
|
|
202
201
|
error message if fetching fails.
|
203
202
|
"""
|
204
203
|
if not self._db_conn:
|
205
|
-
|
204
|
+
self.logger.warning("Data preview unavailable: DuckDB connection is closed or uninitialized.")
|
206
205
|
return pd.DataFrame({"error": ["DuckDB connection not available."]})
|
207
206
|
if self._schema is None:
|
208
|
-
|
207
|
+
self.logger.warning("Data preview unavailable: Schema couldn't be determined.")
|
209
208
|
return pd.DataFrame({"error": ["Schema not available, cannot fetch preview."]})
|
210
209
|
if self._row_count == 0:
|
211
|
-
|
210
|
+
self.logger.info("Data preview: Source JSON view is empty.")
|
212
211
|
# Return empty DataFrame with correct columns if possible
|
213
212
|
if self._schema:
|
214
213
|
return pd.DataFrame(columns=[col['name'] for col in self._schema])
|
@@ -221,10 +220,10 @@ class JsonHandler(DataHandler):
|
|
221
220
|
df = self._db_conn.sql(preview_query).df()
|
222
221
|
return df
|
223
222
|
except duckdb.Error as db_err:
|
224
|
-
|
223
|
+
self.logger.exception(f"DuckDB error getting data preview from '{self._view_name}': {db_err}")
|
225
224
|
return pd.DataFrame({"error": [f"DuckDB error fetching preview: {db_err}"]})
|
226
225
|
except Exception as e:
|
227
|
-
|
226
|
+
self.logger.exception(f"Unexpected error getting data preview from '{self._view_name}'")
|
228
227
|
return pd.DataFrame({"error": [f"Failed to fetch preview: {e}"]})
|
229
228
|
|
230
229
|
def _get_column_info(self, column_name: str) -> Optional[Dict[str, Any]]:
|
@@ -274,7 +273,7 @@ class JsonHandler(DataHandler):
|
|
274
273
|
|
275
274
|
if is_complex:
|
276
275
|
# Use basic counts for complex types as SUMMARIZE is less informative
|
277
|
-
|
276
|
+
self.logger.debug(f"Calculating basic counts for complex type column: {column_name}")
|
278
277
|
stats = self._get_basic_column_counts(safe_column_name)
|
279
278
|
message = f"Only basic counts calculated for complex type '{col_type}'."
|
280
279
|
# Attempt distinct count for complex types (can be slow/error-prone)
|
@@ -286,13 +285,13 @@ class JsonHandler(DataHandler):
|
|
286
285
|
else:
|
287
286
|
stats["Distinct Count"] = "N/A" # Or 0 if appropriate
|
288
287
|
except duckdb.Error as distinct_err:
|
289
|
-
|
288
|
+
self.logger.warning(
|
290
289
|
f"Could not calculate distinct count for complex column '{column_name}': {distinct_err}")
|
291
290
|
stats["Distinct Count"] = "Error"
|
292
291
|
|
293
292
|
else:
|
294
293
|
# Use SUMMARIZE for non-complex types
|
295
|
-
|
294
|
+
self.logger.debug(f"Using SUMMARIZE for simple type column: {column_name}")
|
296
295
|
summarize_query = f"SUMMARIZE SELECT {safe_column_name} FROM \"{self._view_name}\";"
|
297
296
|
summarize_df = self._db_conn.sql(summarize_query).df()
|
298
297
|
|
@@ -305,10 +304,10 @@ class JsonHandler(DataHandler):
|
|
305
304
|
stats = self._format_summarize_stats(summarize_df.iloc[0])
|
306
305
|
|
307
306
|
except duckdb.Error as db_err:
|
308
|
-
|
307
|
+
self.logger.exception(f"DuckDB Error calculating statistics for column '{column_name}': {db_err}")
|
309
308
|
error_msg = f"DuckDB calculation failed: {db_err}"
|
310
309
|
except Exception as e:
|
311
|
-
|
310
|
+
self.logger.exception(f"Unexpected error calculating statistics for column '{column_name}'")
|
312
311
|
error_msg = f"Calculation failed unexpectedly: {e}"
|
313
312
|
|
314
313
|
return self._create_stats_result(
|
@@ -351,7 +350,7 @@ class JsonHandler(DataHandler):
|
|
351
350
|
stats["Null Percentage"] = "Error"
|
352
351
|
|
353
352
|
except duckdb.Error as db_err:
|
354
|
-
|
353
|
+
self.logger.warning(f"Failed to get basic counts for {safe_column_name}: {db_err}")
|
355
354
|
stats["Counts Error"] = str(db_err)
|
356
355
|
return stats
|
357
356
|
|
@@ -430,11 +429,11 @@ class JsonHandler(DataHandler):
|
|
430
429
|
if self._db_conn:
|
431
430
|
try:
|
432
431
|
self._db_conn.close()
|
433
|
-
|
432
|
+
self.logger.info(f"DuckDB connection closed for {self.file_path}.")
|
434
433
|
self._db_conn = None
|
435
434
|
except Exception as e:
|
436
435
|
# Log error but don't raise during close typically
|
437
|
-
|
436
|
+
self.logger.error(f"Error closing DuckDB connection for {self.file_path}: {e}")
|
438
437
|
self._db_conn = None # Assume closed even if error occurred
|
439
438
|
|
440
439
|
def __enter__(self):
|