datablade 0.0.0__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datablade/__init__.py +49 -1
- datablade/blade.py +322 -0
- datablade/core/__init__.py +28 -7
- datablade/core/frames.py +23 -236
- datablade/core/json.py +5 -10
- datablade/core/lists.py +5 -10
- datablade/core/messages.py +23 -11
- datablade/core/strings.py +5 -43
- datablade/core/zip.py +5 -24
- datablade/dataframes/__init__.py +51 -0
- datablade/dataframes/frames.py +585 -0
- datablade/dataframes/readers.py +1367 -0
- datablade/docs/ARCHITECTURE.md +102 -0
- datablade/docs/OBJECT_REGISTRY.md +194 -0
- datablade/docs/README.md +57 -0
- datablade/docs/TESTING.md +37 -0
- datablade/docs/USAGE.md +409 -0
- datablade/docs/__init__.py +87 -0
- datablade/docs/__main__.py +6 -0
- datablade/io/__init__.py +15 -0
- datablade/io/json.py +70 -0
- datablade/io/zip.py +111 -0
- datablade/registry.py +581 -0
- datablade/sql/__init__.py +56 -0
- datablade/sql/bulk_load.py +665 -0
- datablade/sql/ddl.py +402 -0
- datablade/sql/ddl_pyarrow.py +411 -0
- datablade/sql/dialects.py +12 -0
- datablade/sql/quoting.py +44 -0
- datablade/sql/schema_spec.py +65 -0
- datablade/sql/sqlserver.py +390 -0
- datablade/utils/__init__.py +38 -0
- datablade/utils/lists.py +32 -0
- datablade/utils/logging.py +204 -0
- datablade/utils/messages.py +29 -0
- datablade/utils/strings.py +249 -0
- datablade-0.0.6.dist-info/METADATA +406 -0
- datablade-0.0.6.dist-info/RECORD +41 -0
- {datablade-0.0.0.dist-info → datablade-0.0.6.dist-info}/WHEEL +1 -1
- {datablade-0.0.0.dist-info → datablade-0.0.6.dist-info/licenses}/LICENSE +20 -20
- datablade-0.0.0.dist-info/METADATA +0 -13
- datablade-0.0.0.dist-info/RECORD +0 -13
- {datablade-0.0.0.dist-info → datablade-0.0.6.dist-info}/top_level.txt +0 -0
datablade/core/lists.py
CHANGED
|
@@ -1,10 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
if isinstance(item, list):
|
|
7
|
-
result.extend(flatten(item))
|
|
8
|
-
else:
|
|
9
|
-
result.append(item)
|
|
10
|
-
return result
|
|
1
|
+
"""Backward-compatibility re-exports for list utilities."""
|
|
2
|
+
|
|
3
|
+
from ..utils.lists import flatten # noqa: F401
|
|
4
|
+
|
|
5
|
+
__all__ = ["flatten"]
|
datablade/core/messages.py
CHANGED
|
@@ -1,11 +1,23 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
1
|
+
"""Backward-compatibility re-exports for message/logging helpers."""
|
|
2
|
+
|
|
3
|
+
from ..utils.messages import ( # noqa: F401
|
|
4
|
+
configure_logging,
|
|
5
|
+
get_logger,
|
|
6
|
+
log,
|
|
7
|
+
log_debug,
|
|
8
|
+
log_error,
|
|
9
|
+
log_info,
|
|
10
|
+
log_warning,
|
|
11
|
+
print_verbose,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"print_verbose",
|
|
16
|
+
"log",
|
|
17
|
+
"log_debug",
|
|
18
|
+
"log_info",
|
|
19
|
+
"log_warning",
|
|
20
|
+
"log_error",
|
|
21
|
+
"get_logger",
|
|
22
|
+
"configure_logging",
|
|
23
|
+
]
|
datablade/core/strings.py
CHANGED
|
@@ -1,43 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
Quote a SQL Server name string.
|
|
7
|
-
Parameters:
|
|
8
|
-
name (str): The name to quote.
|
|
9
|
-
brackets (bool): Whether to use brackets.
|
|
10
|
-
Returns:
|
|
11
|
-
str: The quoted name.
|
|
12
|
-
"""
|
|
13
|
-
if name is None:
|
|
14
|
-
print_verbose("No name provided; exiting sql_quotename.", verbose)
|
|
15
|
-
exit
|
|
16
|
-
return_value = f"{name.replace('[','').replace(']','')}"
|
|
17
|
-
if brackets:
|
|
18
|
-
return_value = f"[{return_value}]"
|
|
19
|
-
if ticks or not brackets:
|
|
20
|
-
return_value = f"'{return_value}'"
|
|
21
|
-
return return_value
|
|
22
|
-
|
|
23
|
-
def pathing(input: str | pathlib.Path, verbose: bool=False) -> pathlib.Path|None:
|
|
24
|
-
"""
|
|
25
|
-
Standardize a path string.
|
|
26
|
-
Parameters:
|
|
27
|
-
path (str): The path to standardize.
|
|
28
|
-
Returns:
|
|
29
|
-
str: The standardized path.
|
|
30
|
-
"""
|
|
31
|
-
if input is None:
|
|
32
|
-
print_verbose("No path provided; exiting pathing.", verbose)
|
|
33
|
-
exit
|
|
34
|
-
if isinstance(input, str):
|
|
35
|
-
input.replace('\\','/')
|
|
36
|
-
input = pathlib.Path(input)
|
|
37
|
-
else:
|
|
38
|
-
input = input
|
|
39
|
-
if input.exists():
|
|
40
|
-
return input
|
|
41
|
-
else:
|
|
42
|
-
print_verbose(f"Path {input} does not exist; exiting pathing.", verbose)
|
|
43
|
-
exit
|
|
1
|
+
"""Backward-compatibility re-exports for string/path utilities."""
|
|
2
|
+
|
|
3
|
+
from ..utils.strings import pathing, sql_quotename # noqa: F401
|
|
4
|
+
|
|
5
|
+
__all__ = ["sql_quotename", "pathing"]
|
datablade/core/zip.py
CHANGED
|
@@ -1,24 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
from .
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
"""Download a file from a URL and save it to a path."""
|
|
7
|
-
try:
|
|
8
|
-
print_verbose(f"Downloading {url}", verbose=verbose)
|
|
9
|
-
data = requests.get(url, **kwargs).content
|
|
10
|
-
zip_buffer = io.BytesIO(data)
|
|
11
|
-
if path is None:
|
|
12
|
-
return zip_buffer
|
|
13
|
-
else:
|
|
14
|
-
print_verbose(f"Saving data to {path}", verbose=verbose)
|
|
15
|
-
zip_buffer.seek(0)
|
|
16
|
-
with zipfile.ZipFile(zip_buffer, 'r') as zip_ref:
|
|
17
|
-
for zip_info in zip_ref.infolist():
|
|
18
|
-
extract_path = pathing(path) / zip_info.filename
|
|
19
|
-
extract_path.parent.mkdir(parents=True, exist_ok=True)
|
|
20
|
-
with open(extract_path, 'wb') as f:
|
|
21
|
-
f.write(zip_ref.read(zip_info.filename))
|
|
22
|
-
f.close()
|
|
23
|
-
except requests.exceptions.RequestException as e:
|
|
24
|
-
print_verbose(f"Error: {e}", verbose=verbose)
|
|
1
|
+
"""Backward-compatibility re-exports for IO ZIP helpers."""
|
|
2
|
+
|
|
3
|
+
from ..io.zip import get # noqa: F401
|
|
4
|
+
|
|
5
|
+
__all__ = ["get"]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DataFrame operations and utilities for data transformation.
|
|
3
|
+
|
|
4
|
+
This module provides functions for:
|
|
5
|
+
- DataFrame column cleaning and type conversion
|
|
6
|
+
- Parquet schema generation and conversion
|
|
7
|
+
- SQL Server schema generation
|
|
8
|
+
- Memory-aware file reading with optional Polars support
|
|
9
|
+
- Chunked file reading for large files
|
|
10
|
+
- Partitioned Parquet writing
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from .frames import (
|
|
14
|
+
clean_dataframe_columns,
|
|
15
|
+
generate_parquet_schema,
|
|
16
|
+
generate_sql_server_create_table_string,
|
|
17
|
+
pandas_to_parquet_table,
|
|
18
|
+
try_cast_string_columns_to_numeric,
|
|
19
|
+
write_to_file_and_sql,
|
|
20
|
+
)
|
|
21
|
+
from .readers import (
|
|
22
|
+
excel_to_parquets,
|
|
23
|
+
json_to_jsonl,
|
|
24
|
+
parquet_to_csv_partitions,
|
|
25
|
+
read_file_chunked,
|
|
26
|
+
read_file_iter,
|
|
27
|
+
read_file_smart,
|
|
28
|
+
read_file_to_parquets,
|
|
29
|
+
stream_to_parquets,
|
|
30
|
+
stream_to_sink,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
__all__ = [
|
|
34
|
+
# DataFrame operations
|
|
35
|
+
"try_cast_string_columns_to_numeric",
|
|
36
|
+
"clean_dataframe_columns",
|
|
37
|
+
"generate_parquet_schema",
|
|
38
|
+
"pandas_to_parquet_table",
|
|
39
|
+
"generate_sql_server_create_table_string",
|
|
40
|
+
"write_to_file_and_sql",
|
|
41
|
+
# Memory-aware readers
|
|
42
|
+
"excel_to_parquets",
|
|
43
|
+
"json_to_jsonl",
|
|
44
|
+
"parquet_to_csv_partitions",
|
|
45
|
+
"read_file_chunked",
|
|
46
|
+
"read_file_iter",
|
|
47
|
+
"read_file_to_parquets",
|
|
48
|
+
"stream_to_sink",
|
|
49
|
+
"stream_to_parquets",
|
|
50
|
+
"read_file_smart",
|
|
51
|
+
]
|