datablade 0.0.0__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. datablade/__init__.py +49 -1
  2. datablade/blade.py +322 -0
  3. datablade/core/__init__.py +28 -7
  4. datablade/core/frames.py +23 -236
  5. datablade/core/json.py +5 -10
  6. datablade/core/lists.py +5 -10
  7. datablade/core/messages.py +23 -11
  8. datablade/core/strings.py +5 -43
  9. datablade/core/zip.py +5 -24
  10. datablade/dataframes/__init__.py +51 -0
  11. datablade/dataframes/frames.py +585 -0
  12. datablade/dataframes/readers.py +1367 -0
  13. datablade/docs/ARCHITECTURE.md +102 -0
  14. datablade/docs/OBJECT_REGISTRY.md +194 -0
  15. datablade/docs/README.md +57 -0
  16. datablade/docs/TESTING.md +37 -0
  17. datablade/docs/USAGE.md +409 -0
  18. datablade/docs/__init__.py +87 -0
  19. datablade/docs/__main__.py +6 -0
  20. datablade/io/__init__.py +15 -0
  21. datablade/io/json.py +70 -0
  22. datablade/io/zip.py +111 -0
  23. datablade/registry.py +581 -0
  24. datablade/sql/__init__.py +56 -0
  25. datablade/sql/bulk_load.py +665 -0
  26. datablade/sql/ddl.py +402 -0
  27. datablade/sql/ddl_pyarrow.py +411 -0
  28. datablade/sql/dialects.py +12 -0
  29. datablade/sql/quoting.py +44 -0
  30. datablade/sql/schema_spec.py +65 -0
  31. datablade/sql/sqlserver.py +390 -0
  32. datablade/utils/__init__.py +38 -0
  33. datablade/utils/lists.py +32 -0
  34. datablade/utils/logging.py +204 -0
  35. datablade/utils/messages.py +29 -0
  36. datablade/utils/strings.py +249 -0
  37. datablade-0.0.6.dist-info/METADATA +406 -0
  38. datablade-0.0.6.dist-info/RECORD +41 -0
  39. {datablade-0.0.0.dist-info → datablade-0.0.6.dist-info}/WHEEL +1 -1
  40. {datablade-0.0.0.dist-info → datablade-0.0.6.dist-info/licenses}/LICENSE +20 -20
  41. datablade-0.0.0.dist-info/METADATA +0 -13
  42. datablade-0.0.0.dist-info/RECORD +0 -13
  43. {datablade-0.0.0.dist-info → datablade-0.0.6.dist-info}/top_level.txt +0 -0
datablade/core/lists.py CHANGED
@@ -1,10 +1,5 @@
1
-
2
- def flatten(nest: list) -> list:
3
- """Flatten a nested list."""
4
- result = []
5
- for item in nest:
6
- if isinstance(item, list):
7
- result.extend(flatten(item))
8
- else:
9
- result.append(item)
10
- return result
1
+ """Backward-compatibility re-exports for list utilities."""
2
+
3
+ from ..utils.lists import flatten # noqa: F401
4
+
5
+ __all__ = ["flatten"]
datablade/core/messages.py CHANGED
@@ -1,11 +1,23 @@
1
-
2
- def print_verbose(message: str, verbose: bool=True) -> None:
3
- """
4
- Print a message if verbose is True.
5
-
6
- Parameters:
7
- message (str): The message to print.
8
- verbose (bool): If True, the message will be printed.
9
- """
10
- if verbose:
11
- print(str(message))
1
+ """Backward-compatibility re-exports for message/logging helpers."""
2
+
3
+ from ..utils.messages import ( # noqa: F401
4
+ configure_logging,
5
+ get_logger,
6
+ log,
7
+ log_debug,
8
+ log_error,
9
+ log_info,
10
+ log_warning,
11
+ print_verbose,
12
+ )
13
+
14
+ __all__ = [
15
+ "print_verbose",
16
+ "log",
17
+ "log_debug",
18
+ "log_info",
19
+ "log_warning",
20
+ "log_error",
21
+ "get_logger",
22
+ "configure_logging",
23
+ ]
datablade/core/strings.py CHANGED
@@ -1,43 +1,5 @@
1
- from .messages import print_verbose
2
- import pathlib
3
-
4
- def sql_quotename(name: str=None, brackets: bool=True, ticks: bool=False, verbose: bool=False) -> str|None:
5
- """
6
- Quote a SQL Server name string.
7
- Parameters:
8
- name (str): The name to quote.
9
- brackets (bool): Whether to use brackets.
10
- Returns:
11
- str: The quoted name.
12
- """
13
- if name is None:
14
- print_verbose("No name provided; exiting sql_quotename.", verbose)
15
- exit
16
- return_value = f"{name.replace('[','').replace(']','')}"
17
- if brackets:
18
- return_value = f"[{return_value}]"
19
- if ticks or not brackets:
20
- return_value = f"'{return_value}'"
21
- return return_value
22
-
23
- def pathing(input: str | pathlib.Path, verbose: bool=False) -> pathlib.Path|None:
24
- """
25
- Standardize a path string.
26
- Parameters:
27
- path (str): The path to standardize.
28
- Returns:
29
- str: The standardized path.
30
- """
31
- if input is None:
32
- print_verbose("No path provided; exiting pathing.", verbose)
33
- exit
34
- if isinstance(input, str):
35
- input.replace('\\','/')
36
- input = pathlib.Path(input)
37
- else:
38
- input = input
39
- if input.exists():
40
- return input
41
- else:
42
- print_verbose(f"Path {input} does not exist; exiting pathing.", verbose)
43
- exit
1
+ """Backward-compatibility re-exports for string/path utilities."""
2
+
3
+ from ..utils.strings import pathing, sql_quotename # noqa: F401
4
+
5
+ __all__ = ["sql_quotename", "pathing"]
datablade/core/zip.py CHANGED
@@ -1,24 +1,5 @@
1
- import requests, zipfile, io, pathlib
2
- from .messages import print_verbose
3
- from .strings import pathing
4
-
5
- def get(url:str, path:str|pathlib.Path=None, verbose:bool=False, **kwargs) -> None|io.BytesIO:
6
- """Download a file from a URL and save it to a path."""
7
- try:
8
- print_verbose(f"Downloading {url}", verbose=verbose)
9
- data = requests.get(url, **kwargs).content
10
- zip_buffer = io.BytesIO(data)
11
- if path is None:
12
- return zip_buffer
13
- else:
14
- print_verbose(f"Saving data to {path}", verbose=verbose)
15
- zip_buffer.seek(0)
16
- with zipfile.ZipFile(zip_buffer, 'r') as zip_ref:
17
- for zip_info in zip_ref.infolist():
18
- extract_path = pathing(path) / zip_info.filename
19
- extract_path.parent.mkdir(parents=True, exist_ok=True)
20
- with open(extract_path, 'wb') as f:
21
- f.write(zip_ref.read(zip_info.filename))
22
- f.close()
23
- except requests.exceptions.RequestException as e:
24
- print_verbose(f"Error: {e}", verbose=verbose)
1
+ """Backward-compatibility re-exports for IO ZIP helpers."""
2
+
3
+ from ..io.zip import get # noqa: F401
4
+
5
+ __all__ = ["get"]
datablade/dataframes/__init__.py ADDED
@@ -0,0 +1,51 @@
1
+ """
2
+ DataFrame operations and utilities for data transformation.
3
+
4
+ This module provides functions for:
5
+ - DataFrame column cleaning and type conversion
6
+ - Parquet schema generation and conversion
7
+ - SQL Server schema generation
8
+ - Memory-aware file reading with optional Polars support
9
+ - Chunked file reading for large files
10
+ - Partitioned Parquet writing
11
+ """
12
+
13
+ from .frames import (
14
+ clean_dataframe_columns,
15
+ generate_parquet_schema,
16
+ generate_sql_server_create_table_string,
17
+ pandas_to_parquet_table,
18
+ try_cast_string_columns_to_numeric,
19
+ write_to_file_and_sql,
20
+ )
21
+ from .readers import (
22
+ excel_to_parquets,
23
+ json_to_jsonl,
24
+ parquet_to_csv_partitions,
25
+ read_file_chunked,
26
+ read_file_iter,
27
+ read_file_smart,
28
+ read_file_to_parquets,
29
+ stream_to_parquets,
30
+ stream_to_sink,
31
+ )
32
+
33
+ __all__ = [
34
+ # DataFrame operations
35
+ "try_cast_string_columns_to_numeric",
36
+ "clean_dataframe_columns",
37
+ "generate_parquet_schema",
38
+ "pandas_to_parquet_table",
39
+ "generate_sql_server_create_table_string",
40
+ "write_to_file_and_sql",
41
+ # Memory-aware readers
42
+ "excel_to_parquets",
43
+ "json_to_jsonl",
44
+ "parquet_to_csv_partitions",
45
+ "read_file_chunked",
46
+ "read_file_iter",
47
+ "read_file_to_parquets",
48
+ "stream_to_sink",
49
+ "stream_to_parquets",
50
+ "read_file_smart",
51
+ ]