datablade-0.0.0-py3-none-any.whl → datablade-0.0.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datablade/__init__.py CHANGED
@@ -1 +1,41 @@
- #empty
+ """
+ datablade - A suite of functions providing standard syntax across data engineering projects.
+
+ The package is organized into four main modules:
+ - dataframes: DataFrame operations, transformations, and memory-aware file reading
+ - io: Input/output operations for external data
+ - utils: General utility functions and logging
+ - sql: Multi-dialect SQL generation, quoting, and bulk loading
+
+ For backward compatibility, all functions are also available from datablade.core.
+ """
+
+ # Also maintain core for backward compatibility
+ # Import from new organized structure
+ from . import core, dataframes, io, sql, utils
+ from .blade import Blade
+ from .dataframes import read_file_chunked, read_file_smart, read_file_to_parquets
+ from .sql import Dialect, bulk_load, generate_create_table
+
+ # Convenience re-exports for commonly used functions
+ from .utils.logging import configure_logging, get_logger
+
+ __version__ = "0.0.5"
+
+ __all__ = [
+     "dataframes",
+     "io",
+     "utils",
+     "sql",
+     "core",  # Maintain backward compatibility
+     # Convenience re-exports
+     "configure_logging",
+     "get_logger",
+     "read_file_smart",
+     "read_file_chunked",
+     "read_file_to_parquets",
+     "Dialect",
+     "generate_create_table",
+     "bulk_load",
+     "Blade",
+ ]
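To make the new top-level surface concrete, a minimal usage sketch follows; the input path and table names are hypothetical, and the logging helpers are assumed to accept the arguments shown:

    import datablade

    datablade.configure_logging()                       # assumed default logging setup
    log = datablade.get_logger(__name__)

    df = datablade.read_file_smart("data/orders.csv")   # hypothetical input file
    ddl = datablade.generate_create_table(
        df=df,
        schema="dbo",
        table="orders",
        dialect=datablade.Dialect.SQLSERVER,
    )
    log.info(ddl)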
datablade/blade.py ADDED
@@ -0,0 +1,153 @@
+ """Optional facade class for datablade.
+
+ The canonical API is module-level functions (e.g., datablade.dataframes.read_file_iter).
+ This module provides a small convenience wrapper for users who prefer an object-style
+ entrypoint with shared defaults.
+ """
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any, Iterator, Optional, Union
+
+ import pandas as pd
+
+ from .dataframes import (
+     clean_dataframe_columns,
+     read_file_iter,
+     read_file_smart,
+     read_file_to_parquets,
+     stream_to_parquets,
+     try_cast_string_columns_to_numeric,
+ )
+ from .sql import Dialect, generate_create_table, generate_create_table_from_parquet
+
+ PathLike = Union[str, Path]
+
+
+ @dataclass(frozen=True)
+ class Blade:
+     """Convenience facade for common datablade workflows.
+
+     Stores default options that are threaded through to the underlying functions.
+     """
+
+     memory_fraction: float = 0.5
+     verbose: bool = False
+     convert_types: bool = True
+
+     def read(self, file_path: PathLike, **read_kwargs: Any) -> pd.DataFrame:
+         return read_file_smart(
+             file_path=file_path,
+             memory_fraction=self.memory_fraction,
+             verbose=self.verbose,
+             **read_kwargs,
+         )
+
+     def iter(
+         self,
+         file_path: PathLike,
+         *,
+         chunksize: Optional[int] = None,
+         **read_kwargs: Any,
+     ) -> Iterator[pd.DataFrame]:
+         return read_file_iter(
+             file_path=file_path,
+             chunksize=chunksize,
+             memory_fraction=self.memory_fraction,
+             verbose=self.verbose,
+             **read_kwargs,
+         )
+
+     def partition_to_parquets(
+         self,
+         file_path: PathLike,
+         output_dir: PathLike,
+         *,
+         output_prefix: str = "part",
+         rows_per_file: Optional[int] = None,
+         convert_types: Optional[bool] = None,
+         **read_kwargs: Any,
+     ):
+         return read_file_to_parquets(
+             file_path=file_path,
+             output_dir=output_dir,
+             output_prefix=output_prefix,
+             rows_per_file=rows_per_file,
+             memory_fraction=self.memory_fraction,
+             convert_types=(
+                 self.convert_types if convert_types is None else convert_types
+             ),
+             verbose=self.verbose,
+             **read_kwargs,
+         )
+
+     def stream_to_parquets(
+         self,
+         file_path: PathLike,
+         output_dir: PathLike,
+         *,
+         output_prefix: str = "part",
+         rows_per_file: Optional[int] = None,
+         convert_types: Optional[bool] = None,
+         **read_kwargs: Any,
+     ):
+         return stream_to_parquets(
+             file_path=file_path,
+             output_dir=output_dir,
+             output_prefix=output_prefix,
+             rows_per_file=rows_per_file,
+             memory_fraction=self.memory_fraction,
+             convert_types=(
+                 self.convert_types if convert_types is None else convert_types
+             ),
+             verbose=self.verbose,
+             **read_kwargs,
+         )
+
+     def clean(self, df: pd.DataFrame) -> pd.DataFrame:
+         return clean_dataframe_columns(df, verbose=self.verbose)
+
+     def cast_numeric(self, df: pd.DataFrame) -> pd.DataFrame:
+         return try_cast_string_columns_to_numeric(df, verbose=self.verbose)
+
+     def create_table_sql(
+         self,
+         df: pd.DataFrame,
+         *,
+         catalog: Optional[str] = None,
+         schema: Optional[str] = None,
+         table: str = "table",
+         drop_existing: bool = True,
+         dialect: Dialect = Dialect.SQLSERVER,
+     ) -> str:
+         return generate_create_table(
+             df=df,
+             catalog=catalog,
+             schema=schema,
+             table=table,
+             drop_existing=drop_existing,
+             dialect=dialect,
+             verbose=self.verbose,
+         )
+
+     def create_table_sql_from_parquet(
+         self,
+         parquet_path: str,
+         *,
+         catalog: Optional[str] = None,
+         schema: Optional[str] = None,
+         table: str = "table",
+         drop_existing: bool = True,
+         dialect: Dialect = Dialect.SQLSERVER,
+     ) -> str:
+         return generate_create_table_from_parquet(
+             parquet_path=parquet_path,
+             catalog=catalog,
+             schema=schema,
+             table=table,
+             drop_existing=drop_existing,
+             dialect=dialect,
+             verbose=self.verbose,
+         )
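A minimal sketch of the facade in use; the paths are hypothetical and the keyword names follow the wrappers above:

    from datablade import Blade, Dialect

    blade = Blade(memory_fraction=0.25, verbose=True)

    df = blade.read("data/events.csv")   # memory-aware read with shared defaults
    df = blade.clean(df)                 # flatten/clean column names
    df = blade.cast_numeric(df)          # best-effort numeric conversion

    # Partition a large file into Parquet parts and emit matching DDL.
    blade.partition_to_parquets("data/events.csv", "out/", output_prefix="events")
    ddl = blade.create_table_sql(df, schema="dbo", table="events", dialect=Dialect.SQLSERVER)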
datablade/core/__init__.py CHANGED
@@ -1,7 +1,28 @@
- import os, re
-
- def find_python_files(path):
-     return [one_file_name.replace('.py','') for one_file_name in os.listdir(os.path.abspath(path)) if one_file_name != '__init__.py' and re.match(r'.*\.py$',one_file_name) is not None]
-
- for each_file in find_python_files(path=os.path.dirname(__file__)):
-     exec('from .'+each_file+' import *')
+ """Backward-compatible exports for the legacy datablade.core namespace.
+
+ Historically, this package used dynamic imports to re-export everything.
+ We keep the same runtime surface area but use explicit imports so that IDEs,
+ type checkers, and static analysis tools can reason about the module.
+ """
+
+ from . import frames as _frames
+ from . import json as _json
+ from . import lists as _lists
+ from . import messages as _messages
+ from . import strings as _strings
+ from . import zip as _zip
+ from .frames import *  # noqa: F401,F403
+ from .json import *  # noqa: F401,F403
+ from .lists import *  # noqa: F401,F403
+ from .messages import *  # noqa: F401,F403
+ from .strings import *  # noqa: F401,F403
+ from .zip import *  # noqa: F401,F403
+
+ __all__ = [
+     *_frames.__all__,
+     *_json.__all__,
+     *_lists.__all__,
+     *_messages.__all__,
+     *_strings.__all__,
+     *_zip.__all__,
+ ]
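Because the explicit re-exports preserve the legacy surface, existing call sites that import from datablade.core should keep working. A small sketch using names listed in the submodule __all__ exports shown below:

    from datablade.core import flatten, print_verbose, sql_quotename

    print_verbose("still importable from datablade.core", verbose=True)
    assert flatten([1, [2, [3]]]) == [1, 2, 3]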
datablade/core/frames.py CHANGED
@@ -1,236 +1,23 @@
- import pandas as pd
- import pyarrow as pa
- import numpy as np
-
- from .messages import print_verbose
- from .strings import sql_quotename
-
- def try_cast_string_columns_to_numeric(df: pd.DataFrame=None, convert_partial: bool=False, verbose: bool=False) -> pd.DataFrame|None:
-     """
-     Attempt to cast DataFrame string columns to numeric values where possible.
-
-     Parameters:
-         df (pd.DataFrame): The DataFrame to process.
-         convert_partial (bool): If True, columns with some values convertible to numeric types
-             will be converted to numeric types with NaNs where conversion failed.
-             If False, only columns where all values can be converted will be converted.
-
-     Returns:
-         pd.DataFrame: DataFrame with string columns converted to numeric types where possible.
-     """
-     if df is None:
-         print_verbose("No DataFrame provided; exiting try_cast_string_columns_to_numeric.", verbose)
-         exit # Exit the function if no DataFrame is provided
-
-     for col in df.columns:
-         if df[col].dtype == 'object':
-             converted = pd.to_numeric(df[col], errors='coerce')
-             has_nan = converted.isnull().any()
-             if not has_nan:
-                 df[col] = converted
-                 print_verbose(f"Column '{col}' successfully converted to numeric.", verbose)
-             else:
-                 if convert_partial:
-                     df[col] = converted
-                     print_verbose(f"Column '{col}' partially converted to numeric with NaNs where conversion failed.", verbose)
-                 else:
-                     print_verbose(f"Column '{col}' could not be fully converted to numeric; leaving as is.", verbose)
-     return df
-
- def clean_dataframe_columns(df: pd.DataFrame=None, verbose: bool=False) -> pd.DataFrame|None:
-     """
-     Clean the DataFrame columns by:
-         - Flattening MultiIndex columns
-         - Converting non-string column names to strings
-         - Removing duplicate columns, keeping the first occurrence
-
-     Parameters:
-         df (pd.DataFrame): The DataFrame to clean.
-
-     Returns:
-         pd.DataFrame: The cleaned DataFrame.
-     """
-     if df is None:
-         print_verbose("No DataFrame provided; exiting clean_dataframe_columns.", verbose)
-         exit
-     # Step 1: Flatten MultiIndex columns
-     if isinstance(df.columns, pd.MultiIndex):
-         df.columns = ['_'.join(map(str, col)).strip() for col in df.columns.values]
-         print_verbose("Flattened MultiIndex columns.", verbose)
-
-     # Step 2: Convert non-string column names to strings
-     df.columns = df.columns.map(str)
-     print_verbose("Converted column names to strings.", verbose)
-
-     # Step 3: Remove duplicate columns, keeping the first occurrence
-     duplicates = df.columns.duplicated()
-     if duplicates.any():
-         duplicate_cols = df.columns[duplicates]
-         print_verbose(f"Duplicate columns found: {list(duplicate_cols)}", verbose)
-         df = df.loc[:, ~duplicates]
-         print_verbose("Removed duplicate columns, keeping the first occurrence.", verbose)
-
-     return df
-
- def generate_parquet_schema(df: pd.DataFrame=None, verbose: bool=False) -> pa.Schema|None:
-     """
-     Generate a PyArrow Schema from a pandas DataFrame.
-     Parameters:
-         df (pandas.DataFrame): The DataFrame to generate the schema from.
-     Returns:
-         pyarrow.Schema: The PyArrow Schema object.
-     """
-     if df is None:
-         print_verbose("No DataFrame provided; exiting generate_parquet_schema.", verbose)
-         exit
-
-     fields = []
-     for column in df.columns:
-         col_data = df[column]
-         col_name = column
-         dtype = col_data.dtype
-
-         # Determine if the column contains any nulls
-         nullable = col_data.isnull().any()
-
-         # Map pandas dtype to PyArrow type
-         pa_type = None
-
-         if pd.api.types.is_integer_dtype(dtype):
-             # Check the range to determine the smallest integer type
-             min_value = col_data.min()
-             max_value = col_data.max()
-             if min_value >= np.iinfo(np.int8).min and max_value <= np.iinfo(np.int8).max:
-                 pa_type = pa.int8()
-             elif min_value >= np.iinfo(np.int16).min and max_value <= np.iinfo(np.int16).max:
-                 pa_type = pa.int16()
-             elif min_value >= np.iinfo(np.int32).min and max_value <= np.iinfo(np.int32).max:
-                 pa_type = pa.int32()
-             else:
-                 pa_type = pa.int64()
-
-         elif pd.api.types.is_float_dtype(dtype):
-             pa_type = pa.float64()
-
-         elif pd.api.types.is_bool_dtype(dtype):
-             pa_type = pa.bool_()
-
-         elif pd.api.types.is_datetime64_any_dtype(dtype):
-             pa_type = pa.timestamp('ms')
-
-         elif isinstance(dtype, pd.CategoricalDtype) or pd.api.types.is_object_dtype(dtype):
-             pa_type = pa.string()
-
-         else:
-             pa_type = pa.string()
-
-         # Create a field
-         field = pa.field(col_name, pa_type, nullable=nullable)
-         fields.append(field)
-
-     schema = pa.schema(fields)
-     return schema
-
- def pandas_to_parquet_table(df: pd.DataFrame=None, convert: bool=True, partial: bool=False, preserve_index: bool=False, verbose: bool=False) -> pa.Table|None:
-     """
-     Generate a PyArrow Table from a pandas DataFrame.
-
-     Parameters:
-         df (pandas.DataFrame): The DataFrame to generate the table from.
-         table (str): The name of the table.
-
-     Returns:
-         pyarrow.Table: The PyArrow Table object.
-     """
-     if df is None:
-         print_verbose("No DataFrame provided; exiting generate_parquet_table.", verbose)
-         exit
-
-     df = clean_dataframe_columns(df=df, verbose=verbose)
-
-     if convert:
-         df = try_cast_string_columns_to_numeric(df=df, convert_partial=partial, verbose=verbose)
-
-     schema = generate_parquet_schema(df=df, verbose=verbose)
-     try:
-         table = pa.Table.from_pandas(df, schema=schema, preserve_index=preserve_index)
-         return table
-     except Exception as e:
-         print_verbose(f"Error generating PyArrow Table: {e}", verbose)
-         exit
-
- def generate_sql_server_create_table_string(df: pd.DataFrame=None, catalog: str='database', schema: str='dbo', table: str='table', dropexisting: bool=True, verbose: bool=False) -> str|None:
-     """
-     Generate a SQL Server CREATE TABLE string from a pandas DataFrame.
-
-     Parameters:
-         df (pandas.DataFrame): The DataFrame to generate the schema from.
-         table_name (str): The name of the SQL table.
-
-     Returns:
-         str: The SQL Server CREATE TABLE statement.
-     """
-     if df is None:
-         print_verbose("No DataFrame provided; exiting try_cast_string_columns_to_numeric.", verbose)
-         exit
-
-     table_name = f"{sql_quotename(catalog)}.{sql_quotename(schema)}.{sql_quotename(table)}"
-     drop_statement = f"use {sql_quotename(catalog)}\rgo\rif object_id('{table_name}') is not null drop table {table_name};\r" if dropexisting else ""
-
-     create_statement = [f"{drop_statement};create table {table_name} ("]
-     indent = " "
-     column_lines = []
-
-     for column in df.columns:
-         col_data = df[column]
-         col_name = column
-         dtype = col_data.dtype
-
-         # Determine if the column contains any nulls
-         nullable = col_data.isnull().any()
-         null_str = f"{' ' if nullable else 'not'} null"
-
-         # Map pandas dtype to SQL Server type
-         sql_type = None
-
-         if pd.api.types.is_integer_dtype(dtype):
-             min_value = col_data.min()
-             max_value = col_data.max()
-             if min_value >= 0 and max_value <= 255:
-                 sql_type = "tinyint"
-             elif min_value >= -32768 and max_value <= 32767:
-                 sql_type = "smallint"
-             elif min_value >= -2147483648 and max_value <= 2147483647:
-                 sql_type = "int"
-             else:
-                 sql_type = "bigint"
-
-         elif pd.api.types.is_float_dtype(dtype):
-             sql_type = "float"
-
-         elif pd.api.types.is_bool_dtype(dtype):
-             sql_type = "bit"
-
-         elif pd.api.types.is_datetime64_any_dtype(dtype):
-             sql_type = "datetime2"
-
-         elif isinstance(dtype, pd.CategoricalDtype) or pd.api.types.is_object_dtype(dtype):
-             # Determine maximum length of string data
-             max_length = col_data.dropna().astype(str).map(len).max()
-             sql_type = f"nvarchar({str(max_length) if max_length <= 4000 else 'max'})"
-
-         else:
-             sql_type = "nvarchar(max)"
-
-         # Build the column definition
-         column_line = f"{indent}{sql_quotename(col_name)} {sql_type} {null_str},"
-         column_lines.append(column_line)
-
-     # Remove the last comma from the last column definition
-     if column_lines:
-         column_lines[-1] = column_lines[-1].rstrip(',')
-
-     create_statement.extend(column_lines)
-     create_statement.append(");")
-     return_statement = "\r".join(create_statement)
-     return return_statement
+ """Backward-compatibility re-exports.
+
+ This module intentionally contains no independent implementations.
+ All functionality is provided by the newer modules in datablade.dataframes.
+ """
+
+ from ..dataframes.frames import (  # noqa: F401
+     clean_dataframe_columns,
+     generate_parquet_schema,
+     generate_sql_server_create_table_string,
+     pandas_to_parquet_table,
+     try_cast_string_columns_to_numeric,
+     write_to_file_and_sql,
+ )
+
+ __all__ = [
+     "try_cast_string_columns_to_numeric",
+     "clean_dataframe_columns",
+     "generate_parquet_schema",
+     "pandas_to_parquet_table",
+     "generate_sql_server_create_table_string",
+     "write_to_file_and_sql",
+ ]
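For context on what the relocated helpers do, a sketch of the DataFrame-to-DDL flow based on the deleted implementation above; the column values are hypothetical, and the relocated version in datablade.dataframes.frames is assumed to behave equivalently:

    import pandas as pd
    from datablade.core.frames import generate_sql_server_create_table_string

    df = pd.DataFrame({
        "id": [1, 2, 3],             # fits 0..255, so the legacy mapping picks tinyint
        "amount": [1.5, 2.0, 3.25],  # float dtype maps to float
        "label": ["a", "bb", None],  # object dtype maps to nvarchar(<max observed length>)
    })

    ddl = generate_sql_server_create_table_string(
        df=df, catalog="mydb", schema="dbo", table="items", dropexisting=True
    )
    print(ddl)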
datablade/core/json.py CHANGED
@@ -1,10 +1,5 @@
- import requests
- from .messages import print_verbose
-
- def get(url: str, verbose: bool = False, **kwargs) -> dict:
-     """Get JSON data from a URL."""
-     try:
-         response = requests.get(url, **kwargs)
-         return response.json()
-     except requests.exceptions.RequestException as e:
-         print_verbose(f"Error: {e}", verbose=verbose)
+ """Backward-compatibility re-exports for IO JSON helpers."""
+
+ from ..io.json import get  # noqa: F401
+
+ __all__ = ["get"]
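The relocated JSON helper keeps the same one-call pattern as the deleted function. A sketch with a hypothetical URL, assuming the relocated get still returns the parsed response body:

    from datablade.io.json import get

    payload = get("https://example.com/api/items.json", verbose=True)
    print(payload)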
datablade/core/lists.py CHANGED
@@ -1,10 +1,5 @@
-
- def flatten(nest: list) -> list:
-     """Flatten a nested list."""
-     result = []
-     for item in nest:
-         if isinstance(item, list):
-             result.extend(flatten(item))
-         else:
-             result.append(item)
-     return result
+ """Backward-compatibility re-exports for list utilities."""
+
+ from ..utils.lists import flatten  # noqa: F401
+
+ __all__ = ["flatten"]
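The flatten utility collapses arbitrarily nested lists, as in the deleted recursive implementation; assuming the relocated version is equivalent:

    from datablade.utils.lists import flatten

    assert flatten([1, [2, [3, 4]], 5]) == [1, 2, 3, 4, 5]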
datablade/core/messages.py CHANGED
@@ -1,11 +1,23 @@
-
- def print_verbose(message: str, verbose: bool=True) -> None:
-     """
-     Print a message if verbose is True.
-
-     Parameters:
-         message (str): The message to print.
-         verbose (bool): If True, the message will be printed.
-     """
-     if verbose:
-         print(str(message))
+ """Backward-compatibility re-exports for message/logging helpers."""
+
+ from ..utils.messages import (  # noqa: F401
+     configure_logging,
+     get_logger,
+     log,
+     log_debug,
+     log_error,
+     log_info,
+     log_warning,
+     print_verbose,
+ )
+
+ __all__ = [
+     "print_verbose",
+     "log",
+     "log_debug",
+     "log_info",
+     "log_warning",
+     "log_error",
+     "get_logger",
+     "configure_logging",
+ ]
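print_verbose is now backed by the logging helpers in datablade.utils.messages. A sketch of the newer entry points, assuming configure_logging takes no required arguments and get_logger takes a logger name:

    from datablade import configure_logging, get_logger

    configure_logging()                      # assumed to install a default handler
    log = get_logger("datablade.demo")
    log.info("messages now flow through the logging module")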
datablade/core/strings.py CHANGED
@@ -1,43 +1,5 @@
- from .messages import print_verbose
- import pathlib
-
- def sql_quotename(name: str=None, brackets: bool=True, ticks: bool=False, verbose: bool=False) -> str|None:
-     """
-     Quote a SQL Server name string.
-     Parameters:
-         name (str): The name to quote.
-         brackets (bool): Whether to use brackets.
-     Returns:
-         str: The quoted name.
-     """
-     if name is None:
-         print_verbose("No name provided; exiting sql_quotename.", verbose)
-         exit
-     return_value = f"{name.replace('[','').replace(']','')}"
-     if brackets:
-         return_value = f"[{return_value}]"
-     if ticks or not brackets:
-         return_value = f"'{return_value}'"
-     return return_value
-
- def pathing(input: str | pathlib.Path, verbose: bool=False) -> pathlib.Path|None:
-     """
-     Standardize a path string.
-     Parameters:
-         path (str): The path to standardize.
-     Returns:
-         str: The standardized path.
-     """
-     if input is None:
-         print_verbose("No path provided; exiting pathing.", verbose)
-         exit
-     if isinstance(input, str):
-         input.replace('\\','/')
-         input = pathlib.Path(input)
-     else:
-         input = input
-     if input.exists():
-         return input
-     else:
-         print_verbose(f"Path {input} does not exist; exiting pathing.", verbose)
-         exit
+ """Backward-compatibility re-exports for string/path utilities."""
+
+ from ..utils.strings import pathing, sql_quotename  # noqa: F401
+
+ __all__ = ["sql_quotename", "pathing"]
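The quoting behaviour below follows the deleted implementation of sql_quotename (brackets by default, single quotes when brackets are disabled); the relocated helper is assumed to match:

    from datablade.utils.strings import sql_quotename

    assert sql_quotename("orders") == "[orders]"
    assert sql_quotename("orders", brackets=False) == "'orders'"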
datablade/core/zip.py CHANGED
@@ -1,24 +1,5 @@
- import requests, zipfile, io, pathlib
- from .messages import print_verbose
- from .strings import pathing
-
- def get(url:str, path:str|pathlib.Path=None, verbose:bool=False, **kwargs) -> None|io.BytesIO:
-     """Download a file from a URL and save it to a path."""
-     try:
-         print_verbose(f"Downloading {url}", verbose=verbose)
-         data = requests.get(url, **kwargs).content
-         zip_buffer = io.BytesIO(data)
-         if path is None:
-             return zip_buffer
-         else:
-             print_verbose(f"Saving data to {path}", verbose=verbose)
-             zip_buffer.seek(0)
-             with zipfile.ZipFile(zip_buffer, 'r') as zip_ref:
-                 for zip_info in zip_ref.infolist():
-                     extract_path = pathing(path) / zip_info.filename
-                     extract_path.parent.mkdir(parents=True, exist_ok=True)
-                     with open(extract_path, 'wb') as f:
-                         f.write(zip_ref.read(zip_info.filename))
-                         f.close()
-     except requests.exceptions.RequestException as e:
-         print_verbose(f"Error: {e}", verbose=verbose)
+ """Backward-compatibility re-exports for IO ZIP helpers."""
+
+ from ..io.zip import get  # noqa: F401
+
+ __all__ = ["get"]
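The ZIP helper either returns the downloaded archive in memory or extracts it to a directory, mirroring the deleted implementation; the URL and path here are hypothetical, and the relocated version is assumed to keep this behaviour:

    from datablade.io.zip import get

    buffer = get("https://example.com/archive.zip")             # in-memory BytesIO
    get("https://example.com/archive.zip", path="downloads/")   # extract to a folder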
datablade/dataframes/__init__.py ADDED
@@ -0,0 +1,43 @@
+ """
+ DataFrame operations and utilities for data transformation.
+
+ This module provides functions for:
+ - DataFrame column cleaning and type conversion
+ - Parquet schema generation and conversion
+ - SQL Server schema generation
+ - Memory-aware file reading with optional Polars support
+ - Chunked file reading for large files
+ - Partitioned Parquet writing
+ """
+
+ from .frames import (
+     clean_dataframe_columns,
+     generate_parquet_schema,
+     generate_sql_server_create_table_string,
+     pandas_to_parquet_table,
+     try_cast_string_columns_to_numeric,
+     write_to_file_and_sql,
+ )
+ from .readers import (
+     read_file_chunked,
+     read_file_iter,
+     read_file_smart,
+     read_file_to_parquets,
+     stream_to_parquets,
+ )
+
+ __all__ = [
+     # DataFrame operations
+     "try_cast_string_columns_to_numeric",
+     "clean_dataframe_columns",
+     "generate_parquet_schema",
+     "pandas_to_parquet_table",
+     "generate_sql_server_create_table_string",
+     "write_to_file_and_sql",
+     # Memory-aware readers
+     "read_file_chunked",
+     "read_file_iter",
+     "read_file_to_parquets",
+     "stream_to_parquets",
+     "read_file_smart",
+ ]
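A short sketch of the memory-aware readers exposed here; the file paths and rows_per_file value are hypothetical, and the keyword names follow the Blade wrappers earlier in the diff:

    from datablade.dataframes import read_file_smart, read_file_to_parquets

    # Load the whole file when it fits within the configured memory budget.
    df = read_file_smart(file_path="data/big.csv", memory_fraction=0.5, verbose=True)

    # Otherwise split it into partitioned Parquet files on disk.
    read_file_to_parquets(
        file_path="data/big.csv",
        output_dir="out/",
        output_prefix="big",
        rows_per_file=1_000_000,
        convert_types=True,
    )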