datagrunt 0.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Martin Graham
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,155 @@
1
+ Metadata-Version: 2.1
2
+ Name: datagrunt
3
+ Version: 0.0.0
4
+ Summary: Read CSV files and convert to other file formats easily
5
+ Author-email: Martin Graham <datagrunt@datagrunt.io>
6
+ License: MIT License
7
+ Project-URL: Homepage, https://pmgraham.github.io/datagrunt-docs
8
+ Project-URL: Bug Tracker, https://github.com/pmgraham/datagrunt/issues
9
+ Project-URL: Documentation, https://pmgraham.github.io/datagrunt-docs
10
+ Project-URL: Source Code, https://github.com/pmgraham/datagrunt
11
+ Keywords: csv,data,duckdb,polars,pyarrow,xlsx,delimiter
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Intended Audience :: Developers
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Requires-Python: >=3.10
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: duckdb>=1.1.0
23
+ Requires-Dist: polars>=1.7.1
24
+ Requires-Dist: pyarrow>=17.0.0
25
+ Requires-Dist: XlsxWriter>=3.2.0
26
+ Provides-Extra: dev
27
+ Requires-Dist: pytest>=7.0; extra == "dev"
28
+ Requires-Dist: pytest-cov>=3.0; extra == "dev"
29
+ Requires-Dist: black; extra == "dev"
30
+ Requires-Dist: isort; extra == "dev"
31
+ Requires-Dist: flake8; extra == "dev"
32
+ Provides-Extra: build
33
+ Requires-Dist: build; extra == "build"
34
+ Requires-Dist: twine; extra == "build"
35
+ Requires-Dist: bumpver; extra == "build"
36
+
37
+ # Welcome To Datagrunt
38
+
39
+ Datagrunt is a Python library designed to simplify the way you work with CSV files. It provides a streamlined approach to reading, processing, and transforming your data into various formats, making data manipulation efficient and intuitive.
40
+
41
+ ## Why Datagrunt?
42
+
43
+ Born out of real-world frustration, Datagrunt eliminates the need for repetitive coding when handling CSV files. Whether you're a data analyst, data engineer, or data scientist, Datagrunt empowers you to focus on insights, not tedious data wrangling.
44
+
45
+ ## Key Features
46
+
47
+ - **Intelligent Delimiter Inference:** Datagrunt automatically detects and applies the correct delimiter for your CSV files.
48
+ - **Seamless Data Processing:** Leverage the robust capabilities of [DuckDB](https://duckdb.org) and [Polars](https://pola.rs) to perform advanced data processing tasks directly on your CSV data.
49
+ - **Flexible Transformation:** Easily convert your processed CSV data into various formats to suit your needs.
50
+ - **Pythonic API:** Enjoy a clean and intuitive API that integrates seamlessly into your existing Python workflows.
51
+
52
+ ## Installation
53
+
54
+ Get started with Datagrunt in seconds using pip:
55
+
56
+ ```bash
57
+ pip install datagrunt
58
+ ```
59
+
60
+ ## Getting Started
61
+
62
+ ```python
63
+ from datagrunt import CSVReader
64
+
65
+ # Load your CSV file
66
+ csv_file = 'electric_vehicle_population_data.csv'
67
+ engine = 'duckdb'
68
+
69
+ # Set duckdb as the processing engine. Engine set to 'polars' by default
70
+ dg = CSVReader(csv_file, engine=engine)
71
+
72
+ # return sample of the data to get a peek at the schema
73
+ dg.get_sample()
74
+ ┌────────────┬───────────┬──────────────┬───┬──────────────────────┬──────────────────────┬───────────────────┐
75
+ │ VIN (1-10) │ County │ City │ … │ Vehicle Location │ Electric Utility │ 2020 Census Tract │
76
+ │ varchar │ varchar │ varchar │ │ varchar │ varchar │ varchar │
77
+ ├────────────┼───────────┼──────────────┼───┼──────────────────────┼──────────────────────┼───────────────────┤
78
+ │ 5YJSA1E28K │ Snohomish │ Mukilteo │ … │ POINT (-122.29943 … │ PUGET SOUND ENERGY… │ 53061042001 │
79
+ │ 1C4JJXP68P │ Yakima │ Yakima │ … │ POINT (-120.468875… │ PACIFICORP │ 53077001601 │
80
+ │ WBY8P6C05L │ Kitsap │ Kingston │ … │ POINT (-122.517835… │ PUGET SOUND ENERGY… │ 53035090102 │
81
+ │ JTDKARFP1J │ Kitsap │ Port Orchard │ … │ POINT (-122.653005… │ PUGET SOUND ENERGY… │ 53035092802 │
82
+ │ 5UXTA6C09N │ Snohomish │ Everett │ … │ POINT (-122.203234… │ PUGET SOUND ENERGY… │ 53061041605 │
83
+ │ 5YJYGDEF8L │ King │ Seattle │ … │ POINT (-122.378886… │ CITY OF SEATTLE - … │ 53033004703 │
84
+ │ JTMAB3FV7P │ Thurston │ Rainier │ … │ POINT (-122.677141… │ PUGET SOUND ENERGY… │ 53067012530 │
85
+ │ JN1AZ0CPXC │ King │ Kirkland │ … │ POINT (-122.192596… │ PUGET SOUND ENERGY… │ 53033022402 │
86
+ │ JN1AZ0CP7B │ King │ Kirkland │ … │ POINT (-122.192596… │ PUGET SOUND ENERGY… │ 53033022603 │
87
+ │ 1N4AZ0CP0F │ Thurston │ Olympia │ … │ POINT (-122.86491 … │ PUGET SOUND ENERGY… │ 53067010300 │
88
+ │ · │ · │ · │ · │ · │ · │ · │
89
+ │ · │ · │ · │ · │ · │ · │ · │
90
+ │ · │ · │ · │ · │ · │ · │ · │
91
+ │ 5YJYGDEE7M │ Clark │ Vancouver │ … │ POINT (-122.515805… │ BONNEVILLE POWER A… │ 53011041310 │
92
+ │ 7SAYGAEE0P │ Snohomish │ Monroe │ … │ POINT (-121.968385… │ PUGET SOUND ENERGY… │ 53061052203 │
93
+ │ 2C4RC1N75P │ King │ Burien │ … │ POINT (-122.347227… │ CITY OF SEATTLE - … │ 53033027600 │
94
+ │ 1FTVW1EVXP │ King │ Kirkland │ … │ POINT (-122.202653… │ PUGET SOUND ENERGY… │ 53033022300 │
95
+ │ 4JGGM1CB2P │ King │ Seattle │ … │ POINT (-122.2453 4… │ CITY OF SEATTLE - … │ 53033011700 │
96
+ │ 1N4BZ0CP0G │ King │ Seattle │ … │ POINT (-122.334079… │ CITY OF SEATTLE - … │ 53033008300 │
97
+ │ 7SAYGDEF2N │ King │ Bellevue │ … │ POINT (-122.144149… │ PUGET SOUND ENERGY… │ 53033024704 │
98
+ │ 1N4BZ1DP7L │ King │ Bellevue │ … │ POINT (-122.144149… │ PUGET SOUND ENERGY… │ 53033024902 │
99
+ ...
100
+ ├────────────┴───────────┴──────────────┴───┴──────────────────────┴──────────────────────┴───────────────────┤
101
+ │ ? rows (>9999 rows, 20 shown) 17 columns (6 shown) │
102
+ └─────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
103
+ ```
104
+
105
+ ## DuckDB Integration for Performant SQL Queries
106
+ ```python
107
+ from datagrunt import CSVReader
108
+
109
+ csv_file = 'electric_vehicle_population_data.csv'
110
+ engine = 'duckdb'
111
+
112
+ dg = CSVReader(csv_file, engine=engine)
113
+
114
+ # Construct your SQL query
115
+ query = f"""
116
+ WITH core AS (
117
+ SELECT
118
+ City AS city,
119
+ "VIN (1-10)" AS vin
120
+ FROM {dg.db_table}
121
+ )
122
+ SELECT
123
+ city,
124
+ COUNT(vin) AS vehicle_count
125
+ FROM core
126
+ GROUP BY 1
127
+ ORDER BY 2 DESC
128
+ """
129
+
130
+ # Execute the query and get results as a Polars DataFrame
131
+ df = dg.query_data(query).pl()
132
+ print(df)
133
+ ┌────────────────┬───────────────┐
134
+ │ city ┆ vehicle_count │
135
+ │ --- ┆ --- │
136
+ │ str ┆ i64 │
137
+ ╞════════════════╪═══════════════╡
138
+ │ Seattle ┆ 32602 │
139
+ │ Bellevue ┆ 9960 │
140
+ │ Redmond ┆ 7165 │
141
+ │ Vancouver ┆ 7081 │
142
+ │ Bothell ┆ 6602 │
143
+ │ … ┆ … │
144
+ │ Glenwood ┆ 1 │
145
+ │ Walla Walla Co ┆ 1 │
146
+ │ Pittsburg ┆ 1 │
147
+ │ Decatur ┆ 1 │
148
+ │ Redwood City ┆ 1 │
149
+ └────────────────┴───────────────┘
150
+ ```
151
+ ## License
152
+ This project is licensed under the [MIT License](https://opensource.org/license/mit)
153
+
154
+ ## Acknowledgements
155
+ A HUGE thank you to the open source community and the creators of [DuckDB](https://duckdb.org) and [Polars](https://pola.rs) for their fantastic libraries that power Datagrunt.
@@ -0,0 +1,119 @@
1
+ # Welcome To Datagrunt
2
+
3
+ Datagrunt is a Python library designed to simplify the way you work with CSV files. It provides a streamlined approach to reading, processing, and transforming your data into various formats, making data manipulation efficient and intuitive.
4
+
5
+ ## Why Datagrunt?
6
+
7
+ Born out of real-world frustration, Datagrunt eliminates the need for repetitive coding when handling CSV files. Whether you're a data analyst, data engineer, or data scientist, Datagrunt empowers you to focus on insights, not tedious data wrangling.
8
+
9
+ ## Key Features
10
+
11
+ - **Intelligent Delimiter Inference:** Datagrunt automatically detects and applies the correct delimiter for your CSV files.
12
+ - **Seamless Data Processing:** Leverage the robust capabilities of [DuckDB](https://duckdb.org) and [Polars](https://pola.rs) to perform advanced data processing tasks directly on your CSV data.
13
+ - **Flexible Transformation:** Easily convert your processed CSV data into various formats to suit your needs.
14
+ - **Pythonic API:** Enjoy a clean and intuitive API that integrates seamlessly into your existing Python workflows.
15
+
16
+ ## Installation
17
+
18
+ Get started with Datagrunt in seconds using pip:
19
+
20
+ ```bash
21
+ pip install datagrunt
22
+ ```
23
+
24
+ ## Getting Started
25
+
26
+ ```python
27
+ from datagrunt import CSVReader
28
+
29
+ # Load your CSV file
30
+ csv_file = 'electric_vehicle_population_data.csv'
31
+ engine = 'duckdb'
32
+
33
+ # Set duckdb as the processing engine. Engine set to 'polars' by default
34
+ dg = CSVReader(csv_file, engine=engine)
35
+
36
+ # return sample of the data to get a peek at the schema
37
+ dg.get_sample()
38
+ ┌────────────┬───────────┬──────────────┬───┬──────────────────────┬──────────────────────┬───────────────────┐
39
+ │ VIN (1-10) │ County │ City │ … │ Vehicle Location │ Electric Utility │ 2020 Census Tract │
40
+ │ varchar │ varchar │ varchar │ │ varchar │ varchar │ varchar │
41
+ ├────────────┼───────────┼──────────────┼───┼──────────────────────┼──────────────────────┼───────────────────┤
42
+ │ 5YJSA1E28K │ Snohomish │ Mukilteo │ … │ POINT (-122.29943 … │ PUGET SOUND ENERGY… │ 53061042001 │
43
+ │ 1C4JJXP68P │ Yakima │ Yakima │ … │ POINT (-120.468875… │ PACIFICORP │ 53077001601 │
44
+ │ WBY8P6C05L │ Kitsap │ Kingston │ … │ POINT (-122.517835… │ PUGET SOUND ENERGY… │ 53035090102 │
45
+ │ JTDKARFP1J │ Kitsap │ Port Orchard │ … │ POINT (-122.653005… │ PUGET SOUND ENERGY… │ 53035092802 │
46
+ │ 5UXTA6C09N │ Snohomish │ Everett │ … │ POINT (-122.203234… │ PUGET SOUND ENERGY… │ 53061041605 │
47
+ │ 5YJYGDEF8L │ King │ Seattle │ … │ POINT (-122.378886… │ CITY OF SEATTLE - … │ 53033004703 │
48
+ │ JTMAB3FV7P │ Thurston │ Rainier │ … │ POINT (-122.677141… │ PUGET SOUND ENERGY… │ 53067012530 │
49
+ │ JN1AZ0CPXC │ King │ Kirkland │ … │ POINT (-122.192596… │ PUGET SOUND ENERGY… │ 53033022402 │
50
+ │ JN1AZ0CP7B │ King │ Kirkland │ … │ POINT (-122.192596… │ PUGET SOUND ENERGY… │ 53033022603 │
51
+ │ 1N4AZ0CP0F │ Thurston │ Olympia │ … │ POINT (-122.86491 … │ PUGET SOUND ENERGY… │ 53067010300 │
52
+ │ · │ · │ · │ · │ · │ · │ · │
53
+ │ · │ · │ · │ · │ · │ · │ · │
54
+ │ · │ · │ · │ · │ · │ · │ · │
55
+ │ 5YJYGDEE7M │ Clark │ Vancouver │ … │ POINT (-122.515805… │ BONNEVILLE POWER A… │ 53011041310 │
56
+ │ 7SAYGAEE0P │ Snohomish │ Monroe │ … │ POINT (-121.968385… │ PUGET SOUND ENERGY… │ 53061052203 │
57
+ │ 2C4RC1N75P │ King │ Burien │ … │ POINT (-122.347227… │ CITY OF SEATTLE - … │ 53033027600 │
58
+ │ 1FTVW1EVXP │ King │ Kirkland │ … │ POINT (-122.202653… │ PUGET SOUND ENERGY… │ 53033022300 │
59
+ │ 4JGGM1CB2P │ King │ Seattle │ … │ POINT (-122.2453 4… │ CITY OF SEATTLE - … │ 53033011700 │
60
+ │ 1N4BZ0CP0G │ King │ Seattle │ … │ POINT (-122.334079… │ CITY OF SEATTLE - … │ 53033008300 │
61
+ │ 7SAYGDEF2N │ King │ Bellevue │ … │ POINT (-122.144149… │ PUGET SOUND ENERGY… │ 53033024704 │
62
+ │ 1N4BZ1DP7L │ King │ Bellevue │ … │ POINT (-122.144149… │ PUGET SOUND ENERGY… │ 53033024902 │
63
+ ...
64
+ ├────────────┴───────────┴──────────────┴───┴──────────────────────┴──────────────────────┴───────────────────┤
65
+ │ ? rows (>9999 rows, 20 shown) 17 columns (6 shown) │
66
+ └─────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
67
+ ```
68
+
69
+ ## DuckDB Integration for Performant SQL Queries
70
+ ```python
71
+ from datagrunt import CSVReader
72
+
73
+ csv_file = 'electric_vehicle_population_data.csv'
74
+ engine = 'duckdb'
75
+
76
+ dg = CSVReader(csv_file, engine=engine)
77
+
78
+ # Construct your SQL query
79
+ query = f"""
80
+ WITH core AS (
81
+ SELECT
82
+ City AS city,
83
+ "VIN (1-10)" AS vin
84
+ FROM {dg.db_table}
85
+ )
86
+ SELECT
87
+ city,
88
+ COUNT(vin) AS vehicle_count
89
+ FROM core
90
+ GROUP BY 1
91
+ ORDER BY 2 DESC
92
+ """
93
+
94
+ # Execute the query and get results as a Polars DataFrame
95
+ df = dg.query_data(query).pl()
96
+ print(df)
97
+ ┌────────────────┬───────────────┐
98
+ │ city ┆ vehicle_count │
99
+ │ --- ┆ --- │
100
+ │ str ┆ i64 │
101
+ ╞════════════════╪═══════════════╡
102
+ │ Seattle ┆ 32602 │
103
+ │ Bellevue ┆ 9960 │
104
+ │ Redmond ┆ 7165 │
105
+ │ Vancouver ┆ 7081 │
106
+ │ Bothell ┆ 6602 │
107
+ │ … ┆ … │
108
+ │ Glenwood ┆ 1 │
109
+ │ Walla Walla Co ┆ 1 │
110
+ │ Pittsburg ┆ 1 │
111
+ │ Decatur ┆ 1 │
112
+ │ Redwood City ┆ 1 │
113
+ └────────────────┴───────────────┘
114
+ ```
115
+ ## License
116
+ This project is licensed under the [MIT License](https://opensource.org/license/mit)
117
+
118
+ ## Acknowledgements
119
+ A HUGE thank you to the open source community and the creators of [DuckDB](https://duckdb.org) and [Polars](https://pola.rs) for their fantastic libraries that power Datagrunt.
@@ -0,0 +1,71 @@
1
+ [build-system]
2
+ requires = ["setuptools>=74.0.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "datagrunt"
7
+ version = "0.0.0"
8
+ description = "Read CSV files and convert to other file formats easily"
9
+ readme = "README.md"
10
+ authors = [{ name = "Martin Graham", email = "datagrunt@datagrunt.io" }]
11
+ license = {text = "MIT License"}
12
+ classifiers = [
13
+ "License :: OSI Approved :: MIT License",
14
+ "Programming Language :: Python",
15
+ "Programming Language :: Python :: 3",
16
+ "Programming Language :: Python :: 3.10",
17
+ "Programming Language :: Python :: 3.11",
18
+ "Intended Audience :: Developers",
19
+ "Topic :: Software Development :: Libraries :: Python Modules",
20
+ ]
21
+ keywords = ["csv", "data", "duckdb", "polars", "pyarrow", "xlsx", "delimiter"]
22
+ dependencies = [
23
+ "duckdb>=1.1.0",
24
+ "polars>=1.7.1",
25
+ "pyarrow>=17.0.0",
26
+ "XlsxWriter>=3.2.0"
27
+ ]
28
+ requires-python = ">=3.10"
29
+
30
+ [project.optional-dependencies]
31
+ dev = ["pytest>=7.0", "pytest-cov>=3.0", "black", "isort", "flake8"]
32
+ build = ["build", "twine", "bumpver"]
33
+
34
+ [project.urls]
35
+ Homepage = "https://pmgraham.github.io/datagrunt-docs"
36
+ "Bug Tracker" = "https://github.com/pmgraham/datagrunt/issues"
37
+ Documentation = "https://pmgraham.github.io/datagrunt-docs"
38
+ "Source Code" = "https://github.com/pmgraham/datagrunt"
39
+
40
+ [tool.setuptools.packages.find]
41
+ where = ["src"]
42
+ include = ["datagrunt*"]
43
+ exclude = ["tests*"]
44
+
45
+ [tool.bumpver]
46
+ current_version = "0.0.0"
47
+ version_pattern = "MAJOR.MINOR.PATCH"
48
+ commit_message = "bump version {old_version} -> {new_version}"
49
+ commit = true
50
+ tag = true
51
+ push = false
52
+
53
+ [tool.bumpver.file_patterns]
54
+ "pyproject.toml" = [
55
+ 'current_version = "{version}"',
56
+ 'version = "{version}"',
57
+ ]
58
+ "src/datagrunt/__init__.py" = [
59
+ '^__version__ = "{version}"$',
60
+ ]
61
+
62
+ [tool.black]
63
+ line-length = 88
64
+ target-version = ['py310']
65
+
66
+ [tool.isort]
67
+ profile = "black"
68
+
69
+ [tool.flake8]
70
+ max-line-length = 88
71
+ extend-ignore = "E203"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,39 @@
1
"""
Datagrunt

A Python library designed to simplify the way you work with CSV files.

This package provides inferred CSV delimiters and helper classes for reading
and writing CSV files.

Example:
    A simple example of how to use the main functionality of the package:

        from datagrunt import CSVReader

        csv_file = 'electric_vehicle_population_data.csv'
        engine = 'duckdb'

        dg = CSVReader(csv_file, engine=engine)

        dg.get_sample()

Attributes:
    __version__: A string representing the version of this package.
    __author__: The name of the package author.
    __license__: The license under which the package is released.
"""

__version__ = "0.0.0"
__author__ = "Martin Graham"
__license__ = "MIT"

# standard library
import logging

# Public classes re-exported at the package level.
from .csvfile import CSVReader, CSVWriter

# Names exported by "from datagrunt import *".
__all__ = ['CSVReader', 'CSVWriter']

# Attach a NullHandler to the package logger so the package itself adds no
# output handler; logging configuration is left to the application.
logging.getLogger(__name__).addHandler(logging.NullHandler())
File without changes
@@ -0,0 +1,53 @@
1
+ """Module for interfacing with databases."""
2
+
3
+ # standard library
4
+ import os
5
+ from pathlib import Path
6
+ import re
7
+
8
+ # third party libraries
9
+ import duckdb
10
+
11
class DuckDBDatabase:
    """Configure a local, file-backed DuckDB database for file processing.

    The database file name and the import table name are both derived from
    the stem of ``filepath`` with every character outside ``a-zA-Z0-9``
    removed. The database file name is ``<cleaned stem>.db`` with no
    directory component, and the file is removed again when the instance is
    finalized (see ``__del__``).
    """
    DEFAULT_ENCODING = 'utf-8'
    DEFAULT_THREAD_COUNT = 16

    def __init__(self, filepath):
        """
        Initialize the DuckDBDatabase class.

        Args:
            filepath (str): Path to the file.
        """
        self.filepath = filepath
        self.database_filename = self._set_database_filename()
        self.database_table_name = self._set_database_table_name()
        self.database_connection = self._set_database_connection()

    def __del__(self):
        """Delete the .db file after use.

        Runs as a finalizer, so it must tolerate a partially initialized
        instance and a database file that no longer exists.
        """
        # __init__ may have raised before the attribute was assigned; the
        # finalizer still runs in that case and must not fail on the lookup.
        database_filename = getattr(self, 'database_filename', None)
        if database_filename is None:
            return
        try:
            os.remove(database_filename)
        except FileNotFoundError:
            # Nothing to clean up: the file was never created or is already
            # gone. Other OS errors are intentionally not hidden.
            pass

    def _format_filename_string(self) -> str:
        """Return the file stem with all non alphanumeric characters removed."""
        return re.sub(r'[^a-zA-Z0-9]', '', Path(self.filepath).stem)

    def _set_database_filename(self) -> str:
        """Return name of duckdb file created at runtime."""
        return f'{self._format_filename_string()}.db'

    def _set_database_table_name(self) -> str:
        """Return name of duckdb import table created during file import."""
        return self._format_filename_string()

    def _set_database_connection(self, threads=DEFAULT_THREAD_COUNT):
        """Establish a connection with duckdb.

        Args:
            threads (int): Number of threads to use for duckdb.

        Returns:
            The connection object returned by ``duckdb.connect`` for
            ``self.database_filename``.
        """
        return duckdb.connect(self.database_filename, config={'threads': threads})