nuthatch 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nuthatch might be problematic. Click here for more details.
- nuthatch-0.1.0/.gitignore +13 -0
- nuthatch-0.1.0/.python-version +1 -0
- nuthatch-0.1.0/PKG-INFO +38 -0
- nuthatch-0.1.0/README.md +17 -0
- nuthatch-0.1.0/pyproject.toml +68 -0
- nuthatch-0.1.0/requirements-dev.lock +969 -0
- nuthatch-0.1.0/requirements.lock +166 -0
- nuthatch-0.1.0/src/nuthatch/__init__.py +14 -0
- nuthatch-0.1.0/src/nuthatch/backend.py +301 -0
- nuthatch-0.1.0/src/nuthatch/backends/__init__.py +8 -0
- nuthatch-0.1.0/src/nuthatch/backends/basic.py +28 -0
- nuthatch-0.1.0/src/nuthatch/backends/delta.py +46 -0
- nuthatch-0.1.0/src/nuthatch/backends/parquet.py +130 -0
- nuthatch-0.1.0/src/nuthatch/backends/sql.py +147 -0
- nuthatch-0.1.0/src/nuthatch/backends/terracotta.py +199 -0
- nuthatch-0.1.0/src/nuthatch/backends/zarr.py +207 -0
- nuthatch-0.1.0/src/nuthatch/cache.py +529 -0
- nuthatch-0.1.0/src/nuthatch/cli.py +174 -0
- nuthatch-0.1.0/src/nuthatch/config.py +94 -0
- nuthatch-0.1.0/src/nuthatch/memoizer.py +67 -0
- nuthatch-0.1.0/src/nuthatch/nuthatch.py +498 -0
- nuthatch-0.1.0/src/nuthatch/processor.py +89 -0
- nuthatch-0.1.0/src/nuthatch/processors/__init__.py +6 -0
- nuthatch-0.1.0/src/nuthatch/processors/timeseries.py +157 -0
- nuthatch-0.1.0/src/nuthatch/test_secrets.py +17 -0
- nuthatch-0.1.0/tests/backends/tabular_test.py +62 -0
- nuthatch-0.1.0/tests/backends/test_basic.py +22 -0
- nuthatch-0.1.0/tests/backends/test_delta.py +5 -0
- nuthatch-0.1.0/tests/backends/test_parquet.py +5 -0
- nuthatch-0.1.0/tests/backends/test_sql.py +19 -0
- nuthatch-0.1.0/tests/backends/test_zarr.py +55 -0
- nuthatch-0.1.0/tests/processors/test_timeseries.py +134 -0
- nuthatch-0.1.0/tests/test_backend_identification.py +10 -0
- nuthatch-0.1.0/tests/test_cache_args.py +144 -0
- nuthatch-0.1.0/tests/test_config_registration.py +35 -0
- nuthatch-0.1.0/tests/test_core.py +50 -0
- nuthatch-0.1.0/tests/test_db_metastore.py +97 -0
- nuthatch-0.1.0/tests/test_delta_metastore.py +104 -0
- nuthatch-0.1.0/tests/test_engines.py +1 -0
- nuthatch-0.1.0/tests/test_independent_storage.py +1 -0
- nuthatch-0.1.0/tests/test_local.py +1 -0
- nuthatch-0.1.0/tests/test_memoizer.py +1 -0
- nuthatch-0.1.0/tests/test_namespace.py +1 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12.4
|
nuthatch-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: nuthatch
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Cacheable big data pipelines
|
|
5
|
+
Author-email: Joshua Adkins <josh@rhizaresearch.org>, Genevieve Flaspohler <geneviee@rhizaresearch.org>
|
|
6
|
+
Requires-Python: >=3.12
|
|
7
|
+
Requires-Dist: click
|
|
8
|
+
Requires-Dist: dask-deltatable
|
|
9
|
+
Requires-Dist: dask[dataframe]
|
|
10
|
+
Requires-Dist: deltalake==1.1.2
|
|
11
|
+
Requires-Dist: fsspec
|
|
12
|
+
Requires-Dist: gitpython
|
|
13
|
+
Requires-Dist: pandas
|
|
14
|
+
Requires-Dist: psycopg2
|
|
15
|
+
Requires-Dist: pyarrow
|
|
16
|
+
Requires-Dist: sqlalchemy
|
|
17
|
+
Requires-Dist: terracotta
|
|
18
|
+
Requires-Dist: xarray
|
|
19
|
+
Requires-Dist: zarr
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
22
|
+
# Nuthatch
|
|
23
|
+
|
|
24
|
+
Nuthatch is a tool for building pure-python big data pipelines. At its core it
|
|
25
|
+
enables the transparent multi-level caching and recall of results in formats that
|
|
26
|
+
are efficient for each data type. It supports a variety of
|
|
27
|
+
common storage backends, data processing frameworks, and their associated
|
|
28
|
+
data types for caching.
|
|
29
|
+
|
|
30
|
+
It also provides a framework for re-using and sharing data-type specific
|
|
31
|
+
post-processing, and for these data type
|
|
32
|
+
processors to pass hints to storage backends for more efficient storage and recall.
|
|
33
|
+
|
|
34
|
+
Nuthatch was created to alleviate the common pattern of data processing pipelines manually
|
|
35
|
+
specifying their output storage locations, and the requirements of pipeline builders to
|
|
36
|
+
use external data orchestration tools to specify the execution of their pipelines. With Nuthatch
|
|
37
|
+
simply tag your functions and anyone who has access to your storage backend - you, your
|
|
38
|
+
team, or the public - can access and build off of your most up-to-date data.
|
nuthatch-0.1.0/README.md
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Nuthatch
|
|
2
|
+
|
|
3
|
+
Nuthatch is a tool for building pure-python big data pipelines. At its core it
|
|
4
|
+
enables the transparent multi-level caching and recall of results in formats that
|
|
5
|
+
are efficient for each data type. It supports a variety of
|
|
6
|
+
common storage backends, data processing frameworks, and their associated
|
|
7
|
+
data types for caching.
|
|
8
|
+
|
|
9
|
+
It also provides a framework for re-using and sharing data-type specific
|
|
10
|
+
post-processing, and for these data type
|
|
11
|
+
processors to pass hints to storage backends for more efficient storage and recall.
|
|
12
|
+
|
|
13
|
+
Nuthatch was created to alleviate the common pattern of data processing pipelines manually
|
|
14
|
+
specifying their output storage locations, and the requirements of pipeline builders to
|
|
15
|
+
use external data orchestration tools to specify the execution of their pipelines. With Nuthatch
|
|
16
|
+
simply tag your functions and anyone who has access to your storage backend - you, your
|
|
17
|
+
team, or the public - can access and build off of your most up-to-date data.
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "nuthatch"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Cacheable big data pipelines"
|
|
5
|
+
authors = [
|
|
6
|
+
{ name = "Joshua Adkins", email = "josh@rhizaresearch.org" },
|
|
7
|
+
{ name = "Genevieve Flaspohler", email = "geneviee@rhizaresearch.org" }
|
|
8
|
+
]
|
|
9
|
+
dependencies = [
|
|
10
|
+
"fsspec",
|
|
11
|
+
"sqlalchemy",
|
|
12
|
+
"pandas",
|
|
13
|
+
"dask[dataframe]",
|
|
14
|
+
"deltalake==1.1.2",
|
|
15
|
+
"dask-deltatable",
|
|
16
|
+
"xarray",
|
|
17
|
+
"pyarrow",
|
|
18
|
+
"terracotta",
|
|
19
|
+
"gitpython",
|
|
20
|
+
"psycopg2",
|
|
21
|
+
"zarr",
|
|
22
|
+
"click"
|
|
23
|
+
]
|
|
24
|
+
readme = "README.md"
|
|
25
|
+
requires-python = ">= 3.12"
|
|
26
|
+
|
|
27
|
+
[project.scripts]
|
|
28
|
+
nuthatch = "nuthatch.cli:main"
|
|
29
|
+
|
|
30
|
+
[build-system]
|
|
31
|
+
requires = ["hatchling==1.26.3"]
|
|
32
|
+
build-backend = "hatchling.build"
|
|
33
|
+
|
|
34
|
+
[tool.rye]
|
|
35
|
+
managed = true
|
|
36
|
+
dev-dependencies = [
|
|
37
|
+
"google-cloud-secret-manager",
|
|
38
|
+
"gcsfs",
|
|
39
|
+
"pytest",
|
|
40
|
+
"sheerwater-benchmarking @ git+https://github.com/rhiza-research/sheerwater-benchmarking",
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
[tool.hatch.metadata]
|
|
44
|
+
allow-direct-references = true
|
|
45
|
+
|
|
46
|
+
[tool.hatch.build.targets.wheel]
|
|
47
|
+
packages = ["src/nuthatch"]
|
|
48
|
+
exclude = [
|
|
49
|
+
"src/nuthatch/test_secrets.py" # Exclude a specific file
|
|
50
|
+
]
|
|
51
|
+
|
|
52
|
+
[tool.nuthatch]
|
|
53
|
+
filesystem = "gs://sheerwater-datalake/caches"
|
|
54
|
+
host = "postgres.sheerwater.rhizaresearch.org"
|
|
55
|
+
port = 5432
|
|
56
|
+
database = "postgres"
|
|
57
|
+
username = "write"
|
|
58
|
+
driver = "postgresql"
|
|
59
|
+
raster_store = "gs://terracotta-files"
|
|
60
|
+
metadata_location = "database"
|
|
61
|
+
dynamic_config_path = "nuthatch.test_secrets"
|
|
62
|
+
|
|
63
|
+
[tool.nuthatch.filesystem_options]
|
|
64
|
+
token = "google_default"
|
|
65
|
+
cache_timeout = 0
|
|
66
|
+
|
|
67
|
+
[tool.nuthatch.local]
|
|
68
|
+
filesystem = ".cache/"
|