nuthatch 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nuthatch might be problematic. Click here for more details.

Files changed (43) hide show
  1. nuthatch-0.1.0/.gitignore +13 -0
  2. nuthatch-0.1.0/.python-version +1 -0
  3. nuthatch-0.1.0/PKG-INFO +38 -0
  4. nuthatch-0.1.0/README.md +17 -0
  5. nuthatch-0.1.0/pyproject.toml +68 -0
  6. nuthatch-0.1.0/requirements-dev.lock +969 -0
  7. nuthatch-0.1.0/requirements.lock +166 -0
  8. nuthatch-0.1.0/src/nuthatch/__init__.py +14 -0
  9. nuthatch-0.1.0/src/nuthatch/backend.py +301 -0
  10. nuthatch-0.1.0/src/nuthatch/backends/__init__.py +8 -0
  11. nuthatch-0.1.0/src/nuthatch/backends/basic.py +28 -0
  12. nuthatch-0.1.0/src/nuthatch/backends/delta.py +46 -0
  13. nuthatch-0.1.0/src/nuthatch/backends/parquet.py +130 -0
  14. nuthatch-0.1.0/src/nuthatch/backends/sql.py +147 -0
  15. nuthatch-0.1.0/src/nuthatch/backends/terracotta.py +199 -0
  16. nuthatch-0.1.0/src/nuthatch/backends/zarr.py +207 -0
  17. nuthatch-0.1.0/src/nuthatch/cache.py +529 -0
  18. nuthatch-0.1.0/src/nuthatch/cli.py +174 -0
  19. nuthatch-0.1.0/src/nuthatch/config.py +94 -0
  20. nuthatch-0.1.0/src/nuthatch/memoizer.py +67 -0
  21. nuthatch-0.1.0/src/nuthatch/nuthatch.py +498 -0
  22. nuthatch-0.1.0/src/nuthatch/processor.py +89 -0
  23. nuthatch-0.1.0/src/nuthatch/processors/__init__.py +6 -0
  24. nuthatch-0.1.0/src/nuthatch/processors/timeseries.py +157 -0
  25. nuthatch-0.1.0/src/nuthatch/test_secrets.py +17 -0
  26. nuthatch-0.1.0/tests/backends/tabular_test.py +62 -0
  27. nuthatch-0.1.0/tests/backends/test_basic.py +22 -0
  28. nuthatch-0.1.0/tests/backends/test_delta.py +5 -0
  29. nuthatch-0.1.0/tests/backends/test_parquet.py +5 -0
  30. nuthatch-0.1.0/tests/backends/test_sql.py +19 -0
  31. nuthatch-0.1.0/tests/backends/test_zarr.py +55 -0
  32. nuthatch-0.1.0/tests/processors/test_timeseries.py +134 -0
  33. nuthatch-0.1.0/tests/test_backend_identification.py +10 -0
  34. nuthatch-0.1.0/tests/test_cache_args.py +144 -0
  35. nuthatch-0.1.0/tests/test_config_registration.py +35 -0
  36. nuthatch-0.1.0/tests/test_core.py +50 -0
  37. nuthatch-0.1.0/tests/test_db_metastore.py +97 -0
  38. nuthatch-0.1.0/tests/test_delta_metastore.py +104 -0
  39. nuthatch-0.1.0/tests/test_engines.py +1 -0
  40. nuthatch-0.1.0/tests/test_independent_storage.py +1 -0
  41. nuthatch-0.1.0/tests/test_local.py +1 -0
  42. nuthatch-0.1.0/tests/test_memoizer.py +1 -0
  43. nuthatch-0.1.0/tests/test_namespace.py +1 -0
@@ -0,0 +1,13 @@
1
+ # python generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # venv
10
+ .venv
11
+
12
+ .cache
13
+ .cache*
@@ -0,0 +1 @@
1
+ 3.12.4
@@ -0,0 +1,38 @@
1
+ Metadata-Version: 2.3
2
+ Name: nuthatch
3
+ Version: 0.1.0
4
+ Summary: Cacheable big data pipelines
5
+ Author-email: Joshua Adkins <josh@rhizaresearch.org>, Genevieve Flaspohler <geneviee@rhizaresearch.org>
6
+ Requires-Python: >=3.12
7
+ Requires-Dist: click
8
+ Requires-Dist: dask-deltatable
9
+ Requires-Dist: dask[dataframe]
10
+ Requires-Dist: deltalake==1.1.2
11
+ Requires-Dist: fsspec
12
+ Requires-Dist: gitpython
13
+ Requires-Dist: pandas
14
+ Requires-Dist: psycopg2
15
+ Requires-Dist: pyarrow
16
+ Requires-Dist: sqlalchemy
17
+ Requires-Dist: terracotta
18
+ Requires-Dist: xarray
19
+ Requires-Dist: zarr
20
+ Description-Content-Type: text/markdown
21
+
22
+ # Nuthatch
23
+
24
+ Nuthatch is a tool for building pure-python big data pipelines. At its core it
25
+ enables the transparent multi-level caching and recall of results in formats that
26
+ are efficient for each data type. It supports a variety of
27
+ common storage backends, data processing frameworks, and their associated
28
+ data types for caching.
29
+
30
+ It also provides a framework for re-using and sharing data-type specific
31
+ post-processing, and for these data type
32
+ processors to pass hints to storage backends for more efficient storage and recall.
33
+
34
+ Nuthatch was created to alleviate the common pattern of data processing pipelines manually
35
+ specifying their output storage locations, and the requirements of pipeline builders to
36
+ use external data orchestration tools to specify the execution of their pipelines. With Nuthatch,
37
+ simply tag your functions and anyone who has access to your storage backend - you, your
38
+ team, or the public - can access and build off of your most up-to-date data.
@@ -0,0 +1,17 @@
1
+ # Nuthatch
2
+
3
+ Nuthatch is a tool for building pure-python big data pipelines. At its core it
4
+ enables the transparent multi-level caching and recall of results in formats that
5
+ are efficient for each data type. It supports a variety of
6
+ common storage backends, data processing frameworks, and their associated
7
+ data types for caching.
8
+
9
+ It also provides a framework for re-using and sharing data-type specific
10
+ post-processing, and for these data type
11
+ processors to pass hints to storage backends for more efficient storage and recall.
12
+
13
+ Nuthatch was created to alleviate the common pattern of data processing pipelines manually
14
+ specifying their output storage locations, and the requirements of pipeline builders to
15
+ use external data orchestration tools to specify the execution of their pipelines. With Nuthatch,
16
+ simply tag your functions and anyone who has access to your storage backend - you, your
17
+ team, or the public - can access and build off of your most up-to-date data.
@@ -0,0 +1,68 @@
1
+ [project]
2
+ name = "nuthatch"
3
+ version = "0.1.0"
4
+ description = "Cacheable big data pipelines"
5
+ authors = [
6
+ { name = "Joshua Adkins", email = "josh@rhizaresearch.org" },
7
+ { name = "Genevieve Flaspohler", email = "geneviee@rhizaresearch.org" }
8
+ ]
9
+ dependencies = [
10
+ "fsspec",
11
+ "sqlalchemy",
12
+ "pandas",
13
+ "dask[dataframe]",
14
+ "deltalake==1.1.2",
15
+ "dask-deltatable",
16
+ "xarray",
17
+ "pyarrow",
18
+ "terracotta",
19
+ "gitpython",
20
+ "psycopg2",
21
+ "zarr",
22
+ "click"
23
+ ]
24
+ readme = "README.md"
25
+ requires-python = ">= 3.12"
26
+
27
+ [project.scripts]
28
+ nuthatch = "nuthatch.cli:main"
29
+
30
+ [build-system]
31
+ requires = ["hatchling==1.26.3"]
32
+ build-backend = "hatchling.build"
33
+
34
+ [tool.rye]
35
+ managed = true
36
+ dev-dependencies = [
37
+ "google-cloud-secret-manager",
38
+ "gcsfs",
39
+ "pytest",
40
+ "sheerwater-benchmarking @ git+https://github.com/rhiza-research/sheerwater-benchmarking",
41
+ ]
42
+
43
+ [tool.hatch.metadata]
44
+ allow-direct-references = true
45
+
46
+ [tool.hatch.build.targets.wheel]
47
+ packages = ["src/nuthatch"]
48
+ exclude = [
49
+ "src/nuthatch/test_secrets.py" # Exclude a specific file
50
+ ]
51
+
52
+ [tool.nuthatch]
53
+ filesystem = "gs://sheerwater-datalake/caches"
54
+ host = "postgres.sheerwater.rhizaresearch.org"
55
+ port = 5432
56
+ database = "postgres"
57
+ username = "write"
58
+ driver = "postgresql"
59
+ raster_store = "gs://terracotta-files"
60
+ metadata_location = "database"
61
+ dynamic_config_path = "nuthatch.test_secrets"
62
+
63
+ [tool.nuthatch.filesystem_options]
64
+ token = "google_default"
65
+ cache_timeout = 0
66
+
67
+ [tool.nuthatch.local]
68
+ filesystem = ".cache/"