sciduckdb 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sciduckdb-0.1.0/.gitignore +15 -0
- sciduckdb-0.1.0/PKG-INFO +57 -0
- sciduckdb-0.1.0/README.md +28 -0
- sciduckdb-0.1.0/pyproject.toml +79 -0
- sciduckdb-0.1.0/src/sciduckdb/__init__.py +31 -0
- sciduckdb-0.1.0/src/sciduckdb/sciduckdb.py +1276 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
.venv
|
|
2
|
+
.DS_Store
|
|
3
|
+
*/*/__pycache__
|
|
4
|
+
others_projects/sciforge
|
|
5
|
+
scistack-gui/extension/node_modules/
|
|
6
|
+
scistack-gui/frontend/node_modules/__pycache__/
|
|
7
|
+
*.pyc
|
|
8
|
+
__pycache__/
|
|
9
|
+
*.pyc
|
|
10
|
+
*.pyo
|
|
11
|
+
|
|
12
|
+
# Generated database artifacts (DuckDB data/lineage + write-ahead logs)
|
|
13
|
+
*.duckdb
|
|
14
|
+
*.duckdb.wal
|
|
15
|
+
*.wal
|
sciduckdb-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sciduckdb
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A thin DuckDB layer for managing versioned scientific data
|
|
5
|
+
Author: SciStack Contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Keywords: data-management,duckdb,scientific-data,versioning
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Requires-Dist: duckdb>=0.9.0
|
|
21
|
+
Requires-Dist: numpy>=1.20
|
|
22
|
+
Requires-Dist: pandas>=1.3
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: mypy>=1.0; extra == 'dev'
|
|
25
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
26
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
27
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# SciDuck
|
|
31
|
+
|
|
32
|
+
A thin DuckDB layer for managing versioned scientific data.
|
|
33
|
+
|
|
34
|
+
Each variable is stored in its own table. Variables are associated with a hierarchical dataset schema (e.g. subject -> session -> trial) and can be saved at any level of that hierarchy. Multiple versions of each variable are supported natively.
|
|
35
|
+
|
|
36
|
+
All data -- including arrays -- is stored in queryable DuckDB types (LIST, nested LIST, JSON) so the database can be inspected with DBeaver or any DuckDB-compatible viewer.
|
|
37
|
+
|
|
38
|
+
## Usage
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
from sciduckdb import SciDuck
|
|
42
|
+
|
|
43
|
+
duck = SciDuck("data.duckdb", dataset_schema=["subject", "session"])
|
|
44
|
+
duck.save("MyVar", data, subject="S01", session=1)
|
|
45
|
+
loaded = duck.load("MyVar", subject="S01", session=1)
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Features
|
|
49
|
+
|
|
50
|
+
- **Three save modes**: DataFrame with schema columns (Mode A), single entry via kwargs (Mode B), or dict mapping tuples to values (Mode C)
|
|
51
|
+
- **Automatic type inference**: Maps Python/numpy types to DuckDB types
|
|
52
|
+
- **Round-trip restoration**: Metadata tracks original types for lossless load
|
|
53
|
+
- **Version management**: Automatic version numbering, duplicate hash detection
|
|
54
|
+
- **Variable groups**: Organize variables into named groups
|
|
55
|
+
- **Schema validation**: Validates dataset schema consistency across sessions
|
|
56
|
+
|
|
57
|
+
Note: all schema key values are coerced to strings before storage (for example, `session=1` in the usage above is stored as the string `"1"`).
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# SciDuck
|
|
2
|
+
|
|
3
|
+
A thin DuckDB layer for managing versioned scientific data.
|
|
4
|
+
|
|
5
|
+
Each variable is stored in its own table. Variables are associated with a hierarchical dataset schema (e.g. subject -> session -> trial) and can be saved at any level of that hierarchy. Multiple versions of each variable are supported natively.
|
|
6
|
+
|
|
7
|
+
All data -- including arrays -- is stored in queryable DuckDB types (LIST, nested LIST, JSON) so the database can be inspected with DBeaver or any DuckDB-compatible viewer.
|
|
8
|
+
|
|
9
|
+
## Usage
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
from sciduckdb import SciDuck
|
|
13
|
+
|
|
14
|
+
duck = SciDuck("data.duckdb", dataset_schema=["subject", "session"])
|
|
15
|
+
duck.save("MyVar", data, subject="S01", session=1)
|
|
16
|
+
loaded = duck.load("MyVar", subject="S01", session=1)
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Features
|
|
20
|
+
|
|
21
|
+
- **Three save modes**: DataFrame with schema columns (Mode A), single entry via kwargs (Mode B), or dict mapping tuples to values (Mode C)
|
|
22
|
+
- **Automatic type inference**: Maps Python/numpy types to DuckDB types
|
|
23
|
+
- **Round-trip restoration**: Metadata tracks original types for lossless load
|
|
24
|
+
- **Version management**: Automatic version numbering, duplicate hash detection
|
|
25
|
+
- **Variable groups**: Organize variables into named groups
|
|
26
|
+
- **Schema validation**: Validates dataset schema consistency across sessions
|
|
27
|
+
|
|
28
|
+
Note: all schema key values are coerced to strings before storage (for example, `session=1` in the usage above is stored as the string `"1"`).
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "sciduckdb"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "A thin DuckDB layer for managing versioned scientific data"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "SciStack Contributors" }
|
|
14
|
+
]
|
|
15
|
+
keywords = [
|
|
16
|
+
"duckdb",
|
|
17
|
+
"scientific-data",
|
|
18
|
+
"versioning",
|
|
19
|
+
"data-management",
|
|
20
|
+
]
|
|
21
|
+
classifiers = [
|
|
22
|
+
"Development Status :: 4 - Beta",
|
|
23
|
+
"Intended Audience :: Developers",
|
|
24
|
+
"Intended Audience :: Science/Research",
|
|
25
|
+
"License :: OSI Approved :: MIT License",
|
|
26
|
+
"Operating System :: OS Independent",
|
|
27
|
+
"Programming Language :: Python :: 3",
|
|
28
|
+
"Programming Language :: Python :: 3.10",
|
|
29
|
+
"Programming Language :: Python :: 3.11",
|
|
30
|
+
"Programming Language :: Python :: 3.12",
|
|
31
|
+
"Topic :: Scientific/Engineering",
|
|
32
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
33
|
+
]
|
|
34
|
+
dependencies = [
|
|
35
|
+
"duckdb>=0.9.0",
|
|
36
|
+
"pandas>=1.3",
|
|
37
|
+
"numpy>=1.20",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
[project.optional-dependencies]
|
|
41
|
+
dev = [
|
|
42
|
+
"pytest>=7.0",
|
|
43
|
+
"pytest-cov>=4.0",
|
|
44
|
+
"mypy>=1.0",
|
|
45
|
+
"ruff>=0.1.0",
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
[tool.hatch.build.targets.sdist]
|
|
49
|
+
include = [
|
|
50
|
+
"/src",
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
[tool.hatch.build.targets.wheel]
|
|
54
|
+
packages = ["src/sciduckdb"]
|
|
55
|
+
|
|
56
|
+
[tool.pytest.ini_options]
|
|
57
|
+
testpaths = ["tests"]
|
|
58
|
+
pythonpath = ["src"]
|
|
59
|
+
|
|
60
|
+
[tool.ruff]
|
|
61
|
+
target-version = "py310"
|
|
62
|
+
line-length = 88
|
|
63
|
+
|
|
64
|
+
[tool.ruff.lint]
|
|
65
|
+
select = [
|
|
66
|
+
"E", # pycodestyle errors
|
|
67
|
+
"W", # pycodestyle warnings
|
|
68
|
+
"F", # Pyflakes
|
|
69
|
+
"I", # isort
|
|
70
|
+
"B", # flake8-bugbear
|
|
71
|
+
"C4", # flake8-comprehensions
|
|
72
|
+
"UP", # pyupgrade
|
|
73
|
+
]
|
|
74
|
+
ignore = [
|
|
75
|
+
"E501", # line too long (handled by formatter)
|
|
76
|
+
]
|
|
77
|
+
|
|
78
|
+
[tool.ruff.lint.isort]
|
|
79
|
+
known-first-party = ["sciduckdb"]
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""SciDuck — A thin DuckDB layer for managing versioned scientific data."""
|
|
2
|
+
|
|
3
|
+
from .sciduckdb import (
|
|
4
|
+
SciDuck,
|
|
5
|
+
_infer_duckdb_type,
|
|
6
|
+
_numpy_dtype_to_duckdb,
|
|
7
|
+
_python_to_storage,
|
|
8
|
+
_storage_to_python,
|
|
9
|
+
_storage_to_python_column,
|
|
10
|
+
_infer_data_columns,
|
|
11
|
+
_value_to_storage_row,
|
|
12
|
+
_dataframe_to_storage_rows,
|
|
13
|
+
_bulk_df_to_storage_rows,
|
|
14
|
+
_flatten_dict,
|
|
15
|
+
_unflatten_dict,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"SciDuck",
|
|
20
|
+
"_infer_duckdb_type",
|
|
21
|
+
"_numpy_dtype_to_duckdb",
|
|
22
|
+
"_python_to_storage",
|
|
23
|
+
"_storage_to_python",
|
|
24
|
+
"_storage_to_python_column",
|
|
25
|
+
"_infer_data_columns",
|
|
26
|
+
"_value_to_storage_row",
|
|
27
|
+
"_dataframe_to_storage_rows",
|
|
28
|
+
"_bulk_df_to_storage_rows",
|
|
29
|
+
"_flatten_dict",
|
|
30
|
+
"_unflatten_dict",
|
|
31
|
+
]
|