sciduckdb 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ .venv
2
+ .DS_Store
3
+ */*/__pycache__
4
+ others_projects/sciforge
5
+ scistack-gui/extension/node_modules/
6
+ scistack-gui/frontend/node_modules/__pycache__/
7
+ *.pyc
8
+ __pycache__/
9
+ *.pyc
10
+ *.pyo
11
+
12
+ # Generated database artifacts (DuckDB data/lineage + write-ahead logs)
13
+ *.duckdb
14
+ *.duckdb.wal
15
+ *.wal
@@ -0,0 +1,57 @@
1
+ Metadata-Version: 2.4
2
+ Name: sciduckdb
3
+ Version: 0.1.0
4
+ Summary: A thin DuckDB layer for managing versioned scientific data
5
+ Author: SciStack Contributors
6
+ License-Expression: MIT
7
+ Keywords: data-management,duckdb,scientific-data,versioning
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Requires-Python: >=3.9
20
+ Requires-Dist: duckdb>=0.9.0
21
+ Requires-Dist: numpy>=1.20
22
+ Requires-Dist: pandas>=1.3
23
+ Provides-Extra: dev
24
+ Requires-Dist: mypy>=1.0; extra == 'dev'
25
+ Requires-Dist: pytest-cov>=4.0; extra == 'dev'
26
+ Requires-Dist: pytest>=7.0; extra == 'dev'
27
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
28
+ Description-Content-Type: text/markdown
29
+
30
+ # SciDuck
31
+
32
+ A thin DuckDB layer for managing versioned scientific data.
33
+
34
+ Each variable is stored in its own table. Variables are associated with a hierarchical dataset schema (e.g. subject -> session -> trial) and can be saved at any level of that hierarchy. Multiple versions of each variable are supported natively.
35
+
36
+ All data -- including arrays -- is stored in queryable DuckDB types (LIST, nested LIST, JSON) so the database can be inspected with DBeaver or any DuckDB-compatible viewer.
37
+
38
+ ## Usage
39
+
40
+ ```python
41
+ from sciduckdb import SciDuck
42
+
43
+ duck = SciDuck("data.duckdb", dataset_schema=["subject", "session"])
44
+ duck.save("MyVar", data, subject="S01", session=1)
45
+ loaded = duck.load("MyVar", subject="S01", session=1)
46
+ ```
47
+
48
+ ## Features
49
+
50
+ - **Three save modes**: DataFrame with schema columns (Mode A), single entry via kwargs (Mode B), or dict mapping tuples to values (Mode C)
51
+ - **Automatic type inference**: Maps Python/numpy types to DuckDB types
52
+ - **Round-trip restoration**: Metadata tracks original types for lossless load
53
+ - **Version management**: Automatic version numbering, duplicate hash detection
54
+ - **Variable groups**: Organize variables into named groups
55
+ - **Schema validation**: Validates dataset schema consistency across sessions
56
+
57
+ Note: all schema key values are coerced to strings before storage, so an integer key such as `session=1` in the usage example above is stored as the string `"1"`.
@@ -0,0 +1,28 @@
1
+ # SciDuck
2
+
3
+ A thin DuckDB layer for managing versioned scientific data.
4
+
5
+ Each variable is stored in its own table. Variables are associated with a hierarchical dataset schema (e.g. subject -> session -> trial) and can be saved at any level of that hierarchy. Multiple versions of each variable are supported natively.
6
+
7
+ All data -- including arrays -- is stored in queryable DuckDB types (LIST, nested LIST, JSON) so the database can be inspected with DBeaver or any DuckDB-compatible viewer.
8
+
9
+ ## Usage
10
+
11
+ ```python
12
+ from sciduckdb import SciDuck
13
+
14
+ duck = SciDuck("data.duckdb", dataset_schema=["subject", "session"])
15
+ duck.save("MyVar", data, subject="S01", session=1)
16
+ loaded = duck.load("MyVar", subject="S01", session=1)
17
+ ```
18
+
19
+ ## Features
20
+
21
+ - **Three save modes**: DataFrame with schema columns (Mode A), single entry via kwargs (Mode B), or dict mapping tuples to values (Mode C)
22
+ - **Automatic type inference**: Maps Python/numpy types to DuckDB types
23
+ - **Round-trip restoration**: Metadata tracks original types for lossless load
24
+ - **Version management**: Automatic version numbering, duplicate hash detection
25
+ - **Variable groups**: Organize variables into named groups
26
+ - **Schema validation**: Validates dataset schema consistency across sessions
27
+
28
+ Note: all schema key values are coerced to strings before storage, so an integer key such as `session=1` in the usage example above is stored as the string `"1"`.
@@ -0,0 +1,79 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "sciduckdb"
7
+ version = "0.1.0"
8
+ description = "A thin DuckDB layer for managing versioned scientific data"
9
+ readme = "README.md"
10
+ license = "MIT"
11
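+ requires-python = ">=3.9"  # NOTE(review): classifiers list only Python 3.10-3.12 and [tool.ruff] targets py310; confirm whether 3.9 is actually supported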
12
+ authors = [
13
+ { name = "SciStack Contributors" }
14
+ ]
15
+ keywords = [
16
+ "duckdb",
17
+ "scientific-data",
18
+ "versioning",
19
+ "data-management",
20
+ ]
21
+ classifiers = [
22
+ "Development Status :: 4 - Beta",
23
+ "Intended Audience :: Developers",
24
+ "Intended Audience :: Science/Research",
25
+ "License :: OSI Approved :: MIT License",
26
+ "Operating System :: OS Independent",
27
+ "Programming Language :: Python :: 3",
28
+ "Programming Language :: Python :: 3.10",
29
+ "Programming Language :: Python :: 3.11",
30
+ "Programming Language :: Python :: 3.12",
31
+ "Topic :: Scientific/Engineering",
32
+ "Topic :: Software Development :: Libraries :: Python Modules",
33
+ ]
34
+ dependencies = [
35
+ "duckdb>=0.9.0",
36
+ "pandas>=1.3",
37
+ "numpy>=1.20",
38
+ ]
39
+
40
+ [project.optional-dependencies]
41
+ dev = [
42
+ "pytest>=7.0",
43
+ "pytest-cov>=4.0",
44
+ "mypy>=1.0",
45
+ "ruff>=0.1.0",
46
+ ]
47
+
48
+ [tool.hatch.build.targets.sdist]
49
+ include = [
50
+ "/src",
51
+ ]
52
+
53
+ [tool.hatch.build.targets.wheel]
54
+ packages = ["src/sciduckdb"]
55
+
56
+ [tool.pytest.ini_options]
57
+ testpaths = ["tests"]
58
+ pythonpath = ["src"]
59
+
60
+ [tool.ruff]
61
+ target-version = "py310"
62
+ line-length = 88
63
+
64
+ [tool.ruff.lint]
65
+ select = [
66
+ "E", # pycodestyle errors
67
+ "W", # pycodestyle warnings
68
+ "F", # Pyflakes
69
+ "I", # isort
70
+ "B", # flake8-bugbear
71
+ "C4", # flake8-comprehensions
72
+ "UP", # pyupgrade
73
+ ]
74
+ ignore = [
75
+ "E501", # line too long (handled by formatter)
76
+ ]
77
+
78
+ [tool.ruff.lint.isort]
79
+ known-first-party = ["sciduckdb"]
@@ -0,0 +1,31 @@
1
+ """SciDuck — A thin DuckDB layer for managing versioned scientific data."""
2
+
3
+ from .sciduckdb import (
4
+ SciDuck,
5
+ _infer_duckdb_type,
6
+ _numpy_dtype_to_duckdb,
7
+ _python_to_storage,
8
+ _storage_to_python,
9
+ _storage_to_python_column,
10
+ _infer_data_columns,
11
+ _value_to_storage_row,
12
+ _dataframe_to_storage_rows,
13
+ _bulk_df_to_storage_rows,
14
+ _flatten_dict,
15
+ _unflatten_dict,
16
+ )
17
+
18
+ __all__ = [
19
+ "SciDuck",
20
+ "_infer_duckdb_type",
21
+ "_numpy_dtype_to_duckdb",
22
+ "_python_to_storage",
23
+ "_storage_to_python",
24
+ "_storage_to_python_column",
25
+ "_infer_data_columns",
26
+ "_value_to_storage_row",
27
+ "_dataframe_to_storage_rows",
28
+ "_bulk_df_to_storage_rows",
29
+ "_flatten_dict",
30
+ "_unflatten_dict",
31
+ ]