datumhub-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. datumhub_cli-0.1.0/.github/workflows/ci.yml +29 -0
  2. datumhub_cli-0.1.0/.github/workflows/publish.yml +51 -0
  3. datumhub_cli-0.1.0/.gitignore +60 -0
  4. datumhub_cli-0.1.0/PKG-INFO +231 -0
  5. datumhub_cli-0.1.0/README.md +206 -0
  6. datumhub_cli-0.1.0/pyproject.toml +59 -0
  7. datumhub_cli-0.1.0/src/datum/__init__.py +1 -0
  8. datumhub_cli-0.1.0/src/datum/commands/__init__.py +0 -0
  9. datumhub_cli-0.1.0/src/datum/commands/cache.py +188 -0
  10. datumhub_cli-0.1.0/src/datum/commands/check.py +233 -0
  11. datumhub_cli-0.1.0/src/datum/commands/config.py +171 -0
  12. datumhub_cli-0.1.0/src/datum/commands/info.py +141 -0
  13. datumhub_cli-0.1.0/src/datum/commands/init.py +290 -0
  14. datumhub_cli-0.1.0/src/datum/commands/list.py +76 -0
  15. datumhub_cli-0.1.0/src/datum/commands/login.py +116 -0
  16. datumhub_cli-0.1.0/src/datum/commands/publish.py +162 -0
  17. datumhub_cli-0.1.0/src/datum/commands/pull.py +246 -0
  18. datumhub_cli-0.1.0/src/datum/commands/search.py +88 -0
  19. datumhub_cli-0.1.0/src/datum/console.py +23 -0
  20. datumhub_cli-0.1.0/src/datum/main.py +112 -0
  21. datumhub_cli-0.1.0/src/datum/models.py +165 -0
  22. datumhub_cli-0.1.0/src/datum/registry/__init__.py +0 -0
  23. datumhub_cli-0.1.0/src/datum/registry/local.py +94 -0
  24. datumhub_cli-0.1.0/src/datum/state.py +23 -0
  25. datumhub_cli-0.1.0/tests/__init__.py +0 -0
  26. datumhub_cli-0.1.0/tests/test_cache.py +158 -0
  27. datumhub_cli-0.1.0/tests/test_check.py +158 -0
  28. datumhub_cli-0.1.0/tests/test_config.py +199 -0
  29. datumhub_cli-0.1.0/tests/test_info.py +173 -0
  30. datumhub_cli-0.1.0/tests/test_list.py +80 -0
  31. datumhub_cli-0.1.0/tests/test_login.py +176 -0
  32. datumhub_cli-0.1.0/tests/test_models.py +211 -0
  33. datumhub_cli-0.1.0/tests/test_publish.py +107 -0
  34. datumhub_cli-0.1.0/tests/test_pull.py +391 -0
  35. datumhub_cli-0.1.0/tests/test_registry.py +118 -0
  36. datumhub_cli-0.1.0/tests/test_search.py +174 -0
@@ -0,0 +1,29 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+
13
+ strategy:
14
+ matrix:
15
+ python-version: ["3.11", "3.12"]
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Set up Python ${{ matrix.python-version }}
21
+ uses: actions/setup-python@v5
22
+ with:
23
+ python-version: ${{ matrix.python-version }}
24
+
25
+ - name: Install dependencies
26
+ run: pip install -e ".[dev]"
27
+
28
+ - name: Run tests
29
+ run: pytest tests/ -v
@@ -0,0 +1,51 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ jobs:
9
+ build:
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - name: Set up Python
16
+ uses: actions/setup-python@v5
17
+ with:
18
+ python-version: "3.12"
19
+
20
+ - name: Install build tools
21
+ run: pip install build
22
+
23
+ - name: Build package
24
+ run: python -m build
25
+
26
+ - name: Upload dist as artifact
27
+ uses: actions/upload-artifact@v4
28
+ with:
29
+ name: dist
30
+ path: dist/
31
+
32
+ publish:
33
+ needs: build
34
+ runs-on: ubuntu-latest
35
+
36
+ environment:
37
+ name: pypi
38
+ url: https://pypi.org/project/datumhub-cli/
39
+
40
+ permissions:
41
+ id-token: write
42
+
43
+ steps:
44
+ - name: Download dist artifact
45
+ uses: actions/download-artifact@v4
46
+ with:
47
+ name: dist
48
+ path: dist/
49
+
50
+ - name: Publish to PyPI
51
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,60 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+ MANIFEST
23
+
24
+ # Environments
25
+ .env
26
+ .venv
27
+ env/
28
+ venv/
29
+ ENV/
30
+ env.bak/
31
+ venv.bak/
32
+ .python-version
33
+
34
+ # Hatch
35
+ .hatch/
36
+
37
+ # Testing
38
+ .pytest_cache/
39
+ .coverage
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+
44
+ # IDEs
45
+ .vscode/
46
+ .idea/
47
+ *.swp
48
+ *.swo
49
+
50
+ # macOS
51
+ .DS_Store
52
+ .AppleDouble
53
+ .LSOverride
54
+
55
+ # Datum
56
+ ~/.datum/
57
+
58
+ # Pulled dataset directories (created by datum pull in CWD)
59
+ /pulltest/
60
+ /sampledata/
@@ -0,0 +1,231 @@
1
+ Metadata-Version: 2.4
2
+ Name: datumhub-cli
3
+ Version: 0.1.0
4
+ Summary: Open datasets, open source — a CLI for publishing and consuming open datasets.
5
+ Author: Datum Contributors
6
+ License: MIT
7
+ Keywords: cli,data,datasets,open-data,registry
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Environment :: Console
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Topic :: Scientific/Engineering
16
+ Classifier: Topic :: Utilities
17
+ Requires-Python: >=3.11
18
+ Requires-Dist: httpx>=0.27.0
19
+ Requires-Dist: pydantic>=2.6.0
20
+ Requires-Dist: typer[all]>=0.12.0
21
+ Provides-Extra: dev
22
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
23
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
24
+ Description-Content-Type: text/markdown
25
+
26
+ # datum-cli
27
+
28
+ > The command-line tool for [Datum](https://github.com/simkjels/datum) — publish and consume open data with a single command.
29
+
30
+ ```
31
+ datum pull norges-bank.reports.financial-stability:2024
32
+ ```
33
+
34
+ ---
35
+
36
+ ## Installation
37
+
38
+ ```bash
39
+ pip install datumhub-cli
40
+ ```
41
+
42
+ Requires Python 3.11+.
43
+
44
+ ---
45
+
46
+ ## How it works
47
+
48
+ Every dataset in Datum has a three-part identifier:
49
+
50
+ ```
51
+ publisher.namespace.dataset:version
52
+ ```
53
+
54
+ For example: `met-no.weather.oslo-hourly:2024-01`
55
+
56
+ The metadata lives in a `datapackage.json` file — a small JSON document that
57
+ describes where the data files are hosted, their format, size, and checksum.
58
+ Datum never hosts the data itself.
59
+
60
+ ---
61
+
62
+ ## Quick start
63
+
64
+ **1. Describe your dataset**
65
+
66
+ ```bash
67
+ datum init
68
+ ```
69
+
70
+ Walks you through creating a `datapackage.json` interactively.
71
+
72
+ **2. Validate it**
73
+
74
+ ```bash
75
+ datum check datapackage.json
76
+ ```
77
+
78
+ **3. Publish to your local registry**
79
+
80
+ ```bash
81
+ datum publish datapackage.json
82
+ ```
83
+
84
+ **4. Pull it anywhere**
85
+
86
+ ```bash
87
+ datum pull publisher.namespace.dataset:version
88
+ ```
89
+
90
+ Files land in `./dataset/` in your current directory. The local cache at
91
+ `~/.datum/cache/` is used for deduplication — pulling the same dataset in a
92
+ second directory copies from cache with no network request.
93
+
94
+ ---
95
+
96
+ ## Commands
97
+
98
+ ### Publishing
99
+
100
+ | Command | Description |
101
+ |---|---|
102
+ | `datum init` | Create a `datapackage.json` via an interactive wizard |
103
+ | `datum check [FILE]` | Validate a `datapackage.json` against the Datum schema |
104
+ | `datum publish [FILE]` | Publish dataset metadata to the registry |
105
+
106
+ ### Consuming
107
+
108
+ | Command | Description |
109
+ |---|---|
110
+ | `datum pull IDENTIFIER` | Download a dataset and verify its checksum |
111
+ | `datum info IDENTIFIER` | Show full metadata for a dataset |
112
+ | `datum list` | List all datasets in the registry |
113
+ | `datum search QUERY` | Search the registry by keyword |
114
+
115
+ ### Cache
116
+
117
+ | Command | Description |
118
+ |---|---|
119
+ | `datum cache list` | Show all cached datasets |
120
+ | `datum cache size` | Show total cache disk usage |
121
+ | `datum cache clear` | Remove all cached files |
122
+
123
+ ### Configuration
124
+
125
+ | Command | Description |
126
+ |---|---|
127
+ | `datum config set KEY VALUE` | Set a configuration value |
128
+ | `datum config get KEY` | Get a configuration value |
129
+ | `datum config show [KEY]` | Show one key or all configuration |
130
+ | `datum config unset KEY` | Remove a configuration key |
131
+
132
+ ### Authentication
133
+
134
+ | Command | Description |
135
+ |---|---|
136
+ | `datum login [URL]` | Authenticate with a registry |
137
+ | `datum logout [URL]` | Remove stored credentials |
138
+
139
+ ---
140
+
141
+ ## Global flags
142
+
143
+ Global flags must come **before** the subcommand:
144
+
145
+ ```bash
146
+ datum --output json list
147
+ datum --quiet pull publisher.namespace.dataset:1.0.0
148
+ datum --registry https://datumhub.org pull publisher.namespace.dataset
149
+ ```
150
+
151
+ | Flag | Description |
152
+ |---|---|
153
+ | `--output`, `-o` | Output format: `table` (default), `json`, `plain` |
154
+ | `--quiet`, `-q` | Suppress non-essential output |
155
+ | `--registry` | Override the default registry URL or path |
156
+ | `--verbose`, `-v` | Emit additional diagnostic information |
157
+
158
+ ---
159
+
160
+ ## The datapackage.json format
161
+
162
+ ```json
163
+ {
164
+ "id": "publisher.namespace.dataset",
165
+ "version": "1.0.0",
166
+ "title": "My Dataset",
167
+ "description": "A short description.",
168
+ "license": "CC-BY-4.0",
169
+ "publisher": {
170
+ "name": "Publisher Name",
171
+ "url": "https://example.com"
172
+ },
173
+ "tags": ["tag1", "tag2"],
174
+ "sources": [
175
+ {
176
+ "url": "https://example.com/data.csv",
177
+ "format": "csv",
178
+ "size": 204800,
179
+ "checksum": "sha256:abc123..."
180
+ }
181
+ ]
182
+ }
183
+ ```
184
+
185
+ ---
186
+
187
+ ## Pull behaviour
188
+
189
+ ```bash
190
+ # Pull a specific version
191
+ datum pull publisher.namespace.dataset:1.0.0
192
+
193
+ # Pull the latest published version
194
+ datum pull publisher.namespace.dataset
195
+
196
+ # Re-download even if the file already exists locally
197
+ datum pull publisher.namespace.dataset:1.0.0 --force
198
+ ```
199
+
200
+ Files are placed in `./dataset/` relative to your current directory.
201
+ Once a file exists there, subsequent pulls skip it — your local edits are safe.
202
+
203
+ ---
204
+
205
+ ## Configuration
206
+
207
+ ```bash
208
+ # Set a default registry
209
+ datum config set registry https://datumhub.org
210
+
211
+ # Set a default output format
212
+ datum config set output json
213
+
214
+ # View all configuration
215
+ datum config show
216
+ ```
217
+
218
+ Configuration is stored at `~/.datum/config.json`.
219
+
220
+ ---
221
+
222
+ ## Status
223
+
224
+ Early development. The CLI is functional for local workflows.
225
+ Remote registry support via [DatumHub](https://datumhub.org) is coming.
226
+
227
+ ---
228
+
229
+ ## License
230
+
231
+ MIT
@@ -0,0 +1,206 @@
1
+ # datum-cli
2
+
3
+ > The command-line tool for [Datum](https://github.com/simkjels/datum) — publish and consume open data with a single command.
4
+
5
+ ```
6
+ datum pull norges-bank.reports.financial-stability:2024
7
+ ```
8
+
9
+ ---
10
+
11
+ ## Installation
12
+
13
+ ```bash
14
+ pip install datumhub-cli
15
+ ```
16
+
17
+ Requires Python 3.11+.
18
+
19
+ ---
20
+
21
+ ## How it works
22
+
23
+ Every dataset in Datum has a three-part identifier:
24
+
25
+ ```
26
+ publisher.namespace.dataset:version
27
+ ```
28
+
29
+ For example: `met-no.weather.oslo-hourly:2024-01`
30
+
31
+ The metadata lives in a `datapackage.json` file — a small JSON document that
32
+ describes where the data files are hosted, their format, size, and checksum.
33
+ Datum never hosts the data itself.
34
+
35
+ ---
36
+
37
+ ## Quick start
38
+
39
+ **1. Describe your dataset**
40
+
41
+ ```bash
42
+ datum init
43
+ ```
44
+
45
+ Walks you through creating a `datapackage.json` interactively.
46
+
47
+ **2. Validate it**
48
+
49
+ ```bash
50
+ datum check datapackage.json
51
+ ```
52
+
53
+ **3. Publish to your local registry**
54
+
55
+ ```bash
56
+ datum publish datapackage.json
57
+ ```
58
+
59
+ **4. Pull it anywhere**
60
+
61
+ ```bash
62
+ datum pull publisher.namespace.dataset:version
63
+ ```
64
+
65
+ Files land in `./dataset/` in your current directory. The local cache at
66
+ `~/.datum/cache/` is used for deduplication — pulling the same dataset in a
67
+ second directory copies from cache with no network request.
68
+
69
+ ---
70
+
71
+ ## Commands
72
+
73
+ ### Publishing
74
+
75
+ | Command | Description |
76
+ |---|---|
77
+ | `datum init` | Create a `datapackage.json` via an interactive wizard |
78
+ | `datum check [FILE]` | Validate a `datapackage.json` against the Datum schema |
79
+ | `datum publish [FILE]` | Publish dataset metadata to the registry |
80
+
81
+ ### Consuming
82
+
83
+ | Command | Description |
84
+ |---|---|
85
+ | `datum pull IDENTIFIER` | Download a dataset and verify its checksum |
86
+ | `datum info IDENTIFIER` | Show full metadata for a dataset |
87
+ | `datum list` | List all datasets in the registry |
88
+ | `datum search QUERY` | Search the registry by keyword |
89
+
90
+ ### Cache
91
+
92
+ | Command | Description |
93
+ |---|---|
94
+ | `datum cache list` | Show all cached datasets |
95
+ | `datum cache size` | Show total cache disk usage |
96
+ | `datum cache clear` | Remove all cached files |
97
+
98
+ ### Configuration
99
+
100
+ | Command | Description |
101
+ |---|---|
102
+ | `datum config set KEY VALUE` | Set a configuration value |
103
+ | `datum config get KEY` | Get a configuration value |
104
+ | `datum config show [KEY]` | Show one key or all configuration |
105
+ | `datum config unset KEY` | Remove a configuration key |
106
+
107
+ ### Authentication
108
+
109
+ | Command | Description |
110
+ |---|---|
111
+ | `datum login [URL]` | Authenticate with a registry |
112
+ | `datum logout [URL]` | Remove stored credentials |
113
+
114
+ ---
115
+
116
+ ## Global flags
117
+
118
+ Global flags must come **before** the subcommand:
119
+
120
+ ```bash
121
+ datum --output json list
122
+ datum --quiet pull publisher.namespace.dataset:1.0.0
123
+ datum --registry https://datumhub.org pull publisher.namespace.dataset
124
+ ```
125
+
126
+ | Flag | Description |
127
+ |---|---|
128
+ | `--output`, `-o` | Output format: `table` (default), `json`, `plain` |
129
+ | `--quiet`, `-q` | Suppress non-essential output |
130
+ | `--registry` | Override the default registry URL or path |
131
+ | `--verbose`, `-v` | Emit additional diagnostic information |
132
+
133
+ ---
134
+
135
+ ## The datapackage.json format
136
+
137
+ ```json
138
+ {
139
+ "id": "publisher.namespace.dataset",
140
+ "version": "1.0.0",
141
+ "title": "My Dataset",
142
+ "description": "A short description.",
143
+ "license": "CC-BY-4.0",
144
+ "publisher": {
145
+ "name": "Publisher Name",
146
+ "url": "https://example.com"
147
+ },
148
+ "tags": ["tag1", "tag2"],
149
+ "sources": [
150
+ {
151
+ "url": "https://example.com/data.csv",
152
+ "format": "csv",
153
+ "size": 204800,
154
+ "checksum": "sha256:abc123..."
155
+ }
156
+ ]
157
+ }
158
+ ```
159
+
160
+ ---
161
+
162
+ ## Pull behaviour
163
+
164
+ ```bash
165
+ # Pull a specific version
166
+ datum pull publisher.namespace.dataset:1.0.0
167
+
168
+ # Pull the latest published version
169
+ datum pull publisher.namespace.dataset
170
+
171
+ # Re-download even if the file already exists locally
172
+ datum pull publisher.namespace.dataset:1.0.0 --force
173
+ ```
174
+
175
+ Files are placed in `./dataset/` relative to your current directory.
176
+ Once a file exists there, subsequent pulls skip it — your local edits are safe.
177
+
178
+ ---
179
+
180
+ ## Configuration
181
+
182
+ ```bash
183
+ # Set a default registry
184
+ datum config set registry https://datumhub.org
185
+
186
+ # Set a default output format
187
+ datum config set output json
188
+
189
+ # View all configuration
190
+ datum config show
191
+ ```
192
+
193
+ Configuration is stored at `~/.datum/config.json`.
194
+
195
+ ---
196
+
197
+ ## Status
198
+
199
+ Early development. The CLI is functional for local workflows.
200
+ Remote registry support via [DatumHub](https://datumhub.org) is coming.
201
+
202
+ ---
203
+
204
+ ## License
205
+
206
+ MIT
@@ -0,0 +1,59 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "datumhub-cli"
7
+ version = "0.1.0"  # same value as __version__ in src/datum/__init__.py; update both together
8
+ description = "Open datasets, open source — a CLI for publishing and consuming open datasets."
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"  # matches README, the classifiers below, the CI matrix and ruff target-version
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "Datum Contributors" }]
13
+ keywords = ["data", "datasets", "registry", "open-data", "cli"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Environment :: Console",
17
+ "Intended Audience :: Developers",
18
+ "Intended Audience :: Science/Research",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Topic :: Scientific/Engineering",
23
+ "Topic :: Utilities",
24
+ ]
25
+ dependencies = [
26
+ "typer[all]>=0.12.0",
27
+ "pydantic>=2.6.0",
28
+ "httpx>=0.27.0",
29
+ ]
30
+
31
+ [project.scripts]
32
+ datum = "datum.main:app"
33
+
34
+ [project.optional-dependencies]
35
+ dev = [
36
+ "pytest>=8.0.0",
37
+ "pytest-asyncio>=0.23.0",
38
+ ]
39
+
40
+ [tool.hatch.version]
41
+ path = "src/datum/__init__.py"
42
+
43
+ [tool.hatch.build.targets.wheel]
44
+ packages = ["src/datum"]
45
+
46
+ [tool.hatch.envs.default]
47
+ features = ["dev"]
48
+
49
+ [tool.hatch.envs.default.scripts]
50
+ test = "pytest {args}"
51
+ lint = "ruff check src tests"
52
+ typecheck = "mypy src"
53
+
54
+ [tool.pytest.ini_options]
55
+ testpaths = ["tests"]
56
+
57
+ [tool.ruff]
58
+ target-version = "py311"
59
+ line-length = 100
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
File without changes