datalad-core 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datalad_core-0.1.0/.appveyor.yml +218 -0
- datalad_core-0.1.0/.changelog.md.j2 +39 -0
- datalad_core-0.1.0/.dsops/create.py +444 -0
- datalad_core-0.1.0/.dsops/tests/__init__.py +0 -0
- datalad_core-0.1.0/.dsops/tests/test_create.py +21 -0
- datalad_core-0.1.0/.github/workflows/codespell.yml +19 -0
- datalad_core-0.1.0/.github/workflows/conventional-commits.yml +23 -0
- datalad_core-0.1.0/.github/workflows/mypy-pr.yml +41 -0
- datalad_core-0.1.0/.github/workflows/mypy-project.yml +29 -0
- datalad_core-0.1.0/.github/workflows/ruff.yml +17 -0
- datalad_core-0.1.0/.gitignore +6 -0
- datalad_core-0.1.0/.noannex +0 -0
- datalad_core-0.1.0/.readthedocs.yaml +35 -0
- datalad_core-0.1.0/CHANGELOG.md +61 -0
- datalad_core-0.1.0/CONTRIBUTING.md +192 -0
- datalad_core-0.1.0/LICENSE +24 -0
- datalad_core-0.1.0/PKG-INFO +78 -0
- datalad_core-0.1.0/README.md +38 -0
- datalad_core-0.1.0/WHATNEXT +4 -0
- datalad_core-0.1.0/conftest.py +35 -0
- datalad_core-0.1.0/datalad_core/__init__.py +7 -0
- datalad_core-0.1.0/datalad_core/_version.py +21 -0
- datalad_core-0.1.0/datalad_core/commands/__init__.py +80 -0
- datalad_core-0.1.0/datalad_core/commands/dataset.py +248 -0
- datalad_core-0.1.0/datalad_core/commands/decorator.py +216 -0
- datalad_core-0.1.0/datalad_core/commands/default_result_handler.py +122 -0
- datalad_core-0.1.0/datalad_core/commands/exceptions.py +198 -0
- datalad_core-0.1.0/datalad_core/commands/param_constraint.py +67 -0
- datalad_core-0.1.0/datalad_core/commands/preproc.py +366 -0
- datalad_core-0.1.0/datalad_core/commands/result_handler.py +52 -0
- datalad_core-0.1.0/datalad_core/commands/tests/__init__.py +0 -0
- datalad_core-0.1.0/datalad_core/commands/tests/test_cmd.py +192 -0
- datalad_core-0.1.0/datalad_core/commands/tests/test_dataset.py +145 -0
- datalad_core-0.1.0/datalad_core/commands/tests/test_preproc.py +257 -0
- datalad_core-0.1.0/datalad_core/config/__init__.py +90 -0
- datalad_core-0.1.0/datalad_core/config/defaults.py +66 -0
- datalad_core-0.1.0/datalad_core/config/git.py +387 -0
- datalad_core-0.1.0/datalad_core/config/gitenv.py +116 -0
- datalad_core-0.1.0/datalad_core/config/item.py +13 -0
- datalad_core-0.1.0/datalad_core/config/manager.py +240 -0
- datalad_core-0.1.0/datalad_core/config/tests/__init__.py +0 -0
- datalad_core-0.1.0/datalad_core/config/tests/test_defaults.py +20 -0
- datalad_core-0.1.0/datalad_core/config/tests/test_git.py +112 -0
- datalad_core-0.1.0/datalad_core/config/tests/test_gitenv.py +40 -0
- datalad_core-0.1.0/datalad_core/config/tests/test_manager.py +136 -0
- datalad_core-0.1.0/datalad_core/config/tests/test_utils.py +183 -0
- datalad_core-0.1.0/datalad_core/config/utils.py +117 -0
- datalad_core-0.1.0/datalad_core/constraints/__init__.py +77 -0
- datalad_core-0.1.0/datalad_core/constraints/basic.py +75 -0
- datalad_core-0.1.0/datalad_core/constraints/constraint.py +218 -0
- datalad_core-0.1.0/datalad_core/constraints/exceptions.py +130 -0
- datalad_core-0.1.0/datalad_core/constraints/path.py +202 -0
- datalad_core-0.1.0/datalad_core/constraints/tests/__init__.py +0 -0
- datalad_core-0.1.0/datalad_core/constraints/tests/test_basic.py +45 -0
- datalad_core-0.1.0/datalad_core/constraints/tests/test_constraint.py +143 -0
- datalad_core-0.1.0/datalad_core/constraints/tests/test_exceptions.py +52 -0
- datalad_core-0.1.0/datalad_core/constraints/tests/test_path.py +85 -0
- datalad_core-0.1.0/datalad_core/constraints/tests/test_wrapper.py +52 -0
- datalad_core-0.1.0/datalad_core/constraints/wrapper.py +118 -0
- datalad_core-0.1.0/datalad_core/consts/__init__.py +26 -0
- datalad_core-0.1.0/datalad_core/py.typed +0 -0
- datalad_core-0.1.0/datalad_core/repo/__init__.py +45 -0
- datalad_core-0.1.0/datalad_core/repo/annex.py +149 -0
- datalad_core-0.1.0/datalad_core/repo/flyweight.py +101 -0
- datalad_core-0.1.0/datalad_core/repo/gitmanaged.py +112 -0
- datalad_core-0.1.0/datalad_core/repo/repo.py +173 -0
- datalad_core-0.1.0/datalad_core/repo/tests/__init__.py +0 -0
- datalad_core-0.1.0/datalad_core/repo/tests/test_annex.py +106 -0
- datalad_core-0.1.0/datalad_core/repo/tests/test_repo.py +50 -0
- datalad_core-0.1.0/datalad_core/repo/tests/test_worktree.py +161 -0
- datalad_core-0.1.0/datalad_core/repo/utils.py +35 -0
- datalad_core-0.1.0/datalad_core/repo/worktree.py +230 -0
- datalad_core-0.1.0/datalad_core/runners/__init__.py +49 -0
- datalad_core-0.1.0/datalad_core/runners/annex.py +97 -0
- datalad_core-0.1.0/datalad_core/runners/git.py +228 -0
- datalad_core-0.1.0/datalad_core/runners/imports.py +9 -0
- datalad_core-0.1.0/datalad_core/runners/tests/__init__.py +0 -0
- datalad_core-0.1.0/datalad_core/runners/tests/test_callannex.py +54 -0
- datalad_core-0.1.0/datalad_core/runners/tests/test_callgit.py +56 -0
- datalad_core-0.1.0/datalad_core/tests/__init__.py +9 -0
- datalad_core-0.1.0/datalad_core/tests/fixtures.py +248 -0
- datalad_core-0.1.0/datalad_core/tests/test_dummy.py +6 -0
- datalad_core-0.1.0/datalad_core/tests/test_fixtures.py +6 -0
- datalad_core-0.1.0/datalad_core/tests/test_utils.py +12 -0
- datalad_core-0.1.0/datalad_core/tests/utils.py +180 -0
- datalad_core-0.1.0/docs/.gitignore +2 -0
- datalad_core-0.1.0/docs/CODEOWNERS +12 -0
- datalad_core-0.1.0/docs/Makefile +20 -0
- datalad_core-0.1.0/docs/_static/.gitkeep +0 -0
- datalad_core-0.1.0/docs/_templates/autosummary/class.rst +10 -0
- datalad_core-0.1.0/docs/_templates/autosummary/module.rst +4 -0
- datalad_core-0.1.0/docs/conf.py +37 -0
- datalad_core-0.1.0/docs/index.rst +31 -0
- datalad_core-0.1.0/docs/patterns/dataset_paths.rst +57 -0
- datalad_core-0.1.0/pyproject.toml +210 -0
- datalad_core-0.1.0/tools/appveyor/env_setup.bat +4 -0
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
# This CI setup provides a largely homogeneous configuration across all
|
|
2
|
+
# major platforms (Windows, MacOS, and Linux). The aim of this test setup is
|
|
3
|
+
# to create a "native" platform experience, using as few cross-platform
|
|
4
|
+
# helper tools as possible.
|
|
5
|
+
#
|
|
6
|
+
# On all platforms `hatch` is used for testing, and managing the test
|
|
7
|
+
# environment. This yields a near-identical environment/behavior across
|
|
8
|
+
# platforms and Python versions. The main difference between running tests
|
|
9
|
+
# on Appveyor and locally should be the service setup (e.g., SSH, HTTPBIN).
|
|
10
|
+
#
|
|
11
|
+
# All workers support remote login (grep for LOGIN in this file to locate
|
|
12
|
+
# the lines that enable it). Once enabled, login details are shown at the
|
|
13
|
+
# top of each CI run log.
|
|
14
|
+
#
|
|
15
|
+
# - Linux/Mac workers (via SSH):
|
|
16
|
+
#
|
|
17
|
+
# - A permitted SSH key must be defined in an APPVEYOR_SSH_KEY environment
|
|
18
|
+
# variable (via the appveyor project settings)
|
|
19
|
+
#
|
|
20
|
+
# - SSH login info is given in the form of: 'appveyor@67.225.164.xx -p 22xxx'
|
|
21
|
+
#
|
|
22
|
+
# - Login with:
|
|
23
|
+
#
|
|
24
|
+
# ssh -o StrictHostKeyChecking=no <LOGIN>
|
|
25
|
+
#
|
|
26
|
+
# - to prevent the CI run from exiting, `touch` a file named `BLOCK` in the
|
|
27
|
+
# user HOME directory (current directory directly after login). The session
|
|
28
|
+
# will run until the file is removed (or 60 min have passed)
|
|
29
|
+
#
|
|
30
|
+
# - Windows workers (via RDP):
|
|
31
|
+
#
|
|
32
|
+
# - An RDP password should be defined in an APPVEYOR_RDP_PASSWORD environment
|
|
33
|
+
# variable (via the appveyor project settings), or a random password is used
|
|
34
|
+
# every time
|
|
35
|
+
#
|
|
36
|
+
# - RDP login info is given in the form of IP:PORT
|
|
37
|
+
#
|
|
38
|
+
# - Login with:
|
|
39
|
+
#
|
|
40
|
+
# xfreerdp /cert:ignore /dynamic-resolution /u:appveyor /p:<PASSWORD> /v:<LOGIN>
|
|
41
|
+
#
|
|
42
|
+
# - to prevent the CI run from exiting, create a textfile named `BLOCK` on the
|
|
43
|
+
# Desktop (a required .txt extension will be added automatically). The session
|
|
44
|
+
# will run until the file is removed (or 60 min have passed)
|
|
45
|
+
#
|
|
46
|
+
|
|
47
|
+
# do not make repository clone cheap: interferes with VCS-based version determination
|
|
48
|
+
shallow_clone: false
|
|
49
|
+
|
|
50
|
+
# turn off MS project build support (not needed)
|
|
51
|
+
build: off
|
|
52
|
+
|
|
53
|
+
environment:
|
|
54
|
+
# place coverage files to a known location regardless of where a test run
|
|
55
|
+
# is happening. also ensures proper report path configurations for codecov
|
|
56
|
+
COVERAGE_ROOT: /home/appveyor/DLTMP
|
|
57
|
+
# we pin hatch's data file to make it easy to cache it
|
|
58
|
+
HATCH_DATA_DIR: /home/appveyor/hatch-data-dir
|
|
59
|
+
UV_CACHE_DIR: /home/appveyor/.cache/uv
|
|
60
|
+
HATCH_ENV_TYPE_VIRTUAL_UV_PATH: /home/appveyor/.local/bin/uv
|
|
61
|
+
# oldest and newest supported, by default
|
|
62
|
+
TEST_SCRIPT: "hatch test -i py=3.9,3.13 --cover --doctest-modules --durations 10"
|
|
63
|
+
matrix:
|
|
64
|
+
- job_name: test-linux
|
|
65
|
+
APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2204
|
|
66
|
+
|
|
67
|
+
# same as 'test-linux', but TMPDIR is on a crippled filesystem, causing
|
|
68
|
+
# most, if not all test datasets to be created on that filesystem
|
|
69
|
+
- job_name: test-linux-crippled
|
|
70
|
+
APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2204
|
|
71
|
+
|
|
72
|
+
- job_name: test-mac
|
|
73
|
+
APPVEYOR_BUILD_WORKER_IMAGE: macos-sonoma
|
|
74
|
+
COVERAGE_ROOT: /Users/appveyor/DLTMP
|
|
75
|
+
HATCH_DATA_DIR: /Users/appveyor/hatch-data-dir
|
|
76
|
+
HATCH_ENV_TYPE_VIRTUAL_UV_PATH: /Users/appveyor/.local/bin/uv
|
|
77
|
+
UV_CACHE_DIR: /Users/appveyor/.cache/uv
|
|
78
|
+
|
|
79
|
+
- job_name: test-win
|
|
80
|
+
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2022
|
|
81
|
+
COVERAGE_ROOT: C:\DLTMP
|
|
82
|
+
# Python version specification is non-standard on windows
|
|
83
|
+
HATCH_DATA_DIR: C:\hatch-data-dir
|
|
84
|
+
# make hatch use uv as an installer
|
|
85
|
+
HATCH_ENV_TYPE_VIRTUAL_UV_PATH: C:\Users\appveyor\.local\bin\uv.exe
|
|
86
|
+
# appveyor has a default path that comprises _everything_. here we trim
|
|
87
|
+
# it to the bare necessities to reduce using preinstalled software
|
|
88
|
+
# that changes without notice. importantly, we
|
|
89
|
+
# - reset the default python to be a 64bit one
|
|
90
|
+
# - include the installation target path for `uv`
|
|
91
|
+
CUSTOMPATH: C:\Users\appveyor\.local\bin;C:\Program Files\Git\cmd;C:\Program Files\Git\usr\bin;C:\Windows\system32;C:\Windows\System32\WindowsPowerShell\v1.0;C:\Windows\System32\OpenSSH;C:\Program Files\PowerShell\7;C:\Program Files\7-Zip;C:\Python312-x64;C:\Python312-x64\Scripts
|
|
92
|
+
UV_CACHE_DIR: C:\Users\appveyor\AppData\Local\uv\cache
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# only run the CI if there are code or tooling changes
|
|
96
|
+
only_commits:
|
|
97
|
+
files:
|
|
98
|
+
- datalad_core/**/*
|
|
99
|
+
- tools/**/*
|
|
100
|
+
- pyproject.toml
|
|
101
|
+
- .appveyor.yml
|
|
102
|
+
|
|
103
|
+
# job-specific configurations
|
|
104
|
+
for:
|
|
105
|
+
#
|
|
106
|
+
# POSIX TEST RUNS
|
|
107
|
+
#
|
|
108
|
+
- matrix:
|
|
109
|
+
only:
|
|
110
|
+
- job_name: test-linux
|
|
111
|
+
- job_name: test-linux-crippled
|
|
112
|
+
- job_name: test-mac
|
|
113
|
+
|
|
114
|
+
cache:
|
|
115
|
+
- "${UV_CACHE_DIR} -> .appveyor.yml"
|
|
116
|
+
|
|
117
|
+
# init cannot use any components from the repo, because it runs prior to
|
|
118
|
+
# cloning it
|
|
119
|
+
init:
|
|
120
|
+
# LOGIN: enable external SSH access to CI worker
|
|
121
|
+
# needs APPVEYOR_SSH_KEY defined in project settings (or environment)
|
|
122
|
+
#- curl -sflL 'https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-ssh.sh' | bash -e -
|
|
123
|
+
# wipe out appveyor's collection of environment shims to prevent
|
|
124
|
+
# hatch from being confused by it
|
|
125
|
+
- rm -rf /home/appveyor/.pyenv
|
|
126
|
+
# install `uv`
|
|
127
|
+
- curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
128
|
+
- source $HOME/.local/bin/env
|
|
129
|
+
# Scratch space
|
|
130
|
+
# we place the "unix" one into the user's HOME to avoid git-annex issues on MacOSX
|
|
131
|
+
# gh-5291
|
|
132
|
+
- mkdir ~/DLTMP && export TMPDIR=~/DLTMP
|
|
133
|
+
|
|
134
|
+
before_test:
|
|
135
|
+
# store original TMPDIR setting to limit modification to test execution
|
|
136
|
+
- export PREV_TMPDIR=$TMPDIR
|
|
137
|
+
# make TMPDIR a "crippled filesystem" to test wrong assumptions of POSIX-ness
|
|
138
|
+
# on POSIX OSes. The test fixtures will create all test datasets under TMPDIR
|
|
139
|
+
- |
|
|
140
|
+
set -e
|
|
141
|
+
if [ "$APPVEYOR_JOB_NAME" = "test-linux-crippled" ]; then
|
|
142
|
+
# 200 MB VFAT FS in a box
|
|
143
|
+
sudo dd if=/dev/zero of=/crippledfs.img count=200 bs=1M
|
|
144
|
+
sudo mkfs.vfat /crippledfs.img
|
|
145
|
+
sudo mkdir /crippledfs
|
|
146
|
+
sudo mount -o "uid=$(id -u),gid=$(id -g)" /crippledfs.img /crippledfs
|
|
147
|
+
echo "== mount >>"
|
|
148
|
+
mount | grep crippled
|
|
149
|
+
echo "<< mount =="
|
|
150
|
+
export TMPDIR=/crippledfs
|
|
151
|
+
fi
|
|
152
|
+
- echo TMPDIR=$TMPDIR
|
|
153
|
+
|
|
154
|
+
after_test:
|
|
155
|
+
- coverage xml
|
|
156
|
+
- codecovcli --auto-load-params-from AppVeyor upload-process -n "appveyor-${APPVEYOR_JOB_NAME}" --disable-search -f coverage.xml
|
|
157
|
+
|
|
158
|
+
on_finish:
|
|
159
|
+
# conditionally block the exit of a CI run for direct debugging
|
|
160
|
+
- while [ -f ~/BLOCK ]; do sleep 5; done
|
|
161
|
+
|
|
162
|
+
#
|
|
163
|
+
# WINDOWS TEST RUNS
|
|
164
|
+
#
|
|
165
|
+
- matrix:
|
|
166
|
+
only:
|
|
167
|
+
- job_name: test-win
|
|
168
|
+
cache:
|
|
169
|
+
- "%UV_CACHE_DIR% -> .appveyor.yml"
|
|
170
|
+
# hatch-managed python versions
|
|
171
|
+
- "%HATCH_DATA_DIR%\\env\\virtual\\.pythons -> pyproject.toml"
|
|
172
|
+
|
|
173
|
+
# init cannot use any components from the repo, because it runs prior to
|
|
174
|
+
# cloning it
|
|
175
|
+
init:
|
|
176
|
+
# LOGIN: enable RDP access on windows (RDP password is in appveyor project config)
|
|
177
|
+
# this is relatively expensive (1-2min), but very convenient to jump into any build at any time
|
|
178
|
+
#- ps: iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
|
|
179
|
+
- cmd: "set PATH=%CUSTOMPATH%"
|
|
180
|
+
# install `uv`
|
|
181
|
+
- cmd: powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
|
|
182
|
+
# remove windows 260-char limit on path names
|
|
183
|
+
- ps: Set-Itemproperty -path "HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem" -Name LongPathsEnabled -value 1
|
|
184
|
+
# Scratch space
|
|
185
|
+
- cmd: md C:\DLTMP
|
|
186
|
+
# and use that scratch space to get short paths in test repos
|
|
187
|
+
# (avoiding length-limits as much as possible)
|
|
188
|
+
- cmd: "set TMP=C:\\DLTMP & set TEMP=C:\\DLTMP"
|
|
189
|
+
|
|
190
|
+
install:
|
|
191
|
+
# place a debug setup helper at a convenient location
|
|
192
|
+
- cmd: copy tools\appveyor\env_setup.bat C:\\datalad_debug.bat
|
|
193
|
+
|
|
194
|
+
after_test:
|
|
195
|
+
- coverage xml
|
|
196
|
+
- codecovcli --auto-load-params-from AppVeyor upload-process -n "appveyor-%APPVEYOR_JOB_NAME%" --disable-search -f coverage.xml
|
|
197
|
+
|
|
198
|
+
on_finish:
|
|
199
|
+
# conditionally block the exit of a CI run for direct debugging
|
|
200
|
+
- ps: while ((Test-Path "C:\Users\\appveyor\\Desktop\\BLOCK.txt")) { Start-Sleep 5 }
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
#
|
|
204
|
+
# ALL TEST RUNS
|
|
205
|
+
#
|
|
206
|
+
build_script:
|
|
207
|
+
- uv tool install hatch
|
|
208
|
+
- uv tool install coverage[toml]
|
|
209
|
+
- uv tool install codecov-cli
|
|
210
|
+
|
|
211
|
+
after_build:
|
|
212
|
+
# Identity setup
|
|
213
|
+
- git config --global user.email "test@appveyor.land"
|
|
214
|
+
- git config --global user.name "Appveyor Almighty"
|
|
215
|
+
|
|
216
|
+
test_script:
|
|
217
|
+
# oldest and newest supported, by default
|
|
218
|
+
- "hatch test -i py=3.9,3.13 --cover --doctest-modules --durations 10"
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{% for entry in tree %}
|
|
2
|
+
|
|
3
|
+
# {{ entry.version }}{% if entry.date %} ({{ entry.date }}){% endif %}
|
|
4
|
+
|
|
5
|
+
{% for change_key, changes in entry.changes.items() %}
|
|
6
|
+
|
|
7
|
+
{% set change_key_map = {
|
|
8
|
+
'BREAKING CHANGE': '🪓 Breaking changes',
|
|
9
|
+
'doc': '📝 Documentation',
|
|
10
|
+
'feat': '💫 New features',
|
|
11
|
+
'fix': '🐛 Bug Fixes',
|
|
12
|
+
'test': '🛡 Tests',
|
|
13
|
+
'rf': '🏠 Refactorings',
|
|
14
|
+
'perf': '🚀 Performance improvements',
|
|
15
|
+
} %}
|
|
16
|
+
{% if change_key %}
|
|
17
|
+
## {{ change_key_map.get(change_key, change_key) }}
|
|
18
|
+
{% endif %}
|
|
19
|
+
{% set scopemap = {
|
|
20
|
+
'changelog': 'Changelog',
|
|
21
|
+
'contributing': 'Contributing guide',
|
|
22
|
+
'helpers': 'Helpers',
|
|
23
|
+
'sphinx': 'Rendered documentation',
|
|
24
|
+
'typeannotation': 'Type annotation',
|
|
25
|
+
} %}
|
|
26
|
+
|
|
27
|
+
{# no-scope changes #}
|
|
28
|
+
{% for change in changes | rejectattr("scope") %}
|
|
29
|
+
- {{ change.message }} [[{{ change.sha1 | truncate(8, true, '') }}]](https://github.com/datalad/datalad-core/commit/{{ change.sha1 | truncate(8, true, '') }})
|
|
30
|
+
{% endfor %}
|
|
31
|
+
{# scoped changes #}
|
|
32
|
+
{% for scope, scope_changes in changes | selectattr("scope") | groupby("scope") %}
|
|
33
|
+
- {{ scopemap.get(scope, scope) }}:
|
|
34
|
+
{% for change in scope_changes %}
|
|
35
|
+
- {{ change.message }} [[{{ change.sha1 | truncate(8, true, '') }}]](https://github.com/datalad/datalad-core/commit/{{ change.sha1 | truncate(8, true, '') }})
|
|
36
|
+
{% endfor %}
|
|
37
|
+
{% endfor %}
|
|
38
|
+
{% endfor %}
|
|
39
|
+
{% endfor %}
|
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
|
|
2
|
+
# ex: set sts=4 ts=4 sw=4 et:
|
|
3
|
+
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
|
|
4
|
+
#
|
|
5
|
+
# See COPYING file distributed along with the datalad package for the
|
|
6
|
+
# copyright and license terms.
|
|
7
|
+
#
|
|
8
|
+
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
|
|
9
|
+
"""High-level interface for dataset creation
|
|
10
|
+
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import random
|
|
14
|
+
import uuid
|
|
15
|
+
from collections.abc import Mapping
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
#from os import listdir
|
|
19
|
+
#from datalad import _seed
|
|
20
|
+
#from datalad.support.constraints import (
|
|
21
|
+
# EnsureStr,
|
|
22
|
+
# EnsureNone,
|
|
23
|
+
# EnsureKeyChoice,
|
|
24
|
+
#)
|
|
25
|
+
from datalad_core.commands import (
|
|
26
|
+
EnsureDataset,
|
|
27
|
+
JointParamProcessor,
|
|
28
|
+
ParamSetConstraint,
|
|
29
|
+
datalad_command,
|
|
30
|
+
)
|
|
31
|
+
from datalad_core.constraints import (
|
|
32
|
+
EnsureChoice,
|
|
33
|
+
EnsurePath,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class EnsureParentDatasetConditions(ParamSetConstraint):
    """Joint constraint covering the ``dataset`` and ``path`` parameters.

    The actual checks (parent repository exists, no collision with parent
    content) are not implemented yet; at present the parameter mapping is
    always handed back unchanged.
    """

    input_synopsis = 'existing parent dataset without content conflict'

    def __init__(self):
        # names of the parameters this constraint declares as validated
        validated_params = ('dataset', 'path')
        super().__init__(validated_params)

    def __call__(self, val: Mapping[str, Any]) -> Mapping[str, Any]:
        """Return ``val`` as-is (the checks are still TODO)."""
        dataset_given = val['dataset'].pristine_spec is not None
        if dataset_given:
            # TODO: check for existence of parent repo
            # TODO: check for collisions with parent repo content
            pass
        # without a dataset there is nothing to check in the first place
        return val
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class EnsureAnnexParams(ParamSetConstraint):
    """Joint constraint covering ``annex`` and ``annex_description``.

    An annex description is rejected when ``annex`` is exactly ``False``.
    """

    input_synopsis = 'coherent annex parameters'

    def __init__(self):
        # names of the parameters this constraint declares as validated
        validated_params = ('annex', 'annex_description')
        super().__init__(validated_params)

    def __call__(self, val: Mapping[str, Any]) -> Mapping[str, Any]:
        """Report a description given without an annex, else return ``val``."""
        # coherent: an annex is wanted, or no description was given
        if val['annex'] is not False or not val['annex_description']:
            return val

        self.raise_for(
            val,
            'cannot assign an annex description with no annex',
        )
        return val
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@datalad_command(
    preproc=JointParamProcessor(
        {
            'dataset': EnsureDataset(),
            'path': EnsurePath(),
            # accepted `annex` values:
            #   False     -> no annex
            #   True      -> standard annex
            #   'private' -> private-mode annex
            'annex': EnsureChoice(False, True, 'private'),
        },
        proc_defaults={'dataset', 'path'},
        tailor_for_dataset={'path': 'dataset'},
        paramset_constraints=(
            EnsureAnnexParams(),
            EnsureParentDatasetConditions(),
        ),
    ),
)
def create_dataset(
    path=None,
    *,
    dataset=None,
    annex=True,
    annex_description=None,
    # not exposed at this point: initopts=None, force=False, cfg_proc=None
):
    """Placeholder for the dataset creation command.

    Parameter processing is configured in the decorator above. The function
    body itself performs no action yet and returns an empty list.
    """
    return []
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def dummy():
    """Parked body of a dataset creation routine (generator of result dicts).

    The function takes no parameters, yet reads many names that are not
    defined inside it (e.g. ``path``, ``refds_path``, ``force``, ``ds``,
    ``no_annex``, ``fake_dates``, ``description``, ``lgr``, ``_seed``,
    ``op``, ``os``, ``Path``, ``GitRepo``, ``Dataset``, ``listdir``,
    ``ensure_list``, ``require_dataset``, ``path_under_rev_dataset``,
    ``get_dataset_root``).

    NOTE(review): ``cfg_proc`` and ``initopts`` are assigned further down,
    which makes them function-local names; the first read of ``initopts``
    below would therefore raise ``UnboundLocalError`` as soon as the
    generator is advanced -- confirm this code is not meant to be called
    in its present form.
    """
    # two major cases
    # 1. we got a `dataset` -> we either want to create it (path is None),
    #    or another dataset in it (path is not None)
    # 2. we got no dataset -> we want to create a fresh dataset at the
    #    desired location, either at `path` or PWD
    if (isinstance(initopts, (list, tuple)) and '--bare' in initopts) or (
            isinstance(initopts, dict) and 'bare' in initopts):
        raise ValueError(
            "Creation of bare repositories is not supported. Consider "
            "one of the create-sibling commands, or use "
            "Git to init a bare repository and push an existing dataset "
            "into it.")

    # assure cfg_proc is a list (relevant if used via Python API)
    cfg_proc = ensure_list(cfg_proc)

    # prep for yield
    res = dict(action='create', path=str(path),
               logger=lgr, type='dataset',
               refds=refds_path)

    refds = None
    if refds_path and refds_path != str(path):
        refds = require_dataset(
            refds_path, check_installed=True,
            purpose='create a subdataset')

        path_inrefds = path_under_rev_dataset(refds, path)
        if path_inrefds is None:
            yield dict(
                res,
                status='error',
                message=(
                    "dataset containing given paths is not underneath "
                    "the reference dataset %s: %s",
                    ds, str(path)),
            )
            return

    # try to locate an immediate parent dataset
    # we want to know this (irrespective of whether we plan on adding
    # this new dataset to a parent) in order to avoid conflicts with
    # a potentially absent/uninstalled subdataset of the parent
    # in this location
    # it will cost some filesystem traversal though...
    parentds_path = get_dataset_root(
        op.normpath(op.join(str(path), os.pardir)))
    if parentds_path:
        prepo = GitRepo(parentds_path)
        parentds_path = Path(parentds_path)
        # we cannot get away with a simple
        # GitRepo.get_content_info(), as we need to detect
        # uninstalled/added subdatasets too
        check_path = Path(path)
        pstatus = prepo.status(
            untracked='no',
            # limit query to target path for a potentially massive speed-up
            paths=[check_path.relative_to(parentds_path)])
        if (pstatus.get(check_path, {}).get('type') != 'dataset' and
                any(check_path == p or check_path in p.parents
                    for p in pstatus)):
            # redo the check in a slower fashion, it is already broken
            # let's take our time for a proper error message
            conflict = [
                p for p in pstatus
                if check_path == p or check_path in p.parents]
            res.update({
                'status': 'error',
                'message': (
                    'collision with content in parent dataset at %s: %s',
                    str(parentds_path),
                    [str(c) for c in conflict])})
            yield res
            return
        if not force:
            # another set of checks to see whether the target path is pointing
            # into a known subdataset that is not around ATM
            subds_status = {
                parentds_path / k.relative_to(prepo.path)
                for k, v in pstatus.items()
                if v.get('type', None) == 'dataset'}
            check_paths = [check_path]
            check_paths.extend(check_path.parents)
            if any(p in subds_status for p in check_paths):
                conflict = [p for p in check_paths if p in subds_status]
                res.update({
                    'status': 'error',
                    'message': (
                        'collision with %s (dataset) in dataset %s',
                        str(conflict[0]),
                        str(parentds_path))})
                yield res
                return

    # important to use the given Dataset object to avoid spurious ID
    # changes with not-yet-materialized Datasets
    tbds = ds if isinstance(ds, Dataset) and \
        ds.path == path else Dataset(str(path))

    # don't create in non-empty directory without `force`:
    if op.isdir(tbds.path) and listdir(tbds.path) != [] and not force:
        res.update({
            'status': 'error',
            'message':
                'will not create a dataset in a non-empty directory, use '
                '`--force` option to ignore'})
        yield res
        return

    # Check if specified cfg_proc(s) can be discovered, storing
    # the results so they can be used when the time comes to run
    # the procedure. If a procedure cannot be found, raise an
    # error to prevent creating the dataset.
    cfg_proc_specs = []
    if cfg_proc:
        discovered_procs = tbds.run_procedure(
            discover=True,
            result_renderer='disabled',
            return_type='list',
        )
        for cfg_proc_ in cfg_proc:
            for discovered_proc in discovered_procs:
                if discovered_proc['procedure_name'] == 'cfg_' + cfg_proc_:
                    cfg_proc_specs.append(discovered_proc)
                    break
            else:
                # inner loop finished without `break`: no matching procedure
                raise ValueError("Cannot find procedure with name "
                                 "'%s'" % cfg_proc_)

    if initopts is not None and isinstance(initopts, list):
        initopts = {'_from_cmdline_': initopts}

    # Note for the code below:
    # OPT: be "smart" and avoid re-resolving .repo -- expensive in DataLad
    # Reuse tbrepo instance, do not use tbds.repo

    # create and configure desired repository
    # also provides initial set of content to be tracked with git (not annex)
    if no_annex:
        tbrepo, add_to_git = _setup_git_repo(path, initopts, fake_dates)
    else:
        tbrepo, add_to_git = _setup_annex_repo(
            path, initopts, fake_dates, description)

    # OPT: be "smart" and avoid re-resolving .repo -- expensive in DataLad
    # Note, must not happen earlier (before if) since "smart" it would not be
    tbds_config = tbds.config

    # record an ID for this repo for the afterlife
    # to be able to track siblings and children
    id_var = 'datalad.dataset.id'
    # Note, that Dataset property `id` will change when we unset the
    # respective config. Therefore store it before:
    tbds_id = tbds.id
    if id_var in tbds_config:
        # make sure we reset this variable completely, in case of a
        # re-create
        tbds_config.unset(id_var, scope='branch')

    if _seed is None:
        # just the standard way
        # use a fully random identifier (i.e. UUID version 4)
        uuid_id = str(uuid.uuid4())
    else:
        # Let's generate preseeded ones
        uuid_id = str(uuid.UUID(int=random.getrandbits(128)))
    tbds_config.add(
        id_var,
        tbds_id if tbds_id is not None else uuid_id,
        scope='branch',
        reload=False)

    # make config overrides permanent in the repo config
    # this is similar to what `annex init` does
    # we are only doing this for config overrides and do not expose
    # a dedicated argument, because it is sufficient for the cmdline
    # and unnecessary for the Python API (there could simply be a
    # subsequent ds.config.add() call)
    for k, v in tbds_config.overrides.items():
        tbds_config.add(k, v, scope='local', reload=False)

    # all config manipulation is done -> full reload
    tbds_config.reload()

    # must use the repo.pathobj as this will have resolved symlinks
    add_to_git[tbrepo.pathobj / '.datalad'] = {
        'type': 'directory',
        'state': 'untracked'}

    # save everything, we need to do this now and cannot merge with the
    # call below, because we may need to add this subdataset to a parent
    # but cannot until we have a first commit
    tbrepo.save(
        message='[DATALAD] new dataset',
        git=True,
        # we have to supply our own custom status, as the repo does
        # not have a single commit yet and there is no HEAD reference
        # TODO make `GitRepo.status()` robust to this state.
        _status=add_to_git,
    )

    for cfg_proc_spec in cfg_proc_specs:
        yield from tbds.run_procedure(
            cfg_proc_spec,
            result_renderer='disabled',
            return_type='generator',
        )

    # the next only makes sense if we saved the created dataset,
    # otherwise we have no committed state to be registered
    # in the parent
    if isinstance(refds, Dataset) and refds.path != tbds.path:
        # we created a dataset in another dataset
        # -> make submodule
        yield from refds.save(
            path=tbds.path,
            return_type='generator',
            result_renderer='disabled',
        )
    else:
        # if we do not save, we touch the root directory of the new
        # dataset to signal a change in the nature of the directory.
        # this is useful for apps like datalad-gooey (or other
        # inotify consumers) to pick up on such changes.
        tbds.pathobj.touch()

    res.update({'status': 'ok'})
    yield res
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def _setup_git_repo(path, initopts=None, fake_dates=False):
    """Create a plain Git repository at `path` and mark it as annex-free

    Parameters
    ----------
    path: str or Path
      Location of the repository to create
    initopts: dict, optional
      Handed to the GitRepo constructor as `git_opts`
    fake_dates: bool, optional
      Handed to the GitRepo constructor

    Returns
    -------
    GitRepo, dict
      The new repository, and a status-like mapping with one record for
      the `.noannex` marker file that still has to be added to Git.
    """
    repo = GitRepo(
        path,
        create=True,
        create_sanity_checks=False,
        git_opts=initopts,
        fake_dates=fake_dates,
    )
    # the presence of a .noannex file tells annex to leave this repo alone
    noannex_marker = Path(repo.path) / '.noannex'
    noannex_marker.touch()
    pending = {noannex_marker: {'type': 'file', 'state': 'untracked'}}
    return repo, pending
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def _setup_annex_repo(path, initopts=None, fake_dates=False,
                      description=None):
    """Create an annex repository at `path` with default attributes

    Besides creating the repository, this stages the default annex backend
    in `.gitattributes` and configures `annex.largefiles=nothing` for
    `.datalad/config` and for `.git*` files.

    Parameters
    ----------
    path: str or Path
      Location of the repository to create
    initopts: dict, optional
      Handed to the AnnexRepo constructor as `git_opts`
    fake_dates: bool, optional
      Handed to the AnnexRepo constructor
    description: str, optional
      Handed to the AnnexRepo constructor

    Returns
    -------
    AnnexRepo, dict
      The new repository, and a status-like mapping with records for the
      repository components that still have to be added to Git.
    """
    # a repository created from scratch always gets an annex
    repo = AnnexRepo(
        path,
        create=True,
        create_sanity_checks=False,
        # no backend at construction time, this avoids a dedicated commit
        backend=None,
        # with None the version is taken from the configuration
        version=None,
        description=description,
        git_opts=initopts,
        fake_dates=fake_dates,
    )
    # record the annex backend in .gitattributes as a staged change
    repo.set_default_backend(
        cfg.obtain('datalad.repo.backend'),
        persistent=True, commit=False)
    root_attrfile = repo.pathobj / '.gitattributes'
    pending = {root_attrfile: {'type': 'file', 'state': 'added'}}

    # make sure that v6 annex repos never commit content under .datalad
    wanted = (
        ('config', 'annex.largefiles', 'nothing'),
    )
    current = repo.get_gitattributes(
        [op.join('.datalad', entry[0]) for entry in wanted])
    missing = [
        (relpath, {key: value})
        for relpath, key, value in wanted
        if current.get(op.join('.datalad', relpath), {}).get(key, None) != value
    ]
    if missing:
        repo.set_gitattributes(
            missing,
            attrfile=op.join('.datalad', '.gitattributes'))

    # prevent git annex from ever annexing .git* stuff (gh-1597)
    git_attrs = repo.get_gitattributes('.git')
    if git_attrs.get('.git', {}).get('annex.largefiles', None) != 'nothing':
        repo.set_gitattributes([
            ('**/.git*', {'annex.largefiles': 'nothing'})])
        # must use the repo.pathobj as this will have resolved symlinks
        pending[repo.pathobj / '.gitattributes'] = {
            'type': 'file',
            'state': 'untracked'}
    return repo, pending
|
|
File without changes
|