datalad-core 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. datalad_core-0.1.0/.appveyor.yml +218 -0
  2. datalad_core-0.1.0/.changelog.md.j2 +39 -0
  3. datalad_core-0.1.0/.dsops/create.py +444 -0
  4. datalad_core-0.1.0/.dsops/tests/__init__.py +0 -0
  5. datalad_core-0.1.0/.dsops/tests/test_create.py +21 -0
  6. datalad_core-0.1.0/.github/workflows/codespell.yml +19 -0
  7. datalad_core-0.1.0/.github/workflows/conventional-commits.yml +23 -0
  8. datalad_core-0.1.0/.github/workflows/mypy-pr.yml +41 -0
  9. datalad_core-0.1.0/.github/workflows/mypy-project.yml +29 -0
  10. datalad_core-0.1.0/.github/workflows/ruff.yml +17 -0
  11. datalad_core-0.1.0/.gitignore +6 -0
  12. datalad_core-0.1.0/.noannex +0 -0
  13. datalad_core-0.1.0/.readthedocs.yaml +35 -0
  14. datalad_core-0.1.0/CHANGELOG.md +61 -0
  15. datalad_core-0.1.0/CONTRIBUTING.md +192 -0
  16. datalad_core-0.1.0/LICENSE +24 -0
  17. datalad_core-0.1.0/PKG-INFO +78 -0
  18. datalad_core-0.1.0/README.md +38 -0
  19. datalad_core-0.1.0/WHATNEXT +4 -0
  20. datalad_core-0.1.0/conftest.py +35 -0
  21. datalad_core-0.1.0/datalad_core/__init__.py +7 -0
  22. datalad_core-0.1.0/datalad_core/_version.py +21 -0
  23. datalad_core-0.1.0/datalad_core/commands/__init__.py +80 -0
  24. datalad_core-0.1.0/datalad_core/commands/dataset.py +248 -0
  25. datalad_core-0.1.0/datalad_core/commands/decorator.py +216 -0
  26. datalad_core-0.1.0/datalad_core/commands/default_result_handler.py +122 -0
  27. datalad_core-0.1.0/datalad_core/commands/exceptions.py +198 -0
  28. datalad_core-0.1.0/datalad_core/commands/param_constraint.py +67 -0
  29. datalad_core-0.1.0/datalad_core/commands/preproc.py +366 -0
  30. datalad_core-0.1.0/datalad_core/commands/result_handler.py +52 -0
  31. datalad_core-0.1.0/datalad_core/commands/tests/__init__.py +0 -0
  32. datalad_core-0.1.0/datalad_core/commands/tests/test_cmd.py +192 -0
  33. datalad_core-0.1.0/datalad_core/commands/tests/test_dataset.py +145 -0
  34. datalad_core-0.1.0/datalad_core/commands/tests/test_preproc.py +257 -0
  35. datalad_core-0.1.0/datalad_core/config/__init__.py +90 -0
  36. datalad_core-0.1.0/datalad_core/config/defaults.py +66 -0
  37. datalad_core-0.1.0/datalad_core/config/git.py +387 -0
  38. datalad_core-0.1.0/datalad_core/config/gitenv.py +116 -0
  39. datalad_core-0.1.0/datalad_core/config/item.py +13 -0
  40. datalad_core-0.1.0/datalad_core/config/manager.py +240 -0
  41. datalad_core-0.1.0/datalad_core/config/tests/__init__.py +0 -0
  42. datalad_core-0.1.0/datalad_core/config/tests/test_defaults.py +20 -0
  43. datalad_core-0.1.0/datalad_core/config/tests/test_git.py +112 -0
  44. datalad_core-0.1.0/datalad_core/config/tests/test_gitenv.py +40 -0
  45. datalad_core-0.1.0/datalad_core/config/tests/test_manager.py +136 -0
  46. datalad_core-0.1.0/datalad_core/config/tests/test_utils.py +183 -0
  47. datalad_core-0.1.0/datalad_core/config/utils.py +117 -0
  48. datalad_core-0.1.0/datalad_core/constraints/__init__.py +77 -0
  49. datalad_core-0.1.0/datalad_core/constraints/basic.py +75 -0
  50. datalad_core-0.1.0/datalad_core/constraints/constraint.py +218 -0
  51. datalad_core-0.1.0/datalad_core/constraints/exceptions.py +130 -0
  52. datalad_core-0.1.0/datalad_core/constraints/path.py +202 -0
  53. datalad_core-0.1.0/datalad_core/constraints/tests/__init__.py +0 -0
  54. datalad_core-0.1.0/datalad_core/constraints/tests/test_basic.py +45 -0
  55. datalad_core-0.1.0/datalad_core/constraints/tests/test_constraint.py +143 -0
  56. datalad_core-0.1.0/datalad_core/constraints/tests/test_exceptions.py +52 -0
  57. datalad_core-0.1.0/datalad_core/constraints/tests/test_path.py +85 -0
  58. datalad_core-0.1.0/datalad_core/constraints/tests/test_wrapper.py +52 -0
  59. datalad_core-0.1.0/datalad_core/constraints/wrapper.py +118 -0
  60. datalad_core-0.1.0/datalad_core/consts/__init__.py +26 -0
  61. datalad_core-0.1.0/datalad_core/py.typed +0 -0
  62. datalad_core-0.1.0/datalad_core/repo/__init__.py +45 -0
  63. datalad_core-0.1.0/datalad_core/repo/annex.py +149 -0
  64. datalad_core-0.1.0/datalad_core/repo/flyweight.py +101 -0
  65. datalad_core-0.1.0/datalad_core/repo/gitmanaged.py +112 -0
  66. datalad_core-0.1.0/datalad_core/repo/repo.py +173 -0
  67. datalad_core-0.1.0/datalad_core/repo/tests/__init__.py +0 -0
  68. datalad_core-0.1.0/datalad_core/repo/tests/test_annex.py +106 -0
  69. datalad_core-0.1.0/datalad_core/repo/tests/test_repo.py +50 -0
  70. datalad_core-0.1.0/datalad_core/repo/tests/test_worktree.py +161 -0
  71. datalad_core-0.1.0/datalad_core/repo/utils.py +35 -0
  72. datalad_core-0.1.0/datalad_core/repo/worktree.py +230 -0
  73. datalad_core-0.1.0/datalad_core/runners/__init__.py +49 -0
  74. datalad_core-0.1.0/datalad_core/runners/annex.py +97 -0
  75. datalad_core-0.1.0/datalad_core/runners/git.py +228 -0
  76. datalad_core-0.1.0/datalad_core/runners/imports.py +9 -0
  77. datalad_core-0.1.0/datalad_core/runners/tests/__init__.py +0 -0
  78. datalad_core-0.1.0/datalad_core/runners/tests/test_callannex.py +54 -0
  79. datalad_core-0.1.0/datalad_core/runners/tests/test_callgit.py +56 -0
  80. datalad_core-0.1.0/datalad_core/tests/__init__.py +9 -0
  81. datalad_core-0.1.0/datalad_core/tests/fixtures.py +248 -0
  82. datalad_core-0.1.0/datalad_core/tests/test_dummy.py +6 -0
  83. datalad_core-0.1.0/datalad_core/tests/test_fixtures.py +6 -0
  84. datalad_core-0.1.0/datalad_core/tests/test_utils.py +12 -0
  85. datalad_core-0.1.0/datalad_core/tests/utils.py +180 -0
  86. datalad_core-0.1.0/docs/.gitignore +2 -0
  87. datalad_core-0.1.0/docs/CODEOWNERS +12 -0
  88. datalad_core-0.1.0/docs/Makefile +20 -0
  89. datalad_core-0.1.0/docs/_static/.gitkeep +0 -0
  90. datalad_core-0.1.0/docs/_templates/autosummary/class.rst +10 -0
  91. datalad_core-0.1.0/docs/_templates/autosummary/module.rst +4 -0
  92. datalad_core-0.1.0/docs/conf.py +37 -0
  93. datalad_core-0.1.0/docs/index.rst +31 -0
  94. datalad_core-0.1.0/docs/patterns/dataset_paths.rst +57 -0
  95. datalad_core-0.1.0/pyproject.toml +210 -0
  96. datalad_core-0.1.0/tools/appveyor/env_setup.bat +4 -0
@@ -0,0 +1,218 @@
1
+ # This CI setup provides a largely homogeneous configuration across all
2
+ # major platforms (Windows, MacOS, and Linux). The aim of this test setup is
3
+ # to create a "native" platform experience, using as few cross-platform
4
+ # helper tools as possible.
5
+ #
6
+ # On all platforms `hatch` is used for testing, and managing the test
7
+ # environment. This yields a near-identical environment/behavior across
8
+ # platforms and Python versions. The main difference between running tests
9
+ # on Appveyor and locally should be the service setup (e.g., SSH, HTTPBIN).
10
+ #
11
+ # All workers support remote login (grep for LOGIN in this file to locate
12
+ # the lines that enable it). Once enabled, login details are shown at the
13
+ # top of each CI run log.
14
+ #
15
+ # - Linux/Mac workers (via SSH):
16
+ #
17
+ # - A permitted SSH key must be defined in an APPVEYOR_SSH_KEY environment
18
+ # variable (via the appveyor project settings)
19
+ #
20
+ # - SSH login info is given in the form of: 'appveyor@67.225.164.xx -p 22xxx'
21
+ #
22
+ # - Login with:
23
+ #
24
+ # ssh -o StrictHostKeyChecking=no <LOGIN>
25
+ #
26
+ # - to prevent the CI run from exiting, `touch` a file named `BLOCK` in the
27
+ # user HOME directory (current directory directly after login). The session
28
+ # will run until the file is removed (or 60 min have passed)
29
+ #
30
+ # - Windows workers (via RDP):
31
+ #
32
+ # - An RDP password should be defined in an APPVEYOR_RDP_PASSWORD environment
33
+ # variable (via the appveyor project settings), or a random password is used
34
+ # every time
35
+ #
36
+ # - RDP login info is given in the form of IP:PORT
37
+ #
38
+ # - Login with:
39
+ #
40
+ # xfreerdp /cert:ignore /dynamic-resolution /u:appveyor /p:<PASSWORD> /v:<LOGIN>
41
+ #
42
+ # - to prevent the CI run from exiting, create a textfile named `BLOCK` on the
43
+ # Desktop (a required .txt extension will be added automatically). The session
44
+ # will run until the file is removed (or 60 min have passed)
45
+ #
46
+
47
+ # do not make repository clone cheap: interferes with VCS-based version determination
48
+ shallow_clone: false
49
+
50
+ # turn off MS project build support (not needed)
51
+ build: off
52
+
53
+ environment:
54
+ # place coverage files to a known location regardless of where a test run
55
+ # is happening. also ensures proper report path configurations for codecov
56
+ COVERAGE_ROOT: /home/appveyor/DLTMP
57
+ # we pin hatch's data directory to make it easy to cache it
58
+ HATCH_DATA_DIR: /home/appveyor/hatch-data-dir
59
+ UV_CACHE_DIR: /home/appveyor/.cache/uv
60
+ HATCH_ENV_TYPE_VIRTUAL_UV_PATH: /home/appveyor/.local/bin/uv
61
+ # oldest and newest supported, by default
62
+ TEST_SCRIPT: "hatch test -i py=3.9,3.13 --cover --doctest-modules --durations 10"
63
+ matrix:
64
+ - job_name: test-linux
65
+ APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2204
66
+
67
+ # same as 'test-linux', but TMPDIR is on a crippled filesystem, causing
68
+ # most, if not all test datasets to be created on that filesystem
69
+ - job_name: test-linux-crippled
70
+ APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2204
71
+
72
+ - job_name: test-mac
73
+ APPVEYOR_BUILD_WORKER_IMAGE: macos-sonoma
74
+ COVERAGE_ROOT: /Users/appveyor/DLTMP
75
+ HATCH_DATA_DIR: /Users/appveyor/hatch-data-dir
76
+ HATCH_ENV_TYPE_VIRTUAL_UV_PATH: /Users/appveyor/.local/bin/uv
77
+ UV_CACHE_DIR: /Users/appveyor/.cache/uv
78
+
79
+ - job_name: test-win
80
+ APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2022
81
+ COVERAGE_ROOT: C:\DLTMP
82
+ # Python version specification is non-standard on windows
83
+ HATCH_DATA_DIR: C:\hatch-data-dir
84
+ # make hatch use uv as an installer
85
+ HATCH_ENV_TYPE_VIRTUAL_UV_PATH: C:\Users\appveyor\.local\bin\uv.exe
86
+ # appveyor has a default path that comprises _everything_. here we trim
87
+ # it to the bare necessities to reduce using preinstalled software
88
+ # that changes without notice. importantly, we
89
+ # - reset the default python to be a 64bit one
90
+ # - include the installation target path for `uv`
91
+ CUSTOMPATH: C:\Users\\appveyor\.local\bin;C:\Program Files\Git\cmd;C:\Program Files\Git\usr\bin;C:\Windows\system32;C:\Windows\System32\WindowsPowerShell\v1.0;C:\Windows\System32\OpenSSH;C:\Program Files\PowerShell\7;C:\Program Files\7-Zip;C:\Python312-x64;C:\Python312-x64\Scripts
92
+ UV_CACHE_DIR: C:\Users\appveyor\AppData\Local\uv\cache
93
+
94
+
95
+ # only run the CI if there are code or tooling changes
96
+ only_commits:
97
+ files:
98
+ - datalad_core/**/*
99
+ - tools/**/*
100
+ - pyproject.toml
101
+ - .appveyor.yml
102
+
103
+ # job-specific configurations
104
+ for:
105
+ #
106
+ # POSIX TEST RUNS
107
+ #
108
+ - matrix:
109
+ only:
110
+ - job_name: test-linux
111
+ - job_name: test-linux-crippled
112
+ - job_name: test-mac
113
+
114
+ cache:
115
+ - "${UV_CACHE_DIR} -> .appveyor.yml"
116
+
117
+ # init cannot use any components from the repo, because it runs prior to
118
+ # cloning it
119
+ init:
120
+ # LOGIN: enable external SSH access to CI worker
121
+ # needs APPVEYOR_SSH_KEY defined in project settings (or environment)
122
+ #- curl -sflL 'https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-ssh.sh' | bash -e -
123
+ # wipe out appveyor's collection of environment shims to prevent
124
+ # hatch from being confused by it
125
+ - rm -rf /home/appveyor/.pyenv
126
+ # install `uv`
127
+ - curl -LsSf https://astral.sh/uv/install.sh | sh
128
+ - source $HOME/.local/bin/env
129
+ # Scratch space
130
+ # we place the "unix" one into the user's HOME to avoid git-annex issues on MacOSX
131
+ # gh-5291
132
+ - mkdir ~/DLTMP && export TMPDIR=~/DLTMP
133
+
134
+ before_test:
135
+ # store original TMPDIR setting to limit modification to test execution
136
+ - export PREV_TMPDIR=$TMPDIR
137
+ # make TMPDIR a "crippled filesystem" to test wrong assumptions of POSIX-ness
138
+ # on POSIX OSes. The test fixtures will create all test datasets under TMPDIR
139
+ - |
140
+ set -e
141
+ if [ "$APPVEYOR_JOB_NAME" = "test-linux-crippled" ]; then
142
+ # 200 MB VFAT FS in a box
143
+ sudo dd if=/dev/zero of=/crippledfs.img count=200 bs=1M
144
+ sudo mkfs.vfat /crippledfs.img
145
+ sudo mkdir /crippledfs
146
+ sudo mount -o "uid=$(id -u),gid=$(id -g)" /crippledfs.img /crippledfs
147
+ echo "== mount >>"
148
+ mount | grep crippled
149
+ echo "<< mount =="
150
+ export TMPDIR=/crippledfs
151
+ fi
152
+ - echo TMPDIR=$TMPDIR
153
+
154
+ after_test:
155
+ - coverage xml
156
+ - codecovcli --auto-load-params-from AppVeyor upload-process -n "appveyor-${APPVEYOR_JOB_NAME}" --disable-search -f coverage.xml
157
+
158
+ on_finish:
159
+ # conditionally block the exit of a CI run for direct debugging
160
+ - while [ -f ~/BLOCK ]; do sleep 5; done
161
+
162
+ #
163
+ # WINDOWS TEST RUNS
164
+ #
165
+ - matrix:
166
+ only:
167
+ - job_name: test-win
168
+ cache:
169
+ - "%UV_CACHE_DIR% -> .appveyor.yml"
170
+ # hatch-managed python versions
171
+ - "%HATCH_DATA_DIR%\\env\\virtual\\.pythons -> pyproject.toml"
172
+
173
+ # init cannot use any components from the repo, because it runs prior to
174
+ # cloning it
175
+ init:
176
+ # LOGIN: enable RDP access on windows (RDP password is in appveyor project config)
177
+ # this is relatively expensive (1-2min), but very convenient to jump into any build at any time
178
+ #- ps: iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
179
+ - cmd: "set PATH=%CUSTOMPATH%"
180
+ # install `uv`
181
+ - cmd: powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
182
+ # remove windows 260-char limit on path names
183
+ - ps: Set-Itemproperty -path "HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem" -Name LongPathsEnabled -value 1
184
+ # Scratch space
185
+ - cmd: md C:\DLTMP
186
+ # and use that scratch space to get short paths in test repos
187
+ # (avoiding length-limits as much as possible)
188
+ - cmd: "set TMP=C:\\DLTMP & set TEMP=C:\\DLTMP"
189
+
190
+ install:
191
+ # place a debug setup helper at a convenient location
192
+ - cmd: copy tools\appveyor\env_setup.bat C:\\datalad_debug.bat
193
+
194
+ after_test:
195
+ - coverage xml
196
+ - codecovcli --auto-load-params-from AppVeyor upload-process -n "appveyor-%APPVEYOR_JOB_NAME%" --disable-search -f coverage.xml
197
+
198
+ on_finish:
199
+ # conditionally block the exit of a CI run for direct debugging
200
+ - ps: while ((Test-Path "C:\Users\\appveyor\\Desktop\\BLOCK.txt")) { Start-Sleep 5 }
201
+
202
+
203
+ #
204
+ # ALL TEST RUNS
205
+ #
206
+ build_script:
207
+ - uv tool install hatch
208
+ - uv tool install coverage[toml]
209
+ - uv tool install codecov-cli
210
+
211
+ after_build:
212
+ # Identity setup
213
+ - git config --global user.email "test@appveyor.land"
214
+ - git config --global user.name "Appveyor Almighty"
215
+
216
+ test_script:
217
+ # oldest and newest supported, by default
218
+ - "hatch test -i py=3.9,3.13 --cover --doctest-modules --durations 10"
@@ -0,0 +1,39 @@
1
+ {% for entry in tree %}
2
+
3
+ # {{ entry.version }}{% if entry.date %} ({{ entry.date }}){% endif %}
4
+
5
+ {% for change_key, changes in entry.changes.items() %}
6
+
7
+ {% set change_key_map = {
8
+ 'BREAKING CHANGE': '🪓 Breaking changes',
9
+ 'doc': '📝 Documentation',
10
+ 'feat': '💫 New features',
11
+ 'fix': '🐛 Bug Fixes',
12
+ 'test': '🛡 Tests',
13
+ 'rf': '🏠 Refactorings',
14
+ 'perf': '🚀 Performance improvements',
15
+ } %}
16
+ {% if change_key %}
17
+ ## {{ change_key_map.get(change_key, change_key) }}
18
+ {% endif %}
19
+ {% set scopemap = {
20
+ 'changelog': 'Changelog',
21
+ 'contributing': 'Contributing guide',
22
+ 'helpers': 'Helpers',
23
+ 'sphinx': 'Rendered documentation',
24
+ 'typeannotation': 'Type annotation',
25
+ } %}
26
+
27
+ {# no-scope changes #}
28
+ {% for change in changes | rejectattr("scope") %}
29
+ - {{ change.message }} [[{{ change.sha1 | truncate(8, true, '') }}]](https://github.com/datalad/datalad-core/commit/{{ change.sha1 | truncate(8, true, '') }})
30
+ {% endfor %}
31
+ {# scoped changes #}
32
+ {% for scope, scope_changes in changes | selectattr("scope") | groupby("scope") %}
33
+ - {{ scopemap.get(scope, scope) }}:
34
+ {% for change in scope_changes %}
35
+ - {{ change.message }} [[{{ change.sha1 | truncate(8, true, '') }}]](https://github.com/datalad/datalad-core/commit/{{ change.sha1 | truncate(8, true, '') }})
36
+ {% endfor %}
37
+ {% endfor %}
38
+ {% endfor %}
39
+ {% endfor %}
@@ -0,0 +1,444 @@
1
+ # emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
2
+ # ex: set sts=4 ts=4 sw=4 et:
3
+ # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
4
+ #
5
+ # See COPYING file distributed along with the datalad package for the
6
+ # copyright and license terms.
7
+ #
8
+ # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
9
+ """High-level interface for dataset creation
10
+
11
+ """
12
+
13
+ import random
14
+ import uuid
15
+ from collections.abc import Mapping
16
+ from typing import Any
17
+
18
+ #from os import listdir
19
+ #from datalad import _seed
20
+ #from datalad.support.constraints import (
21
+ # EnsureStr,
22
+ # EnsureNone,
23
+ # EnsureKeyChoice,
24
+ #)
25
+ from datalad_core.commands import (
26
+ EnsureDataset,
27
+ JointParamProcessor,
28
+ ParamSetConstraint,
29
+ datalad_command,
30
+ )
31
+ from datalad_core.constraints import (
32
+ EnsureChoice,
33
+ EnsurePath,
34
+ )
35
+
36
+
37
class EnsureParentDatasetConditions(ParamSetConstraint):
    """Joint constraint on the ``dataset`` and ``path`` parameters.

    The actual checks are not implemented yet (see the TODO markers in
    ``__call__``); at present the parameter set always passes through
    unchanged.
    """

    input_synopsis = 'existing parent dataset without content conflict'

    def __init__(self):
        # names of the parameters this constraint is declared for
        validated_params = ('dataset', 'path')
        super().__init__(validated_params)

    def __call__(self, val: Mapping[str, Any]) -> Mapping[str, Any]:
        """Return ``val`` as-is after inspecting the given dataset spec."""
        dataset_spec = val['dataset'].pristine_spec
        if dataset_spec is not None:
            # a dataset was given explicitly
            # TODO: check for existence of parent repo
            # TODO: check for collisions with parent repo content
            pass
        # without a dataset there is nothing to check
        return val
52
+
53
+
54
class EnsureAnnexParams(ParamSetConstraint):
    """Joint constraint on the ``annex`` and ``annex_description`` parameters.

    An annex description is only acceptable when an annex is requested.
    """

    input_synopsis = 'coherent annex parameters'

    def __init__(self):
        # names of the parameters this constraint is declared for
        validated_params = ('annex', 'annex_description')
        super().__init__(validated_params)

    def __call__(self, val: Mapping[str, Any]) -> Mapping[str, Any]:
        """Report a violation for a description without annex, else return ``val``."""
        # only an explicit `False` disables the annex; the description is
        # looked up solely in that case
        annex_disabled = val['annex'] is False
        if annex_disabled and val['annex_description']:
            self.raise_for(
                val,
                'cannot assign an annex description with no annex',
            )
        return val
68
+
69
+
70
@datalad_command(
    preproc=JointParamProcessor(
        dict(
            dataset=EnsureDataset(),
            path=EnsurePath(),
            # False: no annex, True: standard annex, 'private': private-mode annex
            annex=EnsureChoice(False, True, 'private'),
        ),
        proc_defaults={'dataset', 'path'},
        tailor_for_dataset=dict(path='dataset'),
        paramset_constraints=(
            EnsureAnnexParams(),
            EnsureParentDatasetConditions(),
        ),
    ),
)
def create_dataset(
    path=None,
    *,
    dataset=None,
    annex=True,
    annex_description=None,
):
    """Create a dataset (stub).

    Only the parameter declaration and its preprocessing are in place; the
    body performs no work and returns an empty list.

    Parameters
    ----------
    path: optional
      Processed by ``EnsurePath`` (tailored for ``dataset``).
    dataset: optional
      Processed by ``EnsureDataset``.
    annex: {False, True, 'private'}, optional
      No annex, standard annex, or private-mode annex.
    annex_description: optional
      Must not be given together with ``annex=False``.

    Returns
    -------
    list
      Always empty.
    """
    # parameters not declared yet: initopts=None, force=False, cfg_proc=None
    return []
105
+
106
+
107
+ def dummy():
108
+ # NOTE(review): apparently a reference copy of an earlier implementation -- it uses names (e.g. initopts, cfg_proc, force, lgr) that this module neither defines nor imports. Two major cases:
109
+ # 1. we got a `dataset` -> we either want to create it (path is None),
110
+ # or another dataset in it (path is not None)
111
+ # 2. we got no dataset -> we want to create a fresh dataset at the
112
+ # desired location, either at `path` or PWD
113
+ if (isinstance(initopts, (list, tuple)) and '--bare' in initopts) or (
114
+ isinstance(initopts, dict) and 'bare' in initopts):
115
+ raise ValueError(
116
+ "Creation of bare repositories is not supported. Consider "
117
+ "one of the create-sibling commands, or use "
118
+ "Git to init a bare repository and push an existing dataset "
119
+ "into it.")
120
+
121
+ # assure cfg_proc is a list (relevant if used via Python API)
122
+ cfg_proc = ensure_list(cfg_proc)
123
+
124
+ # template for the result records that are updated and yielded below
125
+ res = dict(action='create', path=str(path),
126
+ logger=lgr, type='dataset',
127
+ refds=refds_path)
128
+
129
+ refds = None
130
+ if refds_path and refds_path != str(path):
131
+ refds = require_dataset(
132
+ refds_path, check_installed=True,
133
+ purpose='create a subdataset')
134
+
135
+ path_inrefds = path_under_rev_dataset(refds, path)
136
+ if path_inrefds is None:
137
+ yield dict(
138
+ res,
139
+ status='error',
140
+ message=(
141
+ "dataset containing given paths is not underneath "
142
+ "the reference dataset %s: %s",
143
+ ds, str(path)),
144
+ )
145
+ return
146
+
147
+ # try to locate an immediate parent dataset
148
+ # we want to know this (irrespective of whether we plan on adding
149
+ # this new dataset to a parent) in order to avoid conflicts with
150
+ # a potentially absent/uninstalled subdataset of the parent
151
+ # in this location
152
+ # it will cost some filesystem traversal though...
153
+ parentds_path = get_dataset_root(
154
+ op.normpath(op.join(str(path), os.pardir)))
155
+ if parentds_path:
156
+ prepo = GitRepo(parentds_path)
157
+ parentds_path = Path(parentds_path)
158
+ # we cannot get away with a simple
159
+ # GitRepo.get_content_info(), as we need to detect
160
+ # uninstalled/added subdatasets too
161
+ check_path = Path(path)
162
+ pstatus = prepo.status(
163
+ untracked='no',
164
+ # limit query to target path for a potentially massive speed-up
165
+ paths=[check_path.relative_to(parentds_path)])
166
+ if (pstatus.get(check_path, {}).get('type') != 'dataset' and
167
+ any(check_path == p or check_path in p.parents
168
+ for p in pstatus)):
169
+ # redo the check in a slower fashion, it is already broken
170
+ # let's take our time for a proper error message
171
+ conflict = [
172
+ p for p in pstatus
173
+ if check_path == p or check_path in p.parents]
174
+ res.update({
175
+ 'status': 'error',
176
+ 'message': (
177
+ 'collision with content in parent dataset at %s: %s',
178
+ str(parentds_path),
179
+ [str(c) for c in conflict])})
180
+ yield res
181
+ return
182
+ if not force:
183
+ # another set of checks to see whether the target path is pointing
184
+ # into a known subdataset that is not around ATM
185
+ subds_status = {
186
+ parentds_path / k.relative_to(prepo.path)
187
+ for k, v in pstatus.items()
188
+ if v.get('type', None) == 'dataset'}
189
+ check_paths = [check_path]
190
+ check_paths.extend(check_path.parents)
191
+ if any(p in subds_status for p in check_paths):
192
+ conflict = [p for p in check_paths if p in subds_status]
193
+ res.update({
194
+ 'status': 'error',
195
+ 'message': (
196
+ 'collision with %s (dataset) in dataset %s',
197
+ str(conflict[0]),
198
+ str(parentds_path))})
199
+ yield res
200
+ return
201
+
202
+ # important to use the given Dataset object to avoid spurious ID
203
+ # changes with not-yet-materialized Datasets
204
+ tbds = ds if isinstance(ds, Dataset) and \
205
+ ds.path == path else Dataset(str(path))
206
+
207
+ # don't create in non-empty directory without `force`:
208
+ if op.isdir(tbds.path) and listdir(tbds.path) != [] and not force:
209
+ res.update({
210
+ 'status': 'error',
211
+ 'message':
212
+ 'will not create a dataset in a non-empty directory, use '
213
+ '`--force` option to ignore'})
214
+ yield res
215
+ return
216
+
217
+ # Check if specified cfg_proc(s) can be discovered, storing
218
+ # the results so they can be used when the time comes to run
219
+ # the procedure. If a procedure cannot be found, raise an
220
+ # error to prevent creating the dataset.
221
+ cfg_proc_specs = []
222
+ if cfg_proc:
223
+ discovered_procs = tbds.run_procedure(
224
+ discover=True,
225
+ result_renderer='disabled',
226
+ return_type='list',
227
+ )
228
+ for cfg_proc_ in cfg_proc:
229
+ for discovered_proc in discovered_procs:
230
+ if discovered_proc['procedure_name'] == 'cfg_' + cfg_proc_:
231
+ cfg_proc_specs.append(discovered_proc)
232
+ break
233
+ else:
234
+ raise ValueError("Cannot find procedure with name "
235
+ "'%s'" % cfg_proc_)
236
+
237
+ if initopts is not None and isinstance(initopts, list):
238
+ initopts = {'_from_cmdline_': initopts}
239
+
240
+ # Note for the code below:
241
+ # OPT: be "smart" and avoid re-resolving .repo -- expensive in DataLad
242
+ # Reuse tbrepo instance, do not use tbds.repo
243
+
244
+ # create and configure desired repository
245
+ # also provides initial set of content to be tracked with git (not annex)
246
+ if no_annex:
247
+ tbrepo, add_to_git = _setup_git_repo(path, initopts, fake_dates)
248
+ else:
249
+ tbrepo, add_to_git = _setup_annex_repo(
250
+ path, initopts, fake_dates, description)
251
+
252
+ # OPT: be "smart" and avoid re-resolving .repo -- expensive in DataLad
253
+ # Note, must not happen earlier (before if) since "smart" it would not be
254
+ tbds_config = tbds.config
255
+
256
+ # record an ID for this repo for the afterlife
257
+ # to be able to track siblings and children
258
+ id_var = 'datalad.dataset.id'
259
+ # Note, that Dataset property `id` will change when we unset the
260
+ # respective config. Therefore store it before:
261
+ tbds_id = tbds.id
262
+ if id_var in tbds_config:
263
+ # make sure we reset this variable completely, in case of a
264
+ # re-create
265
+ tbds_config.unset(id_var, scope='branch')
266
+
267
+ if _seed is None:
268
+ # just the standard way
269
+ # use a fully random identifier (i.e. UUID version 4)
270
+ uuid_id = str(uuid.uuid4())
271
+ else:
272
+ # Let's generate preseeded ones
273
+ uuid_id = str(uuid.UUID(int=random.getrandbits(128)))
274
+ tbds_config.add(
275
+ id_var,
276
+ tbds_id if tbds_id is not None else uuid_id,
277
+ scope='branch',
278
+ reload=False)
279
+
280
+ # make config overrides permanent in the repo config
281
+ # this is similar to what `annex init` does
282
+ # we are only doing this for config overrides and do not expose
283
+ # a dedicated argument, because it is sufficient for the cmdline
284
+ # and unnecessary for the Python API (there could simply be a
285
+ # subsequent ds.config.add() call)
286
+ for k, v in tbds_config.overrides.items():
287
+ tbds_config.add(k, v, scope='local', reload=False)
288
+
289
+ # all config manipulation is done -> full reload
290
+ tbds_config.reload()
291
+
292
+ # must use the repo.pathobj as this will have resolved symlinks
293
+ add_to_git[tbrepo.pathobj / '.datalad'] = {
294
+ 'type': 'directory',
295
+ 'state': 'untracked'}
296
+
297
+ # save everything, we need to do this now and cannot merge with the
298
+ # call below, because we may need to add this subdataset to a parent
299
+ # but cannot until we have a first commit
300
+ tbrepo.save(
301
+ message='[DATALAD] new dataset',
302
+ git=True,
303
+ # we have to supply our own custom status, as the repo does
304
+ # not have a single commit yet and there is no HEAD reference
305
+ # TODO make `GitRepo.status()` robust to this state.
306
+ _status=add_to_git,
307
+ )
308
+
309
+ for cfg_proc_spec in cfg_proc_specs:
310
+ yield from tbds.run_procedure(
311
+ cfg_proc_spec,
312
+ result_renderer='disabled',
313
+ return_type='generator',
314
+ )
315
+
316
+ # the next only makes sense if we saved the created dataset,
317
+ # otherwise we have no committed state to be registered
318
+ # in the parent
319
+ if isinstance(refds, Dataset) and refds.path != tbds.path:
320
+ # we created a dataset in another dataset
321
+ # -> make submodule
322
+ yield from refds.save(
323
+ path=tbds.path,
324
+ return_type='generator',
325
+ result_renderer='disabled',
326
+ )
327
+ else:
328
+ # if we do not save, we touch the root directory of the new
329
+ # dataset to signal a change in the nature of the directory.
330
+ # this is useful for apps like datalad-gooey (or other
331
+ # inotify consumers) to pick up on such changes.
332
+ tbds.pathobj.touch()
333
+
334
+ res.update({'status': 'ok'})
335
+ yield res
336
+
337
+
338
def _setup_git_repo(path, initopts=None, fake_dates=False):
    """Create a plain Git repository at `path` and mark it as annex-free

    Parameters
    ----------
    path: str or Path
      Location of the repository to create
    initopts: dict, optional
      Handed to the GitRepo constructor as `git_opts`
    fake_dates: bool, optional
      Handed to the GitRepo constructor

    Returns
    -------
    GitRepo, dict
      The new repository, and a status-like record for each file created
      by this setup that still has to be added to Git.
    """
    repo_kwargs = dict(
        create=True,
        create_sanity_checks=False,
        git_opts=initopts,
        fake_dates=fake_dates,
    )
    repo = GitRepo(path, **repo_kwargs)
    # an (empty) .noannex file tells annex to leave this repo alone
    noannex_marker = Path(repo.path) / '.noannex'
    noannex_marker.touch()
    pending = {noannex_marker: dict(type='file', state='untracked')}
    return repo, pending
372
+
373
+
374
def _setup_annex_repo(path, initopts=None, fake_dates=False,
                      description=None):
    """Create an annex repository at `path` with default gitattributes

    The default backend is recorded in `.gitattributes`, and
    `annex.largefiles=nothing` is set up for `.datalad/config` and for
    `**/.git*` wherever it is not already in effect.

    Parameters
    ----------
    path: str or Path
      Location of the repository to create
    initopts: dict, optional
      Handed to the AnnexRepo constructor as `git_opts`
    fake_dates: bool, optional
      Handed to the AnnexRepo constructor
    description: str, optional
      Handed to the AnnexRepo constructor

    Returns
    -------
    AnnexRepo, dict
      The new repository, and a status-like record for each file touched
      by this setup that still has to be added to Git.
    """
    # backend=None: not set at creation time, to avoid a dedicated commit
    # version=None: taken from the configuration
    repo = AnnexRepo(
        path,
        create=True,
        create_sanity_checks=False,
        backend=None,
        version=None,
        description=description,
        git_opts=initopts,
        fake_dates=fake_dates,
    )
    # the backend goes into .gitattributes as a staged, uncommitted change
    default_backend = cfg.obtain('datalad.repo.backend')
    repo.set_default_backend(default_backend, persistent=True, commit=False)
    add_to_git = {
        repo.pathobj / '.gitattributes': dict(type='file', state='added'),
    }

    # content under .datalad must never be annexed (v6 annex repos);
    # each entry is (path relative to .datalad, attribute, value)
    datalad_attrs = (
        ('config', 'annex.largefiles', 'nothing'),
    )
    queried = [op.join('.datalad', spec[0]) for spec in datalad_attrs]
    current = repo.get_gitattributes(queried)
    # only attributes that deviate from the desired value get (re)written
    missing = [
        (relpath, {attr: value})
        for relpath, attr, value in datalad_attrs
        if current.get(op.join('.datalad', relpath), {}).get(attr, None) != value
    ]
    if missing:
        repo.set_gitattributes(
            missing,
            attrfile=op.join('.datalad', '.gitattributes'))

    # keep git annex away from any .git* file (gh-1597)
    dotgit_attrs = repo.get_gitattributes('.git')
    if dotgit_attrs.get('.git', {}).get('annex.largefiles', None) != 'nothing':
        repo.set_gitattributes([
            ('**/.git*', {'annex.largefiles': 'nothing'})])
        # repo.pathobj is used because it has symlinks resolved
        add_to_git[repo.pathobj / '.gitattributes'] = dict(
            type='file', state='untracked')
    return repo, add_to_git
File without changes