bdm_tool-0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bdm_tool-0.2.dist-info/METADATA +405 -0
- bdm_tool-0.2.dist-info/RECORD +9 -0
- bdm_tool-0.2.dist-info/WHEEL +5 -0
- bdm_tool-0.2.dist-info/entry_points.txt +2 -0
- bdm_tool-0.2.dist-info/licenses/LICENSE +21 -0
- bdm_tool-0.2.dist-info/top_level.txt +1 -0
- bdmtool/__init__.py +0 -0
- bdmtool/_command.py +104 -0
- bdmtool/files_management.py +444 -0
bdm_tool-0.2.dist-info/METADATA
ADDED
@@ -0,0 +1,405 @@
Metadata-Version: 2.4
Name: bdm-tool
Version: 0.2
Summary: Simple lightweight dataset versioning utility based purely on the file system and symbolic links
Author-email: Andrei Khobnia <andrei.khobnia@gmail.com>
License-Expression: MIT
Keywords: version-control,data-versioning,versioning,machine-learning,ai,data,developer-tools
Classifier: Operating System :: OS Independent
Classifier: Development Status :: 4 - Beta
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Requires-Python: >=3.9
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: docs
Requires-Dist: sphinx; extra == "docs"
Requires-Dist: sphinx-rtd-theme; extra == "docs"
Dynamic: license-file

# BDM Tool
__BDM__ (Big Dataset Management) Tool is a __simple__, lightweight dataset versioning utility based purely on the file system and symbolic links.

BDM Tool features:
* __No full downloads required__: Switch to any dataset version without downloading the entire dataset to your local machine.
* __Independent of external VCS__: Does not rely on external version control systems like Git or Mercurial, and does not require integrating with one.
* __Easy dataset sharing__: Supports sharing datasets via remote file systems on a data server.
* __Fast version switching__: Switching between dataset versions does not require long synchronization processes.
* __Transparent version access__: Different dataset versions are accessed through simple and intuitive paths (e.g., dataset/v1.0/, dataset/v2.0/, etc.), making versioning fully transparent to configuration files, MLflow parameters, and other tooling.
* __Storage optimization__: Efficiently stores multiple dataset versions using symbolic links to avoid duplication.
* __Designed for large, complex datasets__: Well-suited for managing big datasets with intricate directory and subdirectory structures.
* __Python API for automation__: Provides a simple Python API to automatically create new dataset versions within MLOps pipelines, workflows, ETL jobs, and other automated processes (see the sketch at the end of the Usage section).

## General Principles
* Each version of a dataset is a path like `dataset/v1.0/`, `dataset/v2.0/`.
* A new dataset version is generated whenever modifications are made.
* Each dataset version is immutable and read-only.
* A new version includes only the files that have been added or modified, while unchanged files and directories are stored as symbolic links.
* Each version contains a `readme.txt` file with a summary of changes.

## Installation
### Installation from PyPI (Recommended)
Use `pip` to install the tool with the following command:
```shell
pip install bdm-tool
```

### Installation from Sources
Use `pip` to install the tool directly from the GitHub repository:
```shell
pip install git+https://github.com/aikho/bdm-tool.git
```

## Usage
### Start Versioning Dataset
Let's assume we have a dataset with the following structure:
```shell
tree testdata
testdata
├── annotation
│   ├── part01
│   │   ├── regions01.json
│   │   ├── regions02.json
│   │   ├── regions03.json
│   │   ├── regions04.json
│   │   └── regions05.json
│   ├── part02
│   │   ├── regions01.json
│   │   ├── regions02.json
│   │   ├── regions03.json
│   │   ├── regions04.json
│   │   └── regions05.json
│   └── part03
│       ├── regions01.json
│       ├── regions02.json
│       ├── regions03.json
│       ├── regions04.json
│       └── regions05.json
└── data
    ├── part01
    │   ├── image01.png
    │   ├── image02.png
    │   ├── image03.png
    │   ├── image04.png
    │   └── image05.png
    ├── part02
    │   ├── image01.png
    │   ├── image02.png
    │   ├── image03.png
    │   ├── image04.png
    │   └── image05.png
    └── part03
        ├── image01.png
        ├── image02.png
        ├── image03.png
        ├── image04.png
        └── image05.png

9 directories, 30 files
```
To put it under `bdm-tool` version control, use the `bdm init` command:
```shell
bdm init testdata
Version v0.1 of dataset has been created.
Files added: 3, updated: 0, removed: 0, symlinked: 0
```
The first version `v0.1` of the dataset has been created. Let’s take a look at the file structure:
```shell
tree testdata
testdata
├── current -> ./v0.1
└── v0.1
    ├── annotation
    │   ├── part01
    │   │   ├── regions01.json
    │   │   ├── regions02.json
    │   │   ├── regions03.json
    │   │   ├── regions04.json
    │   │   └── regions05.json
    │   ├── part02
    │   │   ├── regions01.json
    │   │   ├── regions02.json
    │   │   ├── regions03.json
    │   │   ├── regions04.json
    │   │   └── regions05.json
    │   └── part03
    │       ├── regions01.json
    │       ├── regions02.json
    │       ├── regions03.json
    │       ├── regions04.json
    │       └── regions05.json
    ├── data
    │   ├── part01
    │   │   ├── image01.png
    │   │   ├── image02.png
    │   │   ├── image03.png
    │   │   ├── image04.png
    │   │   └── image05.png
    │   ├── part02
    │   │   ├── image01.png
    │   │   ├── image02.png
    │   │   ├── image03.png
    │   │   ├── image04.png
    │   │   └── image05.png
    │   └── part03
    │       ├── image01.png
    │       ├── image02.png
    │       ├── image03.png
    │       ├── image04.png
    │       └── image05.png
    └── readme.txt

11 directories, 31 files
```
We can see that version `v0.1` contains all the initial files along with a `readme.txt` file. Let’s take a look inside `readme.txt`:
```shell
cat testdata/v0.1/readme.txt
Dataset version v0.1 has been created!
Created timestamp: 2023-08-07 19:40:19.498656, OS user: rock-star-ml-engineer
Files added: 2, updated: 0, removed: 0, symlinked: 0

Files added:
annotation/
data/
```
The file shows the creation date, operating system user, relevant statistics, and a summary of performed operations.

### Add New Files
Suppose we have additional data stored in the `new_data` directory:
```shell
tree new_data/
new_data/
├── annotation
│   ├── regions06.json
│   └── regions07.json
└── data
    ├── image06.png
    └── image07.png

2 directories, 4 files
```
New files can be added to a new dataset version using the `bdm change` command. Use the `--add` flag to add individual files, or `--add_all` to add all files from a specified directory:
```shell
bdm change --add_all new_data/annotation/:annotation/part03/ --add_all new_data/data/:data/part03/ -c -m "add new files" testdata
Version v0.2 of dataset has been created.
Files added: 4, updated: 0, removed: 0, symlinked: 14
```
The `:` character is used as a separator between the source path and the target subpath inside the dataset where the files should be added. For example, `new_data/annotation/:annotation/part03/` takes the files from `new_data/annotation/` and places them under `annotation/part03/` in the new version.

The `-c` flag stands for copy: when it is used, source files are copied into the new version instead of moved. Moving can be faster, so you may prefer to omit `-c` for performance reasons.

The `-m` flag allows you to add a message, which is then stored in the `readme.txt` file of the new dataset version.

Let’s take a look inside the `readme.txt` file of the new version:
```shell
cat testdata/current/readme.txt
Dataset version v0.2 has been created from previous version v0.1!
add new files
Created timestamp: 2023-08-07 19:38:39.758828, OS user: rock-star-ml-engineer
Files added: 4, updated: 0, removed: 0, symlinked: 14

Files added:
annotation/part03/regions06.json
annotation/part03/regions07.json
data/part03/image06.png
data/part03/image07.png
```
Next, let’s examine the updated file structure:
```shell
tree testdata
testdata
├── current -> ./v0.2
├── v0.1
│   ├── annotation
│   │   ├── part01
│   │   │   ├── regions01.json
│   │   │   ├── regions02.json
│   │   │   ├── regions03.json
│   │   │   ├── regions04.json
│   │   │   └── regions05.json
│   │   ├── part02
│   │   │   ├── regions01.json
│   │   │   ├── regions02.json
│   │   │   ├── regions03.json
│   │   │   ├── regions04.json
│   │   │   └── regions05.json
│   │   └── part03
│   │       ├── regions01.json
│   │       ├── regions02.json
│   │       ├── regions03.json
│   │       ├── regions04.json
│   │       └── regions05.json
│   ├── data
│   │   ├── part01
│   │   │   ├── image01.png
│   │   │   ├── image02.png
│   │   │   ├── image03.png
│   │   │   ├── image04.png
│   │   │   └── image05.png
│   │   ├── part02
│   │   │   ├── image01.png
│   │   │   ├── image02.png
│   │   │   ├── image03.png
│   │   │   ├── image04.png
│   │   │   └── image05.png
│   │   └── part03
│   │       ├── image01.png
│   │       ├── image02.png
│   │       ├── image03.png
│   │       ├── image04.png
│   │       └── image05.png
│   └── readme.txt
└── v0.2
    ├── annotation
    │   ├── part01 -> ../../v0.1/annotation/part01
    │   ├── part02 -> ../../v0.1/annotation/part02
    │   └── part03
    │       ├── regions01.json -> ../../../v0.1/annotation/part03/regions01.json
    │       ├── regions02.json -> ../../../v0.1/annotation/part03/regions02.json
    │       ├── regions03.json -> ../../../v0.1/annotation/part03/regions03.json
    │       ├── regions04.json -> ../../../v0.1/annotation/part03/regions04.json
    │       ├── regions05.json -> ../../../v0.1/annotation/part03/regions05.json
    │       ├── regions06.json
    │       └── regions07.json
    ├── data
    │   ├── part01 -> ../../v0.1/data/part01
    │   ├── part02 -> ../../v0.1/data/part02
    │   └── part03
    │       ├── image01.png -> ../../../v0.1/data/part03/image01.png
    │       ├── image02.png -> ../../../v0.1/data/part03/image02.png
    │       ├── image03.png -> ../../../v0.1/data/part03/image03.png
    │       ├── image04.png -> ../../../v0.1/data/part03/image04.png
    │       ├── image05.png -> ../../../v0.1/data/part03/image05.png
    │       ├── image06.png
    │       └── image07.png
    └── readme.txt

20 directories, 46 files
```

### Update Files
Files can be updated in a new dataset version using the `bdm change` command. Use the `--update` flag to update individual files, or `--update_all` to update all files in a given directory:
```shell
bdm change --update data_update/regions05.json:annotation/part03/ -c -m "update" testdata
Version v0.3 of dataset has been created.
Files added: 0, updated: 1, removed: 0, symlinked: 9
```
Let’s take a look inside the `readme.txt` file of the new version:
```shell
cat testdata/current/readme.txt
Dataset version v0.3 has been created from previous version v0.2!
update
Created timestamp: 2023-08-07 19:40:01.753345, OS user: rock-star-data-scientist
Files added: 0, updated: 1, removed: 0, symlinked: 9

Files updated:
annotation/part03/regions05.json
```
Let’s take a look at the file structure:
```shell
tree testdata
testdata
├── current -> ./v0.3
├── v0.1
│   ├── annotation
│   │   ├── part01
│   │   │   ├── regions01.json
│   │   │   ├── regions02.json
│   │   │   ├── regions03.json
│   │   │   ├── regions04.json
│   │   │   └── regions05.json
│   │   ├── part02
│   │   │   ├── regions01.json
│   │   │   ├── regions02.json
│   │   │   ├── regions03.json
│   │   │   ├── regions04.json
│   │   │   └── regions05.json
│   │   └── part03
│   │       ├── regions01.json
│   │       ├── regions02.json
│   │       ├── regions03.json
│   │       ├── regions04.json
│   │       └── regions05.json
│   ├── data
│   │   ├── part01
│   │   │   ├── image01.png
│   │   │   ├── image02.png
│   │   │   ├── image03.png
│   │   │   ├── image04.png
│   │   │   └── image05.png
│   │   ├── part02
│   │   │   ├── image01.png
│   │   │   ├── image02.png
│   │   │   ├── image03.png
│   │   │   ├── image04.png
│   │   │   └── image05.png
│   │   └── part03
│   │       ├── image01.png
│   │       ├── image02.png
│   │       ├── image03.png
│   │       ├── image04.png
│   │       └── image05.png
│   └── readme.txt
├── v0.2
│   ├── annotation
│   │   ├── part01 -> ../../v0.1/annotation/part01
│   │   ├── part02 -> ../../v0.1/annotation/part02
│   │   └── part03
│   │       ├── regions01.json -> ../../../v0.1/annotation/part03/regions01.json
│   │       ├── regions02.json -> ../../../v0.1/annotation/part03/regions02.json
│   │       ├── regions03.json -> ../../../v0.1/annotation/part03/regions03.json
│   │       ├── regions04.json -> ../../../v0.1/annotation/part03/regions04.json
│   │       ├── regions05.json -> ../../../v0.1/annotation/part03/regions05.json
│   │       ├── regions06.json
│   │       └── regions07.json
│   ├── data
│   │   ├── part01 -> ../../v0.1/data/part01
│   │   ├── part02 -> ../../v0.1/data/part02
│   │   └── part03
│   │       ├── image01.png -> ../../../v0.1/data/part03/image01.png
│   │       ├── image02.png -> ../../../v0.1/data/part03/image02.png
│   │       ├── image03.png -> ../../../v0.1/data/part03/image03.png
│   │       ├── image04.png -> ../../../v0.1/data/part03/image04.png
│   │       ├── image05.png -> ../../../v0.1/data/part03/image05.png
│   │       ├── image06.png
│   │       └── image07.png
│   └── readme.txt
└── v0.3
    ├── annotation
    │   ├── part01 -> ../../v0.2/annotation/part01
    │   ├── part02 -> ../../v0.2/annotation/part02
    │   └── part03
    │       ├── regions01.json -> ../../../v0.2/annotation/part03/regions01.json
    │       ├── regions02.json -> ../../../v0.2/annotation/part03/regions02.json
    │       ├── regions03.json -> ../../../v0.2/annotation/part03/regions03.json
    │       ├── regions04.json -> ../../../v0.2/annotation/part03/regions04.json
    │       ├── regions05.json
    │       ├── regions06.json -> ../../../v0.2/annotation/part03/regions06.json
    │       └── regions07.json -> ../../../v0.2/annotation/part03/regions07.json
    ├── data -> ../v0.2/data
    └── readme.txt

26 directories, 54 files
```

### Remove Files
Files or directories can be removed from the dataset using the `bdm change` command with the `--remove` option:
```shell
bdm change --remove annotation/part01/regions05.json --remove annotation/part01/regions04.json -c -m "remove obsolete data" testdata
Version v0.4 of dataset has been created.
Files added: 0, updated: 0, removed: 2, symlinked: 8
```

### Combining Operations
Adding, updating, and removing operations can be freely combined within a single dataset version. Use the `bdm change -h` command to get detailed information on available options:
```shell
bdm change -h
```

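### Python API
Every operation shown above is also available from Python, which is convenient for automation. Below is a minimal sketch based on the signatures of `init_dataset` and `make_new_dataset_version` in `bdmtool/files_management.py` from this release; the dataset and directory names reuse the example above.
```python
from bdmtool.files_management import init_dataset, make_new_dataset_version

# Put an existing directory under version control (creates version v0.1).
stats = init_dataset("testdata")
print(stats["version"])  # -> "v0.1"

# Create a new version. Each change entry is a (source_path, target_subpath)
# tuple, mirroring the CLI's `source:target` syntax; copy_files=True copies
# source files into the new version instead of moving them.
stats = make_new_dataset_version(
    "testdata",
    changes={"add_all": [("new_data/annotation/", "annotation/part03/")]},
    copy_files=True,
    message="add new files")
print(stats["added"])  # relative paths added in the new version
```
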
## License
See the `LICENSE` file in the repo.

bdm_tool-0.2.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
bdm_tool-0.2.dist-info/licenses/LICENSE,sha256=UNFulPGxQ4Kx1YUaup-ihKgoBue4hSf15VdsarGuxw8,1071
bdmtool/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
bdmtool/_command.py,sha256=ombC_xgcnYzojAjuZQDrw8zgPg3DgxFE4-2L_v5l9Mw,4912
bdmtool/files_management.py,sha256=-KLbLK7ayW3BtxVPFdGXsFXTfYrW9HbAtlSCkdhh8Yk,16207
bdm_tool-0.2.dist-info/METADATA,sha256=aozbWhoNPsN_0PKfrjjztP9Q1Yp14nCWpRY0ZJqIqNk,16412
bdm_tool-0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
bdm_tool-0.2.dist-info/entry_points.txt,sha256=o_0brtPDEXNoP4kr128AYUVl_8jeF1-eKOsw6Db32sw,49
bdm_tool-0.2.dist-info/top_level.txt,sha256=X6hV9SLhzm-mX4ZoYaxsVbG0QQ59oyNK1wxgv1_NrB0,8
bdm_tool-0.2.dist-info/RECORD,,
bdm_tool-0.2.dist-info/licenses/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 Andrei Khobnia

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
bdm_tool-0.2.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
bdmtool
bdmtool/__init__.py
ADDED
File without changes
bdmtool/_command.py
ADDED
@@ -0,0 +1,104 @@
"""
Command line parsing module of BDM tool.
"""
import argparse
from .files_management import init_dataset, make_new_dataset_version


class AppendWithSubpathAction(argparse.Action):
    """
    Argparse action for appending to the list with subpath.
    """
    def __call__(self, parser, namespace, values, option_string=None):
        current_paths = getattr(namespace, self.dest) or []
        paths = values.split(':', 1)
        value = (paths[0], paths[1]) if len(paths) > 1 else (paths[0], "")
        current_paths.append(value)
        setattr(namespace, self.dest, current_paths)


parser = argparse.ArgumentParser(
    prog='bdm',
    description='Big Datasets version Management tool')
subparsers = parser.add_subparsers(dest='command', title="Commands")

# create the parser for the "init" command
parser_init = subparsers.add_parser('init', help='start versioning a dataset using BDM tool')
parser_init.add_argument('dataset_path', type=str, action='store', help='path to a dataset root')
parser_init.add_argument('-n', '--number', action='store', metavar='dataset_version_number',
                         help='custom number of initial dataset version')

# create the parser for the "change" command
parser_change = subparsers.add_parser('change', help='make a new version of a dataset')
parser_change.add_argument('dataset_path', type=str, action='store',
                           help='path to a dataset root directory')
parser_change.add_argument('-a', '--add', type=str, action=AppendWithSubpathAction,
                           metavar='file_path[:target_subpath]',
                           help='add file from `file_path` to `target_subpath` or ' \
                                'root of a dataset')
parser_change.add_argument('-al', '--add_all', type=str,
                           action=AppendWithSubpathAction, metavar='dir_path[:target_subpath]',
                           help='add all files from `dir_path` to `target_subpath` or ' \
                                'root of a dataset')
parser_change.add_argument('-u', '--update', type=str, action=AppendWithSubpathAction,
                           metavar='file_path[:target_subpath]',
                           help='update file in `target_subpath` or root of a dataset ' \
                                'by file from `file_path`')
parser_change.add_argument('-ua', '--update_all', type=str, action=AppendWithSubpathAction,
                           metavar='dir_path[:target_subpath]',
                           help='update files in `target_subpath` or root of a dataset ' \
                                'by all files from `dir_path`')
parser_change.add_argument('-r', '--remove', type=str, action='append', metavar='file_subpath',
                           help='remove file `file_subpath` from a dataset')
parser_change.add_argument('-m', '--message', action='store', metavar='text',
                           help='optional message for new dataset version README file')
parser_change.add_argument('-c', '--copy_files', action='store_true',
                           help='make copy of files instead of moving when update or ' \
                                'add files to dataset')
parser_change.add_argument('-n', '--number', action='store', metavar='dataset_version_number',
                           help='custom number of new dataset version')
parser_change.add_argument('-im', '--increase_major_version', action='store_true',
                           help='if custom version is not defined increase major number of version')


def print_results(results):
    """
    Print statistics on dataset version changes.
    """
    print(f"Version {results['version']} of dataset has been created.")
    print(f"Files added: {len(results['added'])}, updated: {len(results['updated'])}, " +
          f"removed: {len(results['removed'])}, symlinked: {len(results['symlinks'])}\n\n")


def run_bdm():
    """
    Entry point function for `bdm` command.
    """
    args = parser.parse_args()
    if args.command == 'init':
        results = init_dataset(args.dataset_path, args.number)
        print_results(results)
    elif args.command == "change":
        try:
            results = make_new_dataset_version(
                dataset_root_path=args.dataset_path,
                changes={
                    'add': args.add,
                    'add_all': args.add_all,
                    'update': args.update,
                    'update_all': args.update_all,
                    'remove': args.remove,
                },
                new_version=args.number,
                increase_major=args.increase_major_version,
                copy_files=args.copy_files,
                message=args.message)
            print_results(results)
        except Exception as e:
            print(e)
    else:
        print("Invalid command! Try `bdm --help` to get help.")


if __name__ == "__main__":
    run_bdm()
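To see how `AppendWithSubpathAction` turns `source:target` arguments into tuples, the module-level `parser` can be driven directly; a small check, assuming the package is installed as published here:
```python
from bdmtool._command import parser

# Parse a `change` invocation without executing it; AppendWithSubpathAction
# splits each `source:target` argument into a (source, subpath) tuple.
args = parser.parse_args(
    ["change",
     "--add_all", "new_data/annotation/:annotation/part03/",
     "-c", "-m", "add new files",
     "testdata"])
print(args.add_all)     # [('new_data/annotation/', 'annotation/part03/')]
print(args.copy_files)  # True
```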
bdmtool/files_management.py
ADDED
@@ -0,0 +1,444 @@
"""
Dataset files management Python API module.
"""
from typing import Dict, List, Set
import os
import re
import stat
import shutil
import datetime
import uuid

README_FILENAME = "readme.txt"


class UserMistakeException(Exception):
    """ Exception class for user's errors. """


def init_dataset(
        dataset_root_path: str,
        init_version: str = None):
    """
    Init version tracking for a dataset.

    Args:
        dataset_root_path: root path of a dataset directory
        init_version: optional initial dataset version string (for example, `v1.0`)

    Returns:
        Dict: Dictionary with statistics on added, removed, updated and linked files
    """
    # check dataset path
    if not os.path.isdir(dataset_root_path):
        raise UserMistakeException("Dataset does not exist in provided path!")

    if not init_version:
        init_version = "v0.1"

    to_move = []
    for filename in os.listdir(dataset_root_path):
        to_move.append(_format_path(filename, os.path.isdir(
            os.path.join(dataset_root_path, filename))))

    temp_dir = uuid.uuid4().hex
    temp_version_path = os.path.join(dataset_root_path, temp_dir)
    os.mkdir(temp_version_path)

    # move all content to new dir
    for filename in to_move:
        shutil.move(
            os.path.join(dataset_root_path, filename),
            os.path.join(temp_version_path, filename))

    # rename dir to version number
    new_version_path = os.path.join(dataset_root_path, init_version)
    shutil.move(temp_version_path, new_version_path)

    results = {
        "added": to_move,
        "updated": [],
        "removed": [],
        "symlinks": [],
        "version": init_version}
    _make_version_readme(new_version_path, init_version, None, results, None)
    _make_readonly(new_version_path)
    _make_symlink_for_current_version(dataset_root_path, init_version)

    return results


def make_new_dataset_version(
        dataset_root_path: str,
        changes: dict,
        new_version: str = None,
        increase_major: bool = False,
        copy_files: bool = False,
        message: str = None) -> Dict:
    """
    Make new version of dataset by applying changes.

    Args:
        dataset_root_path: Root path of a dataset directory
        changes: Dictionary with list fields "add", "add_all", "update",
            "update_all", "remove" representing changes that need to be applied to a dataset.
        new_version: Optional new version string (for example, `v2.0`)
        increase_major: Increase major version
        copy_files: Copy new files to add or update dataset instead of move
        message: Optional comment for `readme.txt` file

    Returns:
        Dict: Dictionary with statistics on added, removed, updated and linked files
    """
    # check dataset path
    if not os.path.isdir(dataset_root_path):
        raise UserMistakeException("Dataset does not exist in provided path!")

    # get the latest version
    prev_version = _get_versions_list(dataset_root_path)[-1]
    prev_version_path = os.path.join(os.path.abspath(dataset_root_path), prev_version)
    # obtain new version number
    if new_version is None:
        new_version = _gen_next_version_number(prev_version, increase_major=increase_major)
    if not _validate_next_version_number(prev_version, new_version):
        raise UserMistakeException(
            "New version number must be greater than the latest version number!")

    # validate changes before applying
    if "add" in changes and changes["add"] is not None:
        for source_path, target_subpath in changes["add"]:
            _validate_add(source_path, target_subpath)
    if "add_all" in changes and changes["add_all"] is not None:
        for source_path, target_subpath in changes["add_all"]:
            _validate_add_all(source_path, target_subpath)
    if "update" in changes and changes["update"] is not None:
        for source_path, target_subpath in changes["update"]:
            _validate_update(source_path, target_subpath, prev_version_path, prev_version)
    if "update_all" in changes and changes["update_all"] is not None:
        for source_path, target_subpath in changes["update_all"]:
            _validate_update_all(
                source_path,
                target_subpath,
                prev_version_path,
                prev_version)
    if "remove" in changes and changes["remove"] is not None:
        for source_path in changes["remove"]:
            _validate_remove(source_path, prev_version_path, prev_version)

    # create new version directory
    new_version_path = os.path.join(os.path.abspath(dataset_root_path), new_version)
    os.mkdir(new_version_path)

    results_stat = {
        "added": [],
        "updated": [],
        "removed": [],
        "symlinks": [],
        "version": new_version}
    # add new files
    if "add" in changes and changes["add"] is not None:
        for source_path, target_subpath in changes["add"]:
            _apply_add(
                source_path,
                target_subpath,
                new_version_path,
                copy_files,
                results_stat)
    if "add_all" in changes and changes["add_all"] is not None:
        for source_path, target_subpath in changes["add_all"]:
            _apply_add_all(
                source_path,
                target_subpath,
                new_version_path,
                copy_files,
                results_stat)
    # update files
    if "update" in changes and changes["update"] is not None:
        for source_path, target_subpath in changes["update"]:
            _apply_update(source_path, target_subpath, new_version_path, copy_files, results_stat)
    if "update_all" in changes and changes["update_all"] is not None:
        for source_path, target_subpath in changes["update_all"]:
            _apply_update_all(
                source_path,
                target_subpath,
                new_version_path,
                copy_files,
                results_stat)
    # create subdirs for removing files
    exclude_links = [os.path.join(prev_version_path, README_FILENAME)]
    if "remove" in changes and changes["remove"] is not None:
        for source_path in changes["remove"]:
            _apply_remove(
                source_path,
                prev_version_path,
                new_version_path,
                exclude_links,
                results_stat)

    # create symlinks for unchanged files (removed files are excluded)
    results_stat["symlinks"] = _make_symlinks(prev_version_path, new_version_path, set(exclude_links))

    _make_version_readme(new_version_path, new_version, prev_version, results_stat, message)
    _make_readonly(new_version_path)
    _make_symlink_for_current_version(dataset_root_path, new_version)

    return results_stat


def _validate_add(source_path: str, target_subpath: str):
    _check_source_file_existence(source_path)
    _validate_subpath(target_subpath)


def _validate_add_all(source_path: str, target_subpath: str):
    _check_source_dir_existence(source_path)
    _validate_subpath(target_subpath)


def _validate_update(
        source_path: str, target_subpath: str, prev_version_path: str, prev_version: str):
    _check_source_file_existence(source_path)
    _validate_subpath(target_subpath)
    prev_file_path = os.path.join(
        prev_version_path, target_subpath, source_path.split(os.path.sep)[-1])
    _check_path_existence(prev_file_path, prev_version)


def _validate_update_all(
        source_path: str, target_subpath: str, prev_version_path: str, prev_version: str):
    _check_source_dir_existence(source_path)
    _validate_subpath(target_subpath)
    for entry in list(os.scandir(source_path)):
        prev_file_path = os.path.join(prev_version_path, target_subpath, entry.name)
        _check_path_existence(prev_file_path, prev_version)


def _validate_remove(source_path: str, prev_version_path: str, prev_version: str):
    _validate_subpath(source_path)
    prev_file_path = os.path.join(prev_version_path, source_path)
    _check_path_existence(prev_file_path, prev_version)


def _apply_add(
        source_path: str,
        target_subpath: str,
        new_version_path: str,
        copy_files: bool,
        results_stat: Dict):
    _add_file_to_subpath(
        new_version_path, os.path.abspath(source_path), target_subpath, copy_files)
    results_stat["added"].append(
        _format_path(os.path.join(
            target_subpath, os.path.split(source_path)[-1]), os.path.isdir(source_path)))


def _apply_add_all(
        source_path: str,
        target_subpath: str,
        new_version_path: str,
        copy_files: bool,
        results_stat: Dict):
    for entry in list(os.scandir(source_path)):
        entry_source_path = os.path.join(os.path.abspath(source_path), entry.name)
        _add_file_to_subpath(
            new_version_path, entry_source_path, target_subpath, copy_files)
        results_stat["added"].append(
            _format_path(
                os.path.join(target_subpath, entry.name),
                os.path.isdir(entry_source_path)))


def _apply_update(
        source_path: str,
        target_subpath: str,
        new_version_path: str,
        copy_files: bool,
        results_stat: Dict):
    _add_file_to_subpath(new_version_path, source_path, target_subpath, copy_files)
    results_stat["updated"].append(
        _format_path(
            os.path.join(target_subpath, source_path.split(os.path.sep)[-1]),
            os.path.isdir(source_path)))


def _apply_update_all(
        source_path: str,
        target_subpath: str,
        new_version_path: str,
        copy_files: bool,
        results_stat: Dict):
    for entry in list(os.scandir(source_path)):
        entry_source_path = os.path.join(source_path, entry.name)
        _add_file_to_subpath(
            new_version_path, entry_source_path, target_subpath, copy_files)
        results_stat["updated"].append(
            _format_path(
                os.path.join(target_subpath, entry.name),
                os.path.isdir(entry_source_path)))


def _apply_remove(
        source_path: str,
        prev_version_path: str,
        new_version_path: str,
        exclude_links: List,
        results_stat: Dict):
    prev_file_path = os.path.join(prev_version_path, source_path)
    os.makedirs(
        os.path.join(new_version_path, *os.path.split(source_path)[:-1]), exist_ok=True)
    results_stat["removed"].append(_format_path(source_path, os.path.isdir(prev_file_path)))
    exclude_links.append(prev_file_path)


def _check_source_file_existence(file_path: str):
    if not os.path.exists(file_path):
        raise UserMistakeException(
            f"File {file_path} doesn't exist")


def _check_source_dir_existence(file_path: str):
    if not os.path.exists(file_path) or not os.path.isdir(file_path):
        raise UserMistakeException(
            f"Directory {file_path} doesn't exist")


def _check_path_existence(file_path: str, prev_version: str):
    if not os.path.exists(file_path):
        raise UserMistakeException(
            f"File {file_path} doesn't exist in previous version {prev_version}")


def _format_path(file_path: str, is_dir: bool):
    if is_dir and file_path[-1] != os.sep:
        return file_path + os.sep
    return file_path


def _validate_subpath(subpath: str):
    if len(subpath) > 0 and subpath[0] in "./\\":
        raise UserMistakeException(
            f"Relative path {subpath} is invalid. Please, "\
            "don't start relative path with '/' or '.'")


def _get_versions_list(dataset_root_path: str):
    return sorted([item.name for item in os.scandir(dataset_root_path) if item.is_dir() and
                   item.name != "current" and item.name != "readme"])


def _add_file_to_subpath(
        root_path: str,
        source_path: str,
        target_subpath: str,
        copy_files: bool=True):
    if target_subpath and len(target_subpath) > 0:
        os.makedirs(os.path.join(root_path, target_subpath), exist_ok=True)
    if os.path.isdir(source_path):
        # recursively add folder
        if copy_files:
            shutil.copytree(source_path, os.path.join(root_path, target_subpath))
        else:
            shutil.move(source_path, os.path.join(root_path, target_subpath))
    else:
        # add file
        if copy_files:
            shutil.copy2(source_path, os.path.join(root_path, target_subpath))
        else:
            shutil.move(source_path, os.path.join(root_path, target_subpath))


def _make_readonly(path: str):
    def chmod_operation(root_path, name):
        os.chmod(
            os.path.join(root_path, name), stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH)
    def chmod_dir_operation(root_path, name):
        os.chmod(
            os.path.join(root_path, name), (stat.S_IREAD | stat.S_IXUSR | stat.S_IRGRP |
                                            stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH))
    for root, dirs, files in os.walk(path, topdown=False):
        for filename in files:
            chmod_operation(root, filename)
        for filename in dirs:
            chmod_dir_operation(root, filename)
    chmod_dir_operation(path, "")


def _make_symlinks(source_path: str, target_path: str, exclude: Set):
    symlinks = []
    for filename in os.listdir(source_path):
        source_file_path = os.path.join(source_path, filename)
        if source_file_path in exclude:
            continue
        symlink_path = os.path.join(target_path, filename)
        if os.path.exists(symlink_path):
            if os.path.isdir(symlink_path):
                # create symlinks inside directory recursively
                sublinks = _make_symlinks(
                    os.path.join(source_path, filename),
                    symlink_path,
                    exclude)
                symlinks.extend(sublinks)
        else:
            os.symlink(os.path.relpath(source_file_path, start=target_path), symlink_path)
            symlinks.append(symlink_path)
    return symlinks


def _write_stats(stats, key, out):
    if len(stats[key]) > 0:
        out.write(f"Files {key}:\n")
        for filename in stats[key]:
            out.write(filename)
            out.write("\n")
        out.write("\n")


def _make_version_readme(
        new_version_path: str,
        new_version: str,
        old_version: str,
        stats: Dict,
        message: str):
    with open(os.path.join(new_version_path, README_FILENAME), "w", encoding="utf-8") as out:
        if old_version:
            out.write(f"Dataset version {new_version} has been created from "\
                      f"previous version {old_version}!\n")
        else:
            out.write(f"Dataset version {new_version} has been created!\n")
        if message:
            out.write(message)
            if len(message) >= 1 and message[-1] != '\n':
                out.write("\n")
        out.write(f"Created timestamp: {str(datetime.datetime.now())}, OS user: {os.getlogin()}\n")
        out.write("Files added: %d, updated: %d, removed: %d, symlinked: %d\n\n" % (
            len(stats["added"]),
            len(stats["updated"]),
            len(stats["removed"]),
            len(stats["symlinks"])))
        _write_stats(stats, "added", out)
        _write_stats(stats, "updated", out)
        _write_stats(stats, "removed", out)


def _make_symlink_for_current_version(dataset_root_path: str, new_version: str):
    symlink_path = os.path.join(dataset_root_path, "current")
    if os.path.exists(symlink_path):
        os.unlink(symlink_path)
    os.symlink(os.path.join(".", new_version), symlink_path)


def _increase_version(version: str) -> str:
    return str(list(map(int, re.findall(r'\d+', version)))[-1] + 1)


def _gen_next_version_number(prev_version: str, increase_major: bool=False):
    version_numbers = prev_version.split('.')
    if increase_major:
        # keep any non-numeric prefix (e.g. the leading "v") when bumping
        # the major component; otherwise "v0" would turn into just "1"
        prefix = re.match(r'\D*', version_numbers[0]).group()
        version_numbers[0] = prefix + _increase_version(version_numbers[0])
    else:
        version_numbers[-1] = _increase_version(version_numbers[-1])
    return '.'.join(version_numbers)


def _validate_next_version_number(prev_version: str, next_version: str):
    return prev_version < next_version
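For reference, the version-number helpers behave as follows; a small illustration using the private helpers above (not a public API):
```python
from bdmtool.files_management import (
    _gen_next_version_number, _validate_next_version_number)

# Minor bump: only the last numeric component grows.
assert _gen_next_version_number("v0.2") == "v0.3"
# Major bump keeps the "v" prefix thanks to the prefix handling above.
assert _gen_next_version_number("v0.2", increase_major=True) == "v1.2"
# Validation is a plain string comparison of version names.
assert _validate_next_version_number("v0.2", "v0.3")
```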