omdev 0.0.0.dev24__py3-none-any.whl → 0.0.0.dev26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of omdev might be problematic. Click here for more details.
- omdev/_manifests.json +1 -0
- omdev/amalg/amalg.py +60 -23
- omdev/cexts/_boilerplate.cc +82 -0
- omdev/cexts/_distutils/LICENSE +12 -0
- omdev/cexts/build.py +62 -18
- omdev/cexts/importhook.py +1 -1
- omdev/datacache/__init__.py +19 -0
- omdev/datacache/cache.py +149 -0
- omdev/datacache/consts.py +1 -0
- omdev/datacache/default.py +51 -0
- omdev/datacache/manifests.py +40 -0
- omdev/datacache/specs.py +93 -0
- omdev/findmagic.py +7 -0
- omdev/git.py +62 -0
- omdev/manifests.py +247 -0
- omdev/pyproject/pkg.py +35 -10
- omdev/scripts/interp.py +42 -37
- omdev/scripts/pyproject.py +82 -54
- omdev/tools/dockertools.py +102 -0
- {omdev-0.0.0.dev24.dist-info → omdev-0.0.0.dev26.dist-info}/METADATA +2 -2
- {omdev-0.0.0.dev24.dist-info → omdev-0.0.0.dev26.dist-info}/RECORD +24 -13
- {omdev-0.0.0.dev24.dist-info → omdev-0.0.0.dev26.dist-info}/WHEEL +1 -1
- {omdev-0.0.0.dev24.dist-info → omdev-0.0.0.dev26.dist-info}/LICENSE +0 -0
- {omdev-0.0.0.dev24.dist-info → omdev-0.0.0.dev26.dist-info}/top_level.txt +0 -0
omdev/_manifests.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
[]
|
omdev/amalg/amalg.py
CHANGED
|
@@ -29,13 +29,16 @@ import io
|
|
|
29
29
|
import logging
|
|
30
30
|
import os.path
|
|
31
31
|
import re
|
|
32
|
+
import textwrap
|
|
32
33
|
import typing as ta
|
|
33
34
|
|
|
34
35
|
import tokenize_rt as trt
|
|
35
36
|
|
|
36
37
|
from omlish import check
|
|
37
38
|
from omlish import collections as col
|
|
39
|
+
from omlish import lang
|
|
38
40
|
from omlish import logs
|
|
41
|
+
from omlish.lite.runtime import REQUIRED_PYTHON_VERSION
|
|
39
42
|
|
|
40
43
|
from .. import findmagic
|
|
41
44
|
from .. import tokens as tks
|
|
@@ -91,6 +94,7 @@ def strip_main_lines(cls: ta.Sequence[Tokens]) -> list[Tokens]:
|
|
|
91
94
|
|
|
92
95
|
STRIPPED_HEADER_MAGICS = [
|
|
93
96
|
'# @omlish-lite',
|
|
97
|
+
'# @omlish-script',
|
|
94
98
|
]
|
|
95
99
|
|
|
96
100
|
STRIPPED_HEADER_PATS = [findmagic.compile_magic_pat(m) for m in STRIPPED_HEADER_MAGICS]
|
|
@@ -268,6 +272,8 @@ class SrcFile:
|
|
|
268
272
|
typings: ta.Sequence[Typing] = dc.field(repr=False)
|
|
269
273
|
content_lines: ta.Sequence[Tokens] = dc.field(repr=False)
|
|
270
274
|
|
|
275
|
+
ruff_noqa: ta.AbstractSet[str] = dc.field(repr=False)
|
|
276
|
+
|
|
271
277
|
|
|
272
278
|
def make_src_file(
|
|
273
279
|
path: str,
|
|
@@ -283,6 +289,7 @@ def make_src_file(
|
|
|
283
289
|
hls, cls = split_header_lines(lines)
|
|
284
290
|
|
|
285
291
|
hls = strip_header_lines(hls)
|
|
292
|
+
rnls, hls = col.partition(hls, lambda l: tks.join_toks(l).startswith('# ruff: noqa: '))
|
|
286
293
|
|
|
287
294
|
imps: list[Import] = []
|
|
288
295
|
tys: list[Typing] = []
|
|
@@ -316,6 +323,8 @@ def make_src_file(
|
|
|
316
323
|
imports=imps,
|
|
317
324
|
typings=tys,
|
|
318
325
|
content_lines=ctls,
|
|
326
|
+
|
|
327
|
+
ruff_noqa=set(lang.flatten(tks.join_toks(l).strip().split()[3:] for l in rnls)), # noqa
|
|
319
328
|
)
|
|
320
329
|
|
|
321
330
|
|
|
@@ -324,10 +333,11 @@ def make_src_file(
|
|
|
324
333
|
|
|
325
334
|
SECTION_SEP = '#' * 40 + '\n'
|
|
326
335
|
|
|
327
|
-
RUFF_DISABLES: ta.
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
336
|
+
RUFF_DISABLES: ta.AbstractSet[str] = {
|
|
337
|
+
'UP006', # non-pep585-annotation
|
|
338
|
+
'UP007', # non-pep604-annotation
|
|
339
|
+
'UP036', # outdated-version-block
|
|
340
|
+
}
|
|
331
341
|
|
|
332
342
|
OUTPUT_COMMENT = '# @omdev-amalg-output '
|
|
333
343
|
SCAN_COMMENT = '# @omdev-amalg '
|
|
@@ -362,46 +372,70 @@ def gen_amalg(
|
|
|
362
372
|
|
|
363
373
|
##
|
|
364
374
|
|
|
375
|
+
hls = []
|
|
376
|
+
|
|
365
377
|
mf = src_files[main_path]
|
|
366
378
|
if mf.header_lines:
|
|
367
|
-
hls
|
|
379
|
+
hls.extend([
|
|
368
380
|
hl
|
|
369
381
|
for hlts in mf.header_lines
|
|
370
382
|
if not (hl := tks.join_toks(hlts)).startswith(SCAN_COMMENT)
|
|
371
|
-
]
|
|
372
|
-
if output_dir is not None:
|
|
373
|
-
ogf = os.path.relpath(main_path, output_dir)
|
|
374
|
-
else:
|
|
375
|
-
ogf = os.path.basename(main_path)
|
|
376
|
-
nhls = []
|
|
377
|
-
nhls.extend([
|
|
378
|
-
'#!/usr/bin/env python3\n',
|
|
379
|
-
'# noinspection DuplicatedCode\n',
|
|
380
|
-
'# @omlish-lite\n',
|
|
381
|
-
'# @omlish-script\n',
|
|
382
|
-
f'{OUTPUT_COMMENT.strip()} {ogf}\n',
|
|
383
383
|
])
|
|
384
|
-
hls = [*nhls, *hls]
|
|
385
|
-
out.write(''.join(hls))
|
|
386
384
|
|
|
387
|
-
if
|
|
388
|
-
|
|
385
|
+
if output_dir is not None:
|
|
386
|
+
ogf = os.path.relpath(main_path, output_dir)
|
|
387
|
+
else:
|
|
388
|
+
ogf = os.path.basename(main_path)
|
|
389
|
+
|
|
390
|
+
nhls = []
|
|
391
|
+
nhls.extend([
|
|
392
|
+
'#!/usr/bin/env python3\n',
|
|
393
|
+
'# noinspection DuplicatedCode\n',
|
|
394
|
+
'# @omlish-lite\n',
|
|
395
|
+
'# @omlish-script\n',
|
|
396
|
+
f'{OUTPUT_COMMENT.strip()} {ogf}\n',
|
|
397
|
+
])
|
|
398
|
+
|
|
399
|
+
ruff_disables = sorted({
|
|
400
|
+
*lang.flatten(f.ruff_noqa for f in src_files.values()),
|
|
401
|
+
*RUFF_DISABLES,
|
|
402
|
+
})
|
|
403
|
+
if ruff_disables:
|
|
404
|
+
nhls.append(f'# ruff: noqa: {" ".join(sorted(ruff_disables))}\n')
|
|
405
|
+
|
|
406
|
+
hls = [*nhls, *hls]
|
|
407
|
+
out.write(''.join(hls))
|
|
389
408
|
|
|
390
409
|
##
|
|
391
410
|
|
|
392
411
|
all_imps = [i for f in src_files.values() for i in f.imports]
|
|
393
412
|
gl_imps = [i for i in all_imps if i.mod_path is None]
|
|
394
413
|
|
|
395
|
-
dct: dict = {
|
|
414
|
+
dct: dict = {
|
|
415
|
+
('sys', None, None): ['import sys\n'],
|
|
416
|
+
}
|
|
396
417
|
for imp in gl_imps:
|
|
397
418
|
dct.setdefault((imp.mod, imp.item, imp.as_), []).append(imp)
|
|
398
419
|
for _, l in sorted(dct.items()):
|
|
399
|
-
|
|
420
|
+
il = l[0]
|
|
421
|
+
out.write(il if isinstance(il, str) else tks.join_toks(il.toks))
|
|
400
422
|
if dct:
|
|
401
423
|
out.write('\n\n')
|
|
402
424
|
|
|
403
425
|
##
|
|
404
426
|
|
|
427
|
+
out.write(SECTION_SEP)
|
|
428
|
+
out.write('\n\n')
|
|
429
|
+
|
|
430
|
+
out.write(textwrap.dedent(f"""
|
|
431
|
+
if sys.version_info < {REQUIRED_PYTHON_VERSION!r}:
|
|
432
|
+
raise OSError(
|
|
433
|
+
f'Requires python {REQUIRED_PYTHON_VERSION!r}, got {{sys.version_info}} from {{sys.executable}}') # noqa
|
|
434
|
+
""").lstrip())
|
|
435
|
+
out.write('\n\n')
|
|
436
|
+
|
|
437
|
+
##
|
|
438
|
+
|
|
405
439
|
ts = list(col.toposort({ # noqa
|
|
406
440
|
f.path: {mp for i in f.imports if (mp := i.mod_path) is not None}
|
|
407
441
|
for f in src_files.values()
|
|
@@ -418,6 +452,9 @@ def gen_amalg(
|
|
|
418
452
|
if ty.src not in tys:
|
|
419
453
|
tyd.setdefault(f.path, []).append(ty)
|
|
420
454
|
tys.add(ty.src)
|
|
455
|
+
if tys:
|
|
456
|
+
out.write(SECTION_SEP)
|
|
457
|
+
out.write('\n\n')
|
|
421
458
|
for i, (sf, ftys) in enumerate(tyd.items()):
|
|
422
459
|
f = src_files[sf]
|
|
423
460
|
if i:
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
// @omdev-cext
|
|
2
|
+
#define PY_SSIZE_T_CLEAN
|
|
3
|
+
#include "Python.h"
|
|
4
|
+
#include "structmember.h"
|
|
5
|
+
|
|
6
|
+
#include <unistd.h>
|
|
7
|
+
|
|
8
|
+
//
|
|
9
|
+
|
|
10
|
+
#define _MODULE_NAME "_boilerplate"
|
|
11
|
+
#define _PACKAGE_NAME "omdev.cexts"
|
|
12
|
+
#define _MODULE_FULL_NAME _PACKAGE_NAME "." _MODULE_NAME
|
|
13
|
+
|
|
14
|
+
typedef struct boilerplate_state {
|
|
15
|
+
} boilerplate_state;
|
|
16
|
+
|
|
17
|
+
// Fetch the module's state pointer via PyModule_GetState and hand it back typed as boilerplate_state.
// The non-NULL check is an assert, so it is only active when assertions are compiled in.
static inline boilerplate_state * get_boilerplate_state(PyObject *module)
{
    boilerplate_state *typed_state = static_cast<boilerplate_state *>(PyModule_GetState(module));
    assert(typed_state != NULL);
    return typed_state;
}
|
|
23
|
+
|
|
24
|
+
//
|
|
25
|
+
|
|
26
|
+
PyDoc_STRVAR(boilerplate_doc, "boilerplate");
|
|
27
|
+
|
|
28
|
+
// Module exec hook: looks up the module state (nothing is stored in it yet) and reports success.
static int boilerplate_exec(PyObject *module)
{
    (void) get_boilerplate_state(module);
    return 0;
}
|
|
33
|
+
|
|
34
|
+
// GC traverse hook: the state struct holds no object references, so there is nothing to visit.
static int boilerplate_traverse(PyObject *module, visitproc visit, void *arg)
{
    (void) get_boilerplate_state(module);
    return 0;
}
|
|
39
|
+
|
|
40
|
+
// GC clear hook: the state struct holds no object references, so there is nothing to release.
static int boilerplate_clear(PyObject *module)
{
    (void) get_boilerplate_state(module);
    return 0;
}
|
|
45
|
+
|
|
46
|
+
static void boilerplate_free(void *module)
|
|
47
|
+
{
|
|
48
|
+
boilerplate_clear((PyObject *)module);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
static PyMethodDef boilerplate_methods[] = {
|
|
52
|
+
{NULL, NULL, 0, NULL}
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
static struct PyModuleDef_Slot boilerplate_slots[] = {
|
|
56
|
+
{Py_mod_exec, (void *) boilerplate_exec},
|
|
57
|
+
// #if PY_VERSION_HEX >= 0x030D0000
|
|
58
|
+
// {Py_mod_gil, Py_MOD_GIL_NOT_USED},
|
|
59
|
+
// #endif
|
|
60
|
+
{0, NULL}
|
|
61
|
+
};
|
|
62
|
+
|
|
63
|
+
static struct PyModuleDef boilerplate_module = {
|
|
64
|
+
.m_base = PyModuleDef_HEAD_INIT,
|
|
65
|
+
.m_name = _MODULE_NAME,
|
|
66
|
+
.m_doc = boilerplate_doc,
|
|
67
|
+
.m_size = sizeof(boilerplate_state),
|
|
68
|
+
.m_methods = boilerplate_methods,
|
|
69
|
+
.m_slots = boilerplate_slots,
|
|
70
|
+
.m_traverse = boilerplate_traverse,
|
|
71
|
+
.m_clear = boilerplate_clear,
|
|
72
|
+
.m_free = boilerplate_free,
|
|
73
|
+
};
|
|
74
|
+
|
|
75
|
+
extern "C" {
|
|
76
|
+
|
|
77
|
+
PyMODINIT_FUNC PyInit__boilerplate(void)
|
|
78
|
+
{
|
|
79
|
+
return PyModuleDef_Init(&boilerplate_module);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
|
2
|
+
documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
|
|
3
|
+
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
|
|
4
|
+
persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
5
|
+
|
|
6
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
|
|
7
|
+
Software.
|
|
8
|
+
|
|
9
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
|
10
|
+
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
|
11
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
|
12
|
+
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
omdev/cexts/build.py
CHANGED
|
@@ -1,43 +1,87 @@
|
|
|
1
|
+
import dataclasses as dc
|
|
1
2
|
import os.path
|
|
2
3
|
import sys
|
|
3
4
|
import sysconfig
|
|
5
|
+
import typing as ta
|
|
6
|
+
|
|
7
|
+
from omlish import check
|
|
8
|
+
from omlish import lang
|
|
4
9
|
|
|
5
10
|
from . import _distutils as du
|
|
6
11
|
|
|
7
12
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
)
|
|
13
|
+
CPP_STD = 'c++20'
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dc.dataclass(frozen=True)
class BuildExt:
    """
    Immutable description of one extension build request, consumed by ``build_ext``.

    ``full_name`` is the dotted module name and ``src_file`` the primary source file. All remaining fields are
    keyword-only and optional.
    """

    full_name: str
    src_file: str

    # Forwarded verbatim to the build command's options.
    inplace: bool = dc.field(default=True, kw_only=True)
    debug: bool = dc.field(default=True, kw_only=True)
    force: bool = dc.field(default=False, kw_only=True)

    dry_run: bool = dc.field(default=False, kw_only=True)
    verbose: bool = dc.field(default=False, kw_only=True)

    # Extra inputs appended after the defaults build_ext derives from src_file; None means "nothing extra".
    extra_src_files: lang.SequenceNotStr[str] | None = dc.field(default=None, kw_only=True)
    include_dirs: lang.SequenceNotStr[str] | None = dc.field(default=None, kw_only=True)
    compile_args: lang.SequenceNotStr[str] | None = dc.field(default=None, kw_only=True)
    link_args: lang.SequenceNotStr[str] | None = dc.field(default=None, kw_only=True)
    define_macros: ta.Sequence[tuple[str, str]] | None = dc.field(default=None, kw_only=True)
    undef_macros: lang.SequenceNotStr[str] | None = dc.field(default=None, kw_only=True)

    def __post_init__(self) -> None:
        # Every one of these is unpacked element-wise when the extension is assembled, so a bare str would silently
        # explode into single characters. Reject it for all of them, not just the two argument lists.
        for seq in (
                self.extra_src_files,
                self.include_dirs,
                self.compile_args,
                self.link_args,
                self.undef_macros,
        ):
            check.not_isinstance(seq, str)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def build_ext(ext: BuildExt) -> str:
    """
    Build the extension described by ``ext`` and return the path where the shared object is expected.

    The returned path is computed (source directory + last component of ``full_name`` + ``SOABI`` + ``SHLIB_SUFFIX``),
    not discovered on disk, so it is only meaningful for in-place builds.
    """

    extra_link_args: list[str] = []
    if sys.platform == 'darwin':
        extra_link_args.append('-Wl,-no_fixup_chains')

    # Match on the real file extension (with the dot); a bare 'cc' / 'cpp' suffix test would also fire for any path
    # that merely ends in those letters.
    is_cpp = ext.src_file.endswith(('.cc', '.cpp'))

    du_ext = du.Extension(
        ext.full_name,
        sources=[
            ext.src_file,
            *(ext.extra_src_files or []),
        ],
        include_dirs=[
            os.path.dirname(ext.src_file),
            *(ext.include_dirs or []),
        ],
        extra_compile_args=[
            *([f'-std={CPP_STD}'] if is_cpp else []),
            *(ext.compile_args or []),
        ],
        extra_link_args=[
            *extra_link_args,
            *(ext.link_args or []),
        ],
        define_macros=(list(ext.define_macros) if ext.define_macros is not None else None),
        undef_macros=(list(ext.undef_macros) if ext.undef_macros is not None else None),
    )

    cmd_obj = du.BuildExt(du.BuildExt.Options(
        inplace=ext.inplace,
        debug=ext.debug,
        force=ext.force,

        dry_run=ext.dry_run,
        verbose=ext.verbose,
    ))
    cmd_obj.build_extension(du_ext)

    so_file = os.path.join(
        os.path.dirname(ext.src_file),
        ''.join([
            ext.full_name.rpartition('.')[2],
            '.',
            sysconfig.get_config_var('SOABI'),
            sysconfig.get_config_var('SHLIB_SUFFIX'),
        ]),
    )

    return so_file
|
omdev/cexts/importhook.py
CHANGED
|
@@ -42,7 +42,7 @@ class CextImportLoader(importlib.machinery.ExtensionFileLoader):
|
|
|
42
42
|
super().__init__(module_name, filename)
|
|
43
43
|
|
|
44
44
|
def create_module(self, spec: importlib.machinery.ModuleSpec) -> types.ModuleType:
|
|
45
|
-
so_path = build.build_ext(spec.name, check.non_empty_str(spec.origin))
|
|
45
|
+
so_path = build.build_ext(build.BuildExt(spec.name, check.non_empty_str(spec.origin)))
|
|
46
46
|
self.path = so_path # noqa
|
|
47
47
|
spec.origin = so_path
|
|
48
48
|
return super().create_module(spec)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from .cache import ( # noqa
|
|
2
|
+
DataCache,
|
|
3
|
+
)
|
|
4
|
+
|
|
5
|
+
from .default import ( # noqa
|
|
6
|
+
default,
|
|
7
|
+
default_dir,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
from .manifests import ( # noqa
|
|
11
|
+
CacheDataManifest,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
from .specs import ( # noqa
|
|
15
|
+
CacheDataSpec,
|
|
16
|
+
GitCacheDataSpec,
|
|
17
|
+
GithubContentCacheDataSpec,
|
|
18
|
+
HttpCacheDataSpec,
|
|
19
|
+
)
|
omdev/datacache/cache.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TODO:
|
|
3
|
+
- huggingface_hub
|
|
4
|
+
- postprocessing?
|
|
5
|
+
- unarchive
|
|
6
|
+
- stupid little progress bars
|
|
7
|
+
- return file path for single files
|
|
8
|
+
- thus, HttpSpec.url has derive=lambda url: ...
|
|
9
|
+
"""
|
|
10
|
+
import logging
|
|
11
|
+
import os.path
|
|
12
|
+
import shutil
|
|
13
|
+
import tempfile
|
|
14
|
+
import urllib.parse
|
|
15
|
+
import urllib.request
|
|
16
|
+
|
|
17
|
+
from omlish import check
|
|
18
|
+
from omlish import lang
|
|
19
|
+
from omlish import marshal as msh
|
|
20
|
+
from omlish.formats import json
|
|
21
|
+
|
|
22
|
+
from .. import git
|
|
23
|
+
from .manifests import CacheDataManifest
|
|
24
|
+
from .specs import CacheDataSpec
|
|
25
|
+
from .specs import GitCacheDataSpec
|
|
26
|
+
from .specs import GithubContentCacheDataSpec
|
|
27
|
+
from .specs import HttpCacheDataSpec
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
log = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
##
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class DataCache:
    """
    On-disk cache of fetched datasets, keyed by ``spec.digest``.

    Each item lives in ``<base_dir>/items/<digest>/`` and consists of a ``data`` directory plus a ``manifest.json``.
    Items are assembled in a staging directory and renamed into place once complete.
    """

    def __init__(self, base_dir: str) -> None:
        super().__init__()
        self._base_dir = base_dir

        self._items_dir = os.path.join(base_dir, 'items')

    @staticmethod
    def _contained_path(data_dir: str, rel_path: str) -> str:
        """
        Join ``rel_path`` onto ``data_dir`` and return the result, raising RuntimeError if it escapes ``data_dir``.

        Containment is decided on whole path components; a plain string-prefix test would accept a sibling such as
        ``data_evil`` for a root named ``data``.
        """

        out_file = os.path.join(data_dir, rel_path)
        root = os.path.abspath(data_dir)
        if os.path.commonpath([root, os.path.abspath(out_file)]) != root:
            raise RuntimeError(out_file)  # noqa
        return out_file

    def _fetch_url(self, url: str, out_file: str) -> None:
        """Download ``url`` to ``out_file``."""

        log.info('Fetching url: %s -> %s', url, out_file)

        urllib.request.urlretrieve(url, out_file)  # noqa

    def _fetch_into(self, spec: CacheDataSpec, data_dir: str) -> None:
        """
        Materialize ``spec`` inside the existing, empty directory ``data_dir``.

        Raises TypeError for an unsupported spec type, NotImplementedError for a git spec without subtrees, and
        RuntimeError when a fetched layout is not what was expected or a file path escapes ``data_dir``.
        """

        log.info('Fetching spec: %s %r', spec.digest, spec)

        if isinstance(spec, HttpCacheDataSpec):
            self._fetch_url(spec.url, os.path.join(data_dir, spec.file_name_or_default))

        elif isinstance(spec, GithubContentCacheDataSpec):
            for repo_file in spec.files:
                out_file = self._contained_path(data_dir, repo_file)

                url = f'https://raw.githubusercontent.com/{spec.repo}/{spec.rev}/{repo_file}'
                os.makedirs(os.path.dirname(out_file), exist_ok=True)
                # out_file already includes data_dir; joining it again would double a relative data_dir.
                self._fetch_url(url, out_file)

        elif isinstance(spec, GitCacheDataSpec):
            if not spec.subtrees:
                raise NotImplementedError

            # Clone next to data_dir so the rename below never crosses a filesystem boundary.
            tmp_dir = tempfile.mkdtemp(dir=os.path.dirname(os.path.abspath(data_dir)))
            try:
                log.info('Cloning git repo: %s -> %s', spec.url, tmp_dir)

                git.clone_subtree(
                    base_dir=tmp_dir,
                    repo_url=spec.url,
                    repo_dir='data',
                    branch=spec.branch,
                    rev=spec.rev,
                    repo_subtrees=spec.subtrees,
                )

                repo_dir = os.path.join(tmp_dir, 'data')
                if not os.path.isdir(repo_dir):
                    raise RuntimeError(repo_dir)

                # Only the checked-out files are cached, not the repository metadata.
                git_dir = os.path.join(repo_dir, '.git')
                if not os.path.isdir(git_dir):
                    raise RuntimeError(git_dir)
                shutil.rmtree(git_dir)

                # Swap the empty placeholder directory for the clone.
                os.rmdir(data_dir)
                os.rename(repo_dir, data_dir)

            finally:
                # Remove the clone's scratch directory whether or not the fetch succeeded.
                shutil.rmtree(tmp_dir, ignore_errors=True)

        else:
            raise TypeError(spec)

    def _return_val(self, spec: CacheDataSpec, data_dir: str) -> str:
        """Return the path callers receive: the single downloaded file for http specs, otherwise ``data_dir``."""

        check.state(os.path.isdir(data_dir))

        if isinstance(spec, HttpCacheDataSpec):
            data_file = os.path.join(data_dir, spec.file_name_or_default)
            if not os.path.isfile(data_file):
                raise RuntimeError(data_file)  # noqa
            return data_file

        else:
            return data_dir

    def get(self, spec: CacheDataSpec) -> str:
        """
        Return the cached path for ``spec``, fetching it first if no item directory exists for its digest.

        Any exception raised while fetching propagates to the caller; the partially built staging directory is
        removed before it does.
        """

        os.makedirs(self._items_dir, exist_ok=True)

        #

        item_dir = os.path.join(self._items_dir, spec.digest)
        if os.path.isdir(item_dir):
            data_dir = os.path.join(item_dir, 'data')
            return self._return_val(spec, data_dir)

        #

        # Stage inside the cache's own base directory: os.rename cannot move a directory across filesystems, and the
        # system temp directory frequently lives on a different one than the cache.
        tmp_dir = tempfile.mkdtemp(dir=self._base_dir)
        try:
            fetch_dir = os.path.join(tmp_dir, 'data')
            os.mkdir(fetch_dir)

            start_at = lang.utcnow()
            self._fetch_into(spec, fetch_dir)
            end_at = lang.utcnow()

            #

            manifest = CacheDataManifest(
                spec,
                start_at=start_at,
                end_at=end_at,
            )
            manifest_json = json.dumps_pretty(msh.marshal(manifest))

            manifest_file = os.path.join(tmp_dir, 'manifest.json')
            with open(manifest_file, 'w') as f:
                f.write(manifest_json)

            ##

            os.rename(tmp_dir, item_dir)

        finally:
            # After a successful rename tmp_dir no longer exists and this is a no-op; on failure it removes the
            # half-built staging directory.
            shutil.rmtree(tmp_dir, ignore_errors=True)

        data_dir = os.path.join(item_dir, 'data')
        return self._return_val(spec, data_dir)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
MARSHAL_VERSION = 0
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import os.path
|
|
2
|
+
|
|
3
|
+
from omlish import lang
|
|
4
|
+
|
|
5
|
+
from .cache import DataCache
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
##
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@lang.cached_function(lock=True)
def default_dir() -> str:
    """Return the default cache root: ``~/.cache/omlish/data`` with the home directory expanded."""

    cache_root = os.path.expanduser('~/.cache/omlish/data')
    return cache_root
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@lang.cached_function(lock=True)
def default() -> DataCache:
    """Return a DataCache rooted at ``default_dir()`` (presumably memoized by the decorator - confirm)."""

    base_dir = default_dir()
    return DataCache(base_dir)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _main() -> None:
    """Manual smoke test: request a few sample specs from the default cache, twice each, printing each result."""

    from omlish import logs

    logs.configure_standard_logging('INFO')

    #

    from .specs import GitCacheDataSpec
    from .specs import GithubContentCacheDataSpec
    from .specs import HttpCacheDataSpec

    sample_specs = [
        GitCacheDataSpec(
            'https://github.com/wrmsr/deep_learning_cookbook',
            rev='138a99b09ffa3a728d261e461440f029e512ac93',
            subtrees=['data/wp_movies_10k.ndjson'],
        ),
        GithubContentCacheDataSpec(
            'karpathy/char-rnn',
            'master',
            ['data/tinyshakespeare/input.txt'],
        ),
        HttpCacheDataSpec('https://github.com/VanushVaswani/keras_mnistm/releases/download/1.0/keras_mnistm.pkl.gz'),
    ]

    for sample in sample_specs:
        print(sample)
        # Requested twice so both the fetch path and the already-present path of the cache get exercised.
        for _attempt in range(2):
            print(default().get(sample))
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
if __name__ == '__main__':
|
|
51
|
+
_main()
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
|
|
3
|
+
from omlish import __about__ as about
|
|
4
|
+
from omlish import cached
|
|
5
|
+
from omlish import dataclasses as dc
|
|
6
|
+
|
|
7
|
+
from ..revisions import get_git_revision
|
|
8
|
+
from .consts import MARSHAL_VERSION
|
|
9
|
+
from .specs import CacheDataSpec
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
##
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@cached.function
def _lib_revision() -> str | None:
    """Return the revision recorded in the package's about module, else whatever ``get_git_revision()`` reports."""

    packaged_rev = about.__revision__
    if packaged_rev is None:
        return get_git_revision()

    return packaged_rev  # type: ignore
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
##
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dc.dataclass(frozen=True)
class CacheDataManifest:
    """Record written alongside a cached item: the spec that produced it, fetch timing, and library provenance."""

    spec: CacheDataSpec

    # Timestamps taken immediately before and after the fetch.
    start_at: datetime.datetime = dc.field(kw_only=True)
    end_at: datetime.datetime = dc.field(kw_only=True)

    lib_version: str = dc.field(default_factory=lambda: about.__version__, kw_only=True)
    # _lib_revision is declared to return ``str | None``, so the field has to admit None as well.
    lib_revision: str | None = dc.field(default_factory=_lib_revision, kw_only=True)

    marshal_version: int = dc.field(default=MARSHAL_VERSION, kw_only=True)

    @dc.validate
    def _validate_marshal_versions(self) -> bool:
        # The manifest and the spec it embeds must have been written with the same marshal format version.
        return self.marshal_version == self.spec.marshal_version
|