omdev 0.0.0.dev25__tar.gz → 0.0.0.dev26__tar.gz
This diff reflects the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of omdev might be problematic.
- {omdev-0.0.0.dev25/omdev.egg-info → omdev-0.0.0.dev26}/PKG-INFO +2 -2
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/amalg/amalg.py +60 -23
- omdev-0.0.0.dev26/omdev/datacache/__init__.py +19 -0
- omdev-0.0.0.dev26/omdev/datacache/cache.py +149 -0
- omdev-0.0.0.dev26/omdev/datacache/consts.py +1 -0
- omdev-0.0.0.dev26/omdev/datacache/default.py +51 -0
- omdev-0.0.0.dev26/omdev/datacache/manifests.py +40 -0
- omdev-0.0.0.dev26/omdev/datacache/specs.py +93 -0
- omdev-0.0.0.dev26/omdev/git.py +62 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/manifests.py +29 -7
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/pyproject/pkg.py +26 -7
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/scripts/interp.py +42 -37
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/scripts/pyproject.py +66 -51
- omdev-0.0.0.dev26/omdev/tools/dockertools.py +183 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26/omdev.egg-info}/PKG-INFO +2 -2
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev.egg-info/SOURCES.txt +7 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev.egg-info/requires.txt +1 -1
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/pyproject.toml +2 -2
- omdev-0.0.0.dev25/omdev/tools/dockertools.py +0 -81
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/LICENSE +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/MANIFEST.in +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/README.rst +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/__about__.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/__init__.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/_manifests.json +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/amalg/__init__.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/amalg/__main__.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/bracepy.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/__init__.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_boilerplate.cc +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/LICENSE +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/__init__.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/build_ext.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/compilers/__init__.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/compilers/ccompiler.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/compilers/options.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/compilers/unixccompiler.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/dir_util.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/errors.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/extension.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/file_util.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/modified.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/spawn.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/sysconfig.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/util.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/version.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/build.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/cmake.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/importhook.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/magic.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/scan.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/classdot.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cmake.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/findimports.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/findmagic.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/__init__.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/__main__.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/cli.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/inspect.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/providers.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/pyenv.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/resolvers.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/standalone.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/system.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/types.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/mypy/__init__.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/mypy/debug.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/precheck/__init__.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/precheck/__main__.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/precheck/precheck.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/pyproject/__init__.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/pyproject/__main__.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/pyproject/cexts.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/pyproject/cli.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/pyproject/configs.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/pyproject/reqs.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/revisions.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/scripts/__init__.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/scripts/bumpversion.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/scripts/execrss.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/tokens.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/toml/__init__.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/toml/parser.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/toml/writer.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/tools/__init__.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/tools/gittools.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/tools/importscan.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/tools/importtrace.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/tools/rst.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/tools/sqlrepl.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/versioning/__init__.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/versioning/specifiers.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/versioning/versions.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/wheelfile.py +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev.egg-info/dependency_links.txt +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev.egg-info/top_level.txt +0 -0
- {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/setup.cfg +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: omdev
-Version: 0.0.0.dev25
+Version: 0.0.0.dev26
 Summary: omdev
 Author: wrmsr
 License: BSD-3-Clause
@@ -12,7 +12,7 @@ Classifier: Operating System :: OS Independent
 Classifier: Operating System :: POSIX
 Requires-Python: ~=3.12
 License-File: LICENSE
-Requires-Dist: omlish==0.0.0.dev25
+Requires-Dist: omlish==0.0.0.dev26
 Provides-Extra: all
 Requires-Dist: pycparser~=2.22; extra == "all"
 Requires-Dist: cffi~=1.17; extra == "all"
{omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/amalg/amalg.py
@@ -29,13 +29,16 @@ import io
 import logging
 import os.path
 import re
+import textwrap
 import typing as ta
 
 import tokenize_rt as trt
 
 from omlish import check
 from omlish import collections as col
+from omlish import lang
 from omlish import logs
+from omlish.lite.runtime import REQUIRED_PYTHON_VERSION
 
 from .. import findmagic
 from .. import tokens as tks
@@ -91,6 +94,7 @@ def strip_main_lines(cls: ta.Sequence[Tokens]) -> list[Tokens]:
 
 STRIPPED_HEADER_MAGICS = [
     '# @omlish-lite',
+    '# @omlish-script',
 ]
 
 STRIPPED_HEADER_PATS = [findmagic.compile_magic_pat(m) for m in STRIPPED_HEADER_MAGICS]
@@ -268,6 +272,8 @@ class SrcFile:
     typings: ta.Sequence[Typing] = dc.field(repr=False)
     content_lines: ta.Sequence[Tokens] = dc.field(repr=False)
 
+    ruff_noqa: ta.AbstractSet[str] = dc.field(repr=False)
+
 
 def make_src_file(
         path: str,
@@ -283,6 +289,7 @@ def make_src_file(
     hls, cls = split_header_lines(lines)
 
     hls = strip_header_lines(hls)
+    rnls, hls = col.partition(hls, lambda l: tks.join_toks(l).startswith('# ruff: noqa: '))
 
     imps: list[Import] = []
     tys: list[Typing] = []
@@ -316,6 +323,8 @@
         imports=imps,
         typings=tys,
         content_lines=ctls,
+
+        ruff_noqa=set(lang.flatten(tks.join_toks(l).strip().split()[3:] for l in rnls)),  # noqa
     )
 
 
@@ -324,10 +333,11 @@
 
 SECTION_SEP = '#' * 40 + '\n'
 
-RUFF_DISABLES: ta.
-
-
-
+RUFF_DISABLES: ta.AbstractSet[str] = {
+    'UP006',  # non-pep585-annotation
+    'UP007',  # non-pep604-annotation
+    'UP036',  # outdated-version-block
+}
 
 OUTPUT_COMMENT = '# @omdev-amalg-output '
 SCAN_COMMENT = '# @omdev-amalg '
@@ -362,46 +372,70 @@ def gen_amalg(
 
     ##
 
+    hls = []
+
     mf = src_files[main_path]
     if mf.header_lines:
-        hls
+        hls.extend([
            hl
            for hlts in mf.header_lines
            if not (hl := tks.join_toks(hlts)).startswith(SCAN_COMMENT)
-        ]
-        if output_dir is not None:
-            ogf = os.path.relpath(main_path, output_dir)
-        else:
-            ogf = os.path.basename(main_path)
-        nhls = []
-        nhls.extend([
-            '#!/usr/bin/env python3\n',
-            '# noinspection DuplicatedCode\n',
-            '# @omlish-lite\n',
-            '# @omlish-script\n',
-            f'{OUTPUT_COMMENT.strip()} {ogf}\n',
        ])
-        hls = [*nhls, *hls]
-        out.write(''.join(hls))
 
-    if
-
+    if output_dir is not None:
+        ogf = os.path.relpath(main_path, output_dir)
+    else:
+        ogf = os.path.basename(main_path)
+
+    nhls = []
+    nhls.extend([
+        '#!/usr/bin/env python3\n',
+        '# noinspection DuplicatedCode\n',
+        '# @omlish-lite\n',
+        '# @omlish-script\n',
+        f'{OUTPUT_COMMENT.strip()} {ogf}\n',
+    ])
+
+    ruff_disables = sorted({
+        *lang.flatten(f.ruff_noqa for f in src_files.values()),
+        *RUFF_DISABLES,
+    })
+    if ruff_disables:
+        nhls.append(f'# ruff: noqa: {" ".join(sorted(ruff_disables))}\n')
+
+    hls = [*nhls, *hls]
+    out.write(''.join(hls))
 
     ##
 
     all_imps = [i for f in src_files.values() for i in f.imports]
     gl_imps = [i for i in all_imps if i.mod_path is None]
 
-    dct: dict = {
+    dct: dict = {
+        ('sys', None, None): ['import sys\n'],
+    }
     for imp in gl_imps:
         dct.setdefault((imp.mod, imp.item, imp.as_), []).append(imp)
     for _, l in sorted(dct.items()):
-
+        il = l[0]
+        out.write(il if isinstance(il, str) else tks.join_toks(il.toks))
    if dct:
        out.write('\n\n')

    ##

+    out.write(SECTION_SEP)
+    out.write('\n\n')
+
+    out.write(textwrap.dedent(f"""
+        if sys.version_info < {REQUIRED_PYTHON_VERSION!r}:
+            raise OSError(
+                f'Requires python {REQUIRED_PYTHON_VERSION!r}, got {{sys.version_info}} from {{sys.executable}}')  # noqa
+    """).lstrip())
+    out.write('\n\n')
+
+    ##
+
     ts = list(col.toposort({  # noqa
         f.path: {mp for i in f.imports if (mp := i.mod_path) is not None}
         for f in src_files.values()
@@ -418,6 +452,9 @@ def gen_amalg(
             if ty.src not in tys:
                 tyd.setdefault(f.path, []).append(ty)
                 tys.add(ty.src)
+    if tys:
+        out.write(SECTION_SEP)
+        out.write('\n\n')
     for i, (sf, ftys) in enumerate(tyd.items()):
         f = src_files[sf]
         if i:
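With these amalg.py changes, gen_amalg collects each source file's `# ruff: noqa: ...` codes (together with RUFF_DISABLES) into a single header line, and it now also emits an interpreter version check right after the global import section. As a rough illustration only (the output path, the collected noqa codes, and the version tuple below are placeholders, not values taken from this diff), the preamble of an amalgamated script would look something like:

#!/usr/bin/env python3
# noinspection DuplicatedCode
# @omlish-lite
# @omlish-script
# @omdev-amalg-output ../pyproject/cli.py
# ruff: noqa: UP006 UP007 UP036
import sys
import typing as ta


########################################


if sys.version_info < (3, 8):
    raise OSError(
        f'Requires python (3, 8), got {sys.version_info} from {sys.executable}')  # noqa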
omdev-0.0.0.dev26/omdev/datacache/__init__.py
@@ -0,0 +1,19 @@
+from .cache import (  # noqa
+    DataCache,
+)
+
+from .default import (  # noqa
+    default,
+    default_dir,
+)
+
+from .manifests import (  # noqa
+    CacheDataManifest,
+)
+
+from .specs import (  # noqa
+    CacheDataSpec,
+    GitCacheDataSpec,
+    GithubContentCacheDataSpec,
+    HttpCacheDataSpec,
+)
omdev-0.0.0.dev26/omdev/datacache/cache.py
@@ -0,0 +1,149 @@
+"""
+TODO:
+ - huggingface_hub
+ - postprocessing?
+ - unarchive
+ - stupid little progress bars
+ - return file path for single files
+ - thus, HttpSpec.url has derive=lambda url: ...
+"""
+import logging
+import os.path
+import shutil
+import tempfile
+import urllib.parse
+import urllib.request
+
+from omlish import check
+from omlish import lang
+from omlish import marshal as msh
+from omlish.formats import json
+
+from .. import git
+from .manifests import CacheDataManifest
+from .specs import CacheDataSpec
+from .specs import GitCacheDataSpec
+from .specs import GithubContentCacheDataSpec
+from .specs import HttpCacheDataSpec
+
+
+log = logging.getLogger(__name__)
+
+
+##
+
+
+class DataCache:
+    def __init__(self, base_dir: str) -> None:
+        super().__init__()
+        self._base_dir = base_dir
+
+        self._items_dir = os.path.join(base_dir, 'items')
+
+    def _fetch_url(self, url: str, out_file: str) -> None:
+        log.info('Fetching url: %s -> %s', url, out_file)
+
+        urllib.request.urlretrieve(url, out_file)  # noqa
+
+    def _fetch_into(self, spec: CacheDataSpec, data_dir: str) -> None:
+        log.info('Fetching spec: %s %r', spec.digest, spec)
+
+        if isinstance(spec, HttpCacheDataSpec):
+            self._fetch_url(spec.url, os.path.join(data_dir, spec.file_name_or_default))
+
+        elif isinstance(spec, GithubContentCacheDataSpec):
+            for repo_file in spec.files:
+                out_file = os.path.join(data_dir, repo_file)
+                if not os.path.abspath(out_file).startswith(os.path.abspath(data_dir)):
+                    raise RuntimeError(out_file)  # noqa
+
+                url = f'https://raw.githubusercontent.com/{spec.repo}/{spec.rev}/{repo_file}'
+                os.makedirs(os.path.dirname(out_file), exist_ok=True)
+                self._fetch_url(url, os.path.join(data_dir, out_file))
+
+        elif isinstance(spec, GitCacheDataSpec):
+            if not spec.subtrees:
+                raise NotImplementedError
+
+            tmp_dir = tempfile.mkdtemp()
+
+            log.info('Cloning git repo: %s -> %s', spec.url, tmp_dir)
+
+            git.clone_subtree(
+                base_dir=tmp_dir,
+                repo_url=spec.url,
+                repo_dir='data',
+                branch=spec.branch,
+                rev=spec.rev,
+                repo_subtrees=spec.subtrees,
+            )
+
+            repo_dir = os.path.join(tmp_dir, 'data')
+            if not os.path.isdir(repo_dir):
+                raise RuntimeError(repo_dir)
+
+            git_dir = os.path.join(repo_dir, '.git')
+            if not os.path.isdir(git_dir):
+                raise RuntimeError(git_dir)
+            shutil.rmtree(git_dir)
+
+            os.rmdir(data_dir)
+            os.rename(repo_dir, data_dir)
+
+        else:
+            raise TypeError(spec)
+
+    def _return_val(self, spec: CacheDataSpec, data_dir: str) -> str:
+        check.state(os.path.isdir(data_dir))
+
+        if isinstance(spec, HttpCacheDataSpec):
+            data_file = os.path.join(data_dir, spec.file_name_or_default)
+            if not os.path.isfile(data_file):
+                raise RuntimeError(data_file)  # noqa
+            return data_file
+
+        else:
+            return data_dir
+
+    def get(self, spec: CacheDataSpec) -> str:
+        os.makedirs(self._items_dir, exist_ok=True)
+
+        #
+
+        item_dir = os.path.join(self._items_dir, spec.digest)
+        if os.path.isdir(item_dir):
+            data_dir = os.path.join(item_dir, 'data')
+            return self._return_val(spec, data_dir)
+
+        #
+
+        tmp_dir = tempfile.mkdtemp()
+
+        #
+
+        fetch_dir = os.path.join(tmp_dir, 'data')
+        os.mkdir(fetch_dir)
+
+        start_at = lang.utcnow()
+        self._fetch_into(spec, fetch_dir)
+        end_at = lang.utcnow()
+
+        #
+
+        manifest = CacheDataManifest(
+            spec,
+            start_at=start_at,
+            end_at=end_at,
+        )
+        manifest_json = json.dumps_pretty(msh.marshal(manifest))
+
+        manifest_file = os.path.join(tmp_dir, 'manifest.json')
+        with open(manifest_file, 'w') as f:
+            f.write(manifest_json)
+
+        ##
+
+        os.rename(tmp_dir, item_dir)
+
+        data_dir = os.path.join(item_dir, 'data')
+        return self._return_val(spec, data_dir)
omdev-0.0.0.dev26/omdev/datacache/consts.py
@@ -0,0 +1 @@
+MARSHAL_VERSION = 0
omdev-0.0.0.dev26/omdev/datacache/default.py
@@ -0,0 +1,51 @@
+import os.path
+
+from omlish import lang
+
+from .cache import DataCache
+
+
+##
+
+
+@lang.cached_function(lock=True)
+def default_dir() -> str:
+    return os.path.expanduser('~/.cache/omlish/data')
+
+
+@lang.cached_function(lock=True)
+def default() -> DataCache:
+    return DataCache(default_dir())
+
+
+def _main() -> None:
+    from omlish import logs
+
+    logs.configure_standard_logging('INFO')
+
+    #
+
+    from .specs import GitCacheDataSpec
+    from .specs import GithubContentCacheDataSpec
+    from .specs import HttpCacheDataSpec
+
+    for spec in [
+        GitCacheDataSpec(
+            'https://github.com/wrmsr/deep_learning_cookbook',
+            rev='138a99b09ffa3a728d261e461440f029e512ac93',
+            subtrees=['data/wp_movies_10k.ndjson'],
+        ),
+        GithubContentCacheDataSpec(
+            'karpathy/char-rnn',
+            'master',
+            ['data/tinyshakespeare/input.txt'],
+        ),
+        HttpCacheDataSpec('https://github.com/VanushVaswani/keras_mnistm/releases/download/1.0/keras_mnistm.pkl.gz'),
+    ]:
+        print(spec)
+        for _ in range(2):
+            print(default().get(spec))
+
+
+if __name__ == '__main__':
+    _main()
omdev-0.0.0.dev26/omdev/datacache/manifests.py
@@ -0,0 +1,40 @@
+import datetime
+
+from omlish import __about__ as about
+from omlish import cached
+from omlish import dataclasses as dc
+
+from ..revisions import get_git_revision
+from .consts import MARSHAL_VERSION
+from .specs import CacheDataSpec
+
+
+##
+
+
+@cached.function
+def _lib_revision() -> str | None:
+    if (rev := about.__revision__) is not None:
+        return rev  # type: ignore
+
+    return get_git_revision()
+
+
+##
+
+
+@dc.dataclass(frozen=True)
+class CacheDataManifest:
+    spec: CacheDataSpec
+
+    start_at: datetime.datetime = dc.field(kw_only=True)
+    end_at: datetime.datetime = dc.field(kw_only=True)
+
+    lib_version: str = dc.field(default_factory=lambda: about.__version__, kw_only=True)
+    lib_revision: str = dc.field(default_factory=_lib_revision, kw_only=True)
+
+    marshal_version: int = dc.field(default=MARSHAL_VERSION, kw_only=True)
+
+    @dc.validate
+    def _validate_marshal_versions(self) -> bool:
+        return self.marshal_version == self.spec.marshal_version
omdev-0.0.0.dev26/omdev/datacache/specs.py
@@ -0,0 +1,93 @@
+import hashlib
+import typing as ta
+import urllib.parse
+import urllib.request
+
+from omlish import cached
+from omlish import check
+from omlish import dataclasses as dc
+from omlish import lang
+from omlish import marshal as msh
+from omlish.formats import json
+
+from .consts import MARSHAL_VERSION
+
+
+##
+
+
+@dc.dataclass(frozen=True)
+class CacheDataSpec(lang.Abstract, lang.Sealed):
+    marshal_version: int = dc.field(default=MARSHAL_VERSION, kw_only=True)
+
+    @cached.property
+    def json(self) -> str:
+        return json.dumps_compact(msh.marshal(self, CacheDataSpec))
+
+    @cached.property
+    def digest(self) -> str:
+        return hashlib.md5(self.json.encode('utf-8')).hexdigest()  # noqa
+
+
+##
+
+
+def _maybe_sorted_strs(v: ta.Iterable[str] | None) -> ta.Sequence[str] | None:
+    if v is None:
+        return None
+    return sorted(set(check.not_isinstance(v, str)))
+
+
+@dc.dataclass(frozen=True)
+class GitCacheDataSpec(CacheDataSpec):
+    url: str
+
+    branch: str | None = dc.field(default=None, kw_only=True)
+    rev: str | None = dc.field(default=None, kw_only=True)
+
+    subtrees: ta.Sequence[str] = dc.xfield(default=None, kw_only=True, coerce=_maybe_sorted_strs)
+
+
+##
+
+
+@dc.dataclass(frozen=True)
+class HttpCacheDataSpec(CacheDataSpec):
+    url: str = dc.xfield(validate=lambda u: bool(urllib.parse.urlparse(u)))
+    file_name: str | None = None
+
+    @cached.property
+    def file_name_or_default(self) -> str:
+        if self.file_name is not None:
+            return self.file_name
+        return urllib.parse.urlparse(self.url).path.split('/')[-1]
+
+
+##
+
+
+def _repo_str(s: str) -> str:
+    u, r = check.non_empty_str(s).split('/')
+    check.non_empty_str(u)
+    check.non_empty_str(r)
+    return s
+
+
+@dc.dataclass(frozen=True)
+class GithubContentCacheDataSpec(CacheDataSpec):
+    repo: str = dc.field(validate=_repo_str)  # type: ignore
+    rev: str
+    files: lang.SequenceNotStr[str]
+
+
+##
+
+
+@lang.cached_function
+def _install_standard_marshalling() -> None:
+    specs_poly = msh.polymorphism_from_subclasses(CacheDataSpec)
+    msh.STANDARD_MARSHALER_FACTORIES[0:0] = [msh.PolymorphismMarshalerFactory(specs_poly)]
+    msh.STANDARD_UNMARSHALER_FACTORIES[0:0] = [msh.PolymorphismUnmarshalerFactory(specs_poly)]
+
+
+_install_standard_marshalling()
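Taken together, the new datacache modules form a small content-addressed download cache: each spec is marshalled to compact JSON, the MD5 of that JSON becomes the item digest, and DataCache.get() fetches into items/<digest>/data under the cache root on first use and simply returns the cached path afterwards. A minimal usage sketch along the lines of the _main() demo in default.py above:

from omdev import datacache

# HTTP specs resolve to the downloaded file's path; git/github specs resolve to a directory.
spec = datacache.HttpCacheDataSpec(
    'https://github.com/VanushVaswani/keras_mnistm/releases/download/1.0/keras_mnistm.pkl.gz',
)

path = datacache.default().get(spec)  # fetches on the first call, returns the cached path after that
print(path)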
omdev-0.0.0.dev26/omdev/git.py
@@ -0,0 +1,62 @@
+import os.path
+import subprocess
+import typing as ta
+
+
+def clone_subtree(
+        *,
+        base_dir: str,
+        repo_url: str,
+        repo_dir: str,
+        branch: str | None = None,
+        rev: str | None = None,
+        repo_subtrees: ta.Sequence[str],
+) -> None:
+    if not bool(branch) ^ bool(rev):
+        raise ValueError('must set branch or rev')
+
+    if isinstance(repo_subtrees, str):
+        raise TypeError(repo_subtrees)
+
+    git_opts = [
+        '-c', 'advice.detachedHead=false',
+    ]
+
+    subprocess.check_call(
+        [
+            'git',
+            *git_opts,
+            'clone',
+            '-n',
+            '--depth=1',
+            '--filter=tree:0',
+            *(['-b', branch] if branch else []),
+            '--single-branch',
+            repo_url,
+            repo_dir,
+        ],
+        cwd=base_dir,
+    )
+
+    rd = os.path.join(base_dir, repo_dir)
+    subprocess.check_call(
+        [
+            'git',
+            *git_opts,
+            'sparse-checkout',
+            'set',
+            '--no-cone',
+            *repo_subtrees,
+        ],
+        cwd=rd,
+    )
+
+    subprocess.check_call(
+        [
+            'git',
+            *git_opts,
+            'checkout',
+            *([rev] if rev else []),
+        ],
+        cwd=rd,
+    )
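The new clone_subtree() helper performs a shallow, treeless ('--filter=tree:0'), single-branch clone followed by a no-cone sparse checkout of only the requested paths; this is what DataCache._fetch_into() uses for GitCacheDataSpec. A usage sketch mirroring that call (the temp dir and repo values here just echo the demo spec shown earlier):

import tempfile

from omdev import git

tmp_dir = tempfile.mkdtemp()

# Materializes only the named subtree into <tmp_dir>/data at the pinned revision.
git.clone_subtree(
    base_dir=tmp_dir,
    repo_url='https://github.com/wrmsr/deep_learning_cookbook',
    repo_dir='data',
    rev='138a99b09ffa3a728d261e461440f029e512ac93',
    repo_subtrees=['data/wp_movies_10k.ndjson'],
)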
{omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/manifests.py
@@ -10,7 +10,7 @@
  - # @omlish-manifest \n _CACHE_MANIFEST = {'cache': {'name': 'llm', …
  - also can do prechecks!
 """
-# ruff: noqa: UP006
+# ruff: noqa: UP006 UP007
 # @omlish-lite
 import argparse
 import collections
@@ -22,10 +22,13 @@ import re
 import shlex
 import subprocess
 import sys
+import time
 import typing as ta
 
 from omlish.lite.cached import cached_nullary
 from omlish.lite.json import json_dumps_pretty
+from omlish.lite.logs import configure_standard_logging
+from omlish.lite.logs import log
 
 from . import findmagic
 
@@ -84,13 +87,17 @@ def build_module_manifests(
         base: str,
         *,
         shell_wrap: bool = True,
+        warn_threshold_s: ta.Optional[float] = 1.,
 ) -> ta.Sequence[Manifest]:
-
+    log.info('Extracting manifests from file %s', file)
 
     if not file.endswith('.py'):
         raise Exception(file)
 
     mod_name = file.rpartition('.')[0].replace(os.sep, '.')
+    mod_base = mod_name.split('.')[0]
+    if mod_base != (first_dir := file.split(os.path.sep)[0]):
+        raise Exception(f'Unexpected module base: {mod_base=} != {first_dir=}')
 
     with open(os.path.join(base, file)) as f:
         src = f.read()
@@ -103,10 +110,10 @@ def build_module_manifests(
             raise Exception(nl)
 
         origins.append(ManifestOrigin(
-            module=mod_name,
+            module='.'.join(['', *mod_name.split('.')[1:]]),
            attr=m.groupdict()['name'],
 
-            file=file,
+            file=os.path.join(*os.path.split(file)[1:]),  # noqa
            line=i + 1,
        ))
 
@@ -132,8 +139,15 @@ def build_module_manifests(
     if shell_wrap:
         args = ['sh', '-c', ' '.join(map(shlex.quote, args))]
 
+    start_time = time.time()
+
     subproc_out = subprocess.check_output(args)
 
+    end_time = time.time()
+
+    if warn_threshold_s is not None and (elapsed_time := (end_time - start_time)) >= warn_threshold_s:
+        log.warning('Manifest extraction took a long time: %s, %.2f s', file, elapsed_time)
+
     sp_lines = subproc_out.decode().strip().splitlines()
     if len(sp_lines) != 1:
         raise Exception('Unexpected subprocess output')
@@ -145,11 +159,17 @@ def build_module_manifests(
     out: ta.List[Manifest] = []
 
     for o in origins:
-
+        value = dct[o.attr]
+
+        if not (
+                isinstance(value, ta.Mapping) and
+                all(isinstance(k, str) and k.startswith('$') and len(k) > 1 for k in value)
+        ):
+            raise TypeError(f'Manifests must be mapping of strings starting with $: {value!r}')
 
         out.append(Manifest(
             **dc.asdict(o),
-            value=
+            value=value,
        ))
 
     return out
@@ -202,9 +222,11 @@ if __name__ == '__main__':
         write=args.write or False,
     )
     if not args.quiet:
-        print(json_dumps_pretty(ms))
+        print(json_dumps_pretty([dc.asdict(m) for m in ms]))
 
 def _main(argv=None) -> None:
+    configure_standard_logging('INFO')
+
     parser = argparse.ArgumentParser()
     subparsers = parser.add_subparsers()