omdev 0.0.0.dev25__tar.gz → 0.0.0.dev26__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of omdev might be problematic. Click here for more details.

Files changed (97) hide show
  1. {omdev-0.0.0.dev25/omdev.egg-info → omdev-0.0.0.dev26}/PKG-INFO +2 -2
  2. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/amalg/amalg.py +60 -23
  3. omdev-0.0.0.dev26/omdev/datacache/__init__.py +19 -0
  4. omdev-0.0.0.dev26/omdev/datacache/cache.py +149 -0
  5. omdev-0.0.0.dev26/omdev/datacache/consts.py +1 -0
  6. omdev-0.0.0.dev26/omdev/datacache/default.py +51 -0
  7. omdev-0.0.0.dev26/omdev/datacache/manifests.py +40 -0
  8. omdev-0.0.0.dev26/omdev/datacache/specs.py +93 -0
  9. omdev-0.0.0.dev26/omdev/git.py +62 -0
  10. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/manifests.py +29 -7
  11. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/pyproject/pkg.py +26 -7
  12. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/scripts/interp.py +42 -37
  13. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/scripts/pyproject.py +66 -51
  14. omdev-0.0.0.dev26/omdev/tools/dockertools.py +183 -0
  15. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26/omdev.egg-info}/PKG-INFO +2 -2
  16. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev.egg-info/SOURCES.txt +7 -0
  17. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev.egg-info/requires.txt +1 -1
  18. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/pyproject.toml +2 -2
  19. omdev-0.0.0.dev25/omdev/tools/dockertools.py +0 -81
  20. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/LICENSE +0 -0
  21. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/MANIFEST.in +0 -0
  22. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/README.rst +0 -0
  23. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/__about__.py +0 -0
  24. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/__init__.py +0 -0
  25. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/_manifests.json +0 -0
  26. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/amalg/__init__.py +0 -0
  27. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/amalg/__main__.py +0 -0
  28. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/bracepy.py +0 -0
  29. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/__init__.py +0 -0
  30. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_boilerplate.cc +0 -0
  31. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/LICENSE +0 -0
  32. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/__init__.py +0 -0
  33. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/build_ext.py +0 -0
  34. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/compilers/__init__.py +0 -0
  35. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/compilers/ccompiler.py +0 -0
  36. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/compilers/options.py +0 -0
  37. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/compilers/unixccompiler.py +0 -0
  38. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/dir_util.py +0 -0
  39. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/errors.py +0 -0
  40. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/extension.py +0 -0
  41. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/file_util.py +0 -0
  42. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/modified.py +0 -0
  43. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/spawn.py +0 -0
  44. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/sysconfig.py +0 -0
  45. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/util.py +0 -0
  46. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/_distutils/version.py +0 -0
  47. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/build.py +0 -0
  48. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/cmake.py +0 -0
  49. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/importhook.py +0 -0
  50. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/magic.py +0 -0
  51. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cexts/scan.py +0 -0
  52. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/classdot.py +0 -0
  53. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/cmake.py +0 -0
  54. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/findimports.py +0 -0
  55. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/findmagic.py +0 -0
  56. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/__init__.py +0 -0
  57. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/__main__.py +0 -0
  58. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/cli.py +0 -0
  59. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/inspect.py +0 -0
  60. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/providers.py +0 -0
  61. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/pyenv.py +0 -0
  62. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/resolvers.py +0 -0
  63. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/standalone.py +0 -0
  64. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/system.py +0 -0
  65. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/interp/types.py +0 -0
  66. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/mypy/__init__.py +0 -0
  67. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/mypy/debug.py +0 -0
  68. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/precheck/__init__.py +0 -0
  69. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/precheck/__main__.py +0 -0
  70. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/precheck/precheck.py +0 -0
  71. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/pyproject/__init__.py +0 -0
  72. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/pyproject/__main__.py +0 -0
  73. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/pyproject/cexts.py +0 -0
  74. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/pyproject/cli.py +0 -0
  75. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/pyproject/configs.py +0 -0
  76. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/pyproject/reqs.py +0 -0
  77. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/revisions.py +0 -0
  78. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/scripts/__init__.py +0 -0
  79. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/scripts/bumpversion.py +0 -0
  80. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/scripts/execrss.py +0 -0
  81. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/tokens.py +0 -0
  82. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/toml/__init__.py +0 -0
  83. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/toml/parser.py +0 -0
  84. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/toml/writer.py +0 -0
  85. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/tools/__init__.py +0 -0
  86. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/tools/gittools.py +0 -0
  87. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/tools/importscan.py +0 -0
  88. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/tools/importtrace.py +0 -0
  89. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/tools/rst.py +0 -0
  90. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/tools/sqlrepl.py +0 -0
  91. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/versioning/__init__.py +0 -0
  92. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/versioning/specifiers.py +0 -0
  93. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/versioning/versions.py +0 -0
  94. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev/wheelfile.py +0 -0
  95. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev.egg-info/dependency_links.txt +0 -0
  96. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/omdev.egg-info/top_level.txt +0 -0
  97. {omdev-0.0.0.dev25 → omdev-0.0.0.dev26}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: omdev
3
- Version: 0.0.0.dev25
3
+ Version: 0.0.0.dev26
4
4
  Summary: omdev
5
5
  Author: wrmsr
6
6
  License: BSD-3-Clause
@@ -12,7 +12,7 @@ Classifier: Operating System :: OS Independent
12
12
  Classifier: Operating System :: POSIX
13
13
  Requires-Python: ~=3.12
14
14
  License-File: LICENSE
15
- Requires-Dist: omlish==0.0.0.dev25
15
+ Requires-Dist: omlish==0.0.0.dev26
16
16
  Provides-Extra: all
17
17
  Requires-Dist: pycparser~=2.22; extra == "all"
18
18
  Requires-Dist: cffi~=1.17; extra == "all"
@@ -29,13 +29,16 @@ import io
29
29
  import logging
30
30
  import os.path
31
31
  import re
32
+ import textwrap
32
33
  import typing as ta
33
34
 
34
35
  import tokenize_rt as trt
35
36
 
36
37
  from omlish import check
37
38
  from omlish import collections as col
39
+ from omlish import lang
38
40
  from omlish import logs
41
+ from omlish.lite.runtime import REQUIRED_PYTHON_VERSION
39
42
 
40
43
  from .. import findmagic
41
44
  from .. import tokens as tks
@@ -91,6 +94,7 @@ def strip_main_lines(cls: ta.Sequence[Tokens]) -> list[Tokens]:
91
94
 
92
95
  STRIPPED_HEADER_MAGICS = [
93
96
  '# @omlish-lite',
97
+ '# @omlish-script',
94
98
  ]
95
99
 
96
100
  STRIPPED_HEADER_PATS = [findmagic.compile_magic_pat(m) for m in STRIPPED_HEADER_MAGICS]
@@ -268,6 +272,8 @@ class SrcFile:
268
272
  typings: ta.Sequence[Typing] = dc.field(repr=False)
269
273
  content_lines: ta.Sequence[Tokens] = dc.field(repr=False)
270
274
 
275
+ ruff_noqa: ta.AbstractSet[str] = dc.field(repr=False)
276
+
271
277
 
272
278
  def make_src_file(
273
279
  path: str,
@@ -283,6 +289,7 @@ def make_src_file(
283
289
  hls, cls = split_header_lines(lines)
284
290
 
285
291
  hls = strip_header_lines(hls)
292
+ rnls, hls = col.partition(hls, lambda l: tks.join_toks(l).startswith('# ruff: noqa: '))
286
293
 
287
294
  imps: list[Import] = []
288
295
  tys: list[Typing] = []
@@ -316,6 +323,8 @@ def make_src_file(
316
323
  imports=imps,
317
324
  typings=tys,
318
325
  content_lines=ctls,
326
+
327
+ ruff_noqa=set(lang.flatten(tks.join_toks(l).strip().split()[3:] for l in rnls)), # noqa
319
328
  )
320
329
 
321
330
 
@@ -324,10 +333,11 @@ def make_src_file(
324
333
 
325
334
  SECTION_SEP = '#' * 40 + '\n'
326
335
 
327
- RUFF_DISABLES: ta.Sequence[str] = [
328
- # 'UP006', # non-pep585-annotation
329
- # 'UP007', # non-pep604-annotation
330
- ]
336
+ RUFF_DISABLES: ta.AbstractSet[str] = {
337
+ 'UP006', # non-pep585-annotation
338
+ 'UP007', # non-pep604-annotation
339
+ 'UP036', # outdated-version-block
340
+ }
331
341
 
332
342
  OUTPUT_COMMENT = '# @omdev-amalg-output '
333
343
  SCAN_COMMENT = '# @omdev-amalg '
@@ -362,46 +372,70 @@ def gen_amalg(
362
372
 
363
373
  ##
364
374
 
375
+ hls = []
376
+
365
377
  mf = src_files[main_path]
366
378
  if mf.header_lines:
367
- hls = [
379
+ hls.extend([
368
380
  hl
369
381
  for hlts in mf.header_lines
370
382
  if not (hl := tks.join_toks(hlts)).startswith(SCAN_COMMENT)
371
- ]
372
- if output_dir is not None:
373
- ogf = os.path.relpath(main_path, output_dir)
374
- else:
375
- ogf = os.path.basename(main_path)
376
- nhls = []
377
- nhls.extend([
378
- '#!/usr/bin/env python3\n',
379
- '# noinspection DuplicatedCode\n',
380
- '# @omlish-lite\n',
381
- '# @omlish-script\n',
382
- f'{OUTPUT_COMMENT.strip()} {ogf}\n',
383
383
  ])
384
- hls = [*nhls, *hls]
385
- out.write(''.join(hls))
386
384
 
387
- if RUFF_DISABLES:
388
- out.write(f'# ruff: noqa: {" ".join(RUFF_DISABLES)}\n')
385
+ if output_dir is not None:
386
+ ogf = os.path.relpath(main_path, output_dir)
387
+ else:
388
+ ogf = os.path.basename(main_path)
389
+
390
+ nhls = []
391
+ nhls.extend([
392
+ '#!/usr/bin/env python3\n',
393
+ '# noinspection DuplicatedCode\n',
394
+ '# @omlish-lite\n',
395
+ '# @omlish-script\n',
396
+ f'{OUTPUT_COMMENT.strip()} {ogf}\n',
397
+ ])
398
+
399
+ ruff_disables = sorted({
400
+ *lang.flatten(f.ruff_noqa for f in src_files.values()),
401
+ *RUFF_DISABLES,
402
+ })
403
+ if ruff_disables:
404
+ nhls.append(f'# ruff: noqa: {" ".join(sorted(ruff_disables))}\n')
405
+
406
+ hls = [*nhls, *hls]
407
+ out.write(''.join(hls))
389
408
 
390
409
  ##
391
410
 
392
411
  all_imps = [i for f in src_files.values() for i in f.imports]
393
412
  gl_imps = [i for i in all_imps if i.mod_path is None]
394
413
 
395
- dct: dict = {}
414
+ dct: dict = {
415
+ ('sys', None, None): ['import sys\n'],
416
+ }
396
417
  for imp in gl_imps:
397
418
  dct.setdefault((imp.mod, imp.item, imp.as_), []).append(imp)
398
419
  for _, l in sorted(dct.items()):
399
- out.write(tks.join_toks(l[0].toks))
420
+ il = l[0]
421
+ out.write(il if isinstance(il, str) else tks.join_toks(il.toks))
400
422
  if dct:
401
423
  out.write('\n\n')
402
424
 
403
425
  ##
404
426
 
427
+ out.write(SECTION_SEP)
428
+ out.write('\n\n')
429
+
430
+ out.write(textwrap.dedent(f"""
431
+ if sys.version_info < {REQUIRED_PYTHON_VERSION!r}:
432
+ raise OSError(
433
+ f'Requires python {REQUIRED_PYTHON_VERSION!r}, got {{sys.version_info}} from {{sys.executable}}') # noqa
434
+ """).lstrip())
435
+ out.write('\n\n')
436
+
437
+ ##
438
+
405
439
  ts = list(col.toposort({ # noqa
406
440
  f.path: {mp for i in f.imports if (mp := i.mod_path) is not None}
407
441
  for f in src_files.values()
@@ -418,6 +452,9 @@ def gen_amalg(
418
452
  if ty.src not in tys:
419
453
  tyd.setdefault(f.path, []).append(ty)
420
454
  tys.add(ty.src)
455
+ if tys:
456
+ out.write(SECTION_SEP)
457
+ out.write('\n\n')
421
458
  for i, (sf, ftys) in enumerate(tyd.items()):
422
459
  f = src_files[sf]
423
460
  if i:
@@ -0,0 +1,19 @@
1
+ from .cache import ( # noqa
2
+ DataCache,
3
+ )
4
+
5
+ from .default import ( # noqa
6
+ default,
7
+ default_dir,
8
+ )
9
+
10
+ from .manifests import ( # noqa
11
+ CacheDataManifest,
12
+ )
13
+
14
+ from .specs import ( # noqa
15
+ CacheDataSpec,
16
+ GitCacheDataSpec,
17
+ GithubContentCacheDataSpec,
18
+ HttpCacheDataSpec,
19
+ )
@@ -0,0 +1,149 @@
1
+ """
2
+ TODO:
3
+ - huggingface_hub
4
+ - postprocessing?
5
+ - unarchive
6
+ - stupid little progress bars
7
+ - return file path for single files
8
+ - thus, HttpSpec.url has derive=lambda url: ...
9
+ """
10
+ import logging
11
+ import os.path
12
+ import shutil
13
+ import tempfile
14
+ import urllib.parse
15
+ import urllib.request
16
+
17
+ from omlish import check
18
+ from omlish import lang
19
+ from omlish import marshal as msh
20
+ from omlish.formats import json
21
+
22
+ from .. import git
23
+ from .manifests import CacheDataManifest
24
+ from .specs import CacheDataSpec
25
+ from .specs import GitCacheDataSpec
26
+ from .specs import GithubContentCacheDataSpec
27
+ from .specs import HttpCacheDataSpec
28
+
29
+
30
+ log = logging.getLogger(__name__)
31
+
32
+
33
+ ##
34
+
35
+
36
class DataCache:
    """
    Directory-backed cache of fetched data, keyed by `spec.digest`.

    Each cached item is stored at `<base_dir>/items/<digest>/` and consists of a `data` directory holding the fetched
    content plus a `manifest.json` recording the spec and the fetch start/end times.
    """

    def __init__(self, base_dir: str) -> None:
        super().__init__()
        self._base_dir = base_dir

        self._items_dir = os.path.join(base_dir, 'items')

    def _fetch_url(self, url: str, out_file: str) -> None:
        """Download `url` into the local file `out_file`."""

        log.info('Fetching url: %s -> %s', url, out_file)

        urllib.request.urlretrieve(url, out_file)  # noqa

    @staticmethod
    def _resolve_within(root_dir: str, rel_path: str) -> str:
        """
        Join `rel_path` onto `root_dir`, raising RuntimeError unless the result lies strictly inside `root_dir`.

        The comparison is made against `root_dir` plus a trailing separator so that a sibling directory which merely
        shares a name prefix (`data` vs `data_other`) is not mistaken for a child.
        """

        joined = os.path.join(root_dir, rel_path)
        root_prefix = os.path.join(os.path.abspath(root_dir), '')
        if not os.path.abspath(joined).startswith(root_prefix):
            raise RuntimeError(joined)  # noqa
        return joined

    def _fetch_into(self, spec: CacheDataSpec, data_dir: str) -> None:
        """
        Fetch the content described by `spec` into the existing, empty directory `data_dir`.

        Raises:
            TypeError: `spec` is not one of the supported spec classes.
            NotImplementedError: a git spec was given without subtrees.
            RuntimeError: a requested file would land outside `data_dir`, or the git clone lacks the expected layout.
        """

        log.info('Fetching spec: %s %r', spec.digest, spec)

        if isinstance(spec, HttpCacheDataSpec):
            self._fetch_url(spec.url, os.path.join(data_dir, spec.file_name_or_default))

        elif isinstance(spec, GithubContentCacheDataSpec):
            for repo_file in spec.files:
                out_file = self._resolve_within(data_dir, repo_file)

                url = f'https://raw.githubusercontent.com/{spec.repo}/{spec.rev}/{repo_file}'
                os.makedirs(os.path.dirname(out_file), exist_ok=True)
                # out_file already includes data_dir, so it is passed as-is rather than joined a second time.
                self._fetch_url(url, out_file)

        elif isinstance(spec, GitCacheDataSpec):
            if not spec.subtrees:
                raise NotImplementedError

            tmp_dir = tempfile.mkdtemp()
            try:
                log.info('Cloning git repo: %s -> %s', spec.url, tmp_dir)

                git.clone_subtree(
                    base_dir=tmp_dir,
                    repo_url=spec.url,
                    repo_dir='data',
                    branch=spec.branch,
                    rev=spec.rev,
                    repo_subtrees=spec.subtrees,
                )

                repo_dir = os.path.join(tmp_dir, 'data')
                if not os.path.isdir(repo_dir):
                    raise RuntimeError(repo_dir)

                # Only the checked out files are cached, not the repository metadata.
                git_dir = os.path.join(repo_dir, '.git')
                if not os.path.isdir(git_dir):
                    raise RuntimeError(git_dir)
                shutil.rmtree(git_dir)

                # Swap the empty data_dir for the clone. shutil.move is used because the clone sits in the system temp
                # dir, which may be on a different filesystem than data_dir, where a plain os.rename would fail.
                os.rmdir(data_dir)
                shutil.move(repo_dir, data_dir)

            finally:
                # Never leave the scratch clone directory behind, whether or not the fetch succeeded.
                shutil.rmtree(tmp_dir, ignore_errors=True)

        else:
            raise TypeError(spec)

    def _return_val(self, spec: CacheDataSpec, data_dir: str) -> str:
        """Return the path handed back to callers: the single file for http specs, otherwise `data_dir` itself."""

        check.state(os.path.isdir(data_dir))

        if isinstance(spec, HttpCacheDataSpec):
            data_file = os.path.join(data_dir, spec.file_name_or_default)
            if not os.path.isfile(data_file):
                raise RuntimeError(data_file)  # noqa
            return data_file

        else:
            return data_dir

    def get(self, spec: CacheDataSpec) -> str:
        """
        Return the local path of the data for `spec`, fetching and caching it first if it is not yet present.

        The item is assembled in a staging directory and renamed into place only once the data and its manifest have
        been written, so an item directory that exists is always complete.
        """

        os.makedirs(self._items_dir, exist_ok=True)

        #

        item_dir = os.path.join(self._items_dir, spec.digest)
        if os.path.isdir(item_dir):
            data_dir = os.path.join(item_dir, 'data')
            return self._return_val(spec, data_dir)

        #

        # Stage inside the cache's own base dir: this keeps the final os.rename on a single filesystem (the system temp
        # dir frequently is not), which os.rename requires on some platforms.
        tmp_dir = tempfile.mkdtemp(prefix='.tmp-', dir=self._base_dir)
        try:
            fetch_dir = os.path.join(tmp_dir, 'data')
            os.mkdir(fetch_dir)

            start_at = lang.utcnow()
            self._fetch_into(spec, fetch_dir)
            end_at = lang.utcnow()

            #

            manifest = CacheDataManifest(
                spec,
                start_at=start_at,
                end_at=end_at,
            )
            manifest_json = json.dumps_pretty(msh.marshal(manifest))

            manifest_file = os.path.join(tmp_dir, 'manifest.json')
            with open(manifest_file, 'w') as f:
                f.write(manifest_json)

            ##

            os.rename(tmp_dir, item_dir)

        except BaseException:
            # A failed or interrupted fetch must not leave its staging directory behind.
            shutil.rmtree(tmp_dir, ignore_errors=True)
            raise

        data_dir = os.path.join(item_dir, 'data')
        return self._return_val(spec, data_dir)
@@ -0,0 +1 @@
1
+ MARSHAL_VERSION = 0
@@ -0,0 +1,51 @@
1
+ import os.path
2
+
3
+ from omlish import lang
4
+
5
+ from .cache import DataCache
6
+
7
+
8
+ ##
9
+
10
+
11
@lang.cached_function(lock=True)
def default_dir() -> str:
    """Return the default cache base directory, `~/.cache/omlish/data` with the home directory expanded."""

    expanded = os.path.expanduser('~/.cache/omlish/data')
    return expanded
14
+
15
+
16
@lang.cached_function(lock=True)
def default() -> DataCache:
    """Return a `DataCache` rooted at `default_dir()`."""

    base_dir = default_dir()
    return DataCache(base_dir)
19
+
20
+
21
def _main() -> None:
    """Manual smoke run: fetch one spec of each kind through the default cache, twice each, printing the paths."""

    from omlish import logs

    logs.configure_standard_logging('INFO')

    #

    from .specs import GitCacheDataSpec
    from .specs import GithubContentCacheDataSpec
    from .specs import HttpCacheDataSpec

    git_spec = GitCacheDataSpec(
        'https://github.com/wrmsr/deep_learning_cookbook',
        rev='138a99b09ffa3a728d261e461440f029e512ac93',
        subtrees=['data/wp_movies_10k.ndjson'],
    )
    content_spec = GithubContentCacheDataSpec(
        'karpathy/char-rnn',
        'master',
        ['data/tinyshakespeare/input.txt'],
    )
    http_spec = HttpCacheDataSpec('https://github.com/VanushVaswani/keras_mnistm/releases/download/1.0/keras_mnistm.pkl.gz')

    for spec in (git_spec, content_spec, http_spec):
        print(spec)
        # Each spec is requested twice so that both the fetch path and the already-cached path of `get` are run.
        for _attempt in range(2):
            path = default().get(spec)
            print(path)
48
+
49
+
50
+ if __name__ == '__main__':
51
+ _main()
@@ -0,0 +1,40 @@
1
+ import datetime
2
+
3
+ from omlish import __about__ as about
4
+ from omlish import cached
5
+ from omlish import dataclasses as dc
6
+
7
+ from ..revisions import get_git_revision
8
+ from .consts import MARSHAL_VERSION
9
+ from .specs import CacheDataSpec
10
+
11
+
12
+ ##
13
+
14
+
15
@cached.function
def _lib_revision() -> str | None:
    """Return `omlish.__about__.__revision__` when it is set, otherwise whatever `get_git_revision()` reports."""

    packaged_rev = about.__revision__
    if packaged_rev is None:
        return get_git_revision()

    return packaged_rev  # type: ignore
21
+
22
+
23
+ ##
24
+
25
+
26
@dc.dataclass(frozen=True)
class CacheDataManifest:
    """Record stored alongside a cached item: the spec that was fetched, when, and by which library build."""

    # The spec whose data this manifest describes.
    spec: CacheDataSpec

    # Timestamps taken immediately before and after the fetch.
    start_at: datetime.datetime = dc.field(kw_only=True)
    end_at: datetime.datetime = dc.field(kw_only=True)

    # Version / revision of the omlish library, filled in when the manifest is created.
    # NOTE(review): `_lib_revision` is annotated as possibly returning None while this field is a plain `str` - confirm.
    lib_version: str = dc.field(default_factory=lambda: about.__version__, kw_only=True)
    lib_revision: str = dc.field(default_factory=_lib_revision, kw_only=True)

    marshal_version: int = dc.field(default=MARSHAL_VERSION, kw_only=True)

    @dc.validate
    def _validate_marshal_versions(self) -> bool:
        """Check that the manifest and the spec it wraps carry the same marshal version."""

        spec_version = self.spec.marshal_version
        return self.marshal_version == spec_version
@@ -0,0 +1,93 @@
1
+ import hashlib
2
+ import typing as ta
3
+ import urllib.parse
4
+ import urllib.request
5
+
6
+ from omlish import cached
7
+ from omlish import check
8
+ from omlish import dataclasses as dc
9
+ from omlish import lang
10
+ from omlish import marshal as msh
11
+ from omlish.formats import json
12
+
13
+ from .consts import MARSHAL_VERSION
14
+
15
+
16
+ ##
17
+
18
+
19
@dc.dataclass(frozen=True)
class CacheDataSpec(lang.Abstract, lang.Sealed):
    """Base class of the specs describing a piece of cacheable data; subclasses add the fields that locate it."""

    # Marshal format version the spec was created with.
    marshal_version: int = dc.field(default=MARSHAL_VERSION, kw_only=True)

    @cached.property
    def json(self) -> str:
        """Compact JSON text of this spec, marshaled as a `CacheDataSpec`."""

        return json.dumps_compact(msh.marshal(self, CacheDataSpec))

    @cached.property
    def digest(self) -> str:
        """Hex MD5 digest of `json`, used as this spec's cache key."""

        # The hash is only an identifier, not a security measure. Declaring that keeps md5 usable on Python builds
        # that restrict it for security purposes, and does not change the resulting digest.
        return hashlib.md5(self.json.encode('utf-8'), usedforsecurity=False).hexdigest()  # noqa
30
+
31
+
32
+ ##
33
+
34
+
35
def _maybe_sorted_strs(v: ta.Iterable[str] | None) -> ta.Sequence[str] | None:
    """
    Return the distinct items of `v` as a sorted list, or None when `v` is None.

    `v` is first passed through `check.not_isinstance(v, str)` - presumably so that a single bare string is rejected
    instead of being split into characters (verify against omlish's `check`).
    """

    return None if v is None else sorted(set(check.not_isinstance(v, str)))
39
+
40
+
41
@dc.dataclass(frozen=True)
class GitCacheDataSpec(CacheDataSpec):
    """Spec for data taken from the git repository at `url`."""

    url: str

    # Optional, keyword-only selection of what to check out.
    branch: str | None = dc.field(default=None, kw_only=True)
    rev: str | None = dc.field(default=None, kw_only=True)

    # Repository paths to fetch; values are coerced through `_maybe_sorted_strs`.
    # NOTE(review): the default is None although the annotation is a non-optional Sequence - confirm this is intended.
    subtrees: ta.Sequence[str] = dc.xfield(default=None, kw_only=True, coerce=_maybe_sorted_strs)
49
+
50
+
51
+ ##
52
+
53
+
54
@dc.dataclass(frozen=True)
class HttpCacheDataSpec(CacheDataSpec):
    """Spec for a single file downloaded from `url`."""

    # The url must at least carry a scheme. Testing the truthiness of the `urlparse` result itself would accept
    # anything, because the result is a non-empty tuple.
    url: str = dc.xfield(validate=lambda u: bool(urllib.parse.urlparse(u).scheme))

    # Local file name to store the download under; derived from the url when left as None.
    file_name: str | None = None

    @cached.property
    def file_name_or_default(self) -> str:
        """Return `file_name` if given, otherwise the last `/`-separated segment of the url's path."""

        if self.file_name is not None:
            return self.file_name
        return urllib.parse.urlparse(self.url).path.split('/')[-1]
64
+
65
+
66
+ ##
67
+
68
+
69
def _repo_str(s: str) -> str:
    """
    Check that `s` has the form `<owner>/<name>` and return it unchanged.

    Splitting on `/` must yield exactly two parts (the tuple unpacking fails otherwise), and `s` as well as each part
    is passed through `check.non_empty_str`.
    """

    owner, name = check.non_empty_str(s).split('/')
    for part in (owner, name):
        check.non_empty_str(part)
    return s
74
+
75
+
76
@dc.dataclass(frozen=True)
class GithubContentCacheDataSpec(CacheDataSpec):
    """Spec for individual files of a GitHub repository at a given revision."""

    # Repository in `<owner>/<name>` form, checked by `_repo_str`.
    repo: str = dc.field(validate=_repo_str)  # type: ignore

    # Revision to read the files at.
    rev: str

    # Repository-relative paths of the files to fetch.
    files: lang.SequenceNotStr[str]
81
+
82
+
83
+ ##
84
+
85
+
86
@lang.cached_function
def _install_standard_marshalling() -> None:
    """Put polymorphic marshaler and unmarshaler factories for `CacheDataSpec` subclasses first in the standard lists."""

    poly = msh.polymorphism_from_subclasses(CacheDataSpec)

    # Slice assignment at [0:0] inserts in front of whatever factories are already registered.
    marshaler_factory = msh.PolymorphismMarshalerFactory(poly)
    msh.STANDARD_MARSHALER_FACTORIES[0:0] = [marshaler_factory]

    unmarshaler_factory = msh.PolymorphismUnmarshalerFactory(poly)
    msh.STANDARD_UNMARSHALER_FACTORIES[0:0] = [unmarshaler_factory]
91
+
92
+
93
+ _install_standard_marshalling()
@@ -0,0 +1,62 @@
1
+ import os.path
2
+ import subprocess
3
+ import typing as ta
4
+
5
+
6
+ def clone_subtree(
7
+ *,
8
+ base_dir: str,
9
+ repo_url: str,
10
+ repo_dir: str,
11
+ branch: str | None = None,
12
+ rev: str | None = None,
13
+ repo_subtrees: ta.Sequence[str],
14
+ ) -> None:
15
+ if not bool(branch) ^ bool(rev):
16
+ raise ValueError('must set branch or rev')
17
+
18
+ if isinstance(repo_subtrees, str):
19
+ raise TypeError(repo_subtrees)
20
+
21
+ git_opts = [
22
+ '-c', 'advice.detachedHead=false',
23
+ ]
24
+
25
+ subprocess.check_call(
26
+ [
27
+ 'git',
28
+ *git_opts,
29
+ 'clone',
30
+ '-n',
31
+ '--depth=1',
32
+ '--filter=tree:0',
33
+ *(['-b', branch] if branch else []),
34
+ '--single-branch',
35
+ repo_url,
36
+ repo_dir,
37
+ ],
38
+ cwd=base_dir,
39
+ )
40
+
41
+ rd = os.path.join(base_dir, repo_dir)
42
+ subprocess.check_call(
43
+ [
44
+ 'git',
45
+ *git_opts,
46
+ 'sparse-checkout',
47
+ 'set',
48
+ '--no-cone',
49
+ *repo_subtrees,
50
+ ],
51
+ cwd=rd,
52
+ )
53
+
54
+ subprocess.check_call(
55
+ [
56
+ 'git',
57
+ *git_opts,
58
+ 'checkout',
59
+ *([rev] if rev else []),
60
+ ],
61
+ cwd=rd,
62
+ )
@@ -10,7 +10,7 @@
10
10
  - # @omlish-manifest \n _CACHE_MANIFEST = {'cache': {'name': 'llm', …
11
11
  - also can do prechecks!
12
12
  """
13
- # ruff: noqa: UP006
13
+ # ruff: noqa: UP006 UP007
14
14
  # @omlish-lite
15
15
  import argparse
16
16
  import collections
@@ -22,10 +22,13 @@ import re
22
22
  import shlex
23
23
  import subprocess
24
24
  import sys
25
+ import time
25
26
  import typing as ta
26
27
 
27
28
  from omlish.lite.cached import cached_nullary
28
29
  from omlish.lite.json import json_dumps_pretty
30
+ from omlish.lite.logs import configure_standard_logging
31
+ from omlish.lite.logs import log
29
32
 
30
33
  from . import findmagic
31
34
 
@@ -84,13 +87,17 @@ def build_module_manifests(
84
87
  base: str,
85
88
  *,
86
89
  shell_wrap: bool = True,
90
+ warn_threshold_s: ta.Optional[float] = 1.,
87
91
  ) -> ta.Sequence[Manifest]:
88
- print((file, base))
92
+ log.info('Extracting manifests from file %s', file)
89
93
 
90
94
  if not file.endswith('.py'):
91
95
  raise Exception(file)
92
96
 
93
97
  mod_name = file.rpartition('.')[0].replace(os.sep, '.')
98
+ mod_base = mod_name.split('.')[0]
99
+ if mod_base != (first_dir := file.split(os.path.sep)[0]):
100
+ raise Exception(f'Unexpected module base: {mod_base=} != {first_dir=}')
94
101
 
95
102
  with open(os.path.join(base, file)) as f:
96
103
  src = f.read()
@@ -103,10 +110,10 @@ def build_module_manifests(
103
110
  raise Exception(nl)
104
111
 
105
112
  origins.append(ManifestOrigin(
106
- module=mod_name,
113
+ module='.'.join(['', *mod_name.split('.')[1:]]),
107
114
  attr=m.groupdict()['name'],
108
115
 
109
- file=file,
116
+ file=os.path.join(*os.path.split(file)[1:]), # noqa
110
117
  line=i + 1,
111
118
  ))
112
119
 
@@ -132,8 +139,15 @@ def build_module_manifests(
132
139
  if shell_wrap:
133
140
  args = ['sh', '-c', ' '.join(map(shlex.quote, args))]
134
141
 
142
+ start_time = time.time()
143
+
135
144
  subproc_out = subprocess.check_output(args)
136
145
 
146
+ end_time = time.time()
147
+
148
+ if warn_threshold_s is not None and (elapsed_time := (end_time - start_time)) >= warn_threshold_s:
149
+ log.warning('Manifest extraction took a long time: %s, %.2f s', file, elapsed_time)
150
+
137
151
  sp_lines = subproc_out.decode().strip().splitlines()
138
152
  if len(sp_lines) != 1:
139
153
  raise Exception('Unexpected subprocess output')
@@ -145,11 +159,17 @@ def build_module_manifests(
145
159
  out: ta.List[Manifest] = []
146
160
 
147
161
  for o in origins:
148
- manifest = dct[o.attr]
162
+ value = dct[o.attr]
163
+
164
+ if not (
165
+ isinstance(value, ta.Mapping) and
166
+ all(isinstance(k, str) and k.startswith('$') and len(k) > 1 for k in value)
167
+ ):
168
+ raise TypeError(f'Manifests must be mapping of strings starting with $: {value!r}')
149
169
 
150
170
  out.append(Manifest(
151
171
  **dc.asdict(o),
152
- value=manifest,
172
+ value=value,
153
173
  ))
154
174
 
155
175
  return out
@@ -202,9 +222,11 @@ if __name__ == '__main__':
202
222
  write=args.write or False,
203
223
  )
204
224
  if not args.quiet:
205
- print(json_dumps_pretty(ms))
225
+ print(json_dumps_pretty([dc.asdict(m) for m in ms]))
206
226
 
207
227
  def _main(argv=None) -> None:
228
+ configure_standard_logging('INFO')
229
+
208
230
  parser = argparse.ArgumentParser()
209
231
  subparsers = parser.add_subparsers()
210
232