omdev 0.0.0.dev25__py3-none-any.whl → 0.0.0.dev27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of omdev might be problematic. Click here for more details.

omdev/amalg/amalg.py CHANGED
@@ -29,13 +29,16 @@ import io
29
29
  import logging
30
30
  import os.path
31
31
  import re
32
+ import textwrap
32
33
  import typing as ta
33
34
 
34
35
  import tokenize_rt as trt
35
36
 
36
37
  from omlish import check
37
38
  from omlish import collections as col
39
+ from omlish import lang
38
40
  from omlish import logs
41
+ from omlish.lite.runtime import REQUIRED_PYTHON_VERSION
39
42
 
40
43
  from .. import findmagic
41
44
  from .. import tokens as tks
@@ -91,6 +94,7 @@ def strip_main_lines(cls: ta.Sequence[Tokens]) -> list[Tokens]:
91
94
 
92
95
  STRIPPED_HEADER_MAGICS = [
93
96
  '# @omlish-lite',
97
+ '# @omlish-script',
94
98
  ]
95
99
 
96
100
  STRIPPED_HEADER_PATS = [findmagic.compile_magic_pat(m) for m in STRIPPED_HEADER_MAGICS]
@@ -268,6 +272,8 @@ class SrcFile:
268
272
  typings: ta.Sequence[Typing] = dc.field(repr=False)
269
273
  content_lines: ta.Sequence[Tokens] = dc.field(repr=False)
270
274
 
275
+ ruff_noqa: ta.AbstractSet[str] = dc.field(repr=False)
276
+
271
277
 
272
278
  def make_src_file(
273
279
  path: str,
@@ -283,6 +289,7 @@ def make_src_file(
283
289
  hls, cls = split_header_lines(lines)
284
290
 
285
291
  hls = strip_header_lines(hls)
292
+ rnls, hls = col.partition(hls, lambda l: tks.join_toks(l).startswith('# ruff: noqa: '))
286
293
 
287
294
  imps: list[Import] = []
288
295
  tys: list[Typing] = []
@@ -316,6 +323,8 @@ def make_src_file(
316
323
  imports=imps,
317
324
  typings=tys,
318
325
  content_lines=ctls,
326
+
327
+ ruff_noqa=set(lang.flatten(tks.join_toks(l).strip().split()[3:] for l in rnls)), # noqa
319
328
  )
320
329
 
321
330
 
@@ -324,10 +333,11 @@ def make_src_file(
324
333
 
325
334
  SECTION_SEP = '#' * 40 + '\n'
326
335
 
327
- RUFF_DISABLES: ta.Sequence[str] = [
328
- # 'UP006', # non-pep585-annotation
329
- # 'UP007', # non-pep604-annotation
330
- ]
336
+ RUFF_DISABLES: ta.AbstractSet[str] = {
337
+ 'UP006', # non-pep585-annotation
338
+ 'UP007', # non-pep604-annotation
339
+ 'UP036', # outdated-version-block
340
+ }
331
341
 
332
342
  OUTPUT_COMMENT = '# @omdev-amalg-output '
333
343
  SCAN_COMMENT = '# @omdev-amalg '
@@ -362,46 +372,70 @@ def gen_amalg(
362
372
 
363
373
  ##
364
374
 
375
+ hls = []
376
+
365
377
  mf = src_files[main_path]
366
378
  if mf.header_lines:
367
- hls = [
379
+ hls.extend([
368
380
  hl
369
381
  for hlts in mf.header_lines
370
382
  if not (hl := tks.join_toks(hlts)).startswith(SCAN_COMMENT)
371
- ]
372
- if output_dir is not None:
373
- ogf = os.path.relpath(main_path, output_dir)
374
- else:
375
- ogf = os.path.basename(main_path)
376
- nhls = []
377
- nhls.extend([
378
- '#!/usr/bin/env python3\n',
379
- '# noinspection DuplicatedCode\n',
380
- '# @omlish-lite\n',
381
- '# @omlish-script\n',
382
- f'{OUTPUT_COMMENT.strip()} {ogf}\n',
383
383
  ])
384
- hls = [*nhls, *hls]
385
- out.write(''.join(hls))
386
384
 
387
- if RUFF_DISABLES:
388
- out.write(f'# ruff: noqa: {" ".join(RUFF_DISABLES)}\n')
385
+ if output_dir is not None:
386
+ ogf = os.path.relpath(main_path, output_dir)
387
+ else:
388
+ ogf = os.path.basename(main_path)
389
+
390
+ nhls = []
391
+ nhls.extend([
392
+ '#!/usr/bin/env python3\n',
393
+ '# noinspection DuplicatedCode\n',
394
+ '# @omlish-lite\n',
395
+ '# @omlish-script\n',
396
+ f'{OUTPUT_COMMENT.strip()} {ogf}\n',
397
+ ])
398
+
399
+ ruff_disables = sorted({
400
+ *lang.flatten(f.ruff_noqa for f in src_files.values()),
401
+ *RUFF_DISABLES,
402
+ })
403
+ if ruff_disables:
404
+ nhls.append(f'# ruff: noqa: {" ".join(sorted(ruff_disables))}\n')
405
+
406
+ hls = [*nhls, *hls]
407
+ out.write(''.join(hls))
389
408
 
390
409
  ##
391
410
 
392
411
  all_imps = [i for f in src_files.values() for i in f.imports]
393
412
  gl_imps = [i for i in all_imps if i.mod_path is None]
394
413
 
395
- dct: dict = {}
414
+ dct: dict = {
415
+ ('sys', None, None): ['import sys\n'],
416
+ }
396
417
  for imp in gl_imps:
397
418
  dct.setdefault((imp.mod, imp.item, imp.as_), []).append(imp)
398
419
  for _, l in sorted(dct.items()):
399
- out.write(tks.join_toks(l[0].toks))
420
+ il = l[0]
421
+ out.write(il if isinstance(il, str) else tks.join_toks(il.toks))
400
422
  if dct:
401
423
  out.write('\n\n')
402
424
 
403
425
  ##
404
426
 
427
+ out.write(SECTION_SEP)
428
+ out.write('\n\n')
429
+
430
+ out.write(textwrap.dedent(f"""
431
+ if sys.version_info < {REQUIRED_PYTHON_VERSION!r}:
432
+ raise OSError(
433
+ f'Requires python {REQUIRED_PYTHON_VERSION!r}, got {{sys.version_info}} from {{sys.executable}}') # noqa
434
+ """).lstrip())
435
+ out.write('\n\n')
436
+
437
+ ##
438
+
405
439
  ts = list(col.toposort({ # noqa
406
440
  f.path: {mp for i in f.imports if (mp := i.mod_path) is not None}
407
441
  for f in src_files.values()
@@ -418,6 +452,9 @@ def gen_amalg(
418
452
  if ty.src not in tys:
419
453
  tyd.setdefault(f.path, []).append(ty)
420
454
  tys.add(ty.src)
455
+ if tys:
456
+ out.write(SECTION_SEP)
457
+ out.write('\n\n')
421
458
  for i, (sf, ftys) in enumerate(tyd.items()):
422
459
  f = src_files[sf]
423
460
  if i:
@@ -0,0 +1,19 @@
1
+ from .cache import ( # noqa
2
+ DataCache,
3
+ )
4
+
5
+ from .defaults import ( # noqa
6
+ default,
7
+ default_dir,
8
+ )
9
+
10
+ from .manifests import ( # noqa
11
+ CacheDataManifest,
12
+ )
13
+
14
+ from .specs import ( # noqa
15
+ CacheDataSpec,
16
+ GitCacheDataSpec,
17
+ GithubContentCacheDataSpec,
18
+ HttpCacheDataSpec,
19
+ )
@@ -0,0 +1,36 @@
1
+ import typing as ta
2
+
3
+ from omlish import dataclasses as dc
4
+ from omlish import lang
5
+ from omlish import marshal as msh
6
+
7
+ from .consts import SERIALIZATION_VERSION
8
+
9
+
10
+ ##
11
+
12
+
13
@dc.dataclass(frozen=True)
class Action(lang.Abstract, lang.Sealed):
    """Abstract base for cache-data actions; sealed, so all subclasses live in this package."""

    # Stamped into serialized payloads so stored actions can be version-checked when loaded.
    serialization_version: int = dc.field(default=SERIALIZATION_VERSION, kw_only=True)
16
+
17
+
18
+ ##
19
+
20
+
21
@dc.dataclass(frozen=True)
class ExtractAction(Action, lang.Final):
    """Action naming files to extract. NOTE(review): presumably archive-relative paths - confirm with consumer."""

    # The files this action applies to.
    files: ta.Sequence[str]
24
+
25
+
26
+ ##
27
+
28
+
29
@lang.cached_function
def _install_standard_marshalling() -> None:
    """Register polymorphic (un)marshaling for the Action hierarchy; cached so it runs at most once."""

    actions_poly = msh.polymorphism_from_subclasses(Action)
    # Prepend (slice-insert at 0) so these factories take precedence over the stock ones.
    msh.STANDARD_MARSHALER_FACTORIES[0:0] = [msh.PolymorphismMarshalerFactory(actions_poly)]
    msh.STANDARD_UNMARSHALER_FACTORIES[0:0] = [msh.PolymorphismUnmarshalerFactory(actions_poly)]


# Installed eagerly at import time so Action subclasses marshal correctly without further setup.
_install_standard_marshalling()
@@ -0,0 +1,164 @@
1
+ """
2
+ TODO:
3
+ - huggingface_hub
4
+ - postprocessing?
5
+ - unarchive
6
+ - stupid little progress bars
7
+ - return file path for single files
8
+ - thus, HttpSpec.url has derive=lambda url: ...
9
+ """
10
+ import logging
11
+ import os.path
12
+ import shutil
13
+ import tempfile
14
+ import urllib.parse
15
+ import urllib.request
16
+
17
+ from omlish import check
18
+ from omlish import lang
19
+ from omlish import marshal as msh
20
+ from omlish.formats import json
21
+
22
+ from .. import git
23
+ from .manifests import CacheDataManifest
24
+ from .specs import CacheDataSpec
25
+ from .specs import GitCacheDataSpec
26
+ from .specs import GithubContentCacheDataSpec
27
+ from .specs import HttpCacheDataSpec
28
+
29
+
30
+ log = logging.getLogger(__name__)
31
+
32
+
33
+ ##
34
+
35
+
36
class DataCache:
    """Content-addressed on-disk cache of fetched external data.

    Each spec is stored under ``<base_dir>/items/<spec.digest>/``, containing a ``data/`` directory with
    the fetched content and a ``manifest.json`` recording what was fetched and when. A digest already
    present on disk is returned as-is without refetching.
    """

    def __init__(self, base_dir: str) -> None:
        super().__init__()
        self._base_dir = base_dir

        self._items_dir = os.path.join(base_dir, 'items')

    def _fetch_url(self, url: str, out_file: str) -> None:
        """Download a single url to the given output file path."""

        log.info('Fetching url: %s -> %s', url, out_file)

        urllib.request.urlretrieve(url, out_file)  # noqa

    def _fetch_into(self, spec: CacheDataSpec, data_dir: str) -> None:
        """Populate the (existing, empty) data_dir with the content described by spec.

        Raises:
            RuntimeError: on path-traversal attempts or an unexpected clone layout.
            TypeError: for an unrecognized spec type.
        """

        log.info('Fetching spec: %s %r -> %s', spec.digest, spec, data_dir)

        if isinstance(spec, HttpCacheDataSpec):
            self._fetch_url(spec.url, os.path.join(data_dir, spec.file_name_or_default))

        elif isinstance(spec, GithubContentCacheDataSpec):
            for repo_file in spec.files:
                out_file = os.path.join(data_dir, repo_file)
                # Reject '..' traversal out of data_dir. Compare against the separator-terminated prefix
                # so a sibling such as '<data_dir>-evil' cannot slip past a plain prefix check.
                if not os.path.abspath(out_file).startswith(os.path.join(os.path.abspath(data_dir), '')):
                    raise RuntimeError(out_file)  # noqa

                url = f'https://raw.githubusercontent.com/{spec.repo}/{spec.rev}/{repo_file}'
                os.makedirs(os.path.dirname(out_file), exist_ok=True)
                # out_file is already rooted at data_dir - joining it with data_dir a second time only
                # worked by accident (os.path.join discards the prefix when the second arg is absolute).
                self._fetch_url(url, out_file)

        elif isinstance(spec, GitCacheDataSpec):
            if not spec.subtrees:
                raise NotImplementedError

            tmp_dir = tempfile.mkdtemp()

            log.info('Cloning git repo: %s -> %s', spec.url, tmp_dir)

            git.git_clone_subtree(
                base_dir=tmp_dir,
                repo_url=spec.url,
                repo_dir='data',
                branch=spec.branch,
                rev=spec.rev,
                repo_subtrees=spec.subtrees,
            )

            repo_dir = os.path.join(tmp_dir, 'data')
            if not os.path.isdir(repo_dir):
                raise RuntimeError(repo_dir)

            # Drop the git metadata - only the working tree is cached.
            git_dir = os.path.join(repo_dir, '.git')
            if not os.path.isdir(git_dir):
                raise RuntimeError(git_dir)
            shutil.rmtree(git_dir)

            # Replace the pre-made (empty) data_dir with the cloned tree.
            os.rmdir(data_dir)
            os.rename(repo_dir, data_dir)

        else:
            raise TypeError(spec)

    def _return_val(self, spec: CacheDataSpec, data_dir: str) -> str:
        """Map a populated data_dir to the path handed back to callers.

        Single-file specs resolve to the file itself; everything else resolves to the directory.
        """

        check.state(os.path.isdir(data_dir))

        if isinstance(spec, HttpCacheDataSpec):
            data_file = os.path.join(data_dir, spec.file_name_or_default)
            if not os.path.isfile(data_file):
                raise RuntimeError(data_file)  # noqa
            return data_file

        elif isinstance(spec, GithubContentCacheDataSpec):
            if len(spec.files) != 1:
                return data_dir
            data_file = os.path.join(data_dir, check.single(spec.files))
            if not os.path.isfile(data_file):
                raise RuntimeError(data_file)  # noqa
            return data_file

        else:
            return data_dir

    def get(self, spec: CacheDataSpec) -> str:
        """Return the local path for spec's data, fetching and caching it on first use."""

        os.makedirs(self._items_dir, exist_ok=True)

        # Fast path: already cached under its digest.

        item_dir = os.path.join(self._items_dir, spec.digest)
        if os.path.isdir(item_dir):
            data_dir = os.path.join(item_dir, 'data')
            return self._return_val(spec, data_dir)

        # Fetch into a private temp dir so a failed fetch never leaves a half-built item behind.

        tmp_dir = tempfile.mkdtemp()

        #

        fetch_dir = os.path.join(tmp_dir, 'data')
        os.mkdir(fetch_dir)

        start_at = lang.utcnow()
        self._fetch_into(spec, fetch_dir)
        end_at = lang.utcnow()

        # Write the manifest beside the data.

        manifest = CacheDataManifest(
            spec,
            start_at=start_at,
            end_at=end_at,
        )
        manifest_json = json.dumps_pretty(msh.marshal(manifest))

        manifest_file = os.path.join(tmp_dir, 'manifest.json')
        with open(manifest_file, 'w') as f:
            f.write(manifest_json)

        #

        # for p, ds, fs in os.walk(tmp_dir):
        #     for n in [*ds, *fs]:
        #         np = os.path.join(p, n)
        #         os.chmod(np, os.stat(np).st_mode & ~0o222)

        # Publish by moving the whole temp dir into its final digest-named location.

        shutil.move(tmp_dir, item_dir)

        data_dir = os.path.join(item_dir, 'data')
        return self._return_val(spec, data_dir)
@@ -0,0 +1 @@
1
# Version stamp embedded in persisted specs/manifests; bump when the serialized format changes incompatibly.
SERIALIZATION_VERSION = 0
@@ -0,0 +1,18 @@
1
+ import os.path
2
+
3
+ from omlish import lang
4
+
5
+ from .cache import DataCache
6
+
7
+
8
+ ##
9
+
10
+
11
@lang.cached_function(lock=True)
def default_dir() -> str:
    """Return the default on-disk data-cache directory (computed once, under a lock)."""

    raw = '~/.cache/omlish/data'
    return os.path.expanduser(raw)
14
+
15
+
16
@lang.cached_function(lock=True)
def default() -> DataCache:
    """Return the process-wide shared DataCache rooted at default_dir() (created once, under a lock)."""

    cache_root = default_dir()
    return DataCache(cache_root)
@@ -0,0 +1,40 @@
1
+ import datetime
2
+
3
+ from omlish import __about__ as about
4
+ from omlish import cached
5
+ from omlish import dataclasses as dc
6
+
7
+ from ..git import get_git_revision
8
+ from .consts import SERIALIZATION_VERSION
9
+ from .specs import CacheDataSpec
10
+
11
+
12
+ ##
13
+
14
+
15
@cached.function
def _lib_revision() -> str | None:
    """Best-effort library source revision: the packaged revision if present, else the local git checkout's."""

    rev = about.__revision__
    if rev is not None:
        return rev  # type: ignore

    return get_git_revision()
21
+
22
+
23
+ ##
24
+
25
+
26
@dc.dataclass(frozen=True)
class CacheDataManifest:
    """Record written alongside cached data describing what was fetched, when, and by which library build."""

    # The spec whose fetch this manifest describes.
    spec: CacheDataSpec

    # Fetch wall-clock bounds. NOTE(review): populated via lang.utcnow() at the call site - presumed UTC.
    start_at: datetime.datetime = dc.field(kw_only=True)
    end_at: datetime.datetime = dc.field(kw_only=True)

    # Library build that performed the fetch.
    lib_version: str = dc.field(default_factory=lambda: about.__version__, kw_only=True)
    lib_revision: str = dc.field(default_factory=_lib_revision, kw_only=True)

    serialization_version: int = dc.field(default=SERIALIZATION_VERSION, kw_only=True)

    @dc.validate
    def _validate_serialization_versions(self) -> bool:
        # The manifest and its embedded spec must agree on serialization format.
        return self.serialization_version == self.spec.serialization_version
@@ -0,0 +1,93 @@
1
+ import hashlib
2
+ import typing as ta
3
+ import urllib.parse
4
+ import urllib.request
5
+
6
+ from omlish import cached
7
+ from omlish import check
8
+ from omlish import dataclasses as dc
9
+ from omlish import lang
10
+ from omlish import marshal as msh
11
+ from omlish.formats import json
12
+
13
+ from .consts import SERIALIZATION_VERSION
14
+
15
+
16
+ ##
17
+
18
+
19
@dc.dataclass(frozen=True)
class CacheDataSpec(lang.Abstract, lang.Sealed):
    """Abstract description of a piece of cacheable data; its identity is the digest of its canonical json."""

    serialization_version: int = dc.field(default=SERIALIZATION_VERSION, kw_only=True)

    @cached.property
    def json(self) -> str:
        # Canonical form: compact with sorted keys, so equal specs always produce equal digests.
        return json.dumps_compact(msh.marshal(self, CacheDataSpec), sort_keys=True)

    @cached.property
    def digest(self) -> str:
        # md5 is used only as a cache key here, not for security.
        return hashlib.md5(self.json.encode('utf-8')).hexdigest()  # noqa
30
+
31
+
32
+ ##
33
+
34
+
35
def _maybe_sorted_strs(v: ta.Iterable[str] | None) -> ta.Sequence[str] | None:
    """Canonicalize an optional string collection - dedupe and sort - passing None through; rejects a bare str."""

    if v is None:
        return None
    unique = set(check.not_isinstance(v, str))
    return sorted(unique)
39
+
40
+
41
@dc.dataclass(frozen=True)
class GitCacheDataSpec(CacheDataSpec):
    """Data fetched by sparse-cloning subtrees of a git repo."""

    url: str

    # Exactly one of branch or rev must be set (enforced by git_clone_subtree at fetch time).
    branch: str | None = dc.field(default=None, kw_only=True)
    rev: str | None = dc.field(default=None, kw_only=True)

    # Coerced to a sorted, deduplicated sequence so equivalent specs share a digest.
    subtrees: ta.Sequence[str] = dc.xfield(default=None, kw_only=True, coerce=_maybe_sorted_strs)
49
+
50
+
51
+ ##
52
+
53
+
54
@dc.dataclass(frozen=True)
class HttpCacheDataSpec(CacheDataSpec):
    """A single file fetched from an http(s) url."""

    # NOTE(review): urlparse returns a (truthy) 6-tuple for any string, so this validator never rejects -
    # it likely intended to check scheme/netloc. Left as-is to avoid tightening accepted inputs.
    url: str = dc.xfield(validate=lambda u: bool(urllib.parse.urlparse(u)))
    file_name: str | None = None

    @cached.property
    def file_name_or_default(self) -> str:
        """The explicit file_name if given, else the last path segment of the url."""

        if self.file_name is not None:
            return self.file_name
        return urllib.parse.urlparse(self.url).path.split('/')[-1]
64
+
65
+
66
+ ##
67
+
68
+
69
def _repo_str(s: str) -> str:
    """Validate a github 'owner/name' repo string and return it unchanged; raises if malformed."""

    owner, name = check.non_empty_str(s).split('/')
    check.non_empty_str(owner)
    check.non_empty_str(name)
    return s
74
+
75
+
76
@dc.dataclass(frozen=True)
class GithubContentCacheDataSpec(CacheDataSpec):
    """Individual files fetched raw (raw.githubusercontent.com) from a github repo at a fixed revision."""

    # 'owner/name' form, validated by _repo_str.
    repo: str = dc.field(validate=_repo_str)  # type: ignore
    rev: str
    # Repo-relative paths of the files to fetch; deliberately typed to reject a bare str.
    files: lang.SequenceNotStr[str]
81
+
82
+
83
+ ##
84
+
85
+
86
@lang.cached_function
def _install_standard_marshalling() -> None:
    """Register polymorphic (un)marshaling for the CacheDataSpec hierarchy; cached so it runs at most once."""

    specs_poly = msh.polymorphism_from_subclasses(CacheDataSpec)
    # Prepend (slice-insert at 0) so these factories take precedence over the stock ones.
    msh.STANDARD_MARSHALER_FACTORIES[0:0] = [msh.PolymorphismMarshalerFactory(specs_poly)]
    msh.STANDARD_UNMARSHALER_FACTORIES[0:0] = [msh.PolymorphismUnmarshalerFactory(specs_poly)]


# Installed eagerly at import time so specs marshal correctly without further setup.
_install_standard_marshalling()
omdev/git.py ADDED
@@ -0,0 +1,100 @@
1
+ # ruff: noqa: UP007
2
+ # @omlish-lite
3
+ import os.path
4
+ import subprocess
5
+ import typing as ta
6
+
7
+
8
def git_clone_subtree(
        *,
        base_dir: str,
        repo_url: str,
        repo_dir: str,
        branch: ta.Optional[str] = None,
        rev: ta.Optional[str] = None,
        repo_subtrees: ta.Sequence[str],
) -> None:
    """Sparse-clone only the given subtrees of a repo into base_dir/repo_dir.

    Exactly one of branch or rev must be given. Raises CalledProcessError if any git step fails.
    """

    if not bool(branch) ^ bool(rev):
        raise ValueError('must set branch or rev')

    # A lone str would silently iterate per-character as subtree paths - reject it outright.
    if isinstance(repo_subtrees, str):
        raise TypeError(repo_subtrees)

    git_opts = [
        '-c', 'advice.detachedHead=false',
    ]

    def run_git(cwd: str, *args: str) -> None:
        # Every step shares the same common options; only the subcommand and cwd vary.
        subprocess.check_call(['git', *git_opts, *args], cwd=cwd)

    # Clone without checking out ('-n'), shallow and treeless, restricted to a single branch.
    run_git(
        base_dir,
        'clone',
        '-n',
        '--depth=1',
        '--filter=tree:0',
        *(['-b', branch] if branch else []),
        '--single-branch',
        repo_url,
        repo_dir,
    )

    rd = os.path.join(base_dir, repo_dir)

    # Limit the working tree to just the requested subtrees.
    run_git(
        rd,
        'sparse-checkout',
        'set',
        '--no-cone',
        *repo_subtrees,
    )

    # Materialize the files (at the pinned rev, if one was given).
    run_git(
        rd,
        'checkout',
        *([rev] if rev else []),
    )
65
+
66
+
67
def get_git_revision(
        *,
        cwd: ta.Optional[str] = None,
) -> ta.Optional[str]:
    """Return a revision identifier for the git checkout at cwd, or None if cwd is not in a work tree.

    The value is the 40-char commit hash, suffixed with '-dirty' when tracked files are modified and
    '-untracked' when untracked files are present. Defaults cwd to the process working directory.
    Raises (FileNotFoundError / CalledProcessError) if the git binary itself is unavailable or broken.
    """

    subprocess.check_output(['git', '--version'])

    if cwd is None:
        cwd = os.getcwd()

    # Bug fix: run the work-tree probe in `cwd` (it previously omitted cwd= and probed the process cwd),
    # and capture stdout so the probe's 'true'/'false' output doesn't leak onto the caller's stdout.
    if subprocess.run(  # noqa
        [
            'git',
            'rev-parse',
            '--is-inside-work-tree',
        ],
        cwd=cwd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    ).returncode:
        return None

    has_untracked = bool(subprocess.check_output([
        'git',
        'ls-files',
        '.',
        '--exclude-standard',
        '--others',
    ], cwd=cwd).decode().strip())

    # '--match=NeVeRmAtCh --always' forces describe to emit the bare commit hash; '--dirty' appends
    # the dirty suffix when tracked files have modifications.
    dirty_rev = subprocess.check_output([
        'git',
        'describe',
        '--match=NeVeRmAtCh',
        '--always',
        '--abbrev=40',
        '--dirty',
    ], cwd=cwd).decode().strip()

    return dirty_rev + ('-untracked' if has_untracked else '')