omdev 0.0.0.dev28__py3-none-any.whl → 0.0.0.dev30__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of omdev might be problematic. Click here for more details.

@@ -1,5 +1,9 @@
1
+ from .actions import ( # noqa
2
+ ExtractAction,
3
+ )
4
+
1
5
  from .cache import ( # noqa
2
- DataCache,
6
+ Cache,
3
7
  )
4
8
 
5
9
  from .defaults import ( # noqa
@@ -8,12 +12,12 @@ from .defaults import ( # noqa
8
12
  )
9
13
 
10
14
  from .manifests import ( # noqa
11
- CacheDataManifest,
15
+ Manifest,
12
16
  )
13
17
 
14
18
  from .specs import ( # noqa
15
- CacheDataSpec,
16
- GitCacheDataSpec,
17
- GithubContentCacheDataSpec,
18
- HttpCacheDataSpec,
19
+ Spec,
20
+ GitSpec,
21
+ GithubContentSpec,
22
+ UrlSpec,
19
23
  )
@@ -6,6 +6,7 @@ TODO:
6
6
  """
7
7
  import typing as ta
8
8
 
9
+ from omlish import check
9
10
  from omlish import dataclasses as dc
10
11
  from omlish import lang
11
12
  from omlish import marshal as msh
@@ -24,9 +25,14 @@ class Action(lang.Abstract, lang.Sealed):
24
25
  ##
25
26
 
26
27
 
28
+ def _non_empty_strs(v: ta.Sequence[str]) -> ta.Sequence[str]:
29
+ return [check.non_empty_str(s) for s in ([v] if isinstance(v, str) else v)]
30
+
31
+
27
32
  @dc.dataclass(frozen=True)
28
33
  class ExtractAction(Action, lang.Final):
29
- files: ta.Sequence[str]
34
+ files: ta.Sequence[str] = dc.xfield(coerce=_non_empty_strs)
35
+ keep_archive: bool = False
30
36
 
31
37
 
32
38
  ##
@@ -34,7 +40,7 @@ class ExtractAction(Action, lang.Final):
34
40
 
35
41
  @lang.cached_function
36
42
  def _install_standard_marshalling() -> None:
37
- actions_poly = msh.polymorphism_from_subclasses(Action)
43
+ actions_poly = msh.polymorphism_from_subclasses(Action, naming=msh.Naming.SNAKE, strip_suffix=True)
38
44
  msh.STANDARD_MARSHALER_FACTORIES[0:0] = [msh.PolymorphismMarshalerFactory(actions_poly)]
39
45
  msh.STANDARD_UNMARSHALER_FACTORIES[0:0] = [msh.PolymorphismUnmarshalerFactory(actions_poly)]
40
46
 
omdev/cache/data/cache.py CHANGED
@@ -2,13 +2,18 @@
2
2
  TODO:
3
3
  - mirrors
4
4
  - huggingface_hub
5
+ - datasets
6
+ - verify md5 (action)
5
7
  - stupid little progress bars
6
8
  - groups of multiple files downloaded - 'spec set'? idk
7
9
  - torchvision.datasets.FashionMNIST
10
+ - chaining? or is this compcache..
11
+ - download resume ala hf_hub
8
12
  """
9
13
  import logging
10
14
  import os.path
11
15
  import shutil
16
+ import subprocess
12
17
  import tempfile
13
18
  import urllib.parse
14
19
  import urllib.request
@@ -16,14 +21,17 @@ import urllib.request
16
21
  from omlish import check
17
22
  from omlish import lang
18
23
  from omlish import marshal as msh
24
+ from omlish import os as osu
19
25
  from omlish.formats import json
20
26
 
21
27
  from ... import git
22
- from .manifests import CacheDataManifest
23
- from .specs import CacheDataSpec
24
- from .specs import GitCacheDataSpec
25
- from .specs import GithubContentCacheDataSpec
26
- from .specs import HttpCacheDataSpec
28
+ from .actions import Action
29
+ from .actions import ExtractAction
30
+ from .manifests import Manifest
31
+ from .specs import GithubContentSpec
32
+ from .specs import GitSpec
33
+ from .specs import Spec
34
+ from .specs import UrlSpec
27
35
 
28
36
 
29
37
  log = logging.getLogger(__name__)
@@ -32,25 +40,27 @@ log = logging.getLogger(__name__)
32
40
  ##
33
41
 
34
42
 
35
- class DataCache:
43
+ class Cache:
36
44
  def __init__(self, base_dir: str) -> None:
37
45
  super().__init__()
38
46
  self._base_dir = base_dir
39
47
 
40
48
  self._items_dir = os.path.join(base_dir, 'items')
41
49
 
50
+ #
51
+
42
52
  def _fetch_url(self, url: str, out_file: str) -> None:
43
53
  log.info('Fetching url: %s -> %s', url, out_file)
44
54
 
45
55
  urllib.request.urlretrieve(url, out_file) # noqa
46
56
 
47
- def _fetch_into(self, spec: CacheDataSpec, data_dir: str) -> None:
57
+ def _fetch_into(self, spec: Spec, data_dir: str) -> None:
48
58
  log.info('Fetching spec: %s %r -> %s', spec.digest, spec, data_dir)
49
59
 
50
- if isinstance(spec, HttpCacheDataSpec):
60
+ if isinstance(spec, UrlSpec):
51
61
  self._fetch_url(spec.url, os.path.join(data_dir, spec.file_name_or_default))
52
62
 
53
- elif isinstance(spec, GithubContentCacheDataSpec):
63
+ elif isinstance(spec, GithubContentSpec):
54
64
  for repo_file in spec.files:
55
65
  out_file = os.path.join(data_dir, repo_file)
56
66
  if not os.path.abspath(out_file).startswith(os.path.abspath(data_dir)):
@@ -60,7 +70,7 @@ class DataCache:
60
70
  os.makedirs(os.path.dirname(out_file), exist_ok=True)
61
71
  self._fetch_url(url, os.path.join(data_dir, out_file))
62
72
 
63
- elif isinstance(spec, GitCacheDataSpec):
73
+ elif isinstance(spec, GitSpec):
64
74
  if not spec.subtrees:
65
75
  raise NotImplementedError
66
76
 
@@ -92,38 +102,52 @@ class DataCache:
92
102
  else:
93
103
  raise TypeError(spec)
94
104
 
95
- def _return_val(self, spec: CacheDataSpec, data_dir: str) -> str:
105
+ def _perform_action(self, action: Action, data_dir: str) -> None:
106
+ if isinstance(action, ExtractAction):
107
+ for f in action.files:
108
+ file = os.path.join(data_dir, f)
109
+ if not os.path.isfile(file):
110
+ raise Exception(f'Not file: {file}')
111
+
112
+ if file.endswith('.tar.gz'):
113
+ subprocess.check_call(['tar', 'xzf', file], cwd=data_dir)
114
+
115
+ elif file.endswith('.zip'):
116
+ subprocess.check_call(['unzip', file], cwd=data_dir)
117
+
118
+ else:
119
+ raise Exception(f'Unhandled archive extension: {file}')
120
+
121
+ if not action.keep_archive:
122
+ os.unlink(file)
123
+
124
+ else:
125
+ raise TypeError(action)
126
+
127
+ def _return_val(self, spec: Spec, data_dir: str) -> str:
96
128
  check.state(os.path.isdir(data_dir))
97
129
 
98
- if isinstance(spec, HttpCacheDataSpec):
99
- data_file = os.path.join(data_dir, spec.file_name_or_default)
100
- if not os.path.isfile(data_file):
101
- raise RuntimeError(data_file) # noqa
102
- return data_file
130
+ if any(isinstance(a, ExtractAction) for a in spec.actions):
131
+ return data_dir
103
132
 
104
- elif isinstance(spec, GithubContentCacheDataSpec):
133
+ single_file: str
134
+ if isinstance(spec, UrlSpec):
135
+ single_file = os.path.join(data_dir, spec.file_name_or_default)
136
+
137
+ elif isinstance(spec, GithubContentSpec):
105
138
  if len(spec.files) != 1:
106
139
  return data_dir
107
- data_file = os.path.join(data_dir, check.single(spec.files))
108
- if not os.path.isfile(data_file):
109
- raise RuntimeError(data_file) # noqa
110
- return data_file
140
+ single_file = os.path.join(data_dir, check.single(spec.files))
111
141
 
112
142
  else:
113
143
  return data_dir
114
144
 
115
- def get(self, spec: CacheDataSpec) -> str:
116
- os.makedirs(self._items_dir, exist_ok=True)
145
+ if not os.path.isfile(single_file):
146
+ raise RuntimeError(single_file) # noqa
117
147
 
118
- #
119
-
120
- item_dir = os.path.join(self._items_dir, spec.digest)
121
- if os.path.isdir(item_dir):
122
- data_dir = os.path.join(item_dir, 'data')
123
- return self._return_val(spec, data_dir)
124
-
125
- #
148
+ return single_file
126
149
 
150
+ def _fetch_item(self, spec: Spec, item_dir: str) -> None:
127
151
  tmp_dir = tempfile.mkdtemp()
128
152
 
129
153
  #
@@ -133,11 +157,13 @@ class DataCache:
133
157
 
134
158
  start_at = lang.utcnow()
135
159
  self._fetch_into(spec, fetch_dir)
160
+ for action in spec.actions:
161
+ self._perform_action(action, fetch_dir)
136
162
  end_at = lang.utcnow()
137
163
 
138
164
  #
139
165
 
140
- manifest = CacheDataManifest(
166
+ manifest = Manifest(
141
167
  spec,
142
168
  start_at=start_at,
143
169
  end_at=end_at,
@@ -159,5 +185,14 @@ class DataCache:
159
185
 
160
186
  shutil.move(tmp_dir, item_dir)
161
187
 
188
+ def get(self, spec: Spec) -> str:
189
+ os.makedirs(self._items_dir, exist_ok=True)
190
+
191
+ item_dir = os.path.join(self._items_dir, spec.digest)
192
+ if not os.path.isdir(item_dir):
193
+ self._fetch_item(spec, item_dir)
194
+
195
+ osu.touch(os.path.join(item_dir, 'accessed'))
196
+
162
197
  data_dir = os.path.join(item_dir, 'data')
163
198
  return self._return_val(spec, data_dir)
@@ -2,7 +2,7 @@ import os.path
2
2
 
3
3
  from omlish import lang
4
4
 
5
- from .cache import DataCache
5
+ from .cache import Cache
6
6
 
7
7
 
8
8
  ##
@@ -14,5 +14,5 @@ def default_dir() -> str:
14
14
 
15
15
 
16
16
  @lang.cached_function(lock=True)
17
- def default() -> DataCache:
18
- return DataCache(default_dir())
17
+ def default() -> Cache:
18
+ return Cache(default_dir())
@@ -6,7 +6,7 @@ from omlish import dataclasses as dc
6
6
 
7
7
  from ...git import get_git_revision
8
8
  from .consts import SERIALIZATION_VERSION
9
- from .specs import CacheDataSpec
9
+ from .specs import Spec
10
10
 
11
11
 
12
12
  ##
@@ -24,8 +24,8 @@ def _lib_revision() -> str | None:
24
24
 
25
25
 
26
26
  @dc.dataclass(frozen=True)
27
- class CacheDataManifest:
28
- spec: CacheDataSpec
27
+ class Manifest:
28
+ spec: Spec
29
29
 
30
30
  start_at: datetime.datetime = dc.field(kw_only=True)
31
31
  end_at: datetime.datetime = dc.field(kw_only=True)
omdev/cache/data/specs.py CHANGED
@@ -10,6 +10,7 @@ from omlish import lang
10
10
  from omlish import marshal as msh
11
11
  from omlish.formats import json
12
12
 
13
+ from .actions import Action
13
14
  from .consts import SERIALIZATION_VERSION
14
15
 
15
16
 
@@ -17,12 +18,14 @@ from .consts import SERIALIZATION_VERSION
17
18
 
18
19
 
19
20
  @dc.dataclass(frozen=True)
20
- class CacheDataSpec(lang.Abstract, lang.Sealed):
21
+ class Spec(lang.Abstract, lang.Sealed):
21
22
  serialization_version: int = dc.field(default=SERIALIZATION_VERSION, kw_only=True)
22
23
 
24
+ actions: ta.Sequence[Action] = dc.field(default=(), kw_only=True)
25
+
23
26
  @cached.property
24
27
  def json(self) -> str:
25
- return json.dumps_compact(msh.marshal(self, CacheDataSpec), sort_keys=True)
28
+ return json.dumps_compact(msh.marshal(self, Spec), sort_keys=True)
26
29
 
27
30
  @cached.property
28
31
  def digest(self) -> str:
@@ -39,7 +42,7 @@ def _maybe_sorted_strs(v: ta.Iterable[str] | None) -> ta.Sequence[str] | None:
39
42
 
40
43
 
41
44
  @dc.dataclass(frozen=True)
42
- class GitCacheDataSpec(CacheDataSpec):
45
+ class GitSpec(Spec):
43
46
  url: str
44
47
 
45
48
  branch: str | None = dc.field(default=None, kw_only=True)
@@ -52,7 +55,7 @@ class GitCacheDataSpec(CacheDataSpec):
52
55
 
53
56
 
54
57
  @dc.dataclass(frozen=True)
55
- class HttpCacheDataSpec(CacheDataSpec):
58
+ class UrlSpec(Spec):
56
59
  url: str = dc.xfield(validate=lambda u: bool(urllib.parse.urlparse(u)))
57
60
  file_name: str | None = None
58
61
 
@@ -74,7 +77,7 @@ def _repo_str(s: str) -> str:
74
77
 
75
78
 
76
79
  @dc.dataclass(frozen=True)
77
- class GithubContentCacheDataSpec(CacheDataSpec):
80
+ class GithubContentSpec(Spec):
78
81
  repo: str = dc.field(validate=_repo_str) # type: ignore
79
82
  rev: str
80
83
  files: lang.SequenceNotStr[str]
@@ -85,7 +88,7 @@ class GithubContentCacheDataSpec(CacheDataSpec):
85
88
 
86
89
  @lang.cached_function
87
90
  def _install_standard_marshalling() -> None:
88
- specs_poly = msh.polymorphism_from_subclasses(CacheDataSpec)
91
+ specs_poly = msh.polymorphism_from_subclasses(Spec, naming=msh.Naming.SNAKE, strip_suffix=True)
89
92
  msh.STANDARD_MARSHALER_FACTORIES[0:0] = [msh.PolymorphismMarshalerFactory(specs_poly)]
90
93
  msh.STANDARD_UNMARSHALER_FACTORIES[0:0] = [msh.PolymorphismUnmarshalerFactory(specs_poly)]
91
94
 
omdev/manifests.py CHANGED
@@ -113,7 +113,7 @@ def build_module_manifests(
113
113
  module='.'.join(['', *mod_name.split('.')[1:]]),
114
114
  attr=m.groupdict()['name'],
115
115
 
116
- file=os.path.join(*os.path.split(file)[1:]), # noqa
116
+ file=file,
117
117
  line=i + 1,
118
118
  ))
119
119
 
omdev/precheck/base.py ADDED
@@ -0,0 +1,37 @@
1
+ import abc
2
+ import dataclasses as dc
3
+ import typing as ta
4
+
5
+
6
+ PrecheckConfigT = ta.TypeVar('PrecheckConfigT', bound='Precheck.Config')
7
+
8
+
9
+ ##
10
+
11
+
12
+ @dc.dataclass(frozen=True, kw_only=True)
13
+ class PrecheckContext:
14
+ src_roots: ta.Sequence[str]
15
+
16
+
17
+ ##
18
+
19
+
20
+ class Precheck(abc.ABC, ta.Generic[PrecheckConfigT]):
21
+ @dc.dataclass(frozen=True)
22
+ class Config:
23
+ pass
24
+
25
+ def __init__(self, context: PrecheckContext, config: PrecheckConfigT) -> None:
26
+ super().__init__()
27
+ self._context = context
28
+ self._config = config
29
+
30
+ @dc.dataclass(frozen=True)
31
+ class Violation:
32
+ pc: 'Precheck'
33
+ msg: str
34
+
35
+ @abc.abstractmethod
36
+ def run(self) -> ta.AsyncIterator[Violation]:
37
+ raise NotImplementedError
omdev/precheck/git.py ADDED
@@ -0,0 +1,34 @@
1
+ import asyncio
2
+ import dataclasses as dc
3
+ import typing as ta
4
+
5
+ from .base import Precheck
6
+ from .base import PrecheckContext
7
+
8
+
9
+ ##
10
+
11
+
12
+ class GitBlacklistPrecheck(Precheck['GitBlacklistPrecheck.Config']):
13
+ """
14
+ TODO:
15
+ - globs
16
+ - regex
17
+ """
18
+
19
+ @dc.dataclass(frozen=True)
20
+ class Config(Precheck.Config):
21
+ files: ta.Sequence[str] = (
22
+ '.env',
23
+ 'secrets.yml',
24
+ )
25
+
26
+ def __init__(self, context: PrecheckContext, config: Config = Config()) -> None:
27
+ super().__init__(context, config)
28
+
29
+ async def run(self) -> ta.AsyncGenerator[Precheck.Violation, None]:
30
+ for f in self._config.files:
31
+ proc = await asyncio.create_subprocess_exec('git', 'status', '-s', f)
32
+ await proc.communicate()
33
+ if proc.returncode:
34
+ yield Precheck.Violation(self, f)
omdev/precheck/lite.py ADDED
@@ -0,0 +1,135 @@
1
+ import asyncio
2
+ import dataclasses as dc
3
+ import glob
4
+ import inspect
5
+ import logging
6
+ import os.path
7
+ import subprocess
8
+ import textwrap
9
+ import typing as ta
10
+
11
+ from omdev import findmagic
12
+ from omlish import cached
13
+
14
+ from .base import Precheck
15
+ from .base import PrecheckContext
16
+
17
+
18
+ log = logging.getLogger(__name__)
19
+
20
+
21
+ ##
22
+
23
+
24
+ class LitePython8Precheck(Precheck['LitePython8Precheck.Config']):
25
+ @dc.dataclass(frozen=True)
26
+ class Config(Precheck.Config):
27
+ pass
28
+
29
+ def __init__(self, context: PrecheckContext, config: Config = Config()) -> None:
30
+ super().__init__(context, config)
31
+
32
+ #
33
+
34
+ @staticmethod
35
+ def _load_file_module(fp: str) -> None:
36
+ import os.path # noqa
37
+ import types # noqa
38
+
39
+ fp = os.path.abspath(fp)
40
+
41
+ with open(fp) as f:
42
+ src = f.read()
43
+
44
+ mn = os.path.basename(fp).rpartition('.')[0]
45
+
46
+ mod = types.ModuleType(mn)
47
+ mod.__name__ = mn
48
+ mod.__file__ = fp
49
+ mod.__builtins__ = __builtins__ # type: ignore
50
+ mod.__spec__ = None
51
+
52
+ code = compile(src, fp, 'exec')
53
+ exec(code, mod.__dict__, mod.__dict__)
54
+
55
+ @cached.function
56
+ def _load_file_module_payload(self) -> str:
57
+ return '\n'.join([
58
+ 'import sys',
59
+ 'fp = sys.argv[-1]',
60
+ '',
61
+ textwrap.dedent('\n'.join(inspect.getsource(LitePython8Precheck._load_file_module).splitlines()[2:])),
62
+ ])
63
+
64
+ #
65
+
66
+ async def _run_script(self, fp: str) -> list[Precheck.Violation]:
67
+ log.debug('%s: loading script %s', self.__class__.__name__, fp)
68
+
69
+ vs: list[Precheck.Violation] = []
70
+
71
+ proc = await asyncio.create_subprocess_exec(
72
+ '.venvs/8/bin/python',
73
+ '-c',
74
+ self._load_file_module_payload(),
75
+ fp,
76
+ stderr=subprocess.PIPE,
77
+ )
78
+
79
+ _, stderr = await proc.communicate()
80
+ if proc.returncode != 0:
81
+ vs.append(Precheck.Violation(self, f'lite script {fp} failed to load in python8: {stderr.decode()}'))
82
+
83
+ return vs
84
+
85
+ async def _run_one_module(self, fp: str) -> list[Precheck.Violation]:
86
+ vs: list[Precheck.Violation] = []
87
+
88
+ mod = fp.rpartition('.')[0].replace(os.sep, '.')
89
+
90
+ log.debug('%s: loading module %s', self.__class__.__name__, mod)
91
+
92
+ proc = await asyncio.create_subprocess_exec(
93
+ '.venvs/8/bin/python',
94
+ '-c',
95
+ f'import {mod}',
96
+ stderr=subprocess.PIPE,
97
+ )
98
+
99
+ _, stderr = await proc.communicate()
100
+ if proc.returncode != 0:
101
+ vs.append(Precheck.Violation(self, f'lite module {fp} failed to import in python8: {stderr.decode()}')) # noqa
102
+
103
+ return vs
104
+
105
+ async def _run_module(self, fp: str) -> list[Precheck.Violation]:
106
+ vs: list[Precheck.Violation] = []
107
+
108
+ if fp.endswith('__init__.py'):
109
+ pfps = glob.glob(os.path.join(os.path.dirname(fp), '**/*.py'), recursive=True)
110
+ else:
111
+ pfps = [fp]
112
+
113
+ for pfp in pfps:
114
+ vs.extend(await self._run_one_module(pfp))
115
+
116
+ return vs
117
+
118
+ async def run(self) -> ta.AsyncGenerator[Precheck.Violation, None]:
119
+ for fp in findmagic.find_magic(
120
+ self._context.src_roots,
121
+ ['# @omlish-lite'],
122
+ ['py'],
123
+ ):
124
+ with open(fp) as f: # noqa # FIXME
125
+ src = f.read()
126
+
127
+ is_script = '# @omlish-script' in src.splitlines()
128
+
129
+ if is_script:
130
+ for v in await self._run_script(fp):
131
+ yield v
132
+
133
+ else:
134
+ for v in await self._run_module(fp):
135
+ yield v