omdev 0.0.0.dev221__py3-none-any.whl → 0.0.0.dev223__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. omdev/ci/cache.py +40 -23
  2. omdev/ci/ci.py +49 -109
  3. omdev/ci/cli.py +24 -23
  4. omdev/ci/docker/__init__.py +0 -0
  5. omdev/ci/docker/buildcaching.py +69 -0
  6. omdev/ci/docker/cache.py +57 -0
  7. omdev/ci/{docker.py → docker/cmds.py} +1 -44
  8. omdev/ci/docker/imagepulling.py +64 -0
  9. omdev/ci/docker/inject.py +37 -0
  10. omdev/ci/docker/utils.py +48 -0
  11. omdev/ci/github/cache.py +15 -5
  12. omdev/ci/github/inject.py +30 -0
  13. omdev/ci/inject.py +61 -0
  14. omdev/dataserver/__init__.py +1 -0
  15. omdev/dataserver/handlers.py +198 -0
  16. omdev/dataserver/http.py +69 -0
  17. omdev/dataserver/routes.py +49 -0
  18. omdev/dataserver/server.py +90 -0
  19. omdev/dataserver/targets.py +89 -0
  20. omdev/oci/__init__.py +0 -0
  21. omdev/oci/building.py +221 -0
  22. omdev/oci/compression.py +8 -0
  23. omdev/oci/data.py +151 -0
  24. omdev/oci/datarefs.py +138 -0
  25. omdev/oci/dataserver.py +61 -0
  26. omdev/oci/loading.py +142 -0
  27. omdev/oci/media.py +179 -0
  28. omdev/oci/packing.py +381 -0
  29. omdev/oci/repositories.py +159 -0
  30. omdev/oci/tars.py +144 -0
  31. omdev/pyproject/resources/python.sh +1 -1
  32. omdev/scripts/ci.py +1841 -384
  33. omdev/scripts/interp.py +100 -22
  34. omdev/scripts/pyproject.py +122 -28
  35. {omdev-0.0.0.dev221.dist-info → omdev-0.0.0.dev223.dist-info}/METADATA +2 -2
  36. {omdev-0.0.0.dev221.dist-info → omdev-0.0.0.dev223.dist-info}/RECORD +40 -15
  37. {omdev-0.0.0.dev221.dist-info → omdev-0.0.0.dev223.dist-info}/LICENSE +0 -0
  38. {omdev-0.0.0.dev221.dist-info → omdev-0.0.0.dev223.dist-info}/WHEEL +0 -0
  39. {omdev-0.0.0.dev221.dist-info → omdev-0.0.0.dev223.dist-info}/entry_points.txt +0 -0
  40. {omdev-0.0.0.dev221.dist-info → omdev-0.0.0.dev223.dist-info}/top_level.txt +0 -0
omdev/oci/loading.py ADDED
@@ -0,0 +1,142 @@
1
+ # ruff: noqa: UP006 UP007
2
+ # @omlish-lite
3
+ import dataclasses as dc
4
+ import json
5
+ import typing as ta
6
+
7
+ from omlish.lite.check import check
8
+
9
+ from .data import OciImageConfig
10
+ from .data import OciImageIndex
11
+ from .data import OciImageLayer
12
+ from .data import OciImageManifest
13
+ from .data import is_empty_oci_dataclass
14
+ from .media import OCI_IMAGE_LAYER_KIND_MEDIA_TYPES_
15
+ from .media import OCI_MEDIA_FIELDS
16
+ from .media import OciMediaDescriptor
17
+ from .media import OciMediaImageConfig
18
+ from .media import OciMediaImageIndex
19
+ from .media import OciMediaImageManifest
20
+ from .media import unmarshal_oci_media_dataclass
21
+ from .repositories import FileOciRepository
22
+ from .repositories import OciRepository
23
+
24
+
25
+ T = ta.TypeVar('T')
26
+
27
+
28
+ ##
29
+
30
+
31
class OciRepositoryLoader:
    """
    Reads OCI media documents out of an `OciRepository` and converts them into the plain `OciImage*` dataclasses.
    """

    def __init__(
            self,
            repo: OciRepository,
    ) -> None:
        super().__init__()

        self._repo = repo

    #

    def load_object(
            self,
            data: bytes,
            cls: ta.Type[T] = object,  # type: ignore[assignment]
            *,
            media_type: ta.Optional[str] = None,
    ) -> T:
        """
        Decodes `data` as UTF-8 JSON, unmarshals it as a registered media dataclass, and checks the result is a `cls`.

        `media_type` is forwarded to `unmarshal_oci_media_dataclass`, which falls back to the document's own
        'mediaType' key when it is None.
        """

        unmarshaled = unmarshal_oci_media_dataclass(
            json.loads(data.decode('utf-8')),
            media_type=media_type,
        )
        return check.isinstance(unmarshaled, cls)

    def read_object(
            self,
            digest: str,
            cls: ta.Type[T] = object,  # type: ignore[assignment]
            *,
            media_type: ta.Optional[str] = None,
    ) -> T:
        """Reads the blob named by `digest` from the repository and loads it with `load_object`."""

        blob = self._repo.read_blob(digest)
        return self.load_object(blob, cls, media_type=media_type)

    def read_descriptor(
            self,
            desc: OciMediaDescriptor,
            cls: ta.Type[T] = object,  # type: ignore[assignment]
    ) -> ta.Any:
        """Reads the object a descriptor points at, using the descriptor's digest and media type."""

        return self.read_object(desc.digest, cls, media_type=desc.media_type)

    #

    def from_media(self, obj: ta.Any) -> ta.Any:
        """
        Converts a media dataclass into its plain counterpart, following descriptors through the repository.

        Raises `TypeError` for any object that is not an image config, manifest, or index media dataclass.
        """

        def make_kw(*exclude):
            # Carry over every dataclass field except the media-only ones and those the caller rebuilds itself.
            kw = {}
            for fld in dc.fields(obj):
                att = fld.name
                if att in OCI_MEDIA_FIELDS or att in exclude:
                    continue
                kw[att] = getattr(obj, att)
            return kw

        if isinstance(obj, OciMediaImageConfig):
            return OciImageConfig(**make_kw())

        if isinstance(obj, OciMediaImageManifest):
            manifest_kw = make_kw('config', 'layers')
            config = self.from_media(self.read_descriptor(obj.config))

            layers = []
            for layer_desc in obj.layers:
                layer_kind = OCI_IMAGE_LAYER_KIND_MEDIA_TYPES_.get(layer_desc.media_type)
                if layer_kind is None:
                    # Layers whose media type has no known kind are left out of the result.
                    continue
                layers.append(OciImageLayer(
                    kind=layer_kind,
                    data=self._repo.ref_blob(layer_desc.digest),
                ))

            return OciImageManifest(
                **manifest_kw,
                config=config,
                layers=layers,
            )

        if isinstance(obj, OciMediaImageIndex):
            index_kw = make_kw('manifests')

            manifests = []
            for manifest_desc in obj.manifests:
                # Descriptors whose blob is not in the repository are skipped rather than treated as errors.
                if not self._repo.contains_blob(manifest_desc.digest):
                    continue
                converted = self.from_media(self.read_descriptor(manifest_desc))
                if is_empty_oci_dataclass(converted):
                    continue
                manifests.append(converted)

            return OciImageIndex(
                **index_kw,
                manifests=manifests,
            )

        raise TypeError(obj)
124
+
125
+
126
+ ##
127
+
128
+
129
def read_oci_repository_root_index(
        obj: ta.Any,
        *,
        file_name: str = 'index.json',
) -> OciImageIndex:
    """
    Loads the root image index of a file-backed OCI repository.

    `obj` is handed to `OciRepository.of` and the result must be a `FileOciRepository`. The file named `file_name`
    is read from it, loaded as an `OciMediaImageIndex`, and converted to an `OciImageIndex`.
    """

    repo = check.isinstance(OciRepository.of(obj), FileOciRepository)
    loader = OciRepositoryLoader(repo)

    root_media_index = loader.load_object(repo.read_file(file_name), OciMediaImageIndex)

    return check.isinstance(loader.from_media(root_media_index), OciImageIndex)
omdev/oci/media.py ADDED
@@ -0,0 +1,179 @@
1
+ # ruff: noqa: UP006 UP007
2
+ # @omlish-lite
3
+ import abc
4
+ import dataclasses as dc
5
+ import typing as ta
6
+
7
+ from omlish.lite.check import check
8
+ from omlish.lite.marshal import OBJ_MARSHALER_FIELD_KEY
9
+ from omlish.lite.marshal import OBJ_MARSHALER_OMIT_IF_NONE
10
+ from omlish.lite.marshal import unmarshal_obj
11
+
12
+ from .data import OciImageConfig
13
+ from .data import OciImageLayer
14
+
15
+
16
+ ##
17
+
18
+
19
# Names of the fields that exist only on media dataclasses, not on their plain counterparts.
OCI_MEDIA_FIELDS: ta.Collection[str] = frozenset((
    'schema_version',
    'media_type',
))
23
+
24
+
25
@dc.dataclass()
class OciMediaDataclass(abc.ABC):  # noqa
    """
    Base class for dataclasses that mirror OCI media documents.

    Every subclass has to define each name in `OCI_MEDIA_FIELDS` in its own class body; this is checked when the
    subclass is created. The properties here only raise `TypeError`.
    """

    SCHEMA_VERSION: ta.ClassVar[int]
    MEDIA_TYPE: ta.ClassVar[str]

    @property
    def schema_version(self) -> int:
        raise TypeError

    @property
    def media_type(self) -> str:
        raise TypeError

    #

    def __init_subclass__(cls, **kwargs: ta.Any) -> None:
        super().__init_subclass__(**kwargs)

        # Look in the subclass's own namespace so inherited definitions do not satisfy the check.
        own_namespace = cls.__dict__
        for field_name in OCI_MEDIA_FIELDS:
            check.in_(field_name, own_namespace)
45
+
46
+
47
# Media type string -> media dataclass registered for it.
_REGISTERED_OCI_MEDIA_DATACLASSES: ta.Dict[str, ta.Type[OciMediaDataclass]] = {}


def _register_oci_media_dataclass(cls):
    """
    Class decorator that records `cls` in the registry under its own `MEDIA_TYPE` and returns it unchanged.

    `cls` must be a dataclass subclass of `OciMediaDataclass` that defines a non-empty `MEDIA_TYPE` in its own class
    body, and that media type must not already be registered.
    """

    check.issubclass(cls, OciMediaDataclass)
    check.arg(dc.is_dataclass(cls))

    # Read from the class's own namespace rather than via attribute lookup, so an inherited value is not accepted.
    media_type = check.non_empty_str(cls.__dict__['MEDIA_TYPE'])
    check.not_in(media_type, _REGISTERED_OCI_MEDIA_DATACLASSES)

    _REGISTERED_OCI_MEDIA_DATACLASSES[media_type] = cls
    return cls
57
+
58
+
59
def get_registered_oci_media_dataclass(media_type: str) -> ta.Optional[ta.Type[OciMediaDataclass]]:
    """Returns the media dataclass registered for `media_type`, or None when nothing is registered for it."""

    try:
        return _REGISTERED_OCI_MEDIA_DATACLASSES[media_type]
    except KeyError:
        return None
61
+
62
+
63
def unmarshal_oci_media_dataclass(
        dct: ta.Mapping[str, ta.Any],
        *,
        media_type: ta.Optional[str] = None,
) -> ta.Any:
    """
    Unmarshals `dct` into the media dataclass registered for its media type.

    When `media_type` is None the type is taken from the document's 'mediaType' key. Raises `KeyError` if that key
    is missing or if no dataclass is registered for the resulting media type.
    """

    resolved_media_type = (
        media_type
        if media_type is not None else
        check.non_empty_str(dct['mediaType'])
    )

    return unmarshal_obj(dct, _REGISTERED_OCI_MEDIA_DATACLASSES[resolved_media_type])
72
+
73
+
74
+ ##
75
+
76
+
77
@dc.dataclass()
class OciMediaDescriptor:
    """
    An OCI content descriptor: a typed, sized, digest-addressed reference to another blob.

    https://github.com/opencontainers/image-spec/blob/92353b0bee778725c617e7d57317b568a7796bd0/descriptor.md#properties
    """  # noqa

    media_type: str = dc.field(
        metadata={OBJ_MARSHALER_FIELD_KEY: 'mediaType'},
    )
    digest: str
    size: int

    #

    # The optional fields below all carry the OBJ_MARSHALER_OMIT_IF_NONE marshaler flag.

    urls: ta.Optional[ta.Sequence[str]] = dc.field(
        default=None,
        metadata={OBJ_MARSHALER_OMIT_IF_NONE: True},
    )
    annotations: ta.Optional[ta.Mapping[str, str]] = dc.field(
        default=None,
        metadata={OBJ_MARSHALER_OMIT_IF_NONE: True},
    )
    data: ta.Optional[str] = dc.field(
        default=None,
        metadata={OBJ_MARSHALER_OMIT_IF_NONE: True},
    )
    artifact_type: ta.Optional[str] = dc.field(
        default=None,
        metadata={
            OBJ_MARSHALER_FIELD_KEY: 'artifactType',
            OBJ_MARSHALER_OMIT_IF_NONE: True,
        },
    )

    #

    platform: ta.Optional[ta.Mapping[str, ta.Any]] = dc.field(
        default=None,
        metadata={OBJ_MARSHALER_OMIT_IF_NONE: True},
    )
95
+
96
+
97
+ ##
98
+
99
+
100
@_register_oci_media_dataclass
@dc.dataclass()
class OciMediaImageIndex(OciMediaDataclass):
    """
    Media form of an OCI image index: a list of descriptors, each pointing at a nested index or a manifest.

    https://github.com/opencontainers/image-spec/blob/92353b0bee778725c617e7d57317b568a7796bd0/image-index.md
    """

    manifests: ta.Sequence[OciMediaDescriptor]  # -> OciMediaImageIndex | OciMediaImageManifest

    #

    annotations: ta.Optional[ta.Mapping[str, str]] = dc.field(
        default=None,
        metadata={OBJ_MARSHALER_OMIT_IF_NONE: True},
    )

    #

    SCHEMA_VERSION: ta.ClassVar[int] = 2
    schema_version: int = dc.field(
        default=SCHEMA_VERSION,
        metadata={OBJ_MARSHALER_FIELD_KEY: 'schemaVersion'},
    )

    MEDIA_TYPE: ta.ClassVar[str] = 'application/vnd.oci.image.index.v1+json'
    media_type: str = dc.field(
        default=MEDIA_TYPE,
        metadata={OBJ_MARSHALER_FIELD_KEY: 'mediaType'},
    )
118
+
119
+
120
+ #
121
+
122
+
123
@_register_oci_media_dataclass
@dc.dataclass()
class OciMediaImageManifest(OciMediaDataclass):
    """
    Media form of an OCI image manifest: one config descriptor plus an ordered sequence of layer descriptors.

    https://github.com/opencontainers/image-spec/blob/92353b0bee778725c617e7d57317b568a7796bd0/manifest.md
    """

    config: OciMediaDescriptor  # -> OciMediaImageConfig

    layers: ta.Sequence[OciMediaDescriptor]

    #

    annotations: ta.Optional[ta.Mapping[str, str]] = dc.field(
        default=None,
        metadata={OBJ_MARSHALER_OMIT_IF_NONE: True},
    )

    #

    SCHEMA_VERSION: ta.ClassVar[int] = 2
    schema_version: int = dc.field(
        default=SCHEMA_VERSION,
        metadata={OBJ_MARSHALER_FIELD_KEY: 'schemaVersion'},
    )

    MEDIA_TYPE: ta.ClassVar[str] = 'application/vnd.oci.image.manifest.v1+json'
    media_type: str = dc.field(
        default=MEDIA_TYPE,
        metadata={OBJ_MARSHALER_FIELD_KEY: 'mediaType'},
    )
143
+
144
+
145
+ #
146
+
147
+
148
# Layer kind -> the media type string used for it in descriptors.
OCI_IMAGE_LAYER_KIND_MEDIA_TYPES: ta.Mapping[OciImageLayer.Kind, str] = {
    OciImageLayer.Kind.TAR: 'application/vnd.oci.image.layer.v1.tar',
    OciImageLayer.Kind.TAR_GZIP: 'application/vnd.oci.image.layer.v1.tar+gzip',
    OciImageLayer.Kind.TAR_ZSTD: 'application/vnd.oci.image.layer.v1.tar+zstd',
}

# The inverse of the mapping above: media type string -> layer kind.
OCI_IMAGE_LAYER_KIND_MEDIA_TYPES_: ta.Mapping[str, OciImageLayer.Kind] = {
    layer_media_type: layer_kind
    for layer_kind, layer_media_type in OCI_IMAGE_LAYER_KIND_MEDIA_TYPES.items()
}
158
+
159
+
160
+ #
161
+
162
+
163
@_register_oci_media_dataclass
@dc.dataclass()
class OciMediaImageConfig(OciImageConfig, OciMediaDataclass):
    """Media form of an image config: the `OciImageConfig` fields plus the schema version and media type fields."""

    SCHEMA_VERSION: ta.ClassVar[int] = 2
    schema_version: int = dc.field(
        default=SCHEMA_VERSION,
        metadata={OBJ_MARSHALER_FIELD_KEY: 'schemaVersion'},
    )

    MEDIA_TYPE: ta.ClassVar[str] = 'application/vnd.oci.image.config.v1+json'
    media_type: str = dc.field(
        default=MEDIA_TYPE,
        metadata={OBJ_MARSHALER_FIELD_KEY: 'mediaType'},
    )
171
+
172
+
173
+ ##
174
+
175
+
176
# Media types of the two document kinds an index's `manifests` descriptors may point at.
OCI_MANIFEST_MEDIA_TYPES: ta.AbstractSet[str] = frozenset((
    OciMediaImageIndex.MEDIA_TYPE,
    OciMediaImageManifest.MEDIA_TYPE,
))
omdev/oci/packing.py ADDED
@@ -0,0 +1,381 @@
1
+ # ruff: noqa: UP006 UP007
2
+ # @omlish-lite
3
+ import contextlib
4
+ import heapq
5
+ import os.path
6
+ import tarfile
7
+ import typing as ta
8
+
9
+ from omlish.lite.cached import cached_nullary
10
+ from omlish.lite.check import check
11
+ from omlish.lite.contextmanagers import ExitStacked
12
+
13
+ from .compression import OciCompression
14
+ from .tars import OciDataTarWriter
15
+ from .tars import WrittenOciDataTarFileInfo
16
+
17
+
18
+ ##
19
+
20
+
21
class OciLayerUnpacker(ExitStacked):
    """
    Flattens an ordered stack of layer tars into a single output tar.

    Input files are processed in the order given, with later files overriding earlier ones by member name. Members
    whose base name starts with '.wh.' are treated as whiteouts: they remove the named path contributed by *earlier*
    input files and are never copied to the output. Opaque whiteouts ('.wh..wh..opq') are not supported.
    """

    def __init__(
            self,
            input_files: ta.Sequence[ta.Union[str, tarfile.TarFile]],
            output_file_path: str,
    ) -> None:
        super().__init__()

        self._input_files = list(input_files)
        self._output_file_path = output_file_path

    #

    @contextlib.contextmanager
    def _open_input_file(self, input_file: ta.Union[str, tarfile.TarFile]) -> ta.Iterator[tarfile.TarFile]:
        """Yields a `TarFile` for `input_file`; paths are opened (and closed on exit), open tars are passed through."""

        if isinstance(input_file, tarfile.TarFile):
            yield input_file

        elif isinstance(input_file, str):
            with tarfile.open(input_file) as tar_file:
                yield tar_file

        else:
            raise TypeError(input_file)

    #

    class _Entry(ta.NamedTuple):
        # The input file (as originally passed in) the member came from.
        file: ta.Union[str, tarfile.TarFile]
        info: tarfile.TarInfo

    def _build_input_file_sorted_entries(self, input_file: ta.Union[str, tarfile.TarFile]) -> ta.Sequence[_Entry]:
        """Returns one entry per member of `input_file`, sorted by member name; duplicate names are rejected."""

        dct: ta.Dict[str, OciLayerUnpacker._Entry] = {}

        with self._open_input_file(input_file) as input_tar_file:
            for info in input_tar_file.getmembers():
                check.not_in(info.name, dct)
                dct[info.name] = self._Entry(
                    file=input_file,
                    info=info,
                )

        return sorted(dct.values(), key=lambda entry: entry.info.name)

    @cached_nullary
    def _entries_by_name(self) -> ta.Mapping[str, _Entry]:
        """
        Computes the surviving entries, keyed by member name, after merging all input files in order.

        The merge is done on a tree of nested dicts: a directory is a dict whose '' key holds the directory's own
        entry (if any) and whose other keys are its children; any other member is stored directly as an `_Entry`.

        Raises `NotImplementedError` if any input file contains an opaque whiteout, and `KeyError` if a member's parent
        directory is not present in the tree.
        """

        root: dict = {}

        def find_dir(dir_name: str) -> dict:  # noqa
            if dir_name:
                dir_parts = dir_name.split('/')
            else:
                dir_parts = []

            cur = root  # noqa
            for dir_part in dir_parts:
                cur = cur[dir_part]  # noqa

            return check.isinstance(cur, dict)

        #

        for input_file in self._input_files:
            sorted_entries = self._build_input_file_sorted_entries(input_file)

            wh_names = set()
            wh_opaques = set()
            layer_entries: ta.List[OciLayerUnpacker._Entry] = []

            # Split this layer's members into whiteout markers and real entries before touching the tree.
            for entry in sorted_entries:
                name = check.non_empty_str(entry.info.name)
                base_name = os.path.basename(name)
                dir_name = os.path.dirname(name)

                if base_name == '.wh..wh..opq':
                    wh_opaques.add(dir_name)

                elif base_name.startswith('.wh.'):
                    wh_base_name = os.path.basename(base_name[4:])
                    wh_names.add(os.path.join(dir_name, wh_base_name))

                else:
                    layer_entries.append(entry)

            if wh_opaques:
                raise NotImplementedError

            #

            # Apply whiteouts before merging this layer's own entries: a whiteout only hides content from lower layers,
            # so a path that is whited out and re-added within the same layer must survive.
            for wh_name in sorted(wh_names, reverse=True):
                wh_dir_name = os.path.dirname(wh_name)
                wh_base_name = os.path.basename(wh_name)

                try:
                    cur = find_dir(wh_dir_name)
                    rm = cur[wh_base_name]
                except KeyError:
                    # Nothing in the lower layers to hide - the whiteout is a no-op.
                    continue

                # Whiteouts wipe out whole directory:
                # https://github.com/containerd/containerd/blob/59c8cf6ea5f4175ad512914dd5ce554942bf144f/pkg/archive/tar_test.go#L648
                if not isinstance(rm, (dict, self._Entry)):
                    raise TypeError(rm)

                del cur[wh_base_name]

            #

            for entry in layer_entries:
                info = entry.info
                base_name = os.path.basename(info.name)

                cur = find_dir(os.path.dirname(info.name))

                if info.type == tarfile.DIRTYPE:
                    ex = cur.get(base_name)
                    if isinstance(ex, dict):
                        # Directory already exists below: keep its children, replace only its own entry.
                        ex[''] = entry
                    else:
                        # New directory, or a directory replacing a non-directory from a lower layer.
                        cur[base_name] = {'': entry}

                else:
                    cur[base_name] = entry

        #

        out: ta.Dict[str, OciLayerUnpacker._Entry] = {}

        def rec(cur):  # noqa
            for _, child in sorted(cur.items(), key=lambda t: t[0]):
                if isinstance(child, dict):
                    rec(child)

                elif isinstance(child, self._Entry):
                    check.not_in(child.info.name, out)
                    out[child.info.name] = child

                else:
                    raise TypeError(child)

        rec(root)

        return out

    #

    @cached_nullary
    def _output_tar_file(self) -> tarfile.TarFile:
        """Opens the output tar for writing on first use; it is registered with this object's exit stack."""

        return self._enter_context(tarfile.open(self._output_file_path, 'w'))

    #

    def _add_unpacked_entry(
            self,
            input_tar_file: tarfile.TarFile,
            info: tarfile.TarInfo,
    ) -> None:
        """Copies one member (and its data, for regular files) from `input_tar_file` to the output tar."""

        base_name = os.path.basename(info.name)
        check.state(not base_name.startswith('.wh.'))

        if info.type in tarfile.REGULAR_TYPES:
            with check.not_none(input_tar_file.extractfile(info)) as f:
                self._output_tar_file().addfile(info, f)

        else:
            self._output_tar_file().addfile(info)

    def _unpack_file(
            self,
            input_file: ta.Union[str, tarfile.TarFile],
    ) -> None:
        """Writes to the output every member of `input_file` that is the surviving entry for its name."""

        entries_by_name = self._entries_by_name()

        with self._open_input_file(input_file) as input_tar_file:
            info: tarfile.TarInfo
            for info in input_tar_file.getmembers():
                try:
                    entry = entries_by_name[info.name]
                except KeyError:
                    # Whited out, or a whiteout marker itself.
                    continue

                # Overridden by a later input file - that file will write it instead.
                if entry.file != input_file:
                    continue

                self._add_unpacked_entry(input_tar_file, info)

    @cached_nullary
    def write(self) -> None:
        """Writes the flattened output tar. Runs at most once per instance."""

        for input_file in self._input_files:
            self._unpack_file(input_file)
210
+
211
+
212
+ #
213
+
214
+
215
class OciLayerPacker(ExitStacked):
    """
    Splits the members of one input tar across several output tars.

    Non-file members all go to the first output. Regular files are distributed largest-first, each going to the output
    whose writer currently reports the smallest compressed size. Hard links go to whichever output received their
    target.
    """

    def __init__(
            self,
            input_file_path: str,
            output_file_paths: ta.Sequence[str],
            *,
            compression: ta.Optional[OciCompression] = None,
    ) -> None:
        super().__init__()

        self._input_file_path = input_file_path
        self._output_file_paths = list(output_file_paths)
        self._compression = compression

        # Member name -> index of the output file it was written to.
        self._output_file_indexes_by_name: ta.Dict[str, int] = {}

    #

    @cached_nullary
    def _input_tar_file(self) -> tarfile.TarFile:
        # FIXME: check uncompressed
        return self._enter_context(tarfile.open(self._input_file_path))

    #

    @cached_nullary
    def _entries_by_name(self) -> ta.Mapping[str, tarfile.TarInfo]:
        """Returns the input tar's members keyed by name; for a repeated name the last member wins."""

        return {
            info.name: info
            for info in self._input_tar_file().getmembers()
        }

    #

    class _CategorizedEntries(ta.NamedTuple):
        files_by_name: ta.Mapping[str, tarfile.TarInfo]
        non_files_by_name: ta.Mapping[str, tarfile.TarInfo]
        links_by_name: ta.Mapping[str, tarfile.TarInfo]

    @cached_nullary
    def _categorized_entries(self) -> _CategorizedEntries:
        """Partitions the input members into regular files, hard links, and everything else."""

        files_by_name: ta.Dict[str, tarfile.TarInfo] = {}
        non_files_by_name: ta.Dict[str, tarfile.TarInfo] = {}
        links_by_name: ta.Dict[str, tarfile.TarInfo] = {}

        for name, info in self._entries_by_name().items():
            if info.type in tarfile.REGULAR_TYPES:
                files_by_name[name] = info
            elif info.type == tarfile.LNKTYPE:
                # Only hard links are treated as links. GNUTYPE_LONGLINK is the GNU long-linkname extension header,
                # which tarfile consumes while parsing and never returns as a member.
                links_by_name[name] = info
            else:
                non_files_by_name[name] = info

        return self._CategorizedEntries(
            files_by_name=files_by_name,
            non_files_by_name=non_files_by_name,
            links_by_name=links_by_name,
        )

    #

    @cached_nullary
    def _non_files_sorted_by_name(self) -> ta.Sequence[tarfile.TarInfo]:
        return sorted(
            self._categorized_entries().non_files_by_name.values(),
            key=lambda info: info.name,
        )

    @cached_nullary
    def _files_descending_by_size(self) -> ta.Sequence[tarfile.TarInfo]:
        return sorted(
            self._categorized_entries().files_by_name.values(),
            key=lambda info: -check.isinstance(info.size, int),
        )

    #

    @cached_nullary
    def _output_files(self) -> ta.Sequence[ta.BinaryIO]:
        """Opens every output path for binary writing, registering each with this object's exit stack."""

        return [
            self._enter_context(open(output_file_path, 'wb'))
            for output_file_path in self._output_file_paths
        ]

    @cached_nullary
    def _output_tar_writers(self) -> ta.Sequence[OciDataTarWriter]:
        """Creates one tar writer per output file, in the same order as the output paths."""

        return [
            self._enter_context(
                OciDataTarWriter(
                    output_file,
                    compression=self._compression,
                ),
            )
            for output_file in self._output_files()
        ]

    #

    def _write_entry(
            self,
            info: tarfile.TarInfo,
            output_file_idx: int,
    ) -> None:
        """Writes one member to the chosen output and records where it went; each name may be written only once."""

        check.not_in(info.name, self._output_file_indexes_by_name)

        writer = self._output_tar_writers()[output_file_idx]

        if info.type in tarfile.REGULAR_TYPES:
            with check.not_none(self._input_tar_file().extractfile(info)) as f:
                writer.add_file(info, f)  # type: ignore

        else:
            writer.add_file(info)

        self._output_file_indexes_by_name[info.name] = output_file_idx

    @cached_nullary
    def _write_non_files(self) -> None:
        for non_file in self._non_files_sorted_by_name():
            self._write_entry(non_file, 0)

    @cached_nullary
    def _write_files(self) -> None:
        """Greedily assigns files, largest first, to the output with the smallest compressed size so far."""

        writers = self._output_tar_writers()

        # Min-heap of (compressed size so far, output index); ties resolve to the lower index.
        bins = [
            (writer.info().compressed_sz, i)
            for i, writer in enumerate(writers)
        ]

        heapq.heapify(bins)

        for file in self._files_descending_by_size():
            _, bin_index = heapq.heappop(bins)

            writer = writers[bin_index]

            self._write_entry(file, bin_index)

            # Re-read the size from the writer rather than adding the file's size, since the output may be compressed.
            bin_size = writer.info().compressed_sz

            heapq.heappush(bins, (bin_size, bin_index))

    @cached_nullary
    def _write_links(self) -> None:
        """Writes each hard link to the output that holds its target; raises `KeyError` if the target was not written."""

        for link in self._categorized_entries().links_by_name.values():
            link_name = check.non_empty_str(link.linkname)

            output_file_idx = self._output_file_indexes_by_name[link_name]

            self._write_entry(link, output_file_idx)

    @cached_nullary
    def write(self) -> ta.Mapping[str, WrittenOciDataTarFileInfo]:
        """
        Writes all outputs and returns each writer's info keyed by output file path. Runs at most once per instance.
        """

        writers = self._output_tar_writers()

        self._write_non_files()
        self._write_files()
        self._write_links()

        # Close the tars before reading the final writer info.
        for output_tar_writer in writers:
            output_tar_writer.tar_file().close()

        return {
            output_file_path: output_tar_writer.info()
            for output_file_path, output_tar_writer in zip(self._output_file_paths, writers)
        }