omdev 0.0.0.dev221__py3-none-any.whl → 0.0.0.dev223__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. omdev/ci/cache.py +40 -23
  2. omdev/ci/ci.py +49 -109
  3. omdev/ci/cli.py +24 -23
  4. omdev/ci/docker/__init__.py +0 -0
  5. omdev/ci/docker/buildcaching.py +69 -0
  6. omdev/ci/docker/cache.py +57 -0
  7. omdev/ci/{docker.py → docker/cmds.py} +1 -44
  8. omdev/ci/docker/imagepulling.py +64 -0
  9. omdev/ci/docker/inject.py +37 -0
  10. omdev/ci/docker/utils.py +48 -0
  11. omdev/ci/github/cache.py +15 -5
  12. omdev/ci/github/inject.py +30 -0
  13. omdev/ci/inject.py +61 -0
  14. omdev/dataserver/__init__.py +1 -0
  15. omdev/dataserver/handlers.py +198 -0
  16. omdev/dataserver/http.py +69 -0
  17. omdev/dataserver/routes.py +49 -0
  18. omdev/dataserver/server.py +90 -0
  19. omdev/dataserver/targets.py +89 -0
  20. omdev/oci/__init__.py +0 -0
  21. omdev/oci/building.py +221 -0
  22. omdev/oci/compression.py +8 -0
  23. omdev/oci/data.py +151 -0
  24. omdev/oci/datarefs.py +138 -0
  25. omdev/oci/dataserver.py +61 -0
  26. omdev/oci/loading.py +142 -0
  27. omdev/oci/media.py +179 -0
  28. omdev/oci/packing.py +381 -0
  29. omdev/oci/repositories.py +159 -0
  30. omdev/oci/tars.py +144 -0
  31. omdev/pyproject/resources/python.sh +1 -1
  32. omdev/scripts/ci.py +1841 -384
  33. omdev/scripts/interp.py +100 -22
  34. omdev/scripts/pyproject.py +122 -28
  35. {omdev-0.0.0.dev221.dist-info → omdev-0.0.0.dev223.dist-info}/METADATA +2 -2
  36. {omdev-0.0.0.dev221.dist-info → omdev-0.0.0.dev223.dist-info}/RECORD +40 -15
  37. {omdev-0.0.0.dev221.dist-info → omdev-0.0.0.dev223.dist-info}/LICENSE +0 -0
  38. {omdev-0.0.0.dev221.dist-info → omdev-0.0.0.dev223.dist-info}/WHEEL +0 -0
  39. {omdev-0.0.0.dev221.dist-info → omdev-0.0.0.dev223.dist-info}/entry_points.txt +0 -0
  40. {omdev-0.0.0.dev221.dist-info → omdev-0.0.0.dev223.dist-info}/top_level.txt +0 -0
omdev/oci/loading.py ADDED
@@ -0,0 +1,142 @@
1
+ # ruff: noqa: UP006 UP007
2
+ # @omlish-lite
3
+ import dataclasses as dc
4
+ import json
5
+ import typing as ta
6
+
7
+ from omlish.lite.check import check
8
+
9
+ from .data import OciImageConfig
10
+ from .data import OciImageIndex
11
+ from .data import OciImageLayer
12
+ from .data import OciImageManifest
13
+ from .data import is_empty_oci_dataclass
14
+ from .media import OCI_IMAGE_LAYER_KIND_MEDIA_TYPES_
15
+ from .media import OCI_MEDIA_FIELDS
16
+ from .media import OciMediaDescriptor
17
+ from .media import OciMediaImageConfig
18
+ from .media import OciMediaImageIndex
19
+ from .media import OciMediaImageManifest
20
+ from .media import unmarshal_oci_media_dataclass
21
+ from .repositories import FileOciRepository
22
+ from .repositories import OciRepository
23
+
24
+
25
+ T = ta.TypeVar('T')
26
+
27
+
28
+ ##
29
+
30
+
31
class OciRepositoryLoader:
    """
    Loads media-typed JSON objects out of an `OciRepository` and converts them into the plain image dataclasses
    (`OciImageConfig`, `OciImageManifest`, `OciImageIndex`).
    """

    def __init__(
        self,
        repo: OciRepository,
    ) -> None:
        super().__init__()

        self._repo = repo

    #

    def load_object(
        self,
        data: bytes,
        cls: ta.Type[T] = object,  # type: ignore[assignment]
        *,
        media_type: ta.Optional[str] = None,
    ) -> T:
        """
        Decode `data` as UTF-8 JSON and unmarshal it into a registered media dataclass.

        `media_type` is forwarded to `unmarshal_oci_media_dataclass`. The result is checked to be an instance of `cls`
        before being returned.
        """

        dct = json.loads(data.decode('utf-8'))
        loaded = unmarshal_oci_media_dataclass(dct, media_type=media_type)
        return check.isinstance(loaded, cls)

    def read_object(
        self,
        digest: str,
        cls: ta.Type[T] = object,  # type: ignore[assignment]
        *,
        media_type: ta.Optional[str] = None,
    ) -> T:
        """Read the blob stored under `digest` from the repository and load it via `load_object`."""

        blob = self._repo.read_blob(digest)
        return self.load_object(blob, cls, media_type=media_type)

    def read_descriptor(
        self,
        desc: OciMediaDescriptor,
        cls: ta.Type[T] = object,  # type: ignore[assignment]
    ) -> ta.Any:
        """Read the object a descriptor points at, using the descriptor's digest and media type."""

        return self.read_object(desc.digest, cls, media_type=desc.media_type)

    #

    def from_media(self, obj: ta.Any) -> ta.Any:
        """
        Convert a media dataclass into its plain counterpart, following descriptors through the repository.

        - An `OciMediaImageConfig` becomes an `OciImageConfig`.
        - An `OciMediaImageManifest` becomes an `OciImageManifest`; its config descriptor is read and converted, and
          each layer whose media type is present in `OCI_IMAGE_LAYER_KIND_MEDIA_TYPES_` becomes an `OciImageLayer`
          referencing the blob. Layers with any other media type are skipped.
        - An `OciMediaImageIndex` becomes an `OciImageIndex`; manifests whose blob is not contained in the repository,
          or which convert to an empty dataclass, are skipped.

        Raises `TypeError` for any other object.
        """

        def make_kw(*exclude):
            # Copy every dataclass field of `obj` except the media envelope fields and the explicitly excluded ones.
            kw = {}
            for fld in dc.fields(obj):
                att = fld.name
                if att in OCI_MEDIA_FIELDS or att in exclude:
                    continue
                kw[att] = getattr(obj, att)
            return kw

        if isinstance(obj, OciMediaImageConfig):
            return OciImageConfig(**make_kw())

        if isinstance(obj, OciMediaImageManifest):
            manifest_kw = make_kw('config', 'layers')
            config = self.from_media(self.read_descriptor(obj.config))

            layers = []
            for layer_desc in obj.layers:
                layer_kind = OCI_IMAGE_LAYER_KIND_MEDIA_TYPES_.get(layer_desc.media_type)
                if layer_kind is None:
                    continue
                layers.append(OciImageLayer(
                    kind=layer_kind,
                    data=self._repo.ref_blob(layer_desc.digest),
                ))

            return OciImageManifest(
                **manifest_kw,
                config=config,
                layers=layers,
            )

        if isinstance(obj, OciMediaImageIndex):
            index_kw = make_kw('manifests')

            manifests = []
            for manifest_desc in obj.manifests:
                if not self._repo.contains_blob(manifest_desc.digest):
                    continue
                converted = self.from_media(self.read_descriptor(manifest_desc))
                if is_empty_oci_dataclass(converted):
                    continue
                manifests.append(converted)

            return OciImageIndex(
                **index_kw,
                manifests=manifests,
            )

        raise TypeError(obj)
124
+
125
+
126
+ ##
127
+
128
+
129
def read_oci_repository_root_index(
    obj: ta.Any,
    *,
    file_name: str = 'index.json',
) -> OciImageIndex:
    """
    Load the root image index of a file-backed OCI repository.

    `obj` is resolved through `OciRepository.of` and must yield a `FileOciRepository`. The file `file_name` is read
    from it, loaded as an `OciMediaImageIndex`, and converted to an `OciImageIndex`.
    """

    repo = check.isinstance(OciRepository.of(obj), FileOciRepository)
    loader = OciRepositoryLoader(repo)

    index_bytes = repo.read_file(file_name)
    media_index = loader.load_object(index_bytes, OciMediaImageIndex)

    return check.isinstance(loader.from_media(media_index), OciImageIndex)
omdev/oci/media.py ADDED
@@ -0,0 +1,179 @@
1
+ # ruff: noqa: UP006 UP007
2
+ # @omlish-lite
3
+ import abc
4
+ import dataclasses as dc
5
+ import typing as ta
6
+
7
+ from omlish.lite.check import check
8
+ from omlish.lite.marshal import OBJ_MARSHALER_FIELD_KEY
9
+ from omlish.lite.marshal import OBJ_MARSHALER_OMIT_IF_NONE
10
+ from omlish.lite.marshal import unmarshal_obj
11
+
12
+ from .data import OciImageConfig
13
+ from .data import OciImageLayer
14
+
15
+
16
+ ##
17
+
18
+
19
# Names of the attributes that belong to the media envelope itself. Subclasses of OciMediaDataclass must define each
# of these in their own class body.
OCI_MEDIA_FIELDS: ta.Collection[str] = frozenset((
    'schema_version',
    'media_type',
))
23
+
24
+
25
+ @dc.dataclass()
26
+ class OciMediaDataclass(abc.ABC): # noqa
27
+ SCHEMA_VERSION: ta.ClassVar[int]
28
+
29
+ @property
30
+ def schema_version(self) -> int:
31
+ raise TypeError
32
+
33
+ MEDIA_TYPE: ta.ClassVar[str]
34
+
35
+ @property
36
+ def media_type(self) -> str:
37
+ raise TypeError
38
+
39
+ #
40
+
41
+ def __init_subclass__(cls, **kwargs: ta.Any) -> None:
42
+ super().__init_subclass__(**kwargs)
43
+ for a in OCI_MEDIA_FIELDS:
44
+ check.in_(a, cls.__dict__)
45
+
46
+
47
# Registered media dataclasses, keyed by their MEDIA_TYPE string.
_REGISTERED_OCI_MEDIA_DATACLASSES: ta.Dict[str, ta.Type[OciMediaDataclass]] = {}


def _register_oci_media_dataclass(cls):
    """
    Class decorator that records `cls` in the media dataclass registry under its own `MEDIA_TYPE`.

    `cls` must be a dataclass subclassing `OciMediaDataclass`, must define a non-empty `MEDIA_TYPE` in its own class
    body, and that media type must not already be registered. Returns `cls` unchanged.
    """

    check.issubclass(cls, OciMediaDataclass)
    check.arg(dc.is_dataclass(cls))

    media_type = check.non_empty_str(cls.__dict__['MEDIA_TYPE'])
    check.not_in(media_type, _REGISTERED_OCI_MEDIA_DATACLASSES)

    _REGISTERED_OCI_MEDIA_DATACLASSES[media_type] = cls
    return cls
57
+
58
+
59
def get_registered_oci_media_dataclass(media_type: str) -> ta.Optional[ta.Type[OciMediaDataclass]]:
    """Return the dataclass registered for `media_type`, or None when nothing is registered under it."""

    try:
        return _REGISTERED_OCI_MEDIA_DATACLASSES[media_type]
    except KeyError:
        return None
61
+
62
+
63
def unmarshal_oci_media_dataclass(
    dct: ta.Mapping[str, ta.Any],
    *,
    media_type: ta.Optional[str] = None,
) -> ta.Any:
    """
    Unmarshal `dct` into the dataclass registered for its media type.

    When `media_type` is not given it is taken from `dct['mediaType']`, which must then be a non-empty string. Raises
    `KeyError` if no dataclass is registered for the resulting media type.
    """

    resolved = media_type if media_type is not None else check.non_empty_str(dct['mediaType'])
    target_cls = _REGISTERED_OCI_MEDIA_DATACLASSES[resolved]
    return unmarshal_obj(dct, target_cls)
72
+
73
+
74
+ ##
75
+
76
+
77
@dc.dataclass()
class OciMediaDescriptor:
    """https://github.com/opencontainers/image-spec/blob/92353b0bee778725c617e7d57317b568a7796bd0/descriptor.md#properties"""  # noqa

    # Required fields.

    media_type: str = dc.field(
        metadata={OBJ_MARSHALER_FIELD_KEY: 'mediaType'},
    )
    digest: str
    size: int

    # Optional fields; each defaults to None and carries the omit-if-none marshaling flag.

    urls: ta.Optional[ta.Sequence[str]] = dc.field(
        default=None,
        metadata={OBJ_MARSHALER_OMIT_IF_NONE: True},
    )
    annotations: ta.Optional[ta.Mapping[str, str]] = dc.field(
        default=None,
        metadata={OBJ_MARSHALER_OMIT_IF_NONE: True},
    )
    data: ta.Optional[str] = dc.field(
        default=None,
        metadata={OBJ_MARSHALER_OMIT_IF_NONE: True},
    )
    artifact_type: ta.Optional[str] = dc.field(
        default=None,
        metadata={
            OBJ_MARSHALER_FIELD_KEY: 'artifactType',
            OBJ_MARSHALER_OMIT_IF_NONE: True,
        },
    )

    #

    platform: ta.Optional[ta.Mapping[str, ta.Any]] = dc.field(
        default=None,
        metadata={OBJ_MARSHALER_OMIT_IF_NONE: True},
    )
95
+
96
+
97
+ ##
98
+
99
+
100
@_register_oci_media_dataclass
@dc.dataclass()
class OciMediaImageIndex(OciMediaDataclass):
    """https://github.com/opencontainers/image-spec/blob/92353b0bee778725c617e7d57317b568a7796bd0/image-index.md"""

    # Class-level constants, also used as the defaults of the envelope fields below.
    SCHEMA_VERSION: ta.ClassVar[int] = 2
    MEDIA_TYPE: ta.ClassVar[str] = 'application/vnd.oci.image.index.v1+json'

    #

    manifests: ta.Sequence[OciMediaDescriptor]  # -> OciMediaImageIndex | OciMediaImageManifest

    #

    annotations: ta.Optional[ta.Mapping[str, str]] = dc.field(
        default=None,
        metadata={OBJ_MARSHALER_OMIT_IF_NONE: True},
    )

    #

    schema_version: int = dc.field(
        default=SCHEMA_VERSION,
        metadata={OBJ_MARSHALER_FIELD_KEY: 'schemaVersion'},
    )
    media_type: str = dc.field(
        default=MEDIA_TYPE,
        metadata={OBJ_MARSHALER_FIELD_KEY: 'mediaType'},
    )
118
+
119
+
120
+ #
121
+
122
+
123
@_register_oci_media_dataclass
@dc.dataclass()
class OciMediaImageManifest(OciMediaDataclass):
    """https://github.com/opencontainers/image-spec/blob/92353b0bee778725c617e7d57317b568a7796bd0/manifest.md"""

    # Class-level constants, also used as the defaults of the envelope fields below.
    SCHEMA_VERSION: ta.ClassVar[int] = 2
    MEDIA_TYPE: ta.ClassVar[str] = 'application/vnd.oci.image.manifest.v1+json'

    #

    config: OciMediaDescriptor  # -> OciMediaImageConfig

    layers: ta.Sequence[OciMediaDescriptor]

    #

    annotations: ta.Optional[ta.Mapping[str, str]] = dc.field(
        default=None,
        metadata={OBJ_MARSHALER_OMIT_IF_NONE: True},
    )

    #

    schema_version: int = dc.field(
        default=SCHEMA_VERSION,
        metadata={OBJ_MARSHALER_FIELD_KEY: 'schemaVersion'},
    )
    media_type: str = dc.field(
        default=MEDIA_TYPE,
        metadata={OBJ_MARSHALER_FIELD_KEY: 'mediaType'},
    )
143
+
144
+
145
+ #
146
+
147
+
148
# Layer kind -> media type string.
OCI_IMAGE_LAYER_KIND_MEDIA_TYPES: ta.Mapping[OciImageLayer.Kind, str] = {
    OciImageLayer.Kind.TAR: 'application/vnd.oci.image.layer.v1.tar',
    OciImageLayer.Kind.TAR_GZIP: 'application/vnd.oci.image.layer.v1.tar+gzip',
    OciImageLayer.Kind.TAR_ZSTD: 'application/vnd.oci.image.layer.v1.tar+zstd',
}

# Inverse of the above: media type string -> layer kind.
OCI_IMAGE_LAYER_KIND_MEDIA_TYPES_: ta.Mapping[str, OciImageLayer.Kind] = {
    layer_media_type: layer_kind
    for layer_kind, layer_media_type in OCI_IMAGE_LAYER_KIND_MEDIA_TYPES.items()
}
158
+
159
+
160
+ #
161
+
162
+
163
@_register_oci_media_dataclass
@dc.dataclass()
class OciMediaImageConfig(OciImageConfig, OciMediaDataclass):
    # Media-typed form of OciImageConfig: inherits its fields and adds the schema version / media type envelope.

    SCHEMA_VERSION: ta.ClassVar[int] = 2
    MEDIA_TYPE: ta.ClassVar[str] = 'application/vnd.oci.image.config.v1+json'

    schema_version: int = dc.field(
        default=SCHEMA_VERSION,
        metadata={OBJ_MARSHALER_FIELD_KEY: 'schemaVersion'},
    )
    media_type: str = dc.field(
        default=MEDIA_TYPE,
        metadata={OBJ_MARSHALER_FIELD_KEY: 'mediaType'},
    )
171
+
172
+
173
+ ##
174
+
175
+
176
# Media types of the index and manifest dataclasses.
OCI_MANIFEST_MEDIA_TYPES: ta.AbstractSet[str] = frozenset((
    OciMediaImageIndex.MEDIA_TYPE,
    OciMediaImageManifest.MEDIA_TYPE,
))
omdev/oci/packing.py ADDED
@@ -0,0 +1,381 @@
1
+ # ruff: noqa: UP006 UP007
2
+ # @omlish-lite
3
+ import contextlib
4
+ import heapq
5
+ import os.path
6
+ import tarfile
7
+ import typing as ta
8
+
9
+ from omlish.lite.cached import cached_nullary
10
+ from omlish.lite.check import check
11
+ from omlish.lite.contextmanagers import ExitStacked
12
+
13
+ from .compression import OciCompression
14
+ from .tars import OciDataTarWriter
15
+ from .tars import WrittenOciDataTarFileInfo
16
+
17
+
18
+ ##
19
+
20
+
21
class OciLayerUnpacker(ExitStacked):
    """
    Flattens an ordered sequence of layer tars into a single output tar.

    Input files are processed in the order given, with entries from later files overriding same-named entries from
    earlier ones. `.wh.<name>` whiteout entries remove `<name>` as contributed by earlier input files. Opaque
    whiteouts (`.wh..wh..opq`) are not supported and raise `NotImplementedError`.
    """

    def __init__(
        self,
        input_files: ta.Sequence[ta.Union[str, tarfile.TarFile]],
        output_file_path: str,
    ) -> None:
        super().__init__()

        self._input_files = list(input_files)
        self._output_file_path = output_file_path

    #

    @contextlib.contextmanager
    def _open_input_file(self, input_file: ta.Union[str, tarfile.TarFile]) -> ta.Iterator[tarfile.TarFile]:
        """
        Yield a `TarFile` for `input_file`.

        An already-open `TarFile` is yielded as-is and left open; a path is opened for the duration of the context.
        Raises `TypeError` for anything else.
        """

        if isinstance(input_file, tarfile.TarFile):
            yield input_file

        elif isinstance(input_file, str):
            with tarfile.open(input_file) as tar_file:
                yield tar_file

        else:
            raise TypeError(input_file)

    #

    class _Entry(ta.NamedTuple):
        # The input file (as passed to the constructor) the member came from.
        file: ta.Union[str, tarfile.TarFile]
        # The tar member itself.
        info: tarfile.TarInfo

    def _build_input_file_sorted_entries(self, input_file: ta.Union[str, tarfile.TarFile]) -> ta.Sequence[_Entry]:
        """Return one entry per member of `input_file`, sorted by member name. Duplicate names are rejected."""

        dct: ta.Dict[str, OciLayerUnpacker._Entry] = {}

        with self._open_input_file(input_file) as input_tar_file:
            for info in input_tar_file.getmembers():
                check.not_in(info.name, dct)
                dct[info.name] = self._Entry(
                    file=input_file,
                    info=info,
                )

        return sorted(dct.values(), key=lambda entry: entry.info.name)

    @cached_nullary
    def _entries_by_name(self) -> ta.Mapping[str, _Entry]:
        """
        Overlay all input files and return the surviving entries keyed by member name.

        For each input file, in order: whiteouts found in that file are first applied to the tree built from the
        preceding files, then the file's own non-whiteout entries are added. Whiteouts therefore never remove entries
        contributed by the same input file, and a whiteout whose target is absent is a no-op.
        """

        # Tree of nested dicts keyed by path component. A directory is a dict whose '' key, when present, holds the
        # directory's own entry; every other node is an `_Entry`.
        root: dict = {}

        def find_dir(dir_name: str) -> dict:  # noqa
            # Strict lookup: every component must already exist (KeyError otherwise) and resolve to a directory.
            cur = root  # noqa
            for dir_part in (dir_name.split('/') if dir_name else []):
                cur = cur[dir_part]  # noqa

            return check.isinstance(cur, dict)

        def find_dir_or_none(dir_name: str) -> ta.Optional[dict]:
            # Lenient lookup used for whiteouts: returns None if any component is missing or is not a directory.
            cur: ta.Any = root
            for dir_part in (dir_name.split('/') if dir_name else []):
                if not isinstance(cur, dict):
                    return None
                cur = cur.get(dir_part)

            return cur if isinstance(cur, dict) else None

        #

        for input_file in self._input_files:
            sorted_entries = self._build_input_file_sorted_entries(input_file)

            wh_names: ta.Set[str] = set()
            wh_opaques: ta.Set[str] = set()
            layer_entries: ta.List[OciLayerUnpacker._Entry] = []

            # Separate whiteout markers from real entries.

            for entry in sorted_entries:
                name = check.non_empty_str(entry.info.name)
                base_name = os.path.basename(name)
                dir_name = os.path.dirname(name)

                if base_name == '.wh..wh..opq':
                    wh_opaques.add(dir_name)
                    continue

                if base_name.startswith('.wh.'):
                    wh_base_name = os.path.basename(base_name[4:])
                    wh_names.add(os.path.join(dir_name, wh_base_name))
                    continue

                layer_entries.append(entry)

            if wh_opaques:
                raise NotImplementedError

            # Apply whiteouts to what the preceding input files contributed, before adding this file's own entries.
            # Reverse-sorted so nested paths are handled before their parents. Whiteouts wipe out whole directories:
            # https://github.com/containerd/containerd/blob/59c8cf6ea5f4175ad512914dd5ce554942bf144f/pkg/archive/tar_test.go#L648

            for wh_name in sorted(wh_names, reverse=True):
                wh_dir = find_dir_or_none(os.path.dirname(wh_name))
                if wh_dir is not None:
                    wh_dir.pop(os.path.basename(wh_name), None)

            # Add this file's entries. They are sorted by name, so a directory precedes its children.

            for entry in layer_entries:
                info = entry.info
                base_name = os.path.basename(info.name)
                cur = find_dir(os.path.dirname(info.name))

                if info.type == tarfile.DIRTYPE:
                    ex = cur.get(base_name)
                    if isinstance(ex, dict):
                        # Existing directory: replace its own entry, keep its children.
                        ex[''] = entry
                    else:
                        # New path, or a directory replacing a non-directory from an earlier file.
                        cur[base_name] = {'': entry}

                else:
                    cur[base_name] = entry

        #

        out: ta.Dict[str, OciLayerUnpacker._Entry] = {}

        def rec(cur):  # noqa
            # Depth-first walk in sorted key order, collecting every entry in the tree.
            for _, child in sorted(cur.items(), key=lambda t: t[0]):
                if isinstance(child, dict):
                    rec(child)

                elif isinstance(child, self._Entry):
                    check.not_in(child.info.name, out)
                    out[child.info.name] = child

                else:
                    raise TypeError(child)

        rec(root)

        return out

    #

    @cached_nullary
    def _output_tar_file(self) -> tarfile.TarFile:
        """Open the output tar for writing on first use, registered with this object's exit stack."""

        return self._enter_context(tarfile.open(self._output_file_path, 'w'))

    #

    def _add_unpacked_entry(
        self,
        input_tar_file: tarfile.TarFile,
        info: tarfile.TarInfo,
    ) -> None:
        """Copy one member from `input_tar_file` to the output tar, including file data for regular files."""

        base_name = os.path.basename(info.name)
        check.state(not base_name.startswith('.wh.'))

        if info.type in tarfile.REGULAR_TYPES:
            with check.not_none(input_tar_file.extractfile(info)) as f:
                self._output_tar_file().addfile(info, f)

        else:
            self._output_tar_file().addfile(info)

    def _unpack_file(
        self,
        input_file: ta.Union[str, tarfile.TarFile],
    ) -> None:
        """Write to the output tar every member of `input_file` that survived the overlay as this file's entry."""

        entries_by_name = self._entries_by_name()

        with self._open_input_file(input_file) as input_tar_file:
            info: tarfile.TarInfo
            for info in input_tar_file.getmembers():
                try:
                    entry = entries_by_name[info.name]
                except KeyError:
                    # Whited out, or a whiteout marker itself.
                    continue

                if entry.file != input_file:
                    # Overridden by a same-named entry from another input file.
                    continue

                self._add_unpacked_entry(input_tar_file, info)

    @cached_nullary
    def write(self) -> None:
        """Write the flattened output tar. Runs at most once per instance."""

        for input_file in self._input_files:
            self._unpack_file(input_file)
210
+
211
+
212
+ #
213
+
214
+
215
class OciLayerPacker(ExitStacked):
    """
    Splits the members of one input tar across several output tars.

    Non-file members all go to the first output. Regular files are assigned largest-first to whichever output
    currently reports the smallest `compressed_sz`. Link members go to the output that holds their link target.
    """

    def __init__(
        self,
        input_file_path: str,
        output_file_paths: ta.Sequence[str],
        *,
        compression: ta.Optional[OciCompression] = None,
    ) -> None:
        super().__init__()

        self._input_file_path = input_file_path
        self._output_file_paths = list(output_file_paths)
        self._compression = compression

        # Member name -> index of the output file it was written to.
        self._output_file_indexes_by_name: ta.Dict[str, int] = {}

    #

    @cached_nullary
    def _input_tar_file(self) -> tarfile.TarFile:
        """Open the input tar on first use, registered with this object's exit stack."""

        # FIXME: check uncompressed
        opened = tarfile.open(self._input_file_path)
        return self._enter_context(opened)

    #

    @cached_nullary
    def _entries_by_name(self) -> ta.Mapping[str, tarfile.TarInfo]:
        """Return the input tar's members keyed by name; a later member with the same name replaces an earlier."""

        by_name: ta.Dict[str, tarfile.TarInfo] = {}
        for member in self._input_tar_file().getmembers():
            by_name[member.name] = member
        return by_name

    #

    class _CategorizedEntries(ta.NamedTuple):
        files_by_name: ta.Mapping[str, tarfile.TarInfo]
        non_files_by_name: ta.Mapping[str, tarfile.TarInfo]
        links_by_name: ta.Mapping[str, tarfile.TarInfo]

    @cached_nullary
    def _categorized_entries(self) -> _CategorizedEntries:
        """Partition the input members into regular files, links, and everything else."""

        regular: ta.Dict[str, tarfile.TarInfo] = {}
        other: ta.Dict[str, tarfile.TarInfo] = {}
        linked: ta.Dict[str, tarfile.TarInfo] = {}

        for member_name, member in self._entries_by_name().items():
            if member.type in tarfile.REGULAR_TYPES:
                bucket = regular
            elif member.type in (tarfile.LNKTYPE, tarfile.GNUTYPE_LONGLINK):
                bucket = linked
            else:
                bucket = other
            bucket[member_name] = member

        return self._CategorizedEntries(
            files_by_name=regular,
            non_files_by_name=other,
            links_by_name=linked,
        )

    #

    @cached_nullary
    def _non_files_sorted_by_name(self) -> ta.Sequence[tarfile.TarInfo]:
        """Return the non-file members ordered by name."""

        members = list(self._categorized_entries().non_files_by_name.values())
        members.sort(key=lambda member: member.name)
        return members

    @cached_nullary
    def _files_descending_by_size(self) -> ta.Sequence[tarfile.TarInfo]:
        """Return the regular-file members ordered from largest to smallest."""

        members = list(self._categorized_entries().files_by_name.values())
        members.sort(key=lambda member: -check.isinstance(member.size, int))
        return members

    #

    @cached_nullary
    def _output_files(self) -> ta.Sequence[ta.BinaryIO]:
        """Open every output path for binary writing, registered with this object's exit stack."""

        opened: ta.List[ta.BinaryIO] = []
        for path in self._output_file_paths:
            opened.append(self._enter_context(open(path, 'wb')))
        return opened

    @cached_nullary
    def _output_tar_writers(self) -> ta.Sequence[OciDataTarWriter]:
        """Create one tar writer per output file, in output order, registered with this object's exit stack."""

        tar_writers: ta.List[OciDataTarWriter] = []
        for out_file in self._output_files():
            tar_writer = OciDataTarWriter(
                out_file,
                compression=self._compression,
            )
            tar_writers.append(self._enter_context(tar_writer))
        return tar_writers

    #

    def _write_entry(
        self,
        info: tarfile.TarInfo,
        output_file_idx: int,
    ) -> None:
        """Write one member to the output at `output_file_idx` and record where it went. Each name is written once."""

        check.not_in(info.name, self._output_file_indexes_by_name)

        target = self._output_tar_writers()[output_file_idx]

        if info.type not in tarfile.REGULAR_TYPES:
            target.add_file(info)

        else:
            with check.not_none(self._input_tar_file().extractfile(info)) as f:
                target.add_file(info, f)  # type: ignore

        self._output_file_indexes_by_name[info.name] = output_file_idx

    @cached_nullary
    def _write_non_files(self) -> None:
        """Write every non-file member, in name order, to the first output."""

        for member in self._non_files_sorted_by_name():
            self._write_entry(member, 0)

    @cached_nullary
    def _write_files(self) -> None:
        """Distribute regular files, largest first, always into the output with the smallest reported size."""

        tar_writers = self._output_tar_writers()

        # Min-heap of (current compressed size, output index).
        size_heap = [
            (tar_writer.info().compressed_sz, idx)
            for idx, tar_writer in enumerate(tar_writers)
        ]
        heapq.heapify(size_heap)

        for member in self._files_descending_by_size():
            _, idx = heapq.heappop(size_heap)
            chosen = tar_writers[idx]

            self._write_entry(member, idx)

            # Re-insert the output with its size as reported after the write.
            heapq.heappush(size_heap, (chosen.info().compressed_sz, idx))

    @cached_nullary
    def _write_links(self) -> None:
        """Write each link member into the same output that already holds its link target."""

        for member in self._categorized_entries().links_by_name.values():
            target_name = check.non_empty_str(member.linkname)
            self._write_entry(member, self._output_file_indexes_by_name[target_name])

    @cached_nullary
    def write(self) -> ta.Mapping[str, WrittenOciDataTarFileInfo]:
        """
        Write all outputs and return each writer's info keyed by output file path. Runs at most once per instance.

        Non-files are written first, then files, then links; the writers' tar files are closed before their info is
        collected.
        """

        tar_writers = self._output_tar_writers()

        self._write_non_files()
        self._write_files()
        self._write_links()

        for tar_writer in tar_writers:
            tar_writer.tar_file().close()

        infos: ta.Dict[str, WrittenOciDataTarFileInfo] = {}
        for path, tar_writer in zip(self._output_file_paths, tar_writers):
            infos[path] = tar_writer.info()
        return infos