occystrap 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. occystrap/_version.py +34 -0
  2. occystrap/filters/__init__.py +10 -0
  3. occystrap/filters/base.py +67 -0
  4. occystrap/filters/exclude.py +136 -0
  5. occystrap/filters/inspect.py +179 -0
  6. occystrap/filters/normalize_timestamps.py +123 -0
  7. occystrap/filters/search.py +177 -0
  8. occystrap/inputs/__init__.py +1 -0
  9. occystrap/inputs/base.py +40 -0
  10. occystrap/inputs/docker.py +171 -0
  11. occystrap/{docker_registry.py → inputs/registry.py} +112 -50
  12. occystrap/inputs/tarfile.py +88 -0
  13. occystrap/main.py +330 -31
  14. occystrap/outputs/__init__.py +1 -0
  15. occystrap/outputs/base.py +46 -0
  16. occystrap/{output_directory.py → outputs/directory.py} +10 -9
  17. occystrap/outputs/docker.py +137 -0
  18. occystrap/{output_mounts.py → outputs/mounts.py} +2 -1
  19. occystrap/{output_ocibundle.py → outputs/ocibundle.py} +1 -1
  20. occystrap/outputs/registry.py +240 -0
  21. occystrap/{output_tarfile.py → outputs/tarfile.py} +18 -2
  22. occystrap/pipeline.py +297 -0
  23. occystrap/tarformat.py +122 -0
  24. occystrap/tests/test_inspect.py +355 -0
  25. occystrap/tests/test_tarformat.py +199 -0
  26. occystrap/uri.py +231 -0
  27. occystrap/util.py +67 -38
  28. occystrap-0.4.1.dist-info/METADATA +444 -0
  29. occystrap-0.4.1.dist-info/RECORD +38 -0
  30. {occystrap-0.4.0.dist-info → occystrap-0.4.1.dist-info}/WHEEL +1 -1
  31. {occystrap-0.4.0.dist-info → occystrap-0.4.1.dist-info}/entry_points.txt +0 -1
  32. occystrap/docker_extract.py +0 -36
  33. occystrap-0.4.0.dist-info/METADATA +0 -131
  34. occystrap-0.4.0.dist-info/RECORD +0 -20
  35. occystrap-0.4.0.dist-info/pbr.json +0 -1
  36. {occystrap-0.4.0.dist-info → occystrap-0.4.1.dist-info/licenses}/AUTHORS +0 -0
  37. {occystrap-0.4.0.dist-info → occystrap-0.4.1.dist-info/licenses}/LICENSE +0 -0
  38. {occystrap-0.4.0.dist-info → occystrap-0.4.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,177 @@
1
+ import fnmatch
2
+ import logging
3
+ import os
4
+ import re
5
+ import tarfile
6
+
7
+ from occystrap import constants
8
+ from occystrap.filters.base import ImageFilter
9
+
10
+
11
+ LOG = logging.getLogger(__name__)
12
+ LOG.setLevel(logging.INFO)
13
+
14
+
15
class SearchFilter(ImageFilter):
    """Searches layers for files matching a pattern.

    This filter can operate in two modes:
    - Search-only: wrapped_output is None, just prints results
    - Passthrough: searches AND passes elements to wrapped output

    In passthrough mode, this allows searching while also writing output,
    enabling pipelines like:
        input -> search -> tarfile (search while creating tarball)
    """

    def __init__(self, wrapped_output, pattern, use_regex=False,
                 image=None, tag=None, script_friendly=False):
        """Initialize the search filter.

        Args:
            wrapped_output: The ImageOutput to pass elements to, or None
                for search-only mode.
            pattern: Glob pattern or regex to match file paths.
            use_regex: If True, treat pattern as a regex instead of glob.
            image: Image name for output formatting.
            tag: Image tag for output formatting.
            script_friendly: If True, output in machine-parseable format.
        """
        super().__init__(wrapped_output)
        self.pattern = pattern
        self.use_regex = use_regex
        self.image = image
        self.tag = tag
        self.script_friendly = script_friendly
        self.results = []  # List of (layer_digest, path, file_info_dict)

        # Compile the regex once up front so bad patterns fail early and
        # _matches() doesn't recompile per file.
        if use_regex:
            self._compiled_pattern = re.compile(pattern)

    def fetch_callback(self, digest):
        """Always fetch all layers for searching.

        Even in passthrough mode the wrapped output's fetch callback is
        deliberately not consulted: we need every layer's data in order
        to search it, so we always request the fetch.
        """
        return True

    def _matches(self, path):
        """Check if a path matches the search pattern."""
        if self.use_regex:
            return self._compiled_pattern.search(path) is not None

        # Match against full path or just the filename. This allows
        # patterns like "*bash" to match "/bin/bash".
        filename = os.path.basename(path)
        return (fnmatch.fnmatch(path, self.pattern) or
                fnmatch.fnmatch(filename, self.pattern))

    def _get_file_type(self, member):
        """Get a human-readable file type string for a tar member."""
        if member.isfile():
            return 'file'
        elif member.isdir():
            return 'directory'
        elif member.issym():
            return 'symlink'
        elif member.islnk():
            return 'hardlink'
        elif member.isfifo():
            return 'fifo'
        elif member.ischr():
            return 'character device'
        elif member.isblk():
            return 'block device'
        else:
            return 'unknown'

    def _search_layer(self, name, data):
        """Search a layer tarball for matching files, recording results."""
        LOG.info('Searching layer %s', name)

        data.seek(0)
        try:
            with tarfile.open(fileobj=data, mode='r') as layer_tar:
                for member in layer_tar:
                    if not self._matches(member.name):
                        continue

                    file_info = {
                        'type': self._get_file_type(member),
                        'size': member.size,
                        'mode': member.mode,
                        'uid': member.uid,
                        'gid': member.gid,
                        'mtime': member.mtime,
                    }
                    # Links also record their target path.
                    if member.issym() or member.islnk():
                        file_info['linkname'] = member.linkname

                    self.results.append((name, member.name, file_info))
        except tarfile.TarError as e:
            # A corrupt layer shouldn't abort the whole search.
            LOG.error('Failed to read layer %s: %s', name, e)

    def process_image_element(self, element_type, name, data):
        """Process an image element, searching layers for matches."""
        # Search layers
        if element_type == constants.IMAGE_LAYER and data is not None:
            self._search_layer(name, data)

        # Pass through to wrapped output if present
        if self._wrapped is not None:
            if data is not None:
                data.seek(0)  # Reset for next consumer
            self._wrapped.process_image_element(element_type, name, data)

    def _print_results(self):
        """Print search results to stdout."""
        if not self.results:
            if not self.script_friendly:
                print('No matches found.')
            return

        if self.script_friendly:
            # Output format: image:tag:layer:path
            # One line per match, suitable for piping to other tools
            for layer_digest, path, _file_info in self.results:
                print('%s:%s:%s:%s'
                      % (self.image, self.tag, layer_digest, path))
            return

        # Group results by layer; dict insertion order preserves the
        # order layers were searched in.
        results_by_layer = {}
        for layer_digest, path, file_info in self.results:
            results_by_layer.setdefault(layer_digest, []).append(
                (path, file_info))

        # Print results
        for layer_digest, entries in results_by_layer.items():
            print('Layer: %s' % layer_digest)
            for path, file_info in entries:
                if file_info['type'] in ('symlink', 'hardlink'):
                    print(' %s -> %s (%s)'
                          % (path, file_info['linkname'], file_info['type']))
                elif file_info['type'] == 'file':
                    print(' %s (%s, %d bytes)'
                          % (path, file_info['type'], file_info['size']))
                else:
                    # Directories and everything else print the same way.
                    print(' %s (%s)' % (path, file_info['type']))
            print()

        layer_count = len(results_by_layer)
        match_count = len(self.results)
        print('Found %d match%s in %d layer%s.'
              % (match_count, '' if match_count == 1 else 'es',
                 layer_count, '' if layer_count == 1 else 's'))

    def finalize(self):
        """Print search results and finalize wrapped output."""
        self._print_results()

        if self._wrapped is not None:
            self._wrapped.finalize()
@@ -0,0 +1 @@
1
+ # Input modules for reading container images from various sources
@@ -0,0 +1,40 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+
4
class ImageInput(ABC):
    """Abstract base class for image input sources.

    Concrete inputs fetch container images from somewhere (a registry,
    the local Docker daemon, a tarfile on disk) and yield the image's
    elements -- config files and layers -- in a standard format that
    downstream outputs and filters consume.
    """

    @property
    @abstractmethod
    def image(self):
        """The name of the image this input provides."""

    @property
    @abstractmethod
    def tag(self):
        """The tag of the image this input provides."""

    @abstractmethod
    def fetch(self, fetch_callback=None):
        """Fetch image elements (config files and layers).

        Args:
            fetch_callback: Optional callable taking a layer digest and
                returning True when the layer should be fetched, False
                to skip it. When None, every layer is fetched.

        Yields:
            (element_type, name, data) tuples where:
            - element_type is constants.CONFIG_FILE or
              constants.IMAGE_LAYER
            - name identifies the element (config filename or layer
              digest)
            - data is a file-like object with the element contents, or
              None when fetch_callback skipped the layer
        """
@@ -0,0 +1,171 @@
1
+ # Fetch images from the local Docker or Podman daemon via the Docker Engine API.
2
+ # This communicates over a Unix domain socket (default: /var/run/docker.sock).
3
+ #
4
+ # Docker Engine API documentation:
5
+ # https://docs.docker.com/engine/api/
6
+ #
7
+ # Podman compatibility:
8
+ # Podman provides a Docker-compatible API via podman.socket. Use the --socket
9
+ # option to point to the Podman socket:
10
+ # - Rootful: /run/podman/podman.sock
11
+ # - Rootless: /run/user/<uid>/podman/podman.sock
12
+ # See: https://docs.podman.io/en/latest/markdown/podman-system-service.1.html
13
+ #
14
+ # The API returns images in the same format as 'docker save', which is the
15
+ # same format that inputs/tarfile.py reads. We stream the tarball and parse
16
+ # it on the fly.
17
+ #
18
+ # API Limitation: Unlike the registry API (inputs/registry.py) which can fetch
19
+ # individual layer blobs via GET /v2/<name>/blobs/<digest>, the Docker Engine
20
+ # API only provides /images/{name}/get which returns a complete tarball. There
21
+ # is no endpoint to fetch individual image components (config, layers)
22
+ # separately. This is a fundamental limitation of the Docker Engine API.
23
+ # See: https://github.com/moby/moby/issues/24851
24
+ #
25
+ # The tarball streaming approach used here is the official supported method
26
+ # and matches what 'docker save' does internally.
27
+
28
+ import io
29
+ import json
30
+ import logging
31
+ import os
32
+ import tarfile
33
+
34
+ import requests_unixsocket
35
+
36
+ from occystrap import constants
37
+ from occystrap.inputs.base import ImageInput
38
+
39
+
40
+ LOG = logging.getLogger(__name__)
41
+ LOG.setLevel(logging.INFO)
42
+
43
+ DEFAULT_SOCKET_PATH = '/var/run/docker.sock'
44
+
45
+
46
def always_fetch(digest):
    """Default fetch callback: accept every layer regardless of digest."""
    return True
48
+
49
+
50
class Image(ImageInput):
    """Fetch an image from a local Docker (or Podman) daemon.

    Talks to the Docker Engine API over a Unix domain socket and streams
    the image export tarball (the 'docker save' format), yielding the
    config file and layers it contains.
    """

    def __init__(self, image, tag='latest', socket_path=DEFAULT_SOCKET_PATH):
        self._image = image
        self._tag = tag
        self.socket_path = socket_path
        self._session = None  # Created lazily on first request

    @property
    def image(self):
        """Return the image name."""
        return self._image

    @property
    def tag(self):
        """Return the image tag."""
        return self._tag

    def _get_session(self):
        # Create the session on first use so constructing an Image has
        # no side effects.
        if self._session is None:
            self._session = requests_unixsocket.Session()
        return self._session

    def _socket_url(self, path):
        """Build an http+unix:// URL for a Docker API path.

        requests_unixsocket expects the socket path URL-encoded into the
        host position. Use urllib.parse.quote with safe='' so that ALL
        reserved characters are percent-encoded, not just '/'.
        """
        from urllib.parse import quote
        return 'http+unix://%s%s' % (quote(self.socket_path, safe=''), path)

    def _request(self, method, path, stream=False):
        """Issue a Docker API request, raising on non-200 responses."""
        session = self._get_session()
        url = self._socket_url(path)
        LOG.debug('Docker API request: %s %s', method, path)
        r = session.request(method, url, stream=stream)
        if r.status_code == 404:
            raise Exception('Image not found: %s:%s' % (self.image, self.tag))
        if r.status_code != 200:
            raise Exception('Docker API error %d: %s' % (r.status_code, r.text))
        return r

    def _get_image_reference(self):
        # Return the image reference in the format Docker expects
        return '%s:%s' % (self.image, self.tag)

    def inspect(self):
        """Get image metadata from the Docker daemon."""
        ref = self._get_image_reference()
        r = self._request('GET', '/images/%s/json' % ref)
        return r.json()

    def fetch(self, fetch_callback=always_fetch):
        """Fetch image layers from the local Docker daemon.

        This uses the Docker Engine API to export the image as a tarball
        (equivalent to 'docker save'), buffers it to a temporary file
        (tarfile needs to seek), and parses it on the fly.

        Args:
            fetch_callback: Callable taking a layer digest; returns True
                when the layer data should be read. Skipped layers are
                yielded with data None.

        Yields:
            (element_type, name, data) tuples as described in ImageInput.
        """
        ref = self._get_image_reference()
        LOG.info('Fetching image %s from Docker daemon at %s',
                 ref, self.socket_path)

        # First verify the image exists so we fail early with a clear error.
        try:
            self.inspect()
        except Exception as e:
            LOG.error('Failed to inspect image: %s', str(e))
            raise

        # Stream the image tarball from Docker
        LOG.info('Streaming image tarball from Docker daemon')
        r = self._request('GET', '/images/%s/get' % ref, stream=True)

        # Buffer the stream to a temporary file (delete=False so tarfile
        # can reopen it by name). The outer try/finally guarantees the
        # file is removed even when the download itself fails part way,
        # which previously leaked the temporary file.
        import tempfile
        tf = tempfile.NamedTemporaryFile(delete=False)
        temp_path = tf.name
        try:
            LOG.info('Buffering image to temporary file %s', temp_path)
            try:
                for chunk in r.iter_content(8192):
                    tf.write(chunk)
            finally:
                tf.close()
                # Release the HTTP connection back to the session.
                r.close()

            # Parse the tarball (same format as 'docker save')
            with tarfile.open(temp_path, 'r') as tar:
                # manifest.json names the config file and orders the layers.
                manifest_member = tar.getmember('manifest.json')
                manifest_file = tar.extractfile(manifest_member)
                manifest = json.loads(manifest_file.read().decode('utf-8'))

                # Yield config file first, as consumers expect.
                config_filename = manifest[0]['Config']
                LOG.info('Reading config file %s', config_filename)
                config_member = tar.getmember(config_filename)
                config_file = tar.extractfile(config_member)
                config_data = config_file.read()
                yield (constants.CONFIG_FILE, config_filename,
                       io.BytesIO(config_data))

                # Yield each layer in manifest order.
                layers = manifest[0]['Layers']
                LOG.info('There are %d image layers', len(layers))

                for layer_path in layers:
                    # Layer path is like "abc123/layer.tar"; the directory
                    # component identifies the layer.
                    layer_digest = os.path.dirname(layer_path)
                    if not fetch_callback(layer_digest):
                        LOG.info('Fetch callback says skip layer %s',
                                 layer_digest)
                        yield (constants.IMAGE_LAYER, layer_digest, None)
                        continue

                    LOG.info('Reading layer %s', layer_path)
                    layer_member = tar.getmember(layer_path)
                    layer_file = tar.extractfile(layer_member)
                    layer_data = layer_file.read()
                    yield (constants.IMAGE_LAYER, layer_digest,
                           io.BytesIO(layer_data))

        finally:
            # Clean up temp file
            if os.path.exists(temp_path):
                os.unlink(temp_path)

        LOG.info('Done')
@@ -13,12 +13,19 @@ import io
13
13
  import logging
14
14
  import os
15
15
  import re
16
+ from requests.exceptions import ChunkedEncodingError, ConnectionError
16
17
  import sys
17
18
  import tempfile
19
+ import time
18
20
  import zlib
19
21
 
20
22
  from occystrap import constants
21
23
  from occystrap import util
24
+ from occystrap.inputs.base import ImageInput
25
+
26
+ # Retry configuration
27
+ MAX_RETRIES = 3
28
+ RETRY_BACKOFF_BASE = 2 # Exponential backoff: 2^attempt seconds
22
29
 
23
30
  LOG = logging.getLogger(__name__)
24
31
  LOG.setLevel(logging.INFO)
@@ -30,19 +37,31 @@ def always_fetch():
30
37
  return True
31
38
 
32
39
 
33
- class Image(object):
34
- def __init__(self, registry, image, tag, os='linux', architecture='amd64', variant='',
35
- secure=True):
40
+ class Image(ImageInput):
41
+ def __init__(self, registry, image, tag, os='linux', architecture='amd64',
42
+ variant='', secure=True, username=None, password=None):
36
43
  self.registry = registry
37
- self.image = image
38
- self.tag = tag
44
+ self._image = image
45
+ self._tag = tag
39
46
  self.os = os
40
47
  self.architecture = architecture
41
48
  self.variant = variant
42
49
  self.secure = secure
50
+ self.username = username
51
+ self.password = password
43
52
 
44
53
  self._cached_auth = None
45
54
 
55
+ @property
56
+ def image(self):
57
+ """Return the image name."""
58
+ return self._image
59
+
60
+ @property
61
+ def tag(self):
62
+ """Return the image tag."""
63
+ return self._tag
64
+
46
65
  def request_url(self, method, url, headers=None, data=None, stream=False):
47
66
  if not headers:
48
67
  headers = {}
@@ -55,11 +74,17 @@ class Image(object):
55
74
  stream=stream)
56
75
  except util.UnauthorizedException as e:
57
76
  auth_re = re.compile('Bearer realm="([^"]*)",service="([^"]*)"')
58
- m = auth_re.match(e.args[5].get('Www-Authenticate'))
77
+ m = auth_re.match(e.args[5].get('Www-Authenticate', ''))
59
78
  if m:
60
79
  auth_url = ('%s?service=%s&scope=repository:%s:pull'
61
80
  % (m.group(1), m.group(2), self.image))
62
- r = util.request_url('GET', auth_url)
81
+ # If credentials are provided, use Basic auth for token request
82
+ if self.username and self.password:
83
+ r = util.request_url(
84
+ 'GET', auth_url,
85
+ auth=(self.username, self.password))
86
+ else:
87
+ r = util.request_url('GET', auth_url)
63
88
  token = r.json().get('token')
64
89
  headers.update({'Authorization': 'Bearer %s' % token})
65
90
  self._cached_auth = token
@@ -82,11 +107,17 @@ class Image(object):
82
107
  'image': self.image,
83
108
  'tag': self.tag
84
109
  },
85
- headers={'Accept': ('application/vnd.docker.distribution.manifest.v2+json,'
86
- 'application/vnd.docker.distribution.manifest.list.v2+json')})
110
+ headers={
111
+ 'Accept': ('application/vnd.docker.distribution.manifest.v2+json,'
112
+ 'application/vnd.docker.distribution.manifest.list.v2+json,'
113
+ 'application/vnd.oci.image.manifest.v1+json,'
114
+ 'application/vnd.oci.image.index.v1+json')
115
+ })
87
116
 
88
117
  config_digest = None
89
- if r.headers['Content-Type'] == 'application/vnd.docker.distribution.manifest.v2+json':
118
+ if r.headers['Content-Type'] in [
119
+ 'application/vnd.docker.distribution.manifest.v2+json',
120
+ 'application/vnd.oci.image.manifest.v1+json']:
90
121
  manifest = r.json()
91
122
  config_digest = manifest['config']['digest']
92
123
  elif r.headers['Content-Type'] in [
@@ -95,11 +126,13 @@ class Image(object):
95
126
  for m in r.json()['manifests']:
96
127
  if 'variant' in m['platform']:
97
128
  LOG.info('Found manifest for %s on %s %s'
98
- % (m['platform']['os'], m['platform']['architecture'],
129
+ % (m['platform']['os'],
130
+ m['platform']['architecture'],
99
131
  m['platform']['variant']))
100
132
  else:
101
133
  LOG.info('Found manifest for %s on %s'
102
- % (m['platform']['os'], m['platform']['architecture']))
134
+ % (m['platform']['os'],
135
+ m['platform']['architecture']))
103
136
 
104
137
  if (m['platform']['os'] == self.os and
105
138
  m['platform']['architecture'] == self.architecture and
@@ -114,8 +147,11 @@ class Image(object):
114
147
  'image': self.image,
115
148
  'tag': m['digest']
116
149
  },
117
- headers={'Accept': ('application/vnd.docker.distribution.manifest.v2+json, '
118
- 'application/vnd.oci.image.manifest.v1+json')})
150
+ headers={
151
+ 'Accept': ('application/vnd.docker.distribution.'
152
+ 'manifest.v2+json, '
153
+ 'application/vnd.oci.image.manifest.v1+json')
154
+ })
119
155
  manifest = r.json()
120
156
  config_digest = manifest['config']['digest']
121
157
 
@@ -158,41 +194,67 @@ class Image(object):
158
194
 
159
195
  LOG.info('Fetching layer %s (%d bytes)'
160
196
  % (layer['digest'], layer['size']))
161
- r = self.request_url(
162
- 'GET',
163
- '%(moniker)s://%(registry)s/v2/%(image)s/blobs/%(layer)s'
164
- % {
165
- 'moniker': moniker,
166
- 'registry': self.registry,
167
- 'image': self.image,
168
- 'layer': layer['digest']
169
- },
170
- stream=True)
171
-
172
- # We can use zlib for streaming decompression, but we need to tell it
173
- # to ignore the gzip header which it doesn't understand. Unfortunately
174
- # tarfile doesn't do streaming writes (and we need to know the
175
- # decompressed size before we can write to the tarfile), so we stream
176
- # to a temporary file on disk.
177
- try:
178
- h = hashlib.sha256()
179
- d = zlib.decompressobj(16 + zlib.MAX_WBITS)
180
-
181
- with tempfile.NamedTemporaryFile(delete=False) as tf:
182
- LOG.info('Temporary file for layer is %s' % tf.name)
183
- for chunk in r.iter_content(8192):
184
- tf.write(d.decompress(chunk))
185
- h.update(chunk)
186
-
187
- if h.hexdigest() != layer_filename:
188
- LOG.error('Hash verification failed for layer (%s vs %s)'
189
- % (layer_filename, h.hexdigest()))
190
- sys.exit(1)
191
-
192
- with open(tf.name, 'rb') as f:
193
- yield (constants.IMAGE_LAYER, layer_filename, f)
194
-
195
- finally:
196
- os.unlink(tf.name)
197
+
198
+ # Retry logic for streaming downloads which can fail mid-transfer
199
+ last_exception = None
200
+ for attempt in range(MAX_RETRIES + 1):
201
+ try:
202
+ r = self.request_url(
203
+ 'GET',
204
+ '%(moniker)s://%(registry)s/v2/%(image)s/blobs/%(layer)s'
205
+ % {
206
+ 'moniker': moniker,
207
+ 'registry': self.registry,
208
+ 'image': self.image,
209
+ 'layer': layer['digest']
210
+ },
211
+ stream=True)
212
+
213
+ # We can use zlib for streaming decompression, but we need
214
+ # to tell it to ignore the gzip header which it doesn't
215
+ # understand. Unfortunately tarfile doesn't do streaming
216
+ # writes (and we need to know the decompressed size before
217
+ # we can write to the tarfile), so we stream to a temporary
218
+ # file on disk.
219
+ h = hashlib.sha256()
220
+ d = zlib.decompressobj(16 + zlib.MAX_WBITS)
221
+
222
+ with tempfile.NamedTemporaryFile(delete=False) as tf:
223
+ LOG.info('Temporary file for layer is %s' % tf.name)
224
+ for chunk in r.iter_content(8192):
225
+ tf.write(d.decompress(chunk))
226
+ h.update(chunk)
227
+
228
+ if h.hexdigest() != layer_filename:
229
+ LOG.error('Hash verification failed for layer (%s vs %s)'
230
+ % (layer_filename, h.hexdigest()))
231
+ sys.exit(1)
232
+
233
+ try:
234
+ with open(tf.name, 'rb') as f:
235
+ yield (constants.IMAGE_LAYER, layer_filename, f)
236
+ finally:
237
+ os.unlink(tf.name)
238
+
239
+ # Success - break out of retry loop
240
+ break
241
+
242
+ except (ChunkedEncodingError, ConnectionError) as e:
243
+ last_exception = e
244
+ # Clean up temp file if it exists
245
+ if 'tf' in dir() and tf.name and os.path.exists(tf.name):
246
+ os.unlink(tf.name)
247
+
248
+ if attempt < MAX_RETRIES:
249
+ wait_time = RETRY_BACKOFF_BASE ** attempt
250
+ LOG.warning(
251
+ 'Layer download failed (attempt %d/%d): %s. '
252
+ 'Retrying in %d seconds...'
253
+ % (attempt + 1, MAX_RETRIES + 1, str(e), wait_time))
254
+ time.sleep(wait_time)
255
+ else:
256
+ LOG.error('Layer download failed after %d attempts: %s'
257
+ % (MAX_RETRIES + 1, str(e)))
258
+ raise last_exception
197
259
 
198
260
  LOG.info('Done')