occystrap 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. occystrap/_version.py +34 -0
  2. occystrap/filters/__init__.py +10 -0
  3. occystrap/filters/base.py +67 -0
  4. occystrap/filters/exclude.py +136 -0
  5. occystrap/filters/inspect.py +179 -0
  6. occystrap/filters/normalize_timestamps.py +123 -0
  7. occystrap/filters/search.py +177 -0
  8. occystrap/inputs/__init__.py +1 -0
  9. occystrap/inputs/base.py +40 -0
  10. occystrap/inputs/docker.py +171 -0
  11. occystrap/{docker_registry.py → inputs/registry.py} +112 -50
  12. occystrap/inputs/tarfile.py +88 -0
  13. occystrap/main.py +330 -31
  14. occystrap/outputs/__init__.py +1 -0
  15. occystrap/outputs/base.py +46 -0
  16. occystrap/{output_directory.py → outputs/directory.py} +10 -9
  17. occystrap/outputs/docker.py +137 -0
  18. occystrap/{output_mounts.py → outputs/mounts.py} +2 -1
  19. occystrap/{output_ocibundle.py → outputs/ocibundle.py} +1 -1
  20. occystrap/outputs/registry.py +240 -0
  21. occystrap/{output_tarfile.py → outputs/tarfile.py} +18 -2
  22. occystrap/pipeline.py +297 -0
  23. occystrap/tarformat.py +122 -0
  24. occystrap/tests/test_inspect.py +355 -0
  25. occystrap/tests/test_tarformat.py +199 -0
  26. occystrap/uri.py +231 -0
  27. occystrap/util.py +67 -38
  28. occystrap-0.4.1.dist-info/METADATA +444 -0
  29. occystrap-0.4.1.dist-info/RECORD +38 -0
  30. {occystrap-0.4.0.dist-info → occystrap-0.4.1.dist-info}/WHEEL +1 -1
  31. {occystrap-0.4.0.dist-info → occystrap-0.4.1.dist-info}/entry_points.txt +0 -1
  32. occystrap/docker_extract.py +0 -36
  33. occystrap-0.4.0.dist-info/METADATA +0 -131
  34. occystrap-0.4.0.dist-info/RECORD +0 -20
  35. occystrap-0.4.0.dist-info/pbr.json +0 -1
  36. {occystrap-0.4.0.dist-info → occystrap-0.4.1.dist-info/licenses}/AUTHORS +0 -0
  37. {occystrap-0.4.0.dist-info → occystrap-0.4.1.dist-info/licenses}/LICENSE +0 -0
  38. {occystrap-0.4.0.dist-info → occystrap-0.4.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,355 @@
1
+ """Tests for the inspect filter."""
2
+
3
+ import io
4
+ import json
5
+ import os
6
+ import tarfile
7
+ import tempfile
8
+ import unittest
9
+
10
+ from occystrap import constants
11
+ from occystrap.filters.inspect import InspectFilter
12
+
13
+
14
class TestInspectFilter(unittest.TestCase):
    """Exercise InspectFilter with synthetic image configs and layers.

    Each test gets its own temporary ``.jsonl`` file (created in setUp,
    removed in tearDown) which is handed to the filter as its output
    path.
    """

    def setUp(self):
        handle, path = tempfile.mkstemp(suffix='.jsonl')
        self.output_fd = handle
        self.output_file = path
        # Only the path is used from here on, so release the descriptor.
        os.close(handle)
        # Guarantee the file is empty before the test body runs.
        with open(path, 'w') as scratch:
            scratch.truncate(0)

    def tearDown(self):
        if not os.path.exists(self.output_file):
            return
        os.unlink(self.output_file)

    def _make_config(self, history_entries):
        """Return a file-like object holding an image config as JSON.

        Args:
            history_entries: List of dicts placed under the 'history'
                key; each may carry created, created_by, comment and
                empty_layer.
        """
        rootfs = {'type': 'layers', 'diff_ids': []}
        document = {'history': history_entries, 'rootfs': rootfs}
        return io.BytesIO(json.dumps(document).encode('utf-8'))

    def _make_layer(self, files=None):
        """Return a file-like object holding an uncompressed layer tar.

        Args:
            files: List of (name, content) tuples. When None, the layer
                contains one small file.
        """
        entries = [('file.txt', b'hello')] if files is None else files

        stream = io.BytesIO()
        with tarfile.open(fileobj=stream, mode='w') as archive:
            for member_name, payload in entries:
                info = tarfile.TarInfo(name=member_name)
                info.size = len(payload)
                archive.addfile(info, io.BytesIO(payload))
        # Rewind so the consumer reads the archive from its start.
        stream.seek(0)
        return stream

    def _drive(self, inspector, elements):
        """Feed (type, name, data) triples to the filter, then finalize."""
        for element_type, element_name, data in elements:
            inspector.process_image_element(
                element_type, element_name, data)
        inspector.finalize()

    def _output_lines(self):
        """Return every line currently in the output file."""
        with open(self.output_file) as fh:
            return fh.readlines()

    def _first_record(self):
        """Return the first line of the output file decoded from JSON."""
        with open(self.output_file) as fh:
            return json.loads(fh.readline())

    def test_basic_output(self):
        """One config plus one layer yields a single JSONL record."""
        inspector = InspectFilter(
            None, self.output_file,
            image='myimage', tag='v1')

        history = [{
            'created': '2025-01-15T10:00:00Z',
            'created_by': '/bin/sh -c echo hello',
            'comment': '',
        }]
        config = self._make_config(history)
        layer = self._make_layer()

        self._drive(inspector, [
            (constants.CONFIG_FILE, 'config.json', config),
            (constants.IMAGE_LAYER, 'abc123', layer),
        ])

        lines = self._output_lines()
        self.assertEqual(len(lines), 1)

        record = json.loads(lines[0])
        self.assertEqual(record['name'], 'myimage:v1')
        self.assertEqual(len(record['layers']), 1)

        top = record['layers'][0]
        self.assertEqual(top['Id'], 'sha256:abc123')
        self.assertGreater(top['Size'], 0)
        self.assertEqual(top['CreatedBy'], '/bin/sh -c echo hello')
        self.assertEqual(top['Tags'], ['myimage:v1'])

    def test_empty_layer_history_skipped(self):
        """History entries flagged empty_layer get no layer record."""
        inspector = InspectFilter(
            None, self.output_file,
            image='img', tag='latest')

        history = [
            {
                'created': '2025-01-15T10:00:00Z',
                'created_by': '/bin/sh -c apt-get install',
            },
            {
                'created': '2025-01-15T10:01:00Z',
                'created_by': '/bin/sh -c #(nop) ENV FOO=bar',
                'empty_layer': True,
            },
            {
                'created': '2025-01-15T10:02:00Z',
                'created_by': '/bin/sh -c echo done',
            },
        ]
        config = self._make_config(history)
        install_layer = self._make_layer([('pkg.deb', b'x' * 100)])
        done_layer = self._make_layer([('done.txt', b'done')])

        self._drive(inspector, [
            (constants.CONFIG_FILE, 'config.json', config),
            (constants.IMAGE_LAYER, 'layer1hash', install_layer),
            (constants.IMAGE_LAYER, 'layer2hash', done_layer),
        ])

        layers = self._first_record()['layers']

        # Three history entries, but only two carry a real layer.
        self.assertEqual(len(layers), 2)

        # Output order is newest first.
        self.assertEqual(
            layers[0]['CreatedBy'], '/bin/sh -c echo done')
        self.assertEqual(
            layers[1]['CreatedBy'], '/bin/sh -c apt-get install')

    def test_digest_normalization(self):
        """Layer ids carry a sha256: prefix whether or not one was given."""
        inspector = InspectFilter(
            None, self.output_file,
            image='img', tag='v1')

        config = self._make_config(
            [{'created_by': 'step1'}, {'created_by': 'step2'}])
        bare_layer = self._make_layer()
        prefixed_layer = self._make_layer()

        self._drive(inspector, [
            (constants.CONFIG_FILE, 'config.json', config),
            (constants.IMAGE_LAYER, 'abc123', bare_layer),
            (constants.IMAGE_LAYER, 'sha256:def456', prefixed_layer),
        ])

        layers = self._first_record()['layers']

        # Newest first, so the second layer fed in is reported first.
        self.assertEqual(layers[0]['Id'], 'sha256:def456')
        self.assertEqual(layers[1]['Id'], 'sha256:abc123')

    def test_append_mode(self):
        """Successive filters on one path each add their own line."""
        for index in range(3):
            inspector = InspectFilter(
                None, self.output_file,
                image='img%d' % index, tag='v1')
            config = self._make_config([{'created_by': 'step'}])
            layer = self._make_layer()
            self._drive(inspector, [
                (constants.CONFIG_FILE, 'cfg', config),
                (constants.IMAGE_LAYER, 'hash%d' % index, layer),
            ])

        lines = self._output_lines()

        self.assertEqual(len(lines), 3)
        for index, line in enumerate(lines):
            record = json.loads(line)
            self.assertEqual(record['name'], 'img%d:v1' % index)

    def test_passthrough_mode(self):
        """Elements reach the wrapped output and the record is written."""
        seen = []

        class MockOutput:
            """Stand-in output that records each forwarded element."""

            def fetch_callback(self, digest):
                return True

            def process_image_element(self, et, name, data):
                seen.append((et, name))

            def finalize(self):
                pass

        wrapped = MockOutput()
        inspector = InspectFilter(
            wrapped, self.output_file,
            image='img', tag='v1')

        config = self._make_config([{'created_by': 'step'}])
        layer = self._make_layer()

        self._drive(inspector, [
            (constants.CONFIG_FILE, 'config.json', config),
            (constants.IMAGE_LAYER, 'abc123', layer),
        ])

        self.assertEqual(len(seen), 2)
        self.assertEqual(
            seen[0], (constants.CONFIG_FILE, 'config.json'))
        self.assertEqual(
            seen[1], (constants.IMAGE_LAYER, 'abc123'))

        # The inspection record is written in addition to forwarding.
        record = self._first_record()
        self.assertEqual(record['name'], 'img:v1')

    def test_no_config(self):
        """A layer with no preceding config still yields a record."""
        inspector = InspectFilter(
            None, self.output_file,
            image='img', tag='v1')

        layer = self._make_layer()
        self._drive(inspector, [
            (constants.IMAGE_LAYER, 'abc123', layer),
        ])

        layers = self._first_record()['layers']

        self.assertEqual(len(layers), 1)
        # With no history available, CreatedBy is the empty string.
        self.assertEqual(layers[0]['CreatedBy'], '')

    def test_skipped_layer(self):
        """A layer passed with data=None is reported with Size 0."""
        inspector = InspectFilter(
            None, self.output_file,
            image='img', tag='v1')

        config = self._make_config([{'created_by': 'step'}])
        self._drive(inspector, [
            (constants.CONFIG_FILE, 'config.json', config),
            (constants.IMAGE_LAYER, 'abc123', None),
        ])

        layers = self._first_record()['layers']

        self.assertEqual(len(layers), 1)
        self.assertEqual(layers[0]['Size'], 0)

    def test_tags_on_topmost_layer_only(self):
        """Only the first reported (topmost) layer carries Tags."""
        inspector = InspectFilter(
            None, self.output_file,
            image='myimg', tag='latest')

        config = self._make_config(
            [{'created_by': 'base'}, {'created_by': 'app'}])
        base_layer = self._make_layer()
        app_layer = self._make_layer()

        self._drive(inspector, [
            (constants.CONFIG_FILE, 'config.json', config),
            (constants.IMAGE_LAYER, 'base', base_layer),
            (constants.IMAGE_LAYER, 'app', app_layer),
        ])

        layers = self._first_record()['layers']

        # Newest first: index 0 is 'app' (topmost), index 1 is 'base'.
        self.assertEqual(layers[0]['Tags'], ['myimg:latest'])
        self.assertIsNone(layers[1]['Tags'])

    def test_created_timestamp_parsing(self):
        """'Z' and '+00:00' spellings of one instant give equal ints."""
        inspector = InspectFilter(
            None, self.output_file,
            image='img', tag='v1')

        history = [
            {
                'created': '2025-06-15T12:30:45Z',
                'created_by': 'iso-utc',
            },
            {
                'created': '2025-06-15T12:30:45+00:00',
                'created_by': 'iso-offset',
            },
        ]
        config = self._make_config(history)
        utc_layer = self._make_layer()
        offset_layer = self._make_layer()

        self._drive(inspector, [
            (constants.CONFIG_FILE, 'config.json', config),
            (constants.IMAGE_LAYER, 'l1', utc_layer),
            (constants.IMAGE_LAYER, 'l2', offset_layer),
        ])

        layers = self._first_record()['layers']

        # Output is newest first, so the first history entry is index 1.
        ts1 = layers[1]['Created']
        ts2 = layers[0]['Created']
        self.assertIsInstance(ts1, int)
        self.assertIsInstance(ts2, int)
        self.assertEqual(ts1, ts2)
        self.assertGreater(ts1, 0)
352
+
353
+
354
+ if __name__ == '__main__':
355
+ unittest.main()
@@ -0,0 +1,199 @@
1
+ """Tests for the tarformat module."""
2
+
3
+ import io
4
+ import tarfile
5
+ import unittest
6
+
7
+ from occystrap.tarformat import (
8
+ needs_pax_format,
9
+ select_tar_format_for_layer,
10
+ USTAR_MAX_ID,
11
+ )
12
+
13
+
14
class TestNeedsPaxFormat(unittest.TestCase):
    """Check needs_pax_format on path, link, id and character limits."""

    def _make_member(self, name, **kwargs):
        """Build a TarInfo, taking size/uid/gid/linkname from kwargs.

        Attributes not supplied fall back to 0 (size, uid, gid) or the
        empty string (linkname).
        """
        member = tarfile.TarInfo(name=name)
        fallbacks = (('size', 0), ('uid', 0), ('gid', 0), ('linkname', ''))
        for attribute, fallback in fallbacks:
            setattr(member, attribute, kwargs.get(attribute, fallback))
        return member

    def _needs_pax(self, name, **kwargs):
        """Return needs_pax_format() for a member built from the args."""
        return needs_pax_format(self._make_member(name, **kwargs))

    def test_short_path_uses_ustar(self):
        """A short path does not require PAX."""
        self.assertFalse(self._needs_pax('short/path/file.txt'))

    def test_path_at_ustar_limit_uses_ustar(self):
        """A path sitting exactly on the USTAR limit does not need PAX."""
        # 155-char dirname + '/' + 100-char basename = 256 chars total.
        directory = 'a' * 155
        leaf = 'b' * 96 + '.txt'
        full_path = '/'.join((directory, leaf))
        self.assertEqual(len(full_path), 256)
        self.assertFalse(self._needs_pax(full_path))

    def test_path_over_limit_requires_pax(self):
        """A 257-character path requires PAX."""
        self.assertTrue(self._needs_pax('a' * 257))

    def test_long_basename_requires_pax(self):
        """A 101-character basename requires PAX."""
        leaf = 'x' * 101
        self.assertTrue(self._needs_pax('dir/' + leaf))

    def test_long_dirname_requires_pax(self):
        """A 156-character dirname requires PAX."""
        directory = 'a' * 156
        self.assertTrue(self._needs_pax(directory + '/file.txt'))

    def test_long_linkname_requires_pax(self):
        """A 101-character link target requires PAX."""
        self.assertTrue(
            self._needs_pax('mylink', linkname='x' * 101))

    def test_linkname_at_limit_uses_ustar(self):
        """A link target of exactly 100 characters does not need PAX."""
        self.assertFalse(
            self._needs_pax('mylink', linkname='x' * 100))

    def test_large_uid_requires_pax(self):
        """A UID one above USTAR_MAX_ID requires PAX."""
        self.assertTrue(
            self._needs_pax('file.txt', uid=USTAR_MAX_ID + 1))

    def test_large_gid_requires_pax(self):
        """A GID one above USTAR_MAX_ID requires PAX."""
        self.assertTrue(
            self._needs_pax('file.txt', gid=USTAR_MAX_ID + 1))

    def test_uid_at_limit_uses_ustar(self):
        """A UID equal to USTAR_MAX_ID does not need PAX."""
        self.assertFalse(
            self._needs_pax('file.txt', uid=USTAR_MAX_ID))

    def test_non_ascii_path_requires_pax(self):
        """A path containing non-ASCII characters requires PAX."""
        self.assertTrue(self._needs_pax('Főtanúsítvány.pem'))

    def test_non_ascii_linkname_requires_pax(self):
        """A link target containing non-ASCII characters requires PAX."""
        self.assertTrue(
            self._needs_pax('mylink', linkname='célpont.txt'))

    def test_ascii_path_uses_ustar(self):
        """An ASCII-only path does not need PAX."""
        self.assertFalse(self._needs_pax('normal/ascii/path.txt'))
100
+
101
+
102
class TestSelectTarFormatForLayer(unittest.TestCase):
    """Check which tar format select_tar_format_for_layer picks."""

    def _create_tar(self, members):
        """Build an in-memory PAX tar archive from the given members.

        Args:
            members: List of (name, content) or (name, content, kwargs)
                tuples. str content is encoded to bytes; each kwargs
                item is set as an attribute on the member's TarInfo.

        Returns:
            BytesIO containing the tar archive, positioned at offset 0.
        """
        stream = io.BytesIO()
        archive = tarfile.open(
            fileobj=stream, mode='w', format=tarfile.PAX_FORMAT)
        with archive:
            for entry in members:
                if len(entry) == 2:
                    # Two-item entries carry no extra attributes.
                    entry = (*entry, {})
                name, content, attrs = entry

                if isinstance(content, str):
                    payload = content.encode()
                else:
                    payload = content

                info = tarfile.TarInfo(name=name)
                info.size = len(payload)
                for attribute, value in attrs.items():
                    setattr(info, attribute, value)
                archive.addfile(info, io.BytesIO(payload))
        stream.seek(0)
        return stream

    def test_normal_files_select_ustar(self):
        """Short ASCII member names select USTAR."""
        members = [
            ('file1.txt', 'content1'),
            ('dir/file2.txt', 'content2'),
        ]
        chosen = select_tar_format_for_layer(self._create_tar(members))
        self.assertEqual(chosen, tarfile.USTAR_FORMAT)

    def test_long_path_selects_pax(self):
        """One over-long member path makes the layer select PAX."""
        # 200 + 1 + 57 = 258 characters.
        too_long = 'a' * 200 + '/' + 'b' * 57
        members = [
            ('short.txt', 'content'),
            (too_long, 'content'),
        ]
        chosen = select_tar_format_for_layer(self._create_tar(members))
        self.assertEqual(chosen, tarfile.PAX_FORMAT)

    def test_non_ascii_selects_pax(self):
        """One non-ASCII member name makes the layer select PAX."""
        members = [
            ('normal.txt', 'content'),
            ('Főtanúsítvány.pem', 'certificate'),
        ]
        chosen = select_tar_format_for_layer(self._create_tar(members))
        self.assertEqual(chosen, tarfile.PAX_FORMAT)

    def test_transform_fn_applied(self):
        """The format decision reflects names set by transform_fn."""
        archive = self._create_tar([('file.txt', 'content')])

        def make_long_name(member):
            member.name = 'x' * 257
            return member

        chosen = select_tar_format_for_layer(
            archive, transform_fn=make_long_name)
        self.assertEqual(chosen, tarfile.PAX_FORMAT)

    def test_skip_fn_excludes_members(self):
        """Members rejected by skip_fn do not affect the format choice."""
        too_long = 'a' * 257
        archive = self._create_tar([
            ('keep.txt', 'content'),
            (too_long, 'content'),
        ])

        def name_exceeds_limit(m):
            return len(m.name) > 256

        # With the long member skipped, only 'keep.txt' is considered.
        chosen = select_tar_format_for_layer(
            archive, skip_fn=name_exceeds_limit)
        self.assertEqual(chosen, tarfile.USTAR_FORMAT)

    def test_fileobj_reset_after_scan(self):
        """The file object's position is unchanged after the scan."""
        archive = self._create_tar([('file.txt', 'content')])
        position_before = archive.tell()
        select_tar_format_for_layer(archive)
        self.assertEqual(archive.tell(), position_before)
196
+
197
+
198
+ if __name__ == '__main__':
199
+ unittest.main()