docker-assemble 0.2.2__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. {docker_assemble-0.2.2 → docker_assemble-0.4.0}/.gitignore +1 -0
  2. {docker_assemble-0.2.2/docker_assemble.egg-info → docker_assemble-0.4.0}/PKG-INFO +1 -1
  3. docker_assemble-0.4.0/docker_assemble/image_exporter.py +216 -0
  4. docker_assemble-0.4.0/docker_assemble/main.py +40 -0
  5. {docker_assemble-0.2.2 → docker_assemble-0.4.0/docker_assemble.egg-info}/PKG-INFO +1 -1
  6. docker_assemble-0.2.2/docker_assemble/image_exporter.py +0 -68
  7. docker_assemble-0.2.2/docker_assemble/main.py +0 -18
  8. {docker_assemble-0.2.2 → docker_assemble-0.4.0}/.github/workflows/pypi-publish.yml +0 -0
  9. {docker_assemble-0.2.2 → docker_assemble-0.4.0}/README.md +0 -0
  10. {docker_assemble-0.2.2 → docker_assemble-0.4.0}/docker-assemble +0 -0
  11. {docker_assemble-0.2.2 → docker_assemble-0.4.0}/docker_assemble/__init__.py +0 -0
  12. {docker_assemble-0.2.2 → docker_assemble-0.4.0}/docker_assemble/docker_utils.py +0 -0
  13. {docker_assemble-0.2.2 → docker_assemble-0.4.0}/docker_assemble.egg-info/SOURCES.txt +0 -0
  14. {docker_assemble-0.2.2 → docker_assemble-0.4.0}/docker_assemble.egg-info/dependency_links.txt +0 -0
  15. {docker_assemble-0.2.2 → docker_assemble-0.4.0}/docker_assemble.egg-info/entry_points.txt +0 -0
  16. {docker_assemble-0.2.2 → docker_assemble-0.4.0}/docker_assemble.egg-info/requires.txt +0 -0
  17. {docker_assemble-0.2.2 → docker_assemble-0.4.0}/docker_assemble.egg-info/top_level.txt +0 -0
  18. {docker_assemble-0.2.2 → docker_assemble-0.4.0}/pyproject.toml +0 -0
  19. {docker_assemble-0.2.2 → docker_assemble-0.4.0}/requirements.txt +0 -0
  20. {docker_assemble-0.2.2 → docker_assemble-0.4.0}/setup.cfg +0 -0
@@ -10,4 +10,5 @@ dist/
10
10
  build/
11
11
  venv/
12
12
  .env
13
+ .idea
13
14
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docker-assemble
3
- Version: 0.2.2
3
+ Version: 0.4.0
4
4
  Summary: A CLI tool to extract and analyze Docker images
5
5
  Author: Sina
6
6
  License: Apache-2.0
@@ -0,0 +1,216 @@
1
+ import docker
2
+ import tarfile
3
+ import tempfile
4
+ import os
5
+ from pathlib import Path
6
+ import logging
7
+ import io
8
+
9
+
10
def get_or_pull_image_and_export_fs(client, image_name):
    """Export the root filesystem of an image to a temporary tar file.

    The image is looked up locally and pulled when missing. A throw-away
    container running ``sleep infinity`` is started from it and the archive
    of ``/`` is streamed to a freshly created temporary ``.tar`` file.

    Args:
        client: Docker client exposing ``images`` and ``containers``.
        image_name: Name (optionally with tag) of the image to export.

    Returns:
        A ``(container, tmp_tar_path)`` tuple. The caller owns both and must
        remove the container and delete the tar file when done.

    Raises:
        Any error raised while starting the container or exporting the
        archive. On an export failure the container and any partial tar file
        are cleaned up before the error propagates.
    """
    try:
        client.images.get(image_name)
        logging.info(f"Image '{image_name}' found locally.")
    except docker.errors.ImageNotFound:
        logging.info(f"Image '{image_name}' not found locally. Pulling...")
        client.images.pull(image_name)

    container = client.containers.run(image=image_name, command="sleep infinity", detach=True)
    logging.debug(f"Created temporary container: {container.id[:12]}")

    tmp_tar_path = None
    try:
        stream, _ = container.get_archive("/")
        # mkstemp creates the file atomically; mktemp only returns a name
        # that another process could claim before we open it.
        fd, tmp_tar_path = tempfile.mkstemp(suffix=".tar")
        with os.fdopen(fd, "wb") as f:
            for chunk in stream:
                f.write(chunk)
        return container, tmp_tar_path
    except BaseException:
        # Also covers Ctrl-C so the helper container is not left running.
        container.remove(force=True)
        if tmp_tar_path is not None and os.path.exists(tmp_tar_path):
            os.remove(tmp_tar_path)
        raise
31
+
32
+
33
def extract_image(image_name: str, output_dir: str):
    """Extract the filesystem of a Docker image into a local directory.

    Args:
        image_name: Name of the image to extract (pulled if not local).
        output_dir: Destination directory; created when it does not exist.

    Raises:
        Any error raised by the Docker client or during tar extraction.
    """
    # Pre-bind the names used in ``finally`` so a failure before they are
    # assigned surfaces the real error instead of an UnboundLocalError.
    container = None
    tmp_tar_path = None
    try:
        client = docker.from_env()
        container, tmp_tar_path = get_or_pull_image_and_export_fs(client, image_name)

        logging.debug(f"Filesystem archive saved to: {tmp_tar_path}")

        output_path = Path(output_dir).resolve()
        output_path.mkdir(parents=True, exist_ok=True)

        extract_tar_safely(tmp_tar_path, output_path)

        logging.info(f"Image filesystem extracted to: {output_path}")

    finally:
        if container is not None:
            container.remove(force=True)
        if tmp_tar_path is not None and os.path.exists(tmp_tar_path):
            os.remove(tmp_tar_path)
        logging.debug("Cleaned up temporary container and tar file.")
52
+
53
+
54
def extract_tar_safely(tar_path: str, output_path: Path):
    """Extract a tar archive, refusing members that land outside the target.

    Leading slashes are stripped from member names so absolute entries are
    extracted relative to ``output_path``. Each destination is resolved
    (following symlinks that earlier members may have created) and skipped
    with a warning when it falls outside ``output_path``.

    Args:
        tar_path: Path of the tar archive to read.
        output_path: Directory to extract into.
    """
    base = os.path.realpath(output_path)

    with tarfile.open(tar_path, "r") as tar:
        for member in tar.getmembers():
            member.name = member.name.lstrip("/")

            # realpath collapses ".." and follows already-extracted symlinks,
            # so "var/run/x" under an absolute "var/run -> /run" link is
            # detected as escaping the output directory.
            destination = os.path.realpath(os.path.join(base, member.name))
            if os.path.commonpath([base, destination]) != base:
                logging.warning(f"Blocked unsafe path: {member.name}")
                continue

            tar.extract(member, path=output_path)
            logging.debug(f"Extracted: {member.name}")

    logging.info(f"Extraction completed to: {output_path}")
75
+
76
+
77
def check_large_files(output_dir, max_size_bytes):
    """Find files under ``output_dir`` that are larger than a size limit.

    Symbolic links are skipped: their size would be that of the link target,
    which for absolute links inside an extracted image resolves against the
    host filesystem rather than the extracted tree.

    Args:
        output_dir: Root directory to walk.
        max_size_bytes: Files strictly larger than this many bytes are listed.

    Returns:
        A list of ``(path, size_in_bytes)`` tuples, where ``path`` is a
        ``Path``. The list is empty when no file exceeds the limit.
    """
    logging.info(f"Checking for files larger than {max_size_bytes} bytes.")
    large_files = []
    for root, _, files in os.walk(output_dir):
        for file in files:
            file_path = Path(root) / file
            if file_path.is_symlink():
                continue
            try:
                file_size = os.path.getsize(file_path)
            except FileNotFoundError:
                logging.error(f"File not found: {file_path}")
            except OSError as e:
                logging.error(f"OS error while getting size of {file_path}: {e}")
            else:
                if file_size > max_size_bytes:
                    large_files.append((file_path, file_size))

    if large_files:
        logging.warning("The following files exceed the maximum file size:")
        for path, size in large_files:
            logging.warning(f"- {path}: {size} bytes")
    else:
        logging.info("No files exceed the maximum file size.")

    return large_files
100
+
101
+
102
def remove_files(large_files):
    """Interactively delete a user-selected subset of ``large_files``.

    The candidates are printed with their indices, then the user is asked
    for a comma-separated list of indices (or ``no`` to skip) and for a
    confirmation. Invalid, negative, or out-of-range indices are rejected
    and the prompt is repeated; duplicate indices are collapsed.

    Args:
        large_files: Sequence of ``(path, size_in_bytes)`` tuples.

    Returns:
        The list of paths that were actually deleted (empty when skipped).
    """
    # Show the mapping from index to file so the prompt below is answerable.
    print("Files exceeding the maximum file size:")
    for position, entry in enumerate(large_files):
        print(f"[{position}] {entry[0]} ({entry[1]} bytes)")

    while True:
        indices_str = input("Enter the indices of files to remove (comma-separated, or 'no' to skip): ").strip()
        if indices_str.lower() == 'no':
            logging.info("No files will be removed.")
            return []

        try:
            indices = []
            for token in indices_str.split(','):
                index = int(token)
                # Negative values would silently index from the end.
                if not 0 <= index < len(large_files):
                    raise IndexError(f"index {index} is out of range")
                if index not in indices:
                    indices.append(index)
        except (ValueError, IndexError) as e:
            print(f"Invalid input: {e}")
            continue

        files_to_remove = [large_files[i][0] for i in indices]

        print("Files to be removed:")
        for file in files_to_remove:
            print(file)

        confirmation = input("Are you sure you want to delete these files? (yes/no): ").strip()
        if confirmation.lower() != 'yes':
            print("Removal cancelled.")
            continue

        removed = []
        for file in files_to_remove:
            try:
                os.remove(file)
            except OSError as e:
                # Keep going: one undeletable file should not abort the rest.
                logging.error(f"Could not remove {file}: {e}")
                continue
            logging.info(f"Removed file: {file}")
            removed.append(file)
        return removed
127
+
128
+
129
def filter_tar_member(member, large_files):
    """Decide whether a tar member should be kept in the rebuilt image.

    A member is dropped when its path lies under one of the blocked
    prefixes, or when it is a regular file that corresponds to an entry in
    ``large_files``.

    Args:
        member: ``tarfile.TarInfo`` of the entry being considered.
        large_files: Sequence of ``(path, size_in_bytes)`` tuples, where
            ``path`` is the location of the file in the extracted tree.

    Returns:
        ``True`` to keep the member, ``False`` to skip it.
    """
    blocked_prefixes = (
        "proc/",
        "sys/",
        "dev/",
        "run/",
        "tmp/",
        "var/cache/",
        "var/log/",
        "usr/share/doc/",
        "usr/share/man/",
        "usr/share/locale/",
    )

    # Archive names may be written as "usr/x", "/usr/x" or "./usr/x";
    # compare on the path components so all spellings are treated alike.
    parts = tuple(p for p in member.name.split("/") if p not in ("", "."))
    relative_name = "/".join(parts)

    if relative_name.startswith(blocked_prefixes):
        logging.info(f"Skipping blocked path: {member.name}")
        return False

    # Filter large files. The extracted path is "<output_dir>/<member path>",
    # so match on the trailing path components (not just the basename) and
    # require the recorded size to agree to avoid accidental matches.
    if member.isfile() and parts:
        for entry in large_files:
            candidate_parts = Path(entry[0]).parts
            if candidate_parts[-len(parts):] == parts and entry[1] == member.size:
                logging.info(f"Skipping large file: {member.name} ({member.size} bytes)")
                return False

    return True
154
+
155
+
156
# Rebuild tar stream while filtering and injecting Dockerfile
def filter_tar_and_inject_dockerfile(original_tar_path, dockerfile_content, filter_callback):
    """Build a gzip-compressed build context from a filesystem archive.

    Every member accepted by ``filter_callback`` is copied into a new
    in-memory archive, and a ``Dockerfile`` entry is appended at the end.
    Hard links whose target was filtered out are dropped as well, since
    they would otherwise point at an entry missing from the archive.

    Args:
        original_tar_path: Path of the source tar archive.
        dockerfile_content: Text of the Dockerfile to inject (UTF-8 encoded).
        filter_callback: Callable taking a ``tarfile.TarInfo`` and returning
            ``True`` to keep the member.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 holding the ``tar.gz`` data.
    """
    buffer = io.BytesIO()
    skipped_names = set()

    with tarfile.open(original_tar_path, "r") as old_tar:
        with tarfile.open(fileobj=buffer, mode="w:gz") as new_tar:
            for member in old_tar.getmembers():
                if not filter_callback(member):
                    skipped_names.add(member.name)
                    continue

                if member.islnk() and member.linkname in skipped_names:
                    logging.info(f"Skipping hard link to filtered file: {member.name}")
                    # Later links may chain through this one.
                    skipped_names.add(member.name)
                    continue

                # Only regular files carry data; other entries are header-only.
                file_obj = old_tar.extractfile(member) if member.isfile() else None
                new_tar.addfile(member, file_obj)

            # Inject Dockerfile
            dockerfile_data = dockerfile_content.encode("utf-8")
            docker_info = tarfile.TarInfo(name="Dockerfile")
            docker_info.size = len(dockerfile_data)
            new_tar.addfile(docker_info, io.BytesIO(dockerfile_data))

    buffer.seek(0)
    return buffer
177
+
178
+
179
def create_new_image(image_name, new_image_name, large_files):
    """Build a new image from ``image_name`` with unwanted files left out.

    The source image's filesystem is exported, filtered through
    ``filter_tar_member`` (blocked paths and ``large_files``), combined with
    a minimal ``FROM scratch`` Dockerfile and built as ``new_image_name``.

    Args:
        image_name: Source image to export.
        new_image_name: Tag applied to the image that is built.
        large_files: Sequence of ``(path, size_in_bytes)`` tuples to exclude.

    Raises:
        Any error raised by the Docker client while exporting or building.
    """
    # Pre-bind the names used in ``finally`` so a failure before they are
    # assigned surfaces the real error instead of an UnboundLocalError.
    container = None
    tmp_tar_path = None
    try:
        logging.info(f"Creating new image '{new_image_name}' from '{image_name}' with filtered files.")
        client = docker.from_env()
        container, tmp_tar_path = get_or_pull_image_and_export_fs(client, image_name)

        logging.info(f"Extraction complete. Archive saved at {tmp_tar_path}")

        dockerfile_content = "\nFROM scratch\nCOPY . /\n"

        # Build filtered tar stream
        filtered_tar_stream = filter_tar_and_inject_dockerfile(
            tmp_tar_path,
            dockerfile_content,
            lambda member: filter_tar_member(member, large_files)
        )

        # Build Docker image directly from filtered tar stream
        image, logs = client.images.build(
            fileobj=filtered_tar_stream,
            tag=new_image_name,
            rm=True,
            custom_context=True
        )

        for line in logs:
            logging.info(line)

        logging.info(f"New image successfully created: {new_image_name}")

    finally:
        if container is not None:
            container.remove(force=True)
        if tmp_tar_path is not None and os.path.exists(tmp_tar_path):
            os.remove(tmp_tar_path)
        logging.debug("Cleaned up temporary container and tar file.")
@@ -0,0 +1,40 @@
1
+ import argparse
2
+ import logging
3
+ import docker_assemble.image_exporter as image_exporter
4
+
5
+
6
def parse_size(size_str):
    """Convert a human-readable size such as ``"100M"`` to a byte count.

    Accepts a plain integer, or a number (fractions allowed) followed by a
    binary unit: ``K``, ``M``, ``G`` or ``T``, optionally spelled with a
    trailing ``B`` or ``iB`` (``"10KB"``, ``"1GiB"``). Matching is
    case-insensitive and surrounding whitespace is ignored.

    Args:
        size_str: Text to parse.

    Returns:
        The size in bytes as an ``int`` (fractions are truncated).

    Raises:
        ValueError: If the text is empty, malformed, or negative.
    """
    suffixes = {'K': 1024, 'M': 1024**2, 'G': 1024**3, 'T': 1024**4}
    text = size_str.strip().upper()

    # Drop an optional byte marker so "KB"/"KIB" reduce to "K".
    for marker in ("IB", "B"):
        if text.endswith(marker):
            text = text[:-len(marker)].rstrip()
            break

    if not text:
        raise ValueError(f"Invalid size: {size_str!r}")

    try:
        if text[-1] in suffixes:
            value = int(float(text[:-1]) * suffixes[text[-1]])
        else:
            value = int(text)
    except (ValueError, OverflowError):
        raise ValueError(f"Invalid size: {size_str!r}") from None

    if value < 0:
        raise ValueError(f"Size must not be negative: {size_str!r}")
    return value
15
+
16
def run():
    """Command-line entry point: extract an image and optionally slim it.

    Extracts the image filesystem into the output directory. With
    ``--maximum-file-size`` the extracted tree is scanned for larger files
    and the user may delete some of them. With ``--new-image-name`` a new
    image is built from the original with the listed large files excluded.
    """
    parser = argparse.ArgumentParser(description="Docker Assemble CLI")
    parser.add_argument("-d", action="store_true", help="Disassemble an image")
    parser.add_argument("--debug", action="store_true", help="Enable debug mode")
    parser.add_argument("--maximum-file-size", type=str, help="Maximum file size (e.g., 1G, 100M, 10K). Files larger than this size will be listed.")
    parser.add_argument("--new-image-name", type=str, help="Name for the new Docker image after removing files.")
    parser.add_argument("image", help="Docker image name")
    parser.add_argument("output_dir", nargs="?", default=".", help="Optional output directory")

    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    # Validate the size before the slow extraction so a typo fails fast
    # with a usage message instead of a traceback afterwards.
    max_size_bytes = None
    if args.maximum_file_size:
        try:
            max_size_bytes = parse_size(args.maximum_file_size)
        except (ValueError, IndexError):
            parser.error(f"invalid --maximum-file-size value: {args.maximum_file_size!r}")

    logging.debug(f"Extracting image: {args.image} to directory: {args.output_dir}")
    image_exporter.extract_image(image_name=args.image, output_dir=args.output_dir)

    # Always bound, so --new-image-name works without --maximum-file-size.
    large_files = []
    if max_size_bytes is not None:
        large_files = image_exporter.check_large_files(args.output_dir, max_size_bytes)

        if large_files:
            image_exporter.remove_files(large_files)

    if args.new_image_name:
        image_exporter.create_new_image(args.image, args.new_image_name, large_files)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docker-assemble
3
- Version: 0.2.2
3
+ Version: 0.4.0
4
4
  Summary: A CLI tool to extract and analyze Docker images
5
5
  Author: Sina
6
6
  License: Apache-2.0
@@ -1,68 +0,0 @@
1
- import docker
2
- import tarfile
3
- import tempfile
4
- import os
5
- import shutil
6
- from pathlib import Path
7
- import logging
8
-
9
- def extract_image(image_name: str, output_dir: str):
10
- client = docker.from_env()
11
-
12
- try:
13
- image = client.images.get(image_name)
14
- logging.info(f"Image '{image_name}' found locally.")
15
- except docker.errors.ImageNotFound:
16
- logging.info(f"Image '{image_name}' not found locally. Pulling...")
17
- image = client.images.pull(image_name)
18
-
19
- container = client.containers.run(image=image_name, command="sleep infinity", detach=True)
20
- logging.debug(f"Created temporary container: {container.id[:12]}")
21
-
22
- try:
23
- stream, _ = container.get_archive("/")
24
- tmp_tar_path = tempfile.mktemp(suffix=".tar")
25
- with open(tmp_tar_path, "wb") as f:
26
- for chunk in stream:
27
- f.write(chunk)
28
-
29
- logging.debug(f"Filesystem archive saved to: {tmp_tar_path}")
30
-
31
- output_path = Path(output_dir).resolve()
32
- output_path.mkdir(parents=True, exist_ok=True)
33
-
34
- extract_tar_safely(tmp_tar_path, output_path)
35
-
36
- logging.info(f"Image filesystem extracted to: {output_path}")
37
-
38
- finally:
39
- container.remove(force=True)
40
- if os.path.exists(tmp_tar_path):
41
- os.remove(tmp_tar_path)
42
- logging.debug("Cleaned up temporary container and tar file.")
43
-
44
-
45
- def extract_tar_safely(tar_path: str, output_path: Path):
46
- # def is_safe_path(base: Path, target: Path) -> bool:
47
- # try:
48
- # return target.resolve().is_relative_to(base.resolve())
49
- # except AttributeError:
50
- # # For Python < 3.9 fallback
51
- # return str(target.resolve()).startswith(str(base.resolve()))
52
-
53
- with tarfile.open(tar_path, "r") as tar:
54
- for member in tar.getmembers():
55
- member.name = member.name.lstrip("/")
56
- member_path = output_path / member.name
57
-
58
- # if not is_safe_path(output_path, member_path):
59
- # logging.warning(f"Blocked unsafe path: {member.name}, output_path: {output_path}, member_path: {member_path}")
60
- # continue
61
-
62
- tar.extract(member, path=output_path)
63
- logging.debug(f"Extracted: {member.name}")
64
-
65
- logging.info(f"Extraction completed to: {output_path}")
66
-
67
-
68
-
@@ -1,18 +0,0 @@
1
- import argparse
2
- import logging
3
- import os
4
- from docker_assemble.image_exporter import extract_image
5
-
6
- def run():
7
- parser = argparse.ArgumentParser(description="Docker Assemble CLI")
8
- parser.add_argument("-d", action="store_true", help="Disassemble an image")
9
- parser.add_argument("--debug", action="store_true", help="Enable debug mode")
10
- parser.add_argument("image", help="Docker image name")
11
- parser.add_argument("output_dir", nargs="?", default=".", help="Optional output directory")
12
-
13
- args = parser.parse_args()
14
-
15
- logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
16
-
17
- logging.debug(f"Extracting image: {args.image} to directory: {args.output_dir}")
18
- extract_image(image_name=args.image, output_dir=args.output_dir)