dreadnode 1.0.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dreadnode/__init__.py +51 -0
- dreadnode/api/__init__.py +0 -0
- dreadnode/api/client.py +249 -0
- dreadnode/api/models.py +210 -0
- dreadnode/artifact/__init__.py +0 -0
- dreadnode/artifact/merger.py +599 -0
- dreadnode/artifact/storage.py +126 -0
- dreadnode/artifact/tree_builder.py +455 -0
- dreadnode/constants.py +16 -0
- dreadnode/integrations/__init__.py +0 -0
- dreadnode/integrations/transformers.py +183 -0
- dreadnode/main.py +1042 -0
- dreadnode/metric.py +225 -0
- dreadnode/object.py +29 -0
- dreadnode/py.typed +0 -0
- dreadnode/serialization.py +731 -0
- dreadnode/task.py +447 -0
- dreadnode/tracing/__init__.py +0 -0
- dreadnode/tracing/constants.py +35 -0
- dreadnode/tracing/exporters.py +157 -0
- dreadnode/tracing/span.py +811 -0
- dreadnode/types.py +25 -0
- dreadnode/util.py +150 -0
- dreadnode/version.py +3 -0
- dreadnode-1.0.0rc0.dist-info/METADATA +122 -0
- dreadnode-1.0.0rc0.dist-info/RECORD +27 -0
- dreadnode-1.0.0rc0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Artifact storage implementation for fsspec-compatible file systems.
|
|
3
|
+
Provides efficient uploading of files and directories with deduplication.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import hashlib
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import fsspec # type: ignore[import-untyped]
|
|
10
|
+
|
|
11
|
+
from dreadnode.util import logger
|
|
12
|
+
|
|
13
|
+
CHUNK_SIZE = 8 * 1024 * 1024  # 8MB — read size used when streaming large files for hashing
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ArtifactStorage:
    """
    Storage for artifacts with efficient handling of large files and directories.

    Supports:
    - Content-based deduplication using SHA1 hashing
    - Batch uploads for directories handled by fsspec
    """

    def __init__(self, file_system: "fsspec.AbstractFileSystem") -> None:
        """
        Initialize artifact storage with a file system.

        Args:
            file_system: FSSpec-compatible file system used for all reads/writes.
        """
        self._file_system = file_system

    def store_file(self, file_path: Path, target_key: str) -> str:
        """
        Store a file in the storage system, skipping the upload when the target
        key already exists (keys are content-addressed, so an existing key means
        identical content).

        Args:
            file_path: Path to the local file
            target_key: Key/path where the file should be stored

        Returns:
            Full URI with protocol to the stored file
        """
        if not self._file_system.exists(target_key):
            self._file_system.put(str(file_path), target_key)
            logger.debug("Artifact successfully stored at %s", target_key)
        else:
            logger.debug("Artifact already exists at %s, skipping upload.", target_key)

        return str(self._file_system.unstrip_protocol(target_key))

    def batch_upload_files(self, source_paths: list[str], target_paths: list[str]) -> list[str]:
        """
        Upload multiple files in a single batch operation, skipping any target
        key that already exists.

        Args:
            source_paths: List of local file paths
            target_paths: List of target keys/paths (parallel to source_paths)

        Returns:
            List of URIs for all requested target paths (uploaded or pre-existing)

        Raises:
            ValueError: If source_paths and target_paths differ in length.
        """
        if not source_paths:
            return []

        logger.debug("Batch uploading %d files", len(source_paths))

        srcs: list[str] = []
        dsts: list[str] = []

        # strict=True: a length mismatch between the two lists is a caller bug;
        # the previous strict=False silently truncated and dropped uploads.
        for src, dst in zip(source_paths, target_paths, strict=True):
            if not self._file_system.exists(dst):
                srcs.append(src)
                dsts.append(dst)

        if srcs:
            self._file_system.put(srcs, dsts)
            logger.debug("Batch upload completed for %d files", len(srcs))
        else:
            logger.debug("All files already exist, skipping upload")

        return [str(self._file_system.unstrip_protocol(target)) for target in target_paths]

    def compute_file_hash(self, file_path: Path, stream_threshold_mb: int = 10) -> str:
        """
        Compute SHA1 hash of a file, using streaming only for larger files.

        Files below the threshold are read in a single call; larger files are
        streamed in CHUNK_SIZE pieces to bound memory usage.

        Args:
            file_path: Path to the file
            stream_threshold_mb: Size threshold in MB for streaming vs. loading whole file

        Returns:
            First 16 chars of SHA1 hash
        """
        file_size = file_path.stat().st_size
        stream_threshold = stream_threshold_mb * 1024 * 1024  # Convert MB to bytes

        sha1 = hashlib.sha1()  # noqa: S324

        with file_path.open("rb") as f:
            if file_size < stream_threshold:
                sha1.update(f.read())
            else:
                for chunk in iter(lambda: f.read(CHUNK_SIZE), b""):
                    sha1.update(chunk)

        return sha1.hexdigest()[:16]

    def compute_file_hashes(self, file_paths: list[Path]) -> dict[str, str]:
        """
        Compute SHA1 hashes for multiple files.

        Args:
            file_paths: List of file paths to hash

        Returns:
            Dictionary mapping resolved POSIX file paths to their hash values
        """
        return {
            file_path.resolve().as_posix(): self.compute_file_hash(file_path)
            for file_path in file_paths
        }
|
|
@@ -0,0 +1,455 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tree structure builder for artifacts with directory hierarchy preservation.
|
|
3
|
+
Provides efficient uploads and tree construction for frontend to consume.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import hashlib
|
|
7
|
+
import os
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Literal, TypedDict, Union
|
|
11
|
+
|
|
12
|
+
from dreadnode.artifact.storage import ArtifactStorage
|
|
13
|
+
from dreadnode.util import logger
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class FileNode(TypedDict):
    """
    Represents a file node in the artifact tree.

    Contains metadata about the file, including its hash, uri, size_bytes, and final_real_path.
    """

    type: Literal["file"]
    # Truncated SHA1 content hash (16 hex chars) used for deduplication.
    hash: str
    # Full storage URI (protocol included); empty until the upload completes.
    uri: str
    # File size on disk, in bytes.
    size_bytes: int
    # Resolved POSIX path of the file on the local machine.
    final_real_path: str
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class DirectoryNode(TypedDict):
    """
    Represents a directory node in the artifact tree.

    Contains metadata about the directory, including its dir_path, hash, and children nodes.
    """

    type: Literal["dir"]
    # Resolved POSIX path of the directory on the local machine.
    dir_path: str
    # Truncated SHA1 derived from the sorted hashes of the children.
    hash: str
    # Nested directories and files contained in this directory.
    children: list[Union["DirectoryNode", FileNode]]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
|
|
42
|
+
class ArtifactTreeBuilder:
|
|
43
|
+
"""
|
|
44
|
+
Builds a hierarchical tree structure for artifacts while uploading them to storage.
|
|
45
|
+
Preserves directory structure and handles efficient uploads.
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
storage: ArtifactStorage
|
|
49
|
+
prefix_path: str | None = None
|
|
50
|
+
|
|
51
|
+
def process_artifact(self, local_uri: str | Path) -> DirectoryNode:
|
|
52
|
+
"""
|
|
53
|
+
Process an artifact (file or directory) and build its tree representation.
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
local_uri: Path to the local file or directory
|
|
57
|
+
|
|
58
|
+
Returns:
|
|
59
|
+
Directory tree structure representing the artifact
|
|
60
|
+
|
|
61
|
+
Raises:
|
|
62
|
+
FileNotFoundError: If the path doesn't exist
|
|
63
|
+
"""
|
|
64
|
+
local_path = Path(local_uri).expanduser().resolve()
|
|
65
|
+
if not local_path.exists():
|
|
66
|
+
raise FileNotFoundError(f"{local_path} does not exist")
|
|
67
|
+
|
|
68
|
+
if local_path.is_dir():
|
|
69
|
+
return self._process_directory(local_path)
|
|
70
|
+
|
|
71
|
+
return self._process_single_file(local_path)
|
|
72
|
+
|
|
73
|
+
def _process_single_file(self, file_path: Path) -> DirectoryNode:
|
|
74
|
+
"""
|
|
75
|
+
Process a single file and create a directory structure for it.
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
file_path: Path to the file to be processed
|
|
79
|
+
|
|
80
|
+
Returns:
|
|
81
|
+
DirectoryNode containing the single file
|
|
82
|
+
"""
|
|
83
|
+
file_node = self._process_file(file_path)
|
|
84
|
+
|
|
85
|
+
file_node["final_real_path"] = file_path.resolve().as_posix()
|
|
86
|
+
|
|
87
|
+
dir_path = file_path.parent.resolve().as_posix()
|
|
88
|
+
return {
|
|
89
|
+
"type": "dir",
|
|
90
|
+
"dir_path": dir_path,
|
|
91
|
+
"hash": file_node["hash"],
|
|
92
|
+
"children": [file_node],
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
def _process_directory(self, dir_path: Path) -> DirectoryNode:
|
|
96
|
+
"""
|
|
97
|
+
Process a directory and all its contents efficiently.
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
dir_path: Path to the directory to be processed.
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
DirectoryNode: A hierarchical tree structure representing the directory and its contents.
|
|
104
|
+
"""
|
|
105
|
+
logger.debug("Processing directory: %s", dir_path)
|
|
106
|
+
|
|
107
|
+
all_files: list[Path] = []
|
|
108
|
+
for root, _, files in os.walk(dir_path):
|
|
109
|
+
root_path = Path(root)
|
|
110
|
+
for file in files:
|
|
111
|
+
file_path = root_path / file
|
|
112
|
+
all_files.append(file_path)
|
|
113
|
+
|
|
114
|
+
file_hashes = self.storage.compute_file_hashes(all_files)
|
|
115
|
+
|
|
116
|
+
source_paths = []
|
|
117
|
+
target_paths = []
|
|
118
|
+
file_nodes_by_path: dict[Path, FileNode] = {}
|
|
119
|
+
file_hash_cache: dict[str, FileNode] = {}
|
|
120
|
+
|
|
121
|
+
for file_path in all_files:
|
|
122
|
+
file_path_str = file_path.resolve().as_posix()
|
|
123
|
+
file_hash = file_hashes.get(file_path_str)
|
|
124
|
+
if not file_hash:
|
|
125
|
+
raise ValueError(f"File {file_path} not found in hash computation")
|
|
126
|
+
|
|
127
|
+
# Check local cache for duplicates within this directory
|
|
128
|
+
if file_hash in file_hash_cache:
|
|
129
|
+
cached_node = file_hash_cache[file_hash].copy()
|
|
130
|
+
cached_node["final_real_path"] = file_path_str
|
|
131
|
+
file_nodes_by_path[file_path] = cached_node
|
|
132
|
+
continue
|
|
133
|
+
|
|
134
|
+
file_extension = file_path.suffix
|
|
135
|
+
file_size = file_path.stat().st_size
|
|
136
|
+
|
|
137
|
+
if self.prefix_path:
|
|
138
|
+
prefix = self.prefix_path.rstrip("/")
|
|
139
|
+
target_key = f"{prefix}/artifacts/{file_hash}{file_extension}"
|
|
140
|
+
else:
|
|
141
|
+
raise ValueError("Prefix path is invalid or empty")
|
|
142
|
+
|
|
143
|
+
source_paths.append(file_path_str)
|
|
144
|
+
target_paths.append(target_key)
|
|
145
|
+
|
|
146
|
+
# Create the file node without URI (will be set after upload)
|
|
147
|
+
file_node: FileNode = {
|
|
148
|
+
"type": "file",
|
|
149
|
+
"uri": "",
|
|
150
|
+
"hash": file_hash,
|
|
151
|
+
"size_bytes": file_size,
|
|
152
|
+
"final_real_path": file_path.resolve().as_posix(),
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
file_nodes_by_path[file_path] = file_node
|
|
156
|
+
file_hash_cache[file_hash] = file_node
|
|
157
|
+
|
|
158
|
+
if source_paths:
|
|
159
|
+
logger.debug("Uploading %d files in batch", len(source_paths))
|
|
160
|
+
uris = self.storage.batch_upload_files(source_paths, target_paths)
|
|
161
|
+
|
|
162
|
+
# Update file nodes with URIs
|
|
163
|
+
for i, file_path_str in enumerate(source_paths):
|
|
164
|
+
file_path = Path(file_path_str)
|
|
165
|
+
if file_path in file_nodes_by_path:
|
|
166
|
+
file_nodes_by_path[file_path]["uri"] = uris[i]
|
|
167
|
+
|
|
168
|
+
return self._build_tree_structure(dir_path, file_nodes_by_path)
|
|
169
|
+
|
|
170
|
+
def _build_tree_structure(
|
|
171
|
+
self,
|
|
172
|
+
base_dir: Path,
|
|
173
|
+
file_nodes_by_path: dict[Path, FileNode],
|
|
174
|
+
) -> DirectoryNode:
|
|
175
|
+
"""
|
|
176
|
+
Build a hierarchical tree structure from processed files and directories.
|
|
177
|
+
|
|
178
|
+
This method constructs a directory tree representation from a dictionary of
|
|
179
|
+
file paths and their corresponding `FileNode` objects, while preserving empty directories.
|
|
180
|
+
|
|
181
|
+
Args:
|
|
182
|
+
base_dir (Path): The root directory for the tree structure.
|
|
183
|
+
file_nodes_by_path (dict[Path, FileNode]): A dictionary mapping file paths
|
|
184
|
+
to their corresponding `FileNode` objects.
|
|
185
|
+
|
|
186
|
+
Returns:
|
|
187
|
+
DirectoryNode: A hierarchical tree structure representing the directory
|
|
188
|
+
and its contents.
|
|
189
|
+
|
|
190
|
+
Example:
|
|
191
|
+
Given the following directory structure:
|
|
192
|
+
```
|
|
193
|
+
base_dir/
|
|
194
|
+
├── file1.txt
|
|
195
|
+
├── subdir1/
|
|
196
|
+
│ ├── file2.txt
|
|
197
|
+
│ └── file3.txt
|
|
198
|
+
└── subdir2/
|
|
199
|
+
└── file4.txt
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
And the [file_nodes_by_path] dictionary:
|
|
203
|
+
{
|
|
204
|
+
Path("base_dir/file1.txt"): FileNode(...),
|
|
205
|
+
Path("base_dir/subdir1/file2.txt"): FileNode(...),
|
|
206
|
+
Path("base_dir/subdir1/file3.txt"): FileNode(...),
|
|
207
|
+
Path("base_dir/subdir2/file4.txt"): FileNode(...),
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
The returned tree structure will look like:
|
|
211
|
+
{
|
|
212
|
+
"type": "dir",
|
|
213
|
+
"name": "base_dir",
|
|
214
|
+
"hash": "<hash_of_base_dir>",
|
|
215
|
+
"children": [
|
|
216
|
+
{
|
|
217
|
+
"type": "file",
|
|
218
|
+
"name": "file1.txt",
|
|
219
|
+
...
|
|
220
|
+
},
|
|
221
|
+
{
|
|
222
|
+
"type": "dir",
|
|
223
|
+
"name": "subdir1",
|
|
224
|
+
"hash": "<hash_of_subdir1>",
|
|
225
|
+
"children": [
|
|
226
|
+
{
|
|
227
|
+
"type": "file",
|
|
228
|
+
"name": "file2.txt",
|
|
229
|
+
...
|
|
230
|
+
},
|
|
231
|
+
{
|
|
232
|
+
"type": "file",
|
|
233
|
+
"name": "file3.txt",
|
|
234
|
+
...
|
|
235
|
+
}
|
|
236
|
+
]
|
|
237
|
+
},
|
|
238
|
+
{
|
|
239
|
+
"type": "dir",
|
|
240
|
+
"name": "subdir2",
|
|
241
|
+
"hash": "<hash_of_subdir2>",
|
|
242
|
+
"children": [
|
|
243
|
+
{
|
|
244
|
+
"type": "file",
|
|
245
|
+
"name": "file4.txt",
|
|
246
|
+
...
|
|
247
|
+
}
|
|
248
|
+
]
|
|
249
|
+
}
|
|
250
|
+
]
|
|
251
|
+
}
|
|
252
|
+
"""
|
|
253
|
+
dir_structure: dict[str, DirectoryNode] = {}
|
|
254
|
+
|
|
255
|
+
# Create root node
|
|
256
|
+
root_dir_path = base_dir.resolve().as_posix()
|
|
257
|
+
root_node: DirectoryNode = {
|
|
258
|
+
"type": "dir",
|
|
259
|
+
"dir_path": root_dir_path,
|
|
260
|
+
"hash": "", # Will be computed later
|
|
261
|
+
"children": [],
|
|
262
|
+
}
|
|
263
|
+
dir_structure[root_dir_path] = root_node
|
|
264
|
+
|
|
265
|
+
for file_path, file_node in file_nodes_by_path.items():
|
|
266
|
+
try:
|
|
267
|
+
rel_path = file_path.relative_to(base_dir)
|
|
268
|
+
parts = rel_path.parts
|
|
269
|
+
except ValueError:
|
|
270
|
+
logger.debug("File %s is not relative to base directory %s", file_path, base_dir)
|
|
271
|
+
continue
|
|
272
|
+
|
|
273
|
+
# File in the root directory
|
|
274
|
+
if len(parts) == 1:
|
|
275
|
+
root_node["children"].append(file_node)
|
|
276
|
+
continue
|
|
277
|
+
|
|
278
|
+
# Create parent directories
|
|
279
|
+
current_dir = base_dir
|
|
280
|
+
current_dir_str = current_dir.resolve().as_posix()
|
|
281
|
+
for part in parts[:-1]:
|
|
282
|
+
next_dir = current_dir / part
|
|
283
|
+
next_dir_str = next_dir.resolve().as_posix()
|
|
284
|
+
if next_dir_str not in dir_structure:
|
|
285
|
+
dir_node: DirectoryNode = {
|
|
286
|
+
"type": "dir",
|
|
287
|
+
"dir_path": next_dir_str,
|
|
288
|
+
"hash": "", # Will be computed later
|
|
289
|
+
"children": [],
|
|
290
|
+
}
|
|
291
|
+
dir_structure[next_dir_str] = dir_node
|
|
292
|
+
dir_structure[current_dir_str]["children"].append(dir_node)
|
|
293
|
+
current_dir = next_dir
|
|
294
|
+
current_dir_str = next_dir_str
|
|
295
|
+
# Now add the file to its parent directory
|
|
296
|
+
parent_dir_str = file_path.parent.resolve().as_posix()
|
|
297
|
+
if parent_dir_str in dir_structure:
|
|
298
|
+
dir_structure[parent_dir_str]["children"].append(file_node)
|
|
299
|
+
self._compute_directory_hashes(dir_structure)
|
|
300
|
+
|
|
301
|
+
return root_node
|
|
302
|
+
|
|
303
|
+
def _compute_directory_hashes(self, dir_structure: dict[str, DirectoryNode]) -> None:
|
|
304
|
+
"""
|
|
305
|
+
Compute hashes for all directories in the structure.
|
|
306
|
+
|
|
307
|
+
Args:
|
|
308
|
+
dir_structure: Dictionary mapping directory paths to DirectoryNode objects
|
|
309
|
+
"""
|
|
310
|
+
parents = self._map_parent_child_relationships(dir_structure)
|
|
311
|
+
leaf_dirs = self._find_leaf_directories(dir_structure, parents)
|
|
312
|
+
self._process_directories_bottom_up(dir_structure, parents, leaf_dirs)
|
|
313
|
+
|
|
314
|
+
def _map_parent_child_relationships(
|
|
315
|
+
self,
|
|
316
|
+
dir_structure: dict[str, DirectoryNode],
|
|
317
|
+
) -> dict[str, str]:
|
|
318
|
+
"""
|
|
319
|
+
Create a mapping of parent-child relationships for directories.
|
|
320
|
+
|
|
321
|
+
Args:
|
|
322
|
+
dir_structure: Dictionary mapping directory paths to DirectoryNode objects
|
|
323
|
+
|
|
324
|
+
Returns:
|
|
325
|
+
A dictionary mapping child directory paths to their parent directory paths.
|
|
326
|
+
"""
|
|
327
|
+
parents = {}
|
|
328
|
+
for dir_path, dir_node in dir_structure.items():
|
|
329
|
+
for child in dir_node["children"]:
|
|
330
|
+
if child["type"] == "dir":
|
|
331
|
+
child_path = child["dir_path"]
|
|
332
|
+
parents[child_path] = dir_path
|
|
333
|
+
return parents
|
|
334
|
+
|
|
335
|
+
def _find_leaf_directories(
|
|
336
|
+
self,
|
|
337
|
+
dir_structure: dict[str, DirectoryNode],
|
|
338
|
+
parents: dict[str, str],
|
|
339
|
+
) -> set[str]:
|
|
340
|
+
"""
|
|
341
|
+
Find leaf directories (those with no directory children).
|
|
342
|
+
|
|
343
|
+
Args:
|
|
344
|
+
dir_structure: Dictionary mapping directory paths to DirectoryNode objects
|
|
345
|
+
parents: Dictionary mapping child directory paths to parent directory paths
|
|
346
|
+
|
|
347
|
+
Returns:
|
|
348
|
+
A set of leaf directory paths.
|
|
349
|
+
"""
|
|
350
|
+
leaf_dirs = set()
|
|
351
|
+
for dir_path in dir_structure:
|
|
352
|
+
if dir_path not in parents.values():
|
|
353
|
+
leaf_dirs.add(dir_path)
|
|
354
|
+
return leaf_dirs
|
|
355
|
+
|
|
356
|
+
def _process_directories_bottom_up(
|
|
357
|
+
self,
|
|
358
|
+
dir_structure: dict[str, DirectoryNode],
|
|
359
|
+
parents: dict[str, str],
|
|
360
|
+
leaf_dirs: set[str],
|
|
361
|
+
) -> None:
|
|
362
|
+
"""
|
|
363
|
+
Process directories bottom-up starting from leaf directories.
|
|
364
|
+
|
|
365
|
+
Args:
|
|
366
|
+
dir_structure: Dictionary mapping directory paths to DirectoryNode objects
|
|
367
|
+
parents: Dictionary mapping child directory paths to parent directory paths
|
|
368
|
+
leaf_dirs: Set of leaf directory paths
|
|
369
|
+
"""
|
|
370
|
+
processed = set()
|
|
371
|
+
while leaf_dirs:
|
|
372
|
+
dir_path = leaf_dirs.pop()
|
|
373
|
+
dir_node = dir_structure[dir_path]
|
|
374
|
+
|
|
375
|
+
# Compute hash based on children
|
|
376
|
+
dir_node["hash"] = self._compute_directory_hash(dir_node)
|
|
377
|
+
|
|
378
|
+
processed.add(dir_path)
|
|
379
|
+
|
|
380
|
+
# Add parent to leaf_dirs if all its children are processed
|
|
381
|
+
if dir_path in parents:
|
|
382
|
+
parent_path = parents[dir_path]
|
|
383
|
+
if self._are_all_children_processed(dir_structure[parent_path], processed):
|
|
384
|
+
leaf_dirs.add(parent_path)
|
|
385
|
+
|
|
386
|
+
def _compute_directory_hash(self, dir_node: DirectoryNode) -> str:
|
|
387
|
+
"""
|
|
388
|
+
Compute the hash for a directory based on its children.
|
|
389
|
+
|
|
390
|
+
Args:
|
|
391
|
+
dir_node: The DirectoryNode object
|
|
392
|
+
|
|
393
|
+
Returns:
|
|
394
|
+
The computed hash as a string.
|
|
395
|
+
"""
|
|
396
|
+
child_hashes = [child["hash"] for child in dir_node["children"]]
|
|
397
|
+
child_hashes.sort() # Ensure consistent hash
|
|
398
|
+
hash_input = "|".join(child_hashes)
|
|
399
|
+
return hashlib.sha1(hash_input.encode()).hexdigest()[:16] # noqa: S324
|
|
400
|
+
|
|
401
|
+
def _are_all_children_processed(self, parent_node: DirectoryNode, processed: set[str]) -> bool:
|
|
402
|
+
"""
|
|
403
|
+
Check if all children of a parent directory have been processed.
|
|
404
|
+
|
|
405
|
+
Args:
|
|
406
|
+
parent_node: The parent DirectoryNode object
|
|
407
|
+
processed: Set of processed directory paths
|
|
408
|
+
|
|
409
|
+
Returns:
|
|
410
|
+
True if all children are processed, False otherwise.
|
|
411
|
+
"""
|
|
412
|
+
for child in parent_node["children"]:
|
|
413
|
+
if child["type"] == "dir" and child["dir_path"] not in processed:
|
|
414
|
+
return False
|
|
415
|
+
return True
|
|
416
|
+
|
|
417
|
+
def _process_file(self, file_path: Path) -> FileNode:
|
|
418
|
+
"""
|
|
419
|
+
Process a single file by hashing and uploading it to storage.
|
|
420
|
+
|
|
421
|
+
This method computes a SHA1 hash of the file's contents to uniquely identify it.
|
|
422
|
+
If the file has already been processed (based on the hash), the cached result is
|
|
423
|
+
returned. Otherwise, the file is uploaded to the storage system, and a `FileNode`
|
|
424
|
+
is created to represent the file.
|
|
425
|
+
|
|
426
|
+
The method also extracts metadata such as the file's size, MIME type, and extension,
|
|
427
|
+
and determines the target storage path based on the user ID and file hash.
|
|
428
|
+
|
|
429
|
+
Args:
|
|
430
|
+
file_path (Path): Path to the file to be processed.
|
|
431
|
+
|
|
432
|
+
Returns:
|
|
433
|
+
FileNode: A dictionary representing the processed file, including its metadata
|
|
434
|
+
and storage URI.
|
|
435
|
+
"""
|
|
436
|
+
file_hash = self.storage.compute_file_hash(file_path)
|
|
437
|
+
|
|
438
|
+
file_extension = file_path.suffix
|
|
439
|
+
file_size = file_path.stat().st_size
|
|
440
|
+
|
|
441
|
+
if self.prefix_path:
|
|
442
|
+
prefix = self.prefix_path.rstrip("/")
|
|
443
|
+
target_key = f"{prefix}/artifacts/{file_hash}{file_extension}"
|
|
444
|
+
else:
|
|
445
|
+
raise ValueError("Prefix path is invalid or empty")
|
|
446
|
+
|
|
447
|
+
uri = self.storage.store_file(file_path, target_key)
|
|
448
|
+
|
|
449
|
+
return {
|
|
450
|
+
"type": "file",
|
|
451
|
+
"uri": uri,
|
|
452
|
+
"hash": file_hash,
|
|
453
|
+
"size_bytes": file_size,
|
|
454
|
+
"final_real_path": file_path.resolve().as_posix(),
|
|
455
|
+
}
|
dreadnode/constants.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Environment variable names

ENV_SERVER_URL = "DREADNODE_SERVER_URL"
ENV_SERVER = "DREADNODE_SERVER"  # alternative to SERVER_URL
ENV_API_TOKEN = "DREADNODE_API_TOKEN"  # noqa: S105
ENV_API_KEY = "DREADNODE_API_KEY"  # alternative to API_TOKEN
ENV_LOCAL_DIR = "DREADNODE_LOCAL_DIR"
ENV_PROJECT = "DREADNODE_PROJECT"

# Default values

DEFAULT_SERVER_URL = "https://platform.dreadnode.io"
DEFAULT_LOCAL_OBJECT_DIR = ".dreadnode/objects"

# Default values for the S3 storage
MAX_INLINE_OBJECT_BYTES = 10 * 1024  # 10KB — objects above this size are not stored inline
|
|
File without changes
|