dreadnode 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dreadnode/__init__.py +51 -0
- dreadnode/api/__init__.py +0 -0
- dreadnode/api/client.py +244 -0
- dreadnode/api/models.py +210 -0
- dreadnode/artifact/__init__.py +0 -0
- dreadnode/artifact/merger.py +599 -0
- dreadnode/artifact/storage.py +126 -0
- dreadnode/artifact/tree_builder.py +455 -0
- dreadnode/constants.py +16 -0
- dreadnode/integrations/__init__.py +0 -0
- dreadnode/integrations/transformers.py +183 -0
- dreadnode/main.py +1048 -0
- dreadnode/metric.py +228 -0
- dreadnode/object.py +29 -0
- dreadnode/py.typed +0 -0
- dreadnode/serialization.py +731 -0
- dreadnode/task.py +447 -0
- dreadnode/tracing/__init__.py +0 -0
- dreadnode/tracing/constants.py +35 -0
- dreadnode/tracing/exporters.py +157 -0
- dreadnode/tracing/span.py +819 -0
- dreadnode/types.py +25 -0
- dreadnode/util.py +150 -0
- dreadnode/version.py +3 -0
- dreadnode-1.0.0.dist-info/METADATA +125 -0
- dreadnode-1.0.0.dist-info/RECORD +27 -0
- dreadnode-1.0.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,599 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility for merging artifact tree structures while preserving directory hierarchy.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import cast
|
|
8
|
+
|
|
9
|
+
from dreadnode.artifact.tree_builder import DirectoryNode, FileNode
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ArtifactMerger:
|
|
13
|
+
"""
|
|
14
|
+
Class responsible for merging artifact tree structures.
|
|
15
|
+
Handles overlapping directory structures and efficiently combines artifacts.
|
|
16
|
+
|
|
17
|
+
Example:
|
|
18
|
+
```python
|
|
19
|
+
# Create a merger instance
|
|
20
|
+
merger = ArtifactMerger()
|
|
21
|
+
|
|
22
|
+
# Add multiple artifact trees
|
|
23
|
+
merger.add_tree(tree1) # First tree gets added directly
|
|
24
|
+
merger.add_tree(tree2) # Second tree gets merged if it overlaps
|
|
25
|
+
|
|
26
|
+
# Get the merged result
|
|
27
|
+
merged_trees = merger.get_merged_trees()
|
|
28
|
+
```
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
def __init__(self) -> None:
|
|
32
|
+
self._path_map: dict[str, DirectoryNode | FileNode] = {}
|
|
33
|
+
# Maps file hashes to all matching files
|
|
34
|
+
self._hash_map: dict[str, list[FileNode]] = {}
|
|
35
|
+
self._merged_trees: list[DirectoryNode] = []
|
|
36
|
+
|
|
37
|
+
    def add_tree(self, new_tree: DirectoryNode) -> None:
        """
        Add a new artifact tree, merging with existing trees if needed.

        The new tree is reconciled against the current forest in five ordered
        phases: (1) the first tree is stored as-is; (2) a root with the same
        path is merged directly; (3) a tree that is a *parent* of existing
        roots absorbs them; (4) a tree that is a *child* of an existing root
        is grafted into it; (5) otherwise partial path overlaps are merged,
        and failing that the tree becomes a new independent root.

        Args:
            new_tree: New directory tree to add

        Example:
            ```python
            merger.add_tree(tree1)  # First tree gets added directly
            merger.add_tree(tree2)  # Second tree gets merged if it overlaps
            ```
        """
        # Phase 1: first artifact - just add it and seed the lookup maps.
        if not self._merged_trees:
            self._merged_trees = [new_tree]
            self._build_maps(new_tree)
            return

        # Get new tree's path
        new_dir_path = new_tree["dir_path"]

        # Phase 2: direct match - an existing root has the identical path.
        for existing_tree in self._merged_trees:
            if existing_tree["dir_path"] == new_dir_path:
                # Same directory - merge them
                self._merge_directory_nodes(existing_tree, new_tree)
                self._build_maps()  # Rebuild maps
                return

        # Phase 3: new tree is an ancestor of one or more existing roots.
        # Collect matches first; the list is only mutated after the scan.
        children_to_remove = []
        for existing_tree in self._merged_trees:
            existing_dir_path = existing_tree["dir_path"]

            # "/" suffix ensures a true path-component prefix, not a
            # string prefix (e.g. "/data/au" vs "/data/audio").
            if existing_dir_path.startswith(new_dir_path + "/"):
                rel_path = existing_dir_path[len(new_dir_path) + 1 :].split("/")
                self._place_tree_at_path(new_tree, existing_tree, rel_path)
                children_to_remove.append(existing_tree)

        # Remove roots that are now incorporated into the new tree and
        # promote the new tree to a root in their place.
        if children_to_remove:
            for child in children_to_remove:
                if child in self._merged_trees:
                    self._merged_trees.remove(child)
            self._merged_trees.append(new_tree)
            self._build_maps()  # Rebuild maps
            return

        # Phase 4: new tree is a descendant of an existing root - graft it in.
        for existing_tree in self._merged_trees:
            existing_dir_path = existing_tree["dir_path"]

            if new_dir_path.startswith(existing_dir_path + "/"):
                rel_path = new_dir_path[len(existing_dir_path) + 1 :].split("/")
                self._place_tree_at_path(existing_tree, new_tree, rel_path)
                self._build_maps()  # Rebuild maps
                return

        # Phase 5: no containment relation - look for partial path overlaps.
        new_path_map: dict[str, DirectoryNode | FileNode] = {}
        new_hash_map: dict[str, list[FileNode]] = {}
        self._build_path_and_hash_maps(new_tree, new_path_map, new_hash_map)

        # Find common paths between existing and new tree
        path_overlaps = set(self._path_map.keys()) & set(new_path_map.keys())

        if path_overlaps and self._handle_overlaps(path_overlaps, new_path_map):
            # Successfully merged via overlaps
            self._build_maps()  # Rebuild maps
            return

        # Nothing related - keep the new tree as a separate root.
        self._merged_trees.append(new_tree)
        self._build_maps()  # Rebuild maps
|
+
def get_merged_trees(self) -> list[DirectoryNode]:
|
|
132
|
+
"""
|
|
133
|
+
Get the current merged trees.
|
|
134
|
+
|
|
135
|
+
Returns:
|
|
136
|
+
List of merged directory trees
|
|
137
|
+
|
|
138
|
+
Example:
|
|
139
|
+
```python
|
|
140
|
+
# Get the merged trees after adding multiple trees
|
|
141
|
+
trees = merger.get_merged_trees()
|
|
142
|
+
|
|
143
|
+
# Typically there will be a single root tree if all added trees are related
|
|
144
|
+
if len(trees) == 1:
|
|
145
|
+
root_tree = trees[0]
|
|
146
|
+
print(f"Root directory: {root_tree['dir_path']}")
|
|
147
|
+
```
|
|
148
|
+
"""
|
|
149
|
+
return self._merged_trees
|
|
150
|
+
|
|
151
|
+
def _place_tree_at_path(
|
|
152
|
+
self,
|
|
153
|
+
parent_tree: DirectoryNode,
|
|
154
|
+
child_tree: DirectoryNode,
|
|
155
|
+
path_parts: list[str],
|
|
156
|
+
) -> None:
|
|
157
|
+
"""
|
|
158
|
+
Place child_tree at the specified path under parent_tree.
|
|
159
|
+
|
|
160
|
+
This creates any necessary intermediate directories and then merges
|
|
161
|
+
the child tree at the correct location in the parent tree.
|
|
162
|
+
|
|
163
|
+
Args:
|
|
164
|
+
parent_tree: The parent tree to place under
|
|
165
|
+
child_tree: The child tree to place
|
|
166
|
+
path_parts: Path components from parent to child
|
|
167
|
+
|
|
168
|
+
Example:
|
|
169
|
+
```python
|
|
170
|
+
# Internal use to place /data/audio/sub1 under /data
|
|
171
|
+
# path_parts would be ['audio', 'sub1']
|
|
172
|
+
self._place_tree_at_path(
|
|
173
|
+
parent_tree=data_tree, # /data
|
|
174
|
+
child_tree=sub1_tree, # /data/audio/sub1
|
|
175
|
+
path_parts=['audio', 'sub1']
|
|
176
|
+
)
|
|
177
|
+
```
|
|
178
|
+
"""
|
|
179
|
+
current = parent_tree
|
|
180
|
+
|
|
181
|
+
# Navigate to the correct location, creating directories as needed
|
|
182
|
+
for part in path_parts:
|
|
183
|
+
if not part: # Skip empty path parts
|
|
184
|
+
continue
|
|
185
|
+
|
|
186
|
+
# Look for existing directory
|
|
187
|
+
next_node = None
|
|
188
|
+
for child in current["children"]:
|
|
189
|
+
if child["type"] == "dir" and Path(child["dir_path"]).name == part:
|
|
190
|
+
next_node = child
|
|
191
|
+
break
|
|
192
|
+
|
|
193
|
+
# Create directory if it doesn't exist
|
|
194
|
+
if next_node is None:
|
|
195
|
+
next_dir_path = f"{current['dir_path']}/{part}"
|
|
196
|
+
next_node = {
|
|
197
|
+
"type": "dir",
|
|
198
|
+
"dir_path": next_dir_path,
|
|
199
|
+
"hash": "",
|
|
200
|
+
"children": [],
|
|
201
|
+
}
|
|
202
|
+
current["children"].append(next_node)
|
|
203
|
+
|
|
204
|
+
current = next_node
|
|
205
|
+
|
|
206
|
+
# Merge the trees at the final location
|
|
207
|
+
self._merge_directory_nodes(current, child_tree)
|
|
208
|
+
|
|
209
|
+
def _handle_overlaps(
|
|
210
|
+
self,
|
|
211
|
+
overlaps: set[str],
|
|
212
|
+
new_path_map: dict[str, DirectoryNode | FileNode],
|
|
213
|
+
) -> bool:
|
|
214
|
+
"""
|
|
215
|
+
Handle overlapping paths between trees.
|
|
216
|
+
|
|
217
|
+
This method processes paths that exist in both the existing trees
|
|
218
|
+
and the new tree, merging directories and handling file conflicts.
|
|
219
|
+
|
|
220
|
+
Args:
|
|
221
|
+
overlaps: Set of overlapping paths
|
|
222
|
+
new_path_map: Path map for the new tree
|
|
223
|
+
|
|
224
|
+
Returns:
|
|
225
|
+
True if the tree was merged, False otherwise
|
|
226
|
+
|
|
227
|
+
Example:
|
|
228
|
+
```python
|
|
229
|
+
# Internal use when two directories have some paths in common
|
|
230
|
+
# but neither is a parent of the other
|
|
231
|
+
overlapping_paths = {'/data/shared/file1.txt', '/data/shared/configs'}
|
|
232
|
+
result = self._handle_overlaps(
|
|
233
|
+
overlaps=overlapping_paths,
|
|
234
|
+
new_path_map={'/data/shared/file1.txt': file_node, ...}
|
|
235
|
+
)
|
|
236
|
+
# If result is True, the trees were successfully merged
|
|
237
|
+
```
|
|
238
|
+
"""
|
|
239
|
+
merged = False
|
|
240
|
+
|
|
241
|
+
for path in sorted(overlaps, key=len):
|
|
242
|
+
existing_node = self._path_map.get(path)
|
|
243
|
+
new_node = new_path_map.get(path)
|
|
244
|
+
|
|
245
|
+
if not existing_node or not new_node:
|
|
246
|
+
continue
|
|
247
|
+
|
|
248
|
+
if existing_node["type"] == "dir" and new_node["type"] == "dir":
|
|
249
|
+
# Both are directories - merge them
|
|
250
|
+
self._merge_directory_nodes(
|
|
251
|
+
cast("DirectoryNode", existing_node),
|
|
252
|
+
cast("DirectoryNode", new_node),
|
|
253
|
+
)
|
|
254
|
+
merged = True
|
|
255
|
+
elif existing_node["type"] == "file" and new_node["type"] == "file":
|
|
256
|
+
# Both are files - propagate URIs and update if hash differs
|
|
257
|
+
existing_file = cast("FileNode", existing_node)
|
|
258
|
+
new_file = cast("FileNode", new_node)
|
|
259
|
+
|
|
260
|
+
# Always propagate URIs between files with identical hash
|
|
261
|
+
if existing_file["hash"] == new_file["hash"]:
|
|
262
|
+
self._propagate_uri(existing_file, new_file)
|
|
263
|
+
merged = True
|
|
264
|
+
else:
|
|
265
|
+
# Different hash - find the parent directory and update the file
|
|
266
|
+
for tree in self._merged_trees:
|
|
267
|
+
if self._update_file_in_tree(tree, existing_file, new_file):
|
|
268
|
+
merged = True
|
|
269
|
+
break
|
|
270
|
+
|
|
271
|
+
return merged
|
|
272
|
+
|
|
273
|
+
def _propagate_uri(self, file1: FileNode, file2: FileNode) -> None:
|
|
274
|
+
"""
|
|
275
|
+
Ensure URIs are propagated between files with the same hash.
|
|
276
|
+
|
|
277
|
+
If one file has a URI and the other doesn't, the URI will be copied.
|
|
278
|
+
|
|
279
|
+
Args:
|
|
280
|
+
file1: First file node
|
|
281
|
+
file2: Second file node
|
|
282
|
+
|
|
283
|
+
Example:
|
|
284
|
+
```python
|
|
285
|
+
# Internal use to ensure URIs are shared between identical files
|
|
286
|
+
# If file1 has a URI but file2 doesn't, file2 will get file1's URI
|
|
287
|
+
self._propagate_uri(
|
|
288
|
+
file1={"type": "file", "uri": "s3://bucket/file.txt", ...},
|
|
289
|
+
file2={"type": "file", "uri": "", ...}
|
|
290
|
+
)
|
|
291
|
+
# After: file2["uri"] == "s3://bucket/file.txt"
|
|
292
|
+
```
|
|
293
|
+
"""
|
|
294
|
+
if not file1["uri"] and file2["uri"]:
|
|
295
|
+
file1["uri"] = file2["uri"]
|
|
296
|
+
elif not file2["uri"] and file1["uri"]:
|
|
297
|
+
file2["uri"] = file1["uri"]
|
|
298
|
+
|
|
299
|
+
def _update_file_in_tree(
|
|
300
|
+
self,
|
|
301
|
+
tree: DirectoryNode,
|
|
302
|
+
old_file: FileNode,
|
|
303
|
+
new_file: FileNode,
|
|
304
|
+
) -> bool:
|
|
305
|
+
"""
|
|
306
|
+
Update a file in a directory tree.
|
|
307
|
+
|
|
308
|
+
This replaces old_file with new_file in the tree, recursively searching
|
|
309
|
+
if necessary.
|
|
310
|
+
|
|
311
|
+
Args:
|
|
312
|
+
tree: The directory tree to search
|
|
313
|
+
old_file: The file to replace
|
|
314
|
+
new_file: The new file
|
|
315
|
+
|
|
316
|
+
Returns:
|
|
317
|
+
True if the file was found and updated
|
|
318
|
+
|
|
319
|
+
Example:
|
|
320
|
+
```python
|
|
321
|
+
# Internal use to replace an outdated file with a newer version
|
|
322
|
+
success = self._update_file_in_tree(
|
|
323
|
+
tree=root_tree,
|
|
324
|
+
old_file={"type": "file", "hash": "abc123", ...},
|
|
325
|
+
new_file={"type": "file", "hash": "def456", ...}
|
|
326
|
+
)
|
|
327
|
+
# If success is True, the file was found and replaced
|
|
328
|
+
```
|
|
329
|
+
"""
|
|
330
|
+
for i, child in enumerate(tree["children"]):
|
|
331
|
+
if child is old_file:
|
|
332
|
+
tree["children"][i] = new_file
|
|
333
|
+
return True
|
|
334
|
+
|
|
335
|
+
if child["type"] == "dir" and self._update_file_in_tree(
|
|
336
|
+
cast("DirectoryNode", child),
|
|
337
|
+
old_file,
|
|
338
|
+
new_file,
|
|
339
|
+
):
|
|
340
|
+
return True
|
|
341
|
+
return False
|
|
342
|
+
|
|
343
|
+
def _build_maps(self, new_tree: DirectoryNode | None = None) -> None:
|
|
344
|
+
"""
|
|
345
|
+
Build or rebuild the path and hash maps.
|
|
346
|
+
|
|
347
|
+
This method populates the internal path and hash maps that enable
|
|
348
|
+
efficient lookups during tree merging.
|
|
349
|
+
|
|
350
|
+
Args:
|
|
351
|
+
new_tree: Optional new tree to add directly to the maps
|
|
352
|
+
|
|
353
|
+
Example:
|
|
354
|
+
```python
|
|
355
|
+
# Internal use to initialize maps with a new tree
|
|
356
|
+
self._build_maps(new_tree=first_tree)
|
|
357
|
+
|
|
358
|
+
# Or to rebuild all maps after changes
|
|
359
|
+
self._build_maps()
|
|
360
|
+
```
|
|
361
|
+
"""
|
|
362
|
+
self._path_map.clear()
|
|
363
|
+
self._hash_map.clear()
|
|
364
|
+
|
|
365
|
+
if new_tree:
|
|
366
|
+
self._build_path_and_hash_maps(new_tree, self._path_map, self._hash_map)
|
|
367
|
+
else:
|
|
368
|
+
for tree in self._merged_trees:
|
|
369
|
+
self._build_path_and_hash_maps(tree, self._path_map, self._hash_map)
|
|
370
|
+
self._propagate_uris_by_hash()
|
|
371
|
+
|
|
372
|
+
def _propagate_uris_by_hash(self) -> None:
|
|
373
|
+
"""
|
|
374
|
+
Ensure all files with the same hash have the same URI.
|
|
375
|
+
|
|
376
|
+
This function ensures that if multiple file nodes have the same hash,
|
|
377
|
+
but only some have URIs, the URI is propagated to all instances.
|
|
378
|
+
"""
|
|
379
|
+
for file_nodes in self._hash_map.values():
|
|
380
|
+
if len(file_nodes) <= 1:
|
|
381
|
+
continue
|
|
382
|
+
|
|
383
|
+
uri = next((node["uri"] for node in file_nodes if node["uri"]), "")
|
|
384
|
+
if not uri:
|
|
385
|
+
continue
|
|
386
|
+
|
|
387
|
+
for node in file_nodes:
|
|
388
|
+
if not node["uri"]:
|
|
389
|
+
node["uri"] = uri
|
|
390
|
+
|
|
391
|
+
def _build_path_and_hash_maps(
|
|
392
|
+
self,
|
|
393
|
+
node: DirectoryNode | FileNode,
|
|
394
|
+
path_map: dict[str, DirectoryNode | FileNode],
|
|
395
|
+
hash_map: dict[str, list[FileNode]],
|
|
396
|
+
) -> None:
|
|
397
|
+
"""
|
|
398
|
+
Build both path and hash maps simultaneously.
|
|
399
|
+
|
|
400
|
+
This method recursively processes a node (file or directory) and adds
|
|
401
|
+
it to the appropriate maps.
|
|
402
|
+
|
|
403
|
+
Args:
|
|
404
|
+
node: The node to process
|
|
405
|
+
path_map: Map of paths to nodes
|
|
406
|
+
hash_map: Map of file hashes to file nodes
|
|
407
|
+
|
|
408
|
+
Example:
|
|
409
|
+
```python
|
|
410
|
+
# Internal use to build maps for a tree
|
|
411
|
+
path_map = {}
|
|
412
|
+
hash_map = {}
|
|
413
|
+
self._build_path_and_hash_maps(
|
|
414
|
+
node=root_tree,
|
|
415
|
+
path_map=path_map,
|
|
416
|
+
hash_map=hash_map
|
|
417
|
+
)
|
|
418
|
+
# After: path_map contains all paths, hash_map contains all file hashes
|
|
419
|
+
```
|
|
420
|
+
"""
|
|
421
|
+
if node["type"] == "dir":
|
|
422
|
+
# Add directory to path map
|
|
423
|
+
dir_node = cast("DirectoryNode", node)
|
|
424
|
+
dir_path = dir_node["dir_path"]
|
|
425
|
+
path_map[dir_path] = dir_node
|
|
426
|
+
|
|
427
|
+
# Process children
|
|
428
|
+
for child in dir_node["children"]:
|
|
429
|
+
self._build_path_and_hash_maps(child, path_map, hash_map)
|
|
430
|
+
else: # File node
|
|
431
|
+
# Add file to path map
|
|
432
|
+
file_node = cast("FileNode", node)
|
|
433
|
+
file_path = file_node["final_real_path"]
|
|
434
|
+
path_map[file_path] = file_node
|
|
435
|
+
|
|
436
|
+
# Add file to hash map
|
|
437
|
+
file_hash = file_node["hash"]
|
|
438
|
+
if file_hash not in hash_map:
|
|
439
|
+
hash_map[file_hash] = []
|
|
440
|
+
hash_map[file_hash].append(file_node)
|
|
441
|
+
|
|
442
|
+
def _merge_directory_nodes(self, target_dir: DirectoryNode, source_dir: DirectoryNode) -> None:
|
|
443
|
+
"""
|
|
444
|
+
Merge contents from source directory into target directory.
|
|
445
|
+
|
|
446
|
+
This combines children from both directories, handling duplicates
|
|
447
|
+
and updating files as needed.
|
|
448
|
+
|
|
449
|
+
Args:
|
|
450
|
+
target_dir: Directory to merge into
|
|
451
|
+
source_dir: Directory to merge from
|
|
452
|
+
|
|
453
|
+
Example:
|
|
454
|
+
```python
|
|
455
|
+
# Internal use to merge two directory nodes
|
|
456
|
+
self._merge_directory_nodes(
|
|
457
|
+
target_dir={"type": "dir", "dir_path": "/data", "children": [...]},
|
|
458
|
+
source_dir={"type": "dir", "dir_path": "/data", "children": [...]}
|
|
459
|
+
)
|
|
460
|
+
# After: target_dir contains all children from both directories
|
|
461
|
+
```
|
|
462
|
+
"""
|
|
463
|
+
# Delegate file and directory processing to separate methods to reduce branches
|
|
464
|
+
path_to_index, hash_to_index = self._build_indices(target_dir)
|
|
465
|
+
|
|
466
|
+
# Process each child from source
|
|
467
|
+
for source_child in source_dir["children"]:
|
|
468
|
+
if source_child["type"] == "dir":
|
|
469
|
+
self._merge_directory_child(
|
|
470
|
+
target_dir,
|
|
471
|
+
cast("DirectoryNode", source_child),
|
|
472
|
+
path_to_index,
|
|
473
|
+
)
|
|
474
|
+
else: # file
|
|
475
|
+
self._merge_file_child(
|
|
476
|
+
target_dir,
|
|
477
|
+
cast("FileNode", source_child),
|
|
478
|
+
path_to_index,
|
|
479
|
+
hash_to_index,
|
|
480
|
+
)
|
|
481
|
+
|
|
482
|
+
# Update hash
|
|
483
|
+
self._update_directory_hash(target_dir)
|
|
484
|
+
|
|
485
|
+
def _build_indices(self, dir_node: DirectoryNode) -> tuple[dict[str, int], dict[str, int]]:
|
|
486
|
+
"""
|
|
487
|
+
Build indices for efficient child lookups.
|
|
488
|
+
|
|
489
|
+
Returns:
|
|
490
|
+
A tuple of (path_to_index, hash_to_index) dictionaries
|
|
491
|
+
"""
|
|
492
|
+
path_to_index: dict[str, int] = {}
|
|
493
|
+
hash_to_index: dict[str, int] = {}
|
|
494
|
+
|
|
495
|
+
for i, child in enumerate(dir_node["children"]):
|
|
496
|
+
if child["type"] == "dir":
|
|
497
|
+
path_to_index[cast("DirectoryNode", child)["dir_path"]] = i
|
|
498
|
+
else: # file
|
|
499
|
+
file_child = cast("FileNode", child)
|
|
500
|
+
path_to_index[file_child["final_real_path"]] = i
|
|
501
|
+
hash_to_index[file_child["hash"]] = i
|
|
502
|
+
|
|
503
|
+
return path_to_index, hash_to_index
|
|
504
|
+
|
|
505
|
+
    def _merge_directory_child(
        self,
        target_dir: DirectoryNode,
        source_dir: DirectoryNode,
        path_to_index: dict[str, int],
    ) -> None:
        """Merge a directory child from source into target directory.

        Args:
            target_dir: Directory receiving the child.
            source_dir: Incoming directory child to merge in.
            path_to_index: Map of target children's paths to list positions,
                built by ``_build_indices`` before the merge loop started.
        """
        dir_path = source_dir["dir_path"]
        if dir_path in path_to_index:
            # Directory exists in both - merge recursively
            index = path_to_index[dir_path]
            existing_child = target_dir["children"][index]
            if existing_child["type"] == "dir":
                self._merge_directory_nodes(
                    cast("DirectoryNode", existing_child),
                    source_dir,
                )
            # NOTE(review): if the existing child at this path is a *file*
            # (path collision between a file and a directory), source_dir is
            # silently dropped here - confirm this is intended.
        else:
            # Directory only in source - add to target
            target_dir["children"].append(source_dir)
526
|
+
    def _merge_file_child(
        self,
        target_dir: DirectoryNode,
        source_file: FileNode,
        path_to_index: dict[str, int],
        hash_to_index: dict[str, int],
    ) -> None:
        """Merge a file child from source into target directory.

        Resolution order: a target child at the same path wins first
        (replaced when the hash differs, URI-propagated when it matches);
        then a target child with the same content hash at a different path
        (URI-propagated, then appended); otherwise the file is appended.

        Args:
            target_dir: Directory receiving the file.
            source_file: Incoming file node.
            path_to_index: Map of target children's paths to list positions.
            hash_to_index: Map of target file children's hashes to positions.
        """
        file_path = source_file["final_real_path"]
        file_hash = source_file["hash"]

        if file_path in path_to_index:
            # File exists at same path - update if hash differs
            index = path_to_index[file_path]
            existing_child = target_dir["children"][index]
            # NOTE(review): path_to_index also holds directory paths, so
            # existing_child is assumed to be a file here - confirm file and
            # directory paths cannot collide.
            if existing_child["hash"] != file_hash:
                # Content changed: replace in place. The local indices are
                # not refreshed afterwards, so a later source child may still
                # resolve to the replaced node's position via hash_to_index.
                target_dir["children"][index] = source_file
            elif existing_child["type"] == "file":
                # Same file - propagate URI if needed
                self._propagate_uri(cast("FileNode", existing_child), source_file)
        elif file_hash in hash_to_index:
            # Same file content exists but at different path
            index = hash_to_index[file_hash]
            existing_child = target_dir["children"][index]
            if existing_child["type"] == "file":
                # Propagate URI if needed
                self._propagate_uri(cast("FileNode", existing_child), source_file)

            # Share the incoming URI with every previously indexed file of
            # the same hash before appending this duplicate-content copy.
            if source_file["uri"] and file_hash in self._hash_map:
                for other_file in self._hash_map[file_hash]:
                    if not other_file["uri"]:
                        other_file["uri"] = source_file["uri"]
            target_dir["children"].append(source_file)
        else:
            # File only in source - add to target
            target_dir["children"].append(source_file)
563
|
+
def _update_directory_hash(self, dir_node: DirectoryNode) -> str:
|
|
564
|
+
"""
|
|
565
|
+
Update the hash of a directory based on its children.
|
|
566
|
+
|
|
567
|
+
This computes a content-based hash for a directory by combining
|
|
568
|
+
the hashes of all its children.
|
|
569
|
+
|
|
570
|
+
Args:
|
|
571
|
+
dir_node: The directory to update
|
|
572
|
+
|
|
573
|
+
Returns:
|
|
574
|
+
The updated hash
|
|
575
|
+
|
|
576
|
+
Example:
|
|
577
|
+
```python
|
|
578
|
+
# Internal use to compute directory hash after changes
|
|
579
|
+
new_hash = self._update_directory_hash(
|
|
580
|
+
dir_node={"type": "dir", "children": [...]}
|
|
581
|
+
)
|
|
582
|
+
# After: dir_node["hash"] is updated and returned
|
|
583
|
+
```
|
|
584
|
+
"""
|
|
585
|
+
child_hashes = []
|
|
586
|
+
|
|
587
|
+
for child in dir_node["children"]:
|
|
588
|
+
if child["type"] == "file":
|
|
589
|
+
child_hashes.append(cast("FileNode", child)["hash"])
|
|
590
|
+
else:
|
|
591
|
+
child_hash = self._update_directory_hash(cast("DirectoryNode", child))
|
|
592
|
+
child_hashes.append(child_hash)
|
|
593
|
+
|
|
594
|
+
child_hashes.sort() # Ensure consistent hash regardless of order
|
|
595
|
+
hash_input = "|".join(child_hashes)
|
|
596
|
+
dir_hash = hashlib.sha1(hash_input.encode()).hexdigest()[:16] # noqa: S324
|
|
597
|
+
|
|
598
|
+
dir_node["hash"] = dir_hash
|
|
599
|
+
return dir_hash
|