BETTER_NMA-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- BETTER_NMA/__init__.py +15 -0
- BETTER_NMA/adversarial_score.py +19 -0
- BETTER_NMA/change_cluster_name.py +0 -0
- BETTER_NMA/detect_attack.py +108 -0
- BETTER_NMA/find_lca.py +21 -0
- BETTER_NMA/main.py +285 -0
- BETTER_NMA/nma_creator.py +108 -0
- BETTER_NMA/plot.py +131 -0
- BETTER_NMA/query_image.py +22 -0
- BETTER_NMA/train_adversarial_detector.py +21 -0
- BETTER_NMA/utilss/__init__.py +0 -0
- BETTER_NMA/utilss/classes/__init__.py +0 -0
- BETTER_NMA/utilss/classes/adversarial_dataset.py +61 -0
- BETTER_NMA/utilss/classes/adversarial_detector.py +63 -0
- BETTER_NMA/utilss/classes/dendrogram.py +131 -0
- BETTER_NMA/utilss/classes/edges_dataframe.py +53 -0
- BETTER_NMA/utilss/classes/preprocessing/__init__.py +0 -0
- BETTER_NMA/utilss/classes/preprocessing/batch_predictor.py +28 -0
- BETTER_NMA/utilss/classes/preprocessing/graph_builder.py +46 -0
- BETTER_NMA/utilss/classes/preprocessing/heap_processor.py +30 -0
- BETTER_NMA/utilss/classes/preprocessing/hierarchical_clustering_builder.py +102 -0
- BETTER_NMA/utilss/classes/preprocessing/tree_node.py +71 -0
- BETTER_NMA/utilss/classes/preprocessing/z_builder.py +93 -0
- BETTER_NMA/utilss/classes/score_calculator.py +165 -0
- BETTER_NMA/utilss/classes/whitebox_testing.py +35 -0
- BETTER_NMA/utilss/enums/__init__.py +0 -0
- BETTER_NMA/utilss/enums/explanation_method.py +6 -0
- BETTER_NMA/utilss/enums/heap_types.py +5 -0
- BETTER_NMA/utilss/models_utils.py +18 -0
- BETTER_NMA/utilss/photos_uitls.py +72 -0
- BETTER_NMA/utilss/photos_utils.py +104 -0
- BETTER_NMA/utilss/verbal_explanation.py +15 -0
- BETTER_NMA/utilss/wordnet_utils.py +177 -0
- BETTER_NMA/white_box_testing.py +101 -0
- BETTER_NMA-1.0.0.dist-info/METADATA +11 -0
- BETTER_NMA-1.0.0.dist-info/RECORD +38 -0
- BETTER_NMA-1.0.0.dist-info/WHEEL +5 -0
- BETTER_NMA-1.0.0.dist-info/top_level.txt +1 -0
+++ BETTER_NMA/utilss/wordnet_utils.py
@@ -0,0 +1,177 @@
```python
from nltk.corpus import wordnet as wn
from nltk.corpus.reader.wordnet import Synset  # the class; wn.synset() is a lookup function
from typing import Optional
import re
from collections import deque, Counter  # merged the two duplicate `collections` imports
from itertools import combinations, product


def folder_name_to_number(folder_name):
    # Map a class name to its ImageNet-style WordNet folder ID ("n" + 8-digit offset).
    synsets = wn.synsets(folder_name)
    if synsets:
        offset = synsets[0].offset()
        folder_number = 'n{:08d}'.format(offset)
        return folder_number


def common_group(groups):
    # Gather every hypernym on the first sense's hypernym paths for each group,
    # then keep only the hypernyms shared by all groups.
    common_hypernyms = []
    hierarchy = {}
    for group in groups:
        hierarchy[group] = []
        synsets = wn.synsets(group)
        if synsets:
            hypernyms = synsets[0].hypernym_paths()
            for path in hypernyms:
                hierarchy[group].extend([node.name().split('.')[0] for node in path])

    if len(hierarchy) == 1:
        common_hypernyms = list(hierarchy.values())[0]
    else:
        # Note: groups.pop() mutates the caller's list; the popped group's own
        # hypernym list is still checked, since it remains in hierarchy.values().
        for hypernym in hierarchy[groups.pop()]:
            if all(hypernym in hypernyms for hypernyms in hierarchy.values()):
                common_hypernyms.append(hypernym)
    return common_hypernyms[::-1]
```
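For orientation, a short interactive sketch of what these two helpers return. It assumes the NLTK WordNet corpus is installed (`nltk.download("wordnet")`); the concrete values are those of WordNet 3.0:

```python
from BETTER_NMA.utilss.wordnet_utils import folder_name_to_number, common_group

# wn.synsets("dog")[0] is dog.n.01, offset 2084071 in WordNet 3.0, so:
print(folder_name_to_number("dog"))  # 'n02084071'

# Names of hypernyms shared by every group; duplicates can appear because
# hypernyms are collected across all sense paths before the reversal.
print(common_group(["cat", "dog"]))  # includes 'carnivore', 'mammal', 'entity'
```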
```python
def get_all_leaf_names(node):
    # Leaves are nodes without "children"; auto-generated "cluster" names are skipped.
    if "children" not in node:
        if "cluster" not in node["name"]:
            return [node["name"]]
        return []
    names = []
    for child in node["children"]:
        names.extend(get_all_leaf_names(child))
    return names


def process_hierarchy(hierarchy_data):
    return _rename_clusters(hierarchy_data)


def _get_top_synsets(phrase: str, pos=wn.NOUN, max_senses: int = 15) -> list[Synset]:
    lemma = phrase.strip().lower().replace(" ", "_")
    syns = wn.synsets(lemma, pos=pos)
    return syns[:max_senses] if syns else []


def _find_best_common_hypernym(
    leaves: list[str],
    max_senses_per_word: int = 5,
    banned_lemmas: Optional[set[str]] = None
) -> Optional[str]:

    if banned_lemmas is None:
        # Hypernyms too generic to serve as cluster names. Candidate lemmas are
        # lowercased before the membership test, so the entries are lowercase too
        # (the original set mixed cases and repeated "Whole"; its capitalized
        # entries could never match).
        banned_lemmas = {"entity", "object", "physical_entity", "thing", "whole", "artifact"}

    word_to_synsets: dict[str, list[Synset]] = {}
    for w in leaves:
        syns = _get_top_synsets(w, wn.NOUN, max_senses_per_word)
        if syns:
            word_to_synsets[w] = syns

    if len(word_to_synsets) < 2:
        return None

    # Count how often each synset turns up as a lowest common hypernym (LCH)
    # across all pairs of words and all pairs of their top senses.
    lch_counter: Counter[Synset] = Counter()
    words_list = list(word_to_synsets.keys())

    for w1, w2 in combinations(words_list, 2):
        syns1 = word_to_synsets[w1]
        syns2 = word_to_synsets[w2]

        for s1, s2 in product(syns1, syns2):
            try:
                common = s1.lowest_common_hypernyms(s2)
            except Exception as e:
                print(f"Error computing LCH({s1.name()}, {s2.name()}): {e}")
                continue
            for hyp in common:
                lch_counter[hyp] += 1

    if not lch_counter:
        return None

    # Rank by frequency, then by depth, so frequent and specific hypernyms win.
    candidates = sorted(
        lch_counter.items(),
        key=lambda item: (item[1], item[0].min_depth()),
        reverse=True
    )

    # Drop banned generic names, unless that would leave nothing.
    filtered: list[tuple[Synset, int]] = []
    for syn, freq in candidates:
        lemma = syn.name().split(".")[0].lower()
        if lemma in banned_lemmas:
            continue
        filtered.append((syn, freq))

    if not filtered:
        filtered = candidates

    best_synset, _ = filtered[0]  # the frequency is not needed past this point
    return best_synset.name().split(".")[0].replace(" ", "_").lower()
```
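The pair-counting above leans entirely on WordNet's `lowest_common_hypernyms`; a quick sanity check of that core call (a standard NLTK example, values per WordNet 3.0):

```python
from nltk.corpus import wordnet as wn

s1 = wn.synset("cat.n.01")
s2 = wn.synset("dog.n.01")
print(s1.lowest_common_hypernyms(s2))  # [Synset('carnivore.n.01')]
# So for leaves like ["cat", "dog"], carnivore.n.01 accumulates votes and,
# not being in banned_lemmas, comes out as the cluster label "carnivore".
```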
```python
def find_common_hypernyms(
    words: list[str],
    abstraction_level: int = 0,  # accepted for API compatibility, currently unused
) -> Optional[str]:

    # Normalize leaf names: lowercase, spaces to underscores, numeric suffixes
    # such as "_2" stripped; auto-generated "cluster" names are ignored.
    clean_leaves = [
        re.sub(r'_\d+$', '', w.strip().lower().replace(" ", "_"))
        for w in words
        if w and "cluster" not in w.lower()
    ]

    if not clean_leaves:
        return None

    if len(clean_leaves) == 1:
        # Single leaf: use its direct hypernym on the longest root-to-leaf path.
        word = clean_leaves[0]
        synsets = _get_top_synsets(word, wn.NOUN, max_senses=10)
        if not synsets:
            return None

        paths = synsets[0].hypernym_paths()  # list of root-to-leaf paths
        if not paths:
            return None

        longest_path = max(paths, key=len)
        if len(longest_path) >= 2:
            candidate = longest_path[-2]
            name = candidate.name().split(".")[0].replace(" ", "_").lower()
            if name not in {word, "entity"}:  # name is already lowercase
                return name
        return None
    return _find_best_common_hypernym(clean_leaves, max_senses_per_word=5)
```
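Combining the two branches, a hedged sketch of expected outputs; the exact strings depend on the installed WordNet version and NLTK's sense ordering, so treat these as typical values rather than guarantees:

```python
from BETTER_NMA.utilss.wordnet_utils import find_common_hypernyms

# Multi-leaf branch: pairwise LCH voting across the leaves' top senses.
print(find_common_hypernyms(["cat", "dog", "horse_2"]))
# Typically a shared ancestor such as 'carnivore' or 'placental';
# the trailing "_2" is stripped by the normalization step.

# Single-leaf branch: the parent synset on the longest hypernym path.
print(find_common_hypernyms(["dog"]))
# Typically 'canine' in WordNet 3.0.
```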
```python
def _rename_clusters(tree):
    used_names = set()
    all_leaf_names = {leaf.lower() for leaf in get_all_leaf_names(tree)}

    # Breadth-first pass collects the internal nodes; renaming then runs in
    # reversed BFS order (bottom-up), so a parent sees its children's final names.
    queue = deque()
    queue.append(tree)
    postprocess_nodes = []

    while queue:
        node = queue.popleft()
        if "children" in node:
            queue.extend(node["children"])
            postprocess_nodes.append(node)

    for node in reversed(postprocess_nodes):
        if "cluster" not in node["name"]:
            continue

        child_names = [child["name"] for child in node["children"] if "name" in child]
        candidate = find_common_hypernyms(child_names)

        if candidate:
            # De-duplicate against leaf names and already-assigned cluster
            # names by suffixing: "mammal", "mammal_2", "mammal_3", ...
            base = candidate
            unique = base
            idx = 1
            while unique.lower() in all_leaf_names or unique.lower() in {n.lower() for n in used_names}:
                idx += 1
                unique = f"{base}_{idx}"
            node["name"] = unique
            used_names.add(unique)

    return tree
```
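To close the loop, a minimal end-to-end sketch of `process_hierarchy` on a hand-built tree; the `"name"`/`"children"` dict layout mirrors what the traversal above expects, and the resulting names depend on WordNet, so the output shown is only indicative:

```python
from BETTER_NMA.utilss.wordnet_utils import process_hierarchy

tree = {
    "name": "cluster_0",
    "children": [
        {"name": "cluster_1", "children": [{"name": "cat"}, {"name": "dog"}]},
        {"name": "horse"},
    ],
}
renamed = process_hierarchy(tree)
# cluster_1 is renamed first (bottom-up), so cluster_0 sees the new child name.
print(renamed["children"][0]["name"])  # indicatively 'carnivore'
print(renamed["name"])                 # indicatively 'placental'
```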
+++ BETTER_NMA/white_box_testing.py
@@ -0,0 +1,101 @@
```python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from .utilss.classes.whitebox_testing import WhiteBoxTesting
from .utilss.classes.edges_dataframe import EdgesDataframe
from .utilss.photos_utils import encode_image_to_base64


def visualize_problematic_images(x_train, problematic_imgs_dict, max_display=5):
    # Show up to max_display flagged training images, each followed by its
    # match triples, printed as "source -> target: score".
    count = 0
    for image_id, matches in problematic_imgs_dict.items():
        if count >= max_display:
            break

        image = x_train[image_id]

        plt.figure(figsize=(8, 6))
        plt.imshow(image.astype("uint8"))
        plt.title(f"Image ID: {image_id}")
        plt.axis('off')
        plt.show()

        print(f"Image ID: {image_id}")
        print("Matches:")
        for match in matches:
            print(f"  {match[0]} -> {match[1]}: {match[2]:.8f}")
        print("-" * 30)

        count += 1


def analyze_white_box_results(problematic_imgs_dict, x_train=None, encode_images=True):
    # Turn the detector output into JSON-friendly records, optionally attaching
    # a base64-encoded copy of each image.
    results = []

    for image_id, matches in problematic_imgs_dict.items():
        result_item = {
            "image_id": str(image_id),
            "matches": matches,
            "num_matches": len(matches)
        }

        if encode_images and x_train is not None and isinstance(image_id, int) and image_id < len(x_train):
            img = x_train[image_id]
            # Rescale float images in [0, 1] to uint8 before encoding.
            if img.max() <= 1.0:
                img = (img * 255).astype(np.uint8)

            result_item["image"] = encode_image_to_base64(img)

        results.append(result_item)

    return results
```
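A minimal usage sketch for the record builder. The shape of `problematic_imgs_dict` (integer image IDs mapping to `(source, target, score)` triples) is inferred from the loops above, and the arrays here are stand-ins:

```python
import numpy as np
from BETTER_NMA.white_box_testing import analyze_white_box_results

# Hypothetical detector output: image index -> list of (source, target, score).
problematic = {
    7: [("cat", "dog", 1.23e-06)],
    42: [("truck", "car", 4.2e-05), ("truck", "bus", 9.8e-06)],
}
x_train = np.random.rand(100, 32, 32, 3)  # stand-in float images in [0, 1]

records = analyze_white_box_results(problematic, x_train=x_train, encode_images=True)
print(records[0]["image_id"], records[0]["num_matches"])  # 7 1
```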
```python
def get_white_box_analysis(edges_df_path, model_filename, dataset_str, source_labels, target_labels, x_train=None, get_image_by_id_func=None):
    if not edges_df_path or not model_filename or not source_labels or not target_labels:
        raise ValueError("Missing required parameters")

    # Load the edges dataframe and run the white-box tester over it to flag
    # problematic images for the given source/target labels.
    edges_data = EdgesDataframe(model_filename, edges_df_path)
    edges_data.load_dataframe()
    df = edges_data.get_dataframe()

    whitebox_testing = WhiteBoxTesting(model_filename)
    problematic_imgs_dict = whitebox_testing.find_problematic_images(
        source_labels, target_labels, df, dataset_str)

    imgs_list = []

    for image_id, matches in problematic_imgs_dict.items():
        try:
            # Resolve the image from the in-memory dataset when possible,
            # otherwise through the caller-supplied lookup function.
            if x_train is not None and isinstance(image_id, int) and image_id < len(x_train):
                img = x_train[image_id]
                image_filename = str(image_id)
            elif get_image_by_id_func is not None:
                img, image_filename = get_image_by_id_func(image_id, dataset_str)
            else:
                print(f"Cannot retrieve image {image_id}: no data source provided")
                continue

            if img.max() <= 1.0:
                img = (img * 255).astype(np.uint8)

            original_image_base64 = encode_image_to_base64(img)

            imgs_list.append({
                "image": original_image_base64,
                "image_id": image_filename,
                "matches": matches,
            })
        except Exception as e:
            print(f"Error processing image {image_id}: {str(e)}")

    return imgs_list
```
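And a hedged call sketch for the full pipeline. The file names, dataset tag, and labels below are hypothetical placeholders; `EdgesDataframe` and `WhiteBoxTesting` are instantiated internally from the package's `utilss.classes` modules:

```python
from BETTER_NMA.white_box_testing import (
    get_white_box_analysis,
    save_white_box_results,
)

results = get_white_box_analysis(
    edges_df_path="edges.csv",        # hypothetical edges dataframe path
    model_filename="my_model.h5",     # hypothetical model file
    dataset_str="cifar10",            # hypothetical dataset tag
    source_labels=["cat"],
    target_labels=["dog"],
    x_train=x_train,                  # training images, as in the sketch above
)
save_white_box_results(results, "white_box_results.json")
```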
```python
def save_white_box_results(results, output_path):
    import json
    with open(output_path, 'w') as f:
        json.dump(results, f, indent=2)
    print(f"Results saved to: {output_path}")


def load_white_box_results(input_path):
    import json
    with open(input_path, 'r') as f:
        return json.load(f)
```
+++ BETTER_NMA-1.0.0.dist-info/METADATA
@@ -0,0 +1,11 @@
```
Metadata-Version: 2.1
Name: BETTER_NMA
Version: 1.0.0
Requires-Dist: tensorflow
Requires-Dist: pandas
Requires-Dist: igraph
Requires-Dist: numpy
Requires-Dist: scikit-learn
Requires-Dist: matplotlib
Requires-Dist: nltk
Requires-Dist: keras
```
+++ BETTER_NMA-1.0.0.dist-info/RECORD
@@ -0,0 +1,38 @@
```
BETTER_NMA/__init__.py,sha256=ePaQnto0n4hccz2490Z7bxwcbtONVAa6nWqg7SL4W1Y,428
BETTER_NMA/adversarial_score.py,sha256=qgScTqS-aJ2q4kFom505hBtonVzKK67fGS09J1_-G3o,875
BETTER_NMA/change_cluster_name.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
BETTER_NMA/detect_attack.py,sha256=s7YwTVMJFABSMt2aISR-zaIUxFaSWm9oODc9yF12KPY,4327
BETTER_NMA/find_lca.py,sha256=UlyftOJmbSPuXzxvcheRb_IrdCqBsaSQHLchIRZIR-0,812
BETTER_NMA/main.py,sha256=73MH7v6LAs2kQexumLBDykVnhH774P-HPrnhV1LZ7MM,11318
BETTER_NMA/nma_creator.py,sha256=M-LlZGRkxhGYLHpaXTNoZj9AUH7uvev7hq7tbILWMLI,5137
BETTER_NMA/plot.py,sha256=nj2ca-ybzGMlo6HhCngyjGUNaJDmfsPxF5ad9xpxzvE,4383
BETTER_NMA/query_image.py,sha256=13AQ9-8QdzaIwH5-ELX3z3iJBP8nTDe-SMtwQve-1ek,906
BETTER_NMA/train_adversarial_detector.py,sha256=nMaQ-Pm2vP84qNR1GoKQiVPpmMC3rdorzDMf5gDwKTE,977
BETTER_NMA/white_box_testing.py,sha256=zfhK8G-2cJH1AMevPywVnc05IhSqttf3YxQ6abdpM78,3524
BETTER_NMA/utilss/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
BETTER_NMA/utilss/models_utils.py,sha256=gBXY2LFH4iR-2GZmHeUnnB5n9t3VdjIc9sugHDrD3AM,671
BETTER_NMA/utilss/photos_uitls.py,sha256=mPe0zXTqN1RvkfDf0eiwzsniybicbomBkCQdTA-W8qQ,3160
BETTER_NMA/utilss/photos_utils.py,sha256=K6VWOSo3fY00LIW0yZPrI0MH0NrY5SPZh46RrtF0mj0,4406
BETTER_NMA/utilss/verbal_explanation.py,sha256=_hrYZUjBUYOfuGr7t5r-DACooR5d60dRtGfUj7FbeZw,549
BETTER_NMA/utilss/wordnet_utils.py,sha256=_A_gbdR7tf5tiyN2Oe5sB4vvBkWnr4KUl5e9iq5ft8c,5535
BETTER_NMA/utilss/classes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
BETTER_NMA/utilss/classes/adversarial_dataset.py,sha256=6LrL_KrmUYuhsSGTmV1DBuV2sVAR9nIydUAW6FbPR8c,2447
BETTER_NMA/utilss/classes/adversarial_detector.py,sha256=BE_SxNEwcvuHERBiefefOmk1k6NJSo6juehkAjkEHuQ,2331
BETTER_NMA/utilss/classes/dendrogram.py,sha256=vtKBFfwzcz8k01Goc83pZlWC2pO86endTJURlkUWVQI,5141
BETTER_NMA/utilss/classes/edges_dataframe.py,sha256=q-RQ6beOeZeIgdEzwi8T5Ag2NBFySv7-ITD5m989nl4,1896
BETTER_NMA/utilss/classes/score_calculator.py,sha256=4ZyTA2BnWh2XhnjA5AoYAbbA7AOCDvUikToz6m5m4nM,6005
BETTER_NMA/utilss/classes/whitebox_testing.py,sha256=4WSEjQ5gl6f8xzWADAagZ3WtMHE889rW-zcYld9REnw,1367
BETTER_NMA/utilss/classes/preprocessing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
BETTER_NMA/utilss/classes/preprocessing/batch_predictor.py,sha256=baUsz_oj_JLEpUoy_o08m_AK0uaWb5RB_cavk5Itg1A,946
BETTER_NMA/utilss/classes/preprocessing/graph_builder.py,sha256=ILumiBY9BUIOxrIvq8C-8n945pK-t94Et6gZwJB-364,1672
BETTER_NMA/utilss/classes/preprocessing/heap_processor.py,sha256=KblmkVWVfMYtpZa4Wy1Ry0lVfdSr6h8LySt4S-lvIGo,1064
BETTER_NMA/utilss/classes/preprocessing/hierarchical_clustering_builder.py,sha256=YAIElJS_fSffIb3D2N1OZu9U6z7RYrHQTfB6bH4-VPI,4027
BETTER_NMA/utilss/classes/preprocessing/tree_node.py,sha256=ELG--4ekRSPGCRWwvs8IsEC88ytaXQuGs4lnyLqriLE,2195
BETTER_NMA/utilss/classes/preprocessing/z_builder.py,sha256=T8ETfL7mMOgEj7oYNsw6-uUB0zGPLx7I26WEcYbQaw4,3553
BETTER_NMA/utilss/enums/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
BETTER_NMA/utilss/enums/explanation_method.py,sha256=Ang-rjvxO4AJ1IH4mwS8sNpSwt9jn3PlqFbPPT-R9I8,150
BETTER_NMA/utilss/enums/heap_types.py,sha256=0z1d2qu1ZCbpWRXKD1dTopn3M4G1CxRQW9HWxVxyPIA,88
BETTER_NMA-1.0.0.dist-info/METADATA,sha256=XouVuHepQ1tDjgxeQKesgEREOrVzN9oFP-iYBafHNfU,251
BETTER_NMA-1.0.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
BETTER_NMA-1.0.0.dist-info/top_level.txt,sha256=SVRNqWPvCnynWVyXNAYnf9CSQIvMAvE6iyyiGHodQgY,11
BETTER_NMA-1.0.0.dist-info/RECORD,,
```
+++ BETTER_NMA-1.0.0.dist-info/top_level.txt
@@ -0,0 +1 @@
```
BETTER_NMA
```