affective-manifold 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- affective_manifold-0.1.0/PKG-INFO +7 -0
- affective_manifold-0.1.0/affective_manifold/__init__.py +7 -0
- affective_manifold-0.1.0/affective_manifold/builder.py +204 -0
- affective_manifold-0.1.0/affective_manifold/projector.py +186 -0
- affective_manifold-0.1.0/affective_manifold.egg-info/PKG-INFO +7 -0
- affective_manifold-0.1.0/affective_manifold.egg-info/SOURCES.txt +9 -0
- affective_manifold-0.1.0/affective_manifold.egg-info/dependency_links.txt +1 -0
- affective_manifold-0.1.0/affective_manifold.egg-info/requires.txt +4 -0
- affective_manifold-0.1.0/affective_manifold.egg-info/top_level.txt +1 -0
- affective_manifold-0.1.0/pyproject.toml +18 -0
- affective_manifold-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import numpy as np
|
|
3
|
+
import nltk
|
|
4
|
+
|
|
5
|
+
from nltk.corpus import wordnet as wn
|
|
6
|
+
from nltk.stem import WordNetLemmatizer
|
|
7
|
+
|
|
8
|
+
from sentence_transformers import SentenceTransformer
|
|
9
|
+
from sklearn.decomposition import TruncatedSVD
|
|
10
|
+
|
|
11
|
+
class AffectiveManifoldBuilder:
    """Build an affective word lexicon ("manifold") from WordNet definitions.

    Candidate words come from WordNet synsets in a fixed set of lexical
    domains. Each candidate's definition is embedded with a
    SentenceTransformer model and compared against three groups of anchor
    words (positive, negative, neutral) to derive valence, arousal and
    salience scores. Words passing the thresholds are saved to disk.
    """

    def __init__(
        self,
        model_name="all-MiniLM-L6-v2",
        target_vocab_size=4000,
        min_affective_gap=0.08,
        min_salience=0.12,
        random_state=42
    ):
        """Load the embedding model and store the filtering configuration.

        Args:
            model_name: Name handed to ``SentenceTransformer``.
            target_vocab_size: Maximum number of words kept by ``build``.
            min_affective_gap: Minimum ``|valence|`` a word needs to be kept.
            min_salience: Minimum salience a word needs to be kept.
            random_state: Seed passed to ``TruncatedSVD``.
        """
        # Keep the requested name so the saved config can report it verbatim.
        self.model_name = model_name
        self.model = SentenceTransformer(model_name)
        self.target_vocab_size = target_vocab_size
        self.min_affective_gap = min_affective_gap
        self.min_salience = min_salience
        self.random_state = random_state
        self.lemmatizer = WordNetLemmatizer()

        # WordNet lexicographer files whose synsets are considered candidates.
        self.target_domains = {
            "adj.all", "noun.feeling", "noun.cognition",
            "noun.behavior", "verb.emotion", "verb.social"
        }

        self.pos_anchors = ["good", "pleasant", "joy", "love", "calm", "beautiful"]
        self.neg_anchors = ["bad", "pain", "fear", "hate", "ugly", "anger"]
        self.neu_anchors = ["object", "thing", "entity", "item", "concept", "fact"]

    def _wn_pos(self, syn):
        """Map a synset's POS tag to one the lemmatizer accepts.

        Satellite adjectives ("s") are treated as adjectives ("a"); any
        unrecognised tag falls back to noun ("n").
        """
        return {
            "n": "n",
            "v": "v",
            "a": "a",
            "s": "a",
            "r": "r"
        }.get(syn.pos(), "n")

    def _normalize(self, x):
        """Return ``x`` as float32 with every row scaled to unit L2 norm.

        Norms are clamped to at least 1e-12 so all-zero rows do not divide
        by zero (they stay all-zero).
        """
        x = np.asarray(x, dtype=np.float32)
        norms = np.linalg.norm(x, axis=1, keepdims=True)
        norms = np.maximum(norms, 1e-12)
        return x / norms

    def _encode(self, texts):
        """Embed ``texts`` with the model and L2-normalise each row."""
        emb = self.model.encode(texts, show_progress_bar=False)
        return self._normalize(emb)

    def _anchor_groups(self):
        """Encode the anchor words and return them split by group.

        Returns:
            A ``(positive, negative, neutral)`` tuple of embedding matrices.

        Raises:
            ValueError: If any anchor group is empty.
        """
        groups = (self.pos_anchors, self.neg_anchors, self.neu_anchors)
        if any(len(group) == 0 for group in groups):
            raise ValueError("Each anchor group must contain at least one word.")

        encoded = self._encode(
            list(self.pos_anchors) + list(self.neg_anchors) + list(self.neu_anchors)
        )
        # Split by explicit offsets; a negative-length slice such as
        # encoded[-0:] would silently select every row.
        n_pos = len(self.pos_anchors)
        n_neg = len(self.neg_anchors)
        return (
            encoded[:n_pos],
            encoded[n_pos:n_pos + n_neg],
            encoded[n_pos + n_neg:],
        )

    def _affect_scores(self, emb, pos, neg, neu):
        """Score embeddings against the three anchor groups.

        Works for a single embedding vector or a matrix of row embeddings.

        Returns:
            ``(valence, arousal, salience)`` where valence is mean positive
            similarity minus mean negative similarity, arousal is the larger
            of the two minus mean neutral similarity, and salience is
            ``|valence| + arousal``.
        """
        pos_mean = (emb @ pos.T).mean(axis=-1)
        neg_mean = (emb @ neg.T).mean(axis=-1)
        neu_mean = (emb @ neu.T).mean(axis=-1)

        valence = pos_mean - neg_mean
        arousal = np.maximum(pos_mean, neg_mean) - neu_mean
        salience = np.abs(valence) + arousal
        return valence, arousal, salience

    def _collect_candidates(self):
        """Gather unique candidate words and their definition strings.

        Iterates every WordNet synset, keeps those whose lexname is in
        ``self.target_domains``, and takes the head word of the synset name.
        Words of two characters or fewer, multi-word entries and
        non-alphabetic entries are skipped. The first synset seen for a
        lemmatised word supplies its definition.

        Returns:
            ``(words, definitions)`` as parallel lists; each definition is
            formatted as ``"<word>: <definition>"``.
        """
        raw_words = []
        raw_defs = []
        seen = set()

        for syn in wn.all_synsets():
            if syn.lexname() not in self.target_domains:
                continue

            lemma = syn.name().split(".")[0]
            if len(lemma) <= 2 or "_" in lemma or not lemma.isalpha():
                continue

            base = self.lemmatizer.lemmatize(lemma.lower(), pos=self._wn_pos(syn))
            if base in seen:
                continue

            seen.add(base)
            raw_words.append(base)
            raw_defs.append(f"{base}: {syn.definition()}")

        return raw_words, raw_defs

    def build(self, output_prefix="affective_manifold"):
        """Build the manifold, write it to disk and return it.

        Writes ``<output_prefix>.npz`` (arrays) and ``<output_prefix>.json``
        (config and anchors), and stores the results on the instance.

        Args:
            output_prefix: Path prefix for the two output files.

        Returns:
            A dict with the vocabulary, definitions, embeddings, the
            three-column valence/arousal/salience matrix, the reduced
            embeddings, the anchors and the config.

        Raises:
            ValueError: If no candidate words are found, if filtering removes
                every word, or if an anchor group is empty.
        """
        raw_words, raw_defs = self._collect_candidates()
        if not raw_words:
            raise ValueError("No candidates found.")

        word_emb = self._encode(raw_defs)
        p, n, z = self._anchor_groups()
        valence, arousal, salience = self._affect_scores(word_emb, p, n, z)

        keep = (np.abs(valence) >= self.min_affective_gap) & (salience >= self.min_salience)

        vocab = [w for w, k in zip(raw_words, keep) if k]
        defs = [d for d, k in zip(raw_defs, keep) if k]
        emb = word_emb[keep]
        valence = valence[keep]
        arousal = arousal[keep]
        salience = salience[keep]

        if len(vocab) == 0:
            raise ValueError("Filtering was too strict; no words left.")

        if len(vocab) > self.target_vocab_size:
            # Too many survivors: keep the highest-scoring ones.
            score = np.abs(valence) + salience
            order = np.argsort(-score)[:self.target_vocab_size]
            vocab = [vocab[i] for i in order]
            defs = [defs[i] for i in order]
            emb = emb[order]
            valence = valence[order]
            arousal = arousal[order]
            salience = salience[order]

        features = np.column_stack([valence, arousal, salience]).astype(np.float32)

        # With too few rows for a 2+ component SVD, the unreduced embeddings
        # are stored as "reduced_embeddings" instead.
        svd_dim = min(32, emb.shape[0] - 1, emb.shape[1])
        if svd_dim >= 2:
            svd = TruncatedSVD(n_components=svd_dim, random_state=self.random_state)
            reduced = svd.fit_transform(emb)
        else:
            reduced = emb.astype(np.float32)

        bundle = {
            "vocab": vocab,
            "definitions": defs,
            "word_embeddings": emb.astype(np.float32),
            "manifold_3d": features,
            "reduced_embeddings": reduced.astype(np.float32),
            "anchors": {
                "positive": self.pos_anchors,
                "negative": self.neg_anchors,
                "neutral": self.neu_anchors
            },
            "config": {
                "target_vocab_size": self.target_vocab_size,
                "min_affective_gap": self.min_affective_gap,
                "min_salience": self.min_salience,
                "model_name": self.model_name
            }
        }

        # Object-dtype arrays are stored via pickle, so readers of this file
        # must call np.load(..., allow_pickle=True).
        np.savez_compressed(
            f"{output_prefix}.npz",
            vocab=np.array(vocab, dtype=object),
            definitions=np.array(defs, dtype=object),
            word_embeddings=emb.astype(np.float32),
            manifold_3d=features,
            reduced_embeddings=reduced.astype(np.float32)
        )

        with open(f"{output_prefix}.json", "w", encoding="utf-8") as f:
            json.dump({**bundle["config"], "anchors": bundle["anchors"]}, f, indent=2)

        self.vocab = vocab
        self.definitions = defs
        self.word_embeddings = emb
        self.manifold_3d = features
        self.reduced_embeddings = reduced
        self.bundle = bundle

        return bundle

    def project_word(self, word, definition=None):
        """Score a single word with the same formula ``build`` uses.

        Args:
            word: The word to score.
            definition: Optional definition text. When omitted, the
                definition of the first WordNet synset for ``word`` is used.

        Returns:
            A dict with ``word``, ``definition``, ``valence``, ``arousal``
            and ``salience`` (the three scores as Python floats).

        Raises:
            ValueError: If no definition is given and WordNet has no synset
                for ``word``, or if an anchor group is empty.
        """
        if definition is None:
            synsets = wn.synsets(word)
            if not synsets:
                raise ValueError(f"No WordNet synsets found for '{word}'.")
            definition = synsets[0].definition()

        text = f"{word}: {definition}"
        emb = self._encode([text])[0]

        p, n, z = self._anchor_groups()
        valence, arousal, salience = self._affect_scores(emb, p, n, z)

        return {
            "word": word,
            "definition": definition,
            "valence": float(valence),
            "arousal": float(arousal),
            "salience": float(salience)
        }
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from sentence_transformers import SentenceTransformer
|
|
3
|
+
from sklearn.metrics.pairwise import cosine_similarity
|
|
4
|
+
from nltk.corpus import wordnet as wn
|
|
5
|
+
|
|
6
|
+
import nltk
|
|
7
|
+
|
|
8
|
+
def ensure_nltk():
    """Download the WordNet and OMW 1.4 corpora if NLTK cannot find them.

    Each corpus is looked up first; the quiet download only runs when the
    lookup raises ``LookupError``.
    """
    required = (
        ("corpora/wordnet", "wordnet"),
        ("corpora/omw-1.4", "omw-1.4"),
    )
    for resource_path, package_id in required:
        try:
            nltk.data.find(resource_path)
        except LookupError:
            nltk.download(package_id, quiet=True)


# Runs at import time so the corpora are present before the projector is used.
ensure_nltk()
|
|
20
|
+
class AffectiveProjector:
    """Score words on valence / arousal / salience and query a saved manifold.

    The manifold is an ``.npz`` file containing ``vocab``, ``definitions``,
    ``word_embeddings`` and ``manifold_3d`` arrays.
    """

    def __init__(
        self,
        manifold_path="global_affective_manifold.npz",
        model_name="all-MiniLM-L6-v2"
    ):
        """Load the embedding model and the saved manifold arrays.

        Args:
            manifold_path: Path of the ``.npz`` manifold file to load.
            model_name: Name handed to ``SentenceTransformer``.
        """
        self.model = SentenceTransformer(model_name)

        # SECURITY: allow_pickle=True is needed to read object-dtype arrays,
        # but unpickling can execute arbitrary code. Only load manifold files
        # from a trusted source.
        # The context manager closes the underlying archive once the arrays
        # have been read into memory.
        with np.load(manifold_path, allow_pickle=True) as data:
            self.vocab = data["vocab"]
            self.definitions = data["definitions"]
            self.word_embeddings = data["word_embeddings"]
            self.manifold_3d = data["manifold_3d"]

        self.pos_anchors = [
            "good",
            "pleasant",
            "joy",
            "love",
            "calm",
            "beautiful"
        ]

        self.neg_anchors = [
            "bad",
            "pain",
            "fear",
            "hate",
            "ugly",
            "anger"
        ]

        self.neu_anchors = [
            "object",
            "thing",
            "entity",
            "item",
            "concept",
            "fact"
        ]

    def _normalize(self, x):
        """Return ``x`` as float32 with every row scaled to unit L2 norm.

        Norms are clamped to at least 1e-12 so all-zero rows do not divide
        by zero (they stay all-zero).
        """
        x = np.asarray(x, dtype=np.float32)
        norms = np.linalg.norm(x, axis=1, keepdims=True)
        norms = np.maximum(norms, 1e-12)
        return x / norms

    def _encode(self, texts):
        """Embed ``texts`` with the model and L2-normalise each row."""
        emb = self.model.encode(texts, show_progress_bar=False)
        return self._normalize(emb)

    def _anchor_groups(self):
        """Encode the anchor words and return them split by group.

        Returns:
            A ``(positive, negative, neutral)`` tuple of embedding matrices.

        Raises:
            ValueError: If any anchor group is empty.
        """
        groups = (self.pos_anchors, self.neg_anchors, self.neu_anchors)
        if any(len(group) == 0 for group in groups):
            raise ValueError("Each anchor group must contain at least one word.")

        encoded = self._encode(
            list(self.pos_anchors) + list(self.neg_anchors) + list(self.neu_anchors)
        )
        # Split by explicit offsets; a negative-length slice such as
        # encoded[-0:] would silently select every row.
        n_pos = len(self.pos_anchors)
        n_neg = len(self.neg_anchors)
        return (
            encoded[:n_pos],
            encoded[n_pos:n_pos + n_neg],
            encoded[n_pos + n_neg:],
        )

    def project_word(
        self,
        word,
        definition=None
    ):
        """Score a single word against the anchor groups.

        Args:
            word: The word to score.
            definition: Optional definition text. When omitted, the
                definition of the first WordNet synset for ``word`` is used.

        Returns:
            A dict with ``word``, ``definition``, ``valence``, ``arousal``
            and ``salience`` (the three scores as Python floats).

        Raises:
            ValueError: If no definition is given and WordNet has no synset
                for ``word``, or if an anchor group is empty.
        """
        if definition is None:
            synsets = wn.synsets(word)

            if not synsets:
                raise ValueError(
                    f"No WordNet entry found for '{word}'"
                )

            definition = synsets[0].definition()

        text = f"{word}: {definition}"
        emb = self._encode([text])[0]

        p, n, z = self._anchor_groups()

        pos_mean = (emb @ p.T).mean()
        neg_mean = (emb @ n.T).mean()
        neu_mean = (emb @ z.T).mean()

        valence = pos_mean - neg_mean

        # Arousal here is distance from the neutral anchors only.
        # NOTE(review): AffectiveManifoldBuilder computes arousal as
        # max(pos_mean, neg_mean) - neu_mean, so values returned here are
        # presumably not on the same scale as the stored manifold_3d
        # columns -- confirm which formula is intended.
        arousal = 1.0 - neu_mean

        salience = abs(valence) + arousal

        return {
            "word": word,
            "definition": definition,
            "valence": float(valence),
            "arousal": float(arousal),
            "salience": float(salience)
        }

    def nearest_neighbors(
        self,
        word,
        k=10
    ):
        """Return the ``k`` vocabulary words most similar to ``word``.

        Similarity is the cosine similarity between stored word embeddings.
        The query word itself is never included in the result.

        Args:
            word: A word that must be present in the loaded vocabulary.
            k: Maximum number of neighbours to return; values of zero or
                less yield an empty list.

        Returns:
            A list of ``(neighbor_word, similarity)`` tuples ordered from
            most to least similar.

        Raises:
            ValueError: If ``word`` is not in the vocabulary.
        """
        matches = np.where(self.vocab == word)[0]

        if len(matches) == 0:
            raise ValueError(
                f"'{word}' not found in manifold."
            )

        if k <= 0:
            return []

        idx = int(matches[0])

        # Cosine similarity = dot product of unit-length rows.
        unit = self._normalize(self.word_embeddings)
        sims = unit @ unit[idx]

        # Drop the query by index rather than assuming it sorts first: with
        # tied similarities (e.g. duplicate embeddings) another word can take
        # the top slot, which would return the query as its own neighbour.
        order = np.argsort(-sims, kind="stable")
        order = order[order != idx][:k]

        return [(str(self.vocab[j]), float(sims[j])) for j in order]
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
pyproject.toml
|
|
2
|
+
affective_manifold/__init__.py
|
|
3
|
+
affective_manifold/builder.py
|
|
4
|
+
affective_manifold/projector.py
|
|
5
|
+
affective_manifold.egg-info/PKG-INFO
|
|
6
|
+
affective_manifold.egg-info/SOURCES.txt
|
|
7
|
+
affective_manifold.egg-info/dependency_links.txt
|
|
8
|
+
affective_manifold.egg-info/requires.txt
|
|
9
|
+
affective_manifold.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
affective_manifold
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "affective-manifold"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
|
|
9
|
+
dependencies = [
|
|
10
|
+
"numpy",
|
|
11
|
+
"nltk",
|
|
12
|
+
"sentence-transformers",
|
|
13
|
+
"scikit-learn"
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[tool.setuptools.packages.find]
|
|
17
|
+
include = ["affective_manifold*"]
|
|
18
|
+
exclude = ["venv*", "tests*", "examples*"]
|