affective-manifold 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ Metadata-Version: 2.4
2
+ Name: affective-manifold
3
+ Version: 0.1.0
4
+ Requires-Dist: numpy
5
+ Requires-Dist: nltk
6
+ Requires-Dist: sentence-transformers
7
+ Requires-Dist: scikit-learn
@@ -0,0 +1,7 @@
1
+ from .builder import AffectiveManifoldBuilder
2
+ from .projector import AffectiveProjector
3
+
4
+ __all__ = [
5
+ "AffectiveManifoldBuilder",
6
+ "AffectiveProjector"
7
+ ]
@@ -0,0 +1,204 @@
1
+ import json
2
+ import numpy as np
3
+ import nltk
4
+
5
+ from nltk.corpus import wordnet as wn
6
+ from nltk.stem import WordNetLemmatizer
7
+
8
+ from sentence_transformers import SentenceTransformer
9
+ from sklearn.decomposition import TruncatedSVD
10
+
11
class AffectiveManifoldBuilder:
    """Build an affective lexicon ("manifold") from WordNet definitions.

    Candidate lemmas are taken from a fixed set of WordNet lexicographer
    domains, embedded as ``"<lemma>: <definition>"`` strings with a
    SentenceTransformer model, and scored by their mean similarity to
    positive, negative and neutral anchor words. Words that pass the
    valence/salience thresholds are written to ``<prefix>.npz`` and the
    configuration to ``<prefix>.json``.
    """

    def __init__(
        self,
        model_name="all-MiniLM-L6-v2",
        target_vocab_size=4000,
        min_affective_gap=0.08,
        min_salience=0.12,
        random_state=42,
    ):
        """Load the embedding model and store the filtering parameters.

        Args:
            model_name: Identifier handed to ``SentenceTransformer``.
            target_vocab_size: Maximum number of words kept by ``build``.
            min_affective_gap: Minimum ``abs(valence)`` a word needs.
            min_salience: Minimum salience a word needs.
            random_state: Seed passed to ``TruncatedSVD``.
        """
        # Remember the identifier that was requested so build() can record
        # it in the saved configuration.
        self.model_name = model_name
        self.model = SentenceTransformer(model_name)
        self.target_vocab_size = target_vocab_size
        self.min_affective_gap = min_affective_gap
        self.min_salience = min_salience
        self.random_state = random_state
        self.lemmatizer = WordNetLemmatizer()

        # WordNet lexicographer files from which candidates are drawn.
        self.target_domains = {
            "adj.all", "noun.feeling", "noun.cognition",
            "noun.behavior", "verb.emotion", "verb.social",
        }

        self.pos_anchors = ["good", "pleasant", "joy", "love", "calm", "beautiful"]
        self.neg_anchors = ["bad", "pain", "fear", "hate", "ugly", "anger"]
        self.neu_anchors = ["object", "thing", "entity", "item", "concept", "fact"]

    def _wn_pos(self, syn):
        """Return the lemmatizer POS tag for *syn*.

        Satellite adjectives ("s") are mapped to "a"; any tag that is not
        listed falls back to "n".
        """
        return {
            "n": "n",
            "v": "v",
            "a": "a",
            "s": "a",
            "r": "r",
        }.get(syn.pos(), "n")

    def _normalize(self, x):
        """Return the rows of the 2-D array *x* scaled to unit L2 norm.

        Norms are clamped to 1e-12 so an all-zero row stays zero instead of
        producing NaN.
        """
        x = np.asarray(x, dtype=np.float32)
        norms = np.linalg.norm(x, axis=1, keepdims=True)
        norms = np.maximum(norms, 1e-12)
        return x / norms

    def _encode(self, texts):
        """Embed *texts* with the model and L2-normalize each row."""
        emb = self.model.encode(texts, show_progress_bar=False)
        return self._normalize(emb)

    def _encode_anchors(self):
        """Embed the anchor words and return ``(positive, negative, neutral)``.

        Raises:
            ValueError: If any of the three anchor lists is empty.
        """
        groups = (
            ("positive", self.pos_anchors),
            ("negative", self.neg_anchors),
            ("neutral", self.neu_anchors),
        )
        for label, words in groups:
            if not words:
                raise ValueError(f"The {label} anchor list must not be empty.")

        n_pos = len(self.pos_anchors)
        n_neg = len(self.neg_anchors)
        emb = self._encode(
            list(self.pos_anchors) + list(self.neg_anchors) + list(self.neu_anchors)
        )
        # Explicit offsets: a negative-length slice such as emb[-0:] would
        # select every row rather than none.
        return emb[:n_pos], emb[n_pos:n_pos + n_neg], emb[n_pos + n_neg:]

    @staticmethod
    def _affect_scores(emb, pos, neg, neu):
        """Return ``(valence, arousal, salience)`` for *emb*.

        *emb* may be a single vector or a matrix with one row per word; the
        mean similarity to each anchor group is taken over the last axis.
        """
        pos_mean = (emb @ pos.T).mean(axis=-1)
        neg_mean = (emb @ neg.T).mean(axis=-1)
        neu_mean = (emb @ neu.T).mean(axis=-1)

        valence = pos_mean - neg_mean
        arousal = np.maximum(pos_mean, neg_mean) - neu_mean
        salience = np.abs(valence) + arousal
        return valence, arousal, salience

    def _collect_candidates(self):
        """Return ``(words, texts)`` for every usable WordNet synset.

        A synset is used when its lexicographer file is in
        ``self.target_domains`` and its head lemma is alphabetic and longer
        than two characters. Each lemmatized word is kept only once; the
        matching text is ``"<word>: <definition>"``.
        """
        raw_words = []
        raw_defs = []
        seen = set()

        for syn in wn.all_synsets():
            if syn.lexname() not in self.target_domains:
                continue

            lemma = syn.name().split(".")[0]
            if len(lemma) <= 2 or "_" in lemma or not lemma.isalpha():
                continue

            base = self.lemmatizer.lemmatize(lemma.lower(), pos=self._wn_pos(syn))
            if base in seen:
                continue

            seen.add(base)
            raw_words.append(base)
            raw_defs.append(f"{base}: {syn.definition()}")

        return raw_words, raw_defs

    def build(self, output_prefix="affective_manifold"):
        """Build the manifold, write it to disk and return it as a dict.

        Writes ``<output_prefix>.npz`` (vocab, definitions, embeddings,
        3-D features, SVD-reduced embeddings) and ``<output_prefix>.json``
        (configuration and anchors). The results are also stored on the
        instance.

        Raises:
            ValueError: If no candidates are found, an anchor list is empty,
                or no word passes the thresholds.
        """
        raw_words, raw_defs = self._collect_candidates()
        if not raw_words:
            raise ValueError("No candidates found.")

        # Anchors first: an empty anchor list fails before the (much larger)
        # definition batch is encoded.
        pos, neg, neu = self._encode_anchors()
        word_emb = self._encode(raw_defs)
        valence, arousal, salience = self._affect_scores(word_emb, pos, neg, neu)

        keep = (np.abs(valence) >= self.min_affective_gap) & (salience >= self.min_salience)
        if not keep.any():
            raise ValueError("Filtering was too strict; no words left.")

        selected = np.flatnonzero(keep)
        if selected.size > self.target_vocab_size:
            # Too many survivors: keep the highest-scoring ones, best first.
            score = np.abs(valence[selected]) + salience[selected]
            selected = selected[np.argsort(-score)[:self.target_vocab_size]]

        vocab = [raw_words[i] for i in selected]
        defs = [raw_defs[i] for i in selected]
        emb = word_emb[selected]
        valence = valence[selected]
        arousal = arousal[selected]
        salience = salience[selected]

        features = np.column_stack([valence, arousal, salience]).astype(np.float32)

        svd_dim = min(32, emb.shape[0] - 1, emb.shape[1])
        if svd_dim >= 2:
            svd = TruncatedSVD(n_components=svd_dim, random_state=self.random_state)
            reduced = svd.fit_transform(emb)
        else:
            # Too few rows for a useful decomposition; keep the embeddings.
            reduced = emb.astype(np.float32)

        bundle = {
            "vocab": vocab,
            "definitions": defs,
            "word_embeddings": emb.astype(np.float32),
            "manifold_3d": features,
            "reduced_embeddings": reduced.astype(np.float32),
            "anchors": {
                "positive": self.pos_anchors,
                "negative": self.neg_anchors,
                "neutral": self.neu_anchors,
            },
            "config": {
                "target_vocab_size": self.target_vocab_size,
                "min_affective_gap": self.min_affective_gap,
                "min_salience": self.min_salience,
                # Record the identifier the model was loaded from; fall back
                # to a generic label for instances created without __init__.
                "model_name": str(getattr(self, "model_name", "SentenceTransformer")),
            },
        }

        # NOTE: vocab/definitions are stored as object arrays, so readers
        # must call np.load(..., allow_pickle=True) to get them back.
        np.savez_compressed(
            f"{output_prefix}.npz",
            vocab=np.array(vocab, dtype=object),
            definitions=np.array(defs, dtype=object),
            word_embeddings=emb.astype(np.float32),
            manifold_3d=features,
            reduced_embeddings=reduced.astype(np.float32),
        )

        with open(f"{output_prefix}.json", "w", encoding="utf-8") as f:
            json.dump({**bundle["config"], "anchors": bundle["anchors"]}, f, indent=2)

        self.vocab = vocab
        self.definitions = defs
        self.word_embeddings = emb
        self.manifold_3d = features
        self.reduced_embeddings = reduced
        self.bundle = bundle

        return bundle

    def project_word(self, word, definition=None):
        """Score a single word against the anchors.

        Args:
            word: The word to score.
            definition: Text to embed with the word. When omitted, the
                definition of the first WordNet synset of *word* is used.

        Returns:
            A dict with ``word``, ``definition``, ``valence``, ``arousal``
            and ``salience`` (floats).

        Raises:
            ValueError: If no definition is given and WordNet has no synset
                for *word*, or if an anchor list is empty.
        """
        if definition is None:
            synsets = wn.synsets(word)
            if not synsets:
                raise ValueError(f"No WordNet synsets found for '{word}'.")
            definition = synsets[0].definition()

        emb = self._encode([f"{word}: {definition}"])[0]
        pos, neg, neu = self._encode_anchors()
        valence, arousal, salience = self._affect_scores(emb, pos, neg, neu)

        return {
            "word": word,
            "definition": definition,
            "valence": float(valence),
            "arousal": float(arousal),
            "salience": float(salience),
        }
@@ -0,0 +1,186 @@
1
+ import numpy as np
2
+ from sentence_transformers import SentenceTransformer
3
+ from sklearn.metrics.pairwise import cosine_similarity
4
+ from nltk.corpus import wordnet as wn
5
+
6
+ import nltk
7
+
8
def ensure_nltk():
    """Make sure the WordNet corpora used by this module are installed.

    Each resource is looked up with ``nltk.data.find``; when that raises
    ``LookupError`` the matching package is fetched with a quiet
    ``nltk.download``.
    """
    required = (
        ("corpora/wordnet", "wordnet"),
        ("corpora/omw-1.4", "omw-1.4"),
    )
    for resource_path, package in required:
        try:
            nltk.data.find(resource_path)
        except LookupError:
            nltk.download(package, quiet=True)


# Runs when the module is imported, before any class below is used.
ensure_nltk()
20
class AffectiveProjector:
    """Score words and look up neighbours in a saved affective manifold.

    The manifold is an ``.npz`` archive providing the arrays ``vocab``,
    ``definitions``, ``word_embeddings`` and ``manifold_3d``.
    """

    def __init__(
        self,
        manifold_path="global_affective_manifold.npz",
        model_name="all-MiniLM-L6-v2",
    ):
        """Load the embedding model and the manifold arrays.

        Args:
            manifold_path: Path of the ``.npz`` archive to read.
            model_name: Identifier handed to ``SentenceTransformer``.
        """
        self.model = SentenceTransformer(model_name)

        # NOTE(review): allow_pickle=True is needed because the builder in
        # this package saves vocab/definitions as object arrays, but it
        # means unpickling runs on load -- only open archives you trust.
        # The context manager closes the archive once the arrays are read.
        with np.load(manifold_path, allow_pickle=True) as data:
            self.vocab = data["vocab"]
            self.definitions = data["definitions"]
            self.word_embeddings = data["word_embeddings"]
            self.manifold_3d = data["manifold_3d"]

        self.pos_anchors = [
            "good",
            "pleasant",
            "joy",
            "love",
            "calm",
            "beautiful",
        ]

        self.neg_anchors = [
            "bad",
            "pain",
            "fear",
            "hate",
            "ugly",
            "anger",
        ]

        self.neu_anchors = [
            "object",
            "thing",
            "entity",
            "item",
            "concept",
            "fact",
        ]

    def _normalize(self, x):
        """Return the rows of the 2-D array *x* scaled to unit L2 norm.

        Norms are clamped to 1e-12 so an all-zero row stays zero instead of
        producing NaN.
        """
        x = np.asarray(x, dtype=np.float32)
        norms = np.linalg.norm(x, axis=1, keepdims=True)
        norms = np.maximum(norms, 1e-12)
        return x / norms

    def _encode(self, texts):
        """Embed *texts* with the model and L2-normalize each row."""
        emb = self.model.encode(texts, show_progress_bar=False)
        return self._normalize(emb)

    def _encode_anchors(self):
        """Embed the anchor words and return ``(positive, negative, neutral)``.

        Raises:
            ValueError: If any of the three anchor lists is empty.
        """
        groups = (
            ("positive", self.pos_anchors),
            ("negative", self.neg_anchors),
            ("neutral", self.neu_anchors),
        )
        for label, words in groups:
            if not words:
                raise ValueError(f"The {label} anchor list must not be empty.")

        n_pos = len(self.pos_anchors)
        n_neg = len(self.neg_anchors)
        emb = self._encode(
            list(self.pos_anchors) + list(self.neg_anchors) + list(self.neu_anchors)
        )
        # Explicit offsets: a negative-length slice such as emb[-0:] would
        # select every row rather than none.
        return emb[:n_pos], emb[n_pos:n_pos + n_neg], emb[n_pos + n_neg:]

    def project_word(self, word, definition=None):
        """Score a single word against the anchors.

        Args:
            word: The word to score.
            definition: Text to embed with the word. When omitted, the
                definition of the first WordNet synset of *word* is used.

        Returns:
            A dict with ``word``, ``definition``, ``valence``, ``arousal``
            and ``salience`` (floats).

        Raises:
            ValueError: If no definition is given and WordNet has no synset
                for *word*, or if an anchor list is empty.
        """
        if definition is None:
            synsets = wn.synsets(word)
            if not synsets:
                raise ValueError(f"No WordNet entry found for '{word}'")
            definition = synsets[0].definition()

        emb = self._encode([f"{word}: {definition}"])[0]
        pos, neg, neu = self._encode_anchors()

        pos_mean = (emb @ pos.T).mean()
        neg_mean = (emb @ neg.T).mean()
        neu_mean = (emb @ neu.T).mean()

        valence = pos_mean - neg_mean

        # Arousal is the distance from the neutral anchors.
        # NOTE(review): AffectiveManifoldBuilder computes arousal as
        # max(pos_mean, neg_mean) - neu_mean, so values returned here are
        # presumably not on the same scale as the stored manifold_3d
        # column -- confirm which formula is intended.
        arousal = 1.0 - neu_mean

        salience = abs(valence) + arousal

        return {
            "word": word,
            "definition": definition,
            "valence": float(valence),
            "arousal": float(arousal),
            "salience": float(salience),
        }

    def nearest_neighbors(self, word, k=10):
        """Return up to *k* vocabulary words most similar to *word*.

        Similarity is the cosine between stored word embeddings. The query
        word itself is never part of the result.

        Args:
            word: A word that must be present in the manifold vocabulary.
            k: Maximum number of neighbours; values below 1 give ``[]``.

        Returns:
            A list of ``(word, similarity)`` tuples, most similar first.

        Raises:
            ValueError: If *word* is not in the vocabulary.
        """
        matches = np.flatnonzero(self.vocab == word)
        if matches.size == 0:
            raise ValueError(f"'{word}' not found in manifold.")
        if k <= 0:
            return []

        idx = int(matches[0])

        unit = self._normalize(self.word_embeddings)
        sims = unit @ unit[idx]

        # Drop the query by index rather than assuming it sorts first: rows
        # with a tied similarity could otherwise push it into the results.
        order = np.argsort(-sims, kind="stable")
        order = order[order != idx][:k]

        return [(str(self.vocab[j]), float(sims[j])) for j in order]
@@ -0,0 +1,7 @@
1
+ Metadata-Version: 2.4
2
+ Name: affective-manifold
3
+ Version: 0.1.0
4
+ Requires-Dist: numpy
5
+ Requires-Dist: nltk
6
+ Requires-Dist: sentence-transformers
7
+ Requires-Dist: scikit-learn
@@ -0,0 +1,9 @@
1
+ pyproject.toml
2
+ affective_manifold/__init__.py
3
+ affective_manifold/builder.py
4
+ affective_manifold/projector.py
5
+ affective_manifold.egg-info/PKG-INFO
6
+ affective_manifold.egg-info/SOURCES.txt
7
+ affective_manifold.egg-info/dependency_links.txt
8
+ affective_manifold.egg-info/requires.txt
9
+ affective_manifold.egg-info/top_level.txt
@@ -0,0 +1,4 @@
1
+ numpy
2
+ nltk
3
+ sentence-transformers
4
+ scikit-learn
@@ -0,0 +1 @@
1
+ affective_manifold
@@ -0,0 +1,18 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "affective-manifold"
7
+ version = "0.1.0"
8
+
9
+ dependencies = [
10
+ "numpy",
11
+ "nltk",
12
+ "sentence-transformers",
13
+ "scikit-learn"
14
+ ]
15
+
16
+ [tool.setuptools.packages.find]
17
+ include = ["affective_manifold*"]
18
+ exclude = ["venv*", "tests*", "examples*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+