sadbert 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sadbert-0.1.0/LICENSE ADDED
@@ -0,0 +1,32 @@
1
+ SADBERT Software License
2
+ Copyright (c) 2026 Xander Deanhardt
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ of this software and associated documentation files (the "Software"), to deal
6
+ in the Software without restriction, including without limitation the rights
7
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the Software is
9
+ furnished to do so, subject to the following conditions:
10
+
11
+ 1. The above copyright notice and this permission notice shall be included in
12
+ all copies or substantial portions of the Software.
13
+
14
+ 2. RESERVATION OF ACADEMIC PUBLICATION RIGHTS: Xander Deanhardt retains
15
+ exclusive rights to publish, author, and submit academic research papers,
16
+ journal articles, conference proceedings, theses, dissertations, and other
17
+ scholarly works that present the design, architecture, training methodology,
18
+ fine-tuning procedures, evaluation, or scientific contributions of this
19
+ Software or its underlying models as original research contributions.
20
+ Users of the Software may freely publish work in which this Software is
21
+ used as a tool or cited as a resource, but may not publish work that claims
22
+ the Software's methodology, training procedures, or model architecture as
23
+ the user's own original contributions without prior written permission from
24
+ Xander Deanhardt.
25
+
26
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
31
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32
+ SOFTWARE.
@@ -0,0 +1,14 @@
1
+ # Include the bundled data files in source distributions (sdist)
2
+ recursive-include sadbert/data *.pkl
3
+
4
+ # Include documentation and metadata
5
+ include README.md
6
+ include LICENSE
7
+ include pyproject.toml
8
+ include requirements.txt
9
+
10
+ # Exclude build artifacts and caches
11
+ exclude .gitignore
12
+ recursive-exclude * __pycache__
13
+ recursive-exclude * *.py[co]
14
+ recursive-exclude * .DS_Store
sadbert-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,270 @@
1
+ Metadata-Version: 2.4
2
+ Name: sadbert
3
+ Version: 0.1.0
4
+ Summary: Stereotype-content analysis with fine-tuned DistilBERT models (SADCAT framework)
5
+ Author-email: Xander Deanhardt <xanderd24@uchicago.edu>
6
+ License: SADBERT Software License
7
+ Copyright (c) 2026 Xander Deanhardt
8
+
9
+ Permission is hereby granted, free of charge, to any person obtaining a copy
10
+ of this software and associated documentation files (the "Software"), to deal
11
+ in the Software without restriction, including without limitation the rights
12
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ copies of the Software, and to permit persons to whom the Software is
14
+ furnished to do so, subject to the following conditions:
15
+
16
+ 1. The above copyright notice and this permission notice shall be included in
17
+ all copies or substantial portions of the Software.
18
+
19
+ 2. RESERVATION OF ACADEMIC PUBLICATION RIGHTS: Xander Deanhardt retains
20
+ exclusive rights to publish, author, and submit academic research papers,
21
+ journal articles, conference proceedings, theses, dissertations, and other
22
+ scholarly works that present the design, architecture, training methodology,
23
+ fine-tuning procedures, evaluation, or scientific contributions of this
24
+ Software or its underlying models as original research contributions.
25
+ Users of the Software may freely publish work in which this Software is
26
+ used as a tool or cited as a resource, but may not publish work that claims
27
+ the Software's methodology, training procedures, or model architecture as
28
+ the user's own original contributions without prior written permission from
29
+ Xander Deanhardt.
30
+
31
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37
+ SOFTWARE.
38
+
39
+ Project-URL: Homepage, https://github.com/XanderD24/sadbert
40
+ Project-URL: Repository, https://github.com/XanderD24/sadbert
41
+ Project-URL: Bug Tracker, https://github.com/XanderD24/sadbert/issues
42
+ Project-URL: HuggingFace, https://huggingface.co/XanderD24
43
+ Keywords: NLP,stereotype,social cognition,SADCAT,DistilBERT,transformers,sentiment analysis
44
+ Classifier: Development Status :: 3 - Alpha
45
+ Classifier: Intended Audience :: Science/Research
46
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
47
+ Classifier: Topic :: Text Processing :: Linguistic
48
+ Classifier: License :: Other/Proprietary License
49
+ Classifier: Programming Language :: Python :: 3
50
+ Classifier: Programming Language :: Python :: 3.9
51
+ Classifier: Programming Language :: Python :: 3.10
52
+ Classifier: Programming Language :: Python :: 3.11
53
+ Classifier: Programming Language :: Python :: 3.12
54
+ Classifier: Operating System :: OS Independent
55
+ Requires-Python: >=3.9
56
+ Description-Content-Type: text/markdown
57
+ License-File: LICENSE
58
+ Requires-Dist: torch>=2.0.0
59
+ Requires-Dist: transformers>=4.35.0
60
+ Requires-Dist: pandas>=2.0.0
61
+ Requires-Dist: numpy>=1.24.0
62
+ Requires-Dist: huggingface_hub>=0.19.0
63
+ Provides-Extra: dev
64
+ Requires-Dist: pytest>=7.0; extra == "dev"
65
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
66
+ Requires-Dist: black>=23.0; extra == "dev"
67
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
68
+ Requires-Dist: build>=1.0; extra == "dev"
69
+ Requires-Dist: twine>=4.0; extra == "dev"
70
+ Dynamic: license-file
71
+
72
+ # SADBERT
73
+
74
+ **Stereotype-content Analysis with DistilBERT** — a Python package for identifying and characterising stereotype-relevant dimensions in natural language text, based on the SADCAT (Semi-Automated Dictionary Creation for Analyzing Text) framework.
75
+
76
+ ## What it does
77
+
78
+ Given a word or phrase, SADBERT predicts:
79
+
80
+ | Output column | Description |
81
+ |---|---|
82
+ | `category` | Stereotype-content dimension (e.g. *Warmth*, *Competence*) |
83
+ | `probability` | Confidence of the category classifier |
84
+ | `valence` | Direction within the category: `1` = positive, `0` = neutral, `−1` = negative |
85
+ | `valence probability` | Confidence of the valence prediction |
86
+ | `interpretation` | Human-readable label (e.g. *"Warm"*, *"Incompetent"*, *"Moral"*) |
87
+
88
+ ### Categories detected
89
+
90
+ **Major** (with valence): Warmth · Competence · Sociability · Morality · Ability · Assertiveness · Status · Beliefs · health · deviance · beauty · Politics · Religion
91
+
92
+ **Minor** (category only, no valence): emotions · Geography · Appearance · occupation · socialgroups · inhabitant · country · relative · insults · stem · humanities · art · Lacksknowledge · fortune · clothing · bodpart · bodprop · skin · bodcov · beliefsother · Other\_large · Other
93
+
94
+ ---
95
+
96
+ ## Installation
97
+
98
+ ```bash
99
+ pip install sadbert
100
+ ```
101
+
102
+ > **Note:** On first use, SADBERT automatically downloads ~2 GB of model weights from the HuggingFace Hub. These are cached locally in `~/.cache/huggingface/` and do not need to be re-downloaded on subsequent runs.
103
+
104
+ ### GPU / Apple Silicon
105
+
106
+ SADBERT auto-detects CUDA and Apple MPS. To use a specific device, instantiate `SADBERT` directly:
107
+
108
+ ```python
109
+ from sadbert import SADBERT
110
+ model = SADBERT(device="cuda") # or "mps", "cpu"
111
+ ```
112
+
113
+ ---
114
+
115
+ ## Quick Start
116
+
117
+ ```python
118
+ import sadbert
119
+
120
+ # Single word — returns a DataFrame
121
+ df = sadbert.get_stereotype_content("honest")
122
+ print(df)
123
+
124
+ # Multiple words — stacked into one DataFrame (default)
125
+ df = sadbert.get_stereotype_content(["honest", "lazy", "senator"])
126
+ print(df)
127
+
128
+ # Multiple words — one DataFrame per word
129
+ results = sadbert.get_stereotype_content(["honest", "lazy"], stacked=False)
130
+ print(results["honest"])
131
+ print(results["lazy"])
132
+ ```
133
+
134
+ ### Example output
135
+
136
+ ```
137
+ >>> sadbert.get_stereotype_content("honest")
138
+
139
+ category probability valence valence probability interpretation
140
+ 0 Warmth 0.912 1.0 0.876 Warm
141
+ 1 Morality 0.843 1.0 0.791 Moral
142
+ ```
143
+
144
+ ---
145
+
146
+ ## API reference
147
+
148
+ ### `sadbert.get_stereotype_content(text, stacked=True)`
149
+
150
+ Module-level convenience function. Uses a shared, lazily-initialised `SADBERT` instance.
151
+
152
+ | Parameter | Type | Description |
153
+ |---|---|---|
154
+ | `text` | `str` or `list[str]` | Word(s) or phrase(s) to classify |
155
+ | `stacked` | `bool` | `True` (default): return one combined DataFrame with a `"text"` column. `False`: return a `dict[str, DataFrame]`. For single string input with `stacked=False`, returns the DataFrame directly. |
156
+
157
+ ---
158
+
159
+ ### `sadbert.SADBERT(device=None, batch_size=32, load_models=True)`
160
+
161
+ Instantiate your own SADBERT object for full control.
162
+
163
+ ```python
164
+ from sadbert import SADBERT
165
+
166
+ model = SADBERT(
167
+ device="cuda", # "cuda" | "mps" | "cpu" | None (auto-detect)
168
+ batch_size=64, # increase for faster throughput on GPU
169
+ load_models=True, # set False to defer model loading to first call
170
+ )
171
+
172
+ results = model.get_stereotype_content(["nurse", "engineer", "senator"])
173
+ ```
174
+
175
+ ---
176
+
177
+ ## Model architecture
178
+
179
+ SADBERT uses a three-stage ensemble:
180
+
181
+ ```
182
+ Input text
183
+
184
+
185
+ ┌─────────────────────────────────────────────────────┐
186
+ │ Stage 1 · Master model (SADBERT_master_model) │
187
+ │ Multi-label DistilBERT, 35 output classes │
188
+ │ Softmax probabilities compared against per-class │
189
+ │ Youden-J thresholds → candidate categories │
190
+ └─────────────────────────────────────────────────────┘
191
+ │ candidate categories
192
+
193
+ ┌─────────────────────────────────────────────────────┐
194
+ │ Stage 2 · Classifier heads (SADBERT_{cat}_classifier) │
195
+ │ One binary DistilBERT per category │
196
+ │ Veto gate — keeps only categories confirmed by │
197
+ │ both master model and dedicated head │
198
+ └─────────────────────────────────────────────────────┘
199
+ │ confirmed categories + probabilities
200
+
201
+ ┌─────────────────────────────────────────────────────┐
202
+ │ Stage 3 · Sentiment models (SADBERT_{cat}_sentiment) │
203
+ │ One 3-class DistilBERT per major category │
204
+ │ Predicts negative / neutral / positive valence │
205
+ └─────────────────────────────────────────────────────┘
206
+
207
+
208
+ Results DataFrame
209
+ ```
210
+
211
+ All models are hosted on HuggingFace at [huggingface.co/XanderD24](https://huggingface.co/XanderD24).
212
+
213
+ ---
214
+
215
+ ## Building from source
216
+
217
+ ```bash
218
+ git clone https://github.com/XanderD24/sadbert.git
219
+ cd sadbert
220
+
221
+ # Install in editable mode with dev dependencies
222
+ pip install -e ".[dev]"
223
+
224
+ # Run tests
225
+ pytest
226
+ ```
227
+
228
+ ### Adding ROC_dict.pkl before publishing
229
+
230
+ `ROC_dict.pkl` (per-category probability thresholds) is **required** but not included in the repository for size reasons. Copy it into the data directory before building the wheel:
231
+
232
+ ```bash
233
+ cp /path/to/ROC_dict.pkl sadbert/data/ROC_dict.pkl
234
+ ```
235
+
236
+ Then build:
237
+
238
+ ```bash
239
+ python -m build # produces dist/sadbert-0.1.0.tar.gz and .whl
240
+ twine check dist/* # sanity-check before uploading
241
+ twine upload dist/* # publish to PyPI
242
+ ```
243
+
244
+ ---
245
+
246
+ ## Citation
247
+
248
+ If you use SADBERT in research, please cite this package using the entry below, and also cite the original SADCAT work it builds on (see "Citation of Original Work"):
249
+
250
+ ```
251
+ @misc{sadbert2025,
252
+ author = {Deanhardt, Xander},
253
+ title = {{SADBERT}: Stereotype-content Analysis with {DistilBERT}},
254
+ year = {2025},
255
+ url = {https://github.com/XanderD24/sadbert},
256
+ }
257
+ ```
258
+
259
+ ---
260
+ ## Citation of Original Work
261
+ All data used to fine-tune these models was taken from the SADCAT dictionary, published by Gandalf Nicolas, Xuechunzi Bai, and Susan T. Fiske, and hosted on GitHub by Gandalf Nicolas. The dictionary was first published in the following journal article:
262
+
263
+ Nicolas, Gandalf, et al. “Comprehensive Stereotype Content Dictionaries Using a Semi‐Automated Method.” European Journal of Social Psychology, vol. 51, no. 1, Feb. 2021, pp. 178–196, https://doi.org/10.1002/ejsp.2724.
264
+
265
+ GitHub link: <https://github.com/gandalfnicolas/SADCAT/tree/master>
266
+
267
+ OSF repository: <https://osf.io/yx45f/>
268
+ ## License
269
+
270
+ SADBERT Software License (MIT-style terms plus a reservation of academic publication rights) — see [LICENSE](LICENSE) for details.
@@ -0,0 +1,199 @@
1
+ # SADBERT
2
+
3
+ **Stereotype-content Analysis with DistilBERT** — a Python package for identifying and characterising stereotype-relevant dimensions in natural language text, based on the SADCAT (Semi-Automated Dictionary Creation for Analyzing Text) framework.
4
+
5
+ ## What it does
6
+
7
+ Given a word or phrase, SADBERT predicts:
8
+
9
+ | Output column | Description |
10
+ |---|---|
11
+ | `category` | Stereotype-content dimension (e.g. *Warmth*, *Competence*) |
12
+ | `probability` | Confidence of the category classifier |
13
+ | `valence` | Direction within the category: `1` = positive, `0` = neutral, `−1` = negative |
14
+ | `valence probability` | Confidence of the valence prediction |
15
+ | `interpretation` | Human-readable label (e.g. *"Warm"*, *"Incompetent"*, *"Moral"*) |
16
+
17
+ ### Categories detected
18
+
19
+ **Major** (with valence): Warmth · Competence · Sociability · Morality · Ability · Assertiveness · Status · Beliefs · health · deviance · beauty · Politics · Religion
20
+
21
+ **Minor** (category only, no valence): emotions · Geography · Appearance · occupation · socialgroups · inhabitant · country · relative · insults · stem · humanities · art · Lacksknowledge · fortune · clothing · bodpart · bodprop · skin · bodcov · beliefsother · Other\_large · Other
22
+
23
+ ---
24
+
25
+ ## Installation
26
+
27
+ ```bash
28
+ pip install sadbert
29
+ ```
30
+
31
+ > **Note:** On first use, SADBERT automatically downloads ~2 GB of model weights from the HuggingFace Hub. These are cached locally in `~/.cache/huggingface/` and do not need to be re-downloaded on subsequent runs.
32
+
33
+ ### GPU / Apple Silicon
34
+
35
+ SADBERT auto-detects CUDA and Apple MPS. To use a specific device, instantiate `SADBERT` directly:
36
+
37
+ ```python
38
+ from sadbert import SADBERT
39
+ model = SADBERT(device="cuda") # or "mps", "cpu"
40
+ ```
41
+
42
+ ---
43
+
44
+ ## Quick Start
45
+
46
+ ```python
47
+ import sadbert
48
+
49
+ # Single word — returns a DataFrame
50
+ df = sadbert.get_stereotype_content("honest")
51
+ print(df)
52
+
53
+ # Multiple words — stacked into one DataFrame (default)
54
+ df = sadbert.get_stereotype_content(["honest", "lazy", "senator"])
55
+ print(df)
56
+
57
+ # Multiple words — one DataFrame per word
58
+ results = sadbert.get_stereotype_content(["honest", "lazy"], stacked=False)
59
+ print(results["honest"])
60
+ print(results["lazy"])
61
+ ```
62
+
63
+ ### Example output
64
+
65
+ ```
66
+ >>> sadbert.get_stereotype_content("honest")
67
+
68
+ category probability valence valence probability interpretation
69
+ 0 Warmth 0.912 1.0 0.876 Warm
70
+ 1 Morality 0.843 1.0 0.791 Moral
71
+ ```
72
+
73
+ ---
74
+
75
+ ## API reference
76
+
77
+ ### `sadbert.get_stereotype_content(text, stacked=True)`
78
+
79
+ Module-level convenience function. Uses a shared, lazily-initialised `SADBERT` instance.
80
+
81
+ | Parameter | Type | Description |
82
+ |---|---|---|
83
+ | `text` | `str` or `list[str]` | Word(s) or phrase(s) to classify |
84
+ | `stacked` | `bool` | `True` (default): return one combined DataFrame with a `"text"` column. `False`: return a `dict[str, DataFrame]`. For single string input with `stacked=False`, returns the DataFrame directly. |
85
+
86
+ ---
87
+
88
+ ### `sadbert.SADBERT(device=None, batch_size=32, load_models=True)`
89
+
90
+ Instantiate your own SADBERT object for full control.
91
+
92
+ ```python
93
+ from sadbert import SADBERT
94
+
95
+ model = SADBERT(
96
+ device="cuda", # "cuda" | "mps" | "cpu" | None (auto-detect)
97
+ batch_size=64, # increase for faster throughput on GPU
98
+ load_models=True, # set False to defer model loading to first call
99
+ )
100
+
101
+ results = model.get_stereotype_content(["nurse", "engineer", "senator"])
102
+ ```
103
+
104
+ ---
105
+
106
+ ## Model architecture
107
+
108
+ SADBERT uses a three-stage ensemble:
109
+
110
+ ```
111
+ Input text
112
+
113
+
114
+ ┌─────────────────────────────────────────────────────┐
115
+ │ Stage 1 · Master model (SADBERT_master_model) │
116
+ │ Multi-label DistilBERT, 35 output classes │
117
+ │ Softmax probabilities compared against per-class │
118
+ │ Youden-J thresholds → candidate categories │
119
+ └─────────────────────────────────────────────────────┘
120
+ │ candidate categories
121
+
122
+ ┌─────────────────────────────────────────────────────┐
123
+ │ Stage 2 · Classifier heads (SADBERT_{cat}_classifier) │
124
+ │ One binary DistilBERT per category │
125
+ │ Veto gate — keeps only categories confirmed by │
126
+ │ both master model and dedicated head │
127
+ └─────────────────────────────────────────────────────┘
128
+ │ confirmed categories + probabilities
129
+
130
+ ┌─────────────────────────────────────────────────────┐
131
+ │ Stage 3 · Sentiment models (SADBERT_{cat}_sentiment) │
132
+ │ One 3-class DistilBERT per major category │
133
+ │ Predicts negative / neutral / positive valence │
134
+ └─────────────────────────────────────────────────────┘
135
+
136
+
137
+ Results DataFrame
138
+ ```
139
+
140
+ All models are hosted on HuggingFace at [huggingface.co/XanderD24](https://huggingface.co/XanderD24).
141
+
142
+ ---
143
+
144
+ ## Building from source
145
+
146
+ ```bash
147
+ git clone https://github.com/XanderD24/sadbert.git
148
+ cd sadbert
149
+
150
+ # Install in editable mode with dev dependencies
151
+ pip install -e ".[dev]"
152
+
153
+ # Run tests
154
+ pytest
155
+ ```
156
+
157
+ ### Adding ROC_dict.pkl before publishing
158
+
159
+ `ROC_dict.pkl` (per-category probability thresholds) is **required** but not included in the repository for size reasons. Copy it into the data directory before building the wheel:
160
+
161
+ ```bash
162
+ cp /path/to/ROC_dict.pkl sadbert/data/ROC_dict.pkl
163
+ ```
164
+
165
+ Then build:
166
+
167
+ ```bash
168
+ python -m build # produces dist/sadbert-0.1.0.tar.gz and .whl
169
+ twine check dist/* # sanity-check before uploading
170
+ twine upload dist/* # publish to PyPI
171
+ ```
172
+
173
+ ---
174
+
175
+ ## Citation
176
+
177
+ If you use SADBERT in research, please cite this package using the entry below, and also cite the original SADCAT work it builds on (see "Citation of Original Work"):
178
+
179
+ ```
180
+ @misc{sadbert2025,
181
+ author = {Deanhardt, Xander},
182
+ title = {{SADBERT}: Stereotype-content Analysis with {DistilBERT}},
183
+ year = {2025},
184
+ url = {https://github.com/XanderD24/sadbert},
185
+ }
186
+ ```
187
+
188
+ ---
189
+ ## Citation of Original Work
190
+ All data used to fine-tune these models was taken from the SADCAT dictionary, published by Gandalf Nicolas, Xuechunzi Bai, and Susan T. Fiske, and hosted on GitHub by Gandalf Nicolas. The dictionary was first published in the following journal article:
191
+
192
+ Nicolas, Gandalf, et al. “Comprehensive Stereotype Content Dictionaries Using a Semi‐Automated Method.” European Journal of Social Psychology, vol. 51, no. 1, Feb. 2021, pp. 178–196, https://doi.org/10.1002/ejsp.2724.
193
+
194
+ GitHub link: <https://github.com/gandalfnicolas/SADCAT/tree/master>
195
+
196
+ OSF repository: <https://osf.io/yx45f/>
197
+ ## License
198
+
199
+ SADBERT Software License (MIT-style terms plus a reservation of academic publication rights) — see [LICENSE](LICENSE) for details.
@@ -0,0 +1,94 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ # ─────────────────────────────────────────────────────────────────────────────
6
+ # Project metadata
7
+ # ─────────────────────────────────────────────────────────────────────────────
8
+ [project]
9
+ name = "sadbert"
10
+ version = "0.1.0"
11
+ description = "Stereotype-content analysis with fine-tuned DistilBERT models (SADCAT framework)"
12
+ readme = "README.md"
13
+ license = { file = "LICENSE" }
14
+ requires-python = ">=3.9"
15
+
16
+ authors = [
17
+ { name = "Xander Deanhardt", email = "xanderd24@uchicago.edu" },
18
+ ]
19
+
20
+ keywords = [
21
+ "NLP", "stereotype", "social cognition", "SADCAT",
22
+ "DistilBERT", "transformers", "sentiment analysis",
23
+ ]
24
+
25
+ classifiers = [
26
+ "Development Status :: 3 - Alpha",
27
+ "Intended Audience :: Science/Research",
28
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
29
+ "Topic :: Text Processing :: Linguistic",
30
+ "License :: Other/Proprietary License",
31
+ "Programming Language :: Python :: 3",
32
+ "Programming Language :: Python :: 3.9",
33
+ "Programming Language :: Python :: 3.10",
34
+ "Programming Language :: Python :: 3.11",
35
+ "Programming Language :: Python :: 3.12",
36
+ "Operating System :: OS Independent",
37
+ ]
38
+
39
+ # ── Runtime dependencies ──────────────────────────────────────────────────────
40
+ dependencies = [
41
+ "torch>=2.0.0",
42
+ "transformers>=4.35.0",
43
+ "pandas>=2.0.0",
44
+ "numpy>=1.24.0",
45
+ "huggingface_hub>=0.19.0",
46
+ ]
47
+
48
+ # ── Optional extras ───────────────────────────────────────────────────────────
49
+ [project.optional-dependencies]
50
+ dev = [
51
+ "pytest>=7.0",
52
+ "pytest-cov>=4.0",
53
+ "black>=23.0",
54
+ "ruff>=0.1.0",
55
+ "build>=1.0",
56
+ "twine>=4.0",
57
+ ]
58
+
59
+ # ── URLs ──────────────────────────────────────────────────────────────────────
60
+ [project.urls]
61
+ Homepage = "https://github.com/XanderD24/sadbert"
62
+ Repository = "https://github.com/XanderD24/sadbert"
63
+ "Bug Tracker" = "https://github.com/XanderD24/sadbert/issues"
64
+ "HuggingFace" = "https://huggingface.co/XanderD24"
65
+
66
+ # ─────────────────────────────────────────────────────────────────────────────
67
+ # Package discovery & data files
68
+ # ─────────────────────────────────────────────────────────────────────────────
69
+ [tool.setuptools.packages.find]
70
+ where = ["."] # look in the repo root
71
+ include = ["sadbert*"] # include sadbert and sadbert.data
72
+
73
+ [tool.setuptools.package-data]
74
+ # Bundle the static .pkl files so they are included in the wheel
75
+ "sadbert.data" = ["*.pkl"]
76
+
77
+ # ─────────────────────────────────────────────────────────────────────────────
78
+ # Tool configuration
79
+ # ─────────────────────────────────────────────────────────────────────────────
80
+ [tool.black]
81
+ line-length = 88
82
+ target-version = ["py39", "py310", "py311"]
83
+
84
+ [tool.ruff]
85
+ line-length = 88
86
+ select = ["E", "F", "W", "I"]
87
+ ignore = ["E501"]
88
+
89
+ [tool.pytest.ini_options]
90
+ testpaths = ["tests"]
91
+ addopts = "-v --tb=short"
92
+ markers = [
93
+ "slow: marks tests as slow (deselect with '-m \"not slow\"')",
94
+ ]
@@ -0,0 +1,9 @@
1
+ # Runtime dependencies for SADBERT
2
+ # These are the same constraints declared under `dependencies` in the [project] table of pyproject.toml
3
+ # and are provided here for convenience (e.g. pip install -r requirements.txt).
4
+
5
+ torch>=2.0.0
6
+ transformers>=4.35.0
7
+ pandas>=2.0.0
8
+ numpy>=1.24.0
9
+ huggingface_hub>=0.19.0
@@ -0,0 +1,40 @@
1
+ """
2
+ SADBERT — Stereotype-content Analysis with DistilBERT
3
+ ======================================================
4
+
5
+ A Python package for identifying stereotype content dimensions
6
+ (warmth, competence, morality, …) in natural language text and
7
+ classifying their valence (positive / neutral / negative).
8
+
9
+ Quick start
10
+ -----------
11
+ >>> import sadbert
12
+ >>> sadbert.get_stereotype_content("She is a warm and caring nurse.")
13
+
14
+ Or use the class directly for more control:
15
+
16
+ >>> from sadbert import SADBERT
17
+ >>> model = SADBERT(device="cuda", batch_size=64)
18
+ >>> model.get_stereotype_content(["honest", "lazy", "brilliant"])
19
+ """
20
+
21
+ from .core import (
22
+ SADBERT,
23
+ get_stereotype_content,
24
+ ALL_CATS,
25
+ MAJOR_CATS,
26
+ MINOR_CATS,
27
+ )
28
+
29
+ __version__ = "0.1.0"
30
+ __author__ = "Xander Deanhardt"
31
+ __email__ = "xanderd24@uchicago.edu"
32
+ __license__ = "MIT"
33
+
34
+ __all__ = [
35
+ "SADBERT",
36
+ "get_stereotype_content",
37
+ "ALL_CATS",
38
+ "MAJOR_CATS",
39
+ "MINOR_CATS",
40
+ ]