hfs-score 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hfs_score-0.1.0/LICENSE +21 -0
- hfs_score-0.1.0/PKG-INFO +202 -0
- hfs_score-0.1.0/README.md +181 -0
- hfs_score-0.1.0/pyproject.toml +42 -0
- hfs_score-0.1.0/setup.cfg +4 -0
- hfs_score-0.1.0/src/hfs_score/__init__.py +40 -0
- hfs_score-0.1.0/src/hfs_score/cli.py +60 -0
- hfs_score-0.1.0/src/hfs_score/core.py +144 -0
- hfs_score-0.1.0/src/hfs_score/criteria.py +147 -0
- hfs_score-0.1.0/src/hfs_score/rubric.py +42 -0
- hfs_score-0.1.0/src/hfs_score/validation.py +50 -0
- hfs_score-0.1.0/src/hfs_score.egg-info/PKG-INFO +202 -0
- hfs_score-0.1.0/src/hfs_score.egg-info/SOURCES.txt +17 -0
- hfs_score-0.1.0/src/hfs_score.egg-info/dependency_links.txt +1 -0
- hfs_score-0.1.0/src/hfs_score.egg-info/entry_points.txt +2 -0
- hfs_score-0.1.0/src/hfs_score.egg-info/requires.txt +5 -0
- hfs_score-0.1.0/src/hfs_score.egg-info/top_level.txt +1 -0
- hfs_score-0.1.0/tests/test_core.py +58 -0
- hfs_score-0.1.0/tests/test_criteria.py +54 -0
hfs_score-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 The HFS Authors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal in the Software
|
|
7
|
+
without restriction, including without limitation the rights to use, copy,
|
|
8
|
+
modify, merge, publish, distribute, sublicense, and/or sell copies of the
|
|
9
|
+
Software, and to permit persons to whom the Software is furnished to do so,
|
|
10
|
+
subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
hfs_score-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hfs-score
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Historical Fidelity Score for evaluating AI-generated cultural heritage reconstructions.
|
|
5
|
+
Author: Oussama Kaich, Zakaria El Fakir, Sanaa El Filali, Omar Zahour, El Habib Benlahmar
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: historical fidelity,cultural heritage,text-to-image,evaluation,trustworthy AI,diffusion models
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Image Processing
|
|
13
|
+
Requires-Python: >=3.9
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Provides-Extra: dev
|
|
17
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
18
|
+
Requires-Dist: build>=1.0; extra == "dev"
|
|
19
|
+
Requires-Dist: twine>=4.0; extra == "dev"
|
|
20
|
+
Dynamic: license-file
|
|
21
|
+
|
|
22
|
+
# hfs-score
|
|
23
|
+
|
|
24
|
+
**Historical Fidelity Score (HFS)** is a Python starter library for evaluating the historical fidelity of AI-generated cultural heritage images.
|
|
25
|
+
|
|
26
|
+
This project implements the methodological scoring framework proposed in:
|
|
27
|
+
|
|
28
|
+
> *A Mathematical Scoring Model for Historical Fidelity in Text-to-Image Reconstruction of Cultural Heritage Scenes*
|
|
29
|
+
|
|
30
|
+
The goal is not to train a new AI model.
|
|
31
|
+
The goal is to provide a reusable scoring toolkit for researchers, museums, heritage experts, and AI practitioners.
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## What HFS measures
|
|
36
|
+
|
|
37
|
+
HFS combines five positive dimensions:
|
|
38
|
+
|
|
39
|
+
| Symbol | Meaning |
|
|
40
|
+
|---|---|
|
|
41
|
+
| TIA | Text-Image Alignment |
|
|
42
|
+
| VSS | Visual Similarity Score |
|
|
43
|
+
| ACS | Architectural Consistency Score |
|
|
44
|
+
| CHP | Cultural and Historical Plausibility |
|
|
45
|
+
| EVS | Expert Validation Score |
|
|
46
|
+
|
|
47
|
+
And two penalties:
|
|
48
|
+
|
|
49
|
+
| Symbol | Meaning |
|
|
50
|
+
|---|---|
|
|
51
|
+
| UP | Uncertainty Penalty |
|
|
52
|
+
| BP | Bias and Hallucination Penalty |
|
|
53
|
+
|
|
54
|
+
The global score is:
|
|
55
|
+
|
|
56
|
+
```text
|
|
57
|
+
HFS(I) = 100 × clip(
|
|
58
|
+
w1*TIA + w2*VSS + w3*ACS + w4*CHP + w5*EVS
|
|
59
|
+
- w6*UP - w7*BP,
|
|
60
|
+
0, 1
|
|
61
|
+
)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Default illustrative weights:
|
|
65
|
+
|
|
66
|
+
```text
|
|
67
|
+
(w1, w2, w3, w4, w5, w6, w7)
|
|
68
|
+
=
|
|
69
|
+
(0.20, 0.18, 0.17, 0.20, 0.15, 0.05, 0.05)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
These weights are initial values and should be validated using expert elicitation and sensitivity analysis.
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## Installation for development
|
|
77
|
+
|
|
78
|
+
Clone or unzip this project, then run:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
cd hfs-score
|
|
82
|
+
python -m pip install -e .
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
For tests:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
python -m pip install -e ".[dev]"
|
|
89
|
+
pytest
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## Basic usage
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from hfs_score import (
|
|
98
|
+
compute_hfs,
|
|
99
|
+
compute_tia,
|
|
100
|
+
compute_vss,
|
|
101
|
+
compute_acs,
|
|
102
|
+
compute_chp,
|
|
103
|
+
compute_evs,
|
|
104
|
+
compute_up,
|
|
105
|
+
compute_bp,
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
TIA = compute_tia(clip_score=0.84, tifa_score=0.78)
|
|
109
|
+
VSS = compute_vss(ssim_score=0.72, lpips_distance=0.30)
|
|
110
|
+
ACS = compute_acs(layout=0.80, proportions=0.70, structure=0.65)
|
|
111
|
+
CHP = compute_chp(
|
|
112
|
+
temporal=0.90,
|
|
113
|
+
artifacts=0.75,
|
|
114
|
+
materials=0.80,
|
|
115
|
+
anachronism_absence=0.70
|
|
116
|
+
)
|
|
117
|
+
EVS = compute_evs([0.85, 0.90, 0.80])
|
|
118
|
+
UP = compute_up(seed_variance=0.20, expert_disagreement=0.15)
|
|
119
|
+
BP = compute_bp(hallucination_rate=0.10, bias_rate=0.15)
|
|
120
|
+
|
|
121
|
+
score = compute_hfs(
|
|
122
|
+
TIA=TIA,
|
|
123
|
+
VSS=VSS,
|
|
124
|
+
ACS=ACS,
|
|
125
|
+
CHP=CHP,
|
|
126
|
+
EVS=EVS,
|
|
127
|
+
UP=UP,
|
|
128
|
+
BP=BP,
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
print(f"HFS = {score:.2f}/100")
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## CLI usage
|
|
137
|
+
|
|
138
|
+
After installation:
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
hfs-score --tia 0.80 --vss 0.70 --acs 0.60 --chp 0.75 --evs 0.90 --up 0.20 --bp 0.10
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
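Expected output (first line; the command also prints the positive component, the penalty component, and the raw score):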
|
|
145
|
+
|
|
146
|
+
```text
|
|
147
|
+
HFS = 65.80/100
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## Project roadmap
|
|
153
|
+
|
|
154
|
+
### Version 0.1
|
|
155
|
+
- Core HFS formula
|
|
156
|
+
- Criteria formulas
|
|
157
|
+
- Basic CLI
|
|
158
|
+
- Tests
|
|
159
|
+
- Example usage
|
|
160
|
+
|
|
161
|
+
### Version 0.2
|
|
162
|
+
- Batch CSV evaluation
|
|
163
|
+
- Export JSON/CSV reports
|
|
164
|
+
- Weight sensitivity analysis
|
|
165
|
+
|
|
166
|
+
### Version 0.3
|
|
167
|
+
- Integration with CLIPScore, SSIM, LPIPS
|
|
168
|
+
- Expert rubric forms
|
|
169
|
+
|
|
170
|
+
### Version 1.0
|
|
171
|
+
- Full research-ready toolkit
|
|
172
|
+
- Benchmark comparison
|
|
173
|
+
- Automatic report generation
|
|
174
|
+
|
|
175
|
+
---
|
|
176
|
+
|
|
177
|
+
## Scientific note
|
|
178
|
+
|
|
179
|
+
This package implements a methodological framework.
|
|
180
|
+
It does not claim that an HFS score is an absolute historical truth.
|
|
181
|
+
The score should be interpreted as a transparent decision-support measure combining automatic metrics, expert rubrics, and explicit risk penalties.
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
---
|
|
186
|
+
|
|
187
|
+
## Publishing on PyPI
|
|
188
|
+
|
|
189
|
+
For detailed instructions, see:
|
|
190
|
+
|
|
191
|
+
- `PUBLISHING_PYPI_FR.md`
|
|
192
|
+
- `PUBLISHING_PYPI_EN.md`
|
|
193
|
+
|
|
194
|
+
Quick commands:
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
python -m pip install --upgrade build twine
|
|
198
|
+
python -m build
|
|
199
|
+
python -m twine check dist/*
|
|
200
|
+
python -m twine upload --repository testpypi dist/*
|
|
201
|
+
python -m twine upload dist/*
|
|
202
|
+
```
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
# hfs-score
|
|
2
|
+
|
|
3
|
+
**Historical Fidelity Score (HFS)** is a Python starter library for evaluating the historical fidelity of AI-generated cultural heritage images.
|
|
4
|
+
|
|
5
|
+
This project implements the methodological scoring framework proposed in:
|
|
6
|
+
|
|
7
|
+
> *A Mathematical Scoring Model for Historical Fidelity in Text-to-Image Reconstruction of Cultural Heritage Scenes*
|
|
8
|
+
|
|
9
|
+
The goal is not to train a new AI model.
|
|
10
|
+
The goal is to provide a reusable scoring toolkit for researchers, museums, heritage experts, and AI practitioners.
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## What HFS measures
|
|
15
|
+
|
|
16
|
+
HFS combines five positive dimensions:
|
|
17
|
+
|
|
18
|
+
| Symbol | Meaning |
|
|
19
|
+
|---|---|
|
|
20
|
+
| TIA | Text-Image Alignment |
|
|
21
|
+
| VSS | Visual Similarity Score |
|
|
22
|
+
| ACS | Architectural Consistency Score |
|
|
23
|
+
| CHP | Cultural and Historical Plausibility |
|
|
24
|
+
| EVS | Expert Validation Score |
|
|
25
|
+
|
|
26
|
+
And two penalties:
|
|
27
|
+
|
|
28
|
+
| Symbol | Meaning |
|
|
29
|
+
|---|---|
|
|
30
|
+
| UP | Uncertainty Penalty |
|
|
31
|
+
| BP | Bias and Hallucination Penalty |
|
|
32
|
+
|
|
33
|
+
The global score is:
|
|
34
|
+
|
|
35
|
+
```text
|
|
36
|
+
HFS(I) = 100 × clip(
|
|
37
|
+
w1*TIA + w2*VSS + w3*ACS + w4*CHP + w5*EVS
|
|
38
|
+
- w6*UP - w7*BP,
|
|
39
|
+
0, 1
|
|
40
|
+
)
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Default illustrative weights:
|
|
44
|
+
|
|
45
|
+
```text
|
|
46
|
+
(w1, w2, w3, w4, w5, w6, w7)
|
|
47
|
+
=
|
|
48
|
+
(0.20, 0.18, 0.17, 0.20, 0.15, 0.05, 0.05)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
These weights are initial values and should be validated using expert elicitation and sensitivity analysis.
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Installation for development
|
|
56
|
+
|
|
57
|
+
Clone or unzip this project, then run:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
cd hfs-score
|
|
61
|
+
python -m pip install -e .
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
For tests:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
python -m pip install -e ".[dev]"
|
|
68
|
+
pytest
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
## Basic usage
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
from hfs_score import (
|
|
77
|
+
compute_hfs,
|
|
78
|
+
compute_tia,
|
|
79
|
+
compute_vss,
|
|
80
|
+
compute_acs,
|
|
81
|
+
compute_chp,
|
|
82
|
+
compute_evs,
|
|
83
|
+
compute_up,
|
|
84
|
+
compute_bp,
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
TIA = compute_tia(clip_score=0.84, tifa_score=0.78)
|
|
88
|
+
VSS = compute_vss(ssim_score=0.72, lpips_distance=0.30)
|
|
89
|
+
ACS = compute_acs(layout=0.80, proportions=0.70, structure=0.65)
|
|
90
|
+
CHP = compute_chp(
|
|
91
|
+
temporal=0.90,
|
|
92
|
+
artifacts=0.75,
|
|
93
|
+
materials=0.80,
|
|
94
|
+
anachronism_absence=0.70
|
|
95
|
+
)
|
|
96
|
+
EVS = compute_evs([0.85, 0.90, 0.80])
|
|
97
|
+
UP = compute_up(seed_variance=0.20, expert_disagreement=0.15)
|
|
98
|
+
BP = compute_bp(hallucination_rate=0.10, bias_rate=0.15)
|
|
99
|
+
|
|
100
|
+
score = compute_hfs(
|
|
101
|
+
TIA=TIA,
|
|
102
|
+
VSS=VSS,
|
|
103
|
+
ACS=ACS,
|
|
104
|
+
CHP=CHP,
|
|
105
|
+
EVS=EVS,
|
|
106
|
+
UP=UP,
|
|
107
|
+
BP=BP,
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
print(f"HFS = {score:.2f}/100")
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## CLI usage
|
|
116
|
+
|
|
117
|
+
After installation:
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
hfs-score --tia 0.80 --vss 0.70 --acs 0.60 --chp 0.75 --evs 0.90 --up 0.20 --bp 0.10
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
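Expected output (first line; the command also prints the positive component, the penalty component, and the raw score):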
|
|
124
|
+
|
|
125
|
+
```text
|
|
126
|
+
HFS = 65.80/100
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## Project roadmap
|
|
132
|
+
|
|
133
|
+
### Version 0.1
|
|
134
|
+
- Core HFS formula
|
|
135
|
+
- Criteria formulas
|
|
136
|
+
- Basic CLI
|
|
137
|
+
- Tests
|
|
138
|
+
- Example usage
|
|
139
|
+
|
|
140
|
+
### Version 0.2
|
|
141
|
+
- Batch CSV evaluation
|
|
142
|
+
- Export JSON/CSV reports
|
|
143
|
+
- Weight sensitivity analysis
|
|
144
|
+
|
|
145
|
+
### Version 0.3
|
|
146
|
+
- Integration with CLIPScore, SSIM, LPIPS
|
|
147
|
+
- Expert rubric forms
|
|
148
|
+
|
|
149
|
+
### Version 1.0
|
|
150
|
+
- Full research-ready toolkit
|
|
151
|
+
- Benchmark comparison
|
|
152
|
+
- Automatic report generation
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
## Scientific note
|
|
157
|
+
|
|
158
|
+
This package implements a methodological framework.
|
|
159
|
+
It does not claim that an HFS score is an absolute historical truth.
|
|
160
|
+
The score should be interpreted as a transparent decision-support measure combining automatic metrics, expert rubrics, and explicit risk penalties.
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## Publishing on PyPI
|
|
167
|
+
|
|
168
|
+
For detailed instructions, see:
|
|
169
|
+
|
|
170
|
+
- `PUBLISHING_PYPI_FR.md`
|
|
171
|
+
- `PUBLISHING_PYPI_EN.md`
|
|
172
|
+
|
|
173
|
+
Quick commands:
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
python -m pip install --upgrade build twine
|
|
177
|
+
python -m build
|
|
178
|
+
python -m twine check dist/*
|
|
179
|
+
python -m twine upload --repository testpypi dist/*
|
|
180
|
+
python -m twine upload dist/*
|
|
181
|
+
```
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "hfs-score"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Historical Fidelity Score for evaluating AI-generated cultural heritage reconstructions."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Oussama Kaich" },
|
|
14
|
+
{ name = "Zakaria El Fakir" },
|
|
15
|
+
{ name = "Sanaa El Filali" },
|
|
16
|
+
{ name = "Omar Zahour" },
|
|
17
|
+
{ name = "El Habib Benlahmar" }
|
|
18
|
+
]
|
|
19
|
+
keywords = [
|
|
20
|
+
"historical fidelity",
|
|
21
|
+
"cultural heritage",
|
|
22
|
+
"text-to-image",
|
|
23
|
+
"evaluation",
|
|
24
|
+
"trustworthy AI",
|
|
25
|
+
"diffusion models"
|
|
26
|
+
]
|
|
27
|
+
classifiers = [
|
|
28
|
+
"Programming Language :: Python :: 3",
|
|
29
|
+
"License :: OSI Approved :: MIT License",
|
|
30
|
+
"Intended Audience :: Science/Research",
|
|
31
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
32
|
+
"Topic :: Scientific/Engineering :: Image Processing"
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[project.optional-dependencies]
|
|
36
|
+
dev = ["pytest>=7.0", "build>=1.0", "twine>=4.0"]
|
|
37
|
+
|
|
38
|
+
[project.scripts]
|
|
39
|
+
hfs-score = "hfs_score.cli:main"
|
|
40
|
+
|
|
41
|
+
[tool.setuptools.packages.find]
|
|
42
|
+
where = ["src"]
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""
|
|
2
|
+
hfs_score
|
|
3
|
+
|
|
4
|
+
A lightweight Python library for computing the Historical Fidelity Score (HFS)
|
|
5
|
+
for AI-generated cultural heritage reconstructions.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .core import HFSWeights, HFSBreakdown, compute_hfs, compute_hfs_breakdown
|
|
9
|
+
from .criteria import (
|
|
10
|
+
normalize_benefit,
|
|
11
|
+
normalize_cost,
|
|
12
|
+
compute_tia,
|
|
13
|
+
compute_vss,
|
|
14
|
+
compute_acs,
|
|
15
|
+
compute_chp,
|
|
16
|
+
compute_evs,
|
|
17
|
+
compute_up,
|
|
18
|
+
compute_bp,
|
|
19
|
+
)
|
|
20
|
+
from .validation import validate_score, validate_weights
|
|
21
|
+
|
|
22
|
+
__version__ = "0.1.0"
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
"HFSWeights",
|
|
26
|
+
"HFSBreakdown",
|
|
27
|
+
"compute_hfs",
|
|
28
|
+
"compute_hfs_breakdown",
|
|
29
|
+
"normalize_benefit",
|
|
30
|
+
"normalize_cost",
|
|
31
|
+
"compute_tia",
|
|
32
|
+
"compute_vss",
|
|
33
|
+
"compute_acs",
|
|
34
|
+
"compute_chp",
|
|
35
|
+
"compute_evs",
|
|
36
|
+
"compute_up",
|
|
37
|
+
"compute_bp",
|
|
38
|
+
"validate_score",
|
|
39
|
+
"validate_weights",
|
|
40
|
+
]
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Command-line interface for hfs-score."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
|
|
5
|
+
from .core import HFSWeights, compute_hfs_breakdown
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser used by the ``hfs-score`` command.

    Seven required float options carry the criterion and penalty values
    (``--tia`` through ``--bp``); seven optional float options carry the
    weights (``--w1`` through ``--w7``).

    Returns
    -------
    argparse.ArgumentParser
        Parser with all fourteen options registered.
    """
    cli = argparse.ArgumentParser(
        description="Compute Historical Fidelity Score (HFS)."
    )

    # (flag, help text) for the mandatory criterion and penalty values.
    score_options = (
        ("--tia", "Text-Image Alignment score [0,1]."),
        ("--vss", "Visual Similarity Score [0,1]."),
        ("--acs", "Architectural Consistency Score [0,1]."),
        ("--chp", "Cultural and Historical Plausibility [0,1]."),
        ("--evs", "Expert Validation Score [0,1]."),
        ("--up", "Uncertainty Penalty [0,1]."),
        ("--bp", "Bias and Hallucination Penalty [0,1]."),
    )
    for flag, text in score_options:
        cli.add_argument(flag, type=float, required=True, help=text)

    # (flag, default, help text) for the optional weights.
    weight_options = (
        ("--w1", 0.20, "Weight for TIA."),
        ("--w2", 0.18, "Weight for VSS."),
        ("--w3", 0.17, "Weight for ACS."),
        ("--w4", 0.20, "Weight for CHP."),
        ("--w5", 0.15, "Weight for EVS."),
        ("--w6", 0.05, "Weight for UP."),
        ("--w7", 0.05, "Weight for BP."),
    )
    for flag, fallback, text in weight_options:
        cli.add_argument(flag, type=float, default=fallback, help=text)

    return cli
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def main() -> None:
    """Run the ``hfs-score`` command-line interface.

    Parses the command line, builds the weight set from ``--w1`` ... ``--w7``,
    computes the score breakdown and prints four lines: the HFS on a 0-100
    scale, the positive component, the penalty component and the raw score.

    Invalid input (a score outside [0, 1], a negative weight, or weights that
    do not sum to 1) is reported through ``parser.error``, which prints the
    usage message plus the reason to stderr and exits with status 2, rather
    than surfacing as an unhandled ``ValueError`` traceback.
    """
    parser = build_parser()
    args = parser.parse_args()

    weights = HFSWeights(
        w1=args.w1,
        w2=args.w2,
        w3=args.w3,
        w4=args.w4,
        w5=args.w5,
        w6=args.w6,
        w7=args.w7,
    )

    try:
        result = compute_hfs_breakdown(
            TIA=args.tia,
            VSS=args.vss,
            ACS=args.acs,
            CHP=args.chp,
            EVS=args.evs,
            UP=args.up,
            BP=args.bp,
            weights=weights,
        )
    except ValueError as exc:
        # Validation failures are user errors, not crashes: show them the
        # same way argparse shows its own argument errors.
        parser.error(str(exc))

    print(f"HFS = {result.hfs_0_100:.2f}/100")
    print(f"Positive component = {result.positive_component:.4f}")
    print(f"Penalty component = {result.penalty_component:.4f}")
    print(f"Raw score = {result.raw_score_0_1:.4f}")
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""Core implementation of the Historical Fidelity Score."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from .validation import validate_score, validate_weights
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(frozen=True)
class HFSWeights:
    """
    Immutable set of the seven coefficients of the HFS formula.

    ``w1`` to ``w5`` multiply the positive criteria and ``w6`` to ``w7``
    multiply the penalties. The defaults are illustrative only; they should be
    validated through expert elicitation, AHP, and sensitivity analysis in
    future studies.
    """

    # Coefficients of the positive criteria.
    w1: float = 0.20  # Text-Image Alignment (TIA)
    w2: float = 0.18  # Visual Similarity Score (VSS)
    w3: float = 0.17  # Architectural Consistency Score (ACS)
    w4: float = 0.20  # Cultural and Historical Plausibility (CHP)
    w5: float = 0.15  # Expert Validation Score (EVS)

    # Coefficients of the penalties.
    w6: float = 0.05  # Uncertainty Penalty (UP)
    w7: float = 0.05  # Bias and Hallucination Penalty (BP)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True)
class HFSBreakdown:
    """
    Immutable record holding every intermediate value of one HFS computation.
    """

    # Positive component minus penalty component, before clipping.
    raw_score_0_1: float
    # Raw score after clipping.
    clipped_score_0_1: float
    # Final score on the 0-100 scale.
    hfs_0_100: float
    # Weighted sum of the five positive criteria.
    positive_component: float
    # Weighted sum of the two penalties.
    penalty_component: float
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def clip(value: float, lower: float = 0.0, upper: float = 1.0) -> float:
    """Limit ``value`` to the interval bounded by ``lower`` and ``upper``."""
    # Apply the ceiling first, then the floor.
    capped = min(value, upper)
    return max(lower, capped)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def compute_hfs_breakdown(
    TIA: float,
    VSS: float,
    ACS: float,
    CHP: float,
    EVS: float,
    UP: float,
    BP: float,
    weights: HFSWeights = HFSWeights(),
) -> HFSBreakdown:
    """
    Compute the Historical Fidelity Score together with its intermediate values.

    Parameters
    ----------
    TIA:
        Text-Image Alignment score in [0, 1].
    VSS:
        Visual Similarity Score in [0, 1].
    ACS:
        Architectural Consistency Score in [0, 1].
    CHP:
        Cultural and Historical Plausibility score in [0, 1].
    EVS:
        Expert Validation Score in [0, 1].
    UP:
        Uncertainty Penalty in [0, 1]. Higher value means higher risk.
    BP:
        Bias and Hallucination Penalty in [0, 1]. Higher value means higher risk.
    weights:
        HFSWeights object supplying ``w1`` ... ``w7``.

    Returns
    -------
    HFSBreakdown
        Raw score, clipped score, 0-100 score, positive and penalty components.
    """
    validate_weights(weights)

    # Inputs are checked in this fixed order, so the first invalid one is
    # the one reported.
    named_inputs = (
        ("TIA", TIA),
        ("VSS", VSS),
        ("ACS", ACS),
        ("CHP", CHP),
        ("EVS", EVS),
        ("UP", UP),
        ("BP", BP),
    )
    for label, score in named_inputs:
        validate_score(label, score)

    gains = (
        weights.w1 * TIA
        + weights.w2 * VSS
        + weights.w3 * ACS
        + weights.w4 * CHP
        + weights.w5 * EVS
    )
    losses = weights.w6 * UP + weights.w7 * BP

    raw = gains - losses
    bounded = clip(raw, 0.0, 1.0)

    return HFSBreakdown(
        raw_score_0_1=raw,
        clipped_score_0_1=bounded,
        hfs_0_100=100.0 * bounded,
        positive_component=gains,
        penalty_component=losses,
    )
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def compute_hfs(
    TIA: float,
    VSS: float,
    ACS: float,
    CHP: float,
    EVS: float,
    UP: float,
    BP: float,
    weights: HFSWeights = HFSWeights(),
) -> float:
    """
    Compute the Historical Fidelity Score as a single number.

    Delegates to ``compute_hfs_breakdown`` with the same arguments and keeps
    only the final score.

    Returns
    -------
    float
        HFS score between 0 and 100.
    """
    breakdown = compute_hfs_breakdown(
        TIA=TIA,
        VSS=VSS,
        ACS=ACS,
        CHP=CHP,
        EVS=EVS,
        UP=UP,
        BP=BP,
        weights=weights,
    )
    return breakdown.hfs_0_100
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""Criterion-level formulas for the Historical Fidelity Score."""
|
|
2
|
+
|
|
3
|
+
from typing import Sequence
|
|
4
|
+
|
|
5
|
+
from .validation import validate_score
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def normalize_benefit(value: float, minimum: float, maximum: float, epsilon: float = 1e-8) -> float:
    """
    Rescale a higher-is-better metric and limit the result to [0, 1].

    Formula:
    (value - minimum) / (maximum - minimum + epsilon)

    ``epsilon`` is added to the denominator, so ``maximum == minimum`` does
    not divide by zero.
    """
    span = maximum - minimum + epsilon
    ratio = (value - minimum) / span
    capped = min(1.0, ratio)
    return max(0.0, capped)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def normalize_cost(value: float, minimum: float, maximum: float, epsilon: float = 1e-8) -> float:
    """
    Rescale a lower-is-better metric and limit the result to [0, 1].

    Formula:
    1 - (value - minimum) / (maximum - minimum + epsilon)

    ``epsilon`` is added to the denominator, so ``maximum == minimum`` does
    not divide by zero.
    """
    span = maximum - minimum + epsilon
    inverted = 1.0 - (value - minimum) / span
    capped = min(1.0, inverted)
    return max(0.0, capped)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def compute_tia(clip_score: float, tifa_score: float, alpha1: float = 0.5) -> float:
    """
    Blend CLIPScore and TIFA into the Text-Image Alignment criterion.

    Formula:
    TIA = alpha1 * CLIPScore + alpha2 * TIFA

    ``alpha2`` is derived as ``1 - alpha1``. Both input scores must already
    be normalized in [0, 1].

    Raises ``ValueError`` when ``alpha1`` is below 0 or above 1.
    """
    for label, score in (("clip_score", clip_score), ("tifa_score", tifa_score)):
        validate_score(label, score)

    if alpha1 > 1 or alpha1 < 0:
        raise ValueError("alpha1 must be between 0 and 1.")

    # The two blend coefficients always add up to one.
    alpha2 = 1.0 - alpha1
    return alpha1 * clip_score + alpha2 * tifa_score
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def compute_vss(ssim_score: float, lpips_distance: float, beta1: float = 0.5) -> float:
    """
    Blend SSIM and inverted LPIPS into the Visual Similarity Score.

    Formula:
    VSS = beta1 * SSIM + beta2 * (1 - LPIPS)

    ``beta2`` is derived as ``1 - beta1``. Both SSIM and LPIPS must already be
    normalized in [0, 1]. LPIPS is a distance (lower is better), which is why
    it enters the formula as ``1 - LPIPS``.

    Raises ``ValueError`` when ``beta1`` is below 0 or above 1.
    """
    for label, score in (("ssim_score", ssim_score), ("lpips_distance", lpips_distance)):
        validate_score(label, score)

    if beta1 > 1 or beta1 < 0:
        raise ValueError("beta1 must be between 0 and 1.")

    beta2 = 1.0 - beta1
    lpips_similarity = 1.0 - lpips_distance
    return beta1 * ssim_score + beta2 * lpips_similarity
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def compute_acs(layout: float, proportions: float, structure: float) -> float:
    """
    Average three architectural sub-scores into the Architectural Consistency Score.

    Formula:
    ACS = (layout + proportions + structure) / 3

    The three values should be obtained from an expert rubric or
    architecture-focused checklist and normalized in [0, 1].
    """
    sub_scores = (
        ("layout", layout),
        ("proportions", proportions),
        ("structure", structure),
    )
    for label, score in sub_scores:
        validate_score(label, score)

    total = layout + proportions + structure
    return total / 3.0
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def compute_chp(
    temporal: float,
    artifacts: float,
    materials: float,
    anachronism_absence: float,
) -> float:
    """
    Average four sub-scores into the Cultural and Historical Plausibility criterion.

    Formula:
    CHP = (temporal + artifacts + materials + anachronism_absence) / 4

    Every sub-score is validated to lie in [0, 1] before averaging.
    """
    sub_scores = (
        ("temporal", temporal),
        ("artifacts", artifacts),
        ("materials", materials),
        ("anachronism_absence", anachronism_absence),
    )
    for label, score in sub_scores:
        validate_score(label, score)

    total = temporal + artifacts + materials + anachronism_absence
    return total / 4.0
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def compute_evs(expert_scores: Sequence[float]) -> float:
    """
    Average the individual expert ratings into the Expert Validation Score.

    Formula:
    EVS = average expert score

    expert_scores must contain values normalized in [0, 1].

    Raises ``ValueError`` when ``expert_scores`` is empty.
    """
    count = len(expert_scores)
    if count == 0:
        raise ValueError("expert_scores cannot be empty.")

    # The position is included in the name so a failure identifies the bad entry.
    for position, rating in enumerate(expert_scores):
        validate_score(f"expert_scores[{position}]", rating)

    return sum(expert_scores) / count
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def compute_up(seed_variance: float, expert_disagreement: float) -> float:
    """
    Average two uncertainty indicators into the Uncertainty Penalty.

    Formula:
    UP = (seed_variance + expert_disagreement) / 2

    Both inputs are validated to lie in [0, 1].
    """
    for label, score in (
        ("seed_variance", seed_variance),
        ("expert_disagreement", expert_disagreement),
    ):
        validate_score(label, score)

    total = seed_variance + expert_disagreement
    return total / 2.0
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def compute_bp(hallucination_rate: float, bias_rate: float) -> float:
    """
    Average two risk rates into the Bias and Hallucination Penalty.

    Formula:
    BP = (hallucination_rate + bias_rate) / 2

    Both inputs are validated to lie in [0, 1].
    """
    for label, score in (
        ("hallucination_rate", hallucination_rate),
        ("bias_rate", bias_rate),
    ):
        validate_score(label, score)

    total = hallucination_rate + bias_rate
    return total / 2.0
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Simple helper utilities for rubric-based scoring."""
|
|
2
|
+
|
|
3
|
+
from .validation import validate_score
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Recommended discrete rubric levels, keyed by score, with their labels.
RUBRIC_LEVELS = {
    0.0: "Incorrect or unsupported",
    0.25: "Weak fidelity",
    0.5: "Moderate fidelity",
    0.75: "Good fidelity",
    1.0: "Excellent fidelity",
}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def rubric_score(level: float) -> float:
    """
    Validate a rubric level and return it as a float.

    Recommended levels are:
    0.00, 0.25, 0.50, 0.75, 1.00

    Continuous values between 0 and 1 are also accepted.
    """
    validate_score("level", level)
    as_float = float(level)
    return as_float
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def describe_score(score: float) -> str:
    """
    Map a normalized score to a short textual interpretation.

    The score is validated first; the label is that of the first band whose
    exclusive upper bound exceeds the score.
    """
    validate_score("score", score)

    # (exclusive upper bound, label), in ascending order.
    bands = (
        (0.20, "Very low fidelity"),
        (0.40, "Low fidelity"),
        (0.60, "Moderate fidelity"),
        (0.80, "Good fidelity"),
    )
    for upper_bound, label in bands:
        if score < upper_bound:
            return label
    return "High fidelity"
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Validation utilities for hfs_score."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def validate_score(name: str, value: float) -> None:
    """
    Validate that a score is numeric and between 0 and 1 (inclusive).

    Parameters
    ----------
    name:
        Name of the score, used in the error message.
    value:
        Score value.

    Raises
    ------
    TypeError
        If the value is not an ``int`` or ``float``.
    ValueError
        If the value is outside [0, 1] or is NaN.
    """
    if not isinstance(value, (int, float)):
        raise TypeError(f"{name} must be a number.")
    # Written as a positive range test on purpose: every comparison with NaN
    # is False, so ``value < 0 or value > 1`` would let NaN through.
    if not 0 <= value <= 1:
        raise ValueError(f"{name} must be between 0 and 1. Got {value}.")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def validate_weights(weights) -> None:
    """
    Validate HFS weights.

    Each of the seven weights ``w1`` .. ``w7`` must be a non-negative
    number, and together they must sum to 1.0 within an absolute
    tolerance of 1e-8.

    Parameters
    ----------
    weights:
        Object exposing the numeric attributes ``w1`` through ``w7``.

    Raises
    ------
    ValueError
        If a weight is NaN or negative, or if the weights do not sum to 1.
    """
    import math

    values = [
        weights.w1,
        weights.w2,
        weights.w3,
        weights.w4,
        weights.w5,
        weights.w6,
        weights.w7,
    ]

    for index, value in enumerate(values, start=1):
        # NaN compares false against everything, so without this check it
        # would pass both the sign test and the sum test below.
        if math.isnan(value):
            raise ValueError(f"w{index} must be a number, not NaN.")
        if value < 0:
            raise ValueError(f"w{index} must be non-negative.")

    total = sum(values)
    # Positive form of the tolerance test so a non-finite total is rejected.
    if not abs(total - 1.0) <= 1e-8:
        raise ValueError(f"Weights must sum to 1. Current sum = {total}.")
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hfs-score
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Historical Fidelity Score for evaluating AI-generated cultural heritage reconstructions.
|
|
5
|
+
Author: Oussama Kaich, Zakaria El Fakir, Sanaa El Filali, Omar Zahour, El Habib Benlahmar
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: historical fidelity,cultural heritage,text-to-image,evaluation,trustworthy AI,diffusion models
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Image Processing
|
|
13
|
+
Requires-Python: >=3.9
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Provides-Extra: dev
|
|
17
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
18
|
+
Requires-Dist: build>=1.0; extra == "dev"
|
|
19
|
+
Requires-Dist: twine>=4.0; extra == "dev"
|
|
20
|
+
Dynamic: license-file
|
|
21
|
+
|
|
22
|
+
# hfs-score
|
|
23
|
+
|
|
24
|
+
**Historical Fidelity Score (HFS)** is a Python starter library for evaluating the historical fidelity of AI-generated cultural heritage images.
|
|
25
|
+
|
|
26
|
+
This project implements the methodological scoring framework proposed in:
|
|
27
|
+
|
|
28
|
+
> *A Mathematical Scoring Model for Historical Fidelity in Text-to-Image Reconstruction of Cultural Heritage Scenes*
|
|
29
|
+
|
|
30
|
+
The goal is not to train a new AI model.
|
|
31
|
+
The goal is to provide a reusable scoring toolkit for researchers, museums, heritage experts, and AI practitioners.
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## What HFS measures
|
|
36
|
+
|
|
37
|
+
HFS combines five positive dimensions:
|
|
38
|
+
|
|
39
|
+
| Symbol | Meaning |
|
|
40
|
+
|---|---|
|
|
41
|
+
| TIA | Text-Image Alignment |
|
|
42
|
+
| VSS | Visual Similarity Score |
|
|
43
|
+
| ACS | Architectural Consistency Score |
|
|
44
|
+
| CHP | Cultural and Historical Plausibility |
|
|
45
|
+
| EVS | Expert Validation Score |
|
|
46
|
+
|
|
47
|
+
And two penalties:
|
|
48
|
+
|
|
49
|
+
| Symbol | Meaning |
|
|
50
|
+
|---|---|
|
|
51
|
+
| UP | Uncertainty Penalty |
|
|
52
|
+
| BP | Bias and Hallucination Penalty |
|
|
53
|
+
|
|
54
|
+
The global score is:
|
|
55
|
+
|
|
56
|
+
```text
|
|
57
|
+
HFS(I) = 100 × clip(
|
|
58
|
+
w1*TIA + w2*VSS + w3*ACS + w4*CHP + w5*EVS
|
|
59
|
+
- w6*UP - w7*BP,
|
|
60
|
+
0, 1
|
|
61
|
+
)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Default illustrative weights:
|
|
65
|
+
|
|
66
|
+
```text
|
|
67
|
+
(w1, w2, w3, w4, w5, w6, w7)
|
|
68
|
+
=
|
|
69
|
+
(0.20, 0.18, 0.17, 0.20, 0.15, 0.05, 0.05)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
These weights are initial values and should be validated using expert elicitation and sensitivity analysis.
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## Installation for development
|
|
77
|
+
|
|
78
|
+
Clone or unzip this project, then run:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
cd hfs-score
|
|
82
|
+
python -m pip install -e .
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
For tests:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
python -m pip install -e ".[dev]"
|
|
89
|
+
pytest
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## Basic usage
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from hfs_score import (
|
|
98
|
+
compute_hfs,
|
|
99
|
+
compute_tia,
|
|
100
|
+
compute_vss,
|
|
101
|
+
compute_acs,
|
|
102
|
+
compute_chp,
|
|
103
|
+
compute_evs,
|
|
104
|
+
compute_up,
|
|
105
|
+
compute_bp,
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
TIA = compute_tia(clip_score=0.84, tifa_score=0.78)
|
|
109
|
+
VSS = compute_vss(ssim_score=0.72, lpips_distance=0.30)
|
|
110
|
+
ACS = compute_acs(layout=0.80, proportions=0.70, structure=0.65)
|
|
111
|
+
CHP = compute_chp(
|
|
112
|
+
temporal=0.90,
|
|
113
|
+
artifacts=0.75,
|
|
114
|
+
materials=0.80,
|
|
115
|
+
anachronism_absence=0.70
|
|
116
|
+
)
|
|
117
|
+
EVS = compute_evs([0.85, 0.90, 0.80])
|
|
118
|
+
UP = compute_up(seed_variance=0.20, expert_disagreement=0.15)
|
|
119
|
+
BP = compute_bp(hallucination_rate=0.10, bias_rate=0.15)
|
|
120
|
+
|
|
121
|
+
score = compute_hfs(
|
|
122
|
+
TIA=TIA,
|
|
123
|
+
VSS=VSS,
|
|
124
|
+
ACS=ACS,
|
|
125
|
+
CHP=CHP,
|
|
126
|
+
EVS=EVS,
|
|
127
|
+
UP=UP,
|
|
128
|
+
BP=BP,
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
print(f"HFS = {score:.2f}/100")
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## CLI usage
|
|
137
|
+
|
|
138
|
+
After installation:
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
hfs-score --tia 0.80 --vss 0.70 --acs 0.60 --chp 0.75 --evs 0.90 --up 0.20 --bp 0.10
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
Expected output:
|
|
145
|
+
|
|
146
|
+
```text
|
|
147
|
+
HFS = 65.80/100
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## Project roadmap
|
|
153
|
+
|
|
154
|
+
### Version 0.1
|
|
155
|
+
- Core HFS formula
|
|
156
|
+
- Criteria formulas
|
|
157
|
+
- Basic CLI
|
|
158
|
+
- Tests
|
|
159
|
+
- Example usage
|
|
160
|
+
|
|
161
|
+
### Version 0.2
|
|
162
|
+
- Batch CSV evaluation
|
|
163
|
+
- Export JSON/CSV reports
|
|
164
|
+
- Weight sensitivity analysis
|
|
165
|
+
|
|
166
|
+
### Version 0.3
|
|
167
|
+
- Integration with CLIPScore, SSIM, LPIPS
|
|
168
|
+
- Expert rubric forms
|
|
169
|
+
|
|
170
|
+
### Version 1.0
|
|
171
|
+
- Full research-ready toolkit
|
|
172
|
+
- Benchmark comparison
|
|
173
|
+
- Automatic report generation
|
|
174
|
+
|
|
175
|
+
---
|
|
176
|
+
|
|
177
|
+
## Scientific note
|
|
178
|
+
|
|
179
|
+
This package implements a methodological framework.
|
|
180
|
+
It does not claim that an HFS score is an absolute historical truth.
|
|
181
|
+
The score should be interpreted as a transparent decision-support measure combining automatic metrics, expert rubrics, and explicit risk penalties.
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
---
|
|
186
|
+
|
|
187
|
+
## Publishing on PyPI
|
|
188
|
+
|
|
189
|
+
For detailed instructions, see:
|
|
190
|
+
|
|
191
|
+
- `PUBLISHING_PYPI_FR.md`
|
|
192
|
+
- `PUBLISHING_PYPI_EN.md`
|
|
193
|
+
|
|
194
|
+
Quick commands:
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
python -m pip install --upgrade build twine
|
|
198
|
+
python -m build
|
|
199
|
+
python -m twine check dist/*
|
|
200
|
+
python -m twine upload --repository testpypi dist/*
|
|
201
|
+
python -m twine upload dist/*
|
|
202
|
+
```
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/hfs_score/__init__.py
|
|
5
|
+
src/hfs_score/cli.py
|
|
6
|
+
src/hfs_score/core.py
|
|
7
|
+
src/hfs_score/criteria.py
|
|
8
|
+
src/hfs_score/rubric.py
|
|
9
|
+
src/hfs_score/validation.py
|
|
10
|
+
src/hfs_score.egg-info/PKG-INFO
|
|
11
|
+
src/hfs_score.egg-info/SOURCES.txt
|
|
12
|
+
src/hfs_score.egg-info/dependency_links.txt
|
|
13
|
+
src/hfs_score.egg-info/entry_points.txt
|
|
14
|
+
src/hfs_score.egg-info/requires.txt
|
|
15
|
+
src/hfs_score.egg-info/top_level.txt
|
|
16
|
+
tests/test_core.py
|
|
17
|
+
tests/test_criteria.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
hfs_score
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from hfs_score import HFSWeights, compute_hfs, compute_hfs_breakdown
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_compute_hfs_range():
    """compute_hfs, called without explicit weights, stays within 0-100."""
    criteria = dict(TIA=0.8, VSS=0.7, ACS=0.6, CHP=0.75, EVS=0.9, UP=0.2, BP=0.1)
    score = compute_hfs(**criteria)
    assert 0 <= score <= 100
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_compute_hfs_expected_value():
    """The reference inputs, with no explicit weights, score 65.8."""
    criteria = dict(TIA=0.8, VSS=0.7, ACS=0.6, CHP=0.75, EVS=0.9, UP=0.2, BP=0.1)
    score = compute_hfs(**criteria)
    assert score == pytest.approx(65.8)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_breakdown():
    """The breakdown reports the same 65.8 total and a dominant positive part."""
    criteria = dict(TIA=0.8, VSS=0.7, ACS=0.6, CHP=0.75, EVS=0.9, UP=0.2, BP=0.1)
    result = compute_hfs_breakdown(**criteria)
    assert result.hfs_0_100 == pytest.approx(65.8)
    assert result.positive_component > result.penalty_component
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_invalid_weights_sum():
    """Weights that add up to 1.5 make compute_hfs raise ValueError."""
    bad_weights = HFSWeights(w1=0.5, w2=0.5, w3=0.5, w4=0, w5=0, w6=0, w7=0)
    criteria = dict(TIA=0.8, VSS=0.7, ACS=0.6, CHP=0.75, EVS=0.9, UP=0.2, BP=0.1)
    with pytest.raises(ValueError):
        compute_hfs(**criteria, weights=bad_weights)
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from hfs_score import (
|
|
4
|
+
compute_tia,
|
|
5
|
+
compute_vss,
|
|
6
|
+
compute_acs,
|
|
7
|
+
compute_chp,
|
|
8
|
+
compute_evs,
|
|
9
|
+
compute_up,
|
|
10
|
+
compute_bp,
|
|
11
|
+
normalize_benefit,
|
|
12
|
+
normalize_cost,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_compute_tia():
    """With alpha1=0.5, inputs 0.8 and 0.6 yield 0.7."""
    tia = compute_tia(0.8, 0.6, alpha1=0.5)
    assert tia == pytest.approx(0.7)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_compute_vss():
    """With beta1=0.5, inputs 0.7 and 0.3 yield 0.7."""
    vss = compute_vss(0.7, 0.3, beta1=0.5)
    assert vss == pytest.approx(0.7)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_compute_acs():
    """Inputs 0.8, 0.7 and 0.6 yield 0.7."""
    acs = compute_acs(0.8, 0.7, 0.6)
    assert acs == pytest.approx(0.7)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_compute_chp():
    """Inputs 0.9, 0.7, 0.8 and 0.6 yield 0.75."""
    chp = compute_chp(0.9, 0.7, 0.8, 0.6)
    assert chp == pytest.approx(0.75)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_compute_evs():
    """Expert ratings 0.8, 0.7 and 0.9 yield 0.8."""
    expert_ratings = [0.8, 0.7, 0.9]
    assert compute_evs(expert_ratings) == pytest.approx(0.8)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_compute_up():
    """Inputs 0.2 and 0.3 yield an uncertainty penalty of 0.25."""
    penalty = compute_up(0.2, 0.3)
    assert penalty == pytest.approx(0.25)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_compute_bp():
    """Inputs 0.3 and 0.2 yield a bias penalty of 0.25."""
    penalty = compute_bp(0.3, 0.2)
    assert penalty == pytest.approx(0.25)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def test_normalize_benefit():
    """normalize_benefit(5, 0, 10) yields 0.5."""
    normalized = normalize_benefit(5, 0, 10)
    assert normalized == pytest.approx(0.5)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def test_normalize_cost():
    """normalize_cost(5, 0, 10) yields 0.5."""
    normalized = normalize_cost(5, 0, 10)
    assert normalized == pytest.approx(0.5)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_invalid_score_raises():
    """A sub-score above 1 (here 1.2) makes compute_acs raise ValueError."""
    out_of_range = 1.2
    with pytest.raises(ValueError):
        compute_acs(out_of_range, 0.7, 0.6)
|