pubmlp 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pubmlp-0.1.0/CITATION.cff +20 -0
- pubmlp-0.1.0/LICENSE +21 -0
- pubmlp-0.1.0/MANIFEST.in +3 -0
- pubmlp-0.1.0/PKG-INFO +79 -0
- pubmlp-0.1.0/README.md +33 -0
- pubmlp-0.1.0/pubmlp/__init__.py +47 -0
- pubmlp-0.1.0/pubmlp/active_learning.py +78 -0
- pubmlp-0.1.0/pubmlp/audit.py +168 -0
- pubmlp-0.1.0/pubmlp/calibration.py +88 -0
- pubmlp-0.1.0/pubmlp/config.py +124 -0
- pubmlp-0.1.0/pubmlp/cv.py +160 -0
- pubmlp-0.1.0/pubmlp/metrics.py +126 -0
- pubmlp-0.1.0/pubmlp/model.py +99 -0
- pubmlp-0.1.0/pubmlp/plotting.py +38 -0
- pubmlp-0.1.0/pubmlp/predict.py +84 -0
- pubmlp-0.1.0/pubmlp/preprocess.py +291 -0
- pubmlp-0.1.0/pubmlp/py.typed +0 -0
- pubmlp-0.1.0/pubmlp/sample.py +174 -0
- pubmlp-0.1.0/pubmlp/screening.py +236 -0
- pubmlp-0.1.0/pubmlp/stopping.py +100 -0
- pubmlp-0.1.0/pubmlp/train.py +155 -0
- pubmlp-0.1.0/pubmlp/utils.py +48 -0
- pubmlp-0.1.0/pubmlp.egg-info/PKG-INFO +79 -0
- pubmlp-0.1.0/pubmlp.egg-info/SOURCES.txt +40 -0
- pubmlp-0.1.0/pubmlp.egg-info/dependency_links.txt +1 -0
- pubmlp-0.1.0/pubmlp.egg-info/requires.txt +23 -0
- pubmlp-0.1.0/pubmlp.egg-info/top_level.txt +1 -0
- pubmlp-0.1.0/pyproject.toml +80 -0
- pubmlp-0.1.0/setup.cfg +4 -0
- pubmlp-0.1.0/tests/test_active_learning.py +50 -0
- pubmlp-0.1.0/tests/test_audit.py +82 -0
- pubmlp-0.1.0/tests/test_calibration.py +51 -0
- pubmlp-0.1.0/tests/test_config.py +89 -0
- pubmlp-0.1.0/tests/test_cv.py +38 -0
- pubmlp-0.1.0/tests/test_metrics.py +73 -0
- pubmlp-0.1.0/tests/test_model.py +166 -0
- pubmlp-0.1.0/tests/test_predict.py +44 -0
- pubmlp-0.1.0/tests/test_preprocess.py +525 -0
- pubmlp-0.1.0/tests/test_sample.py +83 -0
- pubmlp-0.1.0/tests/test_screening.py +69 -0
- pubmlp-0.1.0/tests/test_stopping.py +92 -0
- pubmlp-0.1.0/tests/test_utils.py +72 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
cff-version: 1.2.0
|
|
2
|
+
message: "If you use this software, please cite it as below."
|
|
3
|
+
type: software
|
|
4
|
+
title: "pubmlp: Multimodal publication classifier with LLM and deep learning"
|
|
5
|
+
authors:
|
|
6
|
+
- family-names: Shin
|
|
7
|
+
given-names: Mikyung
|
|
8
|
+
email: shin.mikyung@gmail.com
|
|
9
|
+
orcid: "https://orcid.org/0000-0001-7907-9193"
|
|
10
|
+
version: "0.1.0"
|
|
11
|
+
date-released: "2026-02-22"
|
|
12
|
+
license: MIT
|
|
13
|
+
url: "https://github.com/mshin77/pubmlp"
|
|
14
|
+
repository-code: "https://github.com/mshin77/pubmlp"
|
|
15
|
+
keywords:
|
|
16
|
+
- systematic review
|
|
17
|
+
- screening
|
|
18
|
+
- transformer
|
|
19
|
+
- classification
|
|
20
|
+
- Python
|
pubmlp-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Mikyung Shin
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
pubmlp-0.1.0/MANIFEST.in
ADDED
pubmlp-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pubmlp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Multimodal publication classifier with LLM and deep learning
|
|
5
|
+
Author-email: Mikyung Shin <shin.mikyung@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://mshin77.github.io/pubmlp
|
|
8
|
+
Project-URL: Repository, https://github.com/mshin77/pubmlp
|
|
9
|
+
Project-URL: Documentation, https://mshin77.github.io/pubmlp
|
|
10
|
+
Project-URL: Issues, https://github.com/mshin77/pubmlp/issues
|
|
11
|
+
Keywords: systematic-review,screening,transformer,classification,bibliometrics
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Intended Audience :: Education
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering
|
|
21
|
+
Classifier: Topic :: Text Processing
|
|
22
|
+
Requires-Python: >=3.9
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: torch>=2.3.0
|
|
26
|
+
Requires-Dist: transformers>=4.30.0
|
|
27
|
+
Requires-Dist: sentence-transformers>=2.2.0
|
|
28
|
+
Requires-Dist: scikit-learn>=1.3.0
|
|
29
|
+
Requires-Dist: pandas>=2.0.0
|
|
30
|
+
Requires-Dist: numpy>=1.24.0
|
|
31
|
+
Requires-Dist: tqdm>=4.65.0
|
|
32
|
+
Requires-Dist: matplotlib>=3.7.0
|
|
33
|
+
Requires-Dist: seaborn>=0.12.0
|
|
34
|
+
Provides-Extra: screening
|
|
35
|
+
Requires-Dist: openpyxl>=3.1.0; extra == "screening"
|
|
36
|
+
Requires-Dist: nltk>=3.8.0; extra == "screening"
|
|
37
|
+
Provides-Extra: dev
|
|
38
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
39
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
40
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
41
|
+
Provides-Extra: docs
|
|
42
|
+
Requires-Dist: sphinx>=8.2; extra == "docs"
|
|
43
|
+
Requires-Dist: pydata-sphinx-theme>=0.16; extra == "docs"
|
|
44
|
+
Requires-Dist: myst-parser>=3.0; extra == "docs"
|
|
45
|
+
Dynamic: license-file
|
|
46
|
+
|
|
47
|
+
[](https://pypi.org/project/pubmlp/)
|
|
48
|
+
[](https://pypi.org/project/pubmlp/)
|
|
49
|
+
[](https://opensource.org/licenses/MIT)
|
|
50
|
+
|
|
51
|
+
Multimodal publication classifier with LLM and deep learning. Fuses transformer embeddings with tabular features through a multilayer perceptron (MLP) for human-in-the-loop screening workflows.
|
|
52
|
+
|
|
53
|
+
## Installation
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install pubmlp
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
With optional dependencies:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
pip install "pubmlp[screening]"   # screening tools (openpyxl, nltk)
|
|
63
|
+
pip install "pubmlp[dev]"         # development (pytest, ruff)
|
|
64
|
+
pip install "pubmlp[docs]"        # documentation (sphinx)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
From GitHub:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install git+https://github.com/mshin77/pubmlp.git
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Getting Started
|
|
74
|
+
|
|
75
|
+
See [Quick Start](https://mshin77.github.io/pubmlp/getting-started.html) and [Screening Workflow](https://mshin77.github.io/pubmlp/vignettes/screening-workflow.html) for tutorials.
|
|
76
|
+
|
|
77
|
+
## Citation
|
|
78
|
+
|
|
79
|
+
- Shin, M. (2026). *pubmlp: Multimodal publication classifier with LLM and deep learning* (Python package version 0.1.0) [Computer software]. <https://github.com/mshin77/pubmlp>
|
pubmlp-0.1.0/README.md
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
[](https://pypi.org/project/pubmlp/)
|
|
2
|
+
[](https://pypi.org/project/pubmlp/)
|
|
3
|
+
[](https://opensource.org/licenses/MIT)
|
|
4
|
+
|
|
5
|
+
Multimodal publication classifier with LLM and deep learning. Fuses transformer embeddings with tabular features through a multilayer perceptron (MLP) for human-in-the-loop screening workflows.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install pubmlp
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
With optional dependencies:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install "pubmlp[screening]"   # screening tools (openpyxl, nltk)
|
|
17
|
+
pip install "pubmlp[dev]"         # development (pytest, ruff)
|
|
18
|
+
pip install "pubmlp[docs]"        # documentation (sphinx)
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
From GitHub:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install git+https://github.com/mshin77/pubmlp.git
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Getting Started
|
|
28
|
+
|
|
29
|
+
See [Quick Start](https://mshin77.github.io/pubmlp/getting-started.html) and [Screening Workflow](https://mshin77.github.io/pubmlp/vignettes/screening-workflow.html) for tutorials.
|
|
30
|
+
|
|
31
|
+
## Citation
|
|
32
|
+
|
|
33
|
+
- Shin, M. (2026). *pubmlp: Multimodal publication classifier with LLM and deep learning* (Python package version 0.1.0) [Computer software]. <https://github.com/mshin77/pubmlp>
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""
|
|
2
|
+
PubMLP: Multimodal publication classifier with LLM and deep learning.
|
|
3
|
+
|
|
4
|
+
Fuses transformer embeddings with tabular features through a multilayer
|
|
5
|
+
perceptron (MLP) for human-in-the-loop screening workflows.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
__version__ = "0.1.0"
|
|
9
|
+
__author__ = "Mikyung Shin"
|
|
10
|
+
__license__ = "MIT"
|
|
11
|
+
|
|
12
|
+
from .config import Config, default_config, fast_config, robust_config, hitl_config, domain_configs, sentence_transformer_models
|
|
13
|
+
from .model import PubMLP
|
|
14
|
+
from .train import train_evaluate_model, calculate_loss, calculate_accuracy, calculate_pos_weight
|
|
15
|
+
from .predict import predict_model, get_predictions_and_labels, flag_uncertain
|
|
16
|
+
from .metrics import calculate_evaluation_metrics
|
|
17
|
+
from .preprocess import preprocess_dataset, create_dataloader, split_data, CustomDataset, collate_fn, FittedTransforms
|
|
18
|
+
from .plotting import plot_results
|
|
19
|
+
from .utils import get_device, auto_batch_size, load_data, unpack_batch
|
|
20
|
+
from .cv import cross_validate
|
|
21
|
+
from .calibration import TemperatureScaling, collect_logits, calibrate_model
|
|
22
|
+
from .audit import AuditTrail, AuditEntry, interpret_kappa, summarize_human_decisions, generate_prisma_report
|
|
23
|
+
from .active_learning import ALState, select_query_batch, create_review_batch, compare_reviewers, merge_human_labels
|
|
24
|
+
from .stopping import StoppingState, should_stop, update_stopping_state, generate_stopping_report, calculate_wss, transition_phase, estimate_recall
|
|
25
|
+
from .screening import regex_screen, extract_window_evidence, extract_sentence_evidence, extract_all_evidence, format_evidence_display, calculate_semantic_scores
|
|
26
|
+
from .sample import create_stratified_sample, save_sample_excel, apply_conditional_formatting, count_pattern_matches, highlight_pattern_matches
|
|
27
|
+
|
|
28
|
+
# Public API surface, grouped one line-cluster per submodule
# (config, model, train, predict, metrics, preprocess, plotting, utils,
# cv, calibration, audit, active_learning, stopping, screening, sample).
__all__ = [
    'Config', 'default_config', 'fast_config', 'robust_config', 'hitl_config', 'domain_configs', 'sentence_transformer_models',
    'PubMLP',
    'train_evaluate_model', 'calculate_loss', 'calculate_accuracy', 'calculate_pos_weight',
    'predict_model', 'get_predictions_and_labels', 'flag_uncertain',
    'calculate_evaluation_metrics',
    'preprocess_dataset', 'create_dataloader', 'split_data', 'CustomDataset', 'collate_fn', 'FittedTransforms',
    'plot_results',
    'get_device', 'auto_batch_size', 'load_data', 'unpack_batch',
    'cross_validate',
    'TemperatureScaling', 'collect_logits', 'calibrate_model',
    'AuditTrail', 'AuditEntry', 'interpret_kappa', 'summarize_human_decisions', 'generate_prisma_report',
    'ALState', 'select_query_batch', 'create_review_batch', 'compare_reviewers', 'merge_human_labels',
    'StoppingState', 'should_stop', 'update_stopping_state', 'generate_stopping_report', 'calculate_wss',
    'transition_phase', 'estimate_recall',
    'regex_screen', 'extract_window_evidence', 'extract_sentence_evidence', 'extract_all_evidence',
    'format_evidence_display', 'calculate_semantic_scores',
    'create_stratified_sample', 'save_sample_excel', 'apply_conditional_formatting',
    'count_pattern_matches', 'highlight_pattern_matches',
]
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from dataclasses import dataclass, field, asdict
|
|
3
|
+
from sklearn.metrics import cohen_kappa_score
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class ALState:
    """Mutable bookkeeping for an active-learning loop.

    Tracks which pool items have been labeled, which remain unlabeled,
    how many query rounds have run, and a free-form history log.
    """

    # Indices into the screening pool that a human has already labeled.
    labeled_indices: list = field(default_factory=list)
    # Indices still awaiting a human label.
    unlabeled_indices: list = field(default_factory=list)
    # Number of query/label rounds completed so far.
    iteration: int = 0
    # Arbitrary per-iteration records appended by the caller.
    history: list = field(default_factory=list)

    def to_dict(self):
        """Serialize to a plain, JSON-friendly dict."""
        return asdict(self)

    @classmethod
    def from_dict(cls, d):
        """Rebuild an ALState from a dict produced by :meth:`to_dict`."""
        return cls(**d)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def rank_by_uncertainty(probabilities):
    """Return indices sorted by uncertainty: closest to 0.5 first."""
    probs = np.asarray(probabilities)
    return np.argsort(np.abs(probs - 0.5))


def rank_by_random(n, seed=42):
    """Return a reproducible random permutation of ``range(n)``."""
    rng = np.random.RandomState(seed)
    indices = np.arange(n)
    rng.shuffle(indices)
    return indices


def rank_by_max_relevance(probabilities):
    """Return indices sorted by probability: most likely relevant first."""
    return np.argsort(-np.asarray(probabilities))


def select_query_batch(probabilities, strategy='uncertainty', batch_size=20, seed=42):
    """Pick the next batch of pool indices for human review.

    Parameters
    ----------
    probabilities : array-like of float
        Model probabilities for each unlabeled item.
    strategy : {'uncertainty', 'random', 'max_relevance'}
        Query strategy used to rank candidates.
    batch_size : int
        Maximum number of indices to return.
    seed : int
        Seed for the 'random' strategy (ignored by the others).

    Returns
    -------
    numpy.ndarray
        Up to ``batch_size`` indices, best candidates first.

    Raises
    ------
    ValueError
        If ``strategy`` is not one of the supported names.
    """
    probs = np.asarray(probabilities)
    rankers = {
        'uncertainty': lambda: rank_by_uncertainty(probs),
        'random': lambda: rank_by_random(len(probs), seed),
        'max_relevance': lambda: rank_by_max_relevance(probs),
    }
    if strategy not in rankers:
        # Previously an unknown strategy surfaced as a bare KeyError;
        # fail loudly with the valid options instead.
        raise ValueError(
            f"Unknown strategy {strategy!r}; expected one of {sorted(rankers)}"
        )
    return rankers[strategy]()[:batch_size]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def create_review_batch(df, indices, probabilities):
    """Build a human-review slice of ``df``.

    Returns a copy of the selected rows with two extra columns:
    ``model_probability`` (raw score) and ``model_prediction``
    (score thresholded at 0.5).
    """
    scores = np.asarray(probabilities)
    selected_scores = scores[indices]
    review = df.iloc[indices].copy()
    review['model_probability'] = selected_scores
    review['model_prediction'] = (selected_scores >= 0.5).astype(int)
    return review
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def merge_human_labels(df, review_batch, label_col='human_label'):
    """Copy human decisions from ``review_batch`` back into a copy of ``df``.

    The original ``df`` is left untouched; rows are matched by index.
    Unreviewed rows get NaN in ``label_col`` if the column is new.
    """
    merged = df.copy()
    if label_col not in merged.columns:
        merged[label_col] = np.nan
    merged.loc[review_batch.index, label_col] = review_batch[label_col]
    return merged
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def compare_reviewers(model_predictions, human_labels):
    """Compare model predictions against human reviewer decisions.

    Parameters
    ----------
    model_predictions, human_labels : array-like of int
        Parallel binary decision vectors of equal length.

    Returns
    -------
    dict
        ``agreement_rate`` (fraction agreeing), ``kappa`` (Cohen's kappa;
        defined as 1.0 when only a single class occurs across both raters),
        and ``disagreement_indices`` (positions where the two disagree).

    Raises
    ------
    ValueError
        If the inputs are empty or their lengths differ (previously an
        empty input fell through to a ZeroDivisionError).
    """
    model_preds = np.asarray(model_predictions)
    human = np.asarray(human_labels)
    if human.size == 0:
        raise ValueError("Cannot compare reviewers on empty inputs")
    if model_preds.shape != human.shape:
        raise ValueError(
            f"Length mismatch: {model_preds.shape} vs {human.shape}"
        )
    agreed = np.sum(model_preds == human)
    # Kappa is mathematically undefined when only one class is present;
    # such unanimous agreement is reported as a perfect 1.0.
    kappa = cohen_kappa_score(model_preds, human) if len(set(model_preds) | set(human)) > 1 else 1.0
    disagreement_indices = np.where(model_preds != human)[0]
    return {
        'agreement_rate': agreed / len(human),
        'kappa': kappa,
        'disagreement_indices': disagreement_indices.tolist(),
    }
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
from dataclasses import dataclass, field, asdict
|
|
2
|
+
from datetime import datetime, timezone
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from .active_learning import compare_reviewers
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class AuditEntry:
    """One screening decision: the model's call plus any human follow-up."""

    record_id: str
    model_prediction: int
    model_probability: float
    human_label: int = None  # None until a human has reviewed this record
    timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
    reviewer_id: str = None  # None when no reviewer is recorded
    phase: str = 'screening'
    notes: str = ''


class AuditTrail:
    """Append-only log of model decisions with optional human overrides."""

    def __init__(self):
        self.entries = []

    def log_decision(self, record_id, prediction, probability, phase='screening',
                     reviewer_id=None):
        """Record one model decision, coercing values to plain Python types."""
        entry = AuditEntry(
            record_id=str(record_id),
            model_prediction=int(prediction),
            model_probability=float(probability),
            phase=phase,
            reviewer_id=reviewer_id,
        )
        self.entries.append(entry)

    def log_batch(self, record_ids, predictions, probabilities, phase='screening',
                  reviewer_id=None):
        """Record many decisions at once from parallel iterables."""
        for record_id, prediction, probability in zip(record_ids, predictions, probabilities):
            self.log_decision(record_id, prediction, probability, phase, reviewer_id)

    def update_human_label(self, record_id, human_label, reviewer_id=None, notes=''):
        """Attach a human decision to the first entry matching ``record_id``.

        Raises KeyError if the record was never logged.
        """
        wanted = str(record_id)
        for entry in self.entries:
            if entry.record_id != wanted:
                continue
            entry.human_label = int(human_label)
            entry.reviewer_id = reviewer_id
            entry.notes = notes
            # Timestamp now reflects the human review, not the model call.
            entry.timestamp = datetime.now(timezone.utc).isoformat()
            return
        raise KeyError(f"Record {record_id} not found in audit trail")

    def get_disagreements(self):
        """Entries where a human reviewed and overrode the model."""
        disagreements = []
        for entry in self.entries:
            if entry.human_label is not None and entry.model_prediction != entry.human_label:
                disagreements.append(entry)
        return disagreements

    def calculate_agreement(self):
        """Summarize model/human agreement over the human-reviewed entries."""
        reviewed = [entry for entry in self.entries if entry.human_label is not None]
        if not reviewed:
            return {'total': 0, 'agreed': 0, 'disagreed': 0, 'kappa': None}
        result = compare_reviewers(
            [entry.model_prediction for entry in reviewed],
            [entry.human_label for entry in reviewed],
        )
        agreed = int(result['agreement_rate'] * len(reviewed))
        return {
            'total': len(reviewed),
            'agreed': agreed,
            'disagreed': len(reviewed) - agreed,
            'kappa': result['kappa'],
        }

    def to_dataframe(self):
        """Entries as a pandas DataFrame, one row per decision."""
        return pd.DataFrame([asdict(entry) for entry in self.entries])

    def export_csv(self, path):
        """Write the trail to a CSV file at ``path``."""
        self.to_dataframe().to_csv(path, index=False)

    def to_dict(self):
        """JSON-friendly dict form."""
        return {'entries': [asdict(entry) for entry in self.entries]}

    @classmethod
    def from_dict(cls, d):
        """Rebuild a trail from :meth:`to_dict` output."""
        trail = cls()
        trail.entries = [AuditEntry(**item) for item in d['entries']]
        return trail
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def interpret_kappa(kappa):
    """Map a Cohen's kappa value to a qualitative agreement descriptor."""
    if kappa < 0:
        return 'poor'
    # Inclusive upper bounds, lowest band first.
    bands = (
        (0.20, 'slight'),
        (0.40, 'fair'),
        (0.60, 'moderate'),
        (0.80, 'substantial'),
    )
    for upper, label in bands:
        if kappa <= upper:
            return label
    return 'almost perfect'
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def summarize_human_decisions(audit_trail, uncertainty_low=0.3, uncertainty_high=0.7):
    """Tally human reviewer decisions against model predictions.

    Counts total entries, model include/exclude calls, entries whose
    probability falls strictly inside the (low, high) uncertainty band,
    and how many entries were human-reviewed / human-overridden.
    """
    entries = audit_trail.entries
    included = excluded = uncertain = reviewed = overrides = 0
    for entry in entries:
        if entry.model_prediction == 1:
            included += 1
        if entry.model_prediction == 0:
            excluded += 1
        if uncertainty_low < entry.model_probability < uncertainty_high:
            uncertain += 1
        if entry.human_label is not None:
            reviewed += 1
            if entry.model_prediction != entry.human_label:
                overrides += 1
    return {
        'total': len(entries),
        'included': included,
        'excluded': excluded,
        'uncertain': uncertain,
        'human_reviewed': reviewed,
        'human_overrides': overrides,
    }
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# PRISMA 2020 Item 8 + trAIce M3/M8/M9/R1/R2 (screening-scoped)
# Keys are reporting-item identifiers; values are short human-readable
# descriptions consumed by generate_prisma_report() below.
prisma_screening_items = {
    'item_8': 'Selection process: automation tools used',
    'M3': 'Purpose/Stage: AI applied at title/abstract screening',
    'M8': 'Human-AI Interaction: human reviewer validation process',
    'M9': 'Performance Evaluation: screening model metrics',
    'R1': 'Study Selection: AI vs human exclusion counts in flow',
    'R2': 'Performance Metrics: AI screening performance results',
}
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def generate_prisma_report(audit_trail, config=None):
    """Populate PRISMA Item 8 + screening-relevant trAIce items from audit data."""

    def cfg(attr, default=None):
        # Read an optional attribute off config, tolerating config=None.
        return getattr(config, attr, default) if config else default

    summary = summarize_human_decisions(
        audit_trail, cfg('uncertainty_low', 0.3), cfg('uncertainty_high', 0.7)
    )
    agreement = audit_trail.calculate_agreement()
    kappa = agreement['kappa']

    return {
        'item_8': {
            'description': prisma_screening_items['item_8'],
            'tool': 'pubmlp',
            'stage': 'title/abstract screening',
            'model': cfg('embedding_model'),
            'calibration': cfg('calibration_method'),
        },
        'M3': {
            'description': prisma_screening_items['M3'],
            'stage': 'title/abstract screening',
            'strategy': cfg('al_query_strategy'),
        },
        'M8': {
            'description': prisma_screening_items['M8'],
            'human_reviewed': summary['human_reviewed'],
            'human_overrides': summary['human_overrides'],
            'agreement_kappa': kappa,
            'kappa_interpretation': interpret_kappa(kappa) if kappa is not None else None,
        },
        'M9': {
            'description': prisma_screening_items['M9'],
            'total_screened': summary['total'],
            'uncertain_flagged': summary['uncertain'],
        },
        'R1': {
            'description': prisma_screening_items['R1'],
            'model_included': summary['included'],
            'model_excluded': summary['excluded'],
            'human_overrides': summary['human_overrides'],
        },
        'R2': {
            'description': prisma_screening_items['R2'],
            'agreement': agreement,
        },
    }
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
import torch.nn as nn
|
|
3
|
+
import torch.optim as optim
|
|
4
|
+
|
|
5
|
+
from .utils import unpack_batch
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def collect_logits(model, dataloader, device):
    """Run ``model`` in eval mode over ``dataloader`` and return the raw
    logits and labels, each concatenated into a single tensor."""
    model.eval()
    logit_chunks = []
    label_chunks = []
    with torch.no_grad():
        for batch in dataloader:
            (input_ids, attention_mask, categorical_tensor,
             numeric_tensor, labels, texts) = unpack_batch(batch, device)
            batch_logits = model(input_ids, attention_mask, categorical_tensor,
                                 numeric_tensor, texts)
            logit_chunks.append(batch_logits)
            label_chunks.append(labels)
    return torch.cat(logit_chunks), torch.cat(label_chunks)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class TemperatureScaling:
    """Post-hoc temperature scaling for calibrated probabilities.

    Works for single-label (1-D or ``(N, 1)`` logits) and multi-label
    (``(N, L)`` logits) problems; in the multi-label case one temperature
    is fitted independently per label column.
    """

    def __init__(self):
        # A single float, or a list of per-label floats; None until fit().
        self.temperature = None

    @staticmethod
    def _fit_single(logits, labels, lr, max_iter):
        """Fit one scalar temperature by minimizing BCE-with-logits NLL.

        Optimizes log(T) so the temperature stays strictly positive.
        """
        log_temp = nn.Parameter(torch.zeros(1, device=logits.device))
        criterion = nn.BCEWithLogitsLoss()
        optimizer = optim.LBFGS([log_temp], lr=lr, max_iter=max_iter)

        def closure():
            optimizer.zero_grad()
            loss = criterion(logits / log_temp.exp(), labels)
            loss.backward()
            return loss

        optimizer.step(closure)
        return log_temp.exp().item()

    def fit(self, logits, labels, lr=0.01, max_iter=50):
        """Optimize temperature via NLL loss using LBFGS.

        Returns ``self`` from both branches so calls can be chained
        (previously the placement of ``return self`` made the single-label
        path's return value unreliable).
        """
        num_labels = logits.shape[-1] if logits.dim() > 1 else 1

        if num_labels == 1:
            self.temperature = self._fit_single(
                logits.view(-1), labels.view(-1).float(), lr, max_iter
            )
        else:
            # Per-label temperature, one independent fit per column.
            self.temperature = [
                self._fit_single(logits[:, i], labels[:, i].float(), lr, max_iter)
                for i in range(num_labels)
            ]
        return self

    def transform(self, logits):
        """Divide logits by the fitted temperature(s).

        Raises
        ------
        RuntimeError
            If called before :meth:`fit` (previously this surfaced as an
            opaque ``TypeError`` from dividing by None).
        """
        if self.temperature is None:
            raise RuntimeError("TemperatureScaling.transform called before fit()")
        if isinstance(self.temperature, list):
            temp_tensor = torch.tensor(self.temperature, device=logits.device).unsqueeze(0)
            return logits / temp_tensor
        return logits / self.temperature

    def to_dict(self):
        """Serialize the fitted temperature(s)."""
        return {'temperature': self.temperature}

    @classmethod
    def from_dict(cls, d):
        """Rebuild a scaler from :meth:`to_dict` output."""
        obj = cls()
        obj.temperature = d['temperature']
        return obj
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def calibrate_model(model, dataloader, device):
    """Collect logits over ``dataloader`` and fit temperature scaling.

    Returns the fitted TemperatureScaling instance.
    """
    collected_logits, collected_labels = collect_logits(model, dataloader, device)
    scaler = TemperatureScaling()
    scaler.fit(collected_logits, collected_labels)
    return scaler
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import random
|
|
2
|
+
import numpy as np
|
|
3
|
+
import torch
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Models that use the SentenceTransformer encoder (frozen, no fine-tuning).
# Entries are embedding_model names as accepted by Config(embedding_model=...).
sentence_transformer_models = {'sentence-transformer', 'bge-small'}
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Config:
    """Configuration for PubMLP training and inference.

    Every setting is a plain attribute; unrecognized keyword arguments are
    silently ignored. ``model_name`` falls back to a per-``embedding_model``
    default checkpoint when not supplied.
    """

    def __init__(self, **kwargs):
        # (name, default) pairs in declaration order. Order matters:
        # to_dict() and __repr__ reflect attribute insertion order.
        defaults = (
            # Random seed
            ('random_seed', 42),
            # Training hyperparameters
            ('batch_size', 16),
            ('eval_batch_size', 32),
            ('epochs', 10),
            ('learning_rate', 2e-5),
            ('early_stopping_patience', 3),
            # Model architecture
            ('dropout_rate', 0.2),
            ('mlp_hidden_size', 64),
            ('n_hidden_layers', 1),
            ('max_length', 512),
            # Optimization
            ('gradient_clip_norm', 1.0),
            ('warmup_steps', 0),
            # Embedding model
            ('embedding_model', 'bert'),
            ('model_name', None),
            ('pooling_strategy', 'auto'),
            # Uncertainty thresholds
            ('uncertainty_low', 0.3),
            ('uncertainty_high', 0.7),
            # Cross-validation
            ('n_folds', 5),
            # Calibration
            ('calibration_method', 'temperature'),
            # Active learning
            ('al_query_strategy', 'uncertainty'),
            ('al_batch_size', 20),
            ('al_initial_sample_pct', 0.1),
            # Categorical encoding
            ('rare_threshold', 5),
            # Class weighting
            ('pos_weight', 'auto'),
            # SAFE stopping
            ('safe_consecutive_irrelevant', 50),
            ('safe_min_screened_pct', 0.5),
            ('safe_random_sample_pct', 0.1),
            ('safe_switch_model', False),
        )
        for name, default in defaults:
            setattr(self, name, kwargs.get(name, default))

        if self.model_name is None:
            self.model_name = self._get_default_model_name()

    def _get_default_model_name(self):
        """Map embedding_model to its default checkpoint name."""
        checkpoints = {
            'bert': 'bert-base-uncased',
            'modernbert': 'answerdotai/ModernBERT-base',
            'scibert': 'allenai/scibert_scivocab_uncased',
            'pubmedbert': 'microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext',
            'sentence-transformer': 'all-MiniLM-L6-v2',
            'bge-small': 'BAAI/bge-small-en-v1.5',
        }
        # Unknown embedding models fall back to the plain BERT checkpoint.
        return checkpoints.get(self.embedding_model, 'bert-base-uncased')

    def set_random_seeds(self):
        """Seed python, numpy, and torch (including CUDA, when present)."""
        random.seed(self.random_seed)
        np.random.seed(self.random_seed)
        torch.manual_seed(self.random_seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(self.random_seed)
            torch.cuda.manual_seed_all(self.random_seed)

    def to_dict(self):
        """Settings as a dict, excluding any ``_``-prefixed attributes."""
        return {k: v for k, v in self.__dict__.items() if not k.startswith('_')}

    def __repr__(self):
        items = [f"{k}={repr(v)}" for k, v in self.to_dict().items()]
        return f"Config({', '.join(items)})"
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# Preset configurations ----------------------------------------------------

# Library defaults (see Config.__init__ for the full value set).
default_config = Config()

# Quick experiments: fewer epochs, larger batches, and a lightweight
# frozen SentenceTransformer encoder.
fast_config = Config(
    epochs=5,
    batch_size=32,
    embedding_model='sentence-transformer',
    model_name='all-MiniLM-L6-v2'
)

# Longer training with more regularization and a wider MLP head.
robust_config = Config(
    epochs=20,
    early_stopping_patience=5,
    dropout_rate=0.3,
    mlp_hidden_size=128
)

# Human-in-the-loop screening: uncertainty sampling plus SAFE stopping
# thresholds (values restate the Config defaults explicitly).
hitl_config = Config(
    al_query_strategy='uncertainty',
    al_batch_size=20,
    safe_consecutive_irrelevant=50,
    safe_min_screened_pct=0.5,
)

# Domain-specific encoder presets keyed by corpus domain.
domain_configs = {
    'science': Config(embedding_model='scibert'),
    'medicine': Config(embedding_model='pubmedbert'),
    'general': Config(embedding_model='bert'),
    'modernbert': Config(embedding_model='modernbert'),
}
|