pubmlp 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. pubmlp-0.1.0/CITATION.cff +20 -0
  2. pubmlp-0.1.0/LICENSE +21 -0
  3. pubmlp-0.1.0/MANIFEST.in +3 -0
  4. pubmlp-0.1.0/PKG-INFO +79 -0
  5. pubmlp-0.1.0/README.md +33 -0
  6. pubmlp-0.1.0/pubmlp/__init__.py +47 -0
  7. pubmlp-0.1.0/pubmlp/active_learning.py +78 -0
  8. pubmlp-0.1.0/pubmlp/audit.py +168 -0
  9. pubmlp-0.1.0/pubmlp/calibration.py +88 -0
  10. pubmlp-0.1.0/pubmlp/config.py +124 -0
  11. pubmlp-0.1.0/pubmlp/cv.py +160 -0
  12. pubmlp-0.1.0/pubmlp/metrics.py +126 -0
  13. pubmlp-0.1.0/pubmlp/model.py +99 -0
  14. pubmlp-0.1.0/pubmlp/plotting.py +38 -0
  15. pubmlp-0.1.0/pubmlp/predict.py +84 -0
  16. pubmlp-0.1.0/pubmlp/preprocess.py +291 -0
  17. pubmlp-0.1.0/pubmlp/py.typed +0 -0
  18. pubmlp-0.1.0/pubmlp/sample.py +174 -0
  19. pubmlp-0.1.0/pubmlp/screening.py +236 -0
  20. pubmlp-0.1.0/pubmlp/stopping.py +100 -0
  21. pubmlp-0.1.0/pubmlp/train.py +155 -0
  22. pubmlp-0.1.0/pubmlp/utils.py +48 -0
  23. pubmlp-0.1.0/pubmlp.egg-info/PKG-INFO +79 -0
  24. pubmlp-0.1.0/pubmlp.egg-info/SOURCES.txt +40 -0
  25. pubmlp-0.1.0/pubmlp.egg-info/dependency_links.txt +1 -0
  26. pubmlp-0.1.0/pubmlp.egg-info/requires.txt +23 -0
  27. pubmlp-0.1.0/pubmlp.egg-info/top_level.txt +1 -0
  28. pubmlp-0.1.0/pyproject.toml +80 -0
  29. pubmlp-0.1.0/setup.cfg +4 -0
  30. pubmlp-0.1.0/tests/test_active_learning.py +50 -0
  31. pubmlp-0.1.0/tests/test_audit.py +82 -0
  32. pubmlp-0.1.0/tests/test_calibration.py +51 -0
  33. pubmlp-0.1.0/tests/test_config.py +89 -0
  34. pubmlp-0.1.0/tests/test_cv.py +38 -0
  35. pubmlp-0.1.0/tests/test_metrics.py +73 -0
  36. pubmlp-0.1.0/tests/test_model.py +166 -0
  37. pubmlp-0.1.0/tests/test_predict.py +44 -0
  38. pubmlp-0.1.0/tests/test_preprocess.py +525 -0
  39. pubmlp-0.1.0/tests/test_sample.py +83 -0
  40. pubmlp-0.1.0/tests/test_screening.py +69 -0
  41. pubmlp-0.1.0/tests/test_stopping.py +92 -0
  42. pubmlp-0.1.0/tests/test_utils.py +72 -0
@@ -0,0 +1,20 @@
1
+ cff-version: 1.2.0
2
+ message: "If you use this software, please cite it as below."
3
+ type: software
4
+ title: "pubmlp: Multimodal publication classifier with LLM and deep learning"
5
+ authors:
6
+ - family-names: Shin
7
+ given-names: Mikyung
8
+ email: shin.mikyung@gmail.com
9
+ orcid: "https://orcid.org/0000-0001-7907-9193"
10
+ version: "0.1.0"
11
+ date-released: "2026-02-22"
12
+ license: MIT
13
+ url: "https://github.com/mshin77/pubmlp"
14
+ repository-code: "https://github.com/mshin77/pubmlp"
15
+ keywords:
16
+ - systematic review
17
+ - screening
18
+ - transformer
19
+ - classification
20
+ - Python
pubmlp-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mikyung Shin
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,3 @@
1
+ include CITATION.cff
2
+ include LICENSE
3
+ include README.md
pubmlp-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,79 @@
1
+ Metadata-Version: 2.4
2
+ Name: pubmlp
3
+ Version: 0.1.0
4
+ Summary: Multimodal publication classifier with LLM and deep learning
5
+ Author-email: Mikyung Shin <shin.mikyung@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://mshin77.github.io/pubmlp
8
+ Project-URL: Repository, https://github.com/mshin77/pubmlp
9
+ Project-URL: Documentation, https://mshin77.github.io/pubmlp
10
+ Project-URL: Issues, https://github.com/mshin77/pubmlp/issues
11
+ Keywords: systematic-review,screening,transformer,classification,bibliometrics
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Intended Audience :: Education
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering
21
+ Classifier: Topic :: Text Processing
22
+ Requires-Python: >=3.9
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: torch>=2.3.0
26
+ Requires-Dist: transformers>=4.30.0
27
+ Requires-Dist: sentence-transformers>=2.2.0
28
+ Requires-Dist: scikit-learn>=1.3.0
29
+ Requires-Dist: pandas>=2.0.0
30
+ Requires-Dist: numpy>=1.24.0
31
+ Requires-Dist: tqdm>=4.65.0
32
+ Requires-Dist: matplotlib>=3.7.0
33
+ Requires-Dist: seaborn>=0.12.0
34
+ Provides-Extra: screening
35
+ Requires-Dist: openpyxl>=3.1.0; extra == "screening"
36
+ Requires-Dist: nltk>=3.8.0; extra == "screening"
37
+ Provides-Extra: dev
38
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
39
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
40
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
41
+ Provides-Extra: docs
42
+ Requires-Dist: sphinx>=8.2; extra == "docs"
43
+ Requires-Dist: pydata-sphinx-theme>=0.16; extra == "docs"
44
+ Requires-Dist: myst-parser>=3.0; extra == "docs"
45
+ Dynamic: license-file
46
+
47
+ [![PyPI version](https://img.shields.io/pypi/v/pubmlp)](https://pypi.org/project/pubmlp/)
48
+ [![Python versions](https://img.shields.io/pypi/pyversions/pubmlp)](https://pypi.org/project/pubmlp/)
49
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
50
+
51
+ Multimodal publication classifier with LLM and deep learning. Fuses transformer embeddings with tabular features through a multilayer perceptron (MLP) for human-in-the-loop screening workflows.
52
+
53
+ ## Installation
54
+
55
+ ```bash
56
+ pip install pubmlp
57
+ ```
58
+
59
+ With optional dependencies:
60
+
61
+ ```bash
62
+ pip install pubmlp[screening] # screening tools (openpyxl, nltk)
63
+ pip install pubmlp[dev] # development (pytest, ruff)
64
+ pip install pubmlp[docs] # documentation (sphinx)
65
+ ```
66
+
67
+ From GitHub:
68
+
69
+ ```bash
70
+ pip install git+https://github.com/mshin77/pubmlp.git
71
+ ```
72
+
73
+ ## Getting Started
74
+
75
+ See [Quick Start](https://mshin77.github.io/pubmlp/getting-started.html) and [Screening Workflow](https://mshin77.github.io/pubmlp/vignettes/screening-workflow.html) for tutorials.
76
+
77
+ ## Citation
78
+
79
+ - Shin, M. (2026). *pubmlp: Multimodal publication classifier with LLM and deep learning* (Python package version 0.1.0) [Computer software]. <https://github.com/mshin77/pubmlp>
pubmlp-0.1.0/README.md ADDED
@@ -0,0 +1,33 @@
1
+ [![PyPI version](https://img.shields.io/pypi/v/pubmlp)](https://pypi.org/project/pubmlp/)
2
+ [![Python versions](https://img.shields.io/pypi/pyversions/pubmlp)](https://pypi.org/project/pubmlp/)
3
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
4
+
5
+ Multimodal publication classifier with LLM and deep learning. Fuses transformer embeddings with tabular features through a multilayer perceptron (MLP) for human-in-the-loop screening workflows.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install pubmlp
11
+ ```
12
+
13
+ With optional dependencies:
14
+
15
+ ```bash
16
+ pip install pubmlp[screening] # screening tools (openpyxl, nltk)
17
+ pip install pubmlp[dev] # development (pytest, ruff)
18
+ pip install pubmlp[docs] # documentation (sphinx)
19
+ ```
20
+
21
+ From GitHub:
22
+
23
+ ```bash
24
+ pip install git+https://github.com/mshin77/pubmlp.git
25
+ ```
26
+
27
+ ## Getting Started
28
+
29
+ See [Quick Start](https://mshin77.github.io/pubmlp/getting-started.html) and [Screening Workflow](https://mshin77.github.io/pubmlp/vignettes/screening-workflow.html) for tutorials.
30
+
31
+ ## Citation
32
+
33
+ - Shin, M. (2026). *pubmlp: Multimodal publication classifier with LLM and deep learning* (Python package version 0.1.0) [Computer software]. <https://github.com/mshin77/pubmlp>
@@ -0,0 +1,47 @@
1
"""
PubMLP: Multimodal publication classifier with LLM and deep learning.

Fuses transformer embeddings with tabular features through a multilayer
perceptron (MLP) for human-in-the-loop screening workflows.
"""

# Package metadata.
__version__ = "0.1.0"
__author__ = "Mikyung Shin"
__license__ = "MIT"

# Re-export the public API from the submodules so callers can write
# `from pubmlp import <name>` without knowing the internal layout.
from .config import Config, default_config, fast_config, robust_config, hitl_config, domain_configs, sentence_transformer_models
from .model import PubMLP
from .train import train_evaluate_model, calculate_loss, calculate_accuracy, calculate_pos_weight
from .predict import predict_model, get_predictions_and_labels, flag_uncertain
from .metrics import calculate_evaluation_metrics
from .preprocess import preprocess_dataset, create_dataloader, split_data, CustomDataset, collate_fn, FittedTransforms
from .plotting import plot_results
from .utils import get_device, auto_batch_size, load_data, unpack_batch
from .cv import cross_validate
from .calibration import TemperatureScaling, collect_logits, calibrate_model
from .audit import AuditTrail, AuditEntry, interpret_kappa, summarize_human_decisions, generate_prisma_report
from .active_learning import ALState, select_query_batch, create_review_batch, compare_reviewers, merge_human_labels
from .stopping import StoppingState, should_stop, update_stopping_state, generate_stopping_report, calculate_wss, transition_phase, estimate_recall
from .screening import regex_screen, extract_window_evidence, extract_sentence_evidence, extract_all_evidence, format_evidence_display, calculate_semantic_scores
from .sample import create_stratified_sample, save_sample_excel, apply_conditional_formatting, count_pattern_matches, highlight_pattern_matches

# Explicit public API; one group per source module, mirroring the imports above.
__all__ = [
    'Config', 'default_config', 'fast_config', 'robust_config', 'hitl_config', 'domain_configs', 'sentence_transformer_models',
    'PubMLP',
    'train_evaluate_model', 'calculate_loss', 'calculate_accuracy', 'calculate_pos_weight',
    'predict_model', 'get_predictions_and_labels', 'flag_uncertain',
    'calculate_evaluation_metrics',
    'preprocess_dataset', 'create_dataloader', 'split_data', 'CustomDataset', 'collate_fn', 'FittedTransforms',
    'plot_results',
    'get_device', 'auto_batch_size', 'load_data', 'unpack_batch',
    'cross_validate',
    'TemperatureScaling', 'collect_logits', 'calibrate_model',
    'AuditTrail', 'AuditEntry', 'interpret_kappa', 'summarize_human_decisions', 'generate_prisma_report',
    'ALState', 'select_query_batch', 'create_review_batch', 'compare_reviewers', 'merge_human_labels',
    'StoppingState', 'should_stop', 'update_stopping_state', 'generate_stopping_report', 'calculate_wss',
    'transition_phase', 'estimate_recall',
    'regex_screen', 'extract_window_evidence', 'extract_sentence_evidence', 'extract_all_evidence',
    'format_evidence_display', 'calculate_semantic_scores',
    'create_stratified_sample', 'save_sample_excel', 'apply_conditional_formatting',
    'count_pattern_matches', 'highlight_pattern_matches',
]
@@ -0,0 +1,78 @@
1
+ import numpy as np
2
+ from dataclasses import dataclass, field, asdict
3
+ from sklearn.metrics import cohen_kappa_score
4
+
5
+
6
@dataclass
class ALState:
    """Serializable state of an active-learning loop.

    Tracks which pool indices have been labeled, which remain unlabeled,
    how many query iterations have completed, and a history list whose
    contents this class does not interpret.
    """
    # Pool indices that already have a (human) label.
    labeled_indices: list = field(default_factory=list)
    # Pool indices still awaiting a label.
    unlabeled_indices: list = field(default_factory=list)
    # Number of completed active-learning iterations.
    iteration: int = 0
    # Per-iteration records appended by the caller; opaque to this class.
    history: list = field(default_factory=list)

    def to_dict(self):
        # asdict() deep-copies field values, so the result can be mutated or
        # serialized (e.g. to JSON) without aliasing the live state.
        return asdict(self)

    @classmethod
    def from_dict(cls, d):
        """Inverse of :meth:`to_dict`."""
        return cls(**d)
19
+
20
+
21
def rank_by_uncertainty(probabilities):
    """Rank indices by uncertainty: probabilities closest to 0.5 first."""
    probs = np.asarray(probabilities)
    return np.argsort(np.abs(probs - 0.5))


def rank_by_random(n, seed=42):
    """Return a reproducible random permutation of ``range(n)``."""
    rng = np.random.RandomState(seed)
    indices = np.arange(n)
    rng.shuffle(indices)
    return indices


def rank_by_max_relevance(probabilities):
    """Rank indices by predicted relevance: highest probability first."""
    return np.argsort(-np.asarray(probabilities))


def select_query_batch(probabilities, strategy='uncertainty', batch_size=20, seed=42):
    """Select the indices of the next batch to send for human review.

    Parameters
    ----------
    probabilities : array-like of float
        Model probabilities for the unlabeled pool.
    strategy : str
        One of 'uncertainty', 'random', or 'max_relevance'.
    batch_size : int
        Maximum number of indices to return.
    seed : int
        Random seed; used only by the 'random' strategy.

    Returns
    -------
    numpy.ndarray
        Selected indices, highest priority first.

    Raises
    ------
    ValueError
        If ``strategy`` is not one of the recognized names.  (Previously an
        unhelpful bare ``KeyError`` escaped from the dispatch dict.)
    """
    probs = np.asarray(probabilities)
    strategies = {
        'uncertainty': lambda: rank_by_uncertainty(probs),
        'random': lambda: rank_by_random(len(probs), seed),
        'max_relevance': lambda: rank_by_max_relevance(probs),
    }
    if strategy not in strategies:
        raise ValueError(
            f"Unknown strategy {strategy!r}; expected one of {sorted(strategies)}"
        )
    return strategies[strategy]()[:batch_size]
47
+
48
+
49
def create_review_batch(df, indices, probabilities):
    """Build the human-review slice of ``df`` with model outputs attached.

    Adds a 'model_probability' column (raw probability) and a
    'model_prediction' column (probability thresholded at 0.5) for each
    selected row; ``df`` itself is left untouched.
    """
    selected_probs = np.asarray(probabilities)[indices]
    return df.iloc[indices].assign(
        model_probability=selected_probs,
        model_prediction=(selected_probs >= 0.5).astype(int),
    )
56
+
57
+
58
def merge_human_labels(df, review_batch, label_col='human_label'):
    """Copy human decisions from ``review_batch`` into a copy of ``df``.

    Rows are matched by index.  The label column is created (filled with
    NaN) when it does not exist yet, so unreviewed rows read as NaN.
    """
    merged = df.copy()
    column_exists = label_col in merged.columns
    if not column_exists:
        merged[label_col] = np.nan
    merged.loc[review_batch.index, label_col] = review_batch[label_col]
    return merged
65
+
66
+
67
def compare_reviewers(model_predictions, human_labels):
    """Compare model predictions against human reviewer decisions.

    Parameters
    ----------
    model_predictions, human_labels : array-like of int
        Paired binary decisions; must be non-empty and the same length.

    Returns
    -------
    dict
        'agreement_rate' (fraction of matching decisions), 'kappa'
        (Cohen's kappa, or 1.0 when only a single class appears), and
        'disagreement_indices' (positions where the two disagree).

    Raises
    ------
    ValueError
        If the inputs are empty or their lengths differ.  (Previously an
        empty input crashed with ``ZeroDivisionError`` and a length
        mismatch silently broadcast.)
    """
    model_preds = np.asarray(model_predictions)
    human = np.asarray(human_labels)
    if len(human) == 0:
        raise ValueError("compare_reviewers requires at least one paired decision")
    if len(model_preds) != len(human):
        raise ValueError("model_predictions and human_labels must have the same length")
    agreed = np.sum(model_preds == human)
    # Cohen's kappa is undefined when only one class is present overall;
    # treat that degenerate case as perfect chance-corrected agreement.
    if len(set(model_preds) | set(human)) > 1:
        kappa = cohen_kappa_score(model_preds, human)
    else:
        kappa = 1.0
    disagreement_indices = np.where(model_preds != human)[0]
    return {
        'agreement_rate': agreed / len(human),
        'kappa': kappa,
        'disagreement_indices': disagreement_indices.tolist(),
    }
@@ -0,0 +1,168 @@
1
+ from dataclasses import dataclass, field, asdict
2
+ from datetime import datetime, timezone
3
+
4
+ import pandas as pd
5
+
6
+ from .active_learning import compare_reviewers
7
+
8
+
9
@dataclass
class AuditEntry:
    """One screening decision for a single record.

    ``human_label`` and ``reviewer_id`` stay ``None`` until a human reviews
    the record; ``timestamp`` is UTC ISO-8601 and is refreshed when a human
    label is attached.
    """
    record_id: str
    model_prediction: int
    model_probability: float
    human_label: int = None
    timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
    reviewer_id: str = None
    phase: str = 'screening'
    notes: str = ''


class AuditTrail:
    """Append-only log of model decisions with optional human overrides."""

    def __init__(self):
        self.entries = []

    def log_decision(self, record_id, prediction, probability, phase='screening',
                     reviewer_id=None):
        """Record a single model decision as a new entry."""
        entry = AuditEntry(
            record_id=str(record_id),
            model_prediction=int(prediction),
            model_probability=float(probability),
            phase=phase,
            reviewer_id=reviewer_id,
        )
        self.entries.append(entry)

    def log_batch(self, record_ids, predictions, probabilities, phase='screening',
                  reviewer_id=None):
        """Record one decision per (id, prediction, probability) triple."""
        for record_id, prediction, probability in zip(record_ids, predictions, probabilities):
            self.log_decision(record_id, prediction, probability, phase, reviewer_id)

    def update_human_label(self, record_id, human_label, reviewer_id=None, notes=''):
        """Attach a human decision to the first entry matching ``record_id``.

        Raises ``KeyError`` when no entry has that id.
        """
        wanted = str(record_id)
        for entry in self.entries:
            if entry.record_id != wanted:
                continue
            entry.human_label = int(human_label)
            entry.reviewer_id = reviewer_id
            entry.notes = notes
            entry.timestamp = datetime.now(timezone.utc).isoformat()
            return
        raise KeyError(f"Record {record_id} not found in audit trail")

    def get_disagreements(self):
        """Entries where a human reviewed and disagreed with the model."""
        return [
            entry for entry in self.entries
            if entry.human_label is not None
            and entry.human_label != entry.model_prediction
        ]

    def calculate_agreement(self):
        """Summarize model-vs-human agreement over the reviewed entries."""
        reviewed = [entry for entry in self.entries if entry.human_label is not None]
        total = len(reviewed)
        if total == 0:
            return {'total': 0, 'agreed': 0, 'disagreed': 0, 'kappa': None}
        comparison = compare_reviewers(
            [entry.model_prediction for entry in reviewed],
            [entry.human_label for entry in reviewed],
        )
        agreed = int(comparison['agreement_rate'] * total)
        return {
            'total': total,
            'agreed': agreed,
            'disagreed': total - agreed,
            'kappa': comparison['kappa'],
        }

    def to_dataframe(self):
        """All entries as a pandas DataFrame, one row per entry."""
        return pd.DataFrame([asdict(entry) for entry in self.entries])

    def export_csv(self, path):
        """Write the audit trail to ``path`` as CSV (no index column)."""
        self.to_dataframe().to_csv(path, index=False)

    def to_dict(self):
        """JSON-serializable representation of the whole trail."""
        return {'entries': [asdict(entry) for entry in self.entries]}

    @classmethod
    def from_dict(cls, d):
        """Rebuild a trail previously produced by :meth:`to_dict`."""
        trail = cls()
        trail.entries = [AuditEntry(**entry) for entry in d['entries']]
        return trail
83
+
84
+
85
def interpret_kappa(kappa):
    """Qualitative interpretation of a Cohen's kappa value.

    Bands: <0 'poor', (0, 0.20] 'slight', (0.20, 0.40] 'fair',
    (0.40, 0.60] 'moderate', (0.60, 0.80] 'substantial', else
    'almost perfect'.
    """
    if kappa < 0:
        return 'poor'
    for upper_bound, label in (
        (0.20, 'slight'),
        (0.40, 'fair'),
        (0.60, 'moderate'),
        (0.80, 'substantial'),
    ):
        if kappa <= upper_bound:
            return label
    return 'almost perfect'
97
+
98
+
99
def summarize_human_decisions(audit_trail, uncertainty_low=0.3, uncertainty_high=0.7):
    """Tally model decisions and human review activity in an audit trail.

    Returns counts of: all entries ('total'), model inclusions/exclusions,
    entries whose probability falls strictly inside the uncertainty band,
    human-reviewed entries, and human overrides of the model prediction.
    """
    included = excluded = uncertain = reviewed = overrides = 0
    for entry in audit_trail.entries:
        if entry.model_prediction == 1:
            included += 1
        elif entry.model_prediction == 0:
            excluded += 1
        if uncertainty_low < entry.model_probability < uncertainty_high:
            uncertain += 1
        if entry.human_label is not None:
            reviewed += 1
            if entry.human_label != entry.model_prediction:
                overrides += 1
    return {
        'total': len(audit_trail.entries),
        'included': included,
        'excluded': excluded,
        'uncertain': uncertain,
        'human_reviewed': reviewed,
        'human_overrides': overrides,
    }
112
+
113
+
114
# PRISMA 2020 Item 8 + trAIce M3/M8/M9/R1/R2 (screening-scoped).
# Maps reporting-item codes to short descriptions; generate_prisma_report()
# uses these strings as the 'description' field of each report section.
prisma_screening_items = {
    'item_8': 'Selection process: automation tools used',
    'M3': 'Purpose/Stage: AI applied at title/abstract screening',
    'M8': 'Human-AI Interaction: human reviewer validation process',
    'M9': 'Performance Evaluation: screening model metrics',
    'R1': 'Study Selection: AI vs human exclusion counts in flow',
    'R2': 'Performance Metrics: AI screening performance results',
}
123
+
124
+
125
def generate_prisma_report(audit_trail, config=None):
    """Populate PRISMA Item 8 + screening-relevant trAIce items from audit data.

    Parameters
    ----------
    audit_trail : AuditTrail
        Source of model decisions and human reviews; summarized via
        ``summarize_human_decisions`` and ``calculate_agreement``.
    config : optional
        When given, thresholds and model settings are read from it with
        ``getattr``; missing attributes fall back to defaults/None.

    Returns
    -------
    dict
        One sub-dict per reporting code in ``prisma_screening_items``.
    """
    # The (0.3, 0.7) fallbacks mirror Config's uncertainty defaults.
    uncertainty_low = getattr(config, 'uncertainty_low', 0.3) if config else 0.3
    uncertainty_high = getattr(config, 'uncertainty_high', 0.7) if config else 0.7
    summary = summarize_human_decisions(audit_trail, uncertainty_low, uncertainty_high)
    agreement = audit_trail.calculate_agreement()

    report = {
        'item_8': {
            'description': prisma_screening_items['item_8'],
            'tool': 'pubmlp',
            'stage': 'title/abstract screening',
            'model': getattr(config, 'embedding_model', None) if config else None,
            'calibration': getattr(config, 'calibration_method', None) if config else None,
        },
        'M3': {
            'description': prisma_screening_items['M3'],
            'stage': 'title/abstract screening',
            'strategy': getattr(config, 'al_query_strategy', None) if config else None,
        },
        'M8': {
            'description': prisma_screening_items['M8'],
            'human_reviewed': summary['human_reviewed'],
            'human_overrides': summary['human_overrides'],
            'agreement_kappa': agreement['kappa'],
            # kappa is None until at least one entry has a human label.
            'kappa_interpretation': interpret_kappa(agreement['kappa']) if agreement['kappa'] is not None else None,
        },
        'M9': {
            'description': prisma_screening_items['M9'],
            'total_screened': summary['total'],
            'uncertain_flagged': summary['uncertain'],
        },
        'R1': {
            'description': prisma_screening_items['R1'],
            'model_included': summary['included'],
            'model_excluded': summary['excluded'],
            'human_overrides': summary['human_overrides'],
        },
        'R2': {
            'description': prisma_screening_items['R2'],
            'agreement': agreement,
        },
    }
    return report
@@ -0,0 +1,88 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+
5
+ from .utils import unpack_batch
6
+
7
+
8
def collect_logits(model, dataloader, device):
    """Run ``model`` over ``dataloader`` in eval mode, without gradients.

    Returns ``(logits, labels)``: the raw model outputs and their labels,
    each concatenated across all batches.
    """
    model.eval()
    logits_per_batch = []
    labels_per_batch = []
    with torch.no_grad():
        for batch in dataloader:
            input_ids, attention_mask, categorical_tensor, numeric_tensor, labels, texts = unpack_batch(batch, device)
            batch_logits = model(input_ids, attention_mask, categorical_tensor, numeric_tensor, texts)
            logits_per_batch.append(batch_logits)
            labels_per_batch.append(labels)
    return torch.cat(logits_per_batch), torch.cat(labels_per_batch)
19
+
20
+
21
class TemperatureScaling:
    """Post-hoc temperature scaling for calibrating sigmoid outputs.

    Fits one temperature for single-label logits, or one temperature per
    column for multi-label logits.  ``transform`` divides logits by the
    fitted temperature(s).  The single- and multi-label branches previously
    duplicated the whole LBFGS fitting routine; it now lives once in
    ``_fit_one`` (numerically identical ops in both paths).
    """

    def __init__(self):
        # float for single-label, list[float] for multi-label, None before fit().
        self.temperature = None

    @staticmethod
    def _fit_one(logits_1d, labels_1d, lr, max_iter):
        """Fit one positive temperature on 1-D logits/labels via LBFGS.

        The temperature is parameterized as exp(log_temp) so the optimizer
        cannot drive it to zero or negative.
        """
        criterion = nn.BCEWithLogitsLoss()
        log_temp = nn.Parameter(torch.zeros(1, device=logits_1d.device))
        optimizer = optim.LBFGS([log_temp], lr=lr, max_iter=max_iter)

        def closure():
            optimizer.zero_grad()
            loss = criterion(logits_1d / log_temp.exp(), labels_1d)
            loss.backward()
            return loss

        optimizer.step(closure)
        return log_temp.exp().item()

    def fit(self, logits, labels, lr=0.01, max_iter=50):
        """Optimize temperature(s) via BCE NLL on held-out logits.

        Returns ``self`` so calls can be chained.
        """
        num_labels = logits.shape[-1] if logits.dim() > 1 else 1
        if num_labels == 1:
            self.temperature = self._fit_one(
                logits.view(-1), labels.view(-1).float(), lr, max_iter)
        else:
            # One independent temperature per label column.
            self.temperature = [
                self._fit_one(logits[:, i], labels[:, i].float(), lr, max_iter)
                for i in range(num_labels)
            ]
        return self

    def transform(self, logits):
        """Return calibrated logits (input divided by the temperature(s))."""
        if isinstance(self.temperature, list):
            temps = torch.tensor(self.temperature, device=logits.device).unsqueeze(0)
            return logits / temps
        return logits / self.temperature

    def to_dict(self):
        """Serializable state for persistence."""
        return {'temperature': self.temperature}

    @classmethod
    def from_dict(cls, d):
        """Restore a scaler from :meth:`to_dict` output."""
        scaler = cls()
        scaler.temperature = d['temperature']
        return scaler
81
+
82
+
83
def calibrate_model(model, dataloader, device):
    """Collect held-out logits from ``dataloader`` and fit temperature scaling.

    Returns the fitted :class:`TemperatureScaling` instance.
    """
    logits, labels = collect_logits(model, dataloader, device)
    return TemperatureScaling().fit(logits, labels)
@@ -0,0 +1,124 @@
1
+ import random
2
+ import numpy as np
3
+ import torch
4
+
5
+
6
# Models that use the SentenceTransformer encoder (frozen, no fine-tuning).
# Exported as part of the public API (re-exported from pubmlp/__init__.py).
sentence_transformer_models = {'sentence-transformer', 'bge-small'}
8
+
9
+
10
class Config:
    """Configuration for PubMLP training and inference.

    Known keyword arguments override the defaults in ``_DEFAULTS``; unknown
    keyword arguments are silently ignored.  When ``model_name`` is not
    supplied it is derived from ``embedding_model``.
    """

    # (attribute, default) pairs in declaration order; kwargs override.
    _DEFAULTS = (
        # Random seed
        ('random_seed', 42),
        # Training hyperparameters
        ('batch_size', 16),
        ('eval_batch_size', 32),
        ('epochs', 10),
        ('learning_rate', 2e-5),
        ('early_stopping_patience', 3),
        # Model architecture
        ('dropout_rate', 0.2),
        ('mlp_hidden_size', 64),
        ('n_hidden_layers', 1),
        ('max_length', 512),
        # Optimization
        ('gradient_clip_norm', 1.0),
        ('warmup_steps', 0),
        # Embedding model
        ('embedding_model', 'bert'),
        ('model_name', None),
        ('pooling_strategy', 'auto'),
        # Uncertainty thresholds
        ('uncertainty_low', 0.3),
        ('uncertainty_high', 0.7),
        # Cross-validation
        ('n_folds', 5),
        # Calibration
        ('calibration_method', 'temperature'),
        # Active learning
        ('al_query_strategy', 'uncertainty'),
        ('al_batch_size', 20),
        ('al_initial_sample_pct', 0.1),
        # Categorical encoding
        ('rare_threshold', 5),
        # Class weighting
        ('pos_weight', 'auto'),
        # SAFE stopping
        ('safe_consecutive_irrelevant', 50),
        ('safe_min_screened_pct', 0.5),
        ('safe_random_sample_pct', 0.1),
        ('safe_switch_model', False),
    )

    def __init__(self, **kwargs):
        for name, default in self._DEFAULTS:
            setattr(self, name, kwargs.get(name, default))
        # Derive the checkpoint name from the embedding-model alias when the
        # caller did not pin one explicitly.
        if self.model_name is None:
            self.model_name = self._get_default_model_name()

    def _get_default_model_name(self):
        """Map the embedding-model alias to its default checkpoint name."""
        checkpoint_by_alias = {
            'bert': 'bert-base-uncased',
            'modernbert': 'answerdotai/ModernBERT-base',
            'scibert': 'allenai/scibert_scivocab_uncased',
            'pubmedbert': 'microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext',
            'sentence-transformer': 'all-MiniLM-L6-v2',
            'bge-small': 'BAAI/bge-small-en-v1.5',
        }
        # Unknown aliases fall back to plain BERT.
        return checkpoint_by_alias.get(self.embedding_model, 'bert-base-uncased')

    def set_random_seeds(self):
        """Seed Python, NumPy, and torch (plus CUDA, when available)."""
        random.seed(self.random_seed)
        np.random.seed(self.random_seed)
        torch.manual_seed(self.random_seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(self.random_seed)
            torch.cuda.manual_seed_all(self.random_seed)

    def to_dict(self):
        """Public configuration values as a plain dict (no private keys)."""
        return {k: v for k, v in self.__dict__.items() if not k.startswith('_')}

    def __repr__(self):
        fields = ', '.join(f"{k}={repr(v)}" for k, v in self.to_dict().items())
        return f"Config({fields})"
94
+
95
+
96
# Library defaults (see Config for every value).
default_config = Config()

# Quicker runs: fewer epochs, larger batches, frozen sentence encoder.
fast_config = Config(
    epochs=5,
    batch_size=32,
    embedding_model='sentence-transformer',
    model_name='all-MiniLM-L6-v2'
)

# Longer training with more regularization and a wider MLP.
robust_config = Config(
    epochs=20,
    early_stopping_patience=5,
    dropout_rate=0.3,
    mlp_hidden_size=128
)

# Human-in-the-loop screening: uncertainty sampling + SAFE stopping values.
hitl_config = Config(
    al_query_strategy='uncertainty',
    al_batch_size=20,
    safe_consecutive_irrelevant=50,
    safe_min_screened_pct=0.5,
)

# Ready-made configs keyed by publication domain / encoder choice.
domain_configs = {
    'science': Config(embedding_model='scibert'),
    'medicine': Config(embedding_model='pubmedbert'),
    'general': Config(embedding_model='bert'),
    'modernbert': Config(embedding_model='modernbert'),
}