poster2json 0.2.2__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {poster2json-0.2.2 → poster2json-0.3.0}/PKG-INFO +12 -5
- {poster2json-0.2.2 → poster2json-0.3.0}/README.md +11 -4
- {poster2json-0.2.2 → poster2json-0.3.0}/poster2json/cli.py +36 -10
- {poster2json-0.2.2 → poster2json-0.3.0}/poster2json/extract.py +71 -91
- {poster2json-0.2.2 → poster2json-0.3.0}/pyproject.toml +1 -1
- {poster2json-0.2.2 → poster2json-0.3.0}/LICENSE.md +0 -0
- {poster2json-0.2.2 → poster2json-0.3.0}/poster2json/__init__.py +0 -0
- {poster2json-0.2.2 → poster2json-0.3.0}/poster2json/__main__.py +0 -0
- {poster2json-0.2.2 → poster2json-0.3.0}/poster2json/gui.py +0 -0
- {poster2json-0.2.2 → poster2json-0.3.0}/poster2json/identifiers.py +0 -0
- {poster2json-0.2.2 → poster2json-0.3.0}/poster2json/schemas/poster_schema.json +0 -0
- {poster2json-0.2.2 → poster2json-0.3.0}/poster2json/standards.py +0 -0
- {poster2json-0.2.2 → poster2json-0.3.0}/poster2json/tests/__init__.py +0 -0
- {poster2json-0.2.2 → poster2json-0.3.0}/poster2json/tests/conftest.py +0 -0
- {poster2json-0.2.2 → poster2json-0.3.0}/poster2json/utils.py +0 -0
- {poster2json-0.2.2 → poster2json-0.3.0}/poster2json/validate.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: poster2json
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Convert scientific posters (PDF/images) to structured JSON metadata using Large Language Models
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE.md
|
|
@@ -44,7 +44,7 @@ Description-Content-Type: text/markdown
|
|
|
44
44
|
|
|
45
45
|
<div align="center">
|
|
46
46
|
|
|
47
|
-
<img src="https://cdn.posters.science/logos/poster-fairy.png" alt="logo" width="200" height="auto" />
|
|
47
|
+
<img src="https://cdn.posters.science/logos/poster-fairy.png" alt="logo" width="200" height="auto" title="This image was generated by AI" />
|
|
48
48
|
|
|
49
49
|
<br />
|
|
50
50
|
|
|
@@ -118,9 +118,16 @@ pip install poster2json
|
|
|
118
118
|
### CLI Usage
|
|
119
119
|
|
|
120
120
|
```bash
|
|
121
|
-
# Extract metadata from a poster
|
|
121
|
+
# Extract metadata from a poster (default: fine-tuned Llama @ 4bit)
|
|
122
122
|
poster2json extract poster.pdf -o result.json
|
|
123
123
|
|
|
124
|
+
# Use a different instruct model (any HuggingFace repo id works)
|
|
125
|
+
poster2json extract poster.pdf --model google/gemma-2-9b-it --quantization 4bit
|
|
126
|
+
|
|
127
|
+
# Trade VRAM for quality
|
|
128
|
+
poster2json extract poster.pdf --quantization 8bit
|
|
129
|
+
poster2json extract poster.pdf --quantization fp16
|
|
130
|
+
|
|
124
131
|
# Validate extracted JSON
|
|
125
132
|
poster2json validate result.json
|
|
126
133
|
|
|
@@ -175,7 +182,7 @@ Output conforms to the [poster-json-schema](https://github.com/fairdataihub/post
|
|
|
175
182
|
|
|
176
183
|
| Requirement | Specification |
|
|
177
184
|
| ----------- | -------------------------------- |
|
|
178
|
-
| GPU | NVIDIA CUDA-capable, ≥16GB VRAM |
|
|
185
|
+
| GPU | NVIDIA CUDA-capable, ≥8GB VRAM (default 4bit); ≥16GB for `--quantization fp16` or image/OCR posters |
|
|
179
186
|
| RAM | ≥32GB recommended |
|
|
180
187
|
| Python | 3.10+ |
|
|
181
188
|
| OS | Linux, macOS, Windows (via WSL2) |
|
|
@@ -246,7 +253,7 @@ MIT License - see [LICENSE](LICENSE.md) for details.
|
|
|
246
253
|
title = {poster2json: Scientific Poster to JSON Metadata Extraction},
|
|
247
254
|
author = {O'Neill, James and Soundarajan, Sanjay and Portillo, Dorian and Patel, Bhavesh},
|
|
248
255
|
year = {2026},
|
|
249
|
-
version = {0.2.
|
|
256
|
+
version = {0.2.3},
|
|
250
257
|
url = {https://github.com/fairdataihub/poster2json},
|
|
251
258
|
doi = {10.5281/zenodo.18320010}
|
|
252
259
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
<div align="center">
|
|
2
2
|
|
|
3
|
-
<img src="https://cdn.posters.science/logos/poster-fairy.png" alt="logo" width="200" height="auto" />
|
|
3
|
+
<img src="https://cdn.posters.science/logos/poster-fairy.png" alt="logo" width="200" height="auto" title="This image was generated by AI" />
|
|
4
4
|
|
|
5
5
|
<br />
|
|
6
6
|
|
|
@@ -74,9 +74,16 @@ pip install poster2json
|
|
|
74
74
|
### CLI Usage
|
|
75
75
|
|
|
76
76
|
```bash
|
|
77
|
-
# Extract metadata from a poster
|
|
77
|
+
# Extract metadata from a poster (default: fine-tuned Llama @ 4bit)
|
|
78
78
|
poster2json extract poster.pdf -o result.json
|
|
79
79
|
|
|
80
|
+
# Use a different instruct model (any HuggingFace repo id works)
|
|
81
|
+
poster2json extract poster.pdf --model google/gemma-2-9b-it --quantization 4bit
|
|
82
|
+
|
|
83
|
+
# Trade VRAM for quality
|
|
84
|
+
poster2json extract poster.pdf --quantization 8bit
|
|
85
|
+
poster2json extract poster.pdf --quantization fp16
|
|
86
|
+
|
|
80
87
|
# Validate extracted JSON
|
|
81
88
|
poster2json validate result.json
|
|
82
89
|
|
|
@@ -131,7 +138,7 @@ Output conforms to the [poster-json-schema](https://github.com/fairdataihub/post
|
|
|
131
138
|
|
|
132
139
|
| Requirement | Specification |
|
|
133
140
|
| ----------- | -------------------------------- |
|
|
134
|
-
| GPU | NVIDIA CUDA-capable, ≥16GB VRAM |
|
|
141
|
+
| GPU | NVIDIA CUDA-capable, ≥8GB VRAM (default 4bit); ≥16GB for `--quantization fp16` or image/OCR posters |
|
|
135
142
|
| RAM | ≥32GB recommended |
|
|
136
143
|
| Python | 3.10+ |
|
|
137
144
|
| OS | Linux, macOS, Windows (via WSL2) |
|
|
@@ -202,7 +209,7 @@ MIT License - see [LICENSE](LICENSE.md) for details.
|
|
|
202
209
|
title = {poster2json: Scientific Poster to JSON Metadata Extraction},
|
|
203
210
|
author = {O'Neill, James and Soundarajan, Sanjay and Portillo, Dorian and Patel, Bhavesh},
|
|
204
211
|
year = {2026},
|
|
205
|
-
version = {0.2.
|
|
212
|
+
version = {0.2.3},
|
|
206
213
|
url = {https://github.com/fairdataihub/poster2json},
|
|
207
214
|
doi = {10.5281/zenodo.18320010}
|
|
208
215
|
}
|
|
@@ -54,26 +54,52 @@ def main(ctx):
|
|
|
54
54
|
default=True,
|
|
55
55
|
help="Pretty-print JSON output (default: pretty)"
|
|
56
56
|
)
|
|
57
|
-
|
|
57
|
+
@click.option(
|
|
58
|
+
"--model",
|
|
59
|
+
"model_id",
|
|
60
|
+
type=str,
|
|
61
|
+
default=None,
|
|
62
|
+
help=(
|
|
63
|
+
"HuggingFace model ID to use for JSON structuring. Overrides the "
|
|
64
|
+
"default fine-tuned Llama. Any instruct model works "
|
|
65
|
+
"(e.g. google/gemma-2-9b-it, Qwen/Qwen2.5-7B-Instruct)."
|
|
66
|
+
)
|
|
67
|
+
)
|
|
68
|
+
@click.option(
|
|
69
|
+
"--quantization",
|
|
70
|
+
type=click.Choice(["fp16", "8bit", "4bit"], case_sensitive=False),
|
|
71
|
+
default=None,
|
|
72
|
+
help="Precision mode for the JSON model. Defaults to 4bit (NF4)."
|
|
73
|
+
)
|
|
74
|
+
def extract(input_file: str, output: str, pretty: bool, model_id: str, quantization: str):
|
|
58
75
|
"""
|
|
59
76
|
Extract structured JSON from a scientific poster.
|
|
60
|
-
|
|
77
|
+
|
|
61
78
|
INPUT_FILE: Path to the poster file (PDF, JPG, or PNG)
|
|
62
|
-
|
|
63
|
-
Requires a CUDA-capable GPU
|
|
64
|
-
|
|
79
|
+
|
|
80
|
+
Requires a CUDA-capable GPU. The default 4bit quantization fits on
|
|
81
|
+
~6GB VRAM; use --quantization 8bit or fp16 if you have headroom and
|
|
82
|
+
want slightly better quality. (Image/OCR posters also load a Qwen2-VL
|
|
83
|
+
vision model at bf16 — expect higher peak VRAM on that path.)
|
|
84
|
+
|
|
65
85
|
Examples:
|
|
66
|
-
|
|
86
|
+
|
|
67
87
|
poster2json extract poster.pdf
|
|
68
|
-
|
|
88
|
+
|
|
69
89
|
poster2json extract poster.jpg -o output.json
|
|
90
|
+
|
|
91
|
+
poster2json extract poster.pdf --model google/gemma-2-9b-it --quantization 8bit
|
|
70
92
|
"""
|
|
71
93
|
from .extract import extract_poster
|
|
72
|
-
|
|
94
|
+
|
|
73
95
|
click.echo(f"Extracting metadata from: {input_file}", err=True)
|
|
74
|
-
|
|
96
|
+
if model_id:
|
|
97
|
+
click.echo(f"Model: {model_id}", err=True)
|
|
98
|
+
if quantization:
|
|
99
|
+
click.echo(f"Quantization: {quantization}", err=True)
|
|
100
|
+
|
|
75
101
|
try:
|
|
76
|
-
result = extract_poster(input_file)
|
|
102
|
+
result = extract_poster(input_file, model_id=model_id, quantization=quantization)
|
|
77
103
|
|
|
78
104
|
if "error" in result:
|
|
79
105
|
click.echo(f"Error during extraction: {result['error']}", err=True)
|
|
@@ -33,6 +33,7 @@ from transformers import (
|
|
|
33
33
|
AutoModelForCausalLM,
|
|
34
34
|
AutoProcessor,
|
|
35
35
|
AutoTokenizer,
|
|
36
|
+
BitsAndBytesConfig,
|
|
36
37
|
Qwen2VLForConditionalGeneration,
|
|
37
38
|
TextStreamer,
|
|
38
39
|
)
|
|
@@ -589,9 +590,21 @@ _json_model = None
|
|
|
589
590
|
_json_tokenizer = None
|
|
590
591
|
|
|
591
592
|
|
|
592
|
-
def load_json_model(force_full_precision: bool = False):
|
|
593
|
-
|
|
593
|
+
def load_json_model(
|
|
594
|
+
model_id: Optional[str] = None,
|
|
595
|
+
quantization: Optional[str] = None,
|
|
596
|
+
):
|
|
597
|
+
"""Load the JSON-structuring LLM.
|
|
598
|
+
|
|
599
|
+
Args:
|
|
600
|
+
model_id: override the default JSON_MODEL_ID. Accepts any HuggingFace
|
|
601
|
+
repo id (e.g. the default fine-tuned Llama, or a generic instruct
|
|
602
|
+
model like google/gemma-2-9b-it, Qwen/Qwen2.5-7B-Instruct).
|
|
603
|
+
quantization: precision mode — one of "fp16", "8bit", "4bit".
|
|
604
|
+
Defaults to "4bit" (NF4), which fits on ~6GB VRAM.
|
|
605
|
+
"""
|
|
594
606
|
global _json_model, _json_tokenizer
|
|
607
|
+
resolved_model_id = model_id or JSON_MODEL_ID
|
|
595
608
|
if _json_model is None:
|
|
596
609
|
device = get_best_gpu()
|
|
597
610
|
|
|
@@ -604,12 +617,16 @@ def load_json_model(force_full_precision: bool = False):
|
|
|
604
617
|
free_gb = 32
|
|
605
618
|
device_map_value = "cpu"
|
|
606
619
|
|
|
607
|
-
log(f"Loading {
|
|
620
|
+
log(f"Loading {resolved_model_id} for JSON structuring on {device}...")
|
|
608
621
|
|
|
609
622
|
try:
|
|
610
|
-
_json_tokenizer = AutoTokenizer.from_pretrained(
|
|
623
|
+
_json_tokenizer = AutoTokenizer.from_pretrained(resolved_model_id)
|
|
611
624
|
|
|
612
|
-
|
|
625
|
+
mode = (quantization or "4bit").lower()
|
|
626
|
+
if mode not in {"fp16", "8bit", "4bit"}:
|
|
627
|
+
raise ValueError(
|
|
628
|
+
f"quantization must be one of fp16|8bit|4bit, got {quantization!r}"
|
|
629
|
+
)
|
|
613
630
|
|
|
614
631
|
# Try Flash Attention 2
|
|
615
632
|
try:
|
|
@@ -621,28 +638,30 @@ def load_json_model(force_full_precision: bool = False):
|
|
|
621
638
|
attn_impl = None
|
|
622
639
|
log(" Flash Attention not available, using default attention")
|
|
623
640
|
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
"
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
641
|
+
model_kwargs = {
|
|
642
|
+
"device_map": device_map_value,
|
|
643
|
+
"low_cpu_mem_usage": True,
|
|
644
|
+
}
|
|
645
|
+
if attn_impl:
|
|
646
|
+
model_kwargs["attn_implementation"] = attn_impl
|
|
647
|
+
|
|
648
|
+
if mode == "8bit":
|
|
649
|
+
log(f" Using 8-bit quantization (free={free_gb:.1f}GB)")
|
|
650
|
+
model_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)
|
|
651
|
+
elif mode == "4bit":
|
|
652
|
+
log(f" Using 4-bit NF4 quantization (free={free_gb:.1f}GB)")
|
|
653
|
+
model_kwargs["quantization_config"] = BitsAndBytesConfig(
|
|
654
|
+
load_in_4bit=True,
|
|
655
|
+
bnb_4bit_quant_type="nf4",
|
|
656
|
+
bnb_4bit_compute_dtype=torch.bfloat16,
|
|
657
|
+
bnb_4bit_use_double_quant=True,
|
|
658
|
+
)
|
|
659
|
+
else: # fp16 (bfloat16)
|
|
660
|
+
log(f" Using bfloat16 (free={free_gb:.1f}GB)")
|
|
661
|
+
model_kwargs["torch_dtype"] = torch.bfloat16
|
|
662
|
+
|
|
663
|
+
_json_model = AutoModelForCausalLM.from_pretrained(resolved_model_id, **model_kwargs)
|
|
664
|
+
log(f" ✓ JSON model loaded on {device} ({mode})")
|
|
646
665
|
except Exception as e:
|
|
647
666
|
log(f" ✗ Failed to load JSON model: {e}")
|
|
648
667
|
if _json_model is not None:
|
|
@@ -722,13 +741,7 @@ JSON SCHEMA (all top-level fields are REQUIRED):
|
|
|
722
741
|
"subjects": [{{"subject": "keyword1"}}, {{"subject": "keyword2"}}, {{"subject": "keyword3"}}],
|
|
723
742
|
"descriptions": [{{"description": "The abstract text from the poster...", "descriptionType": "Abstract"}}],
|
|
724
743
|
"publisher": {{"name": "Conference Organizer or Institution Name"}},
|
|
725
|
-
"conference":
|
|
726
|
-
"conferenceName": "Name of Conference",
|
|
727
|
-
"conferenceYear": 2025,
|
|
728
|
-
"conferenceLocation": "City, Country",
|
|
729
|
-
"conferenceStartDate": "YYYY-MM-DD",
|
|
730
|
-
"conferenceEndDate": "YYYY-MM-DD"
|
|
731
|
-
}},
|
|
744
|
+
"conference": null,
|
|
732
745
|
"formats": ["PDF"],
|
|
733
746
|
"content": {{
|
|
734
747
|
"sections": [
|
|
@@ -747,8 +760,12 @@ EXTRACTION NOTES:
|
|
|
747
760
|
- descriptions: Use the Abstract section content, descriptionType is REQUIRED
|
|
748
761
|
- publisher: Use conference organizer, hosting institution, or repository name
|
|
749
762
|
- titles: If the poster title is ALL CAPS, convert to proper Title Case preserving acronyms (e.g. "RESEARCH ON SARS-CoV-2" not "RESEARCH ON SARS-COV-2")
|
|
750
|
-
- conference:
|
|
751
|
-
|
|
763
|
+
- conference: Extract ONLY from text clearly visible on the poster (header, footer, logos).
|
|
764
|
+
* If conference details are NOT visible, set "conference": null — do NOT invent names, locations, dates, URLs, or acronyms.
|
|
765
|
+
* NEVER output generic values like "Name of Conference", "City, Country", "Conference Name", or made-up URLs.
|
|
766
|
+
* If only SOME fields are visible (e.g. name and year but not location), include only those: {{"conferenceName": "ACL 2024", "conferenceYear": 2024}}
|
|
767
|
+
* If no conference information is found at all, output "conference": null
|
|
768
|
+
- publisher: Extract from poster. If not found, set to null — do NOT use placeholder text
|
|
752
769
|
- formats: Set to ["PDF"] for PDF files, ["PNG"] or ["JPEG"] for images
|
|
753
770
|
- imageCaptions/tableCaptions: Use "id" field (e.g., "fig1") for cross-referencing if needed
|
|
754
771
|
- rightsList: OPTIONAL - include if license/copyright info found on poster
|
|
@@ -763,7 +780,7 @@ FALLBACK_PROMPT = """Convert poster text to JSON. REQUIRED FIELDS:
|
|
|
763
780
|
2. SEPARATE section for EACH header found in the poster text. Use the poster's own headers. Lines starting with "## " are detected headers.
|
|
764
781
|
3. Copy ALL text EXACTLY verbatim
|
|
765
782
|
4. If title is ALL CAPS, convert to Title Case preserving acronyms (SARS-CoV-2, not SARS-COV-2)
|
|
766
|
-
5.
|
|
783
|
+
5. conference/publisher: extract ONLY if clearly visible on the poster. If not found, set to null. NEVER invent names, locations, dates, URLs, or use generic placeholders.
|
|
767
784
|
|
|
768
785
|
{{
|
|
769
786
|
"creators": [{{"name": "LastName, FirstName", "givenName": "FirstName", "familyName": "LastName", "affiliation": ["Institution"]}}],
|
|
@@ -772,7 +789,7 @@ FALLBACK_PROMPT = """Convert poster text to JSON. REQUIRED FIELDS:
|
|
|
772
789
|
"subjects": [{{"subject": "keyword1"}}, {{"subject": "keyword2"}}],
|
|
773
790
|
"descriptions": [{{"description": "Abstract text", "descriptionType": "Abstract"}}],
|
|
774
791
|
"publisher": {{"name": "Conference or Institution"}},
|
|
775
|
-
"conference":
|
|
792
|
+
"conference": null,
|
|
776
793
|
"formats": ["PDF"],
|
|
777
794
|
"content": {{
|
|
778
795
|
"sections": [{{"sectionTitle": "Header", "sectionContent": "verbatim text"}}]
|
|
@@ -1172,43 +1189,6 @@ def _postprocess_json(data: dict, raw_text: str = "") -> dict:
|
|
|
1172
1189
|
|
|
1173
1190
|
result = enrich_json_with_identifiers(result, raw_text)
|
|
1174
1191
|
|
|
1175
|
-
# Strip "Unknown" and prompt-placeholder values the LLM likes to hallucinate.
|
|
1176
|
-
# These violate metadata quality expectations — better to omit than guess.
|
|
1177
|
-
_UNKNOWN_RE = re.compile(r"^unknown\b", re.IGNORECASE)
|
|
1178
|
-
# Prompt placeholders that the model echoes back verbatim when it can't
|
|
1179
|
-
# find real conference metadata on the poster.
|
|
1180
|
-
_PLACEHOLDER_VALS = {
|
|
1181
|
-
"name of conference",
|
|
1182
|
-
"conference name",
|
|
1183
|
-
"city, country",
|
|
1184
|
-
"location",
|
|
1185
|
-
"conference organizer or institution name",
|
|
1186
|
-
"conference or institution",
|
|
1187
|
-
}
|
|
1188
|
-
_PLACEHOLDER_DATE_RE = re.compile(r"^[Yy]{4}-[Mm]{2}-[Dd]{2}$")
|
|
1189
|
-
|
|
1190
|
-
def _is_placeholder(val: str) -> bool:
|
|
1191
|
-
s = val.strip()
|
|
1192
|
-
return (
|
|
1193
|
-
not s
|
|
1194
|
-
or _UNKNOWN_RE.match(s)
|
|
1195
|
-
or s.lower() in _PLACEHOLDER_VALS
|
|
1196
|
-
or bool(_PLACEHOLDER_DATE_RE.match(s))
|
|
1197
|
-
)
|
|
1198
|
-
|
|
1199
|
-
if "conference" in result and isinstance(result["conference"], dict):
|
|
1200
|
-
for key in list(result["conference"]):
|
|
1201
|
-
val = result["conference"][key]
|
|
1202
|
-
if isinstance(val, str) and _is_placeholder(val):
|
|
1203
|
-
del result["conference"][key]
|
|
1204
|
-
# Top-level optional string fields
|
|
1205
|
-
for key in ("conferenceLocation", "publisher", "researchField"):
|
|
1206
|
-
val = result.get(key)
|
|
1207
|
-
if isinstance(val, str) and _is_placeholder(val):
|
|
1208
|
-
del result[key]
|
|
1209
|
-
elif isinstance(val, dict) and "name" in val and isinstance(val["name"], str) and _is_placeholder(val["name"]):
|
|
1210
|
-
del result[key]
|
|
1211
|
-
|
|
1212
1192
|
return result
|
|
1213
1193
|
|
|
1214
1194
|
|
|
@@ -1286,23 +1266,22 @@ def extract_json_with_retry(raw_text: str, model, tokenizer) -> dict:
|
|
|
1286
1266
|
return result
|
|
1287
1267
|
|
|
1288
1268
|
|
|
1289
|
-
def extract_poster(poster_path: str) -> dict:
|
|
1269
|
+
def extract_poster(
|
|
1270
|
+
poster_path: str,
|
|
1271
|
+
model_id: Optional[str] = None,
|
|
1272
|
+
quantization: Optional[str] = None,
|
|
1273
|
+
) -> dict:
|
|
1290
1274
|
"""
|
|
1291
1275
|
Extract structured JSON metadata from a scientific poster.
|
|
1292
1276
|
|
|
1293
|
-
This is the main entry point for poster extraction.
|
|
1294
|
-
|
|
1295
1277
|
Args:
|
|
1296
|
-
poster_path: Path to the poster file (PDF, JPG, or PNG)
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
>>> result = extract_poster("poster.pdf")
|
|
1304
|
-
>>> print(result["titles"][0]["title"])
|
|
1305
|
-
"Machine Learning Approaches to Diabetic Retinopathy Detection"
|
|
1278
|
+
poster_path: Path to the poster file (PDF, JPG, or PNG).
|
|
1279
|
+
model_id: Override the default JSON structuring model. Accepts any
|
|
1280
|
+
HuggingFace repo id (e.g. google/gemma-2-9b-it,
|
|
1281
|
+
Qwen/Qwen2.5-7B-Instruct) in addition to the default fine-tuned
|
|
1282
|
+
Llama.
|
|
1283
|
+
quantization: Precision mode: "fp16", "8bit", or "4bit".
|
|
1284
|
+
Defaults to "4bit" (NF4) when unset.
|
|
1306
1285
|
"""
|
|
1307
1286
|
log(f"Processing poster: {poster_path}")
|
|
1308
1287
|
|
|
@@ -1317,12 +1296,13 @@ def extract_poster(poster_path: str) -> dict:
|
|
|
1317
1296
|
log(f"Extracted {len(raw_text)} chars using {source} in {t_extract_elapsed:.2f}s")
|
|
1318
1297
|
|
|
1319
1298
|
# Unload vision model before loading JSON model
|
|
1320
|
-
ext = Path(poster_path).suffix.lower()
|
|
1321
|
-
is_image_poster = ext in [".jpg", ".jpeg", ".png"]
|
|
1322
1299
|
unload_vision_model()
|
|
1323
1300
|
|
|
1324
1301
|
# Load JSON model
|
|
1325
|
-
model, tokenizer = load_json_model(
|
|
1302
|
+
model, tokenizer = load_json_model(
|
|
1303
|
+
model_id=model_id,
|
|
1304
|
+
quantization=quantization,
|
|
1305
|
+
)
|
|
1326
1306
|
|
|
1327
1307
|
try:
|
|
1328
1308
|
t_json_start = time.time()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|