scholarcli 1.21__tar.gz → 1.23__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scholarcli-1.21 → scholarcli-1.23}/PKG-INFO +2 -2
- {scholarcli-1.21 → scholarcli-1.23}/README.md +1 -1
- {scholarcli-1.21 → scholarcli-1.23}/pyproject.toml +1 -1
- {scholarcli-1.21 → scholarcli-1.23}/src/scholar/cli.py +8 -5
- {scholarcli-1.21 → scholarcli-1.23}/src/scholar/llm_review.py +15 -8
- {scholarcli-1.21 → scholarcli-1.23}/src/scholarcli.egg-info/PKG-INFO +2 -2
- {scholarcli-1.21 → scholarcli-1.23}/tests/test_llm_review.py +34 -0
- {scholarcli-1.21 → scholarcli-1.23}/LICENSE +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/setup.cfg +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/scholar/__init__.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/scholar/__main__.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/scholar/cache.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/scholar/crossref.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/scholar/enrich.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/scholar/notes.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/scholar/pdf.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/scholar/providers.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/scholar/questionary.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/scholar/review.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/scholar/scholar.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/scholar/tui.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/scholar/utils.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/scholarcli.egg-info/SOURCES.txt +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/scholarcli.egg-info/dependency_links.txt +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/scholarcli.egg-info/entry_points.txt +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/scholarcli.egg-info/requires.txt +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/scholarcli.egg-info/top_level.txt +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/__init__.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/apis/__init__.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/apis/aggregator.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/apis/arxiv.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/apis/base.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/apis/crossref.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/apis/google_scholar.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/apis/openalex.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/apis/opencitations.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/apis/semantic_scholar.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/cli.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/exporters/__init__.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/exporters/bibtex.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/exporters/csv_exporter.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/exporters/tikz.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/filters/__init__.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/filters/filter_engine.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/models.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/paper_utils.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/parsers/__init__.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/parsers/pdf_parser.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/scoring/__init__.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/scoring/base.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/scoring/llm_scorer.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/scoring/tfidf_scorer.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/services.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/snowballing.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/storage/__init__.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/storage/json_storage.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/tui/__init__.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/tui/app.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/tui/dialogs.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/tui/setup.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/snowball/visualization.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/__init__.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/analysis.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/cli.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/clustering.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/database.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/grobid.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/logging.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/models.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/project.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/tui.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/tests/test_cache.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/tests/test_cli.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/tests/test_crossref.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/tests/test_enrich.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/tests/test_notes.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/tests/test_pdf.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/tests/test_providers.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/tests/test_review.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/tests/test_scholar.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/tests/test_tui.py +0 -0
- {scholarcli-1.21 → scholarcli-1.23}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: scholarcli
|
|
3
|
-
Version: 1.21
|
|
3
|
+
Version: 1.23
|
|
4
4
|
Summary: A tool for structured literature searches across bibliographic databases
|
|
5
5
|
Author-email: Daniel Bosk <dbosk@kth.se>, Ric Glassey <glassey@kth.se>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -303,7 +303,7 @@ scholar llm classify "session-name" --count 10
|
|
|
303
303
|
|
|
304
304
|
### How It Works
|
|
305
305
|
|
|
306
|
-
1. **Tag some papers manually** -
|
|
306
|
+
1. **Tag some papers manually** (recommended) - Examples help the LLM learn your criteria. Tagging ~5 papers (themes for kept, motivations for discarded) improves quality, but classification will still run — with a warning — if you have fewer or none.
|
|
307
307
|
|
|
308
308
|
2. **Set research context** (optional) - Describe your review's focus to help the LLM understand relevance criteria.
|
|
309
309
|
|
|
@@ -263,7 +263,7 @@ scholar llm classify "session-name" --count 10
|
|
|
263
263
|
|
|
264
264
|
### How It Works
|
|
265
265
|
|
|
266
|
-
1. **Tag some papers manually** -
|
|
266
|
+
1. **Tag some papers manually** (recommended) - Examples help the LLM learn your criteria. Tagging ~5 papers (themes for kept, motivations for discarded) improves quality, but classification will still run — with a warning — if you have fewer or none.
|
|
267
267
|
|
|
268
268
|
2. **Set research context** (optional) - Describe your review's focus to help the LLM understand relevance criteria.
|
|
269
269
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "scholarcli"
|
|
3
|
-
version = "1.21"
|
|
3
|
+
version = "1.23"
|
|
4
4
|
description = "A tool for structured literature searches across bibliographic databases"
|
|
5
5
|
authors = [{ name = "Daniel Bosk", email = "dbosk@kth.se" },
|
|
6
6
|
{ name = "Ric Glassey", email = "glassey@kth.se" }]
|
|
@@ -3450,7 +3450,8 @@ def llm_classify(
|
|
|
3450
3450
|
bool,
|
|
3451
3451
|
typer.Option(
|
|
3452
3452
|
"--no-examples",
|
|
3453
|
-
help="
|
|
3453
|
+
help="Skip examples entirely for zero-shot classification, "
|
|
3454
|
+
"even if some exist.",
|
|
3454
3455
|
),
|
|
3455
3456
|
] = False,
|
|
3456
3457
|
full_text: Annotated[
|
|
@@ -3465,10 +3466,12 @@ def llm_classify(
|
|
|
3465
3466
|
"""
|
|
3466
3467
|
Classify pending papers using LLM.
|
|
3467
3468
|
|
|
3468
|
-
Uses human-reviewed papers as training examples
|
|
3469
|
-
|
|
3470
|
-
|
|
3471
|
-
|
|
3469
|
+
Uses human-reviewed papers as training examples when available. Tagged
|
|
3470
|
+
examples are recommended (ideally 5+, with at least 1 kept and 1
|
|
3471
|
+
discarded) but no longer required: if too few exist, a warning is shown
|
|
3472
|
+
and classification proceeds with whatever examples are present. Pass
|
|
3473
|
+
--no-examples to skip examples entirely and run zero-shot classification
|
|
3474
|
+
using only the research context, even when examples are available.
|
|
3472
3475
|
|
|
3473
3476
|
With --full-text, supporting passages from each paper's PDF (matched
|
|
3474
3477
|
against the research context) are added to the prompt, so classification
|
|
@@ -537,7 +537,8 @@ def classify_papers_with_llm(
|
|
|
537
537
|
|
|
538
538
|
This is the main entry point for LLM-assisted classification. It:
|
|
539
539
|
1. Optionally gathers training examples from human-reviewed papers
|
|
540
|
-
2.
|
|
540
|
+
2. Warns (but does not fail) if examples fall below the recommended
|
|
541
|
+
minimum, then proceeds with whatever examples exist
|
|
541
542
|
3. Optionally enriches papers lacking abstracts
|
|
542
543
|
4. Optionally extracts full-text supporting passages
|
|
543
544
|
5. Constructs a prompt with examples and papers to classify
|
|
@@ -550,9 +551,11 @@ def classify_papers_with_llm(
|
|
|
550
551
|
model_id: Deprecated compatibility alias for selecting the analytic model
|
|
551
552
|
enrich_missing: Whether to auto-enrich papers without abstracts
|
|
552
553
|
dry_run: If True, return the prompt without calling LLM
|
|
553
|
-
require_examples: If True (default),
|
|
554
|
-
|
|
555
|
-
|
|
554
|
+
require_examples: If True (default), gather tagged example papers
|
|
555
|
+
and use them to ground the LLM. Insufficient or missing examples
|
|
556
|
+
produce a warning, not an error. Set False to skip example
|
|
557
|
+
gathering entirely for zero-shot classification using only the
|
|
558
|
+
research context.
|
|
556
559
|
use_fulltext: If True, extract supporting passages from each paper's
|
|
557
560
|
full text (matched against the research context) and include them
|
|
558
561
|
in the prompt, so classification can use evidence beyond the
|
|
@@ -563,18 +566,22 @@ def classify_papers_with_llm(
|
|
|
563
566
|
LLMBatchResult with decisions, or prompt string if dry_run=True
|
|
564
567
|
|
|
565
568
|
Raises:
|
|
566
|
-
ValueError: If there are no papers to classify
|
|
567
|
-
require_examples=True and there are insufficient examples
|
|
569
|
+
ValueError: If there are no pending papers to classify
|
|
568
570
|
ImportError: If llm package is not installed
|
|
569
571
|
"""
|
|
570
572
|
# Gather examples (optional)
|
|
571
573
|
if require_examples:
|
|
572
574
|
kept_examples, discarded_examples = get_example_decisions(session)
|
|
573
575
|
|
|
574
|
-
#
|
|
576
|
+
# Below-threshold examples are a quality concern, not a hard stop:
|
|
577
|
+
# warn and continue with whatever was gathered (possibly none) so
|
|
578
|
+
# classification still runs early in a review.
|
|
575
579
|
is_valid, error = validate_examples(kept_examples, discarded_examples)
|
|
576
580
|
if not is_valid:
|
|
577
|
-
|
|
581
|
+
logger.warning(
|
|
582
|
+
f"{error} Proceeding with the available examples; "
|
|
583
|
+
"classification quality may be reduced."
|
|
584
|
+
)
|
|
578
585
|
else:
|
|
579
586
|
kept_examples = []
|
|
580
587
|
discarded_examples = []
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: scholarcli
|
|
3
|
-
Version: 1.21
|
|
3
|
+
Version: 1.23
|
|
4
4
|
Summary: A tool for structured literature searches across bibliographic databases
|
|
5
5
|
Author-email: Daniel Bosk <dbosk@kth.se>, Ric Glassey <glassey@kth.se>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -303,7 +303,7 @@ scholar llm classify "session-name" --count 10
|
|
|
303
303
|
|
|
304
304
|
### How It Works
|
|
305
305
|
|
|
306
|
-
1. **Tag some papers manually** -
|
|
306
|
+
1. **Tag some papers manually** (recommended) - Examples help the LLM learn your criteria. Tagging ~5 papers (themes for kept, motivations for discarded) improves quality, but classification will still run — with a warning — if you have fewer or none.
|
|
307
307
|
|
|
308
308
|
2. **Set research context** (optional) - Describe your review's focus to help the LLM understand relevance criteria.
|
|
309
309
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Tests for the LLM review module."""
|
|
2
2
|
import json
|
|
3
|
+
import logging
|
|
3
4
|
import pytest
|
|
4
5
|
from datetime import datetime
|
|
5
6
|
from unittest.mock import Mock, patch, MagicMock
|
|
@@ -380,6 +381,39 @@ class TestZeroShotClassification:
|
|
|
380
381
|
|
|
381
382
|
assert "Research Context" in prompt
|
|
382
383
|
assert "Papers to Classify" in prompt
|
|
384
|
+
class TestExamplesOptional:
|
|
385
|
+
"""Default classification tolerates missing/insufficient examples."""
|
|
386
|
+
|
|
387
|
+
def test_classify_insufficient_examples_warns_not_raises(self, caplog):
|
|
388
|
+
"""Below-threshold examples warn and still build a prompt."""
|
|
389
|
+
session = ReviewSession(
|
|
390
|
+
query="test",
|
|
391
|
+
providers=["test"],
|
|
392
|
+
timestamp=datetime.now(),
|
|
393
|
+
research_context="I am studying X.",
|
|
394
|
+
)
|
|
395
|
+
session.decisions.append(
|
|
396
|
+
ReviewDecision(
|
|
397
|
+
paper=Paper(
|
|
398
|
+
title="Pending",
|
|
399
|
+
authors=["A"],
|
|
400
|
+
year=2024,
|
|
401
|
+
abstract="Abstract.",
|
|
402
|
+
),
|
|
403
|
+
provider="test",
|
|
404
|
+
status=DecisionStatus.PENDING,
|
|
405
|
+
)
|
|
406
|
+
)
|
|
407
|
+
|
|
408
|
+
with caplog.at_level(logging.WARNING):
|
|
409
|
+
prompt = classify_papers_with_llm(
|
|
410
|
+
session=session,
|
|
411
|
+
count=1,
|
|
412
|
+
dry_run=True,
|
|
413
|
+
)
|
|
414
|
+
|
|
415
|
+
assert "Papers to Classify" in prompt
|
|
416
|
+
assert "Proceeding with the available examples" in caplog.text
|
|
383
417
|
class TestLLMInteraction:
|
|
384
418
|
"""Tests for LLM interaction functions."""
|
|
385
419
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|