scholarcli 1.21__tar.gz → 1.23__tar.gz

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (82)
  1. {scholarcli-1.21 → scholarcli-1.23}/PKG-INFO +2 -2
  2. {scholarcli-1.21 → scholarcli-1.23}/README.md +1 -1
  3. {scholarcli-1.21 → scholarcli-1.23}/pyproject.toml +1 -1
  4. {scholarcli-1.21 → scholarcli-1.23}/src/scholar/cli.py +8 -5
  5. {scholarcli-1.21 → scholarcli-1.23}/src/scholar/llm_review.py +15 -8
  6. {scholarcli-1.21 → scholarcli-1.23}/src/scholarcli.egg-info/PKG-INFO +2 -2
  7. {scholarcli-1.21 → scholarcli-1.23}/tests/test_llm_review.py +34 -0
  8. {scholarcli-1.21 → scholarcli-1.23}/LICENSE +0 -0
  9. {scholarcli-1.21 → scholarcli-1.23}/setup.cfg +0 -0
  10. {scholarcli-1.21 → scholarcli-1.23}/src/scholar/__init__.py +0 -0
  11. {scholarcli-1.21 → scholarcli-1.23}/src/scholar/__main__.py +0 -0
  12. {scholarcli-1.21 → scholarcli-1.23}/src/scholar/cache.py +0 -0
  13. {scholarcli-1.21 → scholarcli-1.23}/src/scholar/crossref.py +0 -0
  14. {scholarcli-1.21 → scholarcli-1.23}/src/scholar/enrich.py +0 -0
  15. {scholarcli-1.21 → scholarcli-1.23}/src/scholar/notes.py +0 -0
  16. {scholarcli-1.21 → scholarcli-1.23}/src/scholar/pdf.py +0 -0
  17. {scholarcli-1.21 → scholarcli-1.23}/src/scholar/providers.py +0 -0
  18. {scholarcli-1.21 → scholarcli-1.23}/src/scholar/questionary.py +0 -0
  19. {scholarcli-1.21 → scholarcli-1.23}/src/scholar/review.py +0 -0
  20. {scholarcli-1.21 → scholarcli-1.23}/src/scholar/scholar.py +0 -0
  21. {scholarcli-1.21 → scholarcli-1.23}/src/scholar/tui.py +0 -0
  22. {scholarcli-1.21 → scholarcli-1.23}/src/scholar/utils.py +0 -0
  23. {scholarcli-1.21 → scholarcli-1.23}/src/scholarcli.egg-info/SOURCES.txt +0 -0
  24. {scholarcli-1.21 → scholarcli-1.23}/src/scholarcli.egg-info/dependency_links.txt +0 -0
  25. {scholarcli-1.21 → scholarcli-1.23}/src/scholarcli.egg-info/entry_points.txt +0 -0
  26. {scholarcli-1.21 → scholarcli-1.23}/src/scholarcli.egg-info/requires.txt +0 -0
  27. {scholarcli-1.21 → scholarcli-1.23}/src/scholarcli.egg-info/top_level.txt +0 -0
  28. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/__init__.py +0 -0
  29. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/apis/__init__.py +0 -0
  30. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/apis/aggregator.py +0 -0
  31. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/apis/arxiv.py +0 -0
  32. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/apis/base.py +0 -0
  33. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/apis/crossref.py +0 -0
  34. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/apis/google_scholar.py +0 -0
  35. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/apis/openalex.py +0 -0
  36. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/apis/opencitations.py +0 -0
  37. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/apis/semantic_scholar.py +0 -0
  38. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/cli.py +0 -0
  39. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/exporters/__init__.py +0 -0
  40. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/exporters/bibtex.py +0 -0
  41. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/exporters/csv_exporter.py +0 -0
  42. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/exporters/tikz.py +0 -0
  43. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/filters/__init__.py +0 -0
  44. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/filters/filter_engine.py +0 -0
  45. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/models.py +0 -0
  46. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/paper_utils.py +0 -0
  47. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/parsers/__init__.py +0 -0
  48. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/parsers/pdf_parser.py +0 -0
  49. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/scoring/__init__.py +0 -0
  50. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/scoring/base.py +0 -0
  51. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/scoring/llm_scorer.py +0 -0
  52. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/scoring/tfidf_scorer.py +0 -0
  53. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/services.py +0 -0
  54. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/snowballing.py +0 -0
  55. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/storage/__init__.py +0 -0
  56. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/storage/json_storage.py +0 -0
  57. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/tui/__init__.py +0 -0
  58. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/tui/app.py +0 -0
  59. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/tui/dialogs.py +0 -0
  60. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/tui/setup.py +0 -0
  61. {scholarcli-1.21 → scholarcli-1.23}/src/snowball/visualization.py +0 -0
  62. {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/__init__.py +0 -0
  63. {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/analysis.py +0 -0
  64. {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/cli.py +0 -0
  65. {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/clustering.py +0 -0
  66. {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/database.py +0 -0
  67. {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/grobid.py +0 -0
  68. {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/logging.py +0 -0
  69. {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/models.py +0 -0
  70. {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/project.py +0 -0
  71. {scholarcli-1.21 → scholarcli-1.23}/src/tuxedo/tui.py +0 -0
  72. {scholarcli-1.21 → scholarcli-1.23}/tests/test_cache.py +0 -0
  73. {scholarcli-1.21 → scholarcli-1.23}/tests/test_cli.py +0 -0
  74. {scholarcli-1.21 → scholarcli-1.23}/tests/test_crossref.py +0 -0
  75. {scholarcli-1.21 → scholarcli-1.23}/tests/test_enrich.py +0 -0
  76. {scholarcli-1.21 → scholarcli-1.23}/tests/test_notes.py +0 -0
  77. {scholarcli-1.21 → scholarcli-1.23}/tests/test_pdf.py +0 -0
  78. {scholarcli-1.21 → scholarcli-1.23}/tests/test_providers.py +0 -0
  79. {scholarcli-1.21 → scholarcli-1.23}/tests/test_review.py +0 -0
  80. {scholarcli-1.21 → scholarcli-1.23}/tests/test_scholar.py +0 -0
  81. {scholarcli-1.21 → scholarcli-1.23}/tests/test_tui.py +0 -0
  82. {scholarcli-1.21 → scholarcli-1.23}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scholarcli
3
- Version: 1.21
3
+ Version: 1.23
4
4
  Summary: A tool for structured literature searches across bibliographic databases
5
5
  Author-email: Daniel Bosk <dbosk@kth.se>, Ric Glassey <glassey@kth.se>
6
6
  License-Expression: MIT
@@ -303,7 +303,7 @@ scholar llm classify "session-name" --count 10
303
303
 
304
304
  ### How It Works
305
305
 
306
- 1. **Tag some papers manually** - The LLM needs examples to learn from. Review at least 5 papers with tags (themes for kept, motivations for discarded).
306
+ 1. **Tag some papers manually** (recommended) - Examples help the LLM learn your criteria. Tagging ~5 papers (themes for kept, motivations for discarded) improves quality, but classification will still run — with a warning — if you have fewer or none.
307
307
 
308
308
  2. **Set research context** (optional) - Describe your review's focus to help the LLM understand relevance criteria.
309
309
 
@@ -263,7 +263,7 @@ scholar llm classify "session-name" --count 10
263
263
 
264
264
  ### How It Works
265
265
 
266
- 1. **Tag some papers manually** - The LLM needs examples to learn from. Review at least 5 papers with tags (themes for kept, motivations for discarded).
266
+ 1. **Tag some papers manually** (recommended) - Examples help the LLM learn your criteria. Tagging ~5 papers (themes for kept, motivations for discarded) improves quality, but classification will still run — with a warning — if you have fewer or none.
267
267
 
268
268
  2. **Set research context** (optional) - Describe your review's focus to help the LLM understand relevance criteria.
269
269
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "scholarcli"
3
- version = "1.21"
3
+ version = "1.23"
4
4
  description = "A tool for structured literature searches across bibliographic databases"
5
5
  authors = [{ name = "Daniel Bosk", email = "dbosk@kth.se" },
6
6
  { name = "Ric Glassey", email = "glassey@kth.se" }]
@@ -3450,7 +3450,8 @@ def llm_classify(
3450
3450
  bool,
3451
3451
  typer.Option(
3452
3452
  "--no-examples",
3453
- help="Run without requiring tagged examples (zero-shot).",
3453
+ help="Skip examples entirely for zero-shot classification, "
3454
+ "even if some exist.",
3454
3455
  ),
3455
3456
  ] = False,
3456
3457
  full_text: Annotated[
@@ -3465,10 +3466,12 @@ def llm_classify(
3465
3466
  """
3466
3467
  Classify pending papers using LLM.
3467
3468
 
3468
- Uses human-reviewed papers as training examples. Requires at least
3469
- 5 tagged examples (minimum 1 kept, 1 discarded) unless --no-examples
3470
- is given, which runs zero-shot classification using only the research
3471
- context.
3469
+ Uses human-reviewed papers as training examples when available. Tagged
3470
+ examples are recommended (ideally 5+, with at least 1 kept and 1
3471
+ discarded) but no longer required: if too few exist, a warning is shown
3472
+ and classification proceeds with whatever examples are present. Pass
3473
+ --no-examples to skip examples entirely and run zero-shot classification
3474
+ using only the research context, even when examples are available.
3472
3475
 
3473
3476
  With --full-text, supporting passages from each paper's PDF (matched
3474
3477
  against the research context) are added to the prompt, so classification
@@ -537,7 +537,8 @@ def classify_papers_with_llm(
537
537
 
538
538
  This is the main entry point for LLM-assisted classification. It:
539
539
  1. Optionally gathers training examples from human-reviewed papers
540
- 2. Optionally validates minimum example requirements
540
+ 2. Warns (but does not fail) if examples fall below the recommended
541
+ minimum, then proceeds with whatever examples exist
541
542
  3. Optionally enriches papers lacking abstracts
542
543
  4. Optionally extracts full-text supporting passages
543
544
  5. Constructs a prompt with examples and papers to classify
@@ -550,9 +551,11 @@ def classify_papers_with_llm(
550
551
  model_id: Deprecated compatibility alias for selecting the analytic model
551
552
  enrich_missing: Whether to auto-enrich papers without abstracts
552
553
  dry_run: If True, return the prompt without calling LLM
553
- require_examples: If True (default), require tagged example papers
554
- before classification. Set False for zero-shot classification
555
- using only the research context.
554
+ require_examples: If True (default), gather tagged example papers
555
+ and use them to ground the LLM. Insufficient or missing examples
556
+ produce a warning, not an error. Set False to skip example
557
+ gathering entirely for zero-shot classification using only the
558
+ research context.
556
559
  use_fulltext: If True, extract supporting passages from each paper's
557
560
  full text (matched against the research context) and include them
558
561
  in the prompt, so classification can use evidence beyond the
@@ -563,18 +566,22 @@ def classify_papers_with_llm(
563
566
  LLMBatchResult with decisions, or prompt string if dry_run=True
564
567
 
565
568
  Raises:
566
- ValueError: If there are no papers to classify, or if
567
- require_examples=True and there are insufficient examples
569
+ ValueError: If there are no pending papers to classify
568
570
  ImportError: If llm package is not installed
569
571
  """
570
572
  # Gather examples (optional)
571
573
  if require_examples:
572
574
  kept_examples, discarded_examples = get_example_decisions(session)
573
575
 
574
- # Validate
576
+ # Below-threshold examples are a quality concern, not a hard stop:
577
+ # warn and continue with whatever was gathered (possibly none) so
578
+ # classification still runs early in a review.
575
579
  is_valid, error = validate_examples(kept_examples, discarded_examples)
576
580
  if not is_valid:
577
- raise ValueError(error)
581
+ logger.warning(
582
+ f"{error} Proceeding with the available examples; "
583
+ "classification quality may be reduced."
584
+ )
578
585
  else:
579
586
  kept_examples = []
580
587
  discarded_examples = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scholarcli
3
- Version: 1.21
3
+ Version: 1.23
4
4
  Summary: A tool for structured literature searches across bibliographic databases
5
5
  Author-email: Daniel Bosk <dbosk@kth.se>, Ric Glassey <glassey@kth.se>
6
6
  License-Expression: MIT
@@ -303,7 +303,7 @@ scholar llm classify "session-name" --count 10
303
303
 
304
304
  ### How It Works
305
305
 
306
- 1. **Tag some papers manually** - The LLM needs examples to learn from. Review at least 5 papers with tags (themes for kept, motivations for discarded).
306
+ 1. **Tag some papers manually** (recommended) - Examples help the LLM learn your criteria. Tagging ~5 papers (themes for kept, motivations for discarded) improves quality, but classification will still run — with a warning — if you have fewer or none.
307
307
 
308
308
  2. **Set research context** (optional) - Describe your review's focus to help the LLM understand relevance criteria.
309
309
 
@@ -1,5 +1,6 @@
1
1
  """Tests for the LLM review module."""
2
2
  import json
3
+ import logging
3
4
  import pytest
4
5
  from datetime import datetime
5
6
  from unittest.mock import Mock, patch, MagicMock
@@ -380,6 +381,39 @@ class TestZeroShotClassification:
380
381
 
381
382
  assert "Research Context" in prompt
382
383
  assert "Papers to Classify" in prompt
384
+ class TestExamplesOptional:
385
+ """Default classification tolerates missing/insufficient examples."""
386
+
387
+ def test_classify_insufficient_examples_warns_not_raises(self, caplog):
388
+ """Below-threshold examples warn and still build a prompt."""
389
+ session = ReviewSession(
390
+ query="test",
391
+ providers=["test"],
392
+ timestamp=datetime.now(),
393
+ research_context="I am studying X.",
394
+ )
395
+ session.decisions.append(
396
+ ReviewDecision(
397
+ paper=Paper(
398
+ title="Pending",
399
+ authors=["A"],
400
+ year=2024,
401
+ abstract="Abstract.",
402
+ ),
403
+ provider="test",
404
+ status=DecisionStatus.PENDING,
405
+ )
406
+ )
407
+
408
+ with caplog.at_level(logging.WARNING):
409
+ prompt = classify_papers_with_llm(
410
+ session=session,
411
+ count=1,
412
+ dry_run=True,
413
+ )
414
+
415
+ assert "Papers to Classify" in prompt
416
+ assert "Proceeding with the available examples" in caplog.text
383
417
  class TestLLMInteraction:
384
418
  """Tests for LLM interaction functions."""
385
419
 
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes