scholarcli 1.15__tar.gz → 1.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. {scholarcli-1.15/src/scholarcli.egg-info → scholarcli-1.20}/PKG-INFO +1 -1
  2. {scholarcli-1.15 → scholarcli-1.20}/pyproject.toml +1 -1
  3. {scholarcli-1.15 → scholarcli-1.20}/src/scholar/cli.py +37 -4
  4. {scholarcli-1.15 → scholarcli-1.20}/src/scholar/providers.py +253 -0
  5. {scholarcli-1.15 → scholarcli-1.20}/src/scholar/tui.py +13 -1
  6. {scholarcli-1.15 → scholarcli-1.20/src/scholarcli.egg-info}/PKG-INFO +1 -1
  7. {scholarcli-1.15 → scholarcli-1.20}/src/scholarcli.egg-info/SOURCES.txt +3 -0
  8. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/apis/aggregator.py +1 -1
  9. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/apis/google_scholar.py +6 -3
  10. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/cli.py +100 -73
  11. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/parsers/pdf_parser.py +223 -5
  12. scholarcli-1.20/src/snowball/services.py +337 -0
  13. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/snowballing.py +32 -5
  14. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/storage/json_storage.py +9 -2
  15. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/tui/app.py +122 -284
  16. scholarcli-1.20/src/snowball/tui/dialogs.py +212 -0
  17. scholarcli-1.20/src/snowball/tui/setup.py +325 -0
  18. {scholarcli-1.15 → scholarcli-1.20}/tests/test_cli.py +70 -0
  19. {scholarcli-1.15 → scholarcli-1.20}/tests/test_providers.py +302 -0
  20. scholarcli-1.20/tests/test_tui.py +31 -0
  21. scholarcli-1.15/tests/test_tui.py +0 -11
  22. {scholarcli-1.15 → scholarcli-1.20}/LICENSE +0 -0
  23. {scholarcli-1.15 → scholarcli-1.20}/README.md +0 -0
  24. {scholarcli-1.15 → scholarcli-1.20}/setup.cfg +0 -0
  25. {scholarcli-1.15 → scholarcli-1.20}/src/scholar/__init__.py +0 -0
  26. {scholarcli-1.15 → scholarcli-1.20}/src/scholar/__main__.py +0 -0
  27. {scholarcli-1.15 → scholarcli-1.20}/src/scholar/cache.py +0 -0
  28. {scholarcli-1.15 → scholarcli-1.20}/src/scholar/enrich.py +0 -0
  29. {scholarcli-1.15 → scholarcli-1.20}/src/scholar/llm_review.py +0 -0
  30. {scholarcli-1.15 → scholarcli-1.20}/src/scholar/notes.py +0 -0
  31. {scholarcli-1.15 → scholarcli-1.20}/src/scholar/pdf.py +0 -0
  32. {scholarcli-1.15 → scholarcli-1.20}/src/scholar/questionary.py +0 -0
  33. {scholarcli-1.15 → scholarcli-1.20}/src/scholar/review.py +0 -0
  34. {scholarcli-1.15 → scholarcli-1.20}/src/scholar/scholar.py +0 -0
  35. {scholarcli-1.15 → scholarcli-1.20}/src/scholar/utils.py +0 -0
  36. {scholarcli-1.15 → scholarcli-1.20}/src/scholarcli.egg-info/dependency_links.txt +0 -0
  37. {scholarcli-1.15 → scholarcli-1.20}/src/scholarcli.egg-info/entry_points.txt +0 -0
  38. {scholarcli-1.15 → scholarcli-1.20}/src/scholarcli.egg-info/requires.txt +0 -0
  39. {scholarcli-1.15 → scholarcli-1.20}/src/scholarcli.egg-info/top_level.txt +0 -0
  40. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/__init__.py +0 -0
  41. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/apis/__init__.py +0 -0
  42. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/apis/arxiv.py +0 -0
  43. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/apis/base.py +0 -0
  44. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/apis/crossref.py +0 -0
  45. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/apis/openalex.py +0 -0
  46. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/apis/opencitations.py +0 -0
  47. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/apis/semantic_scholar.py +0 -0
  48. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/exporters/__init__.py +0 -0
  49. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/exporters/bibtex.py +0 -0
  50. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/exporters/csv_exporter.py +0 -0
  51. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/exporters/tikz.py +0 -0
  52. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/filters/__init__.py +0 -0
  53. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/filters/filter_engine.py +0 -0
  54. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/models.py +0 -0
  55. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/paper_utils.py +0 -0
  56. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/parsers/__init__.py +0 -0
  57. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/scoring/__init__.py +0 -0
  58. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/scoring/base.py +0 -0
  59. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/scoring/llm_scorer.py +0 -0
  60. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/scoring/tfidf_scorer.py +0 -0
  61. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/storage/__init__.py +0 -0
  62. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/tui/__init__.py +0 -0
  63. {scholarcli-1.15 → scholarcli-1.20}/src/snowball/visualization.py +0 -0
  64. {scholarcli-1.15 → scholarcli-1.20}/src/tuxedo/__init__.py +0 -0
  65. {scholarcli-1.15 → scholarcli-1.20}/src/tuxedo/analysis.py +0 -0
  66. {scholarcli-1.15 → scholarcli-1.20}/src/tuxedo/cli.py +0 -0
  67. {scholarcli-1.15 → scholarcli-1.20}/src/tuxedo/clustering.py +0 -0
  68. {scholarcli-1.15 → scholarcli-1.20}/src/tuxedo/database.py +0 -0
  69. {scholarcli-1.15 → scholarcli-1.20}/src/tuxedo/grobid.py +0 -0
  70. {scholarcli-1.15 → scholarcli-1.20}/src/tuxedo/logging.py +0 -0
  71. {scholarcli-1.15 → scholarcli-1.20}/src/tuxedo/models.py +0 -0
  72. {scholarcli-1.15 → scholarcli-1.20}/src/tuxedo/project.py +0 -0
  73. {scholarcli-1.15 → scholarcli-1.20}/src/tuxedo/tui.py +0 -0
  74. {scholarcli-1.15 → scholarcli-1.20}/tests/test_cache.py +0 -0
  75. {scholarcli-1.15 → scholarcli-1.20}/tests/test_enrich.py +0 -0
  76. {scholarcli-1.15 → scholarcli-1.20}/tests/test_llm_review.py +0 -0
  77. {scholarcli-1.15 → scholarcli-1.20}/tests/test_notes.py +0 -0
  78. {scholarcli-1.15 → scholarcli-1.20}/tests/test_pdf.py +0 -0
  79. {scholarcli-1.15 → scholarcli-1.20}/tests/test_review.py +0 -0
  80. {scholarcli-1.15 → scholarcli-1.20}/tests/test_scholar.py +0 -0
  81. {scholarcli-1.15 → scholarcli-1.20}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scholarcli
3
- Version: 1.15
3
+ Version: 1.20
4
4
  Summary: A tool for structured literature searches across bibliographic databases
5
5
  Author-email: Daniel Bosk <dbosk@kth.se>, Ric Glassey <glassey@kth.se>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "scholarcli"
3
- version = "1.15"
3
+ version = "1.20"
4
4
  description = "A tool for structured literature searches across bibliographic databases"
5
5
  authors = [{ name = "Daniel Bosk", email = "dbosk@kth.se" },
6
6
  { name = "Ric Glassey", email = "glassey@kth.se" }]
@@ -1083,6 +1083,11 @@ def providers() -> None:
1083
1083
  "env_var": "IEEE_API_KEY",
1084
1084
  "how_to_get": "developer.ieee.org",
1085
1085
  },
1086
+ "scopus": {
1087
+ "required": True,
1088
+ "env_var": "SCOPUS_API_KEY",
1089
+ "how_to_get": "dev.elsevier.com",
1090
+ },
1086
1091
  }
1087
1092
 
1088
1093
  for provider in get_all_providers():
@@ -1181,6 +1186,14 @@ def syntax() -> None:
1181
1186
  "[green]✓[/]",
1182
1187
  "Must be UPPERCASE, supports ONEAR",
1183
1188
  )
1189
+ table.add_row(
1190
+ "scopus",
1191
+ "[green]✓[/]",
1192
+ "[green]✓[/]",
1193
+ "[green]✓[/]",
1194
+ "[green]✓[/]",
1195
+ "Must be UPPERCASE, supports W/n PRE/n proximity",
1196
+ )
1184
1197
  table.add_row(
1185
1198
  "arxiv",
1186
1199
  "[green]✓[/]",
@@ -1236,6 +1249,13 @@ def syntax() -> None:
1236
1249
  "[green]✓[/]",
1237
1250
  "Max 5 wildcards per search",
1238
1251
  )
1252
+ table2.add_row(
1253
+ "scopus",
1254
+ "[green]✓[/] * ?",
1255
+ '[green]✓[/] "..."',
1256
+ "[green]✓[/] TITLE() AUTH()",
1257
+ "TITLE-ABS-KEY(), AUTH(), SRCTITLE()",
1258
+ )
1239
1259
  table2.add_row(
1240
1260
  "arxiv",
1241
1261
  "[red]✗[/]",
@@ -1255,6 +1275,7 @@ def syntax() -> None:
1255
1275
  ("dblp", "machine learning privacy [space = AND]"),
1256
1276
  ("wos", 'TS=("machine learning" AND privacy)'),
1257
1277
  ("ieee", '"machine learning" AND privacy NOT survey'),
1278
+ ("scopus", 'TITLE-ABS-KEY("machine learning" AND privacy)'),
1258
1279
  ("arxiv", 'ti:"machine learning" AND cat:cs.AI'),
1259
1280
  ]
1260
1281
  for provider, example in examples:
@@ -1284,6 +1305,7 @@ def syntax() -> None:
1284
1305
  "ieee",
1285
1306
  "https://ieeexplore.ieee.org/Xplorehelp/searching-ieee-xplore/command-search",
1286
1307
  ),
1308
+ ("scopus", "https://dev.elsevier.com/sc_search_tips.html"),
1287
1309
  (
1288
1310
  "arxiv",
1289
1311
  "https://info.arxiv.org/help/api/user-manual.html#query_details",
@@ -2038,7 +2060,7 @@ def sessions_show(
2038
2060
  con.print()
2039
2061
 
2040
2062
  # Show kept papers
2041
- kept = session.kept_papers
2063
+ kept = session.kept_papers()
2042
2064
  if kept:
2043
2065
  con.print(f"[green bold]Kept ({len(kept)}):[/green bold]")
2044
2066
  for d in kept:
@@ -2055,7 +2077,7 @@ def sessions_show(
2055
2077
  con.print()
2056
2078
 
2057
2079
  # Show discarded papers
2058
- discarded = session.discarded_papers
2080
+ discarded = session.discarded_papers()
2059
2081
  if discarded:
2060
2082
  con.print(
2061
2083
  f"[red bold]Discarded ({len(discarded)}):[/red bold]"
@@ -2074,7 +2096,7 @@ def sessions_show(
2074
2096
  con.print()
2075
2097
 
2076
2098
  # Show pending papers
2077
- pending = session.pending_papers
2099
+ pending = session.pending_papers()
2078
2100
  if pending:
2079
2101
  con.print(
2080
2102
  f"[yellow bold]Pending ({len(pending)}):[/yellow bold]"
@@ -2543,15 +2565,25 @@ def llm_classify(
2543
2565
  help="Skip automatic enrichment of papers without abstracts.",
2544
2566
  ),
2545
2567
  ] = False,
2568
+ no_examples: Annotated[
2569
+ bool,
2570
+ typer.Option(
2571
+ "--no-examples",
2572
+ help="Run without requiring tagged examples (zero-shot).",
2573
+ ),
2574
+ ] = False,
2546
2575
  ) -> None:
2547
2576
  """
2548
2577
  Classify pending papers using LLM.
2549
2578
 
2550
2579
  Uses human-reviewed papers as training examples. Requires at least
2551
- 5 tagged examples (minimum 1 kept, 1 discarded) before classification.
2580
+ 5 tagged examples (minimum 1 kept, 1 discarded) unless --no-examples
2581
+ is given, which runs zero-shot classification using only the research
2582
+ context.
2552
2583
 
2553
2584
  Example:
2554
2585
  scholar llm classify "my review" --count 20
2586
+ scholar llm classify "my review" --no-examples
2555
2587
  """
2556
2588
  import scholar.review as review
2557
2589
  from scholar.review import save_session
@@ -2582,6 +2614,7 @@ def llm_classify(
2582
2614
  model_id=select_model_id(model_selection, "analytic"),
2583
2615
  enrich_missing=not no_enrich,
2584
2616
  dry_run=dry_run,
2617
+ require_examples=not no_examples,
2585
2618
  )
2586
2619
 
2587
2620
  if dry_run:
@@ -43,6 +43,7 @@ WOS_STARTER_API_URL = (
43
43
  WOS_EXPANDED_API_URL = "https://wos-api.clarivate.com/api/wos"
44
44
  _WOS_NOT_PROVIDED = object() # Sentinel for "argument not passed"
45
45
  IEEE_API_URL = "https://ieeexploreapi.ieee.org/api/v1/search/articles"
46
+ SCOPUS_API_URL = "https://api.elsevier.com/content/search/scopus"
46
47
 
47
48
 
48
49
  class SearchProvider(Protocol):
@@ -1683,6 +1684,8 @@ class WebOfScienceProvider:
1683
1684
  "p",
1684
1685
  default=None,
1685
1686
  )
1687
+ if isinstance(abstract, list):
1688
+ abstract = "\n\n".join(str(p) for p in abstract if p) or None
1686
1689
 
1687
1690
  # Extract venue (source title)
1688
1691
  venue = None
@@ -2787,3 +2790,253 @@ class ArxivProvider:
2787
2790
 
2788
2791
  # Register the provider on module import
2789
2792
  register_provider(ArxivProvider())
2793
+
2794
+
2795
class ScopusProvider:
    """Search provider for Elsevier Scopus.

    Queries the Scopus Search API (``SCOPUS_API_URL``) with an API key and
    an optional institutional token, pages through the results and converts
    every entry into a ``Paper``.
    """

    name = "scopus"
    MAX_LIMIT = 25  # Scopus returns max 25 per page

    # Generic publication type names mapped to Scopus DOCTYPE codes.
    _DOC_TYPE_CODES = {
        "article": "ar",
        "conference": "cp",
        "review": "re",
        "book": "bk",
    }

    # Log messages for the HTTP failures that have a known cause.
    _HTTP_ERROR_MESSAGES = {
        401: (
            "scopus: Authentication failed (HTTP 401). "
            "Check your SCOPUS_API_KEY at "
            "https://dev.elsevier.com/"
        ),
        429: (
            "scopus: Rate limited (HTTP 429). "
            "Wait before making more requests."
        ),
        403: (
            "scopus: Access denied (HTTP 403). "
            "Your API key may lack Scopus Search "
            "permissions."
        ),
    }

    def __init__(
        self,
        api_key: str | None = None,
        inst_token: str | None = None,
    ):
        """Initialize the Scopus provider.

        Args:
            api_key: API key for Scopus API. Falls back
                to SCOPUS_API_KEY environment variable.
            inst_token: Institutional token for extended
                access. Falls back to SCOPUS_INST_TOKEN.
        """
        self.api_key = api_key or os.environ.get("SCOPUS_API_KEY")
        self.inst_token = inst_token or os.environ.get("SCOPUS_INST_TOKEN")
        self._cache: dict = load_cache(self.name)
        register_cache(self.name, self._cache)

    def is_available(self) -> bool:
        """Scopus requires an API key."""
        return bool(self.api_key)

    # NOTE(review): the cache wraps the whole method, so the partial or
    # empty list returned after an HTTP/network error appears to be cached
    # as well -- confirm that this is intended.
    @cachedmethod(
        lambda self: self._cache,
        key=lambda self, query, limit=100, filters=None: (
            query,
            limit,
            filters.cache_key() if filters else "",
        ),
    )
    def search(
        self,
        query: str,
        limit: int = 100,
        filters: SearchFilters | None = None,
    ) -> list[Paper]:
        """Search Scopus for papers matching the query.

        Fetches multiple pages when the requested limit
        exceeds the per-request maximum of 25 results.

        Args:
            query: Query in Scopus search syntax.
            limit: Maximum number of papers to return.
            filters: Optional filters that are translated into
                additional Scopus query clauses.

        Returns:
            At most ``limit`` papers. When a request fails, the papers
            collected so far are returned (possibly an empty list); no
            exception is propagated to the caller.
        """
        if not self.api_key:
            return []

        logger.debug(
            "scopus: Searching for '%s' with limit=%d",
            query,
            limit,
        )

        search_query = self._build_query(query, filters)

        # The headers do not change between pages, so build them once.
        headers = {
            "X-ELS-APIKey": self.api_key,
            "Accept": "application/json",
        }
        if self.inst_token:
            headers["X-ELS-Insttoken"] = self.inst_token

        all_papers: list[Paper] = []
        start = 0

        try:
            while len(all_papers) < limit:
                remaining = limit - len(all_papers)
                params = {
                    "query": search_query,
                    "start": start,
                    "count": min(self.MAX_LIMIT, remaining),
                }

                response = requests.get(
                    SCOPUS_API_URL,
                    headers=headers,
                    params=params,
                    timeout=30,
                )
                if response.status_code != 200:
                    self._log_http_error(response)
                response.raise_for_status()
                data = response.json()

                results = data.get("search-results", {})
                entries = results.get("entry", [])

                # An empty page, or a page holding nothing but a single
                # placeholder entry, means there is nothing (more) to read.
                if not entries or (
                    len(entries) == 1 and self._is_placeholder(entries[0])
                ):
                    break

                all_papers.extend(
                    self._convert_entry(entry)
                    for entry in entries
                    if not self._is_placeholder(entry)
                )
                # Advance by everything the API returned, placeholders
                # included, so the next request starts at the right offset.
                start += len(entries)

                total = int(results.get("opensearch:totalResults", 0))
                if start >= total:
                    break
        except requests.exceptions.HTTPError:
            # Already reported by _log_http_error; keep what we have.
            pass
        except Exception as e:
            logger.warning("scopus: %s", e)
        else:
            logger.debug(
                "scopus: Retrieved %d papers",
                len(all_papers),
            )

        # Defensive cap in case the API returns more entries than requested.
        return all_papers[:limit]

    def _build_query(
        self,
        query: str,
        filters: SearchFilters | None,
    ) -> str:
        """Return the query with the given filters appended as AND clauses."""
        if not filters:
            return query

        clauses = []

        if filters.year:
            start_year, end_year = filters.year_range()
            # Scopus only offers strict comparisons for PUBYEAR, so an
            # inclusive bound is written as "> start - 1" / "< end + 1".
            if start_year and end_year:
                if start_year == end_year:
                    clauses.append(f"PUBYEAR = {start_year}")
                else:
                    clauses.append(
                        f"PUBYEAR > {start_year - 1} "
                        f"AND PUBYEAR < {end_year + 1}"
                    )
            elif start_year:
                clauses.append(f"PUBYEAR > {start_year - 1}")
            elif end_year:
                clauses.append(f"PUBYEAR < {end_year + 1}")

        if filters.open_access:
            clauses.append("OPENACCESS(1)")

        if filters.venue:
            clauses.append(f"SRCTITLE({filters.venue})")

        if filters.pub_types:
            doc_types = []
            for pt in filters.pub_types:
                mapped = self._DOC_TYPE_CODES.get(pt.lower())
                if mapped:
                    doc_types.append(mapped)
                else:
                    logger.warning(
                        "scopus: Publication type '%s' "
                        "not supported, ignoring",
                        pt,
                    )
            if doc_types:
                dtype_clause = " OR ".join(
                    f"DOCTYPE({dt})" for dt in doc_types
                )
                clauses.append(f"({dtype_clause})")

        if filters.min_citations is not None:
            logger.warning(
                "scopus: Citation count filter not supported, ignoring"
            )

        if not clauses:
            return query
        return f"({query}) AND " + " AND ".join(clauses)

    @classmethod
    def _log_http_error(cls, response) -> None:
        """Log a warning describing a non-200 Scopus response."""
        message = cls._HTTP_ERROR_MESSAGES.get(response.status_code)
        if message is not None:
            logger.warning(message)
        else:
            logger.warning(
                "scopus: API error (HTTP %d): %s",
                response.status_code,
                response.text[:200],
            )

    @staticmethod
    def _is_placeholder(entry: dict) -> bool:
        """Return True for entries that do not describe a real document.

        Covers entries flagged with ``"@_fa": "false"`` and entries that
        carry an ``error`` message.
        """
        # NOTE(review): Scopus appears to report an empty result set as a
        # single entry with an "error" key -- confirm against the API docs.
        return entry.get("@_fa") == "false" or bool(entry.get("error"))

    def _convert_entry(self, entry: dict) -> Paper:
        """Convert a Scopus search entry to a Paper.

        Missing or malformed fields are mapped to ``None`` (or an empty
        author list / empty title) instead of raising.
        """
        # The publication year is the first four characters of the
        # cover date.
        year = None
        cover_date = entry.get("prism:coverDate")
        if cover_date:
            try:
                year = int(cover_date[:4])
            except (ValueError, TypeError):
                pass

        # Only "dc:creator" is read here, so at most one author is set.
        authors = []
        creator = entry.get("dc:creator")
        if creator:
            authors.append(creator)

        # Use the link that points at the Scopus record page, if any.
        url = None
        for link in entry.get("link") or []:
            if isinstance(link, dict) and link.get("@ref") == "scopus":
                url = link.get("@href")
                break

        # Author keywords arrive as one "|"-separated string.
        keywords = None
        auth_kw = entry.get("authkeywords")
        if auth_kw and isinstance(auth_kw, str):
            keywords = [kw.strip() for kw in auth_kw.split("|") if kw.strip()]

        citation_count = None
        cited_by = entry.get("citedby-count")
        if cited_by is not None:
            try:
                citation_count = int(cited_by)
            except (ValueError, TypeError):
                pass

        return Paper(
            title=entry.get("dc:title", "") or "",
            authors=authors,
            year=year,
            doi=entry.get("prism:doi"),
            abstract=entry.get("dc:description"),
            venue=entry.get("prism:publicationName"),
            url=url,
            citation_count=citation_count,
            keywords=keywords,
            sources=[self.name],
        )
3039
+
3040
+
3041
+ # Register the provider on module import
3042
+ register_provider(ScopusProvider())
@@ -153,6 +153,18 @@ class PaperListItem(ListItem):
153
153
  )
154
154
 
155
155
 
156
def normalize_abstract(abstract) -> str:
    """Return an abstract as a plain string.

    Some providers return the abstract as a list of paragraph strings
    instead of a single string. Such sequences are joined with blank
    lines so paragraph breaks are preserved in the TUI display.

    Args:
        abstract: A string, a list or tuple of paragraphs, or None.

    Returns:
        The abstract text, or "" when there is nothing to show. The
        result is always a ``str``, so it can be passed on to helpers
        that only accept strings.
    """
    if isinstance(abstract, (list, tuple)):
        # Empty/None paragraphs are dropped so they do not produce
        # stray blank lines.
        return "\n\n".join(str(p) for p in abstract if p)
    if not abstract:
        return ""
    # Coerce anything else so the declared return type always holds.
    return str(abstract)
166
+
167
+
156
168
  class AbstractScreen(Screen[None]):
157
169
  """Full-screen view of a paper's abstract and details."""
158
170
 
@@ -217,7 +229,7 @@ class AbstractScreen(Screen[None]):
217
229
  yield Static("")
218
230
  yield Static("[bold]Abstract:[/bold]")
219
231
  if paper.abstract:
220
- yield Static(escape(paper.abstract))
232
+ yield Static(escape(normalize_abstract(paper.abstract)))
221
233
  else:
222
234
  if paper.pdf_url:
223
235
  yield Static(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scholarcli
3
- Version: 1.15
3
+ Version: 1.20
4
4
  Summary: A tool for structured literature searches across bibliographic databases
5
5
  Author-email: Daniel Bosk <dbosk@kth.se>, Ric Glassey <glassey@kth.se>
6
6
  License-Expression: MIT
@@ -25,6 +25,7 @@ src/snowball/__init__.py
25
25
  src/snowball/cli.py
26
26
  src/snowball/models.py
27
27
  src/snowball/paper_utils.py
28
+ src/snowball/services.py
28
29
  src/snowball/snowballing.py
29
30
  src/snowball/visualization.py
30
31
  src/snowball/apis/__init__.py
@@ -52,6 +53,8 @@ src/snowball/storage/__init__.py
52
53
  src/snowball/storage/json_storage.py
53
54
  src/snowball/tui/__init__.py
54
55
  src/snowball/tui/app.py
56
+ src/snowball/tui/dialogs.py
57
+ src/snowball/tui/setup.py
55
58
  src/tuxedo/__init__.py
56
59
  src/tuxedo/analysis.py
57
60
  src/tuxedo/cli.py
@@ -206,7 +206,7 @@ class APIAggregator:
206
206
  if "google_scholar" in self.clients and paper.title:
207
207
  try:
208
208
  # Google Scholar returns dicts, convert to Paper objects
209
- gs_limit = min(limit, 50) # Limit GS to 50 due to rate limiting
209
+ gs_limit = min(limit, 20) # Keep Scholar fallback batches deliberately small.
210
210
  gs_citations = self.clients["google_scholar"].get_citations(
211
211
  paper.title, gs_limit
212
212
  )
@@ -6,6 +6,8 @@ from typing import Optional, Tuple, List
6
6
 
7
7
  logger = logging.getLogger(__name__)
8
8
 
9
+ DEFAULT_RATE_LIMIT_DELAY = 15.0
10
+
9
11
 
10
12
  class GoogleScholarClient:
11
13
  """Client for fetching citation counts from Google Scholar.
@@ -17,7 +19,7 @@ class GoogleScholarClient:
17
19
 
18
20
  def __init__(
19
21
  self,
20
- rate_limit_delay: float = 2.0,
22
+ rate_limit_delay: float = DEFAULT_RATE_LIMIT_DELAY,
21
23
  proxy: Optional[str] = None,
22
24
  use_free_proxy: bool = False,
23
25
  ):
@@ -25,7 +27,8 @@ class GoogleScholarClient:
25
27
 
26
28
  Args:
27
29
  rate_limit_delay: Delay between requests in seconds.
28
- Default is 2 seconds to avoid rate limiting.
30
+ Default is intentionally conservative because
31
+ Google Scholar is scraped, not an official API.
29
32
  proxy: HTTP/HTTPS proxy URL (e.g., "http://user:pass@host:port")
30
33
  use_free_proxy: Use free rotating proxies via free-proxy library
31
34
  """
@@ -186,7 +189,7 @@ class GoogleScholarClient:
186
189
 
187
190
  return similarity >= threshold
188
191
 
189
- def get_citations(self, title: str, limit: int = 50) -> List[dict]:
192
+ def get_citations(self, title: str, limit: int = 20) -> List[dict]:
190
193
  """Get papers that cite a given paper (forward citations).
191
194
 
192
195
  Args: