flonat-research 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/domain-reviewer.md +336 -0
- package/.claude/agents/fixer.md +226 -0
- package/.claude/agents/paper-critic.md +370 -0
- package/.claude/agents/peer-reviewer.md +289 -0
- package/.claude/agents/proposal-reviewer.md +215 -0
- package/.claude/agents/referee2-reviewer.md +367 -0
- package/.claude/agents/references/journal-referee-profiles.md +354 -0
- package/.claude/agents/references/paper-critic/council-personas.md +77 -0
- package/.claude/agents/references/paper-critic/council-prompts.md +198 -0
- package/.claude/agents/references/peer-reviewer/report-template.md +199 -0
- package/.claude/agents/references/peer-reviewer/sa-prompts.md +260 -0
- package/.claude/agents/references/peer-reviewer/security-scan.md +188 -0
- package/.claude/agents/references/proposal-reviewer/report-template.md +144 -0
- package/.claude/agents/references/proposal-reviewer/sa-prompts.md +149 -0
- package/.claude/agents/references/referee-config.md +114 -0
- package/.claude/agents/references/referee2-reviewer/audit-checklists.md +287 -0
- package/.claude/agents/references/referee2-reviewer/report-template.md +334 -0
- package/.claude/rules/design-before-results.md +52 -0
- package/.claude/rules/ignore-agents-md.md +17 -0
- package/.claude/rules/ignore-gemini-md.md +17 -0
- package/.claude/rules/lean-claude-md.md +45 -0
- package/.claude/rules/learn-tags.md +99 -0
- package/.claude/rules/overleaf-separation.md +67 -0
- package/.claude/rules/plan-first.md +175 -0
- package/.claude/rules/read-docs-first.md +50 -0
- package/.claude/rules/scope-discipline.md +28 -0
- package/.claude/settings.json +125 -0
- package/.context/current-focus.md +33 -0
- package/.context/preferences/priorities.md +36 -0
- package/.context/preferences/task-naming.md +28 -0
- package/.context/profile.md +29 -0
- package/.context/projects/_index.md +41 -0
- package/.context/projects/papers/nudge-exp.md +22 -0
- package/.context/projects/papers/uncertainty.md +31 -0
- package/.context/resources/claude-scientific-writer-review.md +48 -0
- package/.context/resources/cunningham-multi-analyst-agents.md +104 -0
- package/.context/resources/cunningham-multilang-code-audit.md +62 -0
- package/.context/resources/google-ai-co-scientist-review.md +72 -0
- package/.context/resources/karpathy-llm-council-review.md +58 -0
- package/.context/resources/multi-coder-reliability-protocol.md +175 -0
- package/.context/resources/pedro-santanna-takeaways.md +96 -0
- package/.context/resources/venue-rankings/abs_ajg_2024.csv +1823 -0
- package/.context/resources/venue-rankings/abs_ajg_2024_econ.csv +356 -0
- package/.context/resources/venue-rankings/cabs_4_4star_theory.csv +40 -0
- package/.context/resources/venue-rankings/core_2026.csv +801 -0
- package/.context/resources/venue-rankings.md +147 -0
- package/.context/workflows/README.md +69 -0
- package/.context/workflows/daily-review.md +91 -0
- package/.context/workflows/meeting-actions.md +108 -0
- package/.context/workflows/replication-protocol.md +155 -0
- package/.context/workflows/weekly-review.md +113 -0
- package/.mcp-server-biblio/formatters.py +158 -0
- package/.mcp-server-biblio/pyproject.toml +11 -0
- package/.mcp-server-biblio/server.py +678 -0
- package/.mcp-server-biblio/sources/__init__.py +14 -0
- package/.mcp-server-biblio/sources/base.py +73 -0
- package/.mcp-server-biblio/sources/formatters.py +83 -0
- package/.mcp-server-biblio/sources/models.py +22 -0
- package/.mcp-server-biblio/sources/multi_source.py +243 -0
- package/.mcp-server-biblio/sources/openalex_source.py +183 -0
- package/.mcp-server-biblio/sources/scopus_source.py +309 -0
- package/.mcp-server-biblio/sources/wos_source.py +508 -0
- package/.mcp-server-biblio/uv.lock +896 -0
- package/.scripts/README.md +161 -0
- package/.scripts/ai_pattern_density.py +446 -0
- package/.scripts/conf +445 -0
- package/.scripts/config.py +122 -0
- package/.scripts/count_inventory.py +275 -0
- package/.scripts/daily_digest.py +288 -0
- package/.scripts/done +177 -0
- package/.scripts/extract_meeting_actions.py +223 -0
- package/.scripts/focus +176 -0
- package/.scripts/generate-codex-agents-md.py +217 -0
- package/.scripts/inbox +194 -0
- package/.scripts/notion_helpers.py +325 -0
- package/.scripts/openalex/query_helpers.py +306 -0
- package/.scripts/papers +227 -0
- package/.scripts/query +223 -0
- package/.scripts/session-history.py +201 -0
- package/.scripts/skill-health.py +516 -0
- package/.scripts/skill-log-miner.py +273 -0
- package/.scripts/sync-to-codex.sh +252 -0
- package/.scripts/task +213 -0
- package/.scripts/tasks +190 -0
- package/.scripts/week +206 -0
- package/CLAUDE.md +197 -0
- package/LICENSE +21 -0
- package/MEMORY.md +38 -0
- package/README.md +269 -0
- package/docs/agents.md +44 -0
- package/docs/bibliography-setup.md +55 -0
- package/docs/council-mode.md +36 -0
- package/docs/getting-started.md +245 -0
- package/docs/hooks.md +38 -0
- package/docs/mcp-servers.md +82 -0
- package/docs/notion-setup.md +109 -0
- package/docs/rules.md +33 -0
- package/docs/scripts.md +303 -0
- package/docs/setup-overview/setup-overview.pdf +0 -0
- package/docs/skills.md +70 -0
- package/docs/system.md +159 -0
- package/hooks/block-destructive-git.sh +66 -0
- package/hooks/context-monitor.py +114 -0
- package/hooks/postcompact-restore.py +157 -0
- package/hooks/precompact-autosave.py +181 -0
- package/hooks/promise-checker.sh +124 -0
- package/hooks/protect-source-files.sh +81 -0
- package/hooks/resume-context-loader.sh +53 -0
- package/hooks/startup-context-loader.sh +102 -0
- package/package.json +51 -0
- package/packages/cli-council/.github/workflows/claude-code-review.yml +44 -0
- package/packages/cli-council/.github/workflows/claude.yml +50 -0
- package/packages/cli-council/README.md +100 -0
- package/packages/cli-council/pyproject.toml +43 -0
- package/packages/cli-council/src/cli_council/__init__.py +19 -0
- package/packages/cli-council/src/cli_council/__main__.py +185 -0
- package/packages/cli-council/src/cli_council/backends/__init__.py +8 -0
- package/packages/cli-council/src/cli_council/backends/base.py +81 -0
- package/packages/cli-council/src/cli_council/backends/claude.py +25 -0
- package/packages/cli-council/src/cli_council/backends/codex.py +27 -0
- package/packages/cli-council/src/cli_council/backends/gemini.py +26 -0
- package/packages/cli-council/src/cli_council/checkpoint.py +212 -0
- package/packages/cli-council/src/cli_council/config.py +51 -0
- package/packages/cli-council/src/cli_council/council.py +391 -0
- package/packages/cli-council/src/cli_council/models.py +46 -0
- package/packages/llm-council/.github/workflows/claude-code-review.yml +44 -0
- package/packages/llm-council/.github/workflows/claude.yml +50 -0
- package/packages/llm-council/README.md +453 -0
- package/packages/llm-council/pyproject.toml +42 -0
- package/packages/llm-council/src/llm_council/__init__.py +23 -0
- package/packages/llm-council/src/llm_council/__main__.py +259 -0
- package/packages/llm-council/src/llm_council/checkpoint.py +193 -0
- package/packages/llm-council/src/llm_council/client.py +253 -0
- package/packages/llm-council/src/llm_council/config.py +232 -0
- package/packages/llm-council/src/llm_council/council.py +482 -0
- package/packages/llm-council/src/llm_council/models.py +46 -0
- package/packages/mcp-bibliography/MEMORY.md +31 -0
- package/packages/mcp-bibliography/_app.py +226 -0
- package/packages/mcp-bibliography/formatters.py +158 -0
- package/packages/mcp-bibliography/log/2026-03-13-2100.md +35 -0
- package/packages/mcp-bibliography/pyproject.toml +15 -0
- package/packages/mcp-bibliography/run.sh +20 -0
- package/packages/mcp-bibliography/scholarly_formatters.py +83 -0
- package/packages/mcp-bibliography/server.py +1857 -0
- package/packages/mcp-bibliography/tools/__init__.py +28 -0
- package/packages/mcp-bibliography/tools/_registry.py +19 -0
- package/packages/mcp-bibliography/tools/altmetric.py +107 -0
- package/packages/mcp-bibliography/tools/core.py +92 -0
- package/packages/mcp-bibliography/tools/dblp.py +52 -0
- package/packages/mcp-bibliography/tools/openalex.py +296 -0
- package/packages/mcp-bibliography/tools/opencitations.py +102 -0
- package/packages/mcp-bibliography/tools/openreview.py +179 -0
- package/packages/mcp-bibliography/tools/orcid.py +131 -0
- package/packages/mcp-bibliography/tools/scholarly.py +575 -0
- package/packages/mcp-bibliography/tools/unpaywall.py +63 -0
- package/packages/mcp-bibliography/tools/zenodo.py +123 -0
- package/packages/mcp-bibliography/uv.lock +711 -0
- package/scripts/setup.sh +143 -0
- package/skills/beamer-deck/SKILL.md +199 -0
- package/skills/beamer-deck/references/quality-rubric.md +54 -0
- package/skills/beamer-deck/references/review-prompts.md +106 -0
- package/skills/bib-validate/SKILL.md +261 -0
- package/skills/bib-validate/references/council-mode.md +34 -0
- package/skills/bib-validate/references/deep-verify.md +79 -0
- package/skills/bib-validate/references/fix-mode.md +36 -0
- package/skills/bib-validate/references/openalex-verification.md +45 -0
- package/skills/bib-validate/references/preprint-check.md +31 -0
- package/skills/bib-validate/references/ref-manager-crossref.md +41 -0
- package/skills/bib-validate/references/report-template.md +82 -0
- package/skills/code-archaeology/SKILL.md +141 -0
- package/skills/code-review/SKILL.md +265 -0
- package/skills/code-review/references/quality-rubric.md +67 -0
- package/skills/consolidate-memory/SKILL.md +208 -0
- package/skills/context-status/SKILL.md +126 -0
- package/skills/creation-guard/SKILL.md +230 -0
- package/skills/devils-advocate/SKILL.md +130 -0
- package/skills/devils-advocate/references/competing-hypotheses.md +83 -0
- package/skills/init-project/SKILL.md +115 -0
- package/skills/init-project-course/references/memory-and-settings.md +92 -0
- package/skills/init-project-course/references/organise-templates.md +94 -0
- package/skills/init-project-course/skill.md +147 -0
- package/skills/init-project-light/skill.md +139 -0
- package/skills/init-project-research/SKILL.md +368 -0
- package/skills/init-project-research/references/atlas-pipeline-sync.md +70 -0
- package/skills/init-project-research/references/atlas-schema.md +81 -0
- package/skills/init-project-research/references/confirmation-report.md +39 -0
- package/skills/init-project-research/references/domain-profile-template.md +104 -0
- package/skills/init-project-research/references/interview-round3.md +34 -0
- package/skills/init-project-research/references/literature-discovery.md +43 -0
- package/skills/init-project-research/references/scaffold-details.md +197 -0
- package/skills/init-project-research/templates/field-calibration.md +60 -0
- package/skills/init-project-research/templates/pipeline-manifest.md +63 -0
- package/skills/init-project-research/templates/run-all.sh +116 -0
- package/skills/init-project-research/templates/seed-files.md +337 -0
- package/skills/insights-deck/SKILL.md +151 -0
- package/skills/interview-me/SKILL.md +157 -0
- package/skills/latex/SKILL.md +141 -0
- package/skills/latex/references/latex-configs.md +183 -0
- package/skills/latex-autofix/SKILL.md +230 -0
- package/skills/latex-autofix/references/known-errors.md +183 -0
- package/skills/latex-autofix/references/quality-rubric.md +50 -0
- package/skills/latex-health-check/SKILL.md +161 -0
- package/skills/learn/SKILL.md +220 -0
- package/skills/learn/scripts/validate_skill.py +265 -0
- package/skills/lessons-learned/SKILL.md +201 -0
- package/skills/literature/SKILL.md +335 -0
- package/skills/literature/references/agent-templates.md +393 -0
- package/skills/literature/references/bibliometric-apis.md +44 -0
- package/skills/literature/references/cli-council-search.md +79 -0
- package/skills/literature/references/openalex-api-guide.md +371 -0
- package/skills/literature/references/openalex-common-queries.md +381 -0
- package/skills/literature/references/openalex-workflows.md +248 -0
- package/skills/literature/references/reference-manager-sync.md +36 -0
- package/skills/literature/references/scopus-api-guide.md +208 -0
- package/skills/literature/references/wos-api-guide.md +308 -0
- package/skills/multi-perspective/SKILL.md +311 -0
- package/skills/multi-perspective/references/computational-many-analysts.md +77 -0
- package/skills/pipeline-manifest/SKILL.md +226 -0
- package/skills/pre-submission-report/SKILL.md +153 -0
- package/skills/process-reviews/SKILL.md +244 -0
- package/skills/process-reviews/references/rr-routing.md +101 -0
- package/skills/project-deck/SKILL.md +87 -0
- package/skills/project-safety/SKILL.md +135 -0
- package/skills/proofread/SKILL.md +254 -0
- package/skills/proofread/references/quality-rubric.md +104 -0
- package/skills/python-env/SKILL.md +57 -0
- package/skills/quarto-deck/SKILL.md +226 -0
- package/skills/quarto-deck/references/markdown-format.md +143 -0
- package/skills/quarto-deck/references/quality-rubric.md +54 -0
- package/skills/save-context/SKILL.md +174 -0
- package/skills/session-log/SKILL.md +98 -0
- package/skills/shared/concept-validation-gate.md +161 -0
- package/skills/shared/council-protocol.md +265 -0
- package/skills/shared/distribution-diagnostics.md +164 -0
- package/skills/shared/engagement-stratified-sampling.md +218 -0
- package/skills/shared/escalation-protocol.md +74 -0
- package/skills/shared/external-audit-protocol.md +205 -0
- package/skills/shared/intercoder-reliability.md +256 -0
- package/skills/shared/mcp-degradation.md +81 -0
- package/skills/shared/method-probing-questions.md +163 -0
- package/skills/shared/multi-language-conventions.md +143 -0
- package/skills/shared/paid-api-safety.md +174 -0
- package/skills/shared/palettes.md +90 -0
- package/skills/shared/progressive-disclosure.md +92 -0
- package/skills/shared/project-documentation-content.md +443 -0
- package/skills/shared/project-documentation-format.md +281 -0
- package/skills/shared/project-documentation.md +100 -0
- package/skills/shared/publication-output.md +138 -0
- package/skills/shared/quality-scoring.md +70 -0
- package/skills/shared/reference-resolution.md +77 -0
- package/skills/shared/research-quality-rubric.md +165 -0
- package/skills/shared/rhetoric-principles.md +54 -0
- package/skills/shared/skill-design-patterns.md +272 -0
- package/skills/shared/skill-index.md +240 -0
- package/skills/shared/system-documentation.md +334 -0
- package/skills/shared/tikz-rules.md +402 -0
- package/skills/shared/validation-tiers.md +121 -0
- package/skills/shared/venue-guides/README.md +46 -0
- package/skills/shared/venue-guides/cell_press_style.md +483 -0
- package/skills/shared/venue-guides/conferences_formatting.md +564 -0
- package/skills/shared/venue-guides/cs_conference_style.md +463 -0
- package/skills/shared/venue-guides/examples/cell_summary_example.md +247 -0
- package/skills/shared/venue-guides/examples/medical_structured_abstract.md +313 -0
- package/skills/shared/venue-guides/examples/nature_abstract_examples.md +213 -0
- package/skills/shared/venue-guides/examples/neurips_introduction_example.md +245 -0
- package/skills/shared/venue-guides/journals_formatting.md +486 -0
- package/skills/shared/venue-guides/medical_journal_styles.md +535 -0
- package/skills/shared/venue-guides/ml_conference_style.md +556 -0
- package/skills/shared/venue-guides/nature_science_style.md +405 -0
- package/skills/shared/venue-guides/reviewer_expectations.md +417 -0
- package/skills/shared/venue-guides/venue_writing_styles.md +321 -0
- package/skills/split-pdf/SKILL.md +172 -0
- package/skills/split-pdf/methodology.md +48 -0
- package/skills/sync-notion/SKILL.md +93 -0
- package/skills/system-audit/SKILL.md +157 -0
- package/skills/system-audit/references/sub-agent-prompts.md +294 -0
- package/skills/task-management/SKILL.md +131 -0
- package/skills/update-focus/SKILL.md +204 -0
- package/skills/update-project-doc/SKILL.md +194 -0
- package/skills/validate-bib/SKILL.md +242 -0
- package/skills/validate-bib/references/council-mode.md +34 -0
- package/skills/validate-bib/references/deep-verify.md +71 -0
- package/skills/validate-bib/references/openalex-verification.md +45 -0
- package/skills/validate-bib/references/preprint-check.md +31 -0
- package/skills/validate-bib/references/report-template.md +62 -0
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
"""Scopus adapter using httpx (Elsevier REST API).
|
|
2
|
+
|
|
3
|
+
Uses async httpx client. Requires SCOPUS_API_KEY env var.
|
|
4
|
+
Optional SCOPUS_INST_TOKEN for non-institutional IP access.
|
|
5
|
+
|
|
6
|
+
SYNC: Mirrored in Topic Finder (claude_topic_finder/services/scopus.py).
|
|
7
|
+
Changes to query construction, pagination, or record parsing must be propagated.
|
|
8
|
+
Topic Finder adds get_topics/get_trend_data; this version adds
|
|
9
|
+
verify_doi/batch_verify_dois. Core search logic should stay identical.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import logging
|
|
15
|
+
import re
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
import httpx
|
|
19
|
+
|
|
20
|
+
from sources.base import ScholarlySource
|
|
21
|
+
from sources.models import Paper
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
_SEARCH_URL = "https://api.elsevier.com/content/search/scopus"
|
|
26
|
+
|
|
27
|
+
# Common English stopwords — kept small to avoid false removals of domain terms
|
|
28
|
+
_STOPWORDS = frozenset(
|
|
29
|
+
"a an the and or but in on of to for is it that this with by from as at be "
|
|
30
|
+
"are was were been have has had do does did not no nor so if then than can "
|
|
31
|
+
"will would could should may might shall its we i you he she they our my "
|
|
32
|
+
"your their about into through during before after above below between"
|
|
33
|
+
.split()
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
_WORD_RE = re.compile(r"[a-zA-Z][\w-]*[a-zA-Z]|[a-zA-Z]{2,}")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _extract_search_terms(text: str, *, max_terms: int = 12) -> str:
|
|
40
|
+
"""Extract key terms from a long text for use in Scopus TITLE-ABS-KEY() queries."""
|
|
41
|
+
words = _WORD_RE.findall(text)
|
|
42
|
+
terms: list[str] = []
|
|
43
|
+
seen: set[str] = set()
|
|
44
|
+
for w in words:
|
|
45
|
+
lower = w.lower()
|
|
46
|
+
if lower in _STOPWORDS or len(lower) < 3:
|
|
47
|
+
continue
|
|
48
|
+
if lower not in seen:
|
|
49
|
+
seen.add(lower)
|
|
50
|
+
terms.append(w)
|
|
51
|
+
if len(terms) >= max_terms:
|
|
52
|
+
break
|
|
53
|
+
return " ".join(terms)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class ScopusSource(ScholarlySource):
    """Scopus implementation using the Elsevier REST API directly.

    Uses httpx async client — no pybliometrics dependency.
    Supports API key only (institutional IP) or API key + InstToken (any IP).
    """

    # Largest page size requested from the search endpoint (COMPLETE view).
    _PAGE_SIZE = 25
    # DOIs OR-ed together per request (Scopus query length limits).
    _DOI_CHUNK = 10
    # Caller-facing sort keys mapped to Scopus `sort` values; unknown keys
    # fall back to "relevancy".
    _SORT_PARAMS = {
        "cited_by_count": "-citedby-count",
        "publication_year": "-coverDate",
    }
    # Lower-cased resolver/scheme prefixes removed before a DOI is queried.
    _DOI_PREFIXES = (
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "doi:",
    )

    def __init__(self, api_key: str, inst_token: str = "") -> None:
        """Create the shared async HTTP client.

        Args:
            api_key: Elsevier API key, sent as ``X-ELS-APIKey``.
            inst_token: Optional institutional token, sent as
                ``X-ELS-Insttoken`` only when non-empty.
        """
        self._api_key = api_key
        self._inst_token = inst_token
        headers: dict[str, str] = {
            "X-ELS-APIKey": api_key,
            "Accept": "application/json",
        }
        if inst_token:
            headers["X-ELS-Insttoken"] = inst_token
        self._client = httpx.AsyncClient(
            headers=headers,
            timeout=30.0,
        )

    @property
    def source_name(self) -> str:
        """Human-readable name of this source."""
        return "Scopus"

    @property
    def source_key(self) -> str:
        """Short lowercase identifier of this source."""
        return "scopus"

    # ------------------------------------------------------------------
    # Public interface
    # ------------------------------------------------------------------

    async def search_works(
        self,
        query: str,
        *,
        year_from: int | None = None,
        year_to: int | None = None,
        sort_by: str = "relevance",
        limit: int = 50,
    ) -> list[Paper]:
        """Search Scopus titles, abstracts and keywords.

        Args:
            query: Text placed verbatim inside ``TITLE-ABS-KEY(...)``.
            year_from: Inclusive lower publication-year bound; falsy disables it.
            year_to: Inclusive upper publication-year bound; falsy disables it.
            sort_by: ``"cited_by_count"`` or ``"publication_year"`` (both
                descending); any other value sorts by relevancy.
            limit: Maximum number of papers to return.

        Returns:
            Up to ``limit`` papers. Best effort: if a page request fails, the
            failure is logged and the papers collected so far are returned.
        """
        scopus_query = f"TITLE-ABS-KEY({query})"
        # PUBYEAR is filtered with strict comparisons, hence the +/- 1 to
        # make both bounds inclusive.
        if year_from:
            scopus_query += f" AND PUBYEAR > {year_from - 1}"
        if year_to:
            scopus_query += f" AND PUBYEAR < {year_to + 1}"

        sort_param = self._SORT_PARAMS.get(sort_by, "relevancy")

        papers: list[Paper] = []
        start = 0
        per_page = min(limit, self._PAGE_SIZE)

        while len(papers) < limit:
            params: dict[str, Any] = {
                "query": scopus_query,
                "start": start,
                "count": per_page,
                "sort": sort_param,
                "view": "COMPLETE",
            }
            try:
                resp = await self._client.get(_SEARCH_URL, params=params)
                resp.raise_for_status()
                data = resp.json()
            except httpx.HTTPStatusError as exc:
                logger.warning(
                    "Scopus HTTP %d for: %s. Body: %.200s",
                    exc.response.status_code, scopus_query, exc.response.text,
                )
                break
            except Exception:
                logger.warning("Scopus search failed for: %s", scopus_query, exc_info=True)
                break

            results = data.get("search-results", {})
            entries = results.get("entry", [])
            if self._is_empty_result(entries):
                break

            for entry in entries:
                papers.append(self._to_paper(entry))
                if len(papers) >= limit:
                    break

            # An unparsable total must not discard the pages already
            # collected; treat it as "no more results".
            try:
                total = int(results.get("opensearch:totalResults", 0) or 0)
            except (TypeError, ValueError):
                total = 0
            start += per_page
            if start >= total:
                break

        return papers[:limit]

    async def verify_doi(self, doi: str) -> Paper | None:
        """Look up a single DOI in Scopus.

        Args:
            doi: Bare DOI, or a DOI with a ``doi.org`` / ``dx.doi.org`` URL
                or ``doi:`` prefix.

        Returns:
            The matching paper, or ``None`` when the DOI is blank, unknown
            to Scopus, or the request fails (failure is logged at debug level).
        """
        clean_doi = self._strip_doi_prefix(doi)
        if not clean_doi:
            return None

        params: dict[str, Any] = {
            "query": f"DOI({clean_doi})",
            "start": 0,
            "count": 1,
            "view": "COMPLETE",
        }
        try:
            resp = await self._client.get(_SEARCH_URL, params=params)
            resp.raise_for_status()
            data = resp.json()
        except Exception:
            logger.debug("Scopus DOI verify failed for: %s", doi, exc_info=True)
            return None

        entries = data.get("search-results", {}).get("entry", [])
        if self._is_empty_result(entries):
            return None

        paper = self._to_paper(entries[0])
        paper.verified_by = ["scopus"]
        return paper

    async def batch_verify_dois(self, dois: list[str]) -> dict[str, Paper | None]:
        """Look up many DOIs, OR-ing them together in chunks.

        Args:
            dois: DOIs in any of the forms accepted by ``verify_doi``.

        Returns:
            A dict keyed by the DOI strings exactly as passed in. Each value
            is the matching paper or ``None``. Inputs that normalise to the
            same DOI all receive the same paper. A failed chunk is logged
            and its DOIs stay ``None``.
        """
        results: dict[str, Paper | None] = {d: None for d in dois}

        for i in range(0, len(dois), self._DOI_CHUNK):
            # Normalised lower-case DOI -> every original spelling that
            # asked for it, so duplicates in different forms all get filled.
            wanted: dict[str, list[str]] = {}
            query_dois: list[str] = []
            for orig in dois[i:i + self._DOI_CHUNK]:
                clean = self._strip_doi_prefix(orig)
                if not clean:
                    continue
                key = clean.lower()
                if key not in wanted:
                    wanted[key] = []
                    query_dois.append(clean)
                wanted[key].append(orig)

            if not query_dois:
                continue
            or_query = " OR ".join(f"DOI({d})" for d in query_dois)

            try:
                resp = await self._client.get(
                    _SEARCH_URL,
                    params={
                        "query": or_query,
                        "start": 0,
                        "count": 25,
                        "view": "COMPLETE",
                    },
                )
                resp.raise_for_status()
                data = resp.json()

                entries = data.get("search-results", {}).get("entry", [])
                if self._is_empty_result(entries):
                    continue

                for entry in entries:
                    entry_doi = entry.get("prism:doi", "")
                    if not entry_doi:
                        continue
                    originals = wanted.get(entry_doi.lower())
                    if not originals:
                        continue
                    paper = self._to_paper(entry)
                    paper.verified_by = ["scopus"]
                    for orig in originals:
                        results[orig] = paper
            except Exception:
                # Best effort: one bad chunk must not abort the others.
                logger.warning(
                    "Scopus batch verify failed for chunk starting at %d", i,
                    exc_info=True,
                )

        return results

    async def find_similar_works(
        self,
        text: str,
        *,
        limit: int = 20,
    ) -> list[Paper]:
        """Search for works related to a free-text passage.

        Up to 12 key terms are extracted from ``text`` and run through
        ``search_works`` sorted by relevance.

        Returns:
            Up to ``limit`` papers, or an empty list when no terms qualify.
        """
        query = _extract_search_terms(text, max_terms=12)
        if not query:
            return []
        return await self.search_works(query, sort_by="relevance", limit=limit)

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.aclose()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @classmethod
    def _strip_doi_prefix(cls, doi: str) -> str:
        """Return ``doi`` without surrounding whitespace or a known prefix.

        Prefix matching is case-insensitive; the DOI's own casing is kept.
        A falsy input yields "".
        """
        cleaned = (doi or "").strip()
        lowered = cleaned.lower()
        for prefix in cls._DOI_PREFIXES:
            if lowered.startswith(prefix):
                return cleaned[len(prefix):].strip()
        return cleaned

    @staticmethod
    def _is_empty_result(entries: list) -> bool:
        """Return True when a response carries no usable entries.

        Covers both a missing/empty ``entry`` list and the single
        placeholder entry containing an ``"error"`` key.
        """
        return not entries or (len(entries) == 1 and "error" in entries[0])

    # ------------------------------------------------------------------
    # Paper mapping
    # ------------------------------------------------------------------

    @staticmethod
    def _to_paper(entry: dict) -> Paper:
        """Convert Scopus JSON entry to Paper."""
        # Authors — dc:creator is first author, author array has all
        authors: list[str] = []
        for a in entry.get("author") or []:
            # Fields may be present but null, so filter before joining
            # instead of concatenating.
            name = a.get("authname") or " ".join(
                part for part in (a.get("given-name"), a.get("surname")) if part
            )
            if name and name.strip():
                authors.append(name.strip())
        # Fall back to the first author when the array is missing or
        # produced no usable names.
        if not authors and entry.get("dc:creator"):
            authors = [entry["dc:creator"]]

        # Keywords are delivered as one "|"-separated string
        keywords: list[str] = []
        authkeywords = entry.get("authkeywords")
        if authkeywords:
            keywords = [k.strip() for k in authkeywords.split("|") if k.strip()]

        # Citation count
        cited_by = 0
        try:
            cited_by = int(entry.get("citedby-count", 0) or 0)
        except (ValueError, TypeError):
            pass

        # Publication year from coverDate (YYYY-MM-DD)
        pub_year = 0
        cover_date = entry.get("prism:coverDate", "")
        if cover_date:
            try:
                pub_year = int(str(cover_date)[:4])
            except (ValueError, TypeError):
                pass

        # DOI — stored as a resolvable https://doi.org/ URL
        doi = entry.get("prism:doi")
        if doi and not doi.startswith("http"):
            doi = f"https://doi.org/{doi}"

        # EID
        eid = entry.get("eid", "")

        # Abstract (available in COMPLETE view)
        abstract = entry.get("dc:description")

        return Paper(
            source_id=f"scopus:{eid}",
            title=entry.get("dc:title", "") or "",
            abstract=abstract,
            authors=authors,
            publication_year=pub_year,
            cited_by_count=cited_by,
            source_name=entry.get("prism:publicationName"),
            doi=doi,
            keywords=keywords,
            url=doi or f"https://www.scopus.com/record/display.uri?eid={eid}",
            verified_by=["scopus"],
        )
|