flonat-research 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/domain-reviewer.md +336 -0
- package/.claude/agents/fixer.md +226 -0
- package/.claude/agents/paper-critic.md +370 -0
- package/.claude/agents/peer-reviewer.md +289 -0
- package/.claude/agents/proposal-reviewer.md +215 -0
- package/.claude/agents/referee2-reviewer.md +367 -0
- package/.claude/agents/references/journal-referee-profiles.md +354 -0
- package/.claude/agents/references/paper-critic/council-personas.md +77 -0
- package/.claude/agents/references/paper-critic/council-prompts.md +198 -0
- package/.claude/agents/references/peer-reviewer/report-template.md +199 -0
- package/.claude/agents/references/peer-reviewer/sa-prompts.md +260 -0
- package/.claude/agents/references/peer-reviewer/security-scan.md +188 -0
- package/.claude/agents/references/proposal-reviewer/report-template.md +144 -0
- package/.claude/agents/references/proposal-reviewer/sa-prompts.md +149 -0
- package/.claude/agents/references/referee-config.md +114 -0
- package/.claude/agents/references/referee2-reviewer/audit-checklists.md +287 -0
- package/.claude/agents/references/referee2-reviewer/report-template.md +334 -0
- package/.claude/rules/design-before-results.md +52 -0
- package/.claude/rules/ignore-agents-md.md +17 -0
- package/.claude/rules/ignore-gemini-md.md +17 -0
- package/.claude/rules/lean-claude-md.md +45 -0
- package/.claude/rules/learn-tags.md +99 -0
- package/.claude/rules/overleaf-separation.md +67 -0
- package/.claude/rules/plan-first.md +175 -0
- package/.claude/rules/read-docs-first.md +50 -0
- package/.claude/rules/scope-discipline.md +28 -0
- package/.claude/settings.json +125 -0
- package/.context/current-focus.md +33 -0
- package/.context/preferences/priorities.md +36 -0
- package/.context/preferences/task-naming.md +28 -0
- package/.context/profile.md +29 -0
- package/.context/projects/_index.md +41 -0
- package/.context/projects/papers/nudge-exp.md +22 -0
- package/.context/projects/papers/uncertainty.md +31 -0
- package/.context/resources/claude-scientific-writer-review.md +48 -0
- package/.context/resources/cunningham-multi-analyst-agents.md +104 -0
- package/.context/resources/cunningham-multilang-code-audit.md +62 -0
- package/.context/resources/google-ai-co-scientist-review.md +72 -0
- package/.context/resources/karpathy-llm-council-review.md +58 -0
- package/.context/resources/multi-coder-reliability-protocol.md +175 -0
- package/.context/resources/pedro-santanna-takeaways.md +96 -0
- package/.context/resources/venue-rankings/abs_ajg_2024.csv +1823 -0
- package/.context/resources/venue-rankings/abs_ajg_2024_econ.csv +356 -0
- package/.context/resources/venue-rankings/cabs_4_4star_theory.csv +40 -0
- package/.context/resources/venue-rankings/core_2026.csv +801 -0
- package/.context/resources/venue-rankings.md +147 -0
- package/.context/workflows/README.md +69 -0
- package/.context/workflows/daily-review.md +91 -0
- package/.context/workflows/meeting-actions.md +108 -0
- package/.context/workflows/replication-protocol.md +155 -0
- package/.context/workflows/weekly-review.md +113 -0
- package/.mcp-server-biblio/formatters.py +158 -0
- package/.mcp-server-biblio/pyproject.toml +11 -0
- package/.mcp-server-biblio/server.py +678 -0
- package/.mcp-server-biblio/sources/__init__.py +14 -0
- package/.mcp-server-biblio/sources/base.py +73 -0
- package/.mcp-server-biblio/sources/formatters.py +83 -0
- package/.mcp-server-biblio/sources/models.py +22 -0
- package/.mcp-server-biblio/sources/multi_source.py +243 -0
- package/.mcp-server-biblio/sources/openalex_source.py +183 -0
- package/.mcp-server-biblio/sources/scopus_source.py +309 -0
- package/.mcp-server-biblio/sources/wos_source.py +508 -0
- package/.mcp-server-biblio/uv.lock +896 -0
- package/.scripts/README.md +161 -0
- package/.scripts/ai_pattern_density.py +446 -0
- package/.scripts/conf +445 -0
- package/.scripts/config.py +122 -0
- package/.scripts/count_inventory.py +275 -0
- package/.scripts/daily_digest.py +288 -0
- package/.scripts/done +177 -0
- package/.scripts/extract_meeting_actions.py +223 -0
- package/.scripts/focus +176 -0
- package/.scripts/generate-codex-agents-md.py +217 -0
- package/.scripts/inbox +194 -0
- package/.scripts/notion_helpers.py +325 -0
- package/.scripts/openalex/query_helpers.py +306 -0
- package/.scripts/papers +227 -0
- package/.scripts/query +223 -0
- package/.scripts/session-history.py +201 -0
- package/.scripts/skill-health.py +516 -0
- package/.scripts/skill-log-miner.py +273 -0
- package/.scripts/sync-to-codex.sh +252 -0
- package/.scripts/task +213 -0
- package/.scripts/tasks +190 -0
- package/.scripts/week +206 -0
- package/CLAUDE.md +197 -0
- package/LICENSE +21 -0
- package/MEMORY.md +38 -0
- package/README.md +269 -0
- package/docs/agents.md +44 -0
- package/docs/bibliography-setup.md +55 -0
- package/docs/council-mode.md +36 -0
- package/docs/getting-started.md +245 -0
- package/docs/hooks.md +38 -0
- package/docs/mcp-servers.md +82 -0
- package/docs/notion-setup.md +109 -0
- package/docs/rules.md +33 -0
- package/docs/scripts.md +303 -0
- package/docs/setup-overview/setup-overview.pdf +0 -0
- package/docs/skills.md +70 -0
- package/docs/system.md +159 -0
- package/hooks/block-destructive-git.sh +66 -0
- package/hooks/context-monitor.py +114 -0
- package/hooks/postcompact-restore.py +157 -0
- package/hooks/precompact-autosave.py +181 -0
- package/hooks/promise-checker.sh +124 -0
- package/hooks/protect-source-files.sh +81 -0
- package/hooks/resume-context-loader.sh +53 -0
- package/hooks/startup-context-loader.sh +102 -0
- package/package.json +51 -0
- package/packages/cli-council/.github/workflows/claude-code-review.yml +44 -0
- package/packages/cli-council/.github/workflows/claude.yml +50 -0
- package/packages/cli-council/README.md +100 -0
- package/packages/cli-council/pyproject.toml +43 -0
- package/packages/cli-council/src/cli_council/__init__.py +19 -0
- package/packages/cli-council/src/cli_council/__main__.py +185 -0
- package/packages/cli-council/src/cli_council/backends/__init__.py +8 -0
- package/packages/cli-council/src/cli_council/backends/base.py +81 -0
- package/packages/cli-council/src/cli_council/backends/claude.py +25 -0
- package/packages/cli-council/src/cli_council/backends/codex.py +27 -0
- package/packages/cli-council/src/cli_council/backends/gemini.py +26 -0
- package/packages/cli-council/src/cli_council/checkpoint.py +212 -0
- package/packages/cli-council/src/cli_council/config.py +51 -0
- package/packages/cli-council/src/cli_council/council.py +391 -0
- package/packages/cli-council/src/cli_council/models.py +46 -0
- package/packages/llm-council/.github/workflows/claude-code-review.yml +44 -0
- package/packages/llm-council/.github/workflows/claude.yml +50 -0
- package/packages/llm-council/README.md +453 -0
- package/packages/llm-council/pyproject.toml +42 -0
- package/packages/llm-council/src/llm_council/__init__.py +23 -0
- package/packages/llm-council/src/llm_council/__main__.py +259 -0
- package/packages/llm-council/src/llm_council/checkpoint.py +193 -0
- package/packages/llm-council/src/llm_council/client.py +253 -0
- package/packages/llm-council/src/llm_council/config.py +232 -0
- package/packages/llm-council/src/llm_council/council.py +482 -0
- package/packages/llm-council/src/llm_council/models.py +46 -0
- package/packages/mcp-bibliography/MEMORY.md +31 -0
- package/packages/mcp-bibliography/_app.py +226 -0
- package/packages/mcp-bibliography/formatters.py +158 -0
- package/packages/mcp-bibliography/log/2026-03-13-2100.md +35 -0
- package/packages/mcp-bibliography/pyproject.toml +15 -0
- package/packages/mcp-bibliography/run.sh +20 -0
- package/packages/mcp-bibliography/scholarly_formatters.py +83 -0
- package/packages/mcp-bibliography/server.py +1857 -0
- package/packages/mcp-bibliography/tools/__init__.py +28 -0
- package/packages/mcp-bibliography/tools/_registry.py +19 -0
- package/packages/mcp-bibliography/tools/altmetric.py +107 -0
- package/packages/mcp-bibliography/tools/core.py +92 -0
- package/packages/mcp-bibliography/tools/dblp.py +52 -0
- package/packages/mcp-bibliography/tools/openalex.py +296 -0
- package/packages/mcp-bibliography/tools/opencitations.py +102 -0
- package/packages/mcp-bibliography/tools/openreview.py +179 -0
- package/packages/mcp-bibliography/tools/orcid.py +131 -0
- package/packages/mcp-bibliography/tools/scholarly.py +575 -0
- package/packages/mcp-bibliography/tools/unpaywall.py +63 -0
- package/packages/mcp-bibliography/tools/zenodo.py +123 -0
- package/packages/mcp-bibliography/uv.lock +711 -0
- package/scripts/setup.sh +143 -0
- package/skills/beamer-deck/SKILL.md +199 -0
- package/skills/beamer-deck/references/quality-rubric.md +54 -0
- package/skills/beamer-deck/references/review-prompts.md +106 -0
- package/skills/bib-validate/SKILL.md +261 -0
- package/skills/bib-validate/references/council-mode.md +34 -0
- package/skills/bib-validate/references/deep-verify.md +79 -0
- package/skills/bib-validate/references/fix-mode.md +36 -0
- package/skills/bib-validate/references/openalex-verification.md +45 -0
- package/skills/bib-validate/references/preprint-check.md +31 -0
- package/skills/bib-validate/references/ref-manager-crossref.md +41 -0
- package/skills/bib-validate/references/report-template.md +82 -0
- package/skills/code-archaeology/SKILL.md +141 -0
- package/skills/code-review/SKILL.md +265 -0
- package/skills/code-review/references/quality-rubric.md +67 -0
- package/skills/consolidate-memory/SKILL.md +208 -0
- package/skills/context-status/SKILL.md +126 -0
- package/skills/creation-guard/SKILL.md +230 -0
- package/skills/devils-advocate/SKILL.md +130 -0
- package/skills/devils-advocate/references/competing-hypotheses.md +83 -0
- package/skills/init-project/SKILL.md +115 -0
- package/skills/init-project-course/references/memory-and-settings.md +92 -0
- package/skills/init-project-course/references/organise-templates.md +94 -0
- package/skills/init-project-course/skill.md +147 -0
- package/skills/init-project-light/skill.md +139 -0
- package/skills/init-project-research/SKILL.md +368 -0
- package/skills/init-project-research/references/atlas-pipeline-sync.md +70 -0
- package/skills/init-project-research/references/atlas-schema.md +81 -0
- package/skills/init-project-research/references/confirmation-report.md +39 -0
- package/skills/init-project-research/references/domain-profile-template.md +104 -0
- package/skills/init-project-research/references/interview-round3.md +34 -0
- package/skills/init-project-research/references/literature-discovery.md +43 -0
- package/skills/init-project-research/references/scaffold-details.md +197 -0
- package/skills/init-project-research/templates/field-calibration.md +60 -0
- package/skills/init-project-research/templates/pipeline-manifest.md +63 -0
- package/skills/init-project-research/templates/run-all.sh +116 -0
- package/skills/init-project-research/templates/seed-files.md +337 -0
- package/skills/insights-deck/SKILL.md +151 -0
- package/skills/interview-me/SKILL.md +157 -0
- package/skills/latex/SKILL.md +141 -0
- package/skills/latex/references/latex-configs.md +183 -0
- package/skills/latex-autofix/SKILL.md +230 -0
- package/skills/latex-autofix/references/known-errors.md +183 -0
- package/skills/latex-autofix/references/quality-rubric.md +50 -0
- package/skills/latex-health-check/SKILL.md +161 -0
- package/skills/learn/SKILL.md +220 -0
- package/skills/learn/scripts/validate_skill.py +265 -0
- package/skills/lessons-learned/SKILL.md +201 -0
- package/skills/literature/SKILL.md +335 -0
- package/skills/literature/references/agent-templates.md +393 -0
- package/skills/literature/references/bibliometric-apis.md +44 -0
- package/skills/literature/references/cli-council-search.md +79 -0
- package/skills/literature/references/openalex-api-guide.md +371 -0
- package/skills/literature/references/openalex-common-queries.md +381 -0
- package/skills/literature/references/openalex-workflows.md +248 -0
- package/skills/literature/references/reference-manager-sync.md +36 -0
- package/skills/literature/references/scopus-api-guide.md +208 -0
- package/skills/literature/references/wos-api-guide.md +308 -0
- package/skills/multi-perspective/SKILL.md +311 -0
- package/skills/multi-perspective/references/computational-many-analysts.md +77 -0
- package/skills/pipeline-manifest/SKILL.md +226 -0
- package/skills/pre-submission-report/SKILL.md +153 -0
- package/skills/process-reviews/SKILL.md +244 -0
- package/skills/process-reviews/references/rr-routing.md +101 -0
- package/skills/project-deck/SKILL.md +87 -0
- package/skills/project-safety/SKILL.md +135 -0
- package/skills/proofread/SKILL.md +254 -0
- package/skills/proofread/references/quality-rubric.md +104 -0
- package/skills/python-env/SKILL.md +57 -0
- package/skills/quarto-deck/SKILL.md +226 -0
- package/skills/quarto-deck/references/markdown-format.md +143 -0
- package/skills/quarto-deck/references/quality-rubric.md +54 -0
- package/skills/save-context/SKILL.md +174 -0
- package/skills/session-log/SKILL.md +98 -0
- package/skills/shared/concept-validation-gate.md +161 -0
- package/skills/shared/council-protocol.md +265 -0
- package/skills/shared/distribution-diagnostics.md +164 -0
- package/skills/shared/engagement-stratified-sampling.md +218 -0
- package/skills/shared/escalation-protocol.md +74 -0
- package/skills/shared/external-audit-protocol.md +205 -0
- package/skills/shared/intercoder-reliability.md +256 -0
- package/skills/shared/mcp-degradation.md +81 -0
- package/skills/shared/method-probing-questions.md +163 -0
- package/skills/shared/multi-language-conventions.md +143 -0
- package/skills/shared/paid-api-safety.md +174 -0
- package/skills/shared/palettes.md +90 -0
- package/skills/shared/progressive-disclosure.md +92 -0
- package/skills/shared/project-documentation-content.md +443 -0
- package/skills/shared/project-documentation-format.md +281 -0
- package/skills/shared/project-documentation.md +100 -0
- package/skills/shared/publication-output.md +138 -0
- package/skills/shared/quality-scoring.md +70 -0
- package/skills/shared/reference-resolution.md +77 -0
- package/skills/shared/research-quality-rubric.md +165 -0
- package/skills/shared/rhetoric-principles.md +54 -0
- package/skills/shared/skill-design-patterns.md +272 -0
- package/skills/shared/skill-index.md +240 -0
- package/skills/shared/system-documentation.md +334 -0
- package/skills/shared/tikz-rules.md +402 -0
- package/skills/shared/validation-tiers.md +121 -0
- package/skills/shared/venue-guides/README.md +46 -0
- package/skills/shared/venue-guides/cell_press_style.md +483 -0
- package/skills/shared/venue-guides/conferences_formatting.md +564 -0
- package/skills/shared/venue-guides/cs_conference_style.md +463 -0
- package/skills/shared/venue-guides/examples/cell_summary_example.md +247 -0
- package/skills/shared/venue-guides/examples/medical_structured_abstract.md +313 -0
- package/skills/shared/venue-guides/examples/nature_abstract_examples.md +213 -0
- package/skills/shared/venue-guides/examples/neurips_introduction_example.md +245 -0
- package/skills/shared/venue-guides/journals_formatting.md +486 -0
- package/skills/shared/venue-guides/medical_journal_styles.md +535 -0
- package/skills/shared/venue-guides/ml_conference_style.md +556 -0
- package/skills/shared/venue-guides/nature_science_style.md +405 -0
- package/skills/shared/venue-guides/reviewer_expectations.md +417 -0
- package/skills/shared/venue-guides/venue_writing_styles.md +321 -0
- package/skills/split-pdf/SKILL.md +172 -0
- package/skills/split-pdf/methodology.md +48 -0
- package/skills/sync-notion/SKILL.md +93 -0
- package/skills/system-audit/SKILL.md +157 -0
- package/skills/system-audit/references/sub-agent-prompts.md +294 -0
- package/skills/task-management/SKILL.md +131 -0
- package/skills/update-focus/SKILL.md +204 -0
- package/skills/update-project-doc/SKILL.md +194 -0
- package/skills/validate-bib/SKILL.md +242 -0
- package/skills/validate-bib/references/council-mode.md +34 -0
- package/skills/validate-bib/references/deep-verify.md +71 -0
- package/skills/validate-bib/references/openalex-verification.md +45 -0
- package/skills/validate-bib/references/preprint-check.md +31 -0
- package/skills/validate-bib/references/report-template.md +62 -0
|
@@ -0,0 +1,508 @@
|
|
|
1
|
+
"""Web of Science API adapter (Starter + Expanded tiers).
|
|
2
|
+
|
|
3
|
+
Uses httpx async client. Requires WOS_API_KEY env var.
|
|
4
|
+
Optional WOS_API_TIER env var: "starter" (default) or "expanded".
|
|
5
|
+
|
|
6
|
+
Expanded tier provides: abstracts, higher per-page limits (100 vs 50),
|
|
7
|
+
full author affiliations, funding data.
|
|
8
|
+
|
|
9
|
+
SYNC: Mirrored in Topic Finder (claude_topic_finder/services/wos.py).
|
|
10
|
+
Changes to query construction, pagination, or record parsing must be propagated.
|
|
11
|
+
Topic Finder adds get_topics/get_trend_data; this version adds
|
|
12
|
+
verify_doi/batch_verify_dois. Core search logic should stay identical.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import asyncio
|
|
18
|
+
import logging
|
|
19
|
+
import re
|
|
20
|
+
from datetime import datetime, timezone
|
|
21
|
+
|
|
22
|
+
import httpx
|
|
23
|
+
|
|
24
|
+
from sources.base import ScholarlySource
|
|
25
|
+
from sources.models import Paper
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
# Common English stopwords — kept small to avoid false removals of domain terms
|
|
30
|
+
_STOPWORDS = frozenset(
|
|
31
|
+
"a an the and or but in on of to for is it that this with by from as at be "
|
|
32
|
+
"are was were been have has had do does did not no nor so if then than can "
|
|
33
|
+
"will would could should may might shall its we i you he she they our my "
|
|
34
|
+
"your their about into through during before after above below between"
|
|
35
|
+
.split()
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
_WORD_RE = re.compile(r"[a-zA-Z][\w-]*[a-zA-Z]|[a-zA-Z]{2,}")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _extract_search_terms(text: str, *, max_terms: int = 12) -> str:
|
|
42
|
+
"""Extract key terms from a long text for use in WoS TS=() queries."""
|
|
43
|
+
words = _WORD_RE.findall(text)
|
|
44
|
+
terms: list[str] = []
|
|
45
|
+
seen: set[str] = set()
|
|
46
|
+
for w in words:
|
|
47
|
+
lower = w.lower()
|
|
48
|
+
if lower in _STOPWORDS or len(lower) < 3:
|
|
49
|
+
continue
|
|
50
|
+
if lower not in seen:
|
|
51
|
+
seen.add(lower)
|
|
52
|
+
terms.append(w)
|
|
53
|
+
if len(terms) >= max_terms:
|
|
54
|
+
break
|
|
55
|
+
return " ".join(terms)
|
|
56
|
+
|
|
57
|
+
_BASE_URLS = {
|
|
58
|
+
"starter": "https://api.clarivate.com/apis/wos-starter/v1",
|
|
59
|
+
"expanded": "https://wos-api.clarivate.com/api/wos",
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class WosSource(ScholarlySource):
    """Web of Science implementation supporting both Starter and Expanded API tiers.

    Starter: /documents endpoint, max 50/page, no abstracts.
    Expanded: root endpoint, max 100/page, abstracts + affiliations + funding.

    Search methods never raise on HTTP or decoding problems: failures are
    logged and whatever was collected so far (possibly nothing) is returned.
    """

    # Resolver/scheme prefixes that may precede a bare DOI.
    _DOI_PREFIX_RE = re.compile(r"^(?:https?://(?:dx\.)?doi\.org/|doi:\s*)", re.IGNORECASE)
    # Number of DOIs combined into one OR query (WoS query length limits).
    _DOI_BATCH_SIZE = 10
    # sort_by value -> tier-specific sortField; anything else means relevance.
    # NOTE(review): the Starter API docs appear to describe sortField as
    # FIELD+ORDER (e.g. "PY+D"); confirm the dotted values below are accepted.
    _STARTER_SORT = {"cited_by_count": "TC.D", "publication_year": "PY.D"}
    _EXPANDED_SORT = {"cited_by_count": "TC+D", "publication_year": "PY+D"}

    def __init__(self, api_key: str, tier: str = "starter") -> None:
        """Create the HTTP client for the requested API tier.

        Args:
            api_key: Value sent in the ``X-ApiKey`` header of every request.
            tier: "starter" or "expanded" (case and surrounding whitespace are
                ignored). Unknown values fall back to "starter" with a warning.
        """
        normalized_tier = (tier or "starter").strip().lower()
        if normalized_tier not in _BASE_URLS:
            logger.warning("Unknown WoS API tier %r; falling back to 'starter'", tier)
            normalized_tier = "starter"

        self._api_key = api_key
        self._tier = normalized_tier
        self._per_page_max = 100 if normalized_tier == "expanded" else 50
        self._client = httpx.AsyncClient(
            base_url=_BASE_URLS[normalized_tier],
            headers={"X-ApiKey": api_key},
            timeout=30.0,
        )

    @property
    def source_name(self) -> str:
        """Human-readable source label; marks the Expanded tier explicitly."""
        tier_label = " (Expanded)" if self._tier == "expanded" else ""
        return f"Web of Science{tier_label}"

    @property
    def source_key(self) -> str:
        """Short machine key for this source."""
        return "wos"

    # ------------------------------------------------------------------
    # Public interface
    # ------------------------------------------------------------------

    async def search_works(
        self,
        query: str,
        *,
        year_from: int | None = None,
        year_to: int | None = None,
        sort_by: str = "relevance",
        limit: int = 50,
    ) -> list[Paper]:
        """Run a topic (TS=) search, optionally restricted by publication year.

        Args:
            query: Text placed verbatim inside ``TS=(...)``.
            year_from: First publication year; the range ends at ``year_to``
                or, when that is not given, the current UTC year.
            year_to: Last publication year; without ``year_from`` the range
                starts at 1900.
            sort_by: "relevance", "cited_by_count" or "publication_year".
            limit: Maximum number of papers to return.

        Returns:
            Up to ``limit`` papers.
        """
        wos_query = f"TS=({query})"
        if year_from:
            end_year = year_to or datetime.now(timezone.utc).year
            wos_query += f" AND PY=({year_from}-{end_year})"
        elif year_to:
            wos_query += f" AND PY=(1900-{year_to})"
        return await self._search(wos_query, sort_by, limit)

    async def verify_doi(self, doi: str) -> Paper | None:
        """Look up a single DOI and return the first matching paper, if any.

        The DOI may be bare or carry a resolver prefix such as
        ``https://doi.org/``. It is quoted in the query, as in
        ``batch_verify_dois``, so that DOIs containing parentheses do not
        break the ``DO=(...)`` expression.
        """
        clean_doi = self._strip_doi_prefix(doi)
        if not clean_doi:
            return None

        papers = await self._search(f'DO=("{clean_doi}")', "relevance", 1)
        if not papers:
            return None
        match = papers[0]
        match.verified_by = ["wos"]
        return match

    async def batch_verify_dois(self, dois: list[str]) -> dict[str, Paper | None]:
        """Verify many DOIs using OR queries of at most ``_DOI_BATCH_SIZE`` DOIs.

        Args:
            dois: DOIs, bare or with a resolver prefix.

        Returns:
            A dict keyed by the strings exactly as passed in; the value is the
            matching paper, or None when WoS returned no record for that DOI.
            Different spellings of the same DOI all receive the same paper.
        """
        results: dict[str, Paper | None] = {d: None for d in dois}

        # Group the caller's spellings by normalised DOI so each distinct DOI
        # is queried once and every equivalent spelling gets the result.
        originals_by_doi: dict[str, list[str]] = {}
        query_forms: dict[str, str] = {}
        for original in results:
            bare = self._strip_doi_prefix(original)
            if not bare:
                continue
            key = bare.lower()
            originals_by_doi.setdefault(key, []).append(original)
            query_forms.setdefault(key, bare)

        unique_dois = list(query_forms.values())
        for start in range(0, len(unique_dois), self._DOI_BATCH_SIZE):
            chunk = unique_dois[start:start + self._DOI_BATCH_SIZE]
            or_query = " OR ".join(f'"{d}"' for d in chunk)
            try:
                papers = await self._search(f"DO=({or_query})", "relevance", 50)
            except Exception:
                # Best effort: one failing chunk must not lose the others.
                logger.warning(
                    "WoS batch verify failed for chunk starting at %d", start, exc_info=True,
                )
                continue

            for paper in papers:
                paper.verified_by = ["wos"]
                if not paper.doi:
                    continue
                paper_key = self._strip_doi_prefix(paper.doi).lower()
                for original in originals_by_doi.get(paper_key, ()):
                    results[original] = paper

        return results

    async def find_similar_works(
        self,
        text: str,
        *,
        limit: int = 20,
    ) -> list[Paper]:
        """Search by key terms extracted from ``text`` (at most 12 terms).

        Returns an empty list when no usable term can be extracted.
        """
        query = _extract_search_terms(text, max_terms=12)
        if not query:
            return []
        return await self.search_works(query, sort_by="relevance", limit=limit)

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.aclose()

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    async def _search(self, wos_query: str, sort_by: str, limit: int) -> list[Paper]:
        """Dispatch a ready-made WoS query to the configured tier."""
        if self._tier == "expanded":
            return await self._search_expanded(wos_query, sort_by, limit)
        return await self._search_starter(wos_query, sort_by, limit)

    @classmethod
    def _strip_doi_prefix(cls, doi: str | None) -> str:
        """Return ``doi`` without resolver/scheme prefix; "" for non-strings."""
        if not isinstance(doi, str):
            return ""
        return cls._DOI_PREFIX_RE.sub("", doi.strip(), count=1)

    @staticmethod
    def _dig(node: object, *keys: str) -> object:
        """Follow ``keys`` through nested dicts; None if any level is not a dict."""
        for key in keys:
            if not isinstance(node, dict):
                return None
            node = node.get(key)
        return node

    @staticmethod
    def _as_list(value: object) -> list:
        """Normalise a JSON node to a list.

        A list is returned as-is, a missing/empty node becomes ``[]`` and any
        other single value (e.g. one dict where a list was expected) is
        wrapped in a one-element list.
        """
        if isinstance(value, list):
            return value
        if value is None or value == "" or value == {}:
            return []
        return [value]

    # ------------------------------------------------------------------
    # Starter API implementation
    # ------------------------------------------------------------------

    async def _search_starter(
        self, wos_query: str, sort_by: str, limit: int,
    ) -> list[Paper]:
        """Page through ``/documents`` until ``limit`` papers are collected.

        Stops early on an HTTP error, an empty or non-JSON body, a page
        without hits, or when the reported total has been reached.
        """
        sort_field = self._STARTER_SORT.get(sort_by, "relevance")

        papers: list[Paper] = []
        page = 1
        per_page = min(limit, self._per_page_max)

        while len(papers) < limit:
            try:
                resp = await self._client.get(
                    "/documents",
                    params={
                        "q": wos_query,
                        "limit": per_page,
                        "page": page,
                        "sortField": sort_field,
                        "db": "WOS",
                    },
                )
                resp.raise_for_status()

                if not resp.content or not resp.content.strip():
                    logger.warning("WoS empty response for: %s (page %d)", wos_query, page)
                    break

                content_type = resp.headers.get("content-type", "")
                if "json" not in content_type:
                    logger.warning("WoS non-JSON response (%s) for: %s", content_type, wos_query)
                    break

                data = resp.json()
            except httpx.HTTPStatusError as exc:
                logger.error("WoS HTTP %d for: %s", exc.response.status_code, wos_query)
                break
            except Exception:
                logger.exception("WoS search failed for: %s (page %d)", wos_query, page)
                break

            hits = self._as_list(self._dig(data, "hits"))
            if not hits:
                break

            for hit in hits:
                if not isinstance(hit, dict):
                    continue
                papers.append(self._starter_to_paper(hit))
                if len(papers) >= limit:
                    break

            total = self._dig(data, "metadata", "total") or 0
            if page * per_page >= total:
                break
            page += 1

        return papers[:limit]

    # ------------------------------------------------------------------
    # Expanded API implementation
    # ------------------------------------------------------------------

    async def _search_expanded(
        self, wos_query: str, sort_by: str, limit: int,
    ) -> list[Paper]:
        """Run a query against the Expanded API and page via its query id.

        The first request submits the query; once a ``QueryID`` is known,
        later pages are fetched from ``/query/{id}``. Stops early on an HTTP
        error, an empty or non-JSON body, a page without records, or when
        ``RecordsFound`` has been reached.
        """
        # Expanded uses different sort format: field+direction (e.g. PY+D)
        sort_field = self._EXPANDED_SORT.get(sort_by, "RS+D")  # RS+D: relevance score descending

        papers: list[Paper] = []
        first_record = 1
        per_page = min(limit, self._per_page_max)
        query_id: int | None = None
        records_found = 0

        while len(papers) < limit:
            try:
                if query_id is None:
                    # First request: search query
                    resp = await self._client.get(
                        "",
                        params={
                            "databaseId": "WOS",
                            "usrQuery": wos_query,
                            "count": per_page,
                            "firstRecord": first_record,
                            "sortField": sort_field,
                            "optionView": "FR",
                        },
                    )
                else:
                    # Subsequent pages: use queryId
                    resp = await self._client.get(
                        f"/query/{query_id}",
                        params={
                            "count": per_page,
                            "firstRecord": first_record,
                            "sortField": sort_field,
                            "optionView": "FR",
                        },
                    )
                resp.raise_for_status()

                if not resp.content or not resp.content.strip():
                    logger.warning("WoS Expanded empty response for: %s", wos_query)
                    break

                content_type = resp.headers.get("content-type", "")
                if "json" not in content_type:
                    logger.warning("WoS Expanded non-JSON response (%s)", content_type)
                    break

                data = resp.json()
            except httpx.HTTPStatusError as exc:
                logger.error(
                    "WoS Expanded HTTP %d for: %s. Body: %.200s",
                    exc.response.status_code, wos_query, exc.response.text,
                )
                break
            except Exception:
                logger.exception("WoS Expanded search failed for: %s", wos_query)
                break

            # Extract query metadata. Values from an earlier page are kept
            # when a later page does not repeat them.
            query_result = self._dig(data, "QueryResult")
            if isinstance(query_result, dict):
                if query_id is None:
                    query_id = query_result.get("QueryID")
                try:
                    records_found = int(query_result.get("RecordsFound"))
                except (TypeError, ValueError):
                    pass

            # Extract records.
            # NOTE(review): /query/{id} pages may omit the "Data" wrapper and
            # put "Records" at the top level — confirm against the API schema.
            container = self._dig(data, "Data")
            if not isinstance(container, dict):
                container = data
            recs = self._as_list(self._dig(container, "Records", "records", "REC"))
            if not recs:
                break

            for rec in recs:
                if not isinstance(rec, dict):
                    continue
                papers.append(self._expanded_to_paper(rec))
                if len(papers) >= limit:
                    break

            first_record += len(recs)
            if first_record > records_found:
                break

        return papers[:limit]

    # ------------------------------------------------------------------
    # Record converters
    # ------------------------------------------------------------------

    @staticmethod
    def _starter_to_paper(hit: dict) -> Paper:
        """Convert WoS Starter API hit to Paper.

        Missing or malformed fields degrade to empty values (empty title,
        year 0, zero citations) instead of raising.
        """
        dig = WosSource._dig
        as_list = WosSource._as_list

        authors = []
        for author_entry in as_list(dig(hit, "names", "authors")):
            if not isinstance(author_entry, dict):
                continue
            display_name = author_entry.get("displayName") or author_entry.get("wosStandard", "")
            if display_name:
                authors.append(display_name)

        keywords = dig(hit, "keywords", "authorKeywords") or []

        # Use the count reported for the WoS database; compared
        # case-insensitively so "wos" and "WOS" are both recognised.
        cited_by = 0
        for c in as_list(hit.get("citations")):
            if isinstance(c, dict) and str(c.get("db", "")).lower() == "wos":
                try:
                    cited_by = int(c.get("count", 0))
                except (ValueError, TypeError):
                    pass
                break

        source_info = hit.get("source") or {}
        pub_year = 0
        pub_date = source_info.get("publishYear")
        if pub_date:
            try:
                pub_year = int(pub_date)
            except (ValueError, TypeError):
                pass

        doi = dig(hit, "identifiers", "doi")
        if not isinstance(doi, str):
            doi = None
        if doi and not doi.startswith("http"):
            doi = f"https://doi.org/{doi}"

        uid = hit.get("uid", "")

        return Paper(
            source_id=f"wos:{uid}",
            title=hit.get("title", "") or "",
            abstract=None,  # Starter API doesn't return abstracts
            authors=authors,
            publication_year=pub_year,
            cited_by_count=cited_by,
            source_name=source_info.get("sourceTitle"),
            doi=doi,
            keywords=keywords,
            url=doi or f"https://www.webofscience.com/wos/woscc/full-record/{uid}",
            verified_by=["wos"],
        )

    @staticmethod
    def _expanded_abstract(fullrecord: object) -> str | None:
        """Return the abstract text from ``fullrecord_metadata``, if present.

        ``abstract_text`` may be a plain string or a dict whose ``p`` is a
        string or a list of paragraphs; paragraphs are joined with spaces.
        """
        block = WosSource._dig(fullrecord, "abstracts", "abstract")
        if isinstance(block, list):
            # Several abstract entries: use the first dict-shaped one.
            block = next((b for b in block if isinstance(b, dict)), None)
        abstract_text = WosSource._dig(block, "abstract_text")

        if isinstance(abstract_text, str):
            return abstract_text
        if isinstance(abstract_text, dict):
            p = abstract_text.get("p")
            if isinstance(p, list):
                return " ".join(str(para) for para in p)
            if isinstance(p, str):
                return p
        return None

    @staticmethod
    def _expanded_doi(static: object, dynamic: object) -> str | None:
        """Return the record's DOI as an ``https://doi.org/`` URL, or None.

        Tries dynamic_data identifiers first (most reliable), then the static
        summary identifiers, then ``static_data.item.ids.doi``.
        """
        dig = WosSource._dig
        as_list = WosSource._as_list

        doi = None
        identifier_lists = (
            dig(dynamic, "cluster_related", "identifiers", "identifier"),
            dig(static, "summary", "identifiers", "identifier"),
        )
        for identifiers in identifier_lists:
            for ident in as_list(identifiers):
                if isinstance(ident, dict) and ident.get("type") == "doi":
                    doi = ident.get("value")
                    break
            if doi:
                break
        if not doi:
            doi = dig(static, "item", "ids", "doi")

        if not isinstance(doi, str) or not doi:
            return None
        if not doi.startswith("http"):
            doi = f"https://doi.org/{doi}"
        return doi

    @staticmethod
    def _expanded_to_paper(rec: dict) -> Paper:
        """Convert WoS Expanded API record to Paper.

        Nodes that hold a single dict where a list is expected are treated as
        a one-element list, and missing or malformed nodes degrade to empty
        values instead of raising.
        """
        dig = WosSource._dig
        as_list = WosSource._as_list

        uid = rec.get("UID", "")
        static = rec.get("static_data") or {}
        dynamic = rec.get("dynamic_data") or {}
        summary = dig(static, "summary") or {}
        fullrecord = dig(static, "fullrecord_metadata") or {}

        # Title — look for type "item" (article title); type "source" is the venue
        title = ""
        source_name = None
        for t in as_list(dig(summary, "titles", "title")):
            if not isinstance(t, dict):
                continue
            if t.get("type") == "item":
                title = t.get("content", "") or ""
            elif t.get("type") == "source":
                source_name = t.get("content")

        # Authors
        authors = []
        for name_entry in as_list(dig(summary, "names", "name")):
            if not isinstance(name_entry, dict) or name_entry.get("role") != "author":
                continue
            display = (
                name_entry.get("display_name")
                or name_entry.get("full_name")
                or name_entry.get("wos_standard", "")
            )
            if display:
                authors.append(display)

        # Keywords
        keywords = dig(fullrecord, "keywords", "keyword") or []
        if isinstance(keywords, str):
            keywords = [keywords]

        # Publication year
        pub_year = 0
        pubyear = dig(summary, "pub_info", "pubyear")
        if pubyear:
            try:
                pub_year = int(pubyear)
            except (ValueError, TypeError):
                pass

        # Citation count — from dynamic_data.citation_related.tc_list.silo_tc
        cited_by = 0
        for tc in as_list(dig(dynamic, "citation_related", "tc_list", "silo_tc")):
            if isinstance(tc, dict) and tc.get("coll_id") == "WOS":
                try:
                    cited_by = int(tc.get("local_count", 0))
                except (ValueError, TypeError):
                    pass
                break

        doi = WosSource._expanded_doi(static, dynamic)

        return Paper(
            source_id=f"wos:{uid}",
            title=title,
            abstract=WosSource._expanded_abstract(fullrecord),
            authors=authors,
            publication_year=pub_year,
            cited_by_count=cited_by,
            source_name=source_name,
            doi=doi,
            keywords=keywords,
            url=doi or f"https://www.webofscience.com/wos/woscc/full-record/{uid}",
            verified_by=["wos"],
        )
|