flonat-research 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285) hide show
  1. package/.claude/agents/domain-reviewer.md +336 -0
  2. package/.claude/agents/fixer.md +226 -0
  3. package/.claude/agents/paper-critic.md +370 -0
  4. package/.claude/agents/peer-reviewer.md +289 -0
  5. package/.claude/agents/proposal-reviewer.md +215 -0
  6. package/.claude/agents/referee2-reviewer.md +367 -0
  7. package/.claude/agents/references/journal-referee-profiles.md +354 -0
  8. package/.claude/agents/references/paper-critic/council-personas.md +77 -0
  9. package/.claude/agents/references/paper-critic/council-prompts.md +198 -0
  10. package/.claude/agents/references/peer-reviewer/report-template.md +199 -0
  11. package/.claude/agents/references/peer-reviewer/sa-prompts.md +260 -0
  12. package/.claude/agents/references/peer-reviewer/security-scan.md +188 -0
  13. package/.claude/agents/references/proposal-reviewer/report-template.md +144 -0
  14. package/.claude/agents/references/proposal-reviewer/sa-prompts.md +149 -0
  15. package/.claude/agents/references/referee-config.md +114 -0
  16. package/.claude/agents/references/referee2-reviewer/audit-checklists.md +287 -0
  17. package/.claude/agents/references/referee2-reviewer/report-template.md +334 -0
  18. package/.claude/rules/design-before-results.md +52 -0
  19. package/.claude/rules/ignore-agents-md.md +17 -0
  20. package/.claude/rules/ignore-gemini-md.md +17 -0
  21. package/.claude/rules/lean-claude-md.md +45 -0
  22. package/.claude/rules/learn-tags.md +99 -0
  23. package/.claude/rules/overleaf-separation.md +67 -0
  24. package/.claude/rules/plan-first.md +175 -0
  25. package/.claude/rules/read-docs-first.md +50 -0
  26. package/.claude/rules/scope-discipline.md +28 -0
  27. package/.claude/settings.json +125 -0
  28. package/.context/current-focus.md +33 -0
  29. package/.context/preferences/priorities.md +36 -0
  30. package/.context/preferences/task-naming.md +28 -0
  31. package/.context/profile.md +29 -0
  32. package/.context/projects/_index.md +41 -0
  33. package/.context/projects/papers/nudge-exp.md +22 -0
  34. package/.context/projects/papers/uncertainty.md +31 -0
  35. package/.context/resources/claude-scientific-writer-review.md +48 -0
  36. package/.context/resources/cunningham-multi-analyst-agents.md +104 -0
  37. package/.context/resources/cunningham-multilang-code-audit.md +62 -0
  38. package/.context/resources/google-ai-co-scientist-review.md +72 -0
  39. package/.context/resources/karpathy-llm-council-review.md +58 -0
  40. package/.context/resources/multi-coder-reliability-protocol.md +175 -0
  41. package/.context/resources/pedro-santanna-takeaways.md +96 -0
  42. package/.context/resources/venue-rankings/abs_ajg_2024.csv +1823 -0
  43. package/.context/resources/venue-rankings/abs_ajg_2024_econ.csv +356 -0
  44. package/.context/resources/venue-rankings/cabs_4_4star_theory.csv +40 -0
  45. package/.context/resources/venue-rankings/core_2026.csv +801 -0
  46. package/.context/resources/venue-rankings.md +147 -0
  47. package/.context/workflows/README.md +69 -0
  48. package/.context/workflows/daily-review.md +91 -0
  49. package/.context/workflows/meeting-actions.md +108 -0
  50. package/.context/workflows/replication-protocol.md +155 -0
  51. package/.context/workflows/weekly-review.md +113 -0
  52. package/.mcp-server-biblio/formatters.py +158 -0
  53. package/.mcp-server-biblio/pyproject.toml +11 -0
  54. package/.mcp-server-biblio/server.py +678 -0
  55. package/.mcp-server-biblio/sources/__init__.py +14 -0
  56. package/.mcp-server-biblio/sources/base.py +73 -0
  57. package/.mcp-server-biblio/sources/formatters.py +83 -0
  58. package/.mcp-server-biblio/sources/models.py +22 -0
  59. package/.mcp-server-biblio/sources/multi_source.py +243 -0
  60. package/.mcp-server-biblio/sources/openalex_source.py +183 -0
  61. package/.mcp-server-biblio/sources/scopus_source.py +309 -0
  62. package/.mcp-server-biblio/sources/wos_source.py +508 -0
  63. package/.mcp-server-biblio/uv.lock +896 -0
  64. package/.scripts/README.md +161 -0
  65. package/.scripts/ai_pattern_density.py +446 -0
  66. package/.scripts/conf +445 -0
  67. package/.scripts/config.py +122 -0
  68. package/.scripts/count_inventory.py +275 -0
  69. package/.scripts/daily_digest.py +288 -0
  70. package/.scripts/done +177 -0
  71. package/.scripts/extract_meeting_actions.py +223 -0
  72. package/.scripts/focus +176 -0
  73. package/.scripts/generate-codex-agents-md.py +217 -0
  74. package/.scripts/inbox +194 -0
  75. package/.scripts/notion_helpers.py +325 -0
  76. package/.scripts/openalex/query_helpers.py +306 -0
  77. package/.scripts/papers +227 -0
  78. package/.scripts/query +223 -0
  79. package/.scripts/session-history.py +201 -0
  80. package/.scripts/skill-health.py +516 -0
  81. package/.scripts/skill-log-miner.py +273 -0
  82. package/.scripts/sync-to-codex.sh +252 -0
  83. package/.scripts/task +213 -0
  84. package/.scripts/tasks +190 -0
  85. package/.scripts/week +206 -0
  86. package/CLAUDE.md +197 -0
  87. package/LICENSE +21 -0
  88. package/MEMORY.md +38 -0
  89. package/README.md +269 -0
  90. package/docs/agents.md +44 -0
  91. package/docs/bibliography-setup.md +55 -0
  92. package/docs/council-mode.md +36 -0
  93. package/docs/getting-started.md +245 -0
  94. package/docs/hooks.md +38 -0
  95. package/docs/mcp-servers.md +82 -0
  96. package/docs/notion-setup.md +109 -0
  97. package/docs/rules.md +33 -0
  98. package/docs/scripts.md +303 -0
  99. package/docs/setup-overview/setup-overview.pdf +0 -0
  100. package/docs/skills.md +70 -0
  101. package/docs/system.md +159 -0
  102. package/hooks/block-destructive-git.sh +66 -0
  103. package/hooks/context-monitor.py +114 -0
  104. package/hooks/postcompact-restore.py +157 -0
  105. package/hooks/precompact-autosave.py +181 -0
  106. package/hooks/promise-checker.sh +124 -0
  107. package/hooks/protect-source-files.sh +81 -0
  108. package/hooks/resume-context-loader.sh +53 -0
  109. package/hooks/startup-context-loader.sh +102 -0
  110. package/package.json +51 -0
  111. package/packages/cli-council/.github/workflows/claude-code-review.yml +44 -0
  112. package/packages/cli-council/.github/workflows/claude.yml +50 -0
  113. package/packages/cli-council/README.md +100 -0
  114. package/packages/cli-council/pyproject.toml +43 -0
  115. package/packages/cli-council/src/cli_council/__init__.py +19 -0
  116. package/packages/cli-council/src/cli_council/__main__.py +185 -0
  117. package/packages/cli-council/src/cli_council/backends/__init__.py +8 -0
  118. package/packages/cli-council/src/cli_council/backends/base.py +81 -0
  119. package/packages/cli-council/src/cli_council/backends/claude.py +25 -0
  120. package/packages/cli-council/src/cli_council/backends/codex.py +27 -0
  121. package/packages/cli-council/src/cli_council/backends/gemini.py +26 -0
  122. package/packages/cli-council/src/cli_council/checkpoint.py +212 -0
  123. package/packages/cli-council/src/cli_council/config.py +51 -0
  124. package/packages/cli-council/src/cli_council/council.py +391 -0
  125. package/packages/cli-council/src/cli_council/models.py +46 -0
  126. package/packages/llm-council/.github/workflows/claude-code-review.yml +44 -0
  127. package/packages/llm-council/.github/workflows/claude.yml +50 -0
  128. package/packages/llm-council/README.md +453 -0
  129. package/packages/llm-council/pyproject.toml +42 -0
  130. package/packages/llm-council/src/llm_council/__init__.py +23 -0
  131. package/packages/llm-council/src/llm_council/__main__.py +259 -0
  132. package/packages/llm-council/src/llm_council/checkpoint.py +193 -0
  133. package/packages/llm-council/src/llm_council/client.py +253 -0
  134. package/packages/llm-council/src/llm_council/config.py +232 -0
  135. package/packages/llm-council/src/llm_council/council.py +482 -0
  136. package/packages/llm-council/src/llm_council/models.py +46 -0
  137. package/packages/mcp-bibliography/MEMORY.md +31 -0
  138. package/packages/mcp-bibliography/_app.py +226 -0
  139. package/packages/mcp-bibliography/formatters.py +158 -0
  140. package/packages/mcp-bibliography/log/2026-03-13-2100.md +35 -0
  141. package/packages/mcp-bibliography/pyproject.toml +15 -0
  142. package/packages/mcp-bibliography/run.sh +20 -0
  143. package/packages/mcp-bibliography/scholarly_formatters.py +83 -0
  144. package/packages/mcp-bibliography/server.py +1857 -0
  145. package/packages/mcp-bibliography/tools/__init__.py +28 -0
  146. package/packages/mcp-bibliography/tools/_registry.py +19 -0
  147. package/packages/mcp-bibliography/tools/altmetric.py +107 -0
  148. package/packages/mcp-bibliography/tools/core.py +92 -0
  149. package/packages/mcp-bibliography/tools/dblp.py +52 -0
  150. package/packages/mcp-bibliography/tools/openalex.py +296 -0
  151. package/packages/mcp-bibliography/tools/opencitations.py +102 -0
  152. package/packages/mcp-bibliography/tools/openreview.py +179 -0
  153. package/packages/mcp-bibliography/tools/orcid.py +131 -0
  154. package/packages/mcp-bibliography/tools/scholarly.py +575 -0
  155. package/packages/mcp-bibliography/tools/unpaywall.py +63 -0
  156. package/packages/mcp-bibliography/tools/zenodo.py +123 -0
  157. package/packages/mcp-bibliography/uv.lock +711 -0
  158. package/scripts/setup.sh +143 -0
  159. package/skills/beamer-deck/SKILL.md +199 -0
  160. package/skills/beamer-deck/references/quality-rubric.md +54 -0
  161. package/skills/beamer-deck/references/review-prompts.md +106 -0
  162. package/skills/bib-validate/SKILL.md +261 -0
  163. package/skills/bib-validate/references/council-mode.md +34 -0
  164. package/skills/bib-validate/references/deep-verify.md +79 -0
  165. package/skills/bib-validate/references/fix-mode.md +36 -0
  166. package/skills/bib-validate/references/openalex-verification.md +45 -0
  167. package/skills/bib-validate/references/preprint-check.md +31 -0
  168. package/skills/bib-validate/references/ref-manager-crossref.md +41 -0
  169. package/skills/bib-validate/references/report-template.md +82 -0
  170. package/skills/code-archaeology/SKILL.md +141 -0
  171. package/skills/code-review/SKILL.md +265 -0
  172. package/skills/code-review/references/quality-rubric.md +67 -0
  173. package/skills/consolidate-memory/SKILL.md +208 -0
  174. package/skills/context-status/SKILL.md +126 -0
  175. package/skills/creation-guard/SKILL.md +230 -0
  176. package/skills/devils-advocate/SKILL.md +130 -0
  177. package/skills/devils-advocate/references/competing-hypotheses.md +83 -0
  178. package/skills/init-project/SKILL.md +115 -0
  179. package/skills/init-project-course/references/memory-and-settings.md +92 -0
  180. package/skills/init-project-course/references/organise-templates.md +94 -0
  181. package/skills/init-project-course/skill.md +147 -0
  182. package/skills/init-project-light/skill.md +139 -0
  183. package/skills/init-project-research/SKILL.md +368 -0
  184. package/skills/init-project-research/references/atlas-pipeline-sync.md +70 -0
  185. package/skills/init-project-research/references/atlas-schema.md +81 -0
  186. package/skills/init-project-research/references/confirmation-report.md +39 -0
  187. package/skills/init-project-research/references/domain-profile-template.md +104 -0
  188. package/skills/init-project-research/references/interview-round3.md +34 -0
  189. package/skills/init-project-research/references/literature-discovery.md +43 -0
  190. package/skills/init-project-research/references/scaffold-details.md +197 -0
  191. package/skills/init-project-research/templates/field-calibration.md +60 -0
  192. package/skills/init-project-research/templates/pipeline-manifest.md +63 -0
  193. package/skills/init-project-research/templates/run-all.sh +116 -0
  194. package/skills/init-project-research/templates/seed-files.md +337 -0
  195. package/skills/insights-deck/SKILL.md +151 -0
  196. package/skills/interview-me/SKILL.md +157 -0
  197. package/skills/latex/SKILL.md +141 -0
  198. package/skills/latex/references/latex-configs.md +183 -0
  199. package/skills/latex-autofix/SKILL.md +230 -0
  200. package/skills/latex-autofix/references/known-errors.md +183 -0
  201. package/skills/latex-autofix/references/quality-rubric.md +50 -0
  202. package/skills/latex-health-check/SKILL.md +161 -0
  203. package/skills/learn/SKILL.md +220 -0
  204. package/skills/learn/scripts/validate_skill.py +265 -0
  205. package/skills/lessons-learned/SKILL.md +201 -0
  206. package/skills/literature/SKILL.md +335 -0
  207. package/skills/literature/references/agent-templates.md +393 -0
  208. package/skills/literature/references/bibliometric-apis.md +44 -0
  209. package/skills/literature/references/cli-council-search.md +79 -0
  210. package/skills/literature/references/openalex-api-guide.md +371 -0
  211. package/skills/literature/references/openalex-common-queries.md +381 -0
  212. package/skills/literature/references/openalex-workflows.md +248 -0
  213. package/skills/literature/references/reference-manager-sync.md +36 -0
  214. package/skills/literature/references/scopus-api-guide.md +208 -0
  215. package/skills/literature/references/wos-api-guide.md +308 -0
  216. package/skills/multi-perspective/SKILL.md +311 -0
  217. package/skills/multi-perspective/references/computational-many-analysts.md +77 -0
  218. package/skills/pipeline-manifest/SKILL.md +226 -0
  219. package/skills/pre-submission-report/SKILL.md +153 -0
  220. package/skills/process-reviews/SKILL.md +244 -0
  221. package/skills/process-reviews/references/rr-routing.md +101 -0
  222. package/skills/project-deck/SKILL.md +87 -0
  223. package/skills/project-safety/SKILL.md +135 -0
  224. package/skills/proofread/SKILL.md +254 -0
  225. package/skills/proofread/references/quality-rubric.md +104 -0
  226. package/skills/python-env/SKILL.md +57 -0
  227. package/skills/quarto-deck/SKILL.md +226 -0
  228. package/skills/quarto-deck/references/markdown-format.md +143 -0
  229. package/skills/quarto-deck/references/quality-rubric.md +54 -0
  230. package/skills/save-context/SKILL.md +174 -0
  231. package/skills/session-log/SKILL.md +98 -0
  232. package/skills/shared/concept-validation-gate.md +161 -0
  233. package/skills/shared/council-protocol.md +265 -0
  234. package/skills/shared/distribution-diagnostics.md +164 -0
  235. package/skills/shared/engagement-stratified-sampling.md +218 -0
  236. package/skills/shared/escalation-protocol.md +74 -0
  237. package/skills/shared/external-audit-protocol.md +205 -0
  238. package/skills/shared/intercoder-reliability.md +256 -0
  239. package/skills/shared/mcp-degradation.md +81 -0
  240. package/skills/shared/method-probing-questions.md +163 -0
  241. package/skills/shared/multi-language-conventions.md +143 -0
  242. package/skills/shared/paid-api-safety.md +174 -0
  243. package/skills/shared/palettes.md +90 -0
  244. package/skills/shared/progressive-disclosure.md +92 -0
  245. package/skills/shared/project-documentation-content.md +443 -0
  246. package/skills/shared/project-documentation-format.md +281 -0
  247. package/skills/shared/project-documentation.md +100 -0
  248. package/skills/shared/publication-output.md +138 -0
  249. package/skills/shared/quality-scoring.md +70 -0
  250. package/skills/shared/reference-resolution.md +77 -0
  251. package/skills/shared/research-quality-rubric.md +165 -0
  252. package/skills/shared/rhetoric-principles.md +54 -0
  253. package/skills/shared/skill-design-patterns.md +272 -0
  254. package/skills/shared/skill-index.md +240 -0
  255. package/skills/shared/system-documentation.md +334 -0
  256. package/skills/shared/tikz-rules.md +402 -0
  257. package/skills/shared/validation-tiers.md +121 -0
  258. package/skills/shared/venue-guides/README.md +46 -0
  259. package/skills/shared/venue-guides/cell_press_style.md +483 -0
  260. package/skills/shared/venue-guides/conferences_formatting.md +564 -0
  261. package/skills/shared/venue-guides/cs_conference_style.md +463 -0
  262. package/skills/shared/venue-guides/examples/cell_summary_example.md +247 -0
  263. package/skills/shared/venue-guides/examples/medical_structured_abstract.md +313 -0
  264. package/skills/shared/venue-guides/examples/nature_abstract_examples.md +213 -0
  265. package/skills/shared/venue-guides/examples/neurips_introduction_example.md +245 -0
  266. package/skills/shared/venue-guides/journals_formatting.md +486 -0
  267. package/skills/shared/venue-guides/medical_journal_styles.md +535 -0
  268. package/skills/shared/venue-guides/ml_conference_style.md +556 -0
  269. package/skills/shared/venue-guides/nature_science_style.md +405 -0
  270. package/skills/shared/venue-guides/reviewer_expectations.md +417 -0
  271. package/skills/shared/venue-guides/venue_writing_styles.md +321 -0
  272. package/skills/split-pdf/SKILL.md +172 -0
  273. package/skills/split-pdf/methodology.md +48 -0
  274. package/skills/sync-notion/SKILL.md +93 -0
  275. package/skills/system-audit/SKILL.md +157 -0
  276. package/skills/system-audit/references/sub-agent-prompts.md +294 -0
  277. package/skills/task-management/SKILL.md +131 -0
  278. package/skills/update-focus/SKILL.md +204 -0
  279. package/skills/update-project-doc/SKILL.md +194 -0
  280. package/skills/validate-bib/SKILL.md +242 -0
  281. package/skills/validate-bib/references/council-mode.md +34 -0
  282. package/skills/validate-bib/references/deep-verify.md +71 -0
  283. package/skills/validate-bib/references/openalex-verification.md +45 -0
  284. package/skills/validate-bib/references/preprint-check.md +31 -0
  285. package/skills/validate-bib/references/report-template.md +62 -0
@@ -0,0 +1,508 @@
1
+ """Web of Science API adapter (Starter + Expanded tiers).
2
+
3
+ Uses httpx async client. Requires WOS_API_KEY env var.
4
+ Optional WOS_API_TIER env var: "starter" (default) or "expanded".
5
+
6
+ Expanded tier provides: abstracts, higher per-page limits (100 vs 50),
7
+ full author affiliations, funding data.
8
+
9
+ SYNC: Mirrored in Topic Finder (claude_topic_finder/services/wos.py).
10
+ Changes to query construction, pagination, or record parsing must be propagated.
11
+ Topic Finder adds get_topics/get_trend_data; this version adds
12
+ verify_doi/batch_verify_dois. Core search logic should stay identical.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import asyncio
18
+ import logging
19
+ import re
20
+ from datetime import datetime, timezone
21
+
22
+ import httpx
23
+
24
+ from sources.base import ScholarlySource
25
+ from sources.models import Paper
26
+
27
# Module-level logger; named after this module so handlers can be scoped to it.
logger = logging.getLogger(__name__)

# Common English stopwords — kept small to avoid false removals of domain terms.
# Stored as a frozenset so membership tests in _extract_search_terms are O(1).
_STOPWORDS = frozenset(
    "a an the and or but in on of to for is it that this with by from as at be "
    "are was were been have has had do does did not no nor so if then than can "
    "will would could should may might shall its we i you he she they our my "
    "your their about into through during before after above below between"
    .split()
)

# Candidate search term: starts and ends with an ASCII letter, with word
# characters or hyphens allowed in between (so "state-of-the-art" and "x2y"
# are single tokens). Single letters and purely numeric tokens never match.
# The second alternative can only match text the first one already matches;
# it is kept so the pattern stays identical to the mirrored implementation.
_WORD_RE = re.compile(r"[a-zA-Z][\w-]*[a-zA-Z]|[a-zA-Z]{2,}")
39
+
40
+
41
def _extract_search_terms(text: str, *, max_terms: int = 12) -> str:
    """Extract key terms from a long text for use in WoS TS=() queries.

    Tokens are taken in order of first appearance. A token is dropped when it
    is a stopword, shorter than three characters, a Web of Science search
    operator, or a case-insensitive repeat of an earlier token. Kept tokens
    retain their original casing.

    Args:
        text: Free text (e.g. a title or abstract) to mine for terms.
        max_terms: Maximum number of terms to return. Values below 1 yield
            an empty string.

    Returns:
        The selected terms joined by single spaces, or "" when nothing
        qualifies.
    """
    if max_terms < 1:
        return ""

    # These words are boolean/proximity operators in WoS query syntax; left
    # unquoted inside TS=() they would change the query instead of being
    # searched for. "and"/"or"/"not" are also stopwords, but they are listed
    # here so query safety does not depend on the stopword list.
    wos_operators = ("and", "or", "not", "near", "same")

    terms: list[str] = []
    seen: set[str] = set()
    for word in _WORD_RE.findall(text):
        lower = word.lower()
        if len(lower) < 3 or lower in _STOPWORDS or lower in wos_operators:
            continue
        if lower in seen:
            continue
        seen.add(lower)
        terms.append(word)
        if len(terms) >= max_terms:
            break
    return " ".join(terms)
56
+
57
# API root per tier. Keys are the tier names accepted by WosSource; the
# "starter" entry doubles as the fallback for unrecognised tiers.
_BASE_URLS = {
    "starter": "https://api.clarivate.com/apis/wos-starter/v1",
    "expanded": "https://wos-api.clarivate.com/api/wos",
}
61
+
62
+
63
class WosSource(ScholarlySource):
    """Web of Science implementation supporting both Starter and Expanded API tiers.

    Starter: /documents endpoint, max 50/page, no abstracts.
    Expanded: root endpoint, max 100/page, abstracts + affiliations + funding.
    """

    # Prefixes removed (case-insensitively) before a DOI is placed in a DO=()
    # query or compared against another DOI.
    _DOI_PREFIXES = (
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "doi:",
    )

    def __init__(self, api_key: str, tier: str = "starter") -> None:
        """Create the adapter and its HTTP client.

        Args:
            api_key: Clarivate API key, sent as the ``X-ApiKey`` header.
            tier: "starter" (default) or "expanded". Matching ignores case
                and surrounding whitespace; any other value falls back to
                "starter" with a warning.
        """
        normalised_tier = (tier or "starter").strip().lower()
        if normalised_tier not in _BASE_URLS:
            logger.warning("Unknown WoS tier %r; falling back to starter", tier)
            normalised_tier = "starter"

        self._api_key = api_key
        self._tier = normalised_tier
        self._per_page_max = 100 if normalised_tier == "expanded" else 50
        self._client = httpx.AsyncClient(
            base_url=_BASE_URLS[normalised_tier],
            headers={"X-ApiKey": api_key},
            timeout=30.0,
        )

    @property
    def source_name(self) -> str:
        """Human-readable source label; the Expanded tier is marked as such."""
        tier_label = " (Expanded)" if self._tier == "expanded" else ""
        return f"Web of Science{tier_label}"

    @property
    def source_key(self) -> str:
        """Short machine key for this source."""
        return "wos"

    # ------------------------------------------------------------------
    # Public interface
    # ------------------------------------------------------------------

    async def search_works(
        self,
        query: str,
        *,
        year_from: int | None = None,
        year_to: int | None = None,
        sort_by: str = "relevance",
        limit: int = 50,
    ) -> list[Paper]:
        """Run a topic (TS=) search, optionally restricted by publication year.

        Args:
            query: Text placed verbatim inside ``TS=(...)``.
            year_from: First publication year. When given without
                ``year_to``, the range ends at the current UTC year.
            year_to: Last publication year. When given without
                ``year_from``, the range starts at 1900.
            sort_by: "relevance", "cited_by_count" or "publication_year";
                anything else is treated as relevance.
            limit: Maximum number of papers to return.

        Returns:
            Up to ``limit`` papers; an empty list on request failure.
        """
        wos_query = f"TS=({query})"
        if year_from:
            current_year = datetime.now(timezone.utc).year
            wos_query += f" AND PY=({year_from}-{year_to or current_year})"
        elif year_to:
            wos_query += f" AND PY=(1900-{year_to})"

        return await self._search(wos_query, sort_by, limit)

    async def verify_doi(self, doi: str) -> Paper | None:
        """Look up a single DOI.

        Args:
            doi: Bare DOI, ``doi:``-prefixed DOI, or doi.org URL.

        Returns:
            The first matching paper with ``verified_by`` set to ["wos"],
            or None when the DOI is empty or nothing was found.
        """
        clean_doi = self._bare_doi(doi)
        if not clean_doi:
            return None

        # Quoted exactly as batch_verify_dois does, so DOIs containing
        # parentheses or other query syntax are matched literally.
        wos_query = f'DO=("{clean_doi}")'

        papers = await self._search(wos_query, "relevance", 1)
        if not papers:
            return None
        papers[0].verified_by = ["wos"]
        return papers[0]

    async def batch_verify_dois(self, dois: list[str]) -> dict[str, Paper | None]:
        """Look up many DOIs, ten per request.

        Args:
            dois: DOIs in any of the spellings accepted by ``verify_doi``.

        Returns:
            A dict keyed by every input string exactly as given; the value
            is the matching paper, or None when not found or when the
            chunk's lookup failed. Different spellings of the same DOI all
            receive the same paper.
        """
        results: dict[str, Paper | None] = {d: None for d in dois}

        # Process in chunks of 10 (WoS query length limits)
        for i in range(0, len(dois), 10):
            # Lower-cased bare DOI -> every input spelling that produced it.
            originals_by_key: dict[str, list[str]] = {}
            query_dois: list[str] = []
            for orig in dois[i:i + 10]:
                bare = self._bare_doi(orig)
                if not bare:
                    continue
                key = bare.lower()
                if key not in originals_by_key:
                    originals_by_key[key] = []
                    query_dois.append(bare)
                originals_by_key[key].append(orig)

            if not query_dois:
                continue

            or_query = " OR ".join(f'"{d}"' for d in query_dois)
            wos_query = f"DO=({or_query})"

            try:
                papers = await self._search(wos_query, "relevance", 50)

                for paper in papers:
                    paper.verified_by = ["wos"]
                    if not paper.doi:
                        continue
                    key = self._bare_doi(paper.doi).lower()
                    for orig in originals_by_key.get(key, ()):
                        results[orig] = paper
            except Exception:
                # Best effort: a failed chunk leaves its DOIs as None and the
                # remaining chunks are still attempted.
                logger.warning(
                    "WoS batch verify failed for chunk starting at %d", i,
                    exc_info=True,
                )

        return results

    async def find_similar_works(
        self,
        text: str,
        *,
        limit: int = 20,
    ) -> list[Paper]:
        """Search for works sharing key terms with *text*.

        Up to 12 terms are extracted from ``text`` and used as a
        relevance-sorted topic search. Returns an empty list when no
        usable terms are found.
        """
        query = _extract_search_terms(text, max_terms=12)
        if not query:
            return []
        return await self.search_works(query, sort_by="relevance", limit=limit)

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.aclose()

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    async def _search(self, wos_query: str, sort_by: str, limit: int) -> list[Paper]:
        """Dispatch a ready-made WoS query to the configured tier."""
        if self._tier == "expanded":
            return await self._search_expanded(wos_query, sort_by, limit)
        return await self._search_starter(wos_query, sort_by, limit)

    @staticmethod
    def _bare_doi(doi: str) -> str:
        """Return *doi* stripped of whitespace and any resolver/``doi:`` prefix."""
        bare = (doi or "").strip()
        lowered = bare.lower()
        for prefix in WosSource._DOI_PREFIXES:
            if lowered.startswith(prefix):
                return bare[len(prefix):]
        return bare

    @staticmethod
    def _doi_url(doi: object) -> str | None:
        """Return *doi* as a URL, or None when it is empty or not a string.

        Values already starting with "http" are returned unchanged; bare
        DOIs get the ``https://doi.org/`` prefix.
        """
        if not isinstance(doi, str) or not doi:
            return None
        if doi.startswith("http"):
            return doi
        return f"https://doi.org/{doi}"

    @staticmethod
    def _dig(node: object, *keys: str) -> object:
        """Follow *keys* through nested dicts.

        Returns None as soon as a level is missing or is not a dict, so
        callers never hit AttributeError on unexpected payload shapes.
        """
        for key in keys:
            if not isinstance(node, dict):
                return None
            node = node.get(key)
        return node

    @staticmethod
    def _as_list(value: object) -> list:
        """Normalise a JSON value that may be a list, a single item, or absent.

        None and "" become []; a list is returned as is; anything else is
        wrapped in a one-element list.
        """
        if value is None or value == "":
            return []
        if isinstance(value, list):
            return value
        return [value]

    @staticmethod
    def _to_int(value: object, default: int = 0) -> int:
        """Convert *value* with int(); return *default* if that fails."""
        try:
            return int(value)
        except (ValueError, TypeError):
            return default

    # ------------------------------------------------------------------
    # Starter API implementation
    # ------------------------------------------------------------------

    async def _search_starter(
        self, wos_query: str, sort_by: str, limit: int,
    ) -> list[Paper]:
        """Page through the Starter ``/documents`` endpoint.

        Stops at ``limit`` papers, at the reported total, on an empty page,
        or on the first failed/unusable response (logged, not raised).
        """
        # NOTE(review): values kept exactly as originally written; the
        # Expanded tier uses the FIELD+D form — confirm the Starter API
        # accepts "relevance" / "TC.D" / "PY.D".
        sort_field = "relevance"
        if sort_by == "cited_by_count":
            sort_field = "TC.D"
        elif sort_by == "publication_year":
            sort_field = "PY.D"

        papers: list[Paper] = []
        page = 1
        per_page = min(limit, self._per_page_max)

        while len(papers) < limit:
            try:
                resp = await self._client.get(
                    "/documents",
                    params={
                        "q": wos_query,
                        "limit": per_page,
                        "page": page,
                        "sortField": sort_field,
                        "db": "WOS",
                    },
                )
                resp.raise_for_status()

                if not resp.content or not resp.content.strip():
                    logger.warning("WoS empty response for: %s (page %d)", wos_query, page)
                    break

                content_type = resp.headers.get("content-type", "")
                if "json" not in content_type:
                    logger.warning("WoS non-JSON response (%s) for: %s", content_type, wos_query)
                    break

                data = resp.json()
            except httpx.HTTPStatusError as exc:
                logger.error("WoS HTTP %d for: %s", exc.response.status_code, wos_query)
                break
            except Exception:
                logger.exception("WoS search failed for: %s (page %d)", wos_query, page)
                break

            hits = self._as_list(self._dig(data, "hits"))
            if not hits:
                break

            for hit in hits:
                if isinstance(hit, dict):
                    papers.append(self._starter_to_paper(hit))
                if len(papers) >= limit:
                    break

            total = self._to_int(self._dig(data, "metadata", "total"))
            if page * per_page >= total:
                break
            page += 1

        return papers[:limit]

    # ------------------------------------------------------------------
    # Expanded API implementation
    # ------------------------------------------------------------------

    async def _search_expanded(
        self, wos_query: str, sort_by: str, limit: int,
    ) -> list[Paper]:
        """Page through the Expanded API.

        The first request submits the query; once a QueryID is known,
        later pages are fetched from ``/query/{QueryID}``. Stops at
        ``limit`` papers, at the reported record count, on an empty page,
        or on the first failed/unusable response (logged, not raised).
        """
        # Expanded uses different sort format: field+direction (e.g. PY+D)
        sort_field = "RS+D"  # relevance score descending
        if sort_by == "cited_by_count":
            sort_field = "TC+D"
        elif sort_by == "publication_year":
            sort_field = "PY+D"

        papers: list[Paper] = []
        first_record = 1
        per_page = min(limit, self._per_page_max)
        query_id: int | None = None
        records_found = 0

        while len(papers) < limit:
            page_params = {
                "count": per_page,
                "firstRecord": first_record,
                "sortField": sort_field,
                "optionView": "FR",
            }
            try:
                if query_id is None:
                    # First request: search query
                    resp = await self._client.get(
                        "",
                        params={
                            "databaseId": "WOS",
                            "usrQuery": wos_query,
                            **page_params,
                        },
                    )
                else:
                    # Subsequent pages: use queryId
                    resp = await self._client.get(
                        f"/query/{query_id}",
                        params=page_params,
                    )
                resp.raise_for_status()

                if not resp.content or not resp.content.strip():
                    logger.warning("WoS Expanded empty response for: %s", wos_query)
                    break

                content_type = resp.headers.get("content-type", "")
                if "json" not in content_type:
                    logger.warning("WoS Expanded non-JSON response (%s)", content_type)
                    break

                data = resp.json()
            except httpx.HTTPStatusError as exc:
                logger.error(
                    "WoS Expanded HTTP %d for: %s. Body: %.200s",
                    exc.response.status_code, wos_query, exc.response.text,
                )
                break
            except Exception:
                logger.exception("WoS Expanded search failed for: %s", wos_query)
                break

            # Extract query metadata
            query_result = self._dig(data, "QueryResult")
            if isinstance(query_result, dict):
                if query_id is None:
                    query_id = query_result.get("QueryID")
                # Only overwrite when the page actually reports a count, so
                # a page without QueryResult does not end pagination early.
                if "RecordsFound" in query_result:
                    records_found = self._to_int(query_result.get("RecordsFound"))

            # Extract records. _dig/_as_list tolerate "records" being a
            # non-dict placeholder and "REC" being a single object.
            recs = self._as_list(self._dig(data, "Data", "Records", "records", "REC"))
            if not recs:
                # NOTE(review): queryId page responses may carry "Records" at
                # the top level rather than under "Data" — confirm against
                # the API reference; the fallback is harmless if they do not.
                recs = self._as_list(self._dig(data, "Records", "records", "REC"))
            if not recs:
                break

            for rec in recs:
                if isinstance(rec, dict):
                    papers.append(self._expanded_to_paper(rec))
                if len(papers) >= limit:
                    break

            first_record += len(recs)
            if first_record > records_found:
                break

        return papers[:limit]

    # ------------------------------------------------------------------
    # Record converters
    # ------------------------------------------------------------------

    @staticmethod
    def _starter_to_paper(hit: dict) -> Paper:
        """Convert WoS Starter API hit to Paper.

        Missing or oddly shaped fields degrade to empty values: "" for
        title, [] for authors/keywords, 0 for year and citation count,
        None for DOI. The abstract is always None on this tier.
        """
        as_list = WosSource._as_list
        dig = WosSource._dig

        authors = []
        for author_entry in as_list(dig(hit, "names", "authors")):
            if not isinstance(author_entry, dict):
                continue
            display_name = author_entry.get("displayName") or author_entry.get("wosStandard", "")
            if display_name:
                authors.append(display_name)

        keywords = as_list(dig(hit, "keywords", "authorKeywords"))

        # Use the count reported for the WoS database; matched
        # case-insensitively so "wos" and "WOS" both qualify.
        cited_by = 0
        for c in as_list(hit.get("citations")):
            if isinstance(c, dict) and str(c.get("db", "")).lower() == "wos":
                cited_by = WosSource._to_int(c.get("count"))
                break

        source_info = hit.get("source")
        if not isinstance(source_info, dict):
            source_info = {}
        pub_year = WosSource._to_int(source_info.get("publishYear"))

        doi = WosSource._doi_url(dig(hit, "identifiers", "doi"))

        uid = hit.get("uid", "")

        return Paper(
            source_id=f"wos:{uid}",
            title=hit.get("title", "") or "",
            abstract=None,  # Starter API doesn't return abstracts
            authors=authors,
            publication_year=pub_year,
            cited_by_count=cited_by,
            source_name=source_info.get("sourceTitle"),
            doi=doi,
            keywords=keywords,
            url=doi or f"https://www.webofscience.com/wos/woscc/full-record/{uid}",
            verified_by=["wos"],
        )

    @staticmethod
    def _expanded_to_paper(rec: dict) -> Paper:
        """Convert WoS Expanded API record to Paper.

        Repeatable elements (titles, names, abstracts, citation silos,
        identifiers) are accepted either as a list or as a single object.
        Missing fields degrade to empty values as in ``_starter_to_paper``.
        """
        as_list = WosSource._as_list
        dig = WosSource._dig

        uid = rec.get("UID", "")
        summary = dig(rec, "static_data", "summary")
        fullrecord = dig(rec, "static_data", "fullrecord_metadata")

        # Title — look for type "item" (article title); type "source" is
        # the journal/venue name.
        title = ""
        source_name = None
        for t in as_list(dig(summary, "titles", "title")):
            if not isinstance(t, dict):
                continue
            if t.get("type") == "item":
                title = t.get("content") or ""
            elif t.get("type") == "source":
                source_name = t.get("content")

        # Authors
        authors = []
        for name_entry in as_list(dig(summary, "names", "name")):
            if not isinstance(name_entry, dict) or name_entry.get("role") != "author":
                continue
            display = (
                name_entry.get("display_name")
                or name_entry.get("full_name")
                or name_entry.get("wos_standard", "")
            )
            if display:
                authors.append(display)

        # Abstract — first abstract block only; its text is either a string
        # or an object whose "p" is a string or a list of paragraphs.
        abstract = None
        abstract_blocks = as_list(dig(fullrecord, "abstracts", "abstract"))
        abstract_text = dig(abstract_blocks[0], "abstract_text") if abstract_blocks else None
        if isinstance(abstract_text, dict):
            p = abstract_text.get("p")
            if isinstance(p, list):
                abstract = " ".join(str(para) for para in p)
            elif isinstance(p, str):
                abstract = p
        elif isinstance(abstract_text, str):
            abstract = abstract_text

        # Keywords
        keywords = as_list(dig(fullrecord, "keywords", "keyword"))

        # Publication year
        pub_year = WosSource._to_int(dig(summary, "pub_info", "pubyear"))

        # Citation count — from dynamic_data.citation_related.tc_list.silo_tc
        cited_by = 0
        for tc in as_list(dig(rec, "dynamic_data", "citation_related", "tc_list", "silo_tc")):
            if isinstance(tc, dict) and str(tc.get("coll_id", "")).upper() == "WOS":
                cited_by = WosSource._to_int(tc.get("local_count"))
                break

        # DOI — try dynamic_data path first (most reliable), then static fallbacks
        doi = None
        identifier_paths = (
            ("dynamic_data", "cluster_related", "identifiers", "identifier"),
            ("static_data", "summary", "identifiers", "identifier"),
        )
        for path in identifier_paths:
            for ident in as_list(dig(rec, *path)):
                if isinstance(ident, dict) and ident.get("type") == "doi":
                    doi = ident.get("value")
                    break
            if doi:
                break
        if not doi:
            doi = dig(rec, "static_data", "item", "ids", "doi")
        doi = WosSource._doi_url(doi)

        return Paper(
            source_id=f"wos:{uid}",
            title=title,
            abstract=abstract,
            authors=authors,
            publication_year=pub_year,
            cited_by_count=cited_by,
            source_name=source_name,
            doi=doi,
            keywords=keywords,
            url=doi or f"https://www.webofscience.com/wos/woscc/full-record/{uid}",
            verified_by=["wos"],
        )