flonat-research 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285) hide show
  1. package/.claude/agents/domain-reviewer.md +336 -0
  2. package/.claude/agents/fixer.md +226 -0
  3. package/.claude/agents/paper-critic.md +370 -0
  4. package/.claude/agents/peer-reviewer.md +289 -0
  5. package/.claude/agents/proposal-reviewer.md +215 -0
  6. package/.claude/agents/referee2-reviewer.md +367 -0
  7. package/.claude/agents/references/journal-referee-profiles.md +354 -0
  8. package/.claude/agents/references/paper-critic/council-personas.md +77 -0
  9. package/.claude/agents/references/paper-critic/council-prompts.md +198 -0
  10. package/.claude/agents/references/peer-reviewer/report-template.md +199 -0
  11. package/.claude/agents/references/peer-reviewer/sa-prompts.md +260 -0
  12. package/.claude/agents/references/peer-reviewer/security-scan.md +188 -0
  13. package/.claude/agents/references/proposal-reviewer/report-template.md +144 -0
  14. package/.claude/agents/references/proposal-reviewer/sa-prompts.md +149 -0
  15. package/.claude/agents/references/referee-config.md +114 -0
  16. package/.claude/agents/references/referee2-reviewer/audit-checklists.md +287 -0
  17. package/.claude/agents/references/referee2-reviewer/report-template.md +334 -0
  18. package/.claude/rules/design-before-results.md +52 -0
  19. package/.claude/rules/ignore-agents-md.md +17 -0
  20. package/.claude/rules/ignore-gemini-md.md +17 -0
  21. package/.claude/rules/lean-claude-md.md +45 -0
  22. package/.claude/rules/learn-tags.md +99 -0
  23. package/.claude/rules/overleaf-separation.md +67 -0
  24. package/.claude/rules/plan-first.md +175 -0
  25. package/.claude/rules/read-docs-first.md +50 -0
  26. package/.claude/rules/scope-discipline.md +28 -0
  27. package/.claude/settings.json +125 -0
  28. package/.context/current-focus.md +33 -0
  29. package/.context/preferences/priorities.md +36 -0
  30. package/.context/preferences/task-naming.md +28 -0
  31. package/.context/profile.md +29 -0
  32. package/.context/projects/_index.md +41 -0
  33. package/.context/projects/papers/nudge-exp.md +22 -0
  34. package/.context/projects/papers/uncertainty.md +31 -0
  35. package/.context/resources/claude-scientific-writer-review.md +48 -0
  36. package/.context/resources/cunningham-multi-analyst-agents.md +104 -0
  37. package/.context/resources/cunningham-multilang-code-audit.md +62 -0
  38. package/.context/resources/google-ai-co-scientist-review.md +72 -0
  39. package/.context/resources/karpathy-llm-council-review.md +58 -0
  40. package/.context/resources/multi-coder-reliability-protocol.md +175 -0
  41. package/.context/resources/pedro-santanna-takeaways.md +96 -0
  42. package/.context/resources/venue-rankings/abs_ajg_2024.csv +1823 -0
  43. package/.context/resources/venue-rankings/abs_ajg_2024_econ.csv +356 -0
  44. package/.context/resources/venue-rankings/cabs_4_4star_theory.csv +40 -0
  45. package/.context/resources/venue-rankings/core_2026.csv +801 -0
  46. package/.context/resources/venue-rankings.md +147 -0
  47. package/.context/workflows/README.md +69 -0
  48. package/.context/workflows/daily-review.md +91 -0
  49. package/.context/workflows/meeting-actions.md +108 -0
  50. package/.context/workflows/replication-protocol.md +155 -0
  51. package/.context/workflows/weekly-review.md +113 -0
  52. package/.mcp-server-biblio/formatters.py +158 -0
  53. package/.mcp-server-biblio/pyproject.toml +11 -0
  54. package/.mcp-server-biblio/server.py +678 -0
  55. package/.mcp-server-biblio/sources/__init__.py +14 -0
  56. package/.mcp-server-biblio/sources/base.py +73 -0
  57. package/.mcp-server-biblio/sources/formatters.py +83 -0
  58. package/.mcp-server-biblio/sources/models.py +22 -0
  59. package/.mcp-server-biblio/sources/multi_source.py +243 -0
  60. package/.mcp-server-biblio/sources/openalex_source.py +183 -0
  61. package/.mcp-server-biblio/sources/scopus_source.py +309 -0
  62. package/.mcp-server-biblio/sources/wos_source.py +508 -0
  63. package/.mcp-server-biblio/uv.lock +896 -0
  64. package/.scripts/README.md +161 -0
  65. package/.scripts/ai_pattern_density.py +446 -0
  66. package/.scripts/conf +445 -0
  67. package/.scripts/config.py +122 -0
  68. package/.scripts/count_inventory.py +275 -0
  69. package/.scripts/daily_digest.py +288 -0
  70. package/.scripts/done +177 -0
  71. package/.scripts/extract_meeting_actions.py +223 -0
  72. package/.scripts/focus +176 -0
  73. package/.scripts/generate-codex-agents-md.py +217 -0
  74. package/.scripts/inbox +194 -0
  75. package/.scripts/notion_helpers.py +325 -0
  76. package/.scripts/openalex/query_helpers.py +306 -0
  77. package/.scripts/papers +227 -0
  78. package/.scripts/query +223 -0
  79. package/.scripts/session-history.py +201 -0
  80. package/.scripts/skill-health.py +516 -0
  81. package/.scripts/skill-log-miner.py +273 -0
  82. package/.scripts/sync-to-codex.sh +252 -0
  83. package/.scripts/task +213 -0
  84. package/.scripts/tasks +190 -0
  85. package/.scripts/week +206 -0
  86. package/CLAUDE.md +197 -0
  87. package/LICENSE +21 -0
  88. package/MEMORY.md +38 -0
  89. package/README.md +269 -0
  90. package/docs/agents.md +44 -0
  91. package/docs/bibliography-setup.md +55 -0
  92. package/docs/council-mode.md +36 -0
  93. package/docs/getting-started.md +245 -0
  94. package/docs/hooks.md +38 -0
  95. package/docs/mcp-servers.md +82 -0
  96. package/docs/notion-setup.md +109 -0
  97. package/docs/rules.md +33 -0
  98. package/docs/scripts.md +303 -0
  99. package/docs/setup-overview/setup-overview.pdf +0 -0
  100. package/docs/skills.md +70 -0
  101. package/docs/system.md +159 -0
  102. package/hooks/block-destructive-git.sh +66 -0
  103. package/hooks/context-monitor.py +114 -0
  104. package/hooks/postcompact-restore.py +157 -0
  105. package/hooks/precompact-autosave.py +181 -0
  106. package/hooks/promise-checker.sh +124 -0
  107. package/hooks/protect-source-files.sh +81 -0
  108. package/hooks/resume-context-loader.sh +53 -0
  109. package/hooks/startup-context-loader.sh +102 -0
  110. package/package.json +51 -0
  111. package/packages/cli-council/.github/workflows/claude-code-review.yml +44 -0
  112. package/packages/cli-council/.github/workflows/claude.yml +50 -0
  113. package/packages/cli-council/README.md +100 -0
  114. package/packages/cli-council/pyproject.toml +43 -0
  115. package/packages/cli-council/src/cli_council/__init__.py +19 -0
  116. package/packages/cli-council/src/cli_council/__main__.py +185 -0
  117. package/packages/cli-council/src/cli_council/backends/__init__.py +8 -0
  118. package/packages/cli-council/src/cli_council/backends/base.py +81 -0
  119. package/packages/cli-council/src/cli_council/backends/claude.py +25 -0
  120. package/packages/cli-council/src/cli_council/backends/codex.py +27 -0
  121. package/packages/cli-council/src/cli_council/backends/gemini.py +26 -0
  122. package/packages/cli-council/src/cli_council/checkpoint.py +212 -0
  123. package/packages/cli-council/src/cli_council/config.py +51 -0
  124. package/packages/cli-council/src/cli_council/council.py +391 -0
  125. package/packages/cli-council/src/cli_council/models.py +46 -0
  126. package/packages/llm-council/.github/workflows/claude-code-review.yml +44 -0
  127. package/packages/llm-council/.github/workflows/claude.yml +50 -0
  128. package/packages/llm-council/README.md +453 -0
  129. package/packages/llm-council/pyproject.toml +42 -0
  130. package/packages/llm-council/src/llm_council/__init__.py +23 -0
  131. package/packages/llm-council/src/llm_council/__main__.py +259 -0
  132. package/packages/llm-council/src/llm_council/checkpoint.py +193 -0
  133. package/packages/llm-council/src/llm_council/client.py +253 -0
  134. package/packages/llm-council/src/llm_council/config.py +232 -0
  135. package/packages/llm-council/src/llm_council/council.py +482 -0
  136. package/packages/llm-council/src/llm_council/models.py +46 -0
  137. package/packages/mcp-bibliography/MEMORY.md +31 -0
  138. package/packages/mcp-bibliography/_app.py +226 -0
  139. package/packages/mcp-bibliography/formatters.py +158 -0
  140. package/packages/mcp-bibliography/log/2026-03-13-2100.md +35 -0
  141. package/packages/mcp-bibliography/pyproject.toml +15 -0
  142. package/packages/mcp-bibliography/run.sh +20 -0
  143. package/packages/mcp-bibliography/scholarly_formatters.py +83 -0
  144. package/packages/mcp-bibliography/server.py +1857 -0
  145. package/packages/mcp-bibliography/tools/__init__.py +28 -0
  146. package/packages/mcp-bibliography/tools/_registry.py +19 -0
  147. package/packages/mcp-bibliography/tools/altmetric.py +107 -0
  148. package/packages/mcp-bibliography/tools/core.py +92 -0
  149. package/packages/mcp-bibliography/tools/dblp.py +52 -0
  150. package/packages/mcp-bibliography/tools/openalex.py +296 -0
  151. package/packages/mcp-bibliography/tools/opencitations.py +102 -0
  152. package/packages/mcp-bibliography/tools/openreview.py +179 -0
  153. package/packages/mcp-bibliography/tools/orcid.py +131 -0
  154. package/packages/mcp-bibliography/tools/scholarly.py +575 -0
  155. package/packages/mcp-bibliography/tools/unpaywall.py +63 -0
  156. package/packages/mcp-bibliography/tools/zenodo.py +123 -0
  157. package/packages/mcp-bibliography/uv.lock +711 -0
  158. package/scripts/setup.sh +143 -0
  159. package/skills/beamer-deck/SKILL.md +199 -0
  160. package/skills/beamer-deck/references/quality-rubric.md +54 -0
  161. package/skills/beamer-deck/references/review-prompts.md +106 -0
  162. package/skills/bib-validate/SKILL.md +261 -0
  163. package/skills/bib-validate/references/council-mode.md +34 -0
  164. package/skills/bib-validate/references/deep-verify.md +79 -0
  165. package/skills/bib-validate/references/fix-mode.md +36 -0
  166. package/skills/bib-validate/references/openalex-verification.md +45 -0
  167. package/skills/bib-validate/references/preprint-check.md +31 -0
  168. package/skills/bib-validate/references/ref-manager-crossref.md +41 -0
  169. package/skills/bib-validate/references/report-template.md +82 -0
  170. package/skills/code-archaeology/SKILL.md +141 -0
  171. package/skills/code-review/SKILL.md +265 -0
  172. package/skills/code-review/references/quality-rubric.md +67 -0
  173. package/skills/consolidate-memory/SKILL.md +208 -0
  174. package/skills/context-status/SKILL.md +126 -0
  175. package/skills/creation-guard/SKILL.md +230 -0
  176. package/skills/devils-advocate/SKILL.md +130 -0
  177. package/skills/devils-advocate/references/competing-hypotheses.md +83 -0
  178. package/skills/init-project/SKILL.md +115 -0
  179. package/skills/init-project-course/references/memory-and-settings.md +92 -0
  180. package/skills/init-project-course/references/organise-templates.md +94 -0
  181. package/skills/init-project-course/skill.md +147 -0
  182. package/skills/init-project-light/skill.md +139 -0
  183. package/skills/init-project-research/SKILL.md +368 -0
  184. package/skills/init-project-research/references/atlas-pipeline-sync.md +70 -0
  185. package/skills/init-project-research/references/atlas-schema.md +81 -0
  186. package/skills/init-project-research/references/confirmation-report.md +39 -0
  187. package/skills/init-project-research/references/domain-profile-template.md +104 -0
  188. package/skills/init-project-research/references/interview-round3.md +34 -0
  189. package/skills/init-project-research/references/literature-discovery.md +43 -0
  190. package/skills/init-project-research/references/scaffold-details.md +197 -0
  191. package/skills/init-project-research/templates/field-calibration.md +60 -0
  192. package/skills/init-project-research/templates/pipeline-manifest.md +63 -0
  193. package/skills/init-project-research/templates/run-all.sh +116 -0
  194. package/skills/init-project-research/templates/seed-files.md +337 -0
  195. package/skills/insights-deck/SKILL.md +151 -0
  196. package/skills/interview-me/SKILL.md +157 -0
  197. package/skills/latex/SKILL.md +141 -0
  198. package/skills/latex/references/latex-configs.md +183 -0
  199. package/skills/latex-autofix/SKILL.md +230 -0
  200. package/skills/latex-autofix/references/known-errors.md +183 -0
  201. package/skills/latex-autofix/references/quality-rubric.md +50 -0
  202. package/skills/latex-health-check/SKILL.md +161 -0
  203. package/skills/learn/SKILL.md +220 -0
  204. package/skills/learn/scripts/validate_skill.py +265 -0
  205. package/skills/lessons-learned/SKILL.md +201 -0
  206. package/skills/literature/SKILL.md +335 -0
  207. package/skills/literature/references/agent-templates.md +393 -0
  208. package/skills/literature/references/bibliometric-apis.md +44 -0
  209. package/skills/literature/references/cli-council-search.md +79 -0
  210. package/skills/literature/references/openalex-api-guide.md +371 -0
  211. package/skills/literature/references/openalex-common-queries.md +381 -0
  212. package/skills/literature/references/openalex-workflows.md +248 -0
  213. package/skills/literature/references/reference-manager-sync.md +36 -0
  214. package/skills/literature/references/scopus-api-guide.md +208 -0
  215. package/skills/literature/references/wos-api-guide.md +308 -0
  216. package/skills/multi-perspective/SKILL.md +311 -0
  217. package/skills/multi-perspective/references/computational-many-analysts.md +77 -0
  218. package/skills/pipeline-manifest/SKILL.md +226 -0
  219. package/skills/pre-submission-report/SKILL.md +153 -0
  220. package/skills/process-reviews/SKILL.md +244 -0
  221. package/skills/process-reviews/references/rr-routing.md +101 -0
  222. package/skills/project-deck/SKILL.md +87 -0
  223. package/skills/project-safety/SKILL.md +135 -0
  224. package/skills/proofread/SKILL.md +254 -0
  225. package/skills/proofread/references/quality-rubric.md +104 -0
  226. package/skills/python-env/SKILL.md +57 -0
  227. package/skills/quarto-deck/SKILL.md +226 -0
  228. package/skills/quarto-deck/references/markdown-format.md +143 -0
  229. package/skills/quarto-deck/references/quality-rubric.md +54 -0
  230. package/skills/save-context/SKILL.md +174 -0
  231. package/skills/session-log/SKILL.md +98 -0
  232. package/skills/shared/concept-validation-gate.md +161 -0
  233. package/skills/shared/council-protocol.md +265 -0
  234. package/skills/shared/distribution-diagnostics.md +164 -0
  235. package/skills/shared/engagement-stratified-sampling.md +218 -0
  236. package/skills/shared/escalation-protocol.md +74 -0
  237. package/skills/shared/external-audit-protocol.md +205 -0
  238. package/skills/shared/intercoder-reliability.md +256 -0
  239. package/skills/shared/mcp-degradation.md +81 -0
  240. package/skills/shared/method-probing-questions.md +163 -0
  241. package/skills/shared/multi-language-conventions.md +143 -0
  242. package/skills/shared/paid-api-safety.md +174 -0
  243. package/skills/shared/palettes.md +90 -0
  244. package/skills/shared/progressive-disclosure.md +92 -0
  245. package/skills/shared/project-documentation-content.md +443 -0
  246. package/skills/shared/project-documentation-format.md +281 -0
  247. package/skills/shared/project-documentation.md +100 -0
  248. package/skills/shared/publication-output.md +138 -0
  249. package/skills/shared/quality-scoring.md +70 -0
  250. package/skills/shared/reference-resolution.md +77 -0
  251. package/skills/shared/research-quality-rubric.md +165 -0
  252. package/skills/shared/rhetoric-principles.md +54 -0
  253. package/skills/shared/skill-design-patterns.md +272 -0
  254. package/skills/shared/skill-index.md +240 -0
  255. package/skills/shared/system-documentation.md +334 -0
  256. package/skills/shared/tikz-rules.md +402 -0
  257. package/skills/shared/validation-tiers.md +121 -0
  258. package/skills/shared/venue-guides/README.md +46 -0
  259. package/skills/shared/venue-guides/cell_press_style.md +483 -0
  260. package/skills/shared/venue-guides/conferences_formatting.md +564 -0
  261. package/skills/shared/venue-guides/cs_conference_style.md +463 -0
  262. package/skills/shared/venue-guides/examples/cell_summary_example.md +247 -0
  263. package/skills/shared/venue-guides/examples/medical_structured_abstract.md +313 -0
  264. package/skills/shared/venue-guides/examples/nature_abstract_examples.md +213 -0
  265. package/skills/shared/venue-guides/examples/neurips_introduction_example.md +245 -0
  266. package/skills/shared/venue-guides/journals_formatting.md +486 -0
  267. package/skills/shared/venue-guides/medical_journal_styles.md +535 -0
  268. package/skills/shared/venue-guides/ml_conference_style.md +556 -0
  269. package/skills/shared/venue-guides/nature_science_style.md +405 -0
  270. package/skills/shared/venue-guides/reviewer_expectations.md +417 -0
  271. package/skills/shared/venue-guides/venue_writing_styles.md +321 -0
  272. package/skills/split-pdf/SKILL.md +172 -0
  273. package/skills/split-pdf/methodology.md +48 -0
  274. package/skills/sync-notion/SKILL.md +93 -0
  275. package/skills/system-audit/SKILL.md +157 -0
  276. package/skills/system-audit/references/sub-agent-prompts.md +294 -0
  277. package/skills/task-management/SKILL.md +131 -0
  278. package/skills/update-focus/SKILL.md +204 -0
  279. package/skills/update-project-doc/SKILL.md +194 -0
  280. package/skills/validate-bib/SKILL.md +242 -0
  281. package/skills/validate-bib/references/council-mode.md +34 -0
  282. package/skills/validate-bib/references/deep-verify.md +71 -0
  283. package/skills/validate-bib/references/openalex-verification.md +45 -0
  284. package/skills/validate-bib/references/preprint-check.md +31 -0
  285. package/skills/validate-bib/references/report-template.md +62 -0
@@ -0,0 +1,73 @@
1
+ """Abstract base class for scholarly data sources."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import Any
7
+
8
+ from sources.models import Paper
9
+
10
+
11
class ScholarlySource(ABC):
    """Common contract every bibliometric backend must satisfy.

    Concrete subclasses wrap one provider each (the original notes name
    OpenAlex, Scopus and Web of Science). The query methods are coroutines so
    that callers can run several providers concurrently.
    """

    # -- identity ----------------------------------------------------------

    @property
    @abstractmethod
    def source_name(self) -> str:
        """Display name of the provider, intended for humans."""

    @property
    @abstractmethod
    def source_key(self) -> str:
        """Short slug identifying the provider (e.g. 'openalex', 'scopus', 'wos')."""

    # -- queries -----------------------------------------------------------

    @abstractmethod
    async def search_works(
        self,
        query: str,
        *,
        year_from: int | None = None,
        year_to: int | None = None,
        sort_by: str = "relevance",
        limit: int = 50,
    ) -> list[Paper]:
        """Return works that match the free-text *query*.

        ``year_from`` / ``year_to`` optionally bound the publication year,
        ``sort_by`` names the ordering and ``limit`` caps the result count.
        """

    @abstractmethod
    async def verify_doi(self, doi: str) -> Paper | None:
        """Resolve one DOI to its metadata; ``None`` means it was not found."""

    @abstractmethod
    async def batch_verify_dois(self, dois: list[str]) -> dict[str, Paper | None]:
        """Resolve many DOIs in one call, returning ``{doi: Paper or None}``."""

    @abstractmethod
    async def find_similar_works(
        self,
        text: str,
        *,
        limit: int = 20,
    ) -> list[Paper]:
        """Return up to *limit* works closest to *text* (a title or abstract)."""

    # -- optional hooks (no-op defaults) -------------------------------------

    async def close(self) -> None:
        """Free network/session resources; the base implementation does nothing."""

    def reset_diagnostics(self) -> None:
        """Clear per-request diagnostics; the base implementation does nothing."""

    def consume_diagnostics(self) -> dict[str, Any] | None:
        """Hand back and clear the current diagnostics; the base returns ``None``."""
        return None
@@ -0,0 +1,83 @@
1
+ """Markdown formatters for multi-source scholarly results."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from sources.models import Paper
6
+
7
+
8
def _escape_table_cell(text: str) -> str:
    """Make *text* safe to embed in a single markdown table cell."""
    # A newline ends the table row and a bare pipe opens a new column, so
    # collapse all whitespace runs and backslash-escape pipes.
    return " ".join(text.split()).replace("|", "\\|")


def _doi_url(doi: str) -> str:
    """Turn a DOI in bare, ``doi:``-prefixed or URL form into a resolvable URL."""
    doi = doi.strip()
    lowered = doi.lower()
    if lowered.startswith(("http://", "https://")):
        return doi
    if lowered.startswith("doi:"):
        doi = doi[4:].strip()
    return f"https://doi.org/{doi}"


def format_papers_table(papers: list[Paper], title: str = "Results") -> str:
    """Format a list of Papers as a markdown table.

    Args:
        papers: Records to render, one table row each, numbered from 1.
        title: Text of the level-2 heading placed above the table.

    Returns:
        The heading followed by the table, or the heading followed by
        "No results found." when *papers* is empty.

    Titles are cut to 80 characters and venue names to 30; at most three
    authors are listed before "et al.". Cell text is escaped so that pipes or
    line breaks in the metadata cannot break the table layout, and bare DOIs
    are linked through https://doi.org/ instead of as relative links.
    """
    if not papers:
        return f"## {title}\n\nNo results found."

    lines = [
        f"## {title}\n",
        "| # | Title | Authors | Year | Cited | Source | DOI |",
        "|---|-------|---------|------|-------|--------|-----|",
    ]

    for i, p in enumerate(papers, 1):
        authors = ", ".join(p.authors[:3])
        if len(p.authors) > 3:
            authors += " et al."

        # Truncate before escaping so an escape sequence is never cut in half.
        full_title = p.title or ""
        title_short = full_title[:80] + "..." if len(full_title) > 80 else full_title

        source = p.source_name or "—"
        if len(source) > 30:
            source = source[:27] + "..."

        doi_link = f"[link]({_doi_url(p.doi)})" if p.doi else "—"
        # Missing numbers are shown as a dash rather than crashing the
        # thousands-separator format or printing "None".
        year = p.publication_year if p.publication_year is not None else "—"
        cited = f"{p.cited_by_count:,}" if p.cited_by_count is not None else "—"

        lines.append(
            f"| {i} | {_escape_table_cell(title_short)} | {_escape_table_cell(authors)} | "
            f"{year} | {cited} | {_escape_table_cell(source)} | {doi_link} |"
        )

    return "\n".join(lines)
33
+
34
+
35
def format_verification_table(results: dict[str, Paper | None]) -> str:
    """Format DOI verification results as a markdown table.

    Args:
        results: Mapping of each checked DOI to the Paper found for it, or to
            ``None`` when no source returned a record.

    Returns:
        A markdown table with one row per DOI followed by a summary line.

    A DOI counts as "VERIFIED" when its paper lists two or more entries in
    ``verified_by``, "SINGLE SOURCE" when it lists fewer, and "NOT FOUND" when
    the mapped value is ``None``. Titles are cut to 60 characters and escaped
    so that pipes or line breaks cannot break the table layout.
    """
    lines = [
        "## DOI Verification Results\n",
        "| # | DOI | Title | Year | Cited By | Verified By | Status |",
        "|---|-----|-------|------|----------|-------------|--------|",
    ]

    verified_count = 0
    single_count = 0
    not_found_count = 0

    for i, (doi, paper) in enumerate(results.items(), 1):
        if paper is None:
            not_found_count += 1
            lines.append(f"| {i} | `{doi}` | — | — | — | — | ❌ NOT FOUND |")
            continue

        sources = ", ".join(paper.verified_by)
        if len(paper.verified_by) >= 2:
            status, icon = "VERIFIED", "✅"
            verified_count += 1
        else:
            status, icon = "SINGLE SOURCE", "⚠️"
            single_count += 1

        # Truncate first, then neutralise characters that would end the row
        # (newlines) or open an extra column (pipes).
        full_title = paper.title or ""
        title_short = full_title[:60] + "..." if len(full_title) > 60 else full_title
        title_cell = " ".join(title_short.split()).replace("|", "\\|")

        # A missing citation count is shown as a dash instead of crashing the
        # thousands-separator format.
        cited = f"{paper.cited_by_count:,}" if paper.cited_by_count is not None else "—"
        year = paper.publication_year if paper.publication_year is not None else "—"

        lines.append(
            f"| {i} | `{doi}` | {title_cell} | {year} | "
            f"{cited} | {sources} | {icon} {status} |"
        )

    lines.append("")
    lines.append(
        f"**Summary:** {verified_count} verified (2+ sources), "
        f"{single_count} single-source, {not_found_count} not found"
    )

    return "\n".join(lines)
70
+
71
+
72
def format_source_status(sources: list[dict]) -> str:
    """Render the configured scholarly sources as a markdown status table.

    Each entry must provide ``"name"`` and ``"active"``; ``"key"`` is optional
    and is shown as a dash when absent.
    """
    header = [
        "## Scholarly Source Status\n",
        "| Source | Status | Key |",
        "|--------|--------|-----|",
    ]

    rows = []
    for entry in sources:
        label = "✅ Active" if entry["active"] else "❌ Not configured"
        rows.append(f"| {entry['name']} | {label} | `{entry.get('key', '—')}` |")

    return "\n".join(header + rows)
@@ -0,0 +1,22 @@
1
+ """Paper dataclass for multi-source scholarly search."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+
7
+
8
@dataclass
class Paper:
    """One scholarly work, in the shape shared by every data source."""

    # Required fields, in positional order.
    source_id: str
    title: str
    authors: list[str]
    publication_year: int
    cited_by_count: int
    doi: str | None

    # Optional metadata.
    abstract: str | None = None
    # Journal / venue the work appeared in.
    source_name: str | None = None
    keywords: list[str] = field(default_factory=list)
    url: str | None = None
    # Which sources confirmed this DOI.
    verified_by: list[str] = field(default_factory=list)
@@ -0,0 +1,243 @@
1
+ """Multi-source composite adapter with DOI-based deduplication.
2
+
3
+ Queries all enabled sources concurrently and merges results.
4
+ Ported from ZZ Topic Finder with improvements.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import asyncio
10
+ import logging
11
+ import re
12
+ from contextvars import ContextVar
13
+ from typing import Any
14
+
15
+ from sources.base import ScholarlySource
16
+ from sources.models import Paper
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class MultiSource(ScholarlySource):
    """Composite data source that queries multiple APIs and deduplicates results.

    All methods query sources concurrently. Failed sources are skipped gracefully
    and, when diagnostics have been started with ``reset_diagnostics()``, are
    recorded there. Papers are deduplicated by normalized DOI; papers without
    DOIs are kept as-is.
    """

    def __init__(self, sources: list[ScholarlySource]) -> None:
        """Wrap *sources*.

        Raises:
            ValueError: If *sources* is empty.
        """
        if not sources:
            raise ValueError("MultiSource requires at least one source")
        self._sources = sources
        # Diagnostics live in a ContextVar so the value is scoped to the
        # calling context rather than shared instance state.
        self._diagnostics_ctx: ContextVar[dict[str, Any] | None] = ContextVar(
            f"multi_source_diagnostics_{id(self)}", default=None
        )

    @property
    def source_name(self) -> str:
        """Names of all wrapped sources joined with " + "."""
        names = [s.source_name for s in self._sources]
        return " + ".join(names)

    @property
    def source_key(self) -> str:
        """Fixed slug "multi"."""
        return "multi"

    def reset_diagnostics(self) -> None:
        """Start a fresh diagnostics record listing every source as attempted."""
        self._diagnostics_ctx.set(
            {
                "attempted": [s.source_name for s in self._sources],
                "failed": set(),
                "warnings": [],
            }
        )

    def consume_diagnostics(self) -> dict[str, Any] | None:
        """Return the diagnostics collected since the last reset and clear them.

        Returns:
            ``None`` if no record is active, otherwise a dict with the keys
            ``attempted``, ``succeeded``, ``failed`` and ``warnings``.
        """
        diag = self._diagnostics_ctx.get()
        if diag is None:
            return None

        attempted = list(diag["attempted"])
        failed = sorted(diag["failed"])
        succeeded = [name for name in attempted if name not in failed]
        warnings = list(diag["warnings"])
        self._diagnostics_ctx.set(None)
        return {
            "attempted": attempted,
            "succeeded": succeeded,
            "failed": failed,
            "warnings": warnings,
        }

    def _record_failure(self, index: int, error: BaseException) -> None:
        """Note source number *index* as failed in the active diagnostics, if any."""
        diag = self._diagnostics_ctx.get()
        if diag is None:
            return
        source_name = self._sources[index].source_name
        diag["failed"].add(source_name)
        diag["warnings"].append(f"{source_name} failed: {error}")

    async def search_works(
        self,
        query: str,
        *,
        year_from: int | None = None,
        year_to: int | None = None,
        sort_by: str = "relevance",
        limit: int = 50,
    ) -> list[Paper]:
        """Search every source, merge duplicates, and return at most *limit* papers.

        The merged list is re-sorted only for ``sort_by`` values
        "cited_by_count" and "publication_year" (both descending); for any
        other value the order produced by the merge is kept.
        """
        coros = [
            s.search_works(query, year_from=year_from, year_to=year_to, sort_by=sort_by, limit=limit)
            for s in self._sources
        ]
        all_papers = await self._gather_flat(coros)
        deduped = deduplicate_papers(all_papers)

        # `or 0` keeps the sort from raising TypeError if a source left the
        # field empty (None cannot be compared with int).
        if sort_by == "cited_by_count":
            deduped.sort(key=lambda p: p.cited_by_count or 0, reverse=True)
        elif sort_by == "publication_year":
            deduped.sort(key=lambda p: p.publication_year or 0, reverse=True)

        return deduped[:limit]

    async def verify_doi(self, doi: str) -> Paper | None:
        """Look *doi* up in every source and merge whatever was found.

        Returns:
            The merged Paper with ``verified_by`` set to the keys of the
            sources that returned a record, or ``None`` if none did.
        """
        coros = [s.verify_doi(doi) for s in self._sources]
        results = await asyncio.gather(*coros, return_exceptions=True)

        papers = []
        verified_by: list[str] = []
        for i, result in enumerate(results):
            if isinstance(result, BaseException):
                logger.debug("Source %s failed DOI verify: %s", self._sources[i].source_name, result)
                self._record_failure(i, result)
                continue
            if result is not None:
                papers.append(result)
                verified_by.append(self._sources[i].source_key)

        if not papers:
            return None

        # Merge all found papers into one
        merged = papers[0]
        for p in papers[1:]:
            merged = _merge_papers(merged, p)
        merged.verified_by = verified_by
        return merged

    async def batch_verify_dois(self, dois: list[str]) -> dict[str, Paper | None]:
        """Verify many DOIs against every source and merge the per-DOI results.

        Returns:
            A dict keyed by the DOIs exactly as given in *dois*; each value is
            the merged Paper, or ``None`` if no source found that DOI.
        """
        coros = [s.batch_verify_dois(dois) for s in self._sources]
        results = await asyncio.gather(*coros, return_exceptions=True)

        # Merge results from all sources
        merged: dict[str, Paper | None] = {d: None for d in dois}

        # A source may hand a DOI back in a different form than it was
        # requested in (case, URL prefix). Map normalized forms back to the
        # requested key instead of raising KeyError on the unknown spelling.
        requested_by_norm: dict[str, str] = {}
        for d in dois:
            requested_by_norm.setdefault(_normalize_doi(d) or d, d)

        for i, result in enumerate(results):
            if isinstance(result, BaseException):
                logger.warning("Source %s failed batch verify: %s", self._sources[i].source_name, result)
                self._record_failure(i, result)
                continue

            source_key = self._sources[i].source_key
            for doi, paper in (result or {}).items():
                if paper is None:
                    continue

                key = doi if doi in merged else requested_by_norm.get(_normalize_doi(doi))
                if key is None:
                    logger.debug("Source %s returned unrequested DOI %s", source_key, doi)
                    continue

                current = merged[key]
                if current is None:
                    paper.verified_by = [source_key]
                    merged[key] = paper
                else:
                    combined = _merge_papers(current, paper)
                    if source_key not in combined.verified_by:
                        combined.verified_by.append(source_key)
                    merged[key] = combined

        return merged

    async def find_similar_works(
        self,
        text: str,
        *,
        limit: int = 20,
    ) -> list[Paper]:
        """Ask every source for works similar to *text*; return up to *limit* merged papers."""
        coros = [s.find_similar_works(text, limit=limit) for s in self._sources]
        all_papers = await self._gather_flat(coros)
        return deduplicate_papers(all_papers)[:limit]

    async def close(self) -> None:
        """Close every wrapped source; one failing close does not stop the rest."""
        for s in self._sources:
            try:
                await s.close()
            except Exception as exc:
                # Best-effort cleanup: keep going, but leave a trace instead
                # of discarding the error silently.
                logger.debug("Source %s failed to close cleanly: %s", s.source_name, exc)

    async def _gather_flat(self, coros: list) -> list:
        """Run coroutines concurrently, flatten results, skip failures.

        *coros* must be in the same order as ``self._sources`` because a
        failure is attributed to the source at the same index.
        """
        results = await asyncio.gather(*coros, return_exceptions=True)
        flat: list = []
        for i, result in enumerate(results):
            if isinstance(result, BaseException):
                logger.warning("Source %s failed: %s", self._sources[i].source_name, result)
                self._record_failure(i, result)
                continue
            if result:
                flat.extend(result)
        return flat
175
+
176
+
177
+ # ---------------------------------------------------------------------------
178
+ # Deduplication
179
+ # ---------------------------------------------------------------------------
180
+
181
_DOI_PATTERN = re.compile(r"10\.\d{4,}/[^\s]+", re.IGNORECASE)


def _normalize_doi(doi: str | None) -> str | None:
    """Reduce a DOI in any textual form to a canonical key for comparison.

    The first ``10.<registrant>/<suffix>`` run found in *doi* is lower-cased
    and stripped of trailing dots. Returns ``None`` when *doi* is empty or
    contains no such run.
    """
    found = _DOI_PATTERN.search(doi) if doi else None
    return found.group(0).lower().rstrip(".") if found else None
192
+
193
+
194
def deduplicate_papers(papers: list[Paper]) -> list[Paper]:
    """Collapse records that share a normalized DOI into one Paper each.

    Records with the same DOI are folded together with ``_merge_papers`` in
    the order they appear. Records whose DOI is missing or unparseable are
    never merged; they are appended, untouched, after all DOI-keyed records.
    """
    merged_by_doi: dict[str, Paper] = {}
    without_doi: list[Paper] = []

    for paper in papers:
        key = _normalize_doi(paper.doi)
        if key is None:
            without_doi.append(paper)
        elif key in merged_by_doi:
            merged_by_doi[key] = _merge_papers(merged_by_doi[key], paper)
        else:
            merged_by_doi[key] = paper

    return [*merged_by_doi.values(), *without_doi]
216
+
217
+
218
def _merge_papers(a: Paper, b: Paper) -> Paper:
    """Combine two records of the same work into a new Paper.

    *a* wins for every scalar field that it has a truthy value for, and always
    supplies ``source_id``. The exceptions are the abstract (the longer one is
    kept), the citation count (the larger one is kept), and the ``keywords`` /
    ``verified_by`` lists (order-preserving unions).
    """

    def _unique(items: list[str]) -> list[str]:
        # dict keys keep first-seen order, which makes this a stable de-dup.
        return list(dict.fromkeys(items))

    if b.abstract and len(b.abstract) > len(a.abstract or ""):
        best_abstract = b.abstract
    else:
        best_abstract = a.abstract

    return Paper(
        source_id=a.source_id,
        title=a.title or b.title,
        abstract=best_abstract,
        authors=a.authors or b.authors,
        publication_year=a.publication_year or b.publication_year,
        cited_by_count=max(a.cited_by_count, b.cited_by_count),
        source_name=a.source_name or b.source_name,
        doi=a.doi or b.doi,
        keywords=_unique(a.keywords + b.keywords),
        url=a.url or b.url,
        verified_by=_unique(a.verified_by + b.verified_by),
    )
@@ -0,0 +1,183 @@
1
+ """OpenAlex adapter wrapping the shared client from .scripts/openalex/."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ import sys
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ from sources.base import ScholarlySource
12
+ from sources.models import Paper
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # Import the shared OpenAlex client
17
+ SCRIPTS_DIR = str(Path(__file__).parent.parent.parent / ".scripts" / "openalex")
18
+ if SCRIPTS_DIR not in sys.path:
19
+ sys.path.insert(0, SCRIPTS_DIR)
20
+
21
+ from openalex_client import OpenAlexClient # noqa: E402
22
+
23
+
24
class OpenAlexSource(ScholarlySource):
    """OpenAlex implementation of ``ScholarlySource`` using the shared client.

    All network access goes through the injected ``OpenAlexClient``; its
    blocking calls are run in worker threads via ``asyncio.to_thread`` so the
    event loop is not blocked. No API key is handled here.
    """

    # Canonical resolver prefix used when talking to OpenAlex about DOIs.
    _DOI_URL_PREFIX = "https://doi.org/"
    # Other spellings callers may pass in; stripped before re-prefixing.
    _DOI_INPUT_PREFIXES = (
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "doi:",
    )
    # Largest page size requested from the API in a single call.
    _MAX_PER_PAGE = 200
    # Public ``sort_by`` values -> OpenAlex sort expressions. Anything not
    # listed falls back to citation count.
    _SORT_PARAMS = {
        "relevance": "relevance_score:desc",
        "publication_year": "publication_date:desc",
    }
    _DEFAULT_SORT = "cited_by_count:desc"

    def __init__(self, client: OpenAlexClient) -> None:
        """Store the shared client used for every request."""
        self._client = client

    @property
    def source_name(self) -> str:
        """Human-readable name of this source."""
        return "OpenAlex"

    @property
    def source_key(self) -> str:
        """Short machine key for this source."""
        return "openalex"

    async def search_works(
        self,
        query: str,
        *,
        year_from: int | None = None,
        year_to: int | None = None,
        sort_by: str = "relevance",
        limit: int = 50,
    ) -> list[Paper]:
        """Search OpenAlex works and return up to *limit* papers.

        Args:
            query: Free-text search string.
            year_from: Earliest publication year (inclusive), if any.
            year_to: Latest publication year (inclusive), if any.
            sort_by: ``"relevance"`` or ``"publication_year"``; any other
                value sorts by citation count, descending.
            limit: Maximum number of papers to return. Only one page of at
                most 200 results is requested, so larger limits still yield
                at most 200 papers. Non-positive limits return ``[]``
                without contacting the API.

        Returns:
            Papers converted from the ``results`` entry of the response.
        """
        if limit <= 0:
            # per_page=0 is not a meaningful request; nothing was asked for.
            return []

        filter_params: dict[str, str] = {}
        if year_from and year_to:
            filter_params["publication_year"] = f"{year_from}-{year_to}"
        elif year_from:
            # ">(year - 1)" makes the lower bound inclusive.
            filter_params["publication_year"] = f">{year_from - 1}"
        elif year_to:
            # "<(year + 1)" makes the upper bound inclusive.
            filter_params["publication_year"] = f"<{year_to + 1}"

        sort_param = self._SORT_PARAMS.get(sort_by, self._DEFAULT_SORT)

        def _search() -> dict[str, Any]:
            return self._client.search_works(
                search=query,
                filter_params=filter_params or None,
                per_page=min(limit, self._MAX_PER_PAGE),
                sort=sort_param,
            )

        response = await asyncio.to_thread(_search)
        # Tolerate a missing or null "results" entry.
        works = response.get("results") or []
        return [self._to_paper(w) for w in works[:limit]]

    async def verify_doi(self, doi: str) -> Paper | None:
        """Look up a single DOI.

        Args:
            doi: DOI in bare, ``doi:``-prefixed, or resolver-URL form.

        Returns:
            The matching paper, or ``None`` when *doi* is blank, the lookup
            fails, or the response carries no ``id``. Failures are logged at
            debug level and never raised.
        """
        if not doi or not doi.strip():
            return None
        doi_url = self._doi_url(doi)

        try:
            work = await asyncio.to_thread(self._client.get_entity, "works", doi_url)
            if work and work.get("id"):
                return self._to_paper(work)
        except Exception:
            # Best-effort lookup: a failure means "not verified", not an error.
            logger.debug("OpenAlex DOI lookup failed for: %s", doi_url, exc_info=True)
        return None

    async def batch_verify_dois(self, dois: list[str]) -> dict[str, Paper | None]:
        """Look up many DOIs in one batch call.

        Args:
            dois: DOIs in any of the forms accepted by ``verify_doi``.

        Returns:
            A dict keyed by the *original* strings from *dois*. Each value is
            the matching paper, or ``None`` when the DOI was blank, not
            returned by OpenAlex, or the batch call failed. Different
            spellings of the same DOI all receive a result.
        """
        results: dict[str, Paper | None] = {d: None for d in dois}

        # Canonical lowercase URL -> every original spelling that maps to it.
        # Iterating ``results`` skips exact duplicates in ``dois``.
        originals_by_url: dict[str, list[str]] = {}
        lookup_urls: list[str] = []
        for original in results:
            if not original or not original.strip():
                continue
            url = self._doi_url(original)
            key = url.lower()
            if key not in originals_by_url:
                originals_by_url[key] = []
                lookup_urls.append(url)
            originals_by_url[key].append(original)

        if not lookup_urls:
            return results

        try:
            works = await asyncio.to_thread(
                self._client.batch_lookup, "works", lookup_urls, "doi"
            )
            for work in works or []:
                doi_val = work.get("doi")
                if not doi_val:
                    continue
                matches = originals_by_url.get(self._doi_url(doi_val).lower(), ())
                for original in matches:
                    # Convert per key so callers never share one Paper object.
                    results[original] = self._to_paper(work)
        except Exception:
            # Best-effort: keep whatever was matched before the failure.
            logger.warning("OpenAlex batch DOI lookup failed", exc_info=True)

        return results

    async def find_similar_works(
        self,
        text: str,
        *,
        limit: int = 20,
    ) -> list[Paper]:
        """Return works related to *text* via a relevance-sorted search."""
        return await self.search_works(text, sort_by="relevance", limit=limit)

    @classmethod
    def _doi_url(cls, doi: str) -> str:
        """Return *doi* as an ``https://doi.org/...`` URL.

        Known resolver/``doi:`` prefixes are matched case-insensitively and
        removed first, so an input that already carries a different prefix
        is not double-prefixed.
        """
        cleaned = doi.strip()
        lowered = cleaned.lower()
        for prefix in cls._DOI_INPUT_PREFIXES:
            if lowered.startswith(prefix):
                cleaned = cleaned[len(prefix):].lstrip()
                break
        return f"{cls._DOI_URL_PREFIX}{cleaned}"

    @staticmethod
    def _abstract_from_inverted_index(inverted_index: Any) -> str | None:
        """Rebuild abstract text from a ``{word: [positions]}`` mapping.

        Returns ``None`` when the index is empty/missing or malformed.
        """
        if not inverted_index:
            return None
        try:
            placed = sorted(
                (pos, word)
                for word, positions in inverted_index.items()
                for pos in positions
            )
            return " ".join(word for _, word in placed)
        except (AttributeError, TypeError):
            # Not a mapping of word -> iterable of comparable positions.
            logger.debug("OpenAlex abstract index could not be decoded", exc_info=True)
            return None

    @staticmethod
    def _to_paper(work: dict[str, Any]) -> Paper:
        """Convert an OpenAlex work dict to a ``Paper``.

        Missing or null fields fall back to empty values (``""``, ``0``,
        ``[]`` or ``None``) rather than raising.
        """
        # Authors: keep only entries that carry a display name.
        authors = []
        for authorship in work.get("authorships") or []:
            author = authorship.get("author") or {}
            name = author.get("display_name")
            if name:
                authors.append(name)

        # Keywords: dict entries are read from "display_name" (see the
        # OpenAlex Work object docs), falling back to the older "keyword"
        # key; plain strings are used as-is. Empty labels are dropped.
        keywords = []
        for kw in work.get("keywords") or []:
            if isinstance(kw, dict):
                label = kw.get("display_name") or kw.get("keyword")
            elif isinstance(kw, str):
                label = kw
            else:
                label = None
            if label:
                keywords.append(label)

        abstract = OpenAlexSource._abstract_from_inverted_index(
            work.get("abstract_inverted_index")
        )

        # DOI: store as a URL; values already starting with "http" are kept.
        doi = work.get("doi")
        if doi and not doi.startswith("http"):
            doi = f"https://doi.org/{doi}"

        # Source / journal name, tolerating null intermediate objects.
        primary_location = work.get("primary_location") or {}
        source_info = primary_location.get("source") or {}
        source_name = source_info.get("display_name")

        work_id = work.get("id") or ""

        return Paper(
            source_id=f"openalex:{work_id}",
            title=work.get("title") or "",
            abstract=abstract,
            authors=authors,
            publication_year=work.get("publication_year") or 0,
            cited_by_count=work.get("cited_by_count") or 0,
            source_name=source_name,
            doi=doi,
            keywords=keywords,
            url=doi or work_id,
            verified_by=["openalex"],
        )