flonat-research 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/domain-reviewer.md +336 -0
- package/.claude/agents/fixer.md +226 -0
- package/.claude/agents/paper-critic.md +370 -0
- package/.claude/agents/peer-reviewer.md +289 -0
- package/.claude/agents/proposal-reviewer.md +215 -0
- package/.claude/agents/referee2-reviewer.md +367 -0
- package/.claude/agents/references/journal-referee-profiles.md +354 -0
- package/.claude/agents/references/paper-critic/council-personas.md +77 -0
- package/.claude/agents/references/paper-critic/council-prompts.md +198 -0
- package/.claude/agents/references/peer-reviewer/report-template.md +199 -0
- package/.claude/agents/references/peer-reviewer/sa-prompts.md +260 -0
- package/.claude/agents/references/peer-reviewer/security-scan.md +188 -0
- package/.claude/agents/references/proposal-reviewer/report-template.md +144 -0
- package/.claude/agents/references/proposal-reviewer/sa-prompts.md +149 -0
- package/.claude/agents/references/referee-config.md +114 -0
- package/.claude/agents/references/referee2-reviewer/audit-checklists.md +287 -0
- package/.claude/agents/references/referee2-reviewer/report-template.md +334 -0
- package/.claude/rules/design-before-results.md +52 -0
- package/.claude/rules/ignore-agents-md.md +17 -0
- package/.claude/rules/ignore-gemini-md.md +17 -0
- package/.claude/rules/lean-claude-md.md +45 -0
- package/.claude/rules/learn-tags.md +99 -0
- package/.claude/rules/overleaf-separation.md +67 -0
- package/.claude/rules/plan-first.md +175 -0
- package/.claude/rules/read-docs-first.md +50 -0
- package/.claude/rules/scope-discipline.md +28 -0
- package/.claude/settings.json +125 -0
- package/.context/current-focus.md +33 -0
- package/.context/preferences/priorities.md +36 -0
- package/.context/preferences/task-naming.md +28 -0
- package/.context/profile.md +29 -0
- package/.context/projects/_index.md +41 -0
- package/.context/projects/papers/nudge-exp.md +22 -0
- package/.context/projects/papers/uncertainty.md +31 -0
- package/.context/resources/claude-scientific-writer-review.md +48 -0
- package/.context/resources/cunningham-multi-analyst-agents.md +104 -0
- package/.context/resources/cunningham-multilang-code-audit.md +62 -0
- package/.context/resources/google-ai-co-scientist-review.md +72 -0
- package/.context/resources/karpathy-llm-council-review.md +58 -0
- package/.context/resources/multi-coder-reliability-protocol.md +175 -0
- package/.context/resources/pedro-santanna-takeaways.md +96 -0
- package/.context/resources/venue-rankings/abs_ajg_2024.csv +1823 -0
- package/.context/resources/venue-rankings/abs_ajg_2024_econ.csv +356 -0
- package/.context/resources/venue-rankings/cabs_4_4star_theory.csv +40 -0
- package/.context/resources/venue-rankings/core_2026.csv +801 -0
- package/.context/resources/venue-rankings.md +147 -0
- package/.context/workflows/README.md +69 -0
- package/.context/workflows/daily-review.md +91 -0
- package/.context/workflows/meeting-actions.md +108 -0
- package/.context/workflows/replication-protocol.md +155 -0
- package/.context/workflows/weekly-review.md +113 -0
- package/.mcp-server-biblio/formatters.py +158 -0
- package/.mcp-server-biblio/pyproject.toml +11 -0
- package/.mcp-server-biblio/server.py +678 -0
- package/.mcp-server-biblio/sources/__init__.py +14 -0
- package/.mcp-server-biblio/sources/base.py +73 -0
- package/.mcp-server-biblio/sources/formatters.py +83 -0
- package/.mcp-server-biblio/sources/models.py +22 -0
- package/.mcp-server-biblio/sources/multi_source.py +243 -0
- package/.mcp-server-biblio/sources/openalex_source.py +183 -0
- package/.mcp-server-biblio/sources/scopus_source.py +309 -0
- package/.mcp-server-biblio/sources/wos_source.py +508 -0
- package/.mcp-server-biblio/uv.lock +896 -0
- package/.scripts/README.md +161 -0
- package/.scripts/ai_pattern_density.py +446 -0
- package/.scripts/conf +445 -0
- package/.scripts/config.py +122 -0
- package/.scripts/count_inventory.py +275 -0
- package/.scripts/daily_digest.py +288 -0
- package/.scripts/done +177 -0
- package/.scripts/extract_meeting_actions.py +223 -0
- package/.scripts/focus +176 -0
- package/.scripts/generate-codex-agents-md.py +217 -0
- package/.scripts/inbox +194 -0
- package/.scripts/notion_helpers.py +325 -0
- package/.scripts/openalex/query_helpers.py +306 -0
- package/.scripts/papers +227 -0
- package/.scripts/query +223 -0
- package/.scripts/session-history.py +201 -0
- package/.scripts/skill-health.py +516 -0
- package/.scripts/skill-log-miner.py +273 -0
- package/.scripts/sync-to-codex.sh +252 -0
- package/.scripts/task +213 -0
- package/.scripts/tasks +190 -0
- package/.scripts/week +206 -0
- package/CLAUDE.md +197 -0
- package/LICENSE +21 -0
- package/MEMORY.md +38 -0
- package/README.md +269 -0
- package/docs/agents.md +44 -0
- package/docs/bibliography-setup.md +55 -0
- package/docs/council-mode.md +36 -0
- package/docs/getting-started.md +245 -0
- package/docs/hooks.md +38 -0
- package/docs/mcp-servers.md +82 -0
- package/docs/notion-setup.md +109 -0
- package/docs/rules.md +33 -0
- package/docs/scripts.md +303 -0
- package/docs/setup-overview/setup-overview.pdf +0 -0
- package/docs/skills.md +70 -0
- package/docs/system.md +159 -0
- package/hooks/block-destructive-git.sh +66 -0
- package/hooks/context-monitor.py +114 -0
- package/hooks/postcompact-restore.py +157 -0
- package/hooks/precompact-autosave.py +181 -0
- package/hooks/promise-checker.sh +124 -0
- package/hooks/protect-source-files.sh +81 -0
- package/hooks/resume-context-loader.sh +53 -0
- package/hooks/startup-context-loader.sh +102 -0
- package/package.json +51 -0
- package/packages/cli-council/.github/workflows/claude-code-review.yml +44 -0
- package/packages/cli-council/.github/workflows/claude.yml +50 -0
- package/packages/cli-council/README.md +100 -0
- package/packages/cli-council/pyproject.toml +43 -0
- package/packages/cli-council/src/cli_council/__init__.py +19 -0
- package/packages/cli-council/src/cli_council/__main__.py +185 -0
- package/packages/cli-council/src/cli_council/backends/__init__.py +8 -0
- package/packages/cli-council/src/cli_council/backends/base.py +81 -0
- package/packages/cli-council/src/cli_council/backends/claude.py +25 -0
- package/packages/cli-council/src/cli_council/backends/codex.py +27 -0
- package/packages/cli-council/src/cli_council/backends/gemini.py +26 -0
- package/packages/cli-council/src/cli_council/checkpoint.py +212 -0
- package/packages/cli-council/src/cli_council/config.py +51 -0
- package/packages/cli-council/src/cli_council/council.py +391 -0
- package/packages/cli-council/src/cli_council/models.py +46 -0
- package/packages/llm-council/.github/workflows/claude-code-review.yml +44 -0
- package/packages/llm-council/.github/workflows/claude.yml +50 -0
- package/packages/llm-council/README.md +453 -0
- package/packages/llm-council/pyproject.toml +42 -0
- package/packages/llm-council/src/llm_council/__init__.py +23 -0
- package/packages/llm-council/src/llm_council/__main__.py +259 -0
- package/packages/llm-council/src/llm_council/checkpoint.py +193 -0
- package/packages/llm-council/src/llm_council/client.py +253 -0
- package/packages/llm-council/src/llm_council/config.py +232 -0
- package/packages/llm-council/src/llm_council/council.py +482 -0
- package/packages/llm-council/src/llm_council/models.py +46 -0
- package/packages/mcp-bibliography/MEMORY.md +31 -0
- package/packages/mcp-bibliography/_app.py +226 -0
- package/packages/mcp-bibliography/formatters.py +158 -0
- package/packages/mcp-bibliography/log/2026-03-13-2100.md +35 -0
- package/packages/mcp-bibliography/pyproject.toml +15 -0
- package/packages/mcp-bibliography/run.sh +20 -0
- package/packages/mcp-bibliography/scholarly_formatters.py +83 -0
- package/packages/mcp-bibliography/server.py +1857 -0
- package/packages/mcp-bibliography/tools/__init__.py +28 -0
- package/packages/mcp-bibliography/tools/_registry.py +19 -0
- package/packages/mcp-bibliography/tools/altmetric.py +107 -0
- package/packages/mcp-bibliography/tools/core.py +92 -0
- package/packages/mcp-bibliography/tools/dblp.py +52 -0
- package/packages/mcp-bibliography/tools/openalex.py +296 -0
- package/packages/mcp-bibliography/tools/opencitations.py +102 -0
- package/packages/mcp-bibliography/tools/openreview.py +179 -0
- package/packages/mcp-bibliography/tools/orcid.py +131 -0
- package/packages/mcp-bibliography/tools/scholarly.py +575 -0
- package/packages/mcp-bibliography/tools/unpaywall.py +63 -0
- package/packages/mcp-bibliography/tools/zenodo.py +123 -0
- package/packages/mcp-bibliography/uv.lock +711 -0
- package/scripts/setup.sh +143 -0
- package/skills/beamer-deck/SKILL.md +199 -0
- package/skills/beamer-deck/references/quality-rubric.md +54 -0
- package/skills/beamer-deck/references/review-prompts.md +106 -0
- package/skills/bib-validate/SKILL.md +261 -0
- package/skills/bib-validate/references/council-mode.md +34 -0
- package/skills/bib-validate/references/deep-verify.md +79 -0
- package/skills/bib-validate/references/fix-mode.md +36 -0
- package/skills/bib-validate/references/openalex-verification.md +45 -0
- package/skills/bib-validate/references/preprint-check.md +31 -0
- package/skills/bib-validate/references/ref-manager-crossref.md +41 -0
- package/skills/bib-validate/references/report-template.md +82 -0
- package/skills/code-archaeology/SKILL.md +141 -0
- package/skills/code-review/SKILL.md +265 -0
- package/skills/code-review/references/quality-rubric.md +67 -0
- package/skills/consolidate-memory/SKILL.md +208 -0
- package/skills/context-status/SKILL.md +126 -0
- package/skills/creation-guard/SKILL.md +230 -0
- package/skills/devils-advocate/SKILL.md +130 -0
- package/skills/devils-advocate/references/competing-hypotheses.md +83 -0
- package/skills/init-project/SKILL.md +115 -0
- package/skills/init-project-course/references/memory-and-settings.md +92 -0
- package/skills/init-project-course/references/organise-templates.md +94 -0
- package/skills/init-project-course/skill.md +147 -0
- package/skills/init-project-light/skill.md +139 -0
- package/skills/init-project-research/SKILL.md +368 -0
- package/skills/init-project-research/references/atlas-pipeline-sync.md +70 -0
- package/skills/init-project-research/references/atlas-schema.md +81 -0
- package/skills/init-project-research/references/confirmation-report.md +39 -0
- package/skills/init-project-research/references/domain-profile-template.md +104 -0
- package/skills/init-project-research/references/interview-round3.md +34 -0
- package/skills/init-project-research/references/literature-discovery.md +43 -0
- package/skills/init-project-research/references/scaffold-details.md +197 -0
- package/skills/init-project-research/templates/field-calibration.md +60 -0
- package/skills/init-project-research/templates/pipeline-manifest.md +63 -0
- package/skills/init-project-research/templates/run-all.sh +116 -0
- package/skills/init-project-research/templates/seed-files.md +337 -0
- package/skills/insights-deck/SKILL.md +151 -0
- package/skills/interview-me/SKILL.md +157 -0
- package/skills/latex/SKILL.md +141 -0
- package/skills/latex/references/latex-configs.md +183 -0
- package/skills/latex-autofix/SKILL.md +230 -0
- package/skills/latex-autofix/references/known-errors.md +183 -0
- package/skills/latex-autofix/references/quality-rubric.md +50 -0
- package/skills/latex-health-check/SKILL.md +161 -0
- package/skills/learn/SKILL.md +220 -0
- package/skills/learn/scripts/validate_skill.py +265 -0
- package/skills/lessons-learned/SKILL.md +201 -0
- package/skills/literature/SKILL.md +335 -0
- package/skills/literature/references/agent-templates.md +393 -0
- package/skills/literature/references/bibliometric-apis.md +44 -0
- package/skills/literature/references/cli-council-search.md +79 -0
- package/skills/literature/references/openalex-api-guide.md +371 -0
- package/skills/literature/references/openalex-common-queries.md +381 -0
- package/skills/literature/references/openalex-workflows.md +248 -0
- package/skills/literature/references/reference-manager-sync.md +36 -0
- package/skills/literature/references/scopus-api-guide.md +208 -0
- package/skills/literature/references/wos-api-guide.md +308 -0
- package/skills/multi-perspective/SKILL.md +311 -0
- package/skills/multi-perspective/references/computational-many-analysts.md +77 -0
- package/skills/pipeline-manifest/SKILL.md +226 -0
- package/skills/pre-submission-report/SKILL.md +153 -0
- package/skills/process-reviews/SKILL.md +244 -0
- package/skills/process-reviews/references/rr-routing.md +101 -0
- package/skills/project-deck/SKILL.md +87 -0
- package/skills/project-safety/SKILL.md +135 -0
- package/skills/proofread/SKILL.md +254 -0
- package/skills/proofread/references/quality-rubric.md +104 -0
- package/skills/python-env/SKILL.md +57 -0
- package/skills/quarto-deck/SKILL.md +226 -0
- package/skills/quarto-deck/references/markdown-format.md +143 -0
- package/skills/quarto-deck/references/quality-rubric.md +54 -0
- package/skills/save-context/SKILL.md +174 -0
- package/skills/session-log/SKILL.md +98 -0
- package/skills/shared/concept-validation-gate.md +161 -0
- package/skills/shared/council-protocol.md +265 -0
- package/skills/shared/distribution-diagnostics.md +164 -0
- package/skills/shared/engagement-stratified-sampling.md +218 -0
- package/skills/shared/escalation-protocol.md +74 -0
- package/skills/shared/external-audit-protocol.md +205 -0
- package/skills/shared/intercoder-reliability.md +256 -0
- package/skills/shared/mcp-degradation.md +81 -0
- package/skills/shared/method-probing-questions.md +163 -0
- package/skills/shared/multi-language-conventions.md +143 -0
- package/skills/shared/paid-api-safety.md +174 -0
- package/skills/shared/palettes.md +90 -0
- package/skills/shared/progressive-disclosure.md +92 -0
- package/skills/shared/project-documentation-content.md +443 -0
- package/skills/shared/project-documentation-format.md +281 -0
- package/skills/shared/project-documentation.md +100 -0
- package/skills/shared/publication-output.md +138 -0
- package/skills/shared/quality-scoring.md +70 -0
- package/skills/shared/reference-resolution.md +77 -0
- package/skills/shared/research-quality-rubric.md +165 -0
- package/skills/shared/rhetoric-principles.md +54 -0
- package/skills/shared/skill-design-patterns.md +272 -0
- package/skills/shared/skill-index.md +240 -0
- package/skills/shared/system-documentation.md +334 -0
- package/skills/shared/tikz-rules.md +402 -0
- package/skills/shared/validation-tiers.md +121 -0
- package/skills/shared/venue-guides/README.md +46 -0
- package/skills/shared/venue-guides/cell_press_style.md +483 -0
- package/skills/shared/venue-guides/conferences_formatting.md +564 -0
- package/skills/shared/venue-guides/cs_conference_style.md +463 -0
- package/skills/shared/venue-guides/examples/cell_summary_example.md +247 -0
- package/skills/shared/venue-guides/examples/medical_structured_abstract.md +313 -0
- package/skills/shared/venue-guides/examples/nature_abstract_examples.md +213 -0
- package/skills/shared/venue-guides/examples/neurips_introduction_example.md +245 -0
- package/skills/shared/venue-guides/journals_formatting.md +486 -0
- package/skills/shared/venue-guides/medical_journal_styles.md +535 -0
- package/skills/shared/venue-guides/ml_conference_style.md +556 -0
- package/skills/shared/venue-guides/nature_science_style.md +405 -0
- package/skills/shared/venue-guides/reviewer_expectations.md +417 -0
- package/skills/shared/venue-guides/venue_writing_styles.md +321 -0
- package/skills/split-pdf/SKILL.md +172 -0
- package/skills/split-pdf/methodology.md +48 -0
- package/skills/sync-notion/SKILL.md +93 -0
- package/skills/system-audit/SKILL.md +157 -0
- package/skills/system-audit/references/sub-agent-prompts.md +294 -0
- package/skills/task-management/SKILL.md +131 -0
- package/skills/update-focus/SKILL.md +204 -0
- package/skills/update-project-doc/SKILL.md +194 -0
- package/skills/validate-bib/SKILL.md +242 -0
- package/skills/validate-bib/references/council-mode.md +34 -0
- package/skills/validate-bib/references/deep-verify.md +71 -0
- package/skills/validate-bib/references/openalex-verification.md +45 -0
- package/skills/validate-bib/references/preprint-check.md +31 -0
- package/skills/validate-bib/references/report-template.md +62 -0
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Abstract base class for scholarly data sources."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from sources.models import Paper
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ScholarlySource(ABC):
    """Contract that every bibliometric data provider implements.

    One subclass is written per provider (OpenAlex, Scopus, Web of Science).
    The four query methods are abstract coroutines so that several providers
    can be awaited concurrently. ``close`` and the two diagnostics hooks are
    concrete no-ops that a subclass overrides only when it has something to
    release or report.
    """

    # ------------------------------------------------------------------
    # Identity
    # ------------------------------------------------------------------

    @property
    @abstractmethod
    def source_name(self) -> str:
        """Return the human-readable name of this data source."""

    @property
    @abstractmethod
    def source_key(self) -> str:
        """Return the machine-readable slug (e.g. 'openalex', 'scopus', 'wos')."""

    # ------------------------------------------------------------------
    # Queries (abstract coroutines)
    # ------------------------------------------------------------------

    @abstractmethod
    async def search_works(
        self,
        query: str,
        *,
        year_from: int | None = None,
        year_to: int | None = None,
        sort_by: str = "relevance",
        limit: int = 50,
    ) -> list[Paper]:
        """Return works matching the free-text ``query``.

        ``year_from``/``year_to``, ``sort_by`` and ``limit`` are keyword-only
        refinements; their exact interpretation is left to the subclass.
        """

    @abstractmethod
    async def verify_doi(self, doi: str) -> Paper | None:
        """Return the metadata for one DOI, or ``None`` when it is not found."""

    @abstractmethod
    async def batch_verify_dois(self, dois: list[str]) -> dict[str, Paper | None]:
        """Look up several DOIs in one call.

        Returns a mapping ``{doi: Paper or None}``.
        """

    @abstractmethod
    async def find_similar_works(
        self,
        text: str,
        *,
        limit: int = 20,
    ) -> list[Paper]:
        """Return the works most similar to ``text`` (a title or an abstract)."""

    # ------------------------------------------------------------------
    # Optional hooks (default: do nothing)
    # ------------------------------------------------------------------

    async def close(self) -> None:
        """Release network/session resources held by the source, if any."""

    def reset_diagnostics(self) -> None:
        """Start a fresh set of per-request diagnostics, if the source keeps any."""

    def consume_diagnostics(self) -> dict[str, Any] | None:
        """Return and clear the current request's diagnostics.

        The base implementation collects nothing and returns ``None``.
        """
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Markdown formatters for multi-source scholarly results."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from sources.models import Paper
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _md_cell(text: str) -> str:
    """Make ``text`` safe to place inside a single markdown table cell."""
    # A raw line break ends the row and a raw "|" starts a new column, so
    # line breaks become spaces and pipes are backslash-escaped.
    return " ".join(text.splitlines()).replace("|", "\\|")


def _doi_href(doi: str) -> str:
    """Return a resolvable URL for ``doi``.

    Values that are already http(s) URLs are returned unchanged (apart from
    surrounding whitespace). Bare DOIs, optionally prefixed with ``doi:``,
    are routed through https://doi.org/ so the markdown link is absolute
    rather than relative to the rendered document.
    """
    target = doi.strip()
    lowered = target.lower()
    if lowered.startswith(("http://", "https://")):
        return target
    if lowered.startswith("doi:"):
        target = target[4:].lstrip()
    return f"https://doi.org/{target}"


def format_papers_table(papers: list[Paper], title: str = "Results") -> str:
    """Format a list of Papers as a numbered markdown table.

    Args:
        papers: Records to list, one row each, in the order given.
        title: Text of the level-2 heading placed above the table.

    Returns:
        The heading followed by the table. When ``papers`` is empty, the
        heading followed by "No results found." instead.

    Each row shows at most three authors (then "et al."), the title cut to
    80 characters, the venue cut to 30 characters, and a link for the DOI
    ("—" when the paper has none).
    """
    if not papers:
        return f"## {title}\n\nNo results found."

    lines = [
        f"## {title}\n",
        "| # | Title | Authors | Year | Cited | Source | DOI |",
        "|---|-------|---------|------|-------|--------|-----|",
    ]

    for i, p in enumerate(papers, 1):
        authors = ", ".join(p.authors[:3])
        if len(p.authors) > 3:
            authors += " et al."

        title_short = p.title[:80] + "..." if len(p.title) > 80 else p.title

        source = p.source_name or "—"
        if len(source) > 30:
            source = source[:27] + "..."

        doi_link = f"[link]({_doi_href(p.doi)})" if p.doi else "—"

        # Truncate first, escape second: the length limits apply to the
        # visible text, not to the backslashes added by escaping.
        lines.append(
            f"| {i} | {_md_cell(title_short)} | {_md_cell(authors)} | {p.publication_year} | "
            f"{p.cited_by_count:,} | {_md_cell(source)} | {doi_link} |"
        )

    return "\n".join(lines)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def format_verification_table(results: dict[str, Paper | None]) -> str:
    """Format DOI verification results as a markdown table plus a summary line.

    Args:
        results: Mapping from each checked DOI to the record found for it,
            or to ``None`` when no source returned one. Rows follow the
            mapping's iteration order.

    Returns:
        Markdown text: heading, table, blank line, then a "**Summary:**"
        line with the three status counts.

    A record counts as VERIFIED when its ``verified_by`` list has two or
    more entries, and as SINGLE SOURCE otherwise. ``None`` is NOT FOUND.
    """
    lines = [
        "## DOI Verification Results\n",
        "| # | DOI | Title | Year | Cited By | Verified By | Status |",
        "|---|-----|-------|------|----------|-------------|--------|",
    ]

    verified_count = 0
    single_count = 0
    not_found_count = 0

    for i, (doi, paper) in enumerate(results.items(), 1):
        if paper is None:
            not_found_count += 1
            lines.append(f"| {i} | `{doi}` | — | — | — | — | ❌ NOT FOUND |")
            continue

        if len(paper.verified_by) >= 2:
            verified_count += 1
            badge = "✅ VERIFIED"
        else:
            single_count += 1
            badge = "⚠️ SINGLE SOURCE"

        title_short = paper.title[:60] + "..." if len(paper.title) > 60 else paper.title
        # A raw "|" or line break in the title would split the table row, so
        # neutralise both after truncating (the limit counts visible text).
        title_short = " ".join(title_short.splitlines()).replace("|", "\\|")

        # Use the same placeholder as the not-found row rather than leaving
        # the cell blank when no confirming source is recorded.
        sources = ", ".join(paper.verified_by) or "—"

        lines.append(
            f"| {i} | `{doi}` | {title_short} | {paper.publication_year} | "
            f"{paper.cited_by_count:,} | {sources} | {badge} |"
        )

    lines.append("")
    lines.append(
        f"**Summary:** {verified_count} verified (2+ sources), "
        f"{single_count} single-source, {not_found_count} not found"
    )

    return "\n".join(lines)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def format_source_status(sources: list[dict]) -> str:
    """Render the configured data sources as a three-column markdown table.

    Args:
        sources: One dict per source. ``"name"`` and ``"active"`` are read
            with plain indexing (a missing key raises ``KeyError``);
            ``"key"`` is optional and shown as "—" when absent.

    Returns:
        Markdown text: a level-2 heading followed by a Source / Status / Key
        table with one row per entry.
    """

    def _row(entry: dict) -> str:
        # Read "active" first, then "key", then "name" — the same lookup
        # order as a straight-line loop body would use.
        state = "✅ Active" if entry["active"] else "❌ Not configured"
        slug = entry.get("key", "—")
        return f"| {entry['name']} | {state} | `{slug}` |"

    header = [
        "## Scholarly Source Status\n",
        "| Source | Status | Key |",
        "|--------|--------|-----|",
    ]
    return "\n".join(header + [_row(entry) for entry in sources])
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Paper dataclass for multi-source scholarly search."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class Paper:
    """One scholarly work, in the common shape shared by every source.

    The first six fields are required and positional in the order listed;
    the remaining fields are optional and default to ``None`` or to a fresh
    empty list per instance.
    """

    # --- required -----------------------------------------------------
    source_id: str          # record identifier (presumably the originating source's own ID — confirm)
    title: str
    authors: list[str]
    publication_year: int
    cited_by_count: int
    doi: str | None         # None when the work has no DOI

    # --- optional -----------------------------------------------------
    abstract: str | None = field(default=None)
    source_name: str | None = field(default=None)  # journal / venue
    keywords: list[str] = field(default_factory=list)
    url: str | None = field(default=None)
    verified_by: list[str] = field(default_factory=list)  # which sources confirmed this DOI
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
"""Multi-source composite adapter with DOI-based deduplication.
|
|
2
|
+
|
|
3
|
+
Queries all enabled sources concurrently and merges results.
|
|
4
|
+
Ported from ZZ Topic Finder with improvements.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import logging
|
|
11
|
+
import re
|
|
12
|
+
from contextvars import ContextVar
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from sources.base import ScholarlySource
|
|
16
|
+
from sources.models import Paper
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class MultiSource(ScholarlySource):
    """Composite data source that queries multiple APIs and deduplicates results.

    Every query is sent to all wrapped sources concurrently with
    ``asyncio.gather(..., return_exceptions=True)``. A source that raises is
    skipped and, when diagnostics are active for the current context,
    recorded as failed. Search results are deduplicated by normalized DOI;
    papers without a DOI are kept as-is.
    """

    def __init__(self, sources: list[ScholarlySource]) -> None:
        """Wrap ``sources``.

        Raises:
            ValueError: If ``sources`` is empty.
        """
        if not sources:
            raise ValueError("MultiSource requires at least one source")
        self._sources = sources
        # Diagnostics live in a ContextVar so that each context sees only
        # the diagnostics it started with reset_diagnostics().
        # NOTE(review): the contextvars documentation recommends creating
        # ContextVar objects at module level; this one is created per
        # instance. Left unchanged because it is part of the existing design.
        self._diagnostics_ctx: ContextVar[dict[str, Any] | None] = ContextVar(
            f"multi_source_diagnostics_{id(self)}", default=None
        )

    @property
    def source_name(self) -> str:
        """Names of the wrapped sources joined with " + "."""
        return " + ".join(s.source_name for s in self._sources)

    @property
    def source_key(self) -> str:
        """Fixed slug identifying the composite source."""
        return "multi"

    # ------------------------------------------------------------------
    # Diagnostics
    # ------------------------------------------------------------------

    def reset_diagnostics(self) -> None:
        """Start collecting diagnostics for the current context."""
        self._diagnostics_ctx.set(
            {
                "attempted": [s.source_name for s in self._sources],
                "failed": set(),
                "warnings": [],
            }
        )

    def consume_diagnostics(self) -> dict[str, Any] | None:
        """Return the collected diagnostics and clear them.

        Returns:
            ``None`` when ``reset_diagnostics`` has not been called in this
            context (or the diagnostics were already consumed). Otherwise a
            dict with the keys ``attempted``, ``succeeded``, ``failed``
            (sorted) and ``warnings``.
        """
        diag = self._diagnostics_ctx.get()
        if diag is None:
            return None

        attempted = list(diag["attempted"])
        failed = sorted(diag["failed"])
        succeeded = [name for name in attempted if name not in failed]
        warnings = list(diag["warnings"])
        self._diagnostics_ctx.set(None)
        return {
            "attempted": attempted,
            "succeeded": succeeded,
            "failed": failed,
            "warnings": warnings,
        }

    def _note_failure(self, index: int, error: BaseException, what: str, level: int) -> None:
        """Log that source ``index`` failed and record it in the diagnostics.

        ``what`` is the phrase placed after the source name (for example
        "failed batch verify"); ``level`` is the logging level to use.
        Nothing is recorded when diagnostics are not active.
        """
        name = self._sources[index].source_name
        logger.log(level, "Source %s %s: %s", name, what, error)
        diag = self._diagnostics_ctx.get()
        if diag is not None:
            diag["failed"].add(name)
            diag["warnings"].append(f"{name} {what}: {error}")

    # ------------------------------------------------------------------
    # Queries
    # ------------------------------------------------------------------

    async def search_works(
        self,
        query: str,
        *,
        year_from: int | None = None,
        year_to: int | None = None,
        sort_by: str = "relevance",
        limit: int = 50,
    ) -> list[Paper]:
        """Search every source, merge duplicates, and return at most ``limit`` papers.

        The merged list is re-sorted (descending) only for
        ``sort_by="cited_by_count"`` and ``sort_by="publication_year"``.
        For any other value, including "relevance", it keeps the order
        produced by ``deduplicate_papers``.
        """
        coros = [
            s.search_works(query, year_from=year_from, year_to=year_to, sort_by=sort_by, limit=limit)
            for s in self._sources
        ]
        all_papers = await self._gather_flat(coros)
        deduped = deduplicate_papers(all_papers)

        if sort_by == "cited_by_count":
            deduped.sort(key=lambda p: p.cited_by_count, reverse=True)
        elif sort_by == "publication_year":
            deduped.sort(key=lambda p: p.publication_year, reverse=True)

        return deduped[:limit]

    async def verify_doi(self, doi: str) -> Paper | None:
        """Look ``doi`` up in every source and merge whatever is found.

        Returns:
            ``None`` when no source returned a record. Otherwise the merged
            record, whose ``verified_by`` lists the ``source_key`` of each
            source that returned one, in source order.
        """
        coros = [s.verify_doi(doi) for s in self._sources]
        results = await asyncio.gather(*coros, return_exceptions=True)

        papers = []
        verified_by: list[str] = []
        for i, result in enumerate(results):
            if isinstance(result, BaseException):
                # Logged at DEBUG as before, and now also recorded so that
                # consume_diagnostics() does not report the source as succeeded.
                self._note_failure(i, result, "failed DOI verify", logging.DEBUG)
                continue
            if result is not None:
                papers.append(result)
                verified_by.append(self._sources[i].source_key)

        if not papers:
            return None

        # Merge all found papers into one
        merged = papers[0]
        for p in papers[1:]:
            merged = _merge_papers(merged, p)
        merged.verified_by = verified_by
        return merged

    async def batch_verify_dois(self, dois: list[str]) -> dict[str, Paper | None]:
        """Look several DOIs up in every source and merge the answers per DOI.

        Returns:
            A mapping with exactly the requested DOI strings as keys. Each
            value is the merged record, or ``None`` when no source found it.
        """
        coros = [s.batch_verify_dois(dois) for s in self._sources]
        results = await asyncio.gather(*coros, return_exceptions=True)

        merged: dict[str, Paper | None] = {d: None for d in dois}

        # Nothing visible here guarantees that a source keys its answer by
        # the exact string it was given. Map each normalized DOI back to the
        # caller's spelling so such answers are matched, not a KeyError.
        requested: dict[str, str] = {}
        for d in dois:
            requested.setdefault(_normalize_doi(d) or d, d)

        for i, result in enumerate(results):
            if isinstance(result, BaseException):
                self._note_failure(i, result, "failed batch verify", logging.WARNING)
                continue

            key = self._sources[i].source_key
            for doi, paper in result.items():
                if paper is None:
                    continue

                target = doi if doi in merged else requested.get(_normalize_doi(doi) or doi)
                if target is None:
                    # The source answered for a DOI nobody asked about.
                    logger.debug("Source %s returned unrequested DOI %s", key, doi)
                    continue

                current = merged[target]
                if current is None:
                    paper.verified_by = [key]
                    merged[target] = paper
                else:
                    combined = _merge_papers(current, paper)
                    if key not in combined.verified_by:
                        combined.verified_by.append(key)
                    merged[target] = combined

        return merged

    async def find_similar_works(
        self,
        text: str,
        *,
        limit: int = 20,
    ) -> list[Paper]:
        """Ask every source for works similar to ``text``; return at most ``limit`` after deduplication."""
        coros = [s.find_similar_works(text, limit=limit) for s in self._sources]
        all_papers = await self._gather_flat(coros)
        return deduplicate_papers(all_papers)[:limit]

    async def close(self) -> None:
        """Close every wrapped source, continuing past sources that fail to close."""
        for s in self._sources:
            try:
                await s.close()
            except Exception as exc:
                # Best-effort cleanup: one failing source must not stop the
                # others from being closed, but the error is kept in the log.
                logger.debug("Ignoring error while closing %s: %s", type(s).__name__, exc)

    async def _gather_flat(self, coros: list) -> list:
        """Run coroutines concurrently, flatten results, skip failures.

        ``coros`` must be in the same order as ``self._sources``; a failure
        at position ``i`` is attributed to source ``i``.
        """
        results = await asyncio.gather(*coros, return_exceptions=True)
        flat: list = []
        for i, result in enumerate(results):
            if isinstance(result, BaseException):
                self._note_failure(i, result, "failed", logging.WARNING)
                continue
            flat.extend(result)
        return flat
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# ---------------------------------------------------------------------------
|
|
178
|
+
# Deduplication
|
|
179
|
+
# ---------------------------------------------------------------------------
|
|
180
|
+
|
|
181
|
+
# Matches "10.<4+ digits>/<suffix>", where the suffix runs to the next whitespace.
_DOI_PATTERN = re.compile(r"10\.\d{4,}/[^\s]+", re.IGNORECASE)


def _normalize_doi(doi: str | None) -> str | None:
    """Return the canonical form of ``doi`` used as a comparison key.

    The first DOI-shaped substring is taken, so URL or ``doi:`` prefixes in
    front of it are dropped. It is then lower-cased and stripped of trailing
    dots. Returns ``None`` for ``None``, an empty string, or text containing
    no DOI-shaped substring.
    """
    found = _DOI_PATTERN.search(doi) if doi else None
    if found is None:
        return None
    canonical = found.group(0).lower()
    return canonical.rstrip(".")
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def deduplicate_papers(papers: list[Paper]) -> list[Paper]:
    """Collapse papers that share a normalized DOI into one record each.

    Papers are keyed by ``_normalize_doi(paper.doi)``. The first paper seen
    for a key fixes that key's position in the output; every later paper
    with the same key is folded in with ``_merge_papers``. Papers whose DOI
    is missing or unrecognisable are never merged.

    Returns:
        The DOI-keyed records in first-seen order, followed by the papers
        without a usable DOI in their original order.
    """
    merged_by_doi: dict[str, Paper] = {}
    without_doi: list[Paper] = []

    for paper in papers:
        key = _normalize_doi(paper.doi)
        if key is None:
            without_doi.append(paper)
        elif key in merged_by_doi:
            merged_by_doi[key] = _merge_papers(merged_by_doi[key], paper)
        else:
            merged_by_doi[key] = paper

    return [*merged_by_doi.values(), *without_doi]
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _merge_papers(a: Paper, b: Paper) -> Paper:
    """Build one ``Paper`` out of two records describing the same work.

    Field policy:
      * ``source_id`` always comes from *a*.
      * ``title``, ``authors``, ``publication_year``, ``source_name``, ``doi``
        and ``url`` use *a*'s value when it is truthy, otherwise *b*'s.
      * ``abstract`` is *b*'s only when *a* has none or *b*'s is strictly
        longer; otherwise *a*'s.
      * ``cited_by_count`` is the larger of the two counts.
      * ``keywords`` and ``verified_by`` are order-preserving unions, *a*'s
        entries first.
    """

    def _ordered_union(first, second):
        # dict.fromkeys drops repeats while keeping first-seen order.
        return list(dict.fromkeys(first + second))

    prefer_b_abstract = bool(b.abstract) and (
        not a.abstract or len(b.abstract) > len(a.abstract)
    )
    merged_abstract = b.abstract if prefer_b_abstract else a.abstract

    merged_keywords = _ordered_union(a.keywords, b.keywords)
    merged_verified_by = _ordered_union(a.verified_by, b.verified_by)

    return Paper(
        source_id=a.source_id,
        title=a.title or b.title,
        abstract=merged_abstract,
        authors=a.authors or b.authors,
        publication_year=a.publication_year or b.publication_year,
        cited_by_count=max(a.cited_by_count, b.cited_by_count),
        source_name=a.source_name or b.source_name,
        doi=a.doi or b.doi,
        keywords=merged_keywords,
        url=a.url or b.url,
        verified_by=merged_verified_by,
    )
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
"""OpenAlex adapter wrapping the shared client from .scripts/openalex/."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from sources.base import ScholarlySource
|
|
12
|
+
from sources.models import Paper
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)

# Import the shared OpenAlex client.
# It lives outside this package, in ".scripts/openalex" three directory levels
# above this file, so that directory is put at the front of sys.path (once)
# before the client module is imported below.
SCRIPTS_DIR = str(
    Path(__file__).parent.parent.parent.joinpath(".scripts", "openalex")
)
if SCRIPTS_DIR not in sys.path:
    sys.path.insert(0, SCRIPTS_DIR)
|
|
20
|
+
|
|
21
|
+
from openalex_client import OpenAlexClient # noqa: E402
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class OpenAlexSource(ScholarlySource):
    """OpenAlex implementation using the shared client.

    Always available (no API key needed). Uses polite pool with email.

    The wrapped client is synchronous; every call to it is pushed onto a
    worker thread with ``asyncio.to_thread`` so the event loop is not blocked.
    """

    _DOI_URL_PREFIX = "https://doi.org/"

    def __init__(self, client: OpenAlexClient) -> None:
        self._client = client

    @property
    def source_name(self) -> str:
        """Human-readable name of this source."""
        return "OpenAlex"

    @property
    def source_key(self) -> str:
        """Short machine identifier of this source."""
        return "openalex"

    @classmethod
    def _doi_url(cls, doi: str) -> str:
        """Return *doi* in ``https://doi.org/...`` form, prefixing it if needed."""
        if doi.startswith(cls._DOI_URL_PREFIX):
            return doi
        return f"{cls._DOI_URL_PREFIX}{doi}"

    async def search_works(
        self,
        query: str,
        *,
        year_from: int | None = None,
        year_to: int | None = None,
        sort_by: str = "relevance",
        limit: int = 50,
    ) -> list[Paper]:
        """Search OpenAlex works for *query*.

        Args:
            query: Free-text search string handed to the client as ``search``.
            year_from: Earliest publication year to include; a falsy value
                (``None`` or ``0``) means no lower bound.
            year_to: Latest publication year to include; a falsy value means
                no upper bound.
            sort_by: ``"relevance"`` or ``"publication_year"``; any other
                value sorts by citation count, descending.
            limit: Maximum number of papers to return. A single client call is
                made with ``per_page`` capped at 200. A non-positive limit
                yields an empty list without contacting the API.

        Returns:
            At most *limit* ``Paper`` objects, in the order the client
            returned them.
        """
        if limit <= 0:
            # per_page would be invalid, and a negative slice bound would
            # silently drop results from the end instead of limiting them.
            return []

        filter_params: dict[str, str] = {}
        if year_from and year_to:
            filter_params["publication_year"] = f"{year_from}-{year_to}"
        elif year_from:
            filter_params["publication_year"] = f">{year_from - 1}"
        elif year_to:
            filter_params["publication_year"] = f"<{year_to + 1}"

        sort_param = {
            "relevance": "relevance_score:desc",
            "publication_year": "publication_date:desc",
        }.get(sort_by, "cited_by_count:desc")

        response = await asyncio.to_thread(
            self._client.search_works,
            search=query,
            filter_params=filter_params or None,
            per_page=min(limit, 200),
            sort=sort_param,
        )
        works = (response or {}).get("results") or []
        return [self._to_paper(w) for w in works[:limit]]

    async def verify_doi(self, doi: str) -> Paper | None:
        """Look up a single DOI.

        Returns the matching ``Paper``, or ``None`` when the client returns
        nothing usable or raises. Failures are logged at debug level and
        never propagated.
        """
        doi = self._doi_url(doi)

        try:
            work = await asyncio.to_thread(self._client.get_entity, "works", doi)
            if work and work.get("id"):
                return self._to_paper(work)
        except Exception:
            # Best-effort lookup: keep the traceback for diagnosis, return None.
            logger.debug("OpenAlex DOI lookup failed for: %s", doi, exc_info=True)
        return None

    async def batch_verify_dois(self, dois: list[str]) -> dict[str, Paper | None]:
        """Look up several DOIs with one batch call.

        Returns a dict keyed by the DOI strings exactly as supplied; each
        value is the matching ``Paper`` or ``None`` if the work was not found.
        Different spellings of the same DOI (bare vs. URL form, differing
        case) each receive the result. If the batch call fails, a warning is
        logged and every DOI not yet resolved stays ``None``.
        """
        results: dict[str, Paper | None] = {d: None for d in dois}
        if not results:
            return results

        # Group caller spellings by their lowercased URL form so a returned
        # work is matched in O(1) and reaches every equivalent spelling.
        lookup_urls: dict[str, str] = {}
        spellings: dict[str, list[str]] = {}
        for orig in results:
            url = self._doi_url(orig)
            key = url.lower()
            lookup_urls.setdefault(key, url)
            spellings.setdefault(key, []).append(orig)

        try:
            works = await asyncio.to_thread(
                self._client.batch_lookup, "works", list(lookup_urls.values()), "doi"
            )
            for w in works or []:
                doi_val = w.get("doi") or ""
                if not doi_val:
                    continue
                for orig in spellings.get(self._doi_url(doi_val).lower(), ()):
                    # Separate Paper per key so callers never share one object.
                    results[orig] = self._to_paper(w)
        except Exception:
            # Best-effort: report the failure but still return what we have.
            logger.warning("OpenAlex batch DOI lookup failed", exc_info=True)

        return results

    async def find_similar_works(
        self,
        text: str,
        *,
        limit: int = 20,
    ) -> list[Paper]:
        """Return works related to *text* via a relevance-sorted search."""
        return await self.search_works(text, sort_by="relevance", limit=limit)

    @staticmethod
    def _to_paper(work: dict[str, Any]) -> Paper:
        """Convert OpenAlex work dict to Paper.

        Missing and explicit-``null`` fields are treated alike. The abstract
        is rebuilt from ``abstract_inverted_index`` (word -> positions) by
        ordering words by position; it is left as ``None`` if that index is
        absent or malformed.
        """
        # Authors
        authors = []
        for authorship in work.get("authorships") or []:
            author = authorship.get("author") or {}
            name = author.get("display_name") or ""
            if name:
                authors.append(name)

        # Keywords: objects carry the label under "display_name" (older
        # payloads used "keyword"); plain strings are accepted as-is.
        keywords = []
        for kw in work.get("keywords") or []:
            if isinstance(kw, dict):
                label = kw.get("display_name") or kw.get("keyword")
            else:
                label = kw
            if isinstance(label, str) and label:
                keywords.append(label)

        # Abstract
        abstract = None
        inv_abstract = work.get("abstract_inverted_index")
        if inv_abstract:
            try:
                word_positions = sorted(
                    (pos, word)
                    for word, positions in inv_abstract.items()
                    for pos in positions
                )
                abstract = " ".join(word for _, word in word_positions)
            except (AttributeError, TypeError, ValueError):
                # Malformed index: keep the paper, just without an abstract.
                logger.debug(
                    "Could not rebuild abstract for OpenAlex work %s",
                    work.get("id"),
                    exc_info=True,
                )

        # DOI
        doi = work.get("doi")
        if doi and not doi.startswith("http"):
            doi = f"https://doi.org/{doi}"

        # Publication year
        pub_year = work.get("publication_year", 0) or 0

        # Source / journal
        primary_location = work.get("primary_location") or {}
        source_info = primary_location.get("source") or {}
        source_name = source_info.get("display_name")

        work_id = work.get("id") or ""

        return Paper(
            source_id=f"openalex:{work_id}",
            title=work.get("title", "") or "",
            abstract=abstract,
            authors=authors,
            publication_year=pub_year,
            cited_by_count=work.get("cited_by_count", 0) or 0,
            source_name=source_name,
            doi=doi,
            keywords=keywords,
            url=doi or work_id,
            verified_by=["openalex"],
        )
|