@wentorai/research-plugins 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +204 -0
  3. package/curated/analysis/README.md +64 -0
  4. package/curated/domains/README.md +104 -0
  5. package/curated/literature/README.md +53 -0
  6. package/curated/research/README.md +62 -0
  7. package/curated/tools/README.md +87 -0
  8. package/curated/writing/README.md +61 -0
  9. package/index.ts +39 -0
  10. package/mcp-configs/academic-db/ChatSpatial.json +17 -0
  11. package/mcp-configs/academic-db/academia-mcp.json +17 -0
  12. package/mcp-configs/academic-db/academic-paper-explorer.json +17 -0
  13. package/mcp-configs/academic-db/academic-search-mcp-server.json +17 -0
  14. package/mcp-configs/academic-db/agentinterviews-mcp.json +17 -0
  15. package/mcp-configs/academic-db/all-in-mcp.json +17 -0
  16. package/mcp-configs/academic-db/apple-health-mcp.json +17 -0
  17. package/mcp-configs/academic-db/arxiv-latex-mcp.json +17 -0
  18. package/mcp-configs/academic-db/arxiv-mcp-server.json +17 -0
  19. package/mcp-configs/academic-db/bgpt-mcp.json +17 -0
  20. package/mcp-configs/academic-db/biomcp.json +17 -0
  21. package/mcp-configs/academic-db/biothings-mcp.json +17 -0
  22. package/mcp-configs/academic-db/catalysishub-mcp-server.json +17 -0
  23. package/mcp-configs/academic-db/clinicaltrialsgov-mcp-server.json +17 -0
  24. package/mcp-configs/academic-db/deep-research-mcp.json +17 -0
  25. package/mcp-configs/academic-db/dicom-mcp.json +17 -0
  26. package/mcp-configs/academic-db/enrichr-mcp-server.json +17 -0
  27. package/mcp-configs/academic-db/fec-mcp-server.json +17 -0
  28. package/mcp-configs/academic-db/fhir-mcp-server-themomentum.json +17 -0
  29. package/mcp-configs/academic-db/fhir-mcp.json +19 -0
  30. package/mcp-configs/academic-db/gget-mcp.json +17 -0
  31. package/mcp-configs/academic-db/google-researcher-mcp.json +17 -0
  32. package/mcp-configs/academic-db/idea-reality-mcp.json +17 -0
  33. package/mcp-configs/academic-db/legiscan-mcp.json +19 -0
  34. package/mcp-configs/academic-db/lex.json +17 -0
  35. package/mcp-configs/ai-platform/Adaptive-Graph-of-Thoughts-MCP-server.json +17 -0
  36. package/mcp-configs/ai-platform/ai-counsel.json +17 -0
  37. package/mcp-configs/ai-platform/atlas-mcp-server.json +17 -0
  38. package/mcp-configs/ai-platform/counsel-mcp.json +17 -0
  39. package/mcp-configs/ai-platform/cross-llm-mcp.json +17 -0
  40. package/mcp-configs/ai-platform/gptr-mcp.json +17 -0
  41. package/mcp-configs/browser/decipher-research-agent.json +17 -0
  42. package/mcp-configs/browser/deep-research.json +17 -0
  43. package/mcp-configs/browser/everything-claude-code.json +17 -0
  44. package/mcp-configs/browser/gpt-researcher.json +17 -0
  45. package/mcp-configs/browser/heurist-agent-framework.json +17 -0
  46. package/mcp-configs/data-platform/4everland-hosting-mcp.json +17 -0
  47. package/mcp-configs/data-platform/context-keeper.json +17 -0
  48. package/mcp-configs/data-platform/context7.json +19 -0
  49. package/mcp-configs/data-platform/contextstream-mcp.json +17 -0
  50. package/mcp-configs/data-platform/email-mcp.json +17 -0
  51. package/mcp-configs/note-knowledge/ApeRAG.json +17 -0
  52. package/mcp-configs/note-knowledge/In-Memoria.json +17 -0
  53. package/mcp-configs/note-knowledge/agent-memory.json +17 -0
  54. package/mcp-configs/note-knowledge/aimemo.json +17 -0
  55. package/mcp-configs/note-knowledge/biel-mcp.json +19 -0
  56. package/mcp-configs/note-knowledge/cognee.json +17 -0
  57. package/mcp-configs/note-knowledge/context-awesome.json +17 -0
  58. package/mcp-configs/note-knowledge/context-mcp.json +17 -0
  59. package/mcp-configs/note-knowledge/conversation-handoff-mcp.json +17 -0
  60. package/mcp-configs/note-knowledge/cortex.json +17 -0
  61. package/mcp-configs/note-knowledge/devrag.json +17 -0
  62. package/mcp-configs/note-knowledge/easy-obsidian-mcp.json +17 -0
  63. package/mcp-configs/note-knowledge/engram.json +17 -0
  64. package/mcp-configs/note-knowledge/gnosis-mcp.json +17 -0
  65. package/mcp-configs/note-knowledge/graphlit-mcp-server.json +19 -0
  66. package/mcp-configs/reference-mgr/arxiv-cli.json +17 -0
  67. package/mcp-configs/reference-mgr/arxiv-search-mcp.json +17 -0
  68. package/mcp-configs/reference-mgr/chiken.json +17 -0
  69. package/mcp-configs/reference-mgr/claude-scholar.json +17 -0
  70. package/mcp-configs/reference-mgr/devonthink-mcp.json +17 -0
  71. package/mcp-configs/registry.json +447 -0
  72. package/openclaw.plugin.json +21 -0
  73. package/package.json +61 -0
  74. package/skills/analysis/dataviz/color-accessibility-guide/SKILL.md +230 -0
  75. package/skills/analysis/dataviz/geospatial-viz-guide/SKILL.md +218 -0
  76. package/skills/analysis/dataviz/interactive-viz-guide/SKILL.md +287 -0
  77. package/skills/analysis/dataviz/network-visualization-guide/SKILL.md +195 -0
  78. package/skills/analysis/dataviz/publication-figures-guide/SKILL.md +238 -0
  79. package/skills/analysis/dataviz/python-dataviz-guide/SKILL.md +195 -0
  80. package/skills/analysis/econometrics/causal-inference-guide/SKILL.md +197 -0
  81. package/skills/analysis/econometrics/iv-regression-guide/SKILL.md +198 -0
  82. package/skills/analysis/econometrics/panel-data-guide/SKILL.md +274 -0
  83. package/skills/analysis/econometrics/robustness-checks/SKILL.md +250 -0
  84. package/skills/analysis/econometrics/stata-regression/SKILL.md +117 -0
  85. package/skills/analysis/econometrics/time-series-guide/SKILL.md +235 -0
  86. package/skills/analysis/statistics/bayesian-statistics-guide/SKILL.md +221 -0
  87. package/skills/analysis/statistics/hypothesis-testing-guide/SKILL.md +210 -0
  88. package/skills/analysis/statistics/meta-analysis-guide/SKILL.md +206 -0
  89. package/skills/analysis/statistics/nonparametric-tests-guide/SKILL.md +221 -0
  90. package/skills/analysis/statistics/power-analysis-guide/SKILL.md +240 -0
  91. package/skills/analysis/statistics/sem-guide/SKILL.md +231 -0
  92. package/skills/analysis/statistics/survival-analysis-guide/SKILL.md +195 -0
  93. package/skills/analysis/wrangling/missing-data-handling/SKILL.md +224 -0
  94. package/skills/analysis/wrangling/pandas-data-wrangling/SKILL.md +242 -0
  95. package/skills/analysis/wrangling/questionnaire-design-guide/SKILL.md +234 -0
  96. package/skills/analysis/wrangling/text-mining-guide/SKILL.md +225 -0
  97. package/skills/domains/ai-ml/computer-vision-guide/SKILL.md +213 -0
  98. package/skills/domains/ai-ml/deep-learning-papers-guide/SKILL.md +200 -0
  99. package/skills/domains/ai-ml/llm-evaluation-guide/SKILL.md +194 -0
  100. package/skills/domains/ai-ml/prompt-engineering-research/SKILL.md +233 -0
  101. package/skills/domains/ai-ml/reinforcement-learning-guide/SKILL.md +254 -0
  102. package/skills/domains/ai-ml/transformer-architecture-guide/SKILL.md +233 -0
  103. package/skills/domains/biomedical/clinical-research-guide/SKILL.md +232 -0
  104. package/skills/domains/biomedical/clinicaltrials-api/SKILL.md +177 -0
  105. package/skills/domains/biomedical/epidemiology-guide/SKILL.md +200 -0
  106. package/skills/domains/biomedical/genomics-analysis-guide/SKILL.md +270 -0
  107. package/skills/domains/business/market-analysis-guide/SKILL.md +112 -0
  108. package/skills/domains/business/strategic-management-guide/SKILL.md +154 -0
  109. package/skills/domains/chemistry/computational-chemistry-guide/SKILL.md +266 -0
  110. package/skills/domains/chemistry/retrosynthesis-guide/SKILL.md +215 -0
  111. package/skills/domains/cs/algorithms-complexity-guide/SKILL.md +194 -0
  112. package/skills/domains/cs/dblp-api/SKILL.md +129 -0
  113. package/skills/domains/cs/software-engineering-research/SKILL.md +218 -0
  114. package/skills/domains/ecology/biodiversity-data-guide/SKILL.md +296 -0
  115. package/skills/domains/ecology/conservation-biology-guide/SKILL.md +198 -0
  116. package/skills/domains/ecology/gbif-api/SKILL.md +158 -0
  117. package/skills/domains/ecology/inaturalist-api/SKILL.md +173 -0
  118. package/skills/domains/economics/behavioral-economics-guide/SKILL.md +239 -0
  119. package/skills/domains/economics/development-economics-guide/SKILL.md +181 -0
  120. package/skills/domains/economics/fred-api/SKILL.md +189 -0
  121. package/skills/domains/education/curriculum-design-guide/SKILL.md +144 -0
  122. package/skills/domains/education/learning-science-guide/SKILL.md +150 -0
  123. package/skills/domains/finance/financial-data-analysis/SKILL.md +152 -0
  124. package/skills/domains/finance/quantitative-finance-guide/SKILL.md +151 -0
  125. package/skills/domains/geoscience/climate-science-guide/SKILL.md +158 -0
  126. package/skills/domains/geoscience/gis-remote-sensing-guide/SKILL.md +129 -0
  127. package/skills/domains/humanities/digital-humanities-guide/SKILL.md +181 -0
  128. package/skills/domains/humanities/philosophy-research-guide/SKILL.md +148 -0
  129. package/skills/domains/law/courtlistener-api/SKILL.md +213 -0
  130. package/skills/domains/law/legal-research-guide/SKILL.md +250 -0
  131. package/skills/domains/math/linear-algebra-applications/SKILL.md +227 -0
  132. package/skills/domains/math/numerical-methods-guide/SKILL.md +236 -0
  133. package/skills/domains/math/oeis-api/SKILL.md +158 -0
  134. package/skills/domains/pharma/clinical-pharmacology-guide/SKILL.md +165 -0
  135. package/skills/domains/pharma/drug-development-guide/SKILL.md +177 -0
  136. package/skills/domains/physics/computational-physics-guide/SKILL.md +300 -0
  137. package/skills/domains/physics/nasa-ads-api/SKILL.md +150 -0
  138. package/skills/domains/physics/quantum-computing-guide/SKILL.md +234 -0
  139. package/skills/domains/social-science/social-research-methods/SKILL.md +194 -0
  140. package/skills/domains/social-science/survey-research-guide/SKILL.md +182 -0
  141. package/skills/literature/discovery/citation-alert-guide/SKILL.md +154 -0
  142. package/skills/literature/discovery/conference-proceedings-guide/SKILL.md +142 -0
  143. package/skills/literature/discovery/literature-mapping-guide/SKILL.md +175 -0
  144. package/skills/literature/discovery/paper-tracking-guide/SKILL.md +211 -0
  145. package/skills/literature/discovery/rss-paper-feeds/SKILL.md +214 -0
  146. package/skills/literature/discovery/semantic-scholar-recs-guide/SKILL.md +164 -0
  147. package/skills/literature/fulltext/doaj-api/SKILL.md +120 -0
  148. package/skills/literature/fulltext/interlibrary-loan-guide/SKILL.md +163 -0
  149. package/skills/literature/fulltext/open-access-guide/SKILL.md +183 -0
  150. package/skills/literature/fulltext/pmc-oai-api/SKILL.md +184 -0
  151. package/skills/literature/fulltext/preprint-servers-guide/SKILL.md +128 -0
  152. package/skills/literature/fulltext/repository-harvesting-guide/SKILL.md +207 -0
  153. package/skills/literature/fulltext/unpaywall-api/SKILL.md +113 -0
  154. package/skills/literature/metadata/altmetrics-guide/SKILL.md +132 -0
  155. package/skills/literature/metadata/citation-network-guide/SKILL.md +236 -0
  156. package/skills/literature/metadata/crossref-api/SKILL.md +133 -0
  157. package/skills/literature/metadata/datacite-api/SKILL.md +126 -0
  158. package/skills/literature/metadata/doi-resolution-guide/SKILL.md +168 -0
  159. package/skills/literature/metadata/h-index-guide/SKILL.md +183 -0
  160. package/skills/literature/metadata/journal-metrics-guide/SKILL.md +188 -0
  161. package/skills/literature/metadata/opencitations-api/SKILL.md +128 -0
  162. package/skills/literature/metadata/orcid-api/SKILL.md +136 -0
  163. package/skills/literature/metadata/orcid-integration-guide/SKILL.md +178 -0
  164. package/skills/literature/search/arxiv-api/SKILL.md +95 -0
  165. package/skills/literature/search/biorxiv-api/SKILL.md +123 -0
  166. package/skills/literature/search/boolean-search-guide/SKILL.md +199 -0
  167. package/skills/literature/search/citation-chaining-guide/SKILL.md +148 -0
  168. package/skills/literature/search/database-comparison-guide/SKILL.md +100 -0
  169. package/skills/literature/search/europe-pmc-api/SKILL.md +120 -0
  170. package/skills/literature/search/google-scholar-guide/SKILL.md +182 -0
  171. package/skills/literature/search/mesh-terms-guide/SKILL.md +164 -0
  172. package/skills/literature/search/openalex-api/SKILL.md +134 -0
  173. package/skills/literature/search/pubmed-api/SKILL.md +130 -0
  174. package/skills/literature/search/scientify-literature-survey/SKILL.md +203 -0
  175. package/skills/literature/search/semantic-scholar-api/SKILL.md +134 -0
  176. package/skills/literature/search/systematic-search-strategy/SKILL.md +214 -0
  177. package/skills/research/automation/ai-scientist-guide/SKILL.md +228 -0
  178. package/skills/research/automation/data-collection-automation/SKILL.md +248 -0
  179. package/skills/research/automation/research-workflow-automation/SKILL.md +266 -0
  180. package/skills/research/deep-research/meta-synthesis-guide/SKILL.md +174 -0
  181. package/skills/research/deep-research/research-cog/SKILL.md +153 -0
  182. package/skills/research/deep-research/scoping-review-guide/SKILL.md +217 -0
  183. package/skills/research/deep-research/systematic-review-guide/SKILL.md +250 -0
  184. package/skills/research/funding/figshare-api/SKILL.md +163 -0
  185. package/skills/research/funding/grant-writing-guide/SKILL.md +233 -0
  186. package/skills/research/funding/nsf-grant-guide/SKILL.md +206 -0
  187. package/skills/research/funding/open-science-guide/SKILL.md +255 -0
  188. package/skills/research/funding/zenodo-api/SKILL.md +174 -0
  189. package/skills/research/methodology/action-research-guide/SKILL.md +201 -0
  190. package/skills/research/methodology/experimental-design-guide/SKILL.md +236 -0
  191. package/skills/research/methodology/grad-school-guide/SKILL.md +182 -0
  192. package/skills/research/methodology/grounded-theory-guide/SKILL.md +171 -0
  193. package/skills/research/methodology/mixed-methods-guide/SKILL.md +208 -0
  194. package/skills/research/methodology/qualitative-research-guide/SKILL.md +234 -0
  195. package/skills/research/methodology/scientify-idea-generation/SKILL.md +222 -0
  196. package/skills/research/paper-review/paper-reading-assistant/SKILL.md +266 -0
  197. package/skills/research/paper-review/peer-review-guide/SKILL.md +227 -0
  198. package/skills/research/paper-review/rebuttal-writing-guide/SKILL.md +185 -0
  199. package/skills/research/paper-review/scientify-write-review-paper/SKILL.md +209 -0
  200. package/skills/tools/code-exec/jupyter-notebook-guide/SKILL.md +178 -0
  201. package/skills/tools/code-exec/python-reproducibility-guide/SKILL.md +341 -0
  202. package/skills/tools/code-exec/r-reproducibility-guide/SKILL.md +236 -0
  203. package/skills/tools/code-exec/sandbox-execution-guide/SKILL.md +221 -0
  204. package/skills/tools/diagram/mermaid-diagram-guide/SKILL.md +269 -0
  205. package/skills/tools/diagram/plantuml-guide/SKILL.md +397 -0
  206. package/skills/tools/diagram/scientific-illustration-guide/SKILL.md +225 -0
  207. package/skills/tools/document/anystyle-api/SKILL.md +199 -0
  208. package/skills/tools/document/grobid-pdf-parsing/SKILL.md +294 -0
  209. package/skills/tools/document/markdown-academic-guide/SKILL.md +217 -0
  210. package/skills/tools/document/pdf-extraction-guide/SKILL.md +321 -0
  211. package/skills/tools/knowledge-graph/knowledge-graph-construction/SKILL.md +306 -0
  212. package/skills/tools/knowledge-graph/ontology-design-guide/SKILL.md +214 -0
  213. package/skills/tools/knowledge-graph/rag-methodology-guide/SKILL.md +325 -0
  214. package/skills/tools/ocr-translate/formula-recognition-guide/SKILL.md +367 -0
  215. package/skills/tools/ocr-translate/handwriting-recognition-guide/SKILL.md +211 -0
  216. package/skills/tools/ocr-translate/latex-ocr-guide/SKILL.md +204 -0
  217. package/skills/tools/ocr-translate/multilingual-research-guide/SKILL.md +234 -0
  218. package/skills/tools/scraping/academic-web-scraping/SKILL.md +326 -0
  219. package/skills/tools/scraping/api-data-collection-guide/SKILL.md +301 -0
  220. package/skills/tools/scraping/web-scraping-ethics-guide/SKILL.md +250 -0
  221. package/skills/writing/citation/bibtex-management-guide/SKILL.md +246 -0
  222. package/skills/writing/citation/citation-style-guide/SKILL.md +248 -0
  223. package/skills/writing/citation/reference-manager-comparison/SKILL.md +208 -0
  224. package/skills/writing/citation/zotero-api/SKILL.md +188 -0
  225. package/skills/writing/composition/abstract-writing-guide/SKILL.md +188 -0
  226. package/skills/writing/composition/discussion-writing-guide/SKILL.md +194 -0
  227. package/skills/writing/composition/introduction-writing-guide/SKILL.md +194 -0
  228. package/skills/writing/composition/literature-review-writing/SKILL.md +196 -0
  229. package/skills/writing/composition/methods-section-guide/SKILL.md +185 -0
  230. package/skills/writing/composition/response-to-reviewers/SKILL.md +215 -0
  231. package/skills/writing/composition/scientific-writing-guide/SKILL.md +152 -0
  232. package/skills/writing/latex/bibliography-management-guide/SKILL.md +206 -0
  233. package/skills/writing/latex/latex-drawing-guide/SKILL.md +234 -0
  234. package/skills/writing/latex/latex-ecosystem-guide/SKILL.md +240 -0
  235. package/skills/writing/latex/math-typesetting-guide/SKILL.md +231 -0
  236. package/skills/writing/latex/overleaf-collaboration-guide/SKILL.md +211 -0
  237. package/skills/writing/latex/tikz-diagrams-guide/SKILL.md +211 -0
  238. package/skills/writing/polish/academic-translation-guide/SKILL.md +175 -0
  239. package/skills/writing/polish/academic-writing-refiner/SKILL.md +143 -0
  240. package/skills/writing/polish/ai-writing-humanizer/SKILL.md +178 -0
  241. package/skills/writing/polish/grammar-checker-guide/SKILL.md +184 -0
  242. package/skills/writing/polish/plagiarism-detection-guide/SKILL.md +167 -0
  243. package/skills/writing/templates/beamer-presentation-guide/SKILL.md +263 -0
  244. package/skills/writing/templates/conference-paper-template/SKILL.md +219 -0
  245. package/skills/writing/templates/thesis-template-guide/SKILL.md +200 -0
  246. package/skills/writing/templates/thesis-writing-guide/SKILL.md +220 -0
  247. package/src/tools/arxiv.ts +131 -0
  248. package/src/tools/crossref.ts +112 -0
  249. package/src/tools/openalex.ts +174 -0
  250. package/src/tools/pubmed.ts +166 -0
  251. package/src/tools/semantic-scholar.ts +108 -0
  252. package/src/tools/unpaywall.ts +58 -0
@@ -0,0 +1,306 @@
1
+ ---
2
+ name: knowledge-graph-construction
3
+ description: "Build research knowledge graphs for literature synthesis and RAG systems"
4
+ metadata:
5
+ openclaw:
6
+ emoji: "🗺"
7
+ category: "tools"
8
+ subcategory: "knowledge-graph"
9
+ keywords: ["knowledge graph", "knowledge modeling", "ontology", "RAG", "retrieval augmented generation"]
10
+ source: "N/A"
11
+ ---
12
+
13
+ # Knowledge Graph Construction Guide
14
+
15
+ ## Overview
16
+
17
+ Knowledge graphs (KGs) organize information as networks of entities and relationships, making them powerful tools for research synthesis, literature exploration, and AI-augmented retrieval. In academic contexts, knowledge graphs can represent relationships between papers, authors, methods, datasets, findings, and concepts -- enabling queries like "Which methods have been applied to dataset X?" or "What are the common limitations reported across studies of Y?"
18
+
19
+ This guide covers building knowledge graphs for research applications: defining schemas (ontologies), extracting entities and relations from text, storing and querying graph data, and integrating knowledge graphs with Retrieval Augmented Generation (RAG) systems for AI-powered research assistants.
20
+
21
+ Whether you are building a personal research knowledge base, constructing a domain-specific literature graph, or developing a RAG system for an academic chatbot, these patterns provide a solid foundation.
22
+
23
+ ## Knowledge Graph Fundamentals
24
+
25
+ ### Core Components
26
+
27
+ | Component | Definition | Research Example |
28
+ |-----------|-----------|-----------------|
29
+ | Entity (Node) | A distinct concept or object | Paper, Author, Method, Dataset |
30
+ | Relation (Edge) | A typed connection between entities | "cites", "uses_method", "evaluates_on" |
31
+ | Property | An attribute of an entity or relation | Paper.year, Author.affiliation |
32
+ | Ontology/Schema | Formal definition of entity and relation types | Research ontology defining valid types |
33
+
34
+ ### Designing a Research Ontology
35
+
36
+ ```yaml
37
+ # research_ontology.yaml
38
+ entities:
39
+ Paper:
40
+ properties: [title, year, doi, abstract, venue]
41
+ Author:
42
+ properties: [name, affiliation, orcid]
43
+ Method:
44
+ properties: [name, description, category]
45
+ Dataset:
46
+ properties: [name, domain, size, url]
47
+ Finding:
48
+ properties: [description, metric, value, significance]
49
+ Concept:
50
+ properties: [name, definition, domain]
51
+
52
+ relations:
53
+ CITES:
54
+ from: Paper
55
+ to: Paper
56
+ AUTHORED_BY:
57
+ from: Paper
58
+ to: Author
59
+ USES_METHOD:
60
+ from: Paper
61
+ to: Method
62
+ EVALUATES_ON:
63
+ from: Paper
64
+ to: Dataset
65
+ REPORTS_FINDING:
66
+ from: Paper
67
+ to: Finding
68
+ RELATED_TO:
69
+ from: Concept
70
+ to: Concept
71
+ INTRODUCES:
72
+ from: Paper
73
+ to: Method
74
+ ```
75
+
76
+ ## Entity and Relation Extraction
77
+
78
+ ### LLM-Based Extraction
79
+
80
+ Using a large language model to extract structured knowledge from paper abstracts:
81
+
82
+ ```python
83
+ import json
84
+ from openai import OpenAI
85
+
86
+ client = OpenAI()
87
+
88
# Prompt template consumed by `str.format`; `{abstract}` is the only placeholder.
# The braces that describe the JSON shape are doubled so `.format()` emits them
# literally instead of treating them as (missing) replacement fields.
EXTRACTION_PROMPT = """Extract entities and relationships from this research paper abstract.

Return JSON with:
- entities: list of {{type, name, properties}}
- relations: list of {{source, relation, target}}

Entity types: Paper, Method, Dataset, Finding, Concept
Relation types: USES_METHOD, EVALUATES_ON, REPORTS_FINDING, RELATED_TO, INTRODUCES

Abstract: {abstract}

Respond ONLY with valid JSON."""


def extract_from_abstract(abstract, paper_title):
    """Extract entities and relations from one paper abstract with an LLM.

    Args:
        abstract: Abstract text; substituted into EXTRACTION_PROMPT.
        paper_title: Title used as the name of the Paper entity that is
            prepended to the extracted entities.

    Returns:
        The dict parsed from the model's JSON reply. It always has an
        "entities" list whose first element is the Paper entity for
        `paper_title` (carrying the first 200 characters of the abstract),
        and a "relations" list (empty if the model omitted it).

    Raises:
        json.JSONDecodeError: if the model's reply is not valid JSON.
    """
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a research knowledge extraction system."},
            {"role": "user", "content": EXTRACTION_PROMPT.format(abstract=abstract)}
        ],
        response_format={"type": "json_object"},
        temperature=0
    )

    result = json.loads(response.choices[0].message.content)

    # The reply is model-generated, so either top-level key may be absent.
    result.setdefault('entities', [])
    result.setdefault('relations', [])

    # Add the paper itself as an entity
    result['entities'].insert(0, {
        'type': 'Paper',
        'name': paper_title,
        'properties': {'abstract': abstract[:200]}
    })

    return result
122
+ ```
123
+
124
+ ### spaCy + Custom NER for Domain-Specific Extraction
125
+
126
+ ```python
127
+ import spacy
128
+ from spacy.tokens import Span
129
+
130
+ nlp = spacy.load("en_core_web_trf")
131
+
132
+ # Register custom entity types
133
@spacy.Language.component("research_entities")
def research_entity_component(doc):
    """Add METHOD entities for known method names that occur in `doc`.

    Each pattern is searched case-insensitively in the document text and the
    matched character range is converted to a token span. Existing entities
    are kept; overlaps are resolved with `spacy.util.filter_spans`.

    Args:
        doc: The spaCy Doc being processed by the pipeline.

    Returns:
        The same Doc with `doc.ents` replaced by the merged entity list.
    """
    import re  # function-scope import: the surrounding snippet imports only spaCy

    # Pattern-based recognition for methods
    method_patterns = [
        "random forest", "gradient boosting", "neural network",
        "transformer", "attention mechanism", "BERT", "GPT",
        "convolutional", "recurrent", "GAN"
    ]

    new_ents = list(doc.ents)
    for pattern in method_patterns:
        for match in re.finditer(re.escape(pattern), doc.text, flags=re.IGNORECASE):
            # Use the offsets of the actual match. Anchoring at an arbitrary
            # nearby token would label the wrong text (e.g. "the" in "the GAN").
            # char_span yields None when the offsets do not line up with token
            # boundaries, which filters out hits inside a longer word.
            span = doc.char_span(match.start(), match.end(), label="METHOD")
            if span is not None and span not in new_ents:
                new_ents.append(span)

    doc.ents = spacy.util.filter_spans(new_ents)
    return doc
152
+
153
+ nlp.add_pipe("research_entities", after="ner")
154
+ ```
155
+
156
+ ## Graph Storage and Querying
157
+
158
+ ### Neo4j (Production)
159
+
160
+ ```python
161
+ from neo4j import GraphDatabase
162
+
163
class ResearchGraph:
    """Small wrapper around a Neo4j driver for a paper / method graph.

    Papers are keyed by DOI and methods by name. The instance can be used as
    a context manager so the driver is closed when the block exits.
    """

    def __init__(self, uri, user, password):
        """Create the driver for `uri` with (user, password) authentication."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Never suppress an exception raised inside the `with` block.
        return False

    def add_paper(self, paper):
        """Create or update the Paper node identified by `paper['doi']`.

        Args:
            paper: Mapping whose items are passed as query parameters; the
                query references `doi`, `title`, `year` and `abstract`.
        """
        with self.driver.session() as session:
            session.run("""
                MERGE (p:Paper {doi: $doi})
                SET p.title = $title, p.year = $year, p.abstract = $abstract
            """, **paper)

    def add_citation(self, citing_doi, cited_doi):
        """Ensure a CITES edge from the citing paper to the cited paper.

        Both papers are looked up with MATCH, so nothing is created unless
        both Paper nodes already exist.
        """
        with self.driver.session() as session:
            session.run("""
                MATCH (a:Paper {doi: $citing})
                MATCH (b:Paper {doi: $cited})
                MERGE (a)-[:CITES]->(b)
            """, citing=citing_doi, cited=cited_doi)

    def add_method_usage(self, paper_doi, method_name):
        """Ensure a USES_METHOD edge from an existing paper to a method.

        The Method node is created on demand (MERGE); the Paper must exist.
        """
        with self.driver.session() as session:
            session.run("""
                MATCH (p:Paper {doi: $doi})
                MERGE (m:Method {name: $method})
                MERGE (p)-[:USES_METHOD]->(m)
            """, doi=paper_doi, method=method_name)

    def find_papers_using_method(self, method_name):
        """Return papers that use `method_name`, newest first.

        Returns:
            A list of dicts with keys `title`, `year` and `doi`.
        """
        with self.driver.session() as session:
            result = session.run("""
                MATCH (p:Paper)-[:USES_METHOD]->(m:Method {name: $method})
                RETURN p.title AS title, p.year AS year, p.doi AS doi
                ORDER BY p.year DESC
            """, method=method_name)
            # Materialize the rows while the session is still open.
            return [dict(record) for record in result]

    def find_common_methods(self, doi1, doi2):
        """Return the names of methods used by both papers."""
        with self.driver.session() as session:
            result = session.run("""
                MATCH (p1:Paper {doi: $doi1})-[:USES_METHOD]->(m:Method)
                      <-[:USES_METHOD]-(p2:Paper {doi: $doi2})
                RETURN m.name AS method
            """, doi1=doi1, doi2=doi2)
            # Materialize the rows while the session is still open.
            return [record['method'] for record in result]
207
+ ```
208
+
209
+ ### NetworkX (Lightweight / Prototyping)
210
+
211
+ ```python
212
+ import networkx as nx
213
+ import json
214
+
215
def build_research_graph(extracted_data_list):
    """Build a NetworkX graph from extracted paper data.

    Args:
        extracted_data_list: Iterable of dicts, each with an 'entities' list
            (items with 'name', 'type' and optional 'properties') and a
            'relations' list (items with 'source', 'target', 'relation').

    Returns:
        A MultiDiGraph whose nodes are entity names carrying a `type`
        attribute plus the entity's properties, and whose edges carry a
        `relation` attribute.
    """
    G = nx.MultiDiGraph()

    for data in extracted_data_list:
        for entity in data['entities']:
            # Copy the properties (tolerating a missing or null value) and set
            # `type` last: passing it as a separate keyword next to
            # **properties would raise TypeError when the extracted
            # properties themselves contain a "type" key.
            attrs = dict(entity.get('properties') or {})
            attrs['type'] = entity['type']
            G.add_node(entity['name'], **attrs)

        for rel in data['relations']:
            G.add_edge(
                rel['source'],
                rel['target'],
                relation=rel['relation']
            )

    return G
235
+
236
+ # Query the graph
237
def get_method_landscape(G):
    """Rank Method nodes by how many Paper nodes point at them.

    Returns:
        A list of (method_name, paper_count) tuples, highest count first.
    """
    def paper_count(method_node):
        # Count only predecessors that are typed as papers.
        return sum(
            1
            for source in G.predecessors(method_node)
            if G.nodes[source].get('type') == 'Paper'
        )

    usage = {
        node: paper_count(node)
        for node, attrs in G.nodes(data=True)
        if attrs.get('type') == 'Method'
    }
    return sorted(usage.items(), key=lambda item: -item[1])
246
+ ```
247
+
248
+ ## Knowledge Graph + RAG Integration
249
+
250
+ Combining knowledge graphs with retrieval augmented generation creates powerful research assistants:
251
+
252
+ ```python
253
def kg_rag_query(question, graph, embedding_model, llm):
    """Answer a research question using KG-enhanced RAG.

    Args:
        question: Natural-language question.
        graph: NetworkX graph whose nodes carry a `type` attribute and whose
            edges carry a `relation` attribute.
        embedding_model: Not used by this function; kept so existing callers
            keep working.
        llm: Object exposing `generate(prompt)`.

    Returns:
        Whatever `llm.generate` returns for the assembled prompt.
    """
    # Step 1: Extract entities from the question
    question_entities = extract_entities(question)

    # Step 2: Retrieve relevant subgraph
    subgraph_nodes = set()
    for entity in question_entities:
        if entity in graph:
            # Get 2-hop neighborhood. Edge direction is ignored here: relations
            # point from papers to methods/datasets, so a directed expansion
            # starting at a method would never reach the papers that use it.
            neighbors = nx.ego_graph(graph, entity, radius=2, undirected=True)
            subgraph_nodes.update(neighbors.nodes())

    # Step 3: Format context from subgraph
    context_parts = []
    # Sorted so the 20-line cut-off below picks the same nodes on every run.
    for node in sorted(subgraph_nodes, key=str):
        node_data = graph.nodes[node]
        edges = list(graph.edges(node, data=True))
        # Built outside the outer f-string: backslash-escaped quotes inside an
        # f-string expression are a SyntaxError before Python 3.12.
        edge_summary = ', '.join(
            f"{attrs.get('relation', 'related_to')} {target}"
            for _, target, attrs in edges[:5]
        )
        context_parts.append(
            f"{node} ({node_data.get('type', 'Unknown')}): {edge_summary}"
        )
    context = '\n'.join(context_parts[:20])

    # Step 4: Generate answer with LLM
    prompt = f"""Based on the following knowledge graph context, answer the question.

Context:
{context}

Question: {question}

Provide a detailed answer citing specific papers, methods, and findings from the context."""

    return llm.generate(prompt)
289
+ ```
290
+
291
+ ## Best Practices
292
+
293
+ - **Start with a clear schema.** Define your entity types and relations before extracting data. A schema change later requires re-processing.
294
+ - **Use persistent identifiers.** DOIs for papers, ORCIDs for authors, and canonical names for methods prevent duplicate nodes.
295
+ - **Validate extracted triples.** LLM extraction is imperfect. Sample and manually verify 5-10% of extractions.
296
+ - **Enrich with external data.** Link your KG to OpenAlex, Semantic Scholar, or Wikidata for additional metadata.
297
+ - **Version your graph.** Export snapshots regularly and track changes over time.
298
+ - **Design queries before building.** Know what questions you want to answer before deciding on the schema.
299
+
300
+ ## References
301
+
302
+ - [Neo4j Documentation](https://neo4j.com/docs/) -- Graph database
303
+ - [NetworkX Documentation](https://networkx.org/) -- Python graph library
304
+ - [OpenAlex API](https://docs.openalex.org/) -- Open bibliographic data
305
+ - [LlamaIndex Knowledge Graph Guide](https://docs.llamaindex.ai/) -- KG-RAG integration
306
+ - [Graphiti](https://github.com/getzep/graphiti) -- Temporal knowledge graph library
@@ -0,0 +1,214 @@
1
+ ---
2
+ name: ontology-design-guide
3
+ description: "Design ontologies and knowledge graphs for research data modeling"
4
+ metadata:
5
+ openclaw:
6
+ emoji: "link"
7
+ category: "tools"
8
+ subcategory: "knowledge-graph"
9
+ keywords: ["ontology", "knowledge graph", "RDF", "OWL", "semantic web", "data modeling", "linked data"]
10
+ source: "wentor-research-plugins"
11
+ ---
12
+
13
+ # Ontology Design Guide
14
+
15
+ A skill for designing ontologies and knowledge graphs to model research domain knowledge. Covers ontology engineering methodologies, OWL and RDF basics, reusing existing ontologies, and practical tools for building, validating, and querying knowledge graphs.
16
+
17
+ ## What Is an Ontology?
18
+
19
+ ### Definitions and Purpose
20
+
21
+ ```
22
+ An ontology is a formal, explicit specification of a shared
23
+ conceptualization. In practical terms, it defines:
24
+
25
+ - Classes: Categories of things (e.g., Gene, Disease, Drug)
26
+ - Properties: Relationships between things (e.g., causes, treats)
27
+ - Individuals: Specific instances (e.g., TP53, Breast Cancer)
28
+ - Axioms: Rules and constraints (e.g., every Drug has exactly
29
+ one molecular formula)
30
+
31
+ Purpose in research:
32
+ - Standardize terminology across research groups
33
+ - Enable data integration from heterogeneous sources
34
+ - Support automated reasoning and inference
35
+ - Facilitate knowledge discovery through graph queries
36
+ - Provide machine-readable domain models
37
+ ```
38
+
39
+ ### Ontology vs. Taxonomy vs. Knowledge Graph
40
+
41
+ ```
42
+ Taxonomy: Hierarchical classification (is-a relationships only)
43
+ Example: Animal > Mammal > Primate > Human
44
+
45
+ Ontology: Formal model with classes, properties, and axioms
46
+ Supports reasoning (e.g., if X treats Y and Y is-a Disease,
47
+ then X is a DrugCandidate)
48
+
49
+ Knowledge Graph: An ontology populated with instance data
50
+ Millions of triples: (subject, predicate, object)
51
+ Examples: Wikidata, DBpedia, Google Knowledge Graph
52
+ ```
53
+
54
+ ## Ontology Engineering Process
55
+
56
+ ### Methodology Overview
57
+
58
+ ```python
59
def ontology_design_process(domain: str) -> dict:
    """
    Describe the seven-step workflow for building a domain ontology.

    The checklist is static: `domain` is accepted but is not read when
    the result is assembled, so every call returns the same content.

    Args:
        domain: The research domain to model

    Returns:
        A dict keyed "step_1_scope" through "step_7_publish", in that
        order. Each value is a dict with a "description" entry plus
        step-specific guidance (questions, resources, tools, ...).
    """
    # Step 1: agree on coverage and on the questions the model must answer.
    scope_step = {
        "description": "Define scope and competency questions",
        "questions": [
            "What domain does the ontology cover?",
            "What questions should the ontology be able to answer?",
            "Who will use it and for what purpose?",
        ],
        "example": (
            "Domain: Drug-disease interactions. "
            "Competency question: 'What drugs target proteins "
            "associated with Alzheimer disease?'"
        ),
    }

    # Step 2: look for vocabularies that already exist before inventing terms.
    reuse_step = {
        "description": "Search for existing ontologies to reuse",
        "resources": [
            "BioPortal (bioportal.bioontology.org) -- biomedical ontologies",
            "Linked Open Vocabularies (lov.linkeddata.es) -- general",
            "OBO Foundry (obofoundry.org) -- life sciences",
            "Schema.org -- web-scale vocabulary",
        ],
    }

    # Step 3: collect candidate vocabulary.
    enumerate_step = {
        "description": "List key terms, concepts, and relationships",
        "method": "Brainstorm with domain experts; review literature",
    }

    # Step 4: organise the vocabulary into classes and properties.
    model_step = {
        "description": "Define class hierarchy and properties",
        "tools": ["Protege", "WebVOWL", "TopBraid Composer"],
    }

    # Step 5: turn the model into a formal encoding.
    formalize_step = {
        "description": "Encode in OWL/RDF with axioms and constraints",
    }

    # Step 6: check the encoding against the step-1 questions and real data.
    validate_step = {
        "description": "Test against competency questions and real data",
        "methods": ["SPARQL queries", "Reasoner (HermiT, Pellet)", "Unit tests"],
    }

    # Step 7: release under a stable identifier.
    publish_step = {
        "description": "Publish with persistent URI and documentation",
        "best_practice": "Use w3id.org or purl.org for persistent identifiers",
    }

    # Pair each step with its key; dict() preserves this ordering.
    ordered_steps = [
        ("step_1_scope", scope_step),
        ("step_2_reuse", reuse_step),
        ("step_3_enumerate", enumerate_step),
        ("step_4_model", model_step),
        ("step_5_formalize", formalize_step),
        ("step_6_validate", validate_step),
        ("step_7_publish", publish_step),
    ]
    return dict(ordered_steps)
109
+ ```
110
+
111
+ ## RDF and OWL Basics
112
+
113
+ ### RDF Triple Model
114
+
115
+ ```
116
+ RDF (Resource Description Framework) represents knowledge as triples:
117
+
118
+ (Subject, Predicate, Object)
119
+
120
+ Examples:
121
+ (:Aspirin, :treats, :Headache)
122
+ (:TP53, rdf:type, :Gene)
123
+ (:TP53, :associatedWith, :BreastCancer)
124
+ (:Aspirin, :hasChemicalFormula, "C9H8O4")
125
+
126
+ Serialization formats:
127
+ - Turtle (.ttl): Human-readable, most common for authoring
128
+ - JSON-LD (.jsonld): Web-friendly, API-compatible
129
+ - RDF/XML (.rdf): Verbose, legacy format
130
+ - N-Triples (.nt): Simple, good for large datasets
131
+ ```
132
+
133
+ ### Turtle Syntax Example
134
+
135
+ ```turtle
136
# Example research ontology: genes, diseases, drugs, and the links between them.
@prefix : <http://example.org/research#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .

# Classes -- every class carries a human-readable label and a definition
:Gene a owl:Class ;
    rdfs:label "Gene" ;
    rdfs:comment "A unit of heredity in a living organism." .

:Disease a owl:Class ;
    rdfs:label "Disease" ;
    rdfs:comment "A disorder of structure or function in a living organism." .

:Drug a owl:Class ;
    rdfs:label "Drug" ;
    rdfs:comment "A substance used to treat, cure, or prevent a disease." .

# Properties -- rdfs:domain / rdfs:range let a reasoner infer the types of
# the subject and object of each statement that uses the property
:associatedWith a owl:ObjectProperty ;
    rdfs:label "associated with" ;
    rdfs:comment "Relates a gene to a disease it is associated with." ;
    rdfs:domain :Gene ;
    rdfs:range :Disease .

:treats a owl:ObjectProperty ;
    rdfs:label "treats" ;
    rdfs:comment "Relates a drug to a disease it is used to treat." ;
    rdfs:domain :Drug ;
    rdfs:range :Disease .

# Individuals -- one instance per class so each property has example data
:TP53 a :Gene ;
    rdfs:label "TP53" ;
    :associatedWith :BreastCancer .

:BreastCancer a :Disease ;
    rdfs:label "Breast Cancer" .

:Tamoxifen a :Drug ;
    rdfs:label "Tamoxifen" ;
    :treats :BreastCancer .
167
+ ```
168
+
169
+ ## Querying with SPARQL
170
+
171
+ ### Basic SPARQL Queries
172
+
173
+ ```sparql
174
# The three SELECT statements below are independent queries. Submit them one
# at a time, each preceded by these PREFIX declarations; without them the
# prefixed names (:Gene, rdfs:label, ...) cannot be resolved.
# The namespaces must match the ones used when the data was authored.
PREFIX : <http://example.org/research#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

# Find all genes associated with Breast Cancer
SELECT ?gene ?geneLabel
WHERE {
    ?gene a :Gene .
    ?gene :associatedWith :BreastCancer .
    ?gene rdfs:label ?geneLabel .
}

# Find drugs that treat diseases associated with gene TP53
# (the shared ?disease variable joins the two triple patterns)
SELECT ?drug ?disease
WHERE {
    :TP53 :associatedWith ?disease .
    ?drug :treats ?disease .
}

# Count diseases per gene, most-connected genes first
SELECT ?gene (COUNT(?disease) AS ?diseaseCount)
WHERE {
    ?gene a :Gene .
    ?gene :associatedWith ?disease .
}
GROUP BY ?gene
ORDER BY DESC(?diseaseCount)
197
+ ```
198
+
199
+ ## Tools and Software
200
+
201
+ ### Ontology Development
202
+
203
+ | Tool | Type | Best For |
204
+ |------|------|---------|
205
+ | Protege | Desktop IDE | Full ontology development and reasoning |
206
+ | WebVOWL | Web viewer | Visualizing ontology structure |
207
+ | RDFLib (Python) | Library | Programmatic RDF manipulation |
208
+ | Apache Jena | Framework | SPARQL endpoint and reasoning |
209
+ | Neo4j | Graph database | Property graph modeling (not RDF) |
210
+ | Blazegraph/GraphDB | Triplestore | Storing and querying RDF data |
211
+
212
+ ## Design Principles
213
+
214
+ Follow the FAIR principles (Findable, Accessible, Interoperable, Reusable) when publishing ontologies. Reuse existing terms from established ontologies before creating new ones. Document every class and property with labels, definitions, and examples. Use a reasoner to check logical consistency. Version your ontology and maintain a changelog. Publish both human-readable documentation (HTML) and machine-readable files (OWL/TTL) at a persistent URI.