pi-skill-search 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. package/CHANGELOG.md +20 -0
  2. package/LICENSE +21 -0
  3. package/README.md +97 -0
  4. package/index.ts +163 -0
  5. package/package.json +48 -0
  6. package/skills/adaptyv/SKILL.md +92 -0
  7. package/skills/add-community-extension/SKILL.md +85 -0
  8. package/skills/aeon/SKILL.md +111 -0
  9. package/skills/ai-slop-cleaner/SKILL.md +118 -0
  10. package/skills/anndata/SKILL.md +83 -0
  11. package/skills/arboreto/SKILL.md +107 -0
  12. package/skills/ask/SKILL.md +55 -0
  13. package/skills/astropy/SKILL.md +30 -0
  14. package/skills/async-worker-recovery/SKILL.md +44 -0
  15. package/skills/autopilot/SKILL.md +63 -0
  16. package/skills/autoresearch/SKILL.md +64 -0
  17. package/skills/autoskill/SKILL.md +116 -0
  18. package/skills/babysit/SKILL.md +43 -0
  19. package/skills/benchling-integration/SKILL.md +106 -0
  20. package/skills/bgpt-paper-search/SKILL.md +67 -0
  21. package/skills/biopython/SKILL.md +29 -0
  22. package/skills/bioservices/SKILL.md +96 -0
  23. package/skills/brainstorming/SKILL.md +104 -0
  24. package/skills/cancel/SKILL.md +85 -0
  25. package/skills/ccg/SKILL.md +87 -0
  26. package/skills/celery-pipeline/SKILL.md +30 -0
  27. package/skills/cellxgene-census/SKILL.md +104 -0
  28. package/skills/child-pi-spawning/SKILL.md +85 -0
  29. package/skills/cirq/SKILL.md +113 -0
  30. package/skills/citation-management/SKILL.md +91 -0
  31. package/skills/clinical-decision-support/SKILL.md +117 -0
  32. package/skills/clinical-reports/SKILL.md +118 -0
  33. package/skills/clinical-trial/SKILL.md +28 -0
  34. package/skills/cobrapy/SKILL.md +116 -0
  35. package/skills/configure-notifications/SKILL.md +85 -0
  36. package/skills/consciousness-council/SKILL.md +120 -0
  37. package/skills/context-artifact-hygiene/SKILL.md +85 -0
  38. package/skills/context-mode-ops/SKILL.md +87 -0
  39. package/skills/dask/SKILL.md +85 -0
  40. package/skills/database-lookup/SKILL.md +118 -0
  41. package/skills/datamol/SKILL.md +108 -0
  42. package/skills/debug/SKILL.md +32 -0
  43. package/skills/deep-dive/SKILL.md +114 -0
  44. package/skills/deep-interview/SKILL.md +90 -0
  45. package/skills/deepchem/SKILL.md +117 -0
  46. package/skills/deepinit/SKILL.md +100 -0
  47. package/skills/deeptools/SKILL.md +118 -0
  48. package/skills/delegation-patterns/SKILL.md +56 -0
  49. package/skills/depmap/SKILL.md +94 -0
  50. package/skills/dhdna-profiler/SKILL.md +86 -0
  51. package/skills/diffdock/SKILL.md +101 -0
  52. package/skills/dispatching-parallel-agents/SKILL.md +119 -0
  53. package/skills/dnanexus-integration/SKILL.md +118 -0
  54. package/skills/do/SKILL.md +48 -0
  55. package/skills/docker-sandbox/SKILL.md +29 -0
  56. package/skills/docx/SKILL.md +119 -0
  57. package/skills/esm/SKILL.md +116 -0
  58. package/skills/etetoolkit/SKILL.md +103 -0
  59. package/skills/event-log-tracing/SKILL.md +85 -0
  60. package/skills/exa-search/SKILL.md +72 -0
  61. package/skills/executing-plans/SKILL.md +69 -0
  62. package/skills/exploratory-data-analysis/SKILL.md +118 -0
  63. package/skills/external-context/SKILL.md +80 -0
  64. package/skills/fastapi/SKILL.md +30 -0
  65. package/skills/finishing-a-development-branch/SKILL.md +106 -0
  66. package/skills/flowio/SKILL.md +114 -0
  67. package/skills/fluidsim/SKILL.md +108 -0
  68. package/skills/generate-image/SKILL.md +108 -0
  69. package/skills/geniml/SKILL.md +117 -0
  70. package/skills/geomaster/SKILL.md +109 -0
  71. package/skills/geopandas/SKILL.md +114 -0
  72. package/skills/get-available-resources/SKILL.md +100 -0
  73. package/skills/gget/SKILL.md +111 -0
  74. package/skills/ginkgo-cloud-lab/SKILL.md +52 -0
  75. package/skills/git-master/SKILL.md +85 -0
  76. package/skills/glycoengineering/SKILL.md +104 -0
  77. package/skills/gtars/SKILL.md +104 -0
  78. package/skills/hackernews-frontpage/SKILL.md +46 -0
  79. package/skills/histolab/SKILL.md +98 -0
  80. package/skills/how-it-works/SKILL.md +25 -0
  81. package/skills/hud/SKILL.md +86 -0
  82. package/skills/hugging-science/SKILL.md +93 -0
  83. package/skills/huggingface/SKILL.md +30 -0
  84. package/skills/hypogenic/SKILL.md +107 -0
  85. package/skills/hypothesis-generation/SKILL.md +118 -0
  86. package/skills/imaging-data-commons/SKILL.md +119 -0
  87. package/skills/infographics/SKILL.md +102 -0
  88. package/skills/iso-13485-certification/SKILL.md +114 -0
  89. package/skills/knowledge-agent/SKILL.md +83 -0
  90. package/skills/labarchive-integration/SKILL.md +98 -0
  91. package/skills/lamindb/SKILL.md +119 -0
  92. package/skills/landsat/SKILL.md +29 -0
  93. package/skills/latchbio-integration/SKILL.md +118 -0
  94. package/skills/latex-posters/SKILL.md +112 -0
  95. package/skills/learn-codebase/SKILL.md +24 -0
  96. package/skills/learner/SKILL.md +118 -0
  97. package/skills/literature-review/SKILL.md +118 -0
  98. package/skills/live-agent-lifecycle/SKILL.md +85 -0
  99. package/skills/mailbox-interactive/SKILL.md +85 -0
  100. package/skills/make-plan/SKILL.md +59 -0
  101. package/skills/markdown-mermaid-writing/SKILL.md +118 -0
  102. package/skills/market-research-reports/SKILL.md +119 -0
  103. package/skills/markitdown/SKILL.md +111 -0
  104. package/skills/markitdown-docs/SKILL.md +28 -0
  105. package/skills/matchms/SKILL.md +91 -0
  106. package/skills/matlab/SKILL.md +118 -0
  107. package/skills/matplotlib/SKILL.md +30 -0
  108. package/skills/mcp-setup/SKILL.md +84 -0
  109. package/skills/medchem/SKILL.md +109 -0
  110. package/skills/mem-search/SKILL.md +96 -0
  111. package/skills/modal/SKILL.md +104 -0
  112. package/skills/model-routing-context/SKILL.md +85 -0
  113. package/skills/molecular-dynamics/SKILL.md +116 -0
  114. package/skills/molfeat/SKILL.md +110 -0
  115. package/skills/multi-perspective-review/SKILL.md +85 -0
  116. package/skills/networkx/SKILL.md +111 -0
  117. package/skills/neurokit2/SKILL.md +114 -0
  118. package/skills/neuropixels-analysis/SKILL.md +112 -0
  119. package/skills/nilearn/SKILL.md +29 -0
  120. package/skills/observability-reliability/SKILL.md +43 -0
  121. package/skills/omc-doctor/SKILL.md +86 -0
  122. package/skills/omc-reference/SKILL.md +119 -0
  123. package/skills/omc-setup/SKILL.md +82 -0
  124. package/skills/omc-teams/SKILL.md +81 -0
  125. package/skills/omero-integration/SKILL.md +111 -0
  126. package/skills/open-notebook/SKILL.md +100 -0
  127. package/skills/openephys/SKILL.md +28 -0
  128. package/skills/opentrons-integration/SKILL.md +110 -0
  129. package/skills/optimize-for-gpu/SKILL.md +119 -0
  130. package/skills/orchestration/SKILL.md +85 -0
  131. package/skills/ownership-session-security/SKILL.md +43 -0
  132. package/skills/paper-lookup/SKILL.md +119 -0
  133. package/skills/paperzilla/SKILL.md +114 -0
  134. package/skills/parallel-web/SKILL.md +64 -0
  135. package/skills/pathfinder/SKILL.md +114 -0
  136. package/skills/pathml/SKILL.md +98 -0
  137. package/skills/pdf/SKILL.md +113 -0
  138. package/skills/peer-review/SKILL.md +119 -0
  139. package/skills/pennylane/SKILL.md +119 -0
  140. package/skills/phylogenetics/SKILL.md +102 -0
  141. package/skills/pi-extension-lifecycle/SKILL.md +41 -0
  142. package/skills/plan/SKILL.md +66 -0
  143. package/skills/polars/SKILL.md +114 -0
  144. package/skills/polars-bio/SKILL.md +84 -0
  145. package/skills/pptx/SKILL.md +118 -0
  146. package/skills/pptx-posters/SKILL.md +112 -0
  147. package/skills/primekg/SKILL.md +97 -0
  148. package/skills/project-session-manager/SKILL.md +85 -0
  149. package/skills/protocolsio-integration/SKILL.md +119 -0
  150. package/skills/pubmed-search/SKILL.md +29 -0
  151. package/skills/pufferlib/SKILL.md +103 -0
  152. package/skills/pydeseq2/SKILL.md +106 -0
  153. package/skills/pydicom/SKILL.md +115 -0
  154. package/skills/pyhealth/SKILL.md +117 -0
  155. package/skills/pylabrobot/SKILL.md +100 -0
  156. package/skills/pymatgen/SKILL.md +28 -0
  157. package/skills/pymc/SKILL.md +108 -0
  158. package/skills/pymoo/SKILL.md +90 -0
  159. package/skills/pyopenms/SKILL.md +119 -0
  160. package/skills/pysam/SKILL.md +118 -0
  161. package/skills/pyspark/SKILL.md +30 -0
  162. package/skills/pytdc/SKILL.md +102 -0
  163. package/skills/pytorch/SKILL.md +31 -0
  164. package/skills/pytorch-lightning/SKILL.md +119 -0
  165. package/skills/pyzotero/SKILL.md +104 -0
  166. package/skills/qiskit/SKILL.md +119 -0
  167. package/skills/qutip/SKILL.md +111 -0
  168. package/skills/ralph/SKILL.md +23 -0
  169. package/skills/ralplan/SKILL.md +105 -0
  170. package/skills/rdflib/SKILL.md +29 -0
  171. package/skills/rdkit/SKILL.md +30 -0
  172. package/skills/read-only-explorer/SKILL.md +85 -0
  173. package/skills/receiving-code-review/SKILL.md +103 -0
  174. package/skills/release/SKILL.md +117 -0
  175. package/skills/remember/SKILL.md +39 -0
  176. package/skills/requesting-code-review/SKILL.md +85 -0
  177. package/skills/requirements-to-task-packet/SKILL.md +65 -0
  178. package/skills/research-grants/SKILL.md +118 -0
  179. package/skills/research-lookup/SKILL.md +117 -0
  180. package/skills/research-reproducibility/SKILL.md +28 -0
  181. package/skills/resource-discovery-config/SKILL.md +43 -0
  182. package/skills/rowan/SKILL.md +100 -0
  183. package/skills/runtime-state-reader/SKILL.md +46 -0
  184. package/skills/safe-bash/SKILL.md +85 -0
  185. package/skills/scanpy/SKILL.md +32 -0
  186. package/skills/scholar-evaluation/SKILL.md +115 -0
  187. package/skills/scientific-brainstorming/SKILL.md +118 -0
  188. package/skills/scientific-critical-thinking/SKILL.md +119 -0
  189. package/skills/scientific-schematics/SKILL.md +116 -0
  190. package/skills/scientific-slides/SKILL.md +117 -0
  191. package/skills/scientific-visualization/SKILL.md +109 -0
  192. package/skills/scientific-writing/SKILL.md +119 -0
  193. package/skills/scikit-bio/SKILL.md +92 -0
  194. package/skills/scikit-learn/SKILL.md +99 -0
  195. package/skills/scikit-survival/SKILL.md +110 -0
  196. package/skills/sciomc/SKILL.md +86 -0
  197. package/skills/scvelo/SKILL.md +106 -0
  198. package/skills/scvi-tools/SKILL.md +114 -0
  199. package/skills/seaborn/SKILL.md +97 -0
  200. package/skills/secure-agent-orchestration-review/SKILL.md +47 -0
  201. package/skills/self-improve/SKILL.md +119 -0
  202. package/skills/semantic-compression/SKILL.md +62 -0
  203. package/skills/setup/SKILL.md +42 -0
  204. package/skills/shap/SKILL.md +103 -0
  205. package/skills/simpy/SKILL.md +116 -0
  206. package/skills/skill/SKILL.md +117 -0
  207. package/skills/skill-search/SKILL.md +67 -0
  208. package/skills/skillify/SKILL.md +46 -0
  209. package/skills/smart-explore/SKILL.md +94 -0
  210. package/skills/sqlite-pandas/SKILL.md +30 -0
  211. package/skills/stable-baselines3/SKILL.md +86 -0
  212. package/skills/state-mutation-locking/SKILL.md +44 -0
  213. package/skills/statistical-analysis/SKILL.md +108 -0
  214. package/skills/statsmodels/SKILL.md +29 -0
  215. package/skills/subagent-driven-development/SKILL.md +89 -0
  216. package/skills/sympy/SKILL.md +115 -0
  217. package/skills/system-prompts/SKILL.md +116 -0
  218. package/skills/systematic-debugging/SKILL.md +119 -0
  219. package/skills/team/SKILL.md +85 -0
  220. package/skills/test-driven-development/SKILL.md +84 -0
  221. package/skills/tiledbvcf/SKILL.md +119 -0
  222. package/skills/timeline-report/SKILL.md +85 -0
  223. package/skills/timesfm-forecasting/SKILL.md +112 -0
  224. package/skills/torch-geometric/SKILL.md +118 -0
  225. package/skills/torchdrug/SKILL.md +118 -0
  226. package/skills/trace/SKILL.md +118 -0
  227. package/skills/transformers/SKILL.md +110 -0
  228. package/skills/treatment-plans/SKILL.md +119 -0
  229. package/skills/ui-render-performance/SKILL.md +41 -0
  230. package/skills/ultragoal/SKILL.md +63 -0
  231. package/skills/ultraqa/SKILL.md +85 -0
  232. package/skills/ultrawork/SKILL.md +20 -0
  233. package/skills/umap-learn/SKILL.md +119 -0
  234. package/skills/usfiscaldata/SKILL.md +118 -0
  235. package/skills/using-git-worktrees/SKILL.md +112 -0
  236. package/skills/using-superpowers/SKILL.md +85 -0
  237. package/skills/using-vetc/SKILL.md +92 -0
  238. package/skills/vaex/SKILL.md +111 -0
  239. package/skills/venue-templates/SKILL.md +113 -0
  240. package/skills/verification-before-completion/SKILL.md +88 -0
  241. package/skills/verification-before-done/SKILL.md +68 -0
  242. package/skills/verify/SKILL.md +33 -0
  243. package/skills/version-bump/SKILL.md +54 -0
  244. package/skills/vetc-analyze-ba/SKILL.md +117 -0
  245. package/skills/vetc-analyze-codebase/SKILL.md +118 -0
  246. package/skills/vetc-api-design/SKILL.md +103 -0
  247. package/skills/vetc-brainstorming/SKILL.md +116 -0
  248. package/skills/vetc-change-proposal/SKILL.md +111 -0
  249. package/skills/vetc-cicd/SKILL.md +113 -0
  250. package/skills/vetc-continuous-learning/SKILL.md +115 -0
  251. package/skills/vetc-deep-interview/SKILL.md +103 -0
  252. package/skills/vetc-docgen/SKILL.md +108 -0
  253. package/skills/vetc-frontend-patterns/SKILL.md +99 -0
  254. package/skills/vetc-iterative-retrieval/SKILL.md +110 -0
  255. package/skills/vetc-java-patterns/SKILL.md +113 -0
  256. package/skills/vetc-meta-skill-creator/SKILL.md +99 -0
  257. package/skills/vetc-oracle-patterns/SKILL.md +109 -0
  258. package/skills/vetc-performance-testing/SKILL.md +104 -0
  259. package/skills/vetc-pr-response/SKILL.md +106 -0
  260. package/skills/vetc-ralph/SKILL.md +108 -0
  261. package/skills/vetc-ralplan/SKILL.md +116 -0
  262. package/skills/vetc-receiving-review/SKILL.md +106 -0
  263. package/skills/vetc-reconcile-patterns/SKILL.md +117 -0
  264. package/skills/vetc-refactoring/SKILL.md +96 -0
  265. package/skills/vetc-runbook/SKILL.md +118 -0
  266. package/skills/vetc-sast/SKILL.md +118 -0
  267. package/skills/vetc-sdlc/SKILL.md +97 -0
  268. package/skills/vetc-security/SKILL.md +117 -0
  269. package/skills/vetc-spec-driven/SKILL.md +111 -0
  270. package/skills/vetc-spec-quality/SKILL.md +117 -0
  271. package/skills/vetc-systematic-debugging/SKILL.md +74 -0
  272. package/skills/vetc-tdd/SKILL.md +96 -0
  273. package/skills/vetc-thinking-pm/SKILL.md +110 -0
  274. package/skills/vetc-ui-visual-qa/SKILL.md +117 -0
  275. package/skills/vetc-verify/SKILL.md +101 -0
  276. package/skills/visual-verdict/SKILL.md +59 -0
  277. package/skills/what-if-oracle/SKILL.md +87 -0
  278. package/skills/widget-rendering/SKILL.md +85 -0
  279. package/skills/wiki/SKILL.md +69 -0
  280. package/skills/workspace-isolation/SKILL.md +85 -0
  281. package/skills/worktree-isolation/SKILL.md +85 -0
  282. package/skills/wowerpoint/SKILL.md +101 -0
  283. package/skills/writer-memory/SKILL.md +82 -0
  284. package/skills/writing-plans/SKILL.md +115 -0
  285. package/skills/writing-skills/SKILL.md +115 -0
  286. package/skills/xgboost/SKILL.md +29 -0
  287. package/skills/xgboost-ts/SKILL.md +28 -0
  288. package/skills/xlsx/SKILL.md +111 -0
  289. package/skills/zarr-python/SKILL.md +101 -0
  290. package/src/categories.ts +383 -0
  291. package/src/format.ts +104 -0
  292. package/src/indexer.ts +101 -0
  293. package/src/proactive.ts +51 -0
  294. package/src/scanner.ts +85 -0
  295. package/src/search.ts +89 -0
  296. package/src/strip.ts +29 -0
  297. package/src/synonyms.ts +83 -0
  298. package/src/text.ts +118 -0
  299. package/src/types.ts +64 -0
@@ -0,0 +1,108 @@
1
+ ---
2
+ name: generate-image
3
+ description: Generate or edit images using AI models (FLUX, Nano Banana 2). Use for general-purpose image generation including photos, illustrations, artwork, visual assets, concept art, and any image that is not a technical diagram or schematic. For flowcharts, circuits, pathways, and technical diagrams, use the scientific-schematics skill instead.
4
+ ---
5
+
6
+ # Generate Image
7
+
8
+ Generate and edit high-quality images using OpenRouter's image generation models including FLUX.2 Pro and Gemini 3.1 Flash Image Preview.
9
+
10
+ ## When to Use This Skill
11
+
12
+ **Use generate-image for:**
13
+ - Photos and photorealistic images
14
+ - Artistic illustrations and artwork
15
+ - Concept art and visual concepts
16
+ - Visual assets for presentations or documents
17
+ - Image editing and modifications
18
+ - Any general-purpose image generation needs
19
+
20
+ **Use scientific-schematics instead for:**
21
+ - Flowcharts and process diagrams
22
+ - Circuit diagrams and electrical schematics
23
+ - Biological pathways and signaling cascades
24
+ - System architecture diagrams
25
+ - CONSORT diagrams and methodology flowcharts
26
+ - Any technical/schematic diagrams
27
+
28
+ ## Quick Start
29
+
30
+ Use the `scripts/generate_image.py` script to generate or edit images:
31
+
32
+ ```bash
33
+ # Generate a new image
34
+ python scripts/generate_image.py "A beautiful sunset over mountains"
35
+
36
+ # Edit an existing image
37
+ python scripts/generate_image.py "Make the sky purple" --input photo.jpg
38
+ ```
39
+
40
+ This generates/edits an image and saves it as `generated_image.png` in the current directory.
41
+
42
+ ## API Key Setup
43
+
44
+ **CRITICAL**: The script requires an OpenRouter API key. Before running, check if the user has configured their API key:
45
+
46
+ 1. Look for a `.env` file in the project directory or parent directories
47
+ 2. Check for `OPENROUTER_API_KEY=<key>` in the `.env` file
48
+ 3. If not found, inform the user they need to:
49
+ - Create a `.env` file with `OPENROUTER_API_KEY=your-api-key-here`
50
+ - Or set the environment variable: `export OPENROUTER_API_KEY=your-api-key-here`
51
+ - Get an API key from: https://openrouter.ai/keys
52
+
53
+ The script will automatically detect the `.env` file and provide clear error messages if the API key is missing.
54
+
55
+ ## Model Selection
56
+
57
+ **Default model**: `google/gemini-3.1-flash-image-preview` (high quality, recommended)
58
+
59
+ **Available models for generation and editing**:
60
+ - `google/gemini-3.1-flash-image-preview` - High quality, supports generation + editing
61
+ - `black-forest-labs/flux.2-pro` - Fast, high quality, supports generation + editing
62
+
63
+ **Generation only**:
64
+ - `black-forest-labs/flux.2-flex` - Fast and cheap, but not as high quality as pro
65
+
66
+ Select based on:
67
+ - **Quality**: Use gemini-3.1-flash-image-preview or flux.2-pro
68
+ - **Editing**: Use gemini-3.1-flash-image-preview or flux.2-pro (both support image editing)
69
+ - **Cost**: Use flux.2-flex for generation only
70
+
71
+ ## Common Usage Patterns
72
+
73
+ ### Basic generation
74
+ ```bash
75
+ python scripts/generate_image.py "Your prompt here"
76
+ ```
77
+
78
+ ### Specify model
79
+ ```bash
80
+ python scripts/generate_image.py "A cat in space" --model "black-forest-labs/flux.2-pro"
81
+ ```
82
+
83
+ ### Custom output path
84
+ ```bash
85
+ python scripts/generate_image.py "Abstract art" --output artwork.png
86
+ ```
87
+
88
+ ### Edit an existing image
89
+ ```bash
90
+ python scripts/generate_image.py "Make the background blue" --input photo.jpg
91
+ ```
92
+
93
+ ### Edit with a specific model
94
+ ```bash
95
+ python scripts/generate_image.py "Add sunglasses to the person" --input portrait.png --model "black-forest-labs/flux.2-pro"
96
+ ```
97
+
98
+ ### Edit with custom output
99
+ ```bash
100
+ python scripts/generate_image.py "Remove the text from the image" --input screenshot.png --output cleaned.png
101
+ ```
102
+
103
+ ### Multiple images
104
+ Run the script multiple times with different prompts or output paths:
105
+ ```bash
106
+ python scripts/generate_image.py "Image 1 description" --output image1.png
107
+ python scripts/generate_image.py "Image 2 description" --output image2.png
108
+ ```
@@ -0,0 +1,117 @@
1
+ ---
2
+ name: geniml
3
+ description: This skill should be used when working with genomic interval data (BED files) for machine learning tasks. Use for training region embeddings (Region2Vec, BEDspace), single-cell ATAC-seq analysis (scEmbed), building consensus peaks (universes), or any ML-based analysis of genomic regions. Applies to BED file collections, scATAC-seq data, chromatin accessibility datasets, and region-based genomic feature learning.
4
+ ---
5
+
6
+ # Geniml: Genomic Interval Machine Learning
7
+
8
+ ## Overview
9
+
10
+ Geniml is a Python package for building machine learning models on genomic interval data from BED files. It provides unsupervised methods for learning embeddings of genomic regions, single cells, and metadata labels, enabling similarity searches, clustering, and downstream ML tasks.
11
+
12
+ ## Core Capabilities
13
+
14
+ Geniml provides five primary capabilities, each detailed in dedicated reference files:
15
+
16
+ ### 1. Region2Vec: Genomic Region Embeddings
17
+
18
+ Train unsupervised embeddings of genomic regions using word2vec-style learning.
19
+
20
+ **Use for:** Dimensionality reduction of BED files, region similarity analysis, feature vectors for downstream ML.
21
+
22
+ **Workflow:**
23
+ 1. Tokenize BED files using a universe reference
24
+ 2. Train Region2Vec model on tokens
25
+ 3. Generate embeddings for regions
26
+
27
+ **Reference:** See the Region2Vec section of the Geniml documentation for detailed workflow, parameters, and examples.
28
+
29
+ ### 2. BEDspace: Joint Region and Metadata Embeddings
30
+
31
+ Train shared embeddings for region sets and metadata labels using StarSpace.
32
+
33
+ **Use for:** Metadata-aware searches, cross-modal queries (region→label or label→region), joint analysis of genomic content and experimental conditions.
34
+
35
+ **Workflow:**
36
+ 1. Preprocess regions and metadata
37
+ 2. Train BEDspace model
38
+ 3. Compute distances
39
+ 4. Query across regions and labels
40
+
41
+ **Reference:** See the BEDspace section of the Geniml documentation for detailed workflow, search types, and examples.
42
+
43
+ ### 3. scEmbed: Single-Cell Chromatin Accessibility Embeddings
44
+
45
+ Train Region2Vec models on single-cell ATAC-seq data for cell-level embeddings.
46
+
47
+ **Use for:** scATAC-seq clustering, cell-type annotation, dimensionality reduction of single cells, integration with scanpy workflows.
48
+
49
+ **Workflow:**
50
+ 1. Prepare AnnData with peak coordinates
51
+ 2. Pre-tokenize cells
52
+ 3. Train scEmbed model
53
+ 4. Generate cell embeddings
54
+ 5. Cluster and visualize with scanpy
55
+
56
+ **Reference:** See the scEmbed section of the Geniml documentation for detailed workflow, parameters, and examples.
57
+
58
+ ### 4. Consensus Peaks: Universe Building
59
+
60
+ Build reference peak sets (universes) from BED file collections using multiple statistical methods.
61
+
62
+ **Use for:** Creating tokenization references, standardizing regions across datasets, defining consensus features with statistical rigor.
63
+
64
+ **Workflow:**
65
+ 1. Combine BED files
66
+ 2. Generate coverage tracks
67
+ 3. Build universe using CC, CCF, ML, or HMM method
68
+
69
+ **Methods:**
70
+ - **CC (Coverage Cutoff)**: Simple threshold-based
71
+ - **CCF (Coverage Cutoff Flexible)**: Confidence intervals for boundaries
72
+ - **ML (Maximum Likelihood)**: Probabilistic modeling of positions
+ - **HMM (Hidden Markov Model)**: Models complex region structures
73
+
74
+ ### 5. Utilities: Supporting Tools
75
+
76
+ Additional tools for caching, randomization, evaluation, and search.
77
+
78
+ **Available utilities:**
79
+ - **BBClient**: BED file caching for repeated access
80
+ - **BEDshift**: Randomization preserving genomic context
81
+ - **Evaluation**: Metrics for embedding quality (silhouette, Davies-Bouldin, etc.)
82
+ - **Tokenization**: Region tokenization utilities (hard, soft, universe-based)
83
+ - **Text2BedNN**: Neural search backends for genomic queries
84
+
85
+ **Reference:** See the Geniml documentation for detailed usage of each utility.
86
+
87
+ ## Common Workflows
88
+
89
+ ### Basic Region Embedding Pipeline
90
+
91
+ ```python
92
+ from geniml.tokenization import hard_tokenization
93
+ from geniml.region2vec import region2vec
94
+ from geniml.evaluation import evaluate_embeddings
95
+
96
+ # Step 1: Tokenize BED files
97
+ hard_tokenization(
98
+ src_folder='bed_files/',
99
+ dst_folder='tokens/',
100
+ universe_file='universe.bed',
101
+ p_value_threshold=1e-9
102
+ )
103
+
104
+ # Step 2: Train Region2Vec
105
+ region2vec(
106
+ token_folder='tokens/',
107
+ save_dir='model/',
108
+ num_shufflings=1000,
109
+ embedding_dim=100
110
+ )
111
+
112
+ # Step 3: Evaluate
113
+ metrics = evaluate_embeddings(
114
+ embeddings_file='model/embeddings.npy',
115
+ labels_file='metadata.csv'
116
+ )
117
+ ```
@@ -0,0 +1,109 @@
1
+ ---
2
+ name: geomaster
3
+ description: Comprehensive geospatial science skill covering remote sensing, GIS, spatial analysis, machine learning for earth observation, and 30+ scientific domains. Supports satellite imagery processing (Sentinel, Landsat, MODIS, SAR, hyperspectral), vector and raster data operations, spatial statistics, point cloud processing, network analysis, cloud-native workflows (STAC, COG, Planetary Computer), and 8 programming languages (Python, R, Julia, JavaScript, C++, Java, Go, Rust) with 500+ code examples. Use for remote sensing workflows, GIS analysis, spatial ML, Earth observation data processing, terrain analysis, hydrological modeling, marine spatial analysis, atmospheric science, and any geospatial computation task.
4
+ ---
5
+
6
+ # GeoMaster
7
+
8
+ Comprehensive geospatial science skill covering GIS, remote sensing, spatial analysis, and ML for Earth observation across 70+ topics with 500+ code examples in 8 programming languages.
9
+ ```bash
10
+ # Core Python stack (conda recommended)
11
+ conda install -c conda-forge gdal rasterio fiona shapely pyproj geopandas
12
+
13
+ # Remote sensing & ML
14
+ uv pip install rsgislib torchgeo earthengine-api
15
+ uv pip install scikit-learn xgboost torch-geometric
16
+
17
+ # Network & visualization
18
+ uv pip install osmnx networkx folium keplergl
19
+ uv pip install cartopy contextily mapclassify
20
+
21
+ # Big data & cloud
22
+ uv pip install xarray rioxarray dask-geopandas
23
+ uv pip install pystac-client planetary-computer
24
+
25
+ # Point clouds
26
+ uv pip install laspy pylas open3d pdal
27
+
28
+ # Databases
29
+ conda install -c conda-forge postgis spatialite
30
+ ```
31
+
32
+ ## Quick Start
33
+
34
+ ### NDVI from Sentinel-2
35
+
36
+ ```python
37
+ import rasterio
38
+ import numpy as np
39
+
40
+ with rasterio.open('sentinel2.tif') as src:
41
+ red = src.read(4).astype(float) # B04
42
+ nir = src.read(8).astype(float) # B08
43
+ ndvi = (nir - red) / (nir + red + 1e-8)
44
+ ndvi = np.nan_to_num(ndvi, nan=0)
45
+
46
+ profile = src.profile
47
+ profile.update(count=1, dtype=rasterio.float32)
48
+ ```
49
+
50
+ ### Spatial Analysis with GeoPandas
51
+
52
+ ```python
53
+ import geopandas as gpd
54
+
55
+ # Load and ensure same CRS
56
+ zones = gpd.read_file('zones.geojson')
57
+ points = gpd.read_file('points.geojson')
58
+
59
+ if zones.crs != points.crs:
60
+ points = points.to_crs(zones.crs)
61
+
62
+ # Spatial join and statistics
63
+ joined = gpd.sjoin(points, zones, how='inner', predicate='within')
64
+ stats = joined.groupby('zone_id').agg({
65
+ 'value': ['count', 'mean', 'std', 'min', 'max']
66
+ }).round(2)
67
+ ```
68
+
69
+ ### Google Earth Engine Time Series
70
+
71
+ ```python
72
+ import ee
73
+ import pandas as pd
74
+
75
+ ee.Initialize(project='your-project')
76
+ roi = ee.Geometry.Point([-122.4, 37.7]).buffer(10000)
77
+
78
+ s2 = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
79
+ .filterBounds(roi)
80
+ .filterDate('2020-01-01', '2023-12-31')
81
+ .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20)))
82
+
83
+ def add_ndvi(img): return img.addBands(img.normalizedDifference(['B8', 'B4']).rename('NDVI'))
84
+ ```
85
+ ## Core Concepts
86
+
87
+ ### Data Types
88
+
89
+ | Type | Examples | Libraries |
90
+ |------|----------|-----------|
91
+ | Vector | Shapefile, GeoJSON, GeoPackage | GeoPandas, Fiona, GDAL |
92
+ | Raster | GeoTIFF, NetCDF, COG | Rasterio, Xarray, GDAL |
93
+ | Point Cloud | LAS, LAZ | Laspy, PDAL, Open3D |
94
+
95
+ ### Coordinate Systems
96
+
97
+ - **EPSG:4326** (WGS 84) - Geographic, lat/lon, use for storage
98
+ - **EPSG:3857** (Web Mercator) - Web maps only (don't use for area/distance!)
99
+ - **EPSG:326xx/327xx** (UTM) - Metric calculations, <1% distortion per zone
100
+ - Use `gdf.estimate_utm_crs()` for automatic UTM detection
101
+
102
+ ```python
103
+ # Always check CRS before operations
104
+ assert gdf1.crs == gdf2.crs, "CRS mismatch!"
105
+
106
+ # For area/distance calculations, use projected CRS
107
+ gdf_metric = gdf.to_crs(gdf.estimate_utm_crs())
108
+ ```
109
+
@@ -0,0 +1,114 @@
1
+ ---
2
+ name: geopandas
3
+ description: Python library for working with geospatial vector data including shapefiles, GeoJSON, and GeoPackage files. Use when working with geographic data for spatial analysis, geometric operations, coordinate transformations, spatial joins, overlay operations, choropleth mapping, or any task involving reading/writing/analyzing vector geographic data. Supports PostGIS databases, interactive maps, and integration with matplotlib/folium/cartopy. Use for tasks like buffer analysis, spatial joins between datasets, dissolving boundaries, clipping data, calculating areas/distances, reprojecting coordinate systems, creating maps, or converting between spatial file formats.
4
+ ---
5
+
6
+ # GeoPandas
7
+
8
+ GeoPandas extends pandas to enable spatial operations on geometric types. It combines the capabilities of pandas and shapely for geospatial data analysis.
9
+
10
+ ### Optional Dependencies
11
+
12
+ ```bash
13
+ # For interactive maps
14
+ uv pip install folium
15
+
16
+ # For classification schemes in mapping
17
+ uv pip install mapclassify
18
+
19
+ # For faster I/O operations (2-4x speedup)
20
+ uv pip install pyarrow
21
+
22
+ # For PostGIS database support
23
+ uv pip install psycopg2
24
+ uv pip install geoalchemy2
25
+
26
+ # For basemaps
27
+ uv pip install contextily
28
+
29
+ # For cartographic projections
30
+ uv pip install cartopy
31
+ ```
32
+
33
+ ## Quick Start
34
+
35
+ ```python
36
+ import geopandas as gpd
37
+
38
+ # Read spatial data
39
+ gdf = gpd.read_file("data.geojson")
40
+
41
+ # Basic exploration
42
+ print(gdf.head())
43
+ print(gdf.crs)
44
+ print(gdf.geometry.geom_type)
45
+
46
+ # Simple plot
47
+ gdf.plot()
48
+
49
+ # Reproject to different CRS
50
+ gdf_projected = gdf.to_crs("EPSG:3857")
51
+
52
+ # Calculate area (EPSG:3857 distorts area; use an equal-area or local UTM CRS, e.g. gdf.estimate_utm_crs(), when accuracy matters)
53
+ gdf_projected['area'] = gdf_projected.geometry.area
54
+
55
+ # Save to file
56
+ gdf.to_file("output.gpkg")
57
+ ```
58
+
59
+ ## Core Concepts
60
+
61
+ ### Data Structures
62
+
63
+ - **GeoSeries**: Vector of geometries with spatial operations
64
+ - **GeoDataFrame**: Tabular data structure with geometry column
65
+
66
+ See data-structures.md for details.
67
+
68
+ ### Reading and Writing Data
69
+
70
+ GeoPandas reads/writes multiple formats: Shapefile, GeoJSON, GeoPackage, PostGIS, Parquet.
71
+
72
+ ```python
73
+ # Read with filtering
74
+ gdf = gpd.read_file("data.gpkg", bbox=(xmin, ymin, xmax, ymax))
75
+
76
+ # Write with Arrow acceleration
77
+ gdf.to_file("output.gpkg", use_arrow=True)
78
+ ```
79
+
80
+ See data-io.md for comprehensive I/O operations.
81
+
82
+ ### Coordinate Reference Systems
83
+
84
+ Always check and manage CRS for accurate spatial operations:
85
+
86
+ ```python
87
+ # Check CRS
88
+ print(gdf.crs)
89
+
90
+ # Reproject (transforms coordinates)
91
+ gdf_projected = gdf.to_crs("EPSG:3857")
92
+
93
+ # Set CRS (only when metadata missing)
94
+ gdf = gdf.set_crs("EPSG:4326")
95
+ ```
96
+
97
+ See crs-management.md for CRS operations.
98
+
99
+ ## Common Operations
100
+
101
+ ### Geometric Operations
102
+
103
+ Buffer, simplify, centroid, convex hull, affine transformations:
104
+
105
+ ```python
106
+ # Buffer by 10 units
107
+ buffered = gdf.geometry.buffer(10)
108
+
109
+ # Simplify with tolerance
110
+ simplified = gdf.geometry.simplify(tolerance=5, preserve_topology=True)
111
+
112
+ # Get centroids
113
+ centroids = gdf.geometry.centroid
114
+ ```
@@ -0,0 +1,100 @@
1
+ ---
2
+ name: get-available-resources
3
+ description: This skill should be used at the start of any computationally intensive scientific task to detect and report available system resources (CPU cores, GPUs, memory, disk space). It creates a JSON file with resource information and strategic recommendations that inform computational approach decisions such as whether to use parallel processing (joblib, multiprocessing), out-of-core computing (Dask, Zarr), GPU acceleration (PyTorch, JAX), or memory-efficient strategies. Use this skill before running analyses, training models, processing large datasets, or any task where resource constraints matter.
4
+ ---
5
+
6
+ # Get Available Resources
7
+
8
+ ## Overview
9
+
10
+ Detect available computational resources and generate strategic recommendations for scientific computing tasks. This skill automatically identifies CPU capabilities, GPU availability (NVIDIA CUDA, AMD ROCm, Apple Silicon Metal), memory constraints, and disk space to help make informed decisions about computational approaches.
11
+
12
+ ## When to Use This Skill
13
+
14
+ Use this skill proactively before any computationally intensive task:
15
+
16
+ - **Before data analysis**: Determine if datasets can be loaded into memory or require out-of-core processing
17
+ - **Before model training**: Check if GPU acceleration is available and which backend to use
18
+ - **Before parallel processing**: Identify optimal number of workers for joblib, multiprocessing, or Dask
19
+ - **Before large file operations**: Verify sufficient disk space and appropriate storage strategies
20
+ - **At project initialization**: Understand baseline capabilities for making architectural decisions
21
+
22
+ **Example scenarios:**
23
+ - "Help me analyze this 50GB genomics dataset" → Use this skill first to determine if Dask/Zarr are needed
24
+ - "Train a neural network on this data" → Use this skill to detect available GPUs and backends
25
+ - "Process 10,000 files in parallel" → Use this skill to determine optimal worker count
26
+ - "Run a computationally intensive simulation" → Use this skill to understand resource constraints
27
+
28
+ ### Resource Detection
29
+
30
+ The skill runs `scripts/detect_resources.py` to automatically detect:
31
+
32
+ 1. **CPU Information**
33
+ - Physical and logical core counts
34
+ - Processor architecture and model
35
+ - CPU frequency information
36
+
37
+ 2. **GPU Information**
38
+ - NVIDIA GPUs: Detects via nvidia-smi, reports VRAM, driver version, compute capability
39
+ - AMD GPUs: Detects via rocm-smi
40
+ - Apple Silicon: Detects M1/M2/M3/M4 chips with Metal support and unified memory
41
+
42
+ 3. **Memory Information**
43
+
44
+ ### Strategic Recommendations
45
+
46
+ The skill generates context-aware recommendations:
47
+
48
+ **Parallel Processing Recommendations:**
49
+ - **High parallelism (8+ cores)**: Use Dask, joblib, or multiprocessing with workers = cores - 2
50
+ - **Moderate parallelism (4-7 cores)**: Use joblib or multiprocessing with workers = cores - 1
51
+ - **Sequential (< 4 cores)**: Prefer sequential processing to avoid overhead
52
+
53
+ **Memory Strategy Recommendations:**
54
+ - **Memory constrained (< 4GB available)**: Use Zarr, Dask, or H5py for out-of-core processing
55
+ - **Moderate memory (4-16GB available)**: Use Dask/Zarr for datasets > 2GB
56
+ - **Memory abundant (> 16GB available)**: Can load most datasets into memory directly
57
+
58
+ **GPU Acceleration Recommendations:**
59
+
60
+ ## Usage Instructions
61
+
62
+ ### Step 1: Run Resource Detection
63
+
64
+ Execute the detection script at the start of any computationally intensive task:
65
+
66
+ ```bash
67
+ python scripts/detect_resources.py
68
+ ```
69
+
70
+ Optional arguments:
71
+ - `-o, --output <path>`: Specify custom output path (default: `.claude_resources.json`)
72
+ - `-v, --verbose`: Print full resource information to stdout
73
+
74
+ ### Step 2: Read and Apply Recommendations
75
+
76
+ After running detection, read the generated `.claude_resources.json` file to inform computational decisions:
77
+
78
+ ```python
79
+ # Example: Use recommendations in code
80
+ import json
81
+
82
+ with open('.claude_resources.json', 'r') as f:
83
+ resources = json.load(f)
84
+
85
+ # Check parallel processing strategy
86
+ if resources['recommendations']['parallel_processing']['strategy'] == 'high_parallelism':
87
+ n_jobs = resources['recommendations']['parallel_processing']['suggested_workers']
88
+ # Use joblib, Dask, or multiprocessing with n_jobs workers
89
+
90
+ # Check memory strategy
91
+ if resources['recommendations']['memory_strategy']['strategy'] == 'memory_constrained':
92
+ # Use Dask, Zarr, or H5py for out-of-core processing
93
+ import dask.array as da
94
+ # Load data in chunks
95
+
96
+ # Check GPU availability
97
+ if resources['recommendations']['gpu_acceleration']['available']:
98
+ backends = resources['recommendations']['gpu_acceleration']['backends']
99
+ # Use appropriate GPU library based on available backend
100
+ ```
@@ -0,0 +1,111 @@
1
+ ---
2
+ name: gget
3
+ description: "Fast CLI/Python queries to 20+ bioinformatics databases. Use for quick lookups: gene info, BLAST searches, AlphaFold structures, enrichment analysis. Best for interactive exploration, simple queries. For batch processing or advanced BLAST use biopython; for multi-database Python workflows use bioservices."
4
+ ---
5
+
6
+ # gget
7
+
8
+ ## Overview
9
+
10
+ gget is a command-line bioinformatics tool and Python package providing unified access to 20+ genomic databases and analysis methods. Query gene information, sequence analysis, protein structures, expression data, and disease associations through a consistent interface. All gget modules work both as command-line tools and as Python functions.
11
+
12
+ **Important**: The databases queried by gget are continuously updated, which sometimes changes their structure. gget modules are tested automatically on a biweekly basis and updated to match new database structures when necessary.
13
+
14
+ ```bash
+ # Using uv (recommended)
15
+ uv pip install gget
16
+
17
+ # Or using pip
18
+ pip install --upgrade gget
19
+
20
+ # In Python/Jupyter
21
+ import gget
22
+ ```
23
+
24
+ ## Quick Start
25
+
26
+ Basic usage pattern for all modules:
27
+
28
+ ```bash
29
+ # Command-line
30
+ gget <module> [arguments] [options]
31
+
32
+ # Python
33
+ gget.module(arguments, options)
34
+ ```
35
+
36
+ Most modules return:
37
+ - **Command-line**: JSON (default) or CSV with `-csv` flag
38
+ - **Python**: DataFrame or dictionary
39
+
40
+ Common flags across modules:
41
+ - `-o/--out`: Save results to file
42
+ - `-q/--quiet`: Suppress progress information
43
+ - `-csv`: Return CSV format (command-line only)
44
+
45
+ ## Module Categories
46
+
47
+ ```bash
+ # List available species
48
+ gget ref --list_species
49
+
50
+ # Get all reference files for human
51
+ gget ref homo_sapiens
52
+
53
+ # Download only GTF annotation for mouse
54
+ gget ref -w gtf -d mouse
55
+ ```
56
+
57
+ ```python
58
+ # Python
59
+ gget.ref("homo_sapiens")
60
+ gget.ref("mus_musculus", which="gtf", download=True)
61
+ ```
62
+
63
+ #### gget search - Gene Search
64
+
65
+ Locate genes by name or description across species.
66
+
67
+ **Parameters**:
68
+ - `searchwords`: One or more search terms (case-insensitive)
69
+ - `-s/--species`: Target species (e.g., 'homo_sapiens', 'mouse')
70
+ - `-r/--release`: Ensembl release number
71
+ - `-t/--id_type`: Return 'gene' (default) or 'transcript'
72
+ - `-ao/--andor`: 'or' (default) finds ANY searchword; 'and' requires ALL
73
+
74
+ ```bash
+ # Search for GABA-related genes in human
75
+ gget search -s human gaba gamma-aminobutyric
76
+
77
+ # Find specific gene, require all terms
78
+ gget search -s mouse -ao and pax7 transcription
79
+ ```
80
+
81
+ ```python
82
+ # Python
83
+ gget.search(["gaba", "gamma-aminobutyric"], species="homo_sapiens")
84
+ ```
85
+
86
+ #### gget info - Gene/Transcript Information
87
+
88
+ Retrieve comprehensive gene and transcript metadata from Ensembl, UniProt, and NCBI.
89
+
90
+ **Parameters**:
91
+ - `ens_ids`: One or more Ensembl IDs (also supports WormBase, Flybase IDs). Limit: ~1000 IDs
92
+ - `-n/--ncbi`: Disable NCBI data retrieval
93
+ - `-u/--uniprot`: Disable UniProt data retrieval
94
+ - `-pdb`: Include PDB identifiers (increases runtime)
95
+
96
+ **Returns**: UniProt ID, NCBI gene ID, primary gene name, synonyms, protein names, descriptions, biotype, canonical transcript
97
+
98
+ **Examples**:
99
+ ```bash
100
+ # Get info for multiple genes
101
+ gget info ENSG00000034713 ENSG00000104853 ENSG00000170296
102
+
103
+ # Include PDB IDs
104
+ gget info ENSG00000034713 -pdb
105
+ ```
106
+
107
+ ```python
108
+ # Python
109
+ gget.info(["ENSG00000034713", "ENSG00000104853"], pdb=True)
110
+ ```
111
+