@nahisaho/satori 0.12.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. package/README.md +150 -54
  2. package/package.json +1 -1
  3. package/src/.github/skills/scientific-biomedical-pubtator/SKILL.md +331 -0
  4. package/src/.github/skills/scientific-biothings-idmapping/SKILL.md +298 -0
  5. package/src/.github/skills/scientific-cell-line-resources/SKILL.md +258 -0
  6. package/src/.github/skills/scientific-compound-screening/SKILL.md +245 -0
  7. package/src/.github/skills/scientific-ebi-databases/SKILL.md +280 -0
  8. package/src/.github/skills/scientific-genome-sequence-tools/SKILL.md +304 -0
  9. package/src/.github/skills/scientific-healthcare-ai/SKILL.md +273 -0
  10. package/src/.github/skills/scientific-human-protein-atlas/SKILL.md +244 -0
  11. package/src/.github/skills/scientific-metabolic-modeling/SKILL.md +288 -0
  12. package/src/.github/skills/scientific-noncoding-rna/SKILL.md +262 -0
  13. package/src/.github/skills/scientific-ontology-enrichment/SKILL.md +340 -0
  14. package/src/.github/skills/scientific-pharmacology-targets/SKILL.md +323 -0
  15. package/src/.github/skills/scientific-phylogenetics/SKILL.md +297 -0
  16. package/src/.github/skills/scientific-preprint-archive/SKILL.md +476 -0
  17. package/src/.github/skills/scientific-public-health-data/SKILL.md +322 -0
  18. package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +327 -0
  19. package/src/.github/skills/scientific-regulatory-genomics/SKILL.md +274 -0
  20. package/src/.github/skills/scientific-reinforcement-learning/SKILL.md +280 -0
  21. package/src/.github/skills/scientific-structural-proteomics/SKILL.md +317 -0
  22. package/src/.github/skills/scientific-symbolic-mathematics/SKILL.md +277 -0
@@ -0,0 +1,258 @@
1
+ ---
2
+ name: scientific-cell-line-resources
3
+ description: |
4
+ 細胞株リソーススキル。Cellosaurus 細胞株データベース検索、
5
+ STR プロファイルマッチング、コンタミネーション検出、
6
+ 細胞株メタデータ (由来組織・疾患・種) 取得パイプライン。
7
+ ---
8
+
9
+ # Scientific Cell Line Resources
10
+
11
+ Cellosaurus を中心とした細胞株リソースデータベースアクセス
12
+ パイプラインを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - 細胞株の正式名称・アクセッション番号を確認するとき
17
+ - 細胞株の由来 (組織・疾患・種) を調べるとき
18
+ - STR プロファイルで細胞株の同一性を検証するとき
19
+ - 細胞株のコンタミネーション (ミスアイデンティフィケーション) を確認するとき
20
+ - 実験に使用する細胞株の参考文献・データベースリンクを取得するとき
21
+
22
+ ---
23
+
24
+ ## Quick Start
25
+
26
+ ## 1. Cellosaurus 細胞株検索
27
+
28
+ ```python
29
+ import requests
30
+ import pandas as pd
31
+
32
+ CELLOSAURUS_API = "https://api.cellosaurus.org"
33
+
34
+
35
def search_cellosaurus(query, limit=25):
    """
    Search Cellosaurus for cell lines matching a free-text query.

    Parameters:
        query: str — cell line name (e.g., "HeLa", "MCF-7", "A549")
        limit: int — maximum number of records requested (sent as ``rows``)

    Returns:
        pandas.DataFrame with one row per hit and the columns accession,
        name, synonyms, category, sex, species, diseases,
        derived_from_site, is_contaminated and is_problematic.

    Raises:
        requests.HTTPError: when the API answers with an error status.

    ToolUniverse:
        Cellosaurus_search(query=query)
        Cellosaurus_get_cell_line(accession=accession)
        Cellosaurus_get_str_profile(accession=accession)
    """

    def summarize(hit):
        # Each hit wraps the actual record under the "cellLine" key.
        record = hit.get("cellLine", {})
        synonyms = [syn.get("value", "") for syn in record.get("synonymList", [])]
        diseases = [
            disease.get("terminology", {}).get("value", "")
            for disease in record.get("diseaseList", [])
        ]
        return {
            "accession": record.get("accession", ""),
            "name": record.get("name", ""),
            "synonyms": synonyms,
            "category": record.get("category", ""),
            "sex": record.get("sex", ""),
            "species": record.get("species", {}).get("value", ""),
            "diseases": diseases,
            "derived_from_site": record.get("derivedFromSite", {}).get("value", ""),
            "is_contaminated": record.get("isContaminated", False),
            "is_problematic": record.get("isProblematic", False),
        }

    response = requests.get(
        f"{CELLOSAURUS_API}/search/cell-line",
        params={"q": query, "rows": limit, "format": "json"},
    )
    response.raise_for_status()
    payload = response.json()

    hits = payload.get("result", {}).get("cellLineList", [])
    table = pd.DataFrame([summarize(hit) for hit in hits])
    print(f"Cellosaurus search '{query}': {len(table)} cell lines")
    return table
75
+ ```
76
+
77
+ ## 2. 細胞株詳細情報取得
78
+
79
+ ```python
80
def get_cellosaurus_entry(accession):
    """
    Fetch the detailed Cellosaurus record for a single cell line.

    Parameters:
        accession: str — Cellosaurus accession (e.g., "CVCL_0030")

    Returns:
        dict with the keys accession, name, category, sex, age, species,
        diseases, derived_from_site, is_contaminated,
        contamination_comment, str_profile, references and
        cross_references. Fields missing from the response fall back to
        an empty string, an empty list or False.

    Raises:
        requests.HTTPError: when the API answers with an error status.
    """
    response = requests.get(
        f"{CELLOSAURUS_API}/cell-line/{accession}",
        params={"format": "json"}
    )
    response.raise_for_status()
    record = response.json().get("cellLine", {})

    # Flatten the nested list sections before assembling the entry.
    diseases = []
    for disease in record.get("diseaseList", []):
        diseases.append({
            "name": disease.get("terminology", {}).get("value", ""),
            "accession": disease.get("terminology", {}).get("accession", ""),
        })

    references = []
    for ref in record.get("referenceList", []):
        references.append({
            "pmid": ref.get("pubmedId", ""),
            "title": ref.get("title", ""),
        })

    cross_references = []
    for xref in record.get("xrefList", []):
        cross_references.append({
            "database": xref.get("database", ""),
            "accession": xref.get("accession", ""),
        })

    entry = {
        "accession": record.get("accession", ""),
        "name": record.get("name", ""),
        "category": record.get("category", ""),
        "sex": record.get("sex", ""),
        "age": record.get("age", ""),
        "species": record.get("species", {}).get("value", ""),
        "diseases": diseases,
        "derived_from_site": record.get("derivedFromSite", {}).get("value", ""),
        "is_contaminated": record.get("isContaminated", False),
        "contamination_comment": record.get("contaminationComment", ""),
        "str_profile": record.get("strList", []),
        "references": references,
        "cross_references": cross_references,
    }

    summary = f"({entry['species']}, {entry['category']})"
    print(f"Cellosaurus {accession}: {entry['name']} " + summary)
    return entry
132
+ ```
133
+
134
+ ## 3. STR プロファイル検証
135
+
136
+ ```python
137
def _normalize_alleles(alleles):
    """Return one marker's alleles as a set of whitespace-stripped strings."""
    if isinstance(alleles, (list, tuple, set, frozenset)):
        tokens = [str(allele) for allele in alleles]
    else:
        tokens = str(alleles).split(",")
    # Strip each token so "11, 12" and "11,12" describe the same genotype,
    # and drop empty tokens left behind by trailing commas.
    return {token.strip() for token in tokens if token.strip()}


def check_str_profile(accession, str_data=None):
    """
    Verify cell line identity against its STR (Short Tandem Repeat) profile.

    Parameters:
        accession: str — Cellosaurus accession
        str_data: dict — measured STR data {marker: alleles}; alleles may be
            a comma-separated string or a list/tuple/set of allele values.
            When None, only the reference profile is returned.

    Returns:
        dict, in one of three shapes:
        - no reference profile: {"match": None, "message": ...}
        - str_data is None: {"reference_str": ..., "marker_count": ...}
        - otherwise: match_percentage, matched, total_compared,
          is_authenticated (match_percentage >= 80) and per-marker details.
          Only markers present in both profiles are compared; with no
          shared marker the percentage is 0 and the result is a FAIL.
    """
    entry = get_cellosaurus_entry(accession)
    ref_str = entry.get("str_profile", [])

    if not ref_str:
        print(f"WARNING: {accession} has no STR profile in Cellosaurus")
        return {"match": None, "message": "No reference STR profile available"}

    ref_markers = {
        marker.get("marker", ""): marker.get("alleles", "")
        for marker in ref_str
    }

    if str_data is None:
        print(f"Reference STR for {accession}: {len(ref_markers)} markers")
        return {"reference_str": ref_markers, "marker_count": len(ref_markers)}

    # Calculate match percentage over the markers both profiles share.
    matched = 0
    total = 0
    details = []
    for marker, ref_alleles in ref_markers.items():
        if marker not in str_data:
            continue
        total += 1
        measured = str_data[marker]
        if _normalize_alleles(ref_alleles) == _normalize_alleles(measured):
            matched += 1
            details.append({"marker": marker, "match": True})
        else:
            details.append({
                "marker": marker, "match": False,
                "reference": ref_alleles, "measured": measured,
            })

    match_pct = (matched / total * 100) if total > 0 else 0
    result = {
        "match_percentage": match_pct,
        "matched": matched,
        "total_compared": total,
        "is_authenticated": match_pct >= 80,
        "details": details,
    }

    status = "PASS" if result["is_authenticated"] else "FAIL"
    print(f"STR verification {accession}: {match_pct:.1f}% match → {status}")
    return result
191
+ ```
192
+
193
+ ## 4. コンタミネーション・問題細胞株チェック
194
+
195
+ ```python
196
def check_contamination_status(cell_line_names):
    """
    Check a list of cell lines for contamination / misidentification flags.

    Each name is looked up with ``search_cellosaurus(name, limit=1)`` and
    the first hit, if any, supplies the flags.

    Parameters:
        cell_line_names: list — cell line names to check

    Returns:
        pandas.DataFrame with one row per input name. Names without a hit
        have found=False and None for both flags.
    """
    records = []
    for name in cell_line_names:
        hits = search_cellosaurus(name, limit=1)
        if hits.empty:
            record = {
                "name": name, "found": False,
                "is_contaminated": None, "is_problematic": None,
            }
        else:
            top = hits.iloc[0]
            record = {
                "name": name,
                "found": True,
                "accession": top.get("accession", ""),
                "official_name": top.get("name", ""),
                "is_contaminated": top.get("is_contaminated", False),
                "is_problematic": top.get("is_problematic", False),
                "species": top.get("species", ""),
                "diseases": top.get("diseases", []),
            }
        records.append(record)

    report = pd.DataFrame(records)
    # An empty input yields a frame without columns, hence the guards.
    if "is_contaminated" in report:
        contaminated = report["is_contaminated"].sum()
    else:
        contaminated = 0
    if "is_problematic" in report:
        problematic = report["is_problematic"].sum()
    else:
        problematic = 0
    print(f"Cell line check: {len(cell_line_names)} lines, "
          f"{contaminated} contaminated, {problematic} problematic")
    return report
231
+ ```
232
+
233
+ ---
234
+
235
+ ## 利用可能ツール
236
+
237
+ | ToolUniverse カテゴリ | 主なツール |
238
+ |---|---|
239
+ | `cellosaurus` | `Cellosaurus_search`, `Cellosaurus_get_cell_line`, `Cellosaurus_get_str_profile` |
240
+
241
+ ## パイプライン出力
242
+
243
+ | 出力ファイル | 説明 | 連携先スキル |
244
+ |---|---|---|
245
+ | `results/cell_lines.csv` | 細胞株メタデータ | → cancer-genomics, precision-oncology |
246
+ | `results/str_verification.json` | STR 検証結果 | → lab-automation, lab-data-management |
247
+ | `results/contamination_report.json` | コンタミレポート | → research-methodology |
248
+
249
+ ## パイプライン統合
250
+
251
+ ```
252
+ cancer-genomics ──→ cell-line-resources ──→ lab-automation
253
+ (COSMIC/DepMap) (Cellosaurus STR) (プロトコル管理)
254
+
255
+ ├──→ precision-oncology (腫瘍細胞株)
256
+ ├──→ disease-research (疾患モデル)
257
+ └──→ human-protein-atlas (発現データ)
258
+ ```
@@ -0,0 +1,245 @@
1
+ ---
2
+ name: scientific-compound-screening
3
+ description: |
4
+ 化合物スクリーニングスキル。ZINC データベースを活用した購入可能化合物検索、
5
+ SMILES/名前ベースの類似性検索、カタログフィルタリング、
6
+ バーチャルスクリーニング前処理パイプライン。
7
+ ---
8
+
9
+ # Scientific Compound Screening
10
+
11
+ ZINC データベースを活用した化合物ライブラリ検索・
12
+ バーチャルスクリーニング前処理パイプラインを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - 購入可能な化合物ライブラリを検索するとき
17
+ - SMILES 構造式から類似化合物を探すとき
18
+ - 化合物名からデータベースレコードを取得するとき
19
+ - ベンダーカタログの絞り込みを行うとき
20
+ - バーチャルスクリーニング用の化合物セットを準備するとき
21
+
22
+ ---
23
+
24
+ ## Quick Start
25
+
26
+ ## 1. ZINC 化合物名検索
27
+
28
+ ```python
29
+ import requests
30
+ import pandas as pd
31
+
32
+ ZINC_API = "https://zinc15.docking.org"
33
+
34
+
35
def zinc_search_by_name(name, max_results=20):
    """
    Search the ZINC database by compound name.

    Parameters:
        name: str — compound name (e.g., "aspirin")
        max_results: int — maximum results (sent as ``count``)

    Returns:
        pandas.DataFrame with the columns zinc_id, name, smiles, mwt, logp
        and purchasable, one row per returned compound.

    Raises:
        requests.HTTPError: when the API answers with an error status.

    ToolUniverse:
        ZINC_search_by_name(name=name)
    """
    # NOTE(review): this endpoint carries no ".json" suffix although the
    # response is parsed as JSON — confirm the server returns JSON here.
    response = requests.get(
        f"{ZINC_API}/substances/search",
        params={"q": name, "count": max_results},
    )
    response.raise_for_status()

    rows = [
        {
            "zinc_id": hit.get("zinc_id", ""),
            "name": hit.get("name", ""),
            "smiles": hit.get("smiles", ""),
            "mwt": hit.get("mwt", ""),
            "logp": hit.get("logp", ""),
            "purchasable": hit.get("purchasability", ""),
        }
        for hit in response.json()
    ]

    table = pd.DataFrame(rows)
    print(f"ZINC search '{name}': {len(table)} compounds")
    return table
66
+ ```
67
+
68
+ ## 2. ZINC SMILES 類似性検索
69
+
70
+ ```python
71
def zinc_search_by_smiles(smiles, similarity=0.7, max_results=20):
    """
    Run a ZINC similarity search for a SMILES structure.

    Parameters:
        smiles: str — SMILES string
        similarity: float — Tanimoto similarity threshold (0-1)
        max_results: int — maximum results (sent as ``count``)

    Returns:
        pandas.DataFrame with the columns zinc_id, smiles, similarity, mwt,
        logp and purchasable, one row per returned compound.

    Raises:
        requests.HTTPError: when the API answers with an error status.

    ToolUniverse:
        ZINC_search_by_smiles(smiles=smiles)
    """
    # NOTE(review): this endpoint carries no ".json" suffix although the
    # response is parsed as JSON — confirm the server returns JSON here.
    query = {
        "smiles": smiles,
        "similarity": similarity,
        "count": max_results,
    }
    response = requests.get(f"{ZINC_API}/substances/search", params=query)
    response.raise_for_status()

    rows = [
        {
            "zinc_id": hit.get("zinc_id", ""),
            "smiles": hit.get("smiles", ""),
            "similarity": hit.get("similarity", ""),
            "mwt": hit.get("mwt", ""),
            "logp": hit.get("logp", ""),
            "purchasable": hit.get("purchasability", ""),
        }
        for hit in response.json()
    ]

    table = pd.DataFrame(rows)
    print(f"ZINC SMILES search: {len(table)} similar compounds "
          f"(threshold={similarity})")
    return table
107
+ ```
108
+
109
+ ## 3. ZINC 化合物詳細取得
110
+
111
+ ```python
112
def zinc_get_substance(zinc_id):
    """
    Retrieve the full record of a compound from its ZINC ID.

    Parameters:
        zinc_id: str — ZINC ID (e.g., "ZINC000000000001")

    Returns:
        tuple (info, data): ``info`` is a dict of selected properties and
        ``data`` is the unmodified JSON payload.

    Raises:
        requests.HTTPError: when the API answers with an error status.

    ToolUniverse:
        ZINC_get_substance(zinc_id=zinc_id)
    """
    response = requests.get(f"{ZINC_API}/substances/{zinc_id}.json")
    response.raise_for_status()
    data = response.json()

    # Properties copied under their own name; missing ones become "".
    copied_fields = (
        "zinc_id",
        "name",
        "smiles",
        "inchikey",
        "mwt",
        "logp",
        "num_rotatable_bonds",
        "num_hba",
        "num_hbd",
        "tpsa",
    )
    info = {field: data.get(field, "") for field in copied_fields}
    # The payload's "purchasability" is exposed as "purchasable".
    info["purchasable"] = data.get("purchasability", "")

    print(f"ZINC {zinc_id}: {info['name']} (MW={info['mwt']})")
    return info, data
143
+ ```
144
+
145
+ ## 4. ZINC カタログ一覧
146
+
147
+ ```python
148
def zinc_get_catalogs():
    """
    List the catalogs (vendors) available in ZINC.

    Returns:
        pandas.DataFrame with the columns catalog_name, short_name,
        num_substances and url, one row per catalog.

    Raises:
        requests.HTTPError: when the API answers with an error status.

    ToolUniverse:
        ZINC_get_catalogs()
    """
    response = requests.get(f"{ZINC_API}/catalogs.json")
    response.raise_for_status()

    rows = [
        {
            "catalog_name": catalog.get("name", ""),
            "short_name": catalog.get("short_name", ""),
            "num_substances": catalog.get("num_substances", 0),
            "url": catalog.get("url", ""),
        }
        for catalog in response.json()
    ]

    table = pd.DataFrame(rows)
    print(f"ZINC catalogs: {len(table)} vendors")
    return table
172
+ ```
173
+
174
+ ## 5. バーチャルスクリーニング前処理パイプライン
175
+
176
+ ```python
177
def virtual_screening_prep(query_smiles, lipinski=True, max_compounds=100):
    """
    Prepare a compound set for virtual screening.

    Searches ZINC for compounds similar to ``query_smiles`` (similarity
    threshold 0.6), optionally filters them, and sorts the remainder by
    descending similarity.

    Parameters:
        query_smiles: str — SMILES string of the query structure
        lipinski: bool — apply the drug-likeness filter. Only the molecular
            weight (<= 500) and logP (<= 5) criteria of Lipinski's Rule of
            Five are checked; rows whose values are not numeric are dropped.
        max_compounds: int — maximum number of compounds requested

    Returns:
        pandas.DataFrame of candidate compounds (empty when the search
        finds nothing).

    ToolUniverse (cross-tool):
        ZINC_search_by_smiles(smiles=query_smiles) → ZINC_get_substance(zinc_id)
    """
    # Step 1: Similar compound search
    df = zinc_search_by_smiles(query_smiles, similarity=0.6,
                               max_results=max_compounds)

    if df.empty:
        print("No similar compounds found")
        return df

    # Step 2: Lipinski filter
    if lipinski:
        df["mwt"] = pd.to_numeric(df["mwt"], errors="coerce")
        df["logp"] = pd.to_numeric(df["logp"], errors="coerce")
        before = len(df)
        within_limits = (df["mwt"] <= 500) & (df["logp"] <= 5)
        # Copy the filtered rows so the column assignment in step 3 writes
        # to an independent frame instead of a slice of the search result
        # (which triggers pandas' SettingWithCopyWarning).
        df = df[within_limits].copy()
        print(f"Lipinski filter: {before} → {len(df)} compounds")

    # Step 3: Sort by similarity
    df["similarity"] = pd.to_numeric(df["similarity"], errors="coerce")
    df = df.sort_values("similarity", ascending=False)

    print(f"VS prep: {len(df)} compounds ready for screening")
    return df
210
+ ```
211
+
212
+ ## References
213
+
214
+ ### Output Files
215
+
216
+ | ファイル | 形式 |
217
+ |---|---|
218
+ | `results/zinc_search.csv` | CSV |
219
+ | `results/zinc_similar.csv` | CSV |
220
+ | `results/zinc_substance.json` | JSON |
221
+ | `results/zinc_catalogs.csv` | CSV |
222
+ | `results/vs_library.csv` | CSV |
223
+
224
+ ### 利用可能ツール
225
+
226
+ | カテゴリ | 主要ツール | 用途 |
227
+ |---|---|---|
228
+ | ZINC | `ZINC_search_by_name` | 化合物名検索 |
229
+ | ZINC | `ZINC_search_by_smiles` | SMILES 類似性検索 |
230
+ | ZINC | `ZINC_get_substance` | 化合物詳細 |
231
+ | ZINC | `ZINC_get_catalogs` | カタログ一覧 |
232
+
233
+ ### 参照スキル
234
+
235
+ | スキル | 関連 |
236
+ |---|---|
237
+ | `scientific-compound-similarity` | 化合物類似性 |
238
+ | `scientific-pharmacology-targets` | 薬理学ターゲット |
239
+ | `scientific-molecular-docking` | 分子ドッキング |
240
+ | `scientific-drug-target-interaction` | DTI 解析 |
241
+ | `scientific-admet-toxicity` | ADMET 毒性 |
242
+
243
+ ### 依存パッケージ
244
+
245
+ `requests`, `pandas`