@nahisaho/satori 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -29
- package/package.json +1 -1
- package/src/.github/skills/scientific-biothings-idmapping/SKILL.md +298 -0
- package/src/.github/skills/scientific-compound-screening/SKILL.md +245 -0
- package/src/.github/skills/scientific-genome-sequence-tools/SKILL.md +304 -0
- package/src/.github/skills/scientific-healthcare-ai/SKILL.md +273 -0
- package/src/.github/skills/scientific-human-protein-atlas/SKILL.md +244 -0
- package/src/.github/skills/scientific-metabolic-modeling/SKILL.md +288 -0
- package/src/.github/skills/scientific-noncoding-rna/SKILL.md +262 -0
- package/src/.github/skills/scientific-pharmacology-targets/SKILL.md +323 -0
- package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +327 -0
- package/src/.github/skills/scientific-structural-proteomics/SKILL.md +317 -0
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-metabolic-modeling
|
|
3
|
+
description: |
|
|
4
|
+
代謝モデリングスキル。BiGG Models ゲノムスケール代謝モデル、
|
|
5
|
+
BioModels SBML リポジトリを統合した代謝ネットワーク解析・
|
|
6
|
+
モデル検索パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Metabolic Modeling
|
|
10
|
+
|
|
11
|
+
BiGG Models と BioModels を活用したゲノムスケール代謝モデルの
|
|
12
|
+
検索・探索・解析パイプラインを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- ゲノムスケール代謝モデル (GEM) を検索・取得するとき
|
|
17
|
+
- BiGG Models の反応・代謝物データを調べるとき
|
|
18
|
+
- BioModels リポジトリから SBML モデルを取得するとき
|
|
19
|
+
- 代謝パスウェイのフラックス解析の準備を行うとき
|
|
20
|
+
- 複数生物種の代謝モデルを比較するとき
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Quick Start
|
|
25
|
+
|
|
26
|
+
## 1. BiGG Models 検索
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
import requests
|
|
30
|
+
import pandas as pd
|
|
31
|
+
|
|
32
|
+
# Base URL of the BiGG Models REST API (v2); the BiGG helpers append endpoint paths to it.
BIGG_API = "http://bigg.ucsd.edu/api/v2"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def bigg_search(query, search_type="models", timeout=30):
    """
    Search the BiGG Models database.

    Parameters:
        query: str — search term
        search_type: str — "models", "reactions", "metabolites"
        timeout: float or (connect, read) tuple — seconds to wait for the
            server before requests raises a timeout error (default 30)

    Returns:
        pandas.DataFrame — one row per entry of the response's "results"
        list; empty when that key is absent.

    Raises:
        requests.HTTPError — when the server answers with a 4xx/5xx status.

    ToolUniverse:
        BiGG_search(query=query, search_type=search_type)
        BiGG_list_models()
    """
    url = f"{BIGG_API}/search"
    params = {
        "query": query,
        "search_type": search_type,
    }
    # requests has no default timeout; without one a stalled server blocks forever.
    resp = requests.get(url, params=params, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    results = data.get("results", [])
    df = pd.DataFrame(results)
    print(f"BiGG search '{query}' ({search_type}): {len(df)} results")
    return df
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## 2. BiGG モデル詳細取得
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
def bigg_get_model(model_id, timeout=30):
    """
    Fetch the details of a genome-scale metabolic model from BiGG Models.

    Parameters:
        model_id: str — BiGG model ID (e.g., "iJO1366")
        timeout: float or (connect, read) tuple — seconds to wait for the
            server before requests raises a timeout error (default 30)

    Returns:
        (info, data) — `info` is a summary dict (model_id, organism,
        genome_name, num_reactions, num_metabolites, num_genes) and `data`
        is the full decoded JSON response.

    Raises:
        requests.HTTPError — when the server answers with a 4xx/5xx status.

    ToolUniverse:
        BiGG_get_model(model_id=model_id)
        BiGG_get_model_reactions(model_id=model_id)
        BiGG_get_database_version()
    """
    url = f"{BIGG_API}/models/{model_id}"
    # requests has no default timeout; without one a stalled server blocks forever.
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    info = {
        "model_id": data.get("bigg_id", ""),
        "organism": data.get("organism", ""),
        "genome_name": data.get("genome_name", ""),
        "num_reactions": data.get("reaction_count", 0),
        "num_metabolites": data.get("metabolite_count", 0),
        "num_genes": data.get("gene_count", 0),
    }

    print(f"BiGG model {model_id}: {info['organism']}, "
          f"{info['num_reactions']} reactions, "
          f"{info['num_metabolites']} metabolites, "
          f"{info['num_genes']} genes")
    return info, data
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## 3. BiGG 反応・代謝物データ
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
def bigg_get_reaction(reaction_id, timeout=30):
    """
    Fetch the details of a BiGG universal reaction (reaction string,
    models that contain it).

    Parameters:
        reaction_id: str — BiGG reaction ID
        timeout: float or (connect, read) tuple — seconds to wait for the
            server before requests raises a timeout error (default 30)

    Returns:
        (info, data) — `info` is a summary dict (reaction_id, name,
        reaction_string, pseudoreaction, model_count) and `data` is the
        full decoded JSON response.

    Raises:
        requests.HTTPError — when the server answers with a 4xx/5xx status.

    ToolUniverse:
        BiGG_get_reaction(reaction_id=reaction_id)
    """
    url = f"{BIGG_API}/universal/reactions/{reaction_id}"
    # requests has no default timeout; without one a stalled server blocks forever.
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    info = {
        "reaction_id": data.get("bigg_id", ""),
        "name": data.get("name", ""),
        "reaction_string": data.get("reaction_string", ""),
        "pseudoreaction": data.get("pseudoreaction", False),
        # `or []` keeps len() from failing if the key is present but null.
        "model_count": len(data.get("models_containing_reaction") or []),
    }

    print(f"BiGG reaction {reaction_id}: {info['name']}")
    return info, data
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def bigg_get_metabolite(metabolite_id, timeout=30):
    """
    Fetch the details of a BiGG universal metabolite.

    Parameters:
        metabolite_id: str — BiGG metabolite ID
        timeout: float or (connect, read) tuple — seconds to wait for the
            server before requests raises a timeout error (default 30)

    Returns:
        (info, data) — `info` is a summary dict (metabolite_id, name,
        formulae, charges, model_count) and `data` is the full decoded
        JSON response.

    Raises:
        requests.HTTPError — when the server answers with a 4xx/5xx status.

    ToolUniverse:
        BiGG_get_metabolite(metabolite_id=metabolite_id)
    """
    url = f"{BIGG_API}/universal/metabolites/{metabolite_id}"
    # requests has no default timeout; without one a stalled server blocks forever.
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    info = {
        "metabolite_id": data.get("bigg_id", ""),
        "name": data.get("name", ""),
        "formulae": data.get("formulae", []),
        "charges": data.get("charges", []),
        # `or []` keeps len() from failing if the key is present but null.
        "model_count": len(data.get("models_containing_metabolite") or []),
    }

    print(f"BiGG metabolite {metabolite_id}: {info['name']}")
    return info, data
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
## 4. BioModels リポジトリ検索
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
# Base URL of the EBI BioModels service; the BioModels helpers append paths to it.
BIOMODELS_API = "https://www.ebi.ac.uk/biomodels"
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def biomodels_search(query, num_results=10, timeout=30):
    """
    Search the BioModels (SBML) repository for models.

    Parameters:
        query: str — search term
        num_results: int — value sent as the "numResults" query parameter
        timeout: float or (connect, read) tuple — seconds to wait for the
            server before requests raises a timeout error (default 30)

    Returns:
        pandas.DataFrame — one row per entry of the response's "models"
        list with columns model_id, name, format, submission_date and
        publication; empty when nothing is returned.

    Raises:
        requests.HTTPError — when the server answers with a 4xx/5xx status.

    ToolUniverse:
        biomodels_search(query=query)
        BioModels_search_parameters(query=query)
    """
    url = f"{BIOMODELS_API}/search"
    params = {"query": query, "numResults": num_results}
    # Ask for JSON explicitly (as biomodels_get_model does) so resp.json()
    # is not handed an HTML page, and bound the wait with a timeout.
    resp = requests.get(
        url,
        params=params,
        headers={"Accept": "application/json"},
        timeout=timeout,
    )
    resp.raise_for_status()
    data = resp.json()

    results = []
    for m in data.get("models", []):
        # "format" may be an object with a "name" or a bare string; accept
        # both and fall back to "" for anything else (including null).
        fmt = m.get("format")
        if isinstance(fmt, dict):
            fmt = fmt.get("name", "")
        elif not isinstance(fmt, str):
            fmt = ""

        publication = m.get("publication")
        title = publication.get("title", "") if isinstance(publication, dict) else ""

        results.append({
            "model_id": m.get("id", ""),
            "name": m.get("name", ""),
            "format": fmt,
            "submission_date": m.get("submissionDate", ""),
            "publication": title,
        })

    df = pd.DataFrame(results)
    print(f"BioModels '{query}': {data.get('matches', 0)} total, "
          f"{len(df)} returned")
    return df
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def biomodels_get_model(model_id, timeout=30):
    """
    Fetch the details of a single BioModels model.

    Parameters:
        model_id: str — BioModels model identifier
        timeout: float or (connect, read) tuple — seconds to wait for the
            server before requests raises a timeout error (default 30)

    Returns:
        (info, data) — `info` is a summary dict (model_id, name,
        description, format) and `data` is the full decoded JSON response.
        `info["model_id"]` is the response's "publicationId" when present,
        otherwise the `model_id` argument.

    Raises:
        requests.HTTPError — when the server answers with a 4xx/5xx status.

    ToolUniverse:
        BioModels_get_model(model_id=model_id)
        BioModels_list_files(model_id=model_id)
        BioModels_download_model(model_id=model_id)
    """
    url = f"{BIOMODELS_API}/{model_id}"
    # requests has no default timeout; without one a stalled server blocks forever.
    resp = requests.get(
        url,
        headers={"Accept": "application/json"},
        timeout=timeout,
    )
    resp.raise_for_status()
    data = resp.json()

    # Tolerate a null or non-object "format" instead of raising AttributeError.
    fmt = data.get("format")
    format_name = fmt.get("name", "") if isinstance(fmt, dict) else ""

    info = {
        "model_id": data.get("publicationId", model_id),
        "name": data.get("name", ""),
        "description": data.get("description", ""),
        "format": format_name,
    }

    print(f"BioModels {model_id}: {info['name']}")
    return info, data
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## 5. 統合代謝モデル探索パイプライン
|
|
215
|
+
|
|
216
|
+
```python
|
|
217
|
+
def metabolic_model_exploration(organism_query):
    """
    Explore metabolic models across BiGG and BioModels for one query.

    Runs a BiGG model search, fetches details for the first BiGG hit (when
    it carries a "bigg_id"), then runs a BioModels search with the same
    query.

    Parameters:
        organism_query: str — search term passed to both databases

    Returns:
        dict — keys "query", "bigg_models" (hit count), optionally
        "bigg_top_model" (summary of the first BiGG hit) and
        "biomodels_models" (hit count).

    ToolUniverse (cross-database):
        BiGG_search(query=organism_query) → BiGG_get_model(model_id)
        biomodels_search(query=organism_query) → BioModels_get_model(model_id)
    """
    summary = {"query": organism_query}

    # Step 1: BiGG search, then drill into the first hit if it has an ID.
    bigg_hits = bigg_search(organism_query, search_type="models")
    n_bigg = len(bigg_hits)
    summary["bigg_models"] = n_bigg

    top_id = "" if bigg_hits.empty else bigg_hits.iloc[0].get("bigg_id", "")
    if top_id:
        summary["bigg_top_model"] = bigg_get_model(top_id)[0]

    # Step 2: BioModels search
    n_biomodels = len(biomodels_search(organism_query))
    summary["biomodels_models"] = n_biomodels

    print(f"Metabolic models '{organism_query}': "
          f"BiGG={n_bigg}, "
          f"BioModels={n_biomodels}")
    return summary
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
## References
|
|
249
|
+
|
|
250
|
+
### Output Files
|
|
251
|
+
|
|
252
|
+
| ファイル | 形式 |
|
|
253
|
+
|---|---|
|
|
254
|
+
| `results/bigg_search.csv` | CSV |
|
|
255
|
+
| `results/bigg_model.json` | JSON |
|
|
256
|
+
| `results/bigg_reaction.json` | JSON |
|
|
257
|
+
| `results/biomodels_search.csv` | CSV |
|
|
258
|
+
| `results/biomodels_model.json` | JSON |
|
|
259
|
+
|
|
260
|
+
### 利用可能ツール
|
|
261
|
+
|
|
262
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
263
|
+
|---|---|---|
|
|
264
|
+
| BiGG | `BiGG_search` | モデル/反応/代謝物検索 |
|
|
265
|
+
| BiGG | `BiGG_list_models` | モデル一覧 |
|
|
266
|
+
| BiGG | `BiGG_get_model` | モデル詳細 |
|
|
267
|
+
| BiGG | `BiGG_get_model_reactions` | モデル反応一覧 |
|
|
268
|
+
| BiGG | `BiGG_get_reaction` | 反応詳細 |
|
|
269
|
+
| BiGG | `BiGG_get_metabolite` | 代謝物詳細 |
|
|
270
|
+
| BiGG | `BiGG_get_database_version` | DB バージョン |
|
|
271
|
+
| BioModels | `biomodels_search` | モデル検索 |
|
|
272
|
+
| BioModels | `BioModels_get_model` | モデル詳細 |
|
|
273
|
+
| BioModels | `BioModels_list_files` | ファイル一覧 |
|
|
274
|
+
| BioModels | `BioModels_download_model` | モデル DL |
|
|
275
|
+
| BioModels | `BioModels_search_parameters` | パラメータ検索 |
|
|
276
|
+
|
|
277
|
+
### 参照スキル
|
|
278
|
+
|
|
279
|
+
| スキル | 関連 |
|
|
280
|
+
|---|---|
|
|
281
|
+
| `scientific-pathway-enrichment` | パスウェイ解析 |
|
|
282
|
+
| `scientific-systems-biology` | システム生物学 |
|
|
283
|
+
| `scientific-gene-expression-transcriptomics` | 発現データ |
|
|
284
|
+
| `scientific-biothings-idmapping` | ID マッピング |
|
|
285
|
+
|
|
286
|
+
### 依存パッケージ
|
|
287
|
+
|
|
288
|
+
`requests`, `pandas`
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-noncoding-rna
|
|
3
|
+
description: |
|
|
4
|
+
非コード RNA (ncRNA) 解析スキル。Rfam RNA ファミリー検索、
|
|
5
|
+
RNAcentral 統合 ncRNA データベース、共分散モデル、構造マッピング、
|
|
6
|
+
系統樹解析パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Noncoding RNA
|
|
10
|
+
|
|
11
|
+
Rfam および RNAcentral を活用した ncRNA ファミリー検索、
|
|
12
|
+
配列アノテーション、構造予測パイプラインを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- RNA ファミリー (miRNA, lncRNA, rRNA, tRNA 等) を分類するとき
|
|
17
|
+
- Rfam 共分散モデルで RNA 配列を検索するとき
|
|
18
|
+
- RNAcentral で ncRNA のクロスリファレンスを取得するとき
|
|
19
|
+
- RNA 二次構造・構造マッピング情報を取得するとき
|
|
20
|
+
- RNA ファミリーの系統樹情報を調べるとき
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Quick Start
|
|
25
|
+
|
|
26
|
+
## 1. Rfam ファミリー検索
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
import requests
|
|
30
|
+
import pandas as pd
|
|
31
|
+
|
|
32
|
+
# Base URL for Rfam family resources.
RFAM_API = "https://rfam.org/family"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def get_rfam_family(rfam_acc, timeout=30):
    """
    Fetch the detailed record of an Rfam RNA family.

    Parameters:
        rfam_acc: str — Rfam accession (e.g., "RF00001") or ID
        timeout: float or (connect, read) tuple — seconds to wait for the
            server before requests raises a timeout error (default 30)

    Returns:
        The decoded JSON response, unchanged.

    Raises:
        requests.HTTPError — when the server answers with a 4xx/5xx status.

    ToolUniverse:
        Rfam_get_family(rfam_acc=rfam_acc)
        Rfam_id_to_accession(rfam_id=rfam_id)
    """
    url = f"https://rfam.org/family/{rfam_acc}?content-type=application/json"
    # requests has no default timeout; without one a stalled server blocks forever.
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    print(f"Rfam {rfam_acc}: {data.get('rfam', {}).get('id', '?')}")
    return data
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## 2. Rfam 配列検索 (Infernal cmscan)
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
import time
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def rfam_sequence_search(sequence, email=None, poll_interval=10, max_polls=30,
                         timeout=30):
    """
    Submit an RNA sequence to the Rfam sequence search (Infernal cmscan)
    and poll for the matching RNA families.

    Parameters:
        sequence: str — RNA sequence
        email: unused; kept so existing callers that pass it keep working
        poll_interval: float — seconds to sleep before each poll (default 10)
        max_polls: int — number of polls before giving up (default 30)
        timeout: float or (connect, read) tuple — per-request timeout in
            seconds (default 30)

    Returns:
        list — the hits of a finished job, or [] when polling is exhausted.
        When the submission response carries no "resultURL", the decoded
        submission response itself is returned instead.

    Raises:
        requests.HTTPError — when the submission is answered with 4xx/5xx.

    ToolUniverse:
        Rfam_search_sequence(sequence=sequence)
    """
    url = "https://rfam.org/search/sequence"
    # Request JSON explicitly so .json() is not handed an HTML page.
    headers = {"Accept": "application/json"}

    payload = {
        "seq": sequence,
        "output": "json",
    }
    resp = requests.post(url, data=payload, headers=headers, timeout=timeout)
    resp.raise_for_status()

    # Async job → poll. Decode the submission response once and reuse it.
    submission = resp.json()
    job_url = submission.get("resultURL", "")
    if not job_url:
        return submission

    for _ in range(max_polls):
        time.sleep(poll_interval)
        result = requests.get(job_url, headers=headers, timeout=timeout)
        if result.status_code == 200:
            data = result.json()
            # NOTE(review): the result schema (status == "DONE", hits.hit)
            # is assumed here — confirm against the Rfam API documentation.
            if data.get("status", "") == "DONE":
                hits = data.get("hits", {}).get("hit", [])
                print(f"Rfam cmscan: {len(hits)} family hits")
                return hits

    print("Rfam cmscan: timeout")
    return []
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## 3. Rfam 構造マッピング
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
def get_rfam_structure_mapping(rfam_acc, timeout=30):
    """
    Fetch the PDB structure mapping and the sequence regions of an Rfam
    family.

    Both lookups are best-effort: a non-200 answer or a body that is not
    valid JSON yields an empty list for that part instead of an exception.

    Parameters:
        rfam_acc: str — Rfam accession
        timeout: float or (connect, read) tuple — per-request timeout in
            seconds (default 30)

    Returns:
        (structures, regions) — the decoded JSON of each endpoint, or []
        for a part that could not be retrieved.

    ToolUniverse:
        Rfam_get_structure_mapping(rfam_acc=rfam_acc)
        Rfam_get_covariance_model(rfam_acc=rfam_acc)
        Rfam_get_tree_data(rfam_acc=rfam_acc)
        Rfam_get_sequence_regions(rfam_acc=rfam_acc)
    """
    def _fetch_json(url):
        # Return the decoded JSON body, or [] on a non-200 status / non-JSON body.
        resp = requests.get(url, timeout=timeout)
        if resp.status_code != 200:
            return []
        try:
            return resp.json()
        except ValueError:
            # requests' JSON decode error derives from ValueError.
            return []

    def _count(value):
        # Only a list has a meaningful entry count; report '?' otherwise.
        return len(value) if isinstance(value, list) else '?'

    # Structure mapping
    structures = _fetch_json(
        f"https://rfam.org/family/{rfam_acc}/structures"
        "?content-type=application/json"
    )

    # Sequence regions
    regions = _fetch_json(
        f"https://rfam.org/family/{rfam_acc}/regions"
        "?content-type=application/json"
    )

    print(f"Rfam {rfam_acc}: {_count(structures)} PDB structures, "
          f"{_count(regions)} regions")
    return structures, regions
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## 4. RNAcentral ncRNA 検索
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
# Base URL of the RNAcentral REST API (v1); the RNAcentral helpers append endpoint paths to it.
RNACENTRAL_API = "https://rnacentral.org/api/v1"
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def rnacentral_search(query, page_size=10, timeout=30):
    """
    Search RNAcentral for ncRNA entries.

    Parameters:
        query: str — search term (gene name, accession, keyword)
        page_size: int — value sent as the "page_size" query parameter
        timeout: float or (connect, read) tuple — seconds to wait for the
            server before requests raises a timeout error (default 30)

    Returns:
        pandas.DataFrame — one row per entry of the response's "results"
        list with columns rnacentral_id, description, rna_type, length and
        num_xrefs; empty when nothing is returned.

    Raises:
        requests.HTTPError — when the server answers with a 4xx/5xx status.

    ToolUniverse:
        RNAcentral_search(query=query)
    """
    url = f"{RNACENTRAL_API}/rna/"
    params = {"query": query, "page_size": page_size}
    # requests has no default timeout; without one a stalled server blocks forever.
    resp = requests.get(url, params=params, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    entries = [
        {
            "rnacentral_id": r.get("rnacentral_id", ""),
            "description": r.get("description", ""),
            "rna_type": r.get("rna_type", ""),
            "length": r.get("length", 0),
            "num_xrefs": r.get("xref_count", 0),
        }
        for r in data.get("results", [])
    ]

    df = pd.DataFrame(entries)
    print(f"RNAcentral '{query}': {data.get('count', 0)} total, "
          f"{len(df)} returned")
    return df
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def rnacentral_get_by_accession(accession, timeout=30):
    """
    Fetch the detailed ncRNA record for an RNAcentral accession.

    Parameters:
        accession: str — RNAcentral accession
        timeout: float or (connect, read) tuple — seconds to wait for the
            server before requests raises a timeout error (default 30)

    Returns:
        The decoded JSON response, unchanged.

    Raises:
        requests.HTTPError — when the server answers with a 4xx/5xx status.

    ToolUniverse:
        RNAcentral_get_by_accession(accession=accession)
    """
    url = f"{RNACENTRAL_API}/rna/{accession}/"
    # requests has no default timeout; without one a stalled server blocks forever.
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    print(f"RNAcentral {accession}: {data.get('description', '')}")
    return data
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
## 5. ncRNA 統合解析パイプライン
|
|
193
|
+
|
|
194
|
+
```python
|
|
195
|
+
def ncRNA_integrated_search(sequence, rfam_acc=None):
    """
    Run the sequence-based integrated ncRNA analysis.

    Identifies Rfam families for the sequence, fetches the family record
    of the first hit, then searches RNAcentral with the first 30
    characters of the sequence.

    Parameters:
        sequence: str — RNA sequence
        rfam_acc: str or None — accession used only when the first Rfam
            hit has no "acc" field

    Returns:
        dict — keys "sequence_length", "rfam_hits", optionally
        "rfam_family", and "rnacentral_hits".

    ToolUniverse (cross-database):
        Rfam_search_sequence(sequence) → Rfam_get_family(rfam_acc)
        RNAcentral_search(query)
    """
    report = {"sequence_length": len(sequence)}

    # Step 1: Rfam family identification
    hits = rfam_sequence_search(sequence)
    hits_is_list = isinstance(hits, list)
    report["rfam_hits"] = len(hits) if hits_is_list else 0

    # Step 2: for a non-empty hit list, look up the top family
    if hits_is_list and hits:
        accession = hits[0].get("acc", rfam_acc or "")
        if accession:
            # Called for its lookup/printout; the returned record is not kept.
            get_rfam_family(accession)
            report["rfam_family"] = accession

    # Step 3: RNAcentral search on a truncated query
    n_rnacentral = len(rnacentral_search(sequence[:30]))
    report["rnacentral_hits"] = n_rnacentral

    print(f"ncRNA pipeline: Rfam={report.get('rfam_family', 'none')}, "
          f"RNAcentral={n_rnacentral} hits")
    return report
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
## References
|
|
227
|
+
|
|
228
|
+
### Output Files
|
|
229
|
+
|
|
230
|
+
| ファイル | 形式 |
|
|
231
|
+
|---|---|
|
|
232
|
+
| `results/rfam_family.json` | JSON |
|
|
233
|
+
| `results/rfam_cmscan_hits.json` | JSON |
|
|
234
|
+
| `results/rfam_structures.json` | JSON |
|
|
235
|
+
| `results/rnacentral_search.csv` | CSV |
|
|
236
|
+
|
|
237
|
+
### 利用可能ツール
|
|
238
|
+
|
|
239
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
240
|
+
|---|---|---|
|
|
241
|
+
| Rfam | `Rfam_get_family` | ファミリー情報 |
|
|
242
|
+
| Rfam | `Rfam_search_sequence` | 配列→ファミリー同定 |
|
|
243
|
+
| Rfam | `Rfam_get_covariance_model` | 共分散モデル |
|
|
244
|
+
| Rfam | `Rfam_get_structure_mapping` | PDB マッピング |
|
|
245
|
+
| Rfam | `Rfam_get_tree_data` | 系統樹 |
|
|
246
|
+
| Rfam | `Rfam_get_sequence_regions` | 配列領域 |
|
|
247
|
+
| Rfam | `Rfam_id_to_accession` | ID→アクセッション変換 |
|
|
248
|
+
| RNAcentral | `RNAcentral_search` | ncRNA 検索 |
|
|
249
|
+
| RNAcentral | `RNAcentral_get_by_accession` | 詳細取得 |
|
|
250
|
+
|
|
251
|
+
### 参照スキル
|
|
252
|
+
|
|
253
|
+
| スキル | 関連 |
|
|
254
|
+
|---|---|
|
|
255
|
+
| `scientific-gene-expression-transcriptomics` | 転写産物解析 |
|
|
256
|
+
| `scientific-genome-sequence-tools` | 配列取得 |
|
|
257
|
+
| `scientific-structural-proteomics` | RNA 構造 |
|
|
258
|
+
| `scientific-biothings-idmapping` | ID マッピング |
|
|
259
|
+
|
|
260
|
+
### 依存パッケージ
|
|
261
|
+
|
|
262
|
+
`requests`, `pandas`
|