@nahisaho/satori 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,288 @@
1
+ ---
2
+ name: scientific-metabolic-modeling
3
+ description: |
4
+ 代謝モデリングスキル。BiGG Models ゲノムスケール代謝モデル、
5
+ BioModels SBML リポジトリを統合した代謝ネットワーク解析・
6
+ モデル検索パイプライン。
7
+ ---
8
+
9
+ # Scientific Metabolic Modeling
10
+
11
+ BiGG Models と BioModels を活用したゲノムスケール代謝モデルの
12
+ 検索・探索・解析パイプラインを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - ゲノムスケール代謝モデル (GEM) を検索・取得するとき
17
+ - BiGG Models の反応・代謝物データを調べるとき
18
+ - BioModels リポジトリから SBML モデルを取得するとき
19
+ - 代謝パスウェイのフラックス解析の準備を行うとき
20
+ - 複数生物種の代謝モデルを比較するとき
21
+
22
+ ---
23
+
24
+ ## Quick Start
25
+
26
+ ## 1. BiGG Models 検索
27
+
28
+ ```python
29
+ import requests
30
+ import pandas as pd
31
+
32
+ BIGG_API = "http://bigg.ucsd.edu/api/v2"
33
+
34
+
35
def bigg_search(query, search_type="models"):
    """
    Search the BiGG Models database.

    Parameters:
        query: str — search term
        search_type: str — "models", "reactions", "metabolites"

    Returns:
        pandas.DataFrame built from the "results" list of the JSON
        response (an empty frame when that key is absent).

    Raises:
        requests.HTTPError: when the server answers with an error status.

    ToolUniverse:
        BiGG_search(query=query, search_type=search_type)
        BiGG_list_models()
    """
    resp = requests.get(
        f"{BIGG_API}/search",
        params={"query": query, "search_type": search_type},
    )
    resp.raise_for_status()

    df = pd.DataFrame(resp.json().get("results", []))
    print(f"BiGG search '{query}' ({search_type}): {len(df)} results")
    return df
60
+ ```
61
+
62
+ ## 2. BiGG モデル詳細取得
63
+
64
+ ```python
65
def bigg_get_model(model_id):
    """
    Fetch the details of a genome-scale metabolic model from BiGG Models.

    Parameters:
        model_id: str — BiGG model ID (e.g., "iJO1366")

    Returns:
        (info, data): `info` is a summary dict (model_id, organism,
        genome_name, num_reactions, num_metabolites, num_genes) and
        `data` is the full decoded JSON response.

    Raises:
        requests.HTTPError: when the server answers with an error status.

    ToolUniverse:
        BiGG_get_model(model_id=model_id)
        BiGG_get_model_reactions(model_id=model_id)
        BiGG_get_database_version()
    """
    resp = requests.get(f"{BIGG_API}/models/{model_id}")
    resp.raise_for_status()
    data = resp.json()

    # Summary key -> (response key, fallback when the key is missing).
    field_map = {
        "model_id": ("bigg_id", ""),
        "organism": ("organism", ""),
        "genome_name": ("genome_name", ""),
        "num_reactions": ("reaction_count", 0),
        "num_metabolites": ("metabolite_count", 0),
        "num_genes": ("gene_count", 0),
    }
    info = {
        out_key: data.get(src_key, fallback)
        for out_key, (src_key, fallback) in field_map.items()
    }

    print(f"BiGG model {model_id}: {info['organism']}, "
          f"{info['num_reactions']} reactions, "
          f"{info['num_metabolites']} metabolites, "
          f"{info['num_genes']} genes")
    return info, data
96
+ ```
97
+
98
+ ## 3. BiGG 反応・代謝物データ
99
+
100
+ ```python
101
def bigg_get_reaction(reaction_id):
    """
    Fetch a universal BiGG reaction (reaction string, containing models).

    Returns:
        (info, data): `info` is a summary dict (reaction_id, name,
        reaction_string, pseudoreaction, model_count) and `data` is the
        full decoded JSON response.

    Raises:
        requests.HTTPError: when the server answers with an error status.

    ToolUniverse:
        BiGG_get_reaction(reaction_id=reaction_id)
    """
    resp = requests.get(f"{BIGG_API}/universal/reactions/{reaction_id}")
    resp.raise_for_status()
    data = resp.json()

    containing_models = data.get("models_containing_reaction", [])
    info = dict(
        reaction_id=data.get("bigg_id", ""),
        name=data.get("name", ""),
        reaction_string=data.get("reaction_string", ""),
        pseudoreaction=data.get("pseudoreaction", False),
        model_count=len(containing_models),
    )

    print(f"BiGG reaction {reaction_id}: {info['name']}")
    return info, data
123
+
124
+
125
def bigg_get_metabolite(metabolite_id):
    """
    Fetch a universal BiGG metabolite.

    Returns:
        (info, data): `info` is a summary dict (metabolite_id, name,
        formulae, charges, model_count) and `data` is the full decoded
        JSON response.

    Raises:
        requests.HTTPError: when the server answers with an error status.

    ToolUniverse:
        BiGG_get_metabolite(metabolite_id=metabolite_id)
    """
    resp = requests.get(f"{BIGG_API}/universal/metabolites/{metabolite_id}")
    resp.raise_for_status()
    data = resp.json()

    containing_models = data.get("models_containing_metabolite", [])
    info = dict(
        metabolite_id=data.get("bigg_id", ""),
        name=data.get("name", ""),
        formulae=data.get("formulae", []),
        charges=data.get("charges", []),
        model_count=len(containing_models),
    )

    print(f"BiGG metabolite {metabolite_id}: {info['name']}")
    return info, data
147
+ ```
148
+
149
+ ## 4. BioModels リポジトリ検索
150
+
151
+ ```python
152
+ BIOMODELS_API = "https://www.ebi.ac.uk/biomodels"
153
+
154
+
155
def _biomodels_nested_text(value, key):
    """Return value[key] for a dict, the value itself for a string, else ""."""
    if isinstance(value, dict):
        return value.get(key, "")
    if isinstance(value, str):
        return value
    return ""


def biomodels_search(query, num_results=10):
    """
    Search the BioModels (SBML) repository for models.

    Parameters:
        query: str — search term
        num_results: int — value sent as the "numResults" query parameter

    Returns:
        pandas.DataFrame with one row per returned model and the columns
        model_id, name, format, submission_date, publication.

    Raises:
        requests.HTTPError: when the server answers with an error status.

    ToolUniverse:
        biomodels_search(query=query)
        BioModels_search_parameters(query=query)
    """
    url = f"{BIOMODELS_API}/search"
    params = {"query": query, "numResults": num_results}
    # Ask for JSON explicitly, as biomodels_get_model does, instead of
    # relying on the server's default content type before calling .json().
    resp = requests.get(
        url, params=params, headers={"Accept": "application/json"}
    )
    resp.raise_for_status()
    data = resp.json()

    # "format" / "publication" may arrive as an object, a bare string or
    # null; _biomodels_nested_text copes with all three instead of raising
    # AttributeError on a non-dict value.
    results = [
        {
            "model_id": m.get("id", ""),
            "name": m.get("name", ""),
            "format": _biomodels_nested_text(m.get("format"), "name"),
            "submission_date": m.get("submissionDate", ""),
            "publication": _biomodels_nested_text(
                m.get("publication"), "title"
            ),
        }
        for m in data.get("models") or []
    ]

    df = pd.DataFrame(results)
    print(f"BioModels '{query}': {data.get('matches', 0)} total, "
          f"{len(df)} returned")
    return df
187
+
188
+
189
def biomodels_get_model(model_id):
    """
    Fetch the details of a single BioModels entry as JSON.

    Returns:
        (info, data): `info` is a summary dict (model_id, name,
        description, format) and `data` is the full decoded JSON response.
        `model_id` falls back to the argument when the response has no
        "publicationId".

    Raises:
        requests.HTTPError: when the server answers with an error status.

    ToolUniverse:
        BioModels_get_model(model_id=model_id)
        BioModels_list_files(model_id=model_id)
        BioModels_download_model(model_id=model_id)
    """
    resp = requests.get(
        f"{BIOMODELS_API}/{model_id}",
        headers={"Accept": "application/json"},
    )
    resp.raise_for_status()
    data = resp.json()

    format_entry = data.get("format", {})
    info = {
        "model_id": data.get("publicationId", model_id),
        "name": data.get("name", ""),
        "description": data.get("description", ""),
        "format": format_entry.get("name", ""),
    }

    print(f"BioModels {model_id}: {info['name']}")
    return info, data
212
+ ```
213
+
214
+ ## 5. 統合代謝モデル探索パイプライン
215
+
216
+ ```python
217
def metabolic_model_exploration(organism_query):
    """
    Explore metabolic models across BiGG and BioModels for one query.

    Returns:
        dict with "query", "bigg_models" (row count of the BiGG search),
        optionally "bigg_top_model" (summary of the first BiGG result) and
        "biomodels_models" (row count of the BioModels search).

    ToolUniverse (cross-database):
        BiGG_search(query=organism_query) → BiGG_get_model(model_id)
        biomodels_search(query=organism_query) → BioModels_get_model(model_id)
    """
    # Step 1: BiGG search
    bigg_df = bigg_search(organism_query, search_type="models")
    pipeline = {"query": organism_query, "bigg_models": len(bigg_df)}

    # Look up the first BiGG result, if there is one with an ID.
    model_id = "" if bigg_df.empty else bigg_df.iloc[0].get("bigg_id", "")
    if model_id:
        top_info, _ = bigg_get_model(model_id)
        pipeline["bigg_top_model"] = top_info

    # Step 2: BioModels search
    pipeline["biomodels_models"] = len(biomodels_search(organism_query))

    print(f"Metabolic models '{organism_query}': "
          f"BiGG={pipeline['bigg_models']}, "
          f"BioModels={pipeline['biomodels_models']}")
    return pipeline
246
+ ```
247
+
248
+ ## References
249
+
250
+ ### Output Files
251
+
252
+ | ファイル | 形式 |
253
+ |---|---|
254
+ | `results/bigg_search.csv` | CSV |
255
+ | `results/bigg_model.json` | JSON |
256
+ | `results/bigg_reaction.json` | JSON |
257
+ | `results/biomodels_search.csv` | CSV |
258
+ | `results/biomodels_model.json` | JSON |
259
+
260
+ ### 利用可能ツール
261
+
262
+ | カテゴリ | 主要ツール | 用途 |
263
+ |---|---|---|
264
+ | BiGG | `BiGG_search` | モデル/反応/代謝物検索 |
265
+ | BiGG | `BiGG_list_models` | モデル一覧 |
266
+ | BiGG | `BiGG_get_model` | モデル詳細 |
267
+ | BiGG | `BiGG_get_model_reactions` | モデル反応一覧 |
268
+ | BiGG | `BiGG_get_reaction` | 反応詳細 |
269
+ | BiGG | `BiGG_get_metabolite` | 代謝物詳細 |
270
+ | BiGG | `BiGG_get_database_version` | DB バージョン |
271
+ | BioModels | `biomodels_search` | モデル検索 |
272
+ | BioModels | `BioModels_get_model` | モデル詳細 |
273
+ | BioModels | `BioModels_list_files` | ファイル一覧 |
274
+ | BioModels | `BioModels_download_model` | モデル DL |
275
+ | BioModels | `BioModels_search_parameters` | パラメータ検索 |
276
+
277
+ ### 参照スキル
278
+
279
+ | スキル | 関連 |
280
+ |---|---|
281
+ | `scientific-pathway-enrichment` | パスウェイ解析 |
282
+ | `scientific-systems-biology` | システム生物学 |
283
+ | `scientific-gene-expression-transcriptomics` | 発現データ |
284
+ | `scientific-biothings-idmapping` | ID マッピング |
285
+
286
+ ### 依存パッケージ
287
+
288
+ `requests`, `pandas`
@@ -0,0 +1,262 @@
1
+ ---
2
+ name: scientific-noncoding-rna
3
+ description: |
4
+ 非コード RNA (ncRNA) 解析スキル。Rfam RNA ファミリー検索、
5
+ RNAcentral 統合 ncRNA データベース、共分散モデル、構造マッピング、
6
+ 系統樹解析パイプライン。
7
+ ---
8
+
9
+ # Scientific Noncoding RNA
10
+
11
+ Rfam および RNAcentral を活用した ncRNA ファミリー検索、
12
+ 配列アノテーション、構造マッピング取得パイプラインを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - RNA ファミリー (miRNA, lncRNA, rRNA, tRNA 等) を分類するとき
17
+ - Rfam 共分散モデルで RNA 配列を検索するとき
18
+ - RNAcentral で ncRNA のクロスリファレンスを取得するとき
19
+ - RNA 二次構造・構造マッピング情報を取得するとき
20
+ - RNA ファミリーの系統樹情報を調べるとき
21
+
22
+ ---
23
+
24
+ ## Quick Start
25
+
26
+ ## 1. Rfam ファミリー検索
27
+
28
+ ```python
29
+ import requests
30
+ import pandas as pd
31
+
32
+ RFAM_API = "https://rfam.org/family"
33
+
34
+
35
def get_rfam_family(rfam_acc):
    """
    Fetch the JSON record of an Rfam RNA family.

    Parameters:
        rfam_acc: str — Rfam accession (e.g., "RF00001") or ID

    Returns:
        The decoded JSON response, unmodified.

    Raises:
        requests.HTTPError: when the server answers with an error status.

    ToolUniverse:
        Rfam_get_family(rfam_acc=rfam_acc)
        Rfam_id_to_accession(rfam_id=rfam_id)
    """
    # Build the URL from the module-level RFAM_API constant rather than
    # repeating the host literal.
    url = f"{RFAM_API}/{rfam_acc}?content-type=application/json"
    resp = requests.get(url)
    resp.raise_for_status()
    data = resp.json()

    print(f"Rfam {rfam_acc}: {data.get('rfam', {}).get('id', '?')}")
    return data
56
+ ```
57
+
58
+ ## 2. Rfam 配列検索 (Infernal cmscan)
59
+
60
+ ```python
61
+ import time
62
+
63
+
64
def rfam_sequence_search(sequence, email=None):
    """
    Submit an RNA sequence to the Rfam sequence search (Infernal cmscan)
    and poll for the matching RNA families.

    Parameters:
        sequence: str — RNA sequence
        email: accepted for interface compatibility; it is not sent to
            the service by this function.

    Returns:
        - the list found under hits → hit once a poll reports status
          "DONE";
        - the decoded submission response itself when it carries no
          "resultURL";
        - [] when the job has not finished after 30 polls, 10 s apart.

    Raises:
        requests.HTTPError: when the submission is rejected.

    ToolUniverse:
        Rfam_search_sequence(sequence=sequence)
    """
    url = "https://rfam.org/search/sequence"
    # Both the submission and the poll replies are parsed as JSON, so
    # request that representation explicitly.
    headers = {"Accept": "application/json"}
    payload = {
        "seq": sequence,
        "output": "json",
    }
    resp = requests.post(url, data=payload, headers=headers)
    resp.raise_for_status()
    submission = resp.json()  # decode once, reuse below

    # Async job → poll
    job_url = submission.get("resultURL", "")
    if not job_url:
        return submission

    for _ in range(30):
        time.sleep(10)
        result = requests.get(job_url, headers=headers)
        if result.status_code != 200:
            # Any other status is treated as "not finished yet".
            continue
        data = result.json()
        if data.get("status", "") == "DONE":
            hits = data.get("hits", {}).get("hit", [])
            print(f"Rfam cmscan: {len(hits)} family hits")
            return hits

    print("Rfam cmscan: timeout")
    return []
101
+ ```
102
+
103
+ ## 3. Rfam 構造マッピング
104
+
105
+ ```python
106
def get_rfam_structure_mapping(rfam_acc):
    """
    Fetch the PDB structure mapping and the sequence regions of an Rfam
    family.

    Returns:
        (structures, regions): the decoded JSON of the family's
        "structures" and "regions" endpoints; each falls back to [] when
        its request does not return HTTP 200.

    ToolUniverse:
        Rfam_get_structure_mapping(rfam_acc=rfam_acc)
        Rfam_get_covariance_model(rfam_acc=rfam_acc)
        Rfam_get_tree_data(rfam_acc=rfam_acc)
        Rfam_get_sequence_regions(rfam_acc=rfam_acc)
    """
    def _fetch_section(section):
        # Best-effort fetch: anything but HTTP 200 yields an empty list.
        reply = requests.get(
            f"https://rfam.org/family/{rfam_acc}/{section}"
            "?content-type=application/json"
        )
        if reply.status_code == 200:
            return reply.json()
        return []

    structures = _fetch_section("structures")
    regions = _fetch_section("regions")

    print(f"Rfam {rfam_acc}: {len(structures)} PDB structures, "
          f"{len(regions) if isinstance(regions, list) else '?'} regions")
    return structures, regions
135
+ ```
136
+
137
+ ## 4. RNAcentral ncRNA 検索
138
+
139
+ ```python
140
+ RNACENTRAL_API = "https://rnacentral.org/api/v1"
141
+
142
+
143
def rnacentral_search(query, page_size=10):
    """
    Search RNAcentral for ncRNA entries.

    Parameters:
        query: str — search term (gene name, accession, keyword)
        page_size: int — value sent as the "page_size" query parameter

    Returns:
        pandas.DataFrame with one row per returned entry and the columns
        rnacentral_id, description, rna_type, length, num_xrefs.

    Raises:
        requests.HTTPError: when the server answers with an error status.

    ToolUniverse:
        RNAcentral_search(query=query)
    """
    resp = requests.get(
        f"{RNACENTRAL_API}/rna/",
        params={"query": query, "page_size": page_size},
    )
    resp.raise_for_status()
    data = resp.json()

    entries = [
        {
            "rnacentral_id": record.get("rnacentral_id", ""),
            "description": record.get("description", ""),
            "rna_type": record.get("rna_type", ""),
            "length": record.get("length", 0),
            "num_xrefs": record.get("xref_count", 0),
        }
        for record in data.get("results", [])
    ]

    df = pd.DataFrame(entries)
    print(f"RNAcentral '{query}': {data.get('count', 0)} total, "
          f"{len(df)} returned")
    return df
174
+
175
+
176
def rnacentral_get_by_accession(accession):
    """
    Fetch the detail record of one ncRNA from RNAcentral by accession.

    Returns:
        The decoded JSON response, unmodified.

    Raises:
        requests.HTTPError: when the server answers with an error status.

    ToolUniverse:
        RNAcentral_get_by_accession(accession=accession)
    """
    resp = requests.get(f"{RNACENTRAL_API}/rna/{accession}/")
    resp.raise_for_status()

    data = resp.json()
    print(f"RNAcentral {accession}: {data.get('description', '')}")
    return data
190
+ ```
191
+
192
+ ## 5. ncRNA 統合解析パイプライン
193
+
194
+ ```python
195
def ncRNA_integrated_search(sequence, rfam_acc=None):
    """
    Run the sequence-based ncRNA pipeline: Rfam family identification,
    family lookup, then an RNAcentral search.

    Parameters:
        sequence: str — RNA sequence
        rfam_acc: str or None — fallback Rfam accession, used when the
            sequence search yields no hit or a top hit without an "acc".

    Returns:
        dict with "sequence_length", "rfam_hits", optionally
        "rfam_family" (the accession that was looked up) and
        "rnacentral_hits".

    ToolUniverse (cross-database):
        Rfam_search_sequence(sequence) → Rfam_get_family(rfam_acc)
        RNAcentral_search(query)
    """
    pipeline = {"sequence_length": len(sequence)}

    # Step 1: Rfam family identification.
    rfam_hits = rfam_sequence_search(sequence)
    if not isinstance(rfam_hits, list):
        # rfam_sequence_search returns the raw submission payload (not a
        # list) when no job URL was issued; treat that as "no hits".
        rfam_hits = []
    pipeline["rfam_hits"] = len(rfam_hits)

    # Step 2: resolve the accession to look up. The top hit wins; the
    # caller-supplied rfam_acc is the fallback, including when the search
    # returned nothing at all.
    top_acc = rfam_acc or ""
    if rfam_hits:
        top_acc = rfam_hits[0].get("acc") or top_acc
    if top_acc:
        get_rfam_family(top_acc)
        pipeline["rfam_family"] = top_acc

    # Step 3: RNAcentral search
    rna_df = rnacentral_search(sequence[:30])  # truncate for search
    pipeline["rnacentral_hits"] = len(rna_df)

    print(f"ncRNA pipeline: Rfam={pipeline.get('rfam_family', 'none')}, "
          f"RNAcentral={pipeline['rnacentral_hits']} hits")
    return pipeline
224
+ ```
225
+
226
+ ## References
227
+
228
+ ### Output Files
229
+
230
+ | ファイル | 形式 |
231
+ |---|---|
232
+ | `results/rfam_family.json` | JSON |
233
+ | `results/rfam_cmscan_hits.json` | JSON |
234
+ | `results/rfam_structures.json` | JSON |
235
+ | `results/rnacentral_search.csv` | CSV |
236
+
237
+ ### 利用可能ツール
238
+
239
+ | カテゴリ | 主要ツール | 用途 |
240
+ |---|---|---|
241
+ | Rfam | `Rfam_get_family` | ファミリー情報 |
242
+ | Rfam | `Rfam_search_sequence` | 配列→ファミリー同定 |
243
+ | Rfam | `Rfam_get_covariance_model` | 共分散モデル |
244
+ | Rfam | `Rfam_get_structure_mapping` | PDB マッピング |
245
+ | Rfam | `Rfam_get_tree_data` | 系統樹 |
246
+ | Rfam | `Rfam_get_sequence_regions` | 配列領域 |
247
+ | Rfam | `Rfam_id_to_accession` | ID→アクセッション変換 |
248
+ | RNAcentral | `RNAcentral_search` | ncRNA 検索 |
249
+ | RNAcentral | `RNAcentral_get_by_accession` | 詳細取得 |
250
+
251
+ ### 参照スキル
252
+
253
+ | スキル | 関連 |
254
+ |---|---|
255
+ | `scientific-gene-expression-transcriptomics` | 転写産物解析 |
256
+ | `scientific-genome-sequence-tools` | 配列取得 |
257
+ | `scientific-structural-proteomics` | RNA 構造 |
258
+ | `scientific-biothings-idmapping` | ID マッピング |
259
+
260
+ ### 依存パッケージ
261
+
262
+ `requests`, `pandas`