react-msaview-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +178 -0
- package/package.json +30 -0
- package/src/ebi-api.ts +112 -0
- package/src/index.ts +108 -0
- package/src/interproscan-msa.ts +74 -0
- package/src/local-runner.ts +87 -0
package/README.md
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# react-msaview-cli
|
|
2
|
+
|
|
3
|
+
Command-line tools for [react-msaview](../) (JBrowseMSA), including batch
|
|
4
|
+
InterProScan processing for multiple sequence alignments.
|
|
5
|
+
|
|
6
|
+
Uses [msa-parsers](../msa-parsers/) for file format support.
|
|
7
|
+
|
|
8
|
+
## Installation
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
# From the monorepo root
|
|
12
|
+
yarn install
|
|
13
|
+
yarn workspace react-msaview-cli build
|
|
14
|
+
|
|
15
|
+
# Or install globally (after publishing)
|
|
16
|
+
npm install -g react-msaview-cli
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Commands
|
|
20
|
+
|
|
21
|
+
### interproscan
|
|
22
|
+
|
|
23
|
+
Run InterProScan on all sequences in an MSA file and output results as GFF3.
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
react-msaview-cli interproscan <input-msa> [options]
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
#### Options
|
|
30
|
+
|
|
31
|
+
| Option | Description | Default |
|
|
32
|
+
| ---------------------------- | ------------------------------------------------------ | ------------------ |
|
|
33
|
+
| `-o, --output <file>` | Output GFF file path | `domains.gff` |
|
|
34
|
+
| `--local` | Use local InterProScan installation instead of EBI API | `false` |
|
|
35
|
+
| `--interproscan-path <path>` | Path to local interproscan.sh | `interproscan.sh` |
|
|
36
|
+
| `--programs <list>` | Comma-separated list of InterProScan programs | `Pfam` |
|
|
37
|
+
| `--email <email>` | Email for EBI API (required for EBI API usage) | `user@example.com` |
|
|
38
|
+
| `--batch-size <n>` | Number of sequences per API batch | `30` |
|
|
39
|
+
| `-h, --help` | Show help message | |
|
|
40
|
+
|
|
41
|
+
#### Supported MSA Formats
|
|
42
|
+
|
|
43
|
+
The CLI automatically detects the input format:
|
|
44
|
+
|
|
45
|
+
- **FASTA** (`.fasta`, `.fa`, `.faa`)
|
|
46
|
+
- **Clustal** (`.clustal`, `.aln`)
|
|
47
|
+
- **Stockholm** (`.sto`, `.stockholm`)
|
|
48
|
+
- **A3M** (`.a3m`) - AlphaFold/ColabFold format
|
|
49
|
+
- **EMF** (`.emf`) - Ensembl Multi Format
|
|
50
|
+
|
|
51
|
+
#### Available InterProScan Programs
|
|
52
|
+
|
|
53
|
+
When using `--programs`, you can specify any combination of:
|
|
54
|
+
|
|
55
|
+
- `Pfam` - Protein families (default)
|
|
56
|
+
- `SMART` - Simple Modular Architecture Research Tool
|
|
57
|
+
- `SUPERFAMILY` - Structural and functional annotation
|
|
58
|
+
- `Gene3D` - Structural domain assignments
|
|
59
|
+
- `CDD` - Conserved Domain Database
|
|
60
|
+
- `PANTHER` - Protein Analysis Through Evolutionary Relationships
|
|
61
|
+
- `TIGRFAM` - TIGR protein families
|
|
62
|
+
- `Hamap` - High-quality Automated Annotation of Microbial Proteomes
|
|
63
|
+
- `ProSiteProfiles` - PROSITE profiles
|
|
64
|
+
- `ProSitePatterns` - PROSITE patterns
|
|
65
|
+
- `PRINTS` - Protein fingerprints
|
|
66
|
+
- `PIRSF` - PIR SuperFamily
|
|
67
|
+
- `MobiDBLite` - Disorder prediction
|
|
68
|
+
|
|
69
|
+
## Examples
|
|
70
|
+
|
|
71
|
+
### Using EBI API (recommended for small datasets)
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# Basic usage - runs Pfam analysis
|
|
75
|
+
react-msaview-cli interproscan alignment.fasta -o domains.gff --email your@email.com
|
|
76
|
+
|
|
77
|
+
# Multiple programs
|
|
78
|
+
react-msaview-cli interproscan alignment.fasta -o domains.gff \
|
|
79
|
+
--programs Pfam,SMART,Gene3D \
|
|
80
|
+
--email your@email.com
|
|
81
|
+
|
|
82
|
+
# Smaller batch size for large sequences
|
|
83
|
+
react-msaview-cli interproscan large_proteins.fasta -o domains.gff \
|
|
84
|
+
--batch-size 10 \
|
|
85
|
+
--email your@email.com
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Using Local InterProScan
|
|
89
|
+
|
|
90
|
+
For large datasets or frequent usage, install InterProScan locally:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
# With interproscan.sh in PATH
|
|
94
|
+
react-msaview-cli interproscan alignment.fasta -o domains.gff --local
|
|
95
|
+
|
|
96
|
+
# With custom path
|
|
97
|
+
react-msaview-cli interproscan alignment.fasta -o domains.gff \
|
|
98
|
+
--local \
|
|
99
|
+
--interproscan-path /opt/interproscan/interproscan.sh
|
|
100
|
+
|
|
101
|
+
# With specific programs
|
|
102
|
+
react-msaview-cli interproscan alignment.fasta -o domains.gff \
|
|
103
|
+
--local \
|
|
104
|
+
--programs Pfam,SMART
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Different Input Formats
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
# Clustal format
|
|
111
|
+
react-msaview-cli interproscan alignment.clustal -o domains.gff
|
|
112
|
+
|
|
113
|
+
# Stockholm format
|
|
114
|
+
react-msaview-cli interproscan PF00001.stockholm -o domains.gff
|
|
115
|
+
|
|
116
|
+
# A3M format (from ColabFold/AlphaFold)
|
|
117
|
+
react-msaview-cli interproscan colabfold.a3m -o domains.gff
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## Output Format
|
|
121
|
+
|
|
122
|
+
The output is standard GFF3 format compatible with react-msaview:
|
|
123
|
+
|
|
124
|
+
```gff
|
|
125
|
+
##gff-version 3
|
|
126
|
+
seq1 InterProScan protein_match 10 150 . . . Name=PF00001;signature_desc=7tm_1;description=7 transmembrane receptor (rhodopsin family)
|
|
127
|
+
seq1 InterProScan protein_match 200 350 . . . Name=PF00002;signature_desc=7tm_2;description=7 transmembrane receptor (Secretin family)
|
|
128
|
+
seq2 InterProScan protein_match 5 120 . . . Name=PF00001;signature_desc=7tm_1;description=7 transmembrane receptor (rhodopsin family)
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Loading Results in react-msaview
|
|
132
|
+
|
|
133
|
+
After generating the GFF file, you can load it in react-msaview:
|
|
134
|
+
|
|
135
|
+
1. Open your MSA file in react-msaview
|
|
136
|
+
2. Go to **Menu > Open domains...**
|
|
137
|
+
3. Select the generated GFF file
|
|
138
|
+
4. Domains will appear as colored boxes on the alignment
|
|
139
|
+
|
|
140
|
+
## Troubleshooting
|
|
141
|
+
|
|
142
|
+
### EBI API Timeout
|
|
143
|
+
|
|
144
|
+
If you get timeout errors with the EBI API:
|
|
145
|
+
|
|
146
|
+
- Reduce `--batch-size` (try 10-20 for large proteins)
|
|
147
|
+
- Use `--local` with a local InterProScan installation
|
|
148
|
+
- Check your internet connection
|
|
149
|
+
|
|
150
|
+
### Local InterProScan Not Found
|
|
151
|
+
|
|
152
|
+
```
|
|
153
|
+
Error: Failed to run InterProScan: spawn interproscan.sh ENOENT
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Make sure InterProScan is installed and specify the full path:
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
--interproscan-path /full/path/to/interproscan-5.xx/interproscan.sh
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### No Results in Output
|
|
163
|
+
|
|
164
|
+
- Check that your sequences are protein sequences (not nucleotide)
|
|
165
|
+
- Try different programs (some may not have hits for your sequences)
|
|
166
|
+
- Verify the input file is valid MSA format
|
|
167
|
+
|
|
168
|
+
## API Rate Limits
|
|
169
|
+
|
|
170
|
+
The EBI InterProScan API has usage limits:
|
|
171
|
+
|
|
172
|
+
- Maximum ~30 sequences per request (configurable via `--batch-size`)
|
|
173
|
+
- Requests are processed sequentially to avoid overwhelming the server
|
|
174
|
+
- For large datasets (>100 sequences), consider using local InterProScan
|
|
175
|
+
|
|
176
|
+
## License
|
|
177
|
+
|
|
178
|
+
MIT
|
package/package.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "react-msaview-cli",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"license": "MIT",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"react-msaview-cli": "./dist/index.js"
|
|
8
|
+
},
|
|
9
|
+
"main": "dist/index.js",
|
|
10
|
+
"types": "dist/index.d.ts",
|
|
11
|
+
"exports": {
|
|
12
|
+
".": {
|
|
13
|
+
"types": "./dist/index.d.ts",
|
|
14
|
+
"import": "./dist/index.js"
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"files": [
|
|
18
|
+
"dist",
|
|
19
|
+
"src"
|
|
20
|
+
],
|
|
21
|
+
"scripts": {
|
|
22
|
+
"clean": "rimraf dist",
|
|
23
|
+
"build": "tsc",
|
|
24
|
+
"watch": "tsc --watch",
|
|
25
|
+
"test": "vitest"
|
|
26
|
+
},
|
|
27
|
+
"dependencies": {
|
|
28
|
+
"msa-parsers": "workspace:*"
|
|
29
|
+
}
|
|
30
|
+
}
|
package/src/ebi-api.ts
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import type { InterProScanResults } from 'msa-parsers'
|
|
2
|
+
|
|
3
|
+
/** Root URL of the EBI InterProScan 5 (iprscan5) REST service; the `/run`, `/status/<jobId>` and `/result/<jobId>/json` paths used below are appended to it. */
const BASE_URL = 'https://www.ebi.ac.uk/Tools/services/rest/iprscan5'
|
|
4
|
+
|
|
5
|
+
/**
 * JSON document returned by the `/result/<jobId>/json` endpoint, as consumed
 * by this module. Only the top-level `results` array is read here.
 */
interface InterProScanResponse {
  /** One entry per analysed sequence, in the shape defined by `msa-parsers`. */
  results: InterProScanResults[]
}
|
|
8
|
+
|
|
9
|
+
/**
 * Submit one batch of sequences to the EBI InterProScan REST service.
 *
 * The batch is serialised as FASTA and POSTed, form-encoded, to `/run`
 * together with the contact email and the comma-joined program list.
 *
 * @param sequences - Sequences to analyse; `id` becomes the FASTA header line.
 * @param programs - Program names, sent comma-joined as the `appl` field.
 * @param email - Contact address, sent as the `email` field.
 * @returns The job identifier from the response body, with surrounding
 *   whitespace removed so it can be spliced into follow-up URLs.
 * @throws Error if the response is not OK or the returned job id is empty.
 */
async function submitJob(
  sequences: { id: string; seq: string }[],
  programs: string[],
  email: string,
): Promise<string> {
  const fastaSeq = sequences.map(s => `>${s.id}\n${s.seq}`).join('\n')

  const response = await fetch(`${BASE_URL}/run`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/x-www-form-urlencoded',
    },
    body: new URLSearchParams({
      email,
      sequence: fastaSeq,
      appl: programs.join(','),
    }),
  })

  if (!response.ok) {
    // `statusText` can be empty, so always report the numeric status and
    // whatever explanation the server put in the body.
    const detail = (await response.text().catch(() => '')).trim()
    const reason = response.statusText ? ` ${response.statusText}` : ''
    const suffix = detail ? `: ${detail}` : ''
    throw new Error(
      `Failed to submit job: HTTP ${response.status}${reason}${suffix}`,
    )
  }

  const jobId = (await response.text()).trim()
  if (jobId === '') {
    throw new Error('Failed to submit job: service returned an empty job id')
  }
  return jobId
}
|
|
34
|
+
|
|
35
|
+
/**
 * Fetch the current status text of a submitted job.
 *
 * @param jobId - Identifier previously returned by the `/run` endpoint.
 * @returns The raw body of `GET /status/<jobId>`.
 * @throws Error if the HTTP response is not OK.
 */
async function checkStatus(jobId: string): Promise<string> {
  const statusUrl = `${BASE_URL}/status/${jobId}`
  const res = await fetch(statusUrl)

  if (res.ok) {
    return res.text()
  }
  throw new Error(`Failed to check status: ${res.statusText}`)
}
|
|
42
|
+
|
|
43
|
+
/**
 * Download the JSON result document of a finished job.
 *
 * @param jobId - Identifier previously returned by the `/run` endpoint.
 * @returns The parsed body of `GET /result/<jobId>/json`; the shape is
 *   asserted, not validated.
 * @throws Error if the HTTP response is not OK.
 */
async function getResults(jobId: string): Promise<InterProScanResponse> {
  const resultUrl = `${BASE_URL}/result/${jobId}/json`
  const res = await fetch(resultUrl)

  if (res.ok) {
    return res.json() as Promise<InterProScanResponse>
  }
  throw new Error(`Failed to get results: ${res.statusText}`)
}
|
|
50
|
+
|
|
51
|
+
/**
 * Poll a job's status once per second until it finishes.
 *
 * Resolves as soon as the status text contains `FINISHED`. Rejects when the
 * status contains `FAILURE`, `ERROR` or `NOT_FOUND`; the last one means the
 * service no longer knows the job, so further polling cannot succeed.
 * Any other status is treated as "still in progress".
 *
 * @param jobId - Identifier previously returned by the `/run` endpoint.
 * @throws Error if the job reports a terminal failure status, or if it has
 *   not finished after 300 polls.
 */
async function waitForJob(jobId: string): Promise<void> {
  console.log(` Waiting for job ${jobId}...`)

  const maxAttempts = 300 // 300 polls with a 1 s pause: about 5 minutes, plus request latency
  const terminalFailures = ['FAILURE', 'ERROR', 'NOT_FOUND']

  for (let attempts = 1; attempts <= maxAttempts; attempts++) {
    const status = (await checkStatus(jobId)).trim()

    if (status.includes('FINISHED')) {
      return
    }
    if (terminalFailures.some(marker => status.includes(marker))) {
      throw new Error(`Job ${jobId} failed: ${status}`)
    }

    await new Promise(resolve => setTimeout(resolve, 1000))

    // Progress line every tenth poll so long jobs do not look hung.
    if (attempts % 10 === 0) {
      console.log(` Still waiting... (${attempts}s)`)
    }
  }

  throw new Error(`Timeout waiting for job ${jobId}`)
}
|
|
76
|
+
|
|
77
|
+
/**
 * Run InterProScan on the given sequences through the EBI REST service.
 *
 * The sequences are split into consecutive batches of at most `batchSize`.
 * Each batch is submitted, awaited and fetched before the next one starts,
 * so only one job is in flight at a time.
 *
 * @param sequences - Sequences to analyse.
 * @param programs - InterProScan program names to run.
 * @param email - Contact address passed to the service.
 * @param batchSize - Maximum number of sequences per submitted job; must be
 *   a positive integer.
 * @returns The per-sequence results of all batches, in submission order.
 * @throws RangeError if `batchSize` is not a positive integer. A value of 0
 *   or below would never advance the batching loop, and `NaN` would yield a
 *   single empty batch.
 * @throws Error if submitting, polling or fetching any batch fails.
 */
export async function runEbiInterProScan(
  sequences: { id: string; seq: string }[],
  programs: string[],
  email: string,
  batchSize: number,
): Promise<InterProScanResults[]> {
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    throw new RangeError(
      `batchSize must be a positive integer, got ${batchSize}`,
    )
  }

  const batches: { id: string; seq: string }[][] = []
  for (let start = 0; start < sequences.length; start += batchSize) {
    batches.push(sequences.slice(start, start + batchSize))
  }

  console.log(` Submitting ${batches.length} batch(es)...`)

  const allResults: InterProScanResults[] = []

  for (const [i, batch] of batches.entries()) {
    console.log(
      ` Processing batch ${i + 1}/${batches.length} (${batch.length} sequences)...`,
    )

    const jobId = await submitJob(batch, programs, email)
    console.log(` Job submitted: ${jobId}`)

    await waitForJob(jobId)

    const { results } = await getResults(jobId)
    for (const r of results) {
      allResults.push(r)
    }

    console.log(` Batch ${i + 1} complete`)
  }

  return allResults
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { parseArgs } from 'node:util'
|
|
4
|
+
|
|
5
|
+
import { runInterProScan } from './interproscan-msa'
|
|
6
|
+
|
|
7
|
+
// Command-line option table. `batch-size` is declared as a string option and
// converted to a number in `main`.
const CLI_OPTIONS = {
  output: { type: 'string', short: 'o', default: 'domains.gff' },
  local: { type: 'boolean', default: false },
  'interproscan-path': { type: 'string', default: 'interproscan.sh' },
  programs: { type: 'string', default: 'Pfam' },
  email: { type: 'string', default: 'user@example.com' },
  'batch-size': { type: 'string', default: '30' },
  help: { type: 'boolean', short: 'h', default: false },
} as const

// Parse the process arguments once at module load; positionals carry the
// command name followed by its input file.
const { values, positionals } = parseArgs({
  allowPositionals: true,
  options: CLI_OPTIONS,
})
|
|
42
|
+
|
|
43
|
+
/**
 * Print the CLI usage text to stdout in a single `console.log` call.
 *
 * Entries are indented under their section heading and option descriptions
 * are aligned in one column so the text is easy to scan in a terminal.
 */
function printHelp(): void {
  console.log(`
react-msaview-cli - CLI tools for react-msaview

USAGE:
  react-msaview-cli interproscan <input-msa> [options]

COMMANDS:
  interproscan                Run InterProScan on all sequences in an MSA file

OPTIONS:
  -o, --output <file>         Output GFF file (default: domains.gff)
  --local                     Use local InterProScan installation
  --interproscan-path <path>  Path to interproscan.sh (default: interproscan.sh)
  --programs <list>           Comma-separated list of programs (default: Pfam)
  --email <email>             Email for EBI API (default: user@example.com)
  --batch-size <n>            Number of sequences per API batch (default: 30)
  -h, --help                  Show this help message

EXAMPLES:
  # Run InterProScan using EBI API
  react-msaview-cli interproscan alignment.fasta -o domains.gff

  # Run with local InterProScan
  react-msaview-cli interproscan alignment.fasta -o domains.gff --local

  # Specify programs
  react-msaview-cli interproscan alignment.clustal -o domains.gff --programs Pfam,SMART
`)
}
|
|
73
|
+
|
|
74
|
+
/**
 * CLI entry point: validates the parsed arguments and dispatches on the
 * command named by the first positional argument.
 *
 * - With `--help` or no positionals, prints the help text and exits 0.
 * - For an unknown command, prints an error plus the help text and exits 1.
 * - For `interproscan`, requires an input file, a positive integer
 *   `--batch-size` and at least one program name, then runs the pipeline.
 *
 * Errors thrown by the pipeline propagate to the caller.
 */
async function main(): Promise<void> {
  if (values.help || positionals.length === 0) {
    printHelp()
    process.exit(0)
  }

  const [command, inputFile] = positionals

  if (command !== 'interproscan') {
    console.error(`Unknown command: ${command}`)
    printHelp()
    process.exit(1)
  }

  if (!inputFile) {
    console.error('Error: Input MSA file is required')
    process.exit(1)
  }

  // Reject values such as "0", "-5" or "abc" here, before they reach the
  // batching logic.
  const rawBatchSize = values['batch-size']
  const batchSize = Number(rawBatchSize)
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    console.error(
      `Error: --batch-size must be a positive integer (got "${rawBatchSize}")`,
    )
    process.exit(1)
  }

  // Tolerate "Pfam, SMART" and stray commas in the program list.
  const programs = values.programs
    .split(',')
    .map(name => name.trim())
    .filter(name => name !== '')
  if (programs.length === 0) {
    console.error(
      'Error: --programs must list at least one InterProScan program',
    )
    process.exit(1)
  }

  await runInterProScan({
    inputFile,
    outputFile: values.output,
    useLocal: values.local,
    interproscanPath: values['interproscan-path'],
    programs,
    email: values.email,
    batchSize,
  })
}
|
|
104
|
+
|
|
105
|
+
// Top-level failure handler: report the error message (or the raw thrown
// value when it is not an Error) and exit with a non-zero status.
main().catch((error: unknown) => {
  const detail = error instanceof Error ? error.message : error
  console.error('Error:', detail)
  process.exit(1)
})
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import * as fs from 'node:fs'
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
getUngappedSequence,
|
|
5
|
+
interProResponseToGFF,
|
|
6
|
+
parseMSA,
|
|
7
|
+
} from 'msa-parsers'
|
|
8
|
+
|
|
9
|
+
import { runEbiInterProScan } from './ebi-api'
|
|
10
|
+
import { runLocalInterProScan } from './local-runner'
|
|
11
|
+
|
|
12
|
+
import type { InterProScanResults } from 'msa-parsers'
|
|
13
|
+
|
|
14
|
+
/** Settings accepted by {@link runInterProScan}. */
export interface InterProScanOptions {
  /** Path of the MSA file that is read and parsed. */
  inputFile: string
  /** Path the generated GFF text is written to. */
  outputFile: string
  /** When true the local runner is used; otherwise the EBI API runner. */
  useLocal: boolean
  /** Executable handed to the local runner (only used when `useLocal` is true). */
  interproscanPath: string
  /** InterProScan program names, forwarded to whichever runner is used. */
  programs: string[]
  /** Contact address forwarded to the EBI API runner. */
  email: string
  /** Sequences per job, forwarded to the EBI API runner. */
  batchSize: number
}
|
|
23
|
+
|
|
24
|
+
/**
 * Read an MSA file, run InterProScan on every non-empty ungapped sequence
 * and write the results as GFF.
 *
 * Progress is reported on stdout at each stage. Rows whose ungapped
 * sequence is empty are skipped.
 *
 * @param options - Input/output paths and runner settings; `useLocal`
 *   selects the local runner, otherwise the EBI API runner is used.
 */
export async function runInterProScan(options: InterProScanOptions) {
  console.log(`Reading MSA from ${options.inputFile}...`)
  const msa = parseMSA(fs.readFileSync(options.inputFile, 'utf8'))

  const names = msa.getNames()
  console.log(`Found ${names.length} sequences`)

  // Strip alignment gaps; rows that are gaps only have nothing to scan.
  const sequences: { id: string; seq: string }[] = []
  for (const name of names) {
    const seq = getUngappedSequence(msa.getRow(name))
    if (seq.length === 0) {
      continue
    }
    sequences.push({ id: name, seq })
  }

  console.log(`Processing ${sequences.length} non-empty sequences...`)

  let allResults: InterProScanResults[]
  if (!options.useLocal) {
    console.log(`Running InterProScan via EBI API...`)
    allResults = await runEbiInterProScan(
      sequences,
      options.programs,
      options.email,
      options.batchSize,
    )
  } else {
    console.log(`Running local InterProScan at ${options.interproscanPath}...`)
    allResults = await runLocalInterProScan(
      sequences,
      options.interproscanPath,
      options.programs,
    )
  }

  console.log(`Converting results to GFF...`)
  const gff = interProResponseToGFF(allResults)

  console.log(`Writing output to ${options.outputFile}...`)
  fs.writeFileSync(options.outputFile, gff, 'utf8')

  console.log('Done!')
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import { spawn } from 'node:child_process'
|
|
2
|
+
import * as fs from 'node:fs'
|
|
3
|
+
import * as os from 'node:os'
|
|
4
|
+
import path from 'node:path'
|
|
5
|
+
|
|
6
|
+
import type { InterProScanResults } from 'msa-parsers'
|
|
7
|
+
|
|
8
|
+
/**
 * Part of the JSON file written by InterProScan (`-f JSON`) that this
 * module reads: only the top-level `results` array.
 */
interface InterProScanResponse {
  /** Per-sequence results, in the shape defined by `msa-parsers`. */
  results: InterProScanResults[]
}
|
|
11
|
+
|
|
12
|
+
/**
 * Run a locally installed InterProScan on the given sequences.
 *
 * The sequences are written as FASTA into a fresh temporary directory and
 * the executable is invoked with JSON output, the requested programs,
 * `--goterms` and `--pathways`. The JSON file is then parsed and the
 * temporary directory is removed, whether the run succeeded or not.
 *
 * @param sequences - Sequences to analyse; `id` becomes the FASTA header.
 *   An empty list returns `[]` without starting InterProScan.
 * @param interproscanPath - Executable to spawn (a path, or a name on PATH).
 * @param programs - Program names, passed comma-joined to `-appl`.
 * @returns The `results` array from InterProScan's JSON output.
 * @throws Error if the process cannot be started, exits non-zero, is killed
 *   by a signal, writes no output file, or the output has no `results` array.
 */
export async function runLocalInterProScan(
  sequences: { id: string; seq: string }[],
  interproscanPath: string,
  programs: string[],
): Promise<InterProScanResults[]> {
  // Nothing to scan: skip the run instead of giving InterProScan an empty file.
  if (sequences.length === 0) {
    return []
  }

  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'interproscan-'))
  const inputFile = path.join(tmpDir, 'input.fasta')
  const outputFile = path.join(tmpDir, 'output.json')

  try {
    const fastaContent = sequences.map(s => `>${s.id}\n${s.seq}`).join('\n')
    fs.writeFileSync(inputFile, fastaContent, 'utf8')

    console.log(` Running InterProScan on ${sequences.length} sequences...`)

    await new Promise<void>((resolve, reject) => {
      const args = [
        '-i',
        inputFile,
        '-o',
        outputFile,
        '-f',
        'JSON',
        '-appl',
        programs.join(','),
        '--goterms',
        '--pathways',
      ]

      const proc = spawn(interproscanPath, args, {
        stdio: ['ignore', 'pipe', 'pipe'],
      })

      let stderr = ''

      // A chunk may hold several lines; echo each one with the log indent.
      proc.stdout.on('data', (data: Buffer) => {
        for (const rawLine of data.toString().split(/\r?\n/)) {
          const line = rawLine.trim()
          if (line) {
            console.log(` ${line}`)
          }
        }
      })

      proc.stderr.on('data', (data: Buffer) => {
        stderr += data.toString()
      })

      proc.on('close', (code, signal) => {
        if (code === 0) {
          resolve()
        } else if (code === null) {
          // A null exit code means the process was ended by a signal.
          reject(
            new Error(
              `InterProScan was terminated by signal ${signal}: ${stderr}`,
            ),
          )
        } else {
          reject(new Error(`InterProScan failed with code ${code}: ${stderr}`))
        }
      })

      proc.on('error', err => {
        reject(new Error(`Failed to run InterProScan: ${err.message}`))
      })
    })

    if (!fs.existsSync(outputFile)) {
      throw new Error('InterProScan did not produce output file')
    }

    const outputContent = fs.readFileSync(outputFile, 'utf8')
    const parsed: unknown = JSON.parse(outputContent)

    // Check the one field this function returns before trusting the cast.
    if (
      typeof parsed !== 'object' ||
      parsed === null ||
      !Array.isArray((parsed as Partial<InterProScanResponse>).results)
    ) {
      throw new Error('InterProScan output is missing a "results" array')
    }

    return (parsed as InterProScanResponse).results
  } finally {
    try {
      fs.rmSync(tmpDir, { recursive: true, force: true })
    } catch {
      // Best-effort cleanup: a leftover temp dir must not mask the real outcome.
    }
  }
}
|