dsrnascan 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dsrnascan might be problematic. Click here for more details.

Files changed (32) hide show
  1. dsrnascan-0.3.0/.github/workflows/ci-simple.yml +114 -0
  2. dsrnascan-0.3.0/.github/workflows/quick-test.yml +40 -0
  3. dsrnascan-0.3.0/.github/workflows/release.yml +140 -0
  4. dsrnascan-0.3.0/.gitignore +81 -0
  5. dsrnascan-0.3.0/LICENSE +21 -0
  6. dsrnascan-0.3.0/MANIFEST.in +17 -0
  7. dsrnascan-0.3.0/PKG-INFO +276 -0
  8. dsrnascan-0.3.0/README.md +231 -0
  9. dsrnascan-0.3.0/compile_minimal_einverted.c +29 -0
  10. dsrnascan-0.3.0/compile_patched_einverted.sh +97 -0
  11. dsrnascan-0.3.0/conda-recipe/build.sh +43 -0
  12. dsrnascan-0.3.0/conda-recipe/meta.yaml +68 -0
  13. dsrnascan-0.3.0/dsrnascan/__init__.py +5 -0
  14. dsrnascan-0.3.0/dsrnascan/dsRNAscan.py +1439 -0
  15. dsrnascan-0.3.0/dsrnascan/tools/einverted +0 -0
  16. dsrnascan-0.3.0/dsrnascan/tools/einverted_darwin_arm64 +0 -0
  17. dsrnascan-0.3.0/dsrnascan.egg-info/PKG-INFO +276 -0
  18. dsrnascan-0.3.0/dsrnascan.egg-info/SOURCES.txt +31 -0
  19. dsrnascan-0.3.0/dsrnascan.egg-info/dependency_links.txt +1 -0
  20. dsrnascan-0.3.0/dsrnascan.egg-info/entry_points.txt +2 -0
  21. dsrnascan-0.3.0/dsrnascan.egg-info/not-zip-safe +1 -0
  22. dsrnascan-0.3.0/dsrnascan.egg-info/requires.txt +15 -0
  23. dsrnascan-0.3.0/dsrnascan.egg-info/top_level.txt +2 -0
  24. dsrnascan-0.3.0/einverted.c +713 -0
  25. dsrnascan-0.3.0/einverted.patch +29 -0
  26. dsrnascan-0.3.0/pyproject.toml +76 -0
  27. dsrnascan-0.3.0/requirements.txt +4 -0
  28. dsrnascan-0.3.0/setup.cfg +66 -0
  29. dsrnascan-0.3.0/setup.py +208 -0
  30. dsrnascan-0.3.0/test_data/test_sequences.fasta +14 -0
  31. dsrnascan-0.3.0/tools/einverted +0 -0
  32. dsrnascan-0.3.0/tools/einverted_darwin_arm64 +0 -0
@@ -0,0 +1,114 @@
1
+ name: CI Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [ main, master, develop ]
6
+ pull_request:
7
+ branches: [ main, master ]
8
+ workflow_dispatch:
9
+
10
+ jobs:
11
+ test:
12
+ name: Test - ${{ matrix.os }} - Python ${{ matrix.python-version }}
13
+ runs-on: ${{ matrix.os }}
14
+ strategy:
15
+ fail-fast: false
16
+ matrix:
17
+ os: [ubuntu-latest, macos-latest]
18
+ python-version: ['3.9', '3.10', '3.11']
19
+ include:
20
+ # Test one version on Windows
21
+ - os: windows-latest
22
+ python-version: '3.10'
23
+
24
+ steps:
25
+ - name: Checkout code
26
+ uses: actions/checkout@v4
27
+
28
+ - name: Set up Python ${{ matrix.python-version }}
29
+ uses: actions/setup-python@v5
30
+ with:
31
+ python-version: ${{ matrix.python-version }}
32
+
33
+ - name: Install dependencies
34
+ run: |
35
+ python -m pip install --upgrade pip
36
+ pip install build wheel
37
+
38
+ - name: Install einverted (Linux)
39
+ if: runner.os == 'Linux'
40
+ run: |
41
+ # Try to install from conda first (fastest)
42
+ # Note: This requires conda to be available, which it isn't by default
43
+ # Alternative: compile from source or download pre-compiled binary
44
+ echo "Note: einverted binary not available for Linux in this test"
45
+ echo "In production, users should install EMBOSS via their package manager"
46
+ # For Ubuntu/Debian: sudo apt-get install emboss
47
+ # For now, we'll skip the functional test on Linux
48
+
49
+ - name: Build package
50
+ run: python -m build
51
+
52
+ - name: Install package
53
+ run: pip install dist/*.whl
54
+
55
+ - name: Test CLI
56
+ run: |
57
+ dsrnascan --help
58
+ dsrnascan --version
59
+
60
+ - name: Run functional test (macOS only for now)
61
+ if: runner.os == 'macOS'
62
+ run: |
63
+ echo ">test" > test.fasta
64
+ echo "GGGGGGGGGGAAAAAAAAAAAAAACCCCCCCCCC" >> test.fasta
65
+ dsrnascan test.fasta -w 100 -s 50 --score 15 -c 1
66
+
67
+ - name: Skip functional test (Linux/Windows)
68
+ if: runner.os != 'macOS'
69
+ run: |
70
+ echo "Skipping functional test - einverted binary not available for ${{ runner.os }}"
71
+ echo "Users should install EMBOSS via their system package manager"
72
+
73
+ - name: Verify output files (macOS only)
74
+ if: runner.os == 'macOS'
75
+ shell: python
76
+ run: |
77
+ import glob
78
+ import sys
79
+
80
+ # Check for output directory
81
+ dirs = glob.glob('dsrnascan_*')
82
+ if not dirs:
83
+ print('ERROR: No output directory created')
84
+ sys.exit(1)
85
+
86
+ print(f'✓ Output directory: {dirs[0]}')
87
+
88
+ # Check for results file
89
+ results = glob.glob(f'{dirs[0]}/*_merged_results.txt')
90
+ if not results:
91
+ print('ERROR: No results file created')
92
+ sys.exit(1)
93
+
94
+ print(f'✓ Results file: {results[0]}')
95
+ print('All tests passed!')
96
+
97
+ test-direct-install:
98
+ name: Test Direct Install from GitHub
99
+ runs-on: ubuntu-latest
100
+
101
+ steps:
102
+ - name: Set up Python
103
+ uses: actions/setup-python@v5
104
+ with:
105
+ python-version: '3.10'
106
+
107
+ - name: Install from GitHub
108
+ run: |
109
+ pip install git+https://github.com/${{ github.repository }}.git@${{ github.sha }}
110
+
111
+ - name: Test installation
112
+ run: |
113
+ dsrnascan --help
114
+ python -c "import dsrnascan; print('Import successful')"
@@ -0,0 +1,40 @@
1
+ name: Quick Test
2
+
3
+ on:
4
+ push:
5
+ paths:
6
+ - 'dsRNAscan.py'
7
+ - 'setup.py'
8
+ - 'pyproject.toml'
9
+ - 'requirements.txt'
10
+ workflow_dispatch:
11
+
12
+ jobs:
13
+ quick-test:
14
+ name: Quick Install Test
15
+ runs-on: ubuntu-latest
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Set up Python
21
+ uses: actions/setup-python@v5
22
+ with:
23
+ python-version: '3.9'
24
+
25
+ - name: Install package
26
+ run: |
27
+ pip install .
28
+
29
+ - name: Run smoke test
30
+ run: |
31
+ dsrnascan --version
32
+ dsrnascan --help
33
+
34
+ # Quick functional test
35
+ echo ">test" > test.fa
36
+ echo "GGGGGGGGGGAAAAAAAAAAAAAACCCCCCCCCC" >> test.fa
37
+ dsrnascan test.fa -w 100 -s 50 --score 15 -c 1
38
+
39
+ # Verify output was created
40
+ ls -la dsrnascan_*/
@@ -0,0 +1,140 @@
1
+ name: Build and Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ build-linux:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v3
14
+
15
+ - name: Install dependencies
16
+ run: |
17
+ sudo apt-get update
18
+ sudo apt-get install -y build-essential autoconf automake libtool
19
+
20
+ - name: Download EMBOSS
21
+ run: |
22
+ wget ftp://emboss.open-bio.org/pub/EMBOSS/EMBOSS-6.6.0.tar.gz
23
+ tar -xzf EMBOSS-6.6.0.tar.gz
24
+
25
+ - name: Apply patch and build
26
+ run: |
27
+ cd EMBOSS-6.6.0
28
+ patch -p1 < ../einverted.patch
29
+ ./configure --without-x --without-pngdriver
30
+ cd emboss
31
+ make einverted
32
+ cp .libs/einverted ../einverted-linux-x86_64
33
+
34
+ - name: Upload artifact
35
+ uses: actions/upload-artifact@v3
36
+ with:
37
+ name: einverted-linux-x86_64
38
+ path: EMBOSS-6.6.0/einverted-linux-x86_64
39
+
40
+ build-macos:
41
+ runs-on: macos-latest
42
+ steps:
43
+ - uses: actions/checkout@v3
44
+
45
+ - name: Install dependencies
46
+ run: |
47
+ brew install autoconf automake libtool
48
+
49
+ - name: Download EMBOSS
50
+ run: |
51
+ curl -L -o EMBOSS-6.6.0.tar.gz ftp://emboss.open-bio.org/pub/EMBOSS/EMBOSS-6.6.0.tar.gz
52
+ tar -xzf EMBOSS-6.6.0.tar.gz
53
+
54
+ - name: Apply patch and build
55
+ run: |
56
+ cd EMBOSS-6.6.0
57
+ patch -p1 < ../einverted.patch
58
+ ./configure --without-x --without-pngdriver
59
+ cd emboss
60
+ make einverted
61
+ cp .libs/einverted ../einverted-macos-x86_64
62
+
63
+ - name: Upload artifact
64
+ uses: actions/upload-artifact@v3
65
+ with:
66
+ name: einverted-macos-x86_64
67
+ path: EMBOSS-6.6.0/einverted-macos-x86_64
68
+
69
+ build-macos-arm64:
70
+ runs-on: macos-latest
71
+ steps:
72
+ - uses: actions/checkout@v3
73
+
74
+ - name: Install dependencies
75
+ run: |
76
+ brew install autoconf automake libtool
77
+
78
+ - name: Download EMBOSS
79
+ run: |
80
+ curl -L -o EMBOSS-6.6.0.tar.gz ftp://emboss.open-bio.org/pub/EMBOSS/EMBOSS-6.6.0.tar.gz
81
+ tar -xzf EMBOSS-6.6.0.tar.gz
82
+
83
+ - name: Apply patch and build for ARM64
84
+ run: |
85
+ cd EMBOSS-6.6.0
86
+ patch -p1 < ../einverted.patch
87
+ # Cross-compile for ARM64
88
+ ./configure --without-x --without-pngdriver --host=arm64-apple-darwin
89
+ cd emboss
90
+ make einverted CFLAGS="-target arm64-apple-darwin"
91
+ cp .libs/einverted ../einverted-macos-arm64
92
+
93
+ - name: Upload artifact
94
+ uses: actions/upload-artifact@v3
95
+ with:
96
+ name: einverted-macos-arm64
97
+ path: EMBOSS-6.6.0/einverted-macos-arm64
98
+
99
+ release:
100
+ needs: [build-linux, build-macos, build-macos-arm64]
101
+ runs-on: ubuntu-latest
102
+ steps:
103
+ - uses: actions/checkout@v3
104
+
105
+ - name: Download all artifacts
106
+ uses: actions/download-artifact@v3
107
+
108
+ - name: Create install script
109
+ run: |
110
+ cp install.sh install-dsrnascan.sh
111
+ chmod +x install-dsrnascan.sh
112
+
113
+ - name: Create Release
114
+ uses: softprops/action-gh-release@v1
115
+ with:
116
+ files: |
117
+ einverted-linux-x86_64/einverted-linux-x86_64
118
+ einverted-macos-x86_64/einverted-macos-x86_64
119
+ einverted-macos-arm64/einverted-macos-arm64
120
+ install-dsrnascan.sh
121
+ dsRNAscan.py
122
+ body: |
123
+ ## Installation
124
+
125
+ ### Quick install (recommended):
126
+ ```bash
127
+ curl -L https://github.com/Bass-Lab/dsRNAscan/releases/download/${{ github.ref_name }}/install-dsrnascan.sh | bash
128
+ ```
129
+
130
+ ### Manual install:
131
+ 1. Download the appropriate einverted binary for your platform
132
+ 2. Download dsRNAscan.py
133
+ 3. Make both executable and add to your PATH
134
+
135
+ ### Supported platforms:
136
+ - Linux x86_64
137
+ - macOS x86_64 (Intel)
138
+ - macOS ARM64 (Apple Silicon)
139
+ env:
140
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -0,0 +1,81 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ *.manifest
32
+ *.spec
33
+
34
+ # Unit test / coverage reports
35
+ htmlcov/
36
+ .tox/
37
+ .nox/
38
+ .coverage
39
+ .coverage.*
40
+ .cache
41
+ nosetests.xml
42
+ coverage.xml
43
+ *.cover
44
+ *.py,cover
45
+ .hypothesis/
46
+ .pytest_cache/
47
+
48
+ # Environments
49
+ .env
50
+ .venv
51
+ env/
52
+ venv/
53
+ ENV/
54
+ env.bak/
55
+ venv.bak/
56
+
57
+ # IDEs
58
+ .idea/
59
+ .vscode/
60
+ *.swp
61
+ *.swo
62
+ *~
63
+
64
+ # OS
65
+ .DS_Store
66
+ Thumbs.db
67
+
68
+ # dsRNAscan specific
69
+ *.dsRNApredictions.bp
70
+ *_merged_results.txt
71
+ *_ein_results.txt
72
+ test_sequence.fa
73
+ *.test_chr.*
74
+ EMBOSS-6.6.0/
75
+ EMBOSS-6.6.0.tar.gz
76
+ emboss_install/
77
+
78
+ # Temporary files
79
+ *.tmp
80
+ *.temp
81
+ *.log
@@ -0,0 +1,21 @@
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 3, 29 June 2007
3
+
4
+ dsRNAscan - A tool for genome-wide prediction of double-stranded RNA structures
5
+ Copyright (C) 2024 Bass Lab
6
+
7
+ This program is free software: you can redistribute it and/or modify
8
+ it under the terms of the GNU General Public License as published by
9
+ the Free Software Foundation, either version 3 of the License, or
10
+ (at your option) any later version.
11
+
12
+ This program is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU General Public License for more details.
16
+
17
+ You should have received a copy of the GNU General Public License
18
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
19
+
20
+ For the full GPL-3.0 license text, see:
21
+ https://www.gnu.org/licenses/gpl-3.0.txt
@@ -0,0 +1,17 @@
1
+ include README.md
2
+ include LICENSE
3
+ include einverted.patch
4
+ include einverted.c
5
+ include compile_patched_einverted.sh
6
+ include compile_minimal_einverted.c
7
+ include requirements.txt
8
+ recursive-include dsrnascan/tools *
9
+ recursive-include tools *
10
+ recursive-include conda-recipe *
11
+ exclude test_sequence.fa
12
+ exclude *.dsRNApredictions.bp
13
+ exclude *_merged_results.txt
14
+ exclude *_ein_results.txt
15
+ global-exclude *.pyc
16
+ global-exclude __pycache__
17
+ global-exclude .DS_Store
@@ -0,0 +1,276 @@
1
+ Metadata-Version: 2.4
2
+ Name: dsrnascan
3
+ Version: 0.3.0
4
+ Summary: A tool for genome-wide prediction of double-stranded RNA structures
5
+ Home-page: https://github.com/Bass-Lab/dsRNAscan
6
+ Author: Bass Lab
7
+ Author-email:
8
+ License: GPL-3.0-or-later
9
+ Project-URL: Homepage, https://github.com/Bass-Lab/dsRNAscan
10
+ Project-URL: Documentation, https://github.com/Bass-Lab/dsRNAscan/blob/main/README.md
11
+ Project-URL: Repository, https://github.com/Bass-Lab/dsRNAscan
12
+ Project-URL: Bug Tracker, https://github.com/Bass-Lab/dsRNAscan/issues
13
+ Keywords: bioinformatics,RNA,dsRNA,secondary structure,genomics
14
+ Classifier: Development Status :: 4 - Beta
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
17
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.7
20
+ Classifier: Programming Language :: Python :: 3.8
21
+ Classifier: Programming Language :: Python :: 3.9
22
+ Classifier: Programming Language :: Python :: 3.10
23
+ Classifier: Programming Language :: Python :: 3.11
24
+ Classifier: Operating System :: POSIX :: Linux
25
+ Classifier: Operating System :: MacOS :: MacOS X
26
+ Requires-Python: >=3.7
27
+ Description-Content-Type: text/markdown
28
+ License-File: LICENSE
29
+ Requires-Dist: biopython>=1.78
30
+ Requires-Dist: numpy>=1.19
31
+ Requires-Dist: pandas>=1.1
32
+ Requires-Dist: ViennaRNA>=2.4
33
+ Provides-Extra: mpi
34
+ Requires-Dist: mpi4py>=3.0; extra == "mpi"
35
+ Requires-Dist: parasail>=1.2; extra == "mpi"
36
+ Provides-Extra: dev
37
+ Requires-Dist: pytest>=6.0; extra == "dev"
38
+ Requires-Dist: pytest-cov>=2.0; extra == "dev"
39
+ Requires-Dist: black>=22.0; extra == "dev"
40
+ Requires-Dist: flake8>=4.0; extra == "dev"
41
+ Requires-Dist: mypy>=0.900; extra == "dev"
42
+ Dynamic: home-page
43
+ Dynamic: license-file
44
+ Dynamic: requires-python
45
+
46
+ # dsRNAscan
47
+
48
+ [![CI Tests](https://github.com/Bass-Lab/dsRNAscan/actions/workflows/ci-simple.yml/badge.svg)](https://github.com/Bass-Lab/dsRNAscan/actions/workflows/ci-simple.yml)
49
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
50
+ [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
51
+
52
+ **dsRNAscan** is a bioinformatics tool for genome-wide identification of **double-stranded RNA (dsRNA) structures**. It uses a sliding window approach to detect inverted repeats that can form dsRNA secondary structures, with special support for **G-U wobble base pairing** critical for RNA analysis.
53
+
54
+ ## 🚀 Quick Start
55
+
56
+ ### Install from PyPI (Coming Soon)
57
+ ```bash
58
+ pip install dsrnascan
59
+ ```
60
+
61
+ ### Install from GitHub
62
+ ```bash
63
+ # Direct from GitHub with automatic einverted compilation
64
+ pip install --no-binary :all: git+https://github.com/Bass-Lab/dsRNAscan.git
65
+
66
+ # Or if you prefer using pre-built wheels (faster but einverted needs separate setup)
67
+ pip install git+https://github.com/Bass-Lab/dsRNAscan.git
68
+ ```
69
+
70
+ ### Install from Local Files
71
+ ```bash
72
+ # Option 1: Clone and install with einverted compilation
73
+ git clone https://github.com/Bass-Lab/dsRNAscan.git
74
+ cd dsRNAscan
75
+ pip install --no-binary :all: .
76
+
77
+ # Option 2: Quick install (uses pre-compiled binaries if available)
78
+ git clone https://github.com/Bass-Lab/dsRNAscan.git
79
+ cd dsRNAscan
80
+ pip install .
81
+
82
+ # Option 3: Development mode (editable install)
83
+ git clone https://github.com/Bass-Lab/dsRNAscan.git
84
+ cd dsRNAscan
85
+ pip install -e .
86
+
87
+ # Option 4: Manual einverted compilation then install
88
+ git clone https://github.com/Bass-Lab/dsRNAscan.git
89
+ cd dsRNAscan
90
+ ./compile_patched_einverted.sh # Compile einverted with G-U patch
91
+ pip install .
92
+ ```
93
+
94
+ ### Basic Usage
95
+ ```bash
96
+ # Scan a genome/sequence for dsRNA structures
97
+ dsrnascan input.fasta -w 10000 -s 150 --score 50
98
+
99
+ # Process specific chromosome
100
+ dsrnascan genome.fasta --chr chr21 -c 8
101
+
102
+ # Use custom parameters for sensitive detection
103
+ dsrnascan sequence.fasta -w 5000 --min 20 --score 30
104
+ ```
105
+
106
+ ## 📋 Requirements
107
+
108
+ - **Python 3.8+**
109
+ - **Dependencies** (automatically installed):
110
+ - numpy ≥1.19
111
+ - pandas ≥1.1
112
+ - biopython ≥1.78
113
+ - ViennaRNA ≥2.4
114
+
115
+ ### Important: einverted Binary
116
+
117
+ dsRNAscan requires the `einverted` tool from EMBOSS with our **G-U wobble patch** for accurate RNA structure detection.
118
+
119
+ **Option 1: Automatic** (macOS with included binary)
120
+ - The package includes a pre-compiled einverted for macOS ARM64
121
+ - It will be used automatically on compatible systems
122
+
123
+ **Option 2: System Installation** (Linux/Other)
124
+ ```bash
125
+ # Ubuntu/Debian
126
+ sudo apt-get install emboss
127
+
128
+ # macOS with Homebrew
129
+ brew install emboss
130
+
131
+ # Conda (recommended for bioinformatics workflows)
132
+ conda install -c bioconda emboss
133
+ ```
134
+
135
+ **Note:** System-installed EMBOSS won't have the G-U patch. For full RNA functionality with G-U wobble pairs, compile from source:
136
+
137
+ ```bash
138
+ # Compile with G-U patch (optional but recommended)
139
+ cd dsRNAscan
140
+ DSRNASCAN_COMPILE_FULL=true pip install .
141
+ ```
142
+
143
+ ## 🧬 Key Features
144
+
145
+ - **G-U Wobble Base Pairing**: Modified einverted algorithm specifically for RNA
146
+ - **Parallel Processing**: Multi-CPU support for genome-scale analysis
147
+ - **Flexible Windowing**: Customizable window and step sizes
148
+ - **RNA Structure Prediction**: Integration with ViennaRNA for structure validation
149
+ - **Multiple Output Formats**: Tab-delimited results and IGV visualization files
150
+
151
+ ## 📖 Detailed Usage
152
+
153
+ ### Command-Line Options
154
+
155
+ ```bash
156
+ dsrnascan --help
157
+ ```
158
+
159
+ Key parameters:
160
+ - `-w/--window`: Window size for scanning (default: 10000)
161
+ - `-s/--step`: Step size between windows (default: 150)
162
+ - `--score`: Minimum score threshold for inverted repeats (default: 50)
163
+ - `--min/--max`: Min/max length of inverted repeats (default: 30/10000)
164
+ - `--paired_cutoff`: Minimum percentage of paired bases (default: 70%)
165
+ - `-c/--cpus`: Number of CPUs to use (default: 4)
166
+ - `--chr`: Specific chromosome to process
167
+ - `--reverse`: Scan reverse strand
168
+
169
+ ### Output Files
170
+
171
+ dsRNAscan generates several output files in a timestamped directory:
172
+
173
+ 1. **`*_merged_results.txt`**: Tab-delimited file with all predicted dsRNAs
174
+ - Columns include: coordinates, scores, sequences, structures, folding energy
175
+
176
+ 2. **`*.dsRNApredictions.bp`**: IGV-compatible visualization file
177
+ - Load in IGV to visualize dsRNA locations on genome
178
+
179
+ ### Example Workflow
180
+
181
+ ```bash
182
+ # 1. Basic genome scan
183
+ dsrnascan human_genome.fa -c 16 --output-dir results/
184
+
185
+ # 2. Scan specific region with sensitive parameters
186
+ dsrnascan chr21.fa -w 5000 -s 100 --score 30 --min 20
187
+
188
+ # 3. Process RNA-seq assembled transcripts
189
+ dsrnascan transcripts.fa -w 1000 --paired_cutoff 60
190
+
191
+ # 4. Scan both strands
192
+ dsrnascan sequence.fa --reverse
193
+ ```
194
+
195
+ ## 🔧 Installation Troubleshooting
196
+
197
+ ### "einverted binary not found"
198
+ The package needs einverted from EMBOSS. Solutions:
199
+ 1. Install EMBOSS: `conda install -c bioconda emboss`
200
+ 2. Or compile during install: `DSRNASCAN_COMPILE_FULL=true pip install .`
201
+ 3. Or use the package without functional testing: `dsrnascan --help` works without einverted
202
+
203
+ ### "ModuleNotFoundError: No module named 'ViennaRNA'"
204
+ Install ViennaRNA Python bindings:
205
+ ```bash
206
+ # Via conda (recommended)
207
+ conda install -c bioconda viennarna
208
+
209
+ # Via pip
210
+ pip install ViennaRNA
211
+ ```
212
+
213
+ ### Installation on HPC/Cluster
214
+ ```bash
215
+ module load python/3.8 # or your Python module
216
+ module load emboss # if available
217
+ pip install --user git+https://github.com/Bass-Lab/dsRNAscan.git
218
+ ```
219
+
220
+ ## 🧪 Testing
221
+
222
+ Run a test with sample data:
223
+ ```bash
224
+ # Create test file
225
+ echo ">test_sequence" > test.fasta
226
+ echo "GGGGGGGGGGAAAAAAAAAAAAAACCCCCCCCCC" >> test.fasta
227
+
228
+ # Run dsRNAscan
229
+ dsrnascan test.fasta -w 100 -s 50 --score 15
230
+ ```
231
+
232
+ ## 📚 Algorithm Details
233
+
234
+ dsRNAscan uses a multi-step approach:
235
+
236
+ 1. **Window Extraction**: Divides genome into overlapping windows
237
+ 2. **Inverted Repeat Detection**: Uses modified einverted with G-U wobble support
238
+ 3. **Structure Prediction**: Validates structures with RNAduplex (ViennaRNA)
239
+ 4. **Filtering**: Applies score and pairing percentage cutoffs
240
+ 5. **Parallel Processing**: Distributes windows across multiple CPUs
241
+
242
+ The key innovation is the **G-U wobble patch** for einverted, allowing detection of RNA-specific base pairs crucial for identifying functional dsRNA structures.
243
+
244
+ ## 📄 Citation
245
+
246
+ If you use dsRNAscan in your research, please cite:
247
+ ```
248
+ Bass Lab. dsRNAscan: A tool for genome-wide prediction of double-stranded RNA structures.
249
+ https://github.com/Bass-Lab/dsRNAscan
250
+ ```
251
+
252
+ ## 🤝 Contributing
253
+
254
+ Contributions are welcome! Please:
255
+ 1. Fork the repository
256
+ 2. Create a feature branch
257
+ 3. Make your changes
258
+ 4. Submit a pull request
259
+
260
+ ## 📝 License
261
+
262
+ This project is licensed under the GNU General Public License v3.0 - see the [LICENSE](LICENSE) file for details.
263
+
264
+ ## 🆘 Support
265
+
266
+ - **Issues**: [GitHub Issues](https://github.com/Bass-Lab/dsRNAscan/issues)
267
+ - **Documentation**: [GitHub Wiki](https://github.com/Bass-Lab/dsRNAscan/wiki)
268
+
269
+ ## Acknowledgments
270
+
271
+ - EMBOSS team for the einverted tool
272
+ - ViennaRNA team for RNA folding algorithms
273
+ - All contributors to the project
274
+
275
+ ---
276
+ **Note**: This tool is for research purposes. Ensure you understand the parameters for your specific use case.