cis-gs 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cis_gs-1.1.0/LICENSE +21 -0
- cis_gs-1.1.0/MANIFEST.in +4 -0
- cis_gs-1.1.0/PKG-INFO +359 -0
- cis_gs-1.1.0/README.md +296 -0
- cis_gs-1.1.0/animaltfdb_importer.py +1461 -0
- cis_gs-1.1.0/app_v4_open.py +10218 -0
- cis_gs-1.1.0/chromosome_utils.py +418 -0
- cis_gs-1.1.0/cis_gs/__init__.py +12 -0
- cis_gs-1.1.0/cis_gs/__main__.py +5 -0
- cis_gs-1.1.0/cis_gs/animaltfdb_importer.py +1461 -0
- cis_gs-1.1.0/cis_gs/app.py +6356 -0
- cis_gs-1.1.0/cis_gs/assets/banner.png +0 -0
- cis_gs-1.1.0/cis_gs/assets/favicon.ico +0 -0
- cis_gs-1.1.0/cis_gs/assets/favicon.png +0 -0
- cis_gs-1.1.0/cis_gs/assets/logo.png +0 -0
- cis_gs-1.1.0/cis_gs/chromosome_utils.py +418 -0
- cis_gs-1.1.0/cis_gs/cli.py +1870 -0
- cis_gs-1.1.0/cis_gs/cli_enrichment.py +169 -0
- cis_gs-1.1.0/cis_gs/cli_interactive.py +865 -0
- cis_gs-1.1.0/cis_gs/create_assets.py +104 -0
- cis_gs-1.1.0/cis_gs/enrichment/__init__.py +36 -0
- cis_gs-1.1.0/cis_gs/enrichment/core.py +281 -0
- cis_gs-1.1.0/cis_gs/enrichment/idmap.py +474 -0
- cis_gs-1.1.0/cis_gs/enrichment/kegg.py +225 -0
- cis_gs-1.1.0/cis_gs/enrichment/plots.py +87 -0
- cis_gs-1.1.0/cis_gs/planttfdb_importer.py +1056 -0
- cis_gs-1.1.0/cis_gs.egg-info/PKG-INFO +359 -0
- cis_gs-1.1.0/cis_gs.egg-info/SOURCES.txt +35 -0
- cis_gs-1.1.0/cis_gs.egg-info/dependency_links.txt +1 -0
- cis_gs-1.1.0/cis_gs.egg-info/entry_points.txt +5 -0
- cis_gs-1.1.0/cis_gs.egg-info/requires.txt +29 -0
- cis_gs-1.1.0/cis_gs.egg-info/top_level.txt +5 -0
- cis_gs-1.1.0/planttfdb_importer.py +1056 -0
- cis_gs-1.1.0/pyproject.toml +104 -0
- cis_gs-1.1.0/requirements.txt +35 -0
- cis_gs-1.1.0/setup.cfg +4 -0
- cis_gs-1.1.0/tests/test_smoke.py +59 -0
cis_gs-1.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Plant Signaling Lab, IISER Tirupati
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
cis_gs-1.1.0/MANIFEST.in
ADDED
cis_gs-1.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cis-gs
|
|
3
|
+
Version: 1.1.0
|
|
4
|
+
Summary: Cis-regulatory Element Genome Scanner - whole-genome cis-element discovery, expression coupling, and KEGG enrichment in one PyQt5 + CLI pipeline.
|
|
5
|
+
Author: Plant Signaling Lab, IISER Tirupati
|
|
6
|
+
Author-email: Ayushman Mallick <ayushmania2002@gmail.com>
|
|
7
|
+
Maintainer-email: Ayushman Mallick <ayushmania2002@gmail.com>
|
|
8
|
+
License: MIT
|
|
9
|
+
Project-URL: Homepage, https://github.com/Ayushmania2002/Cis-GS
|
|
10
|
+
Project-URL: Documentation, https://Ayushmania2002.github.io/Cis-GS/
|
|
11
|
+
Project-URL: Repository, https://github.com/Ayushmania2002/Cis-GS
|
|
12
|
+
Project-URL: Issues, https://github.com/Ayushmania2002/Cis-GS/issues
|
|
13
|
+
Project-URL: Changelog, https://github.com/Ayushmania2002/Cis-GS/releases
|
|
14
|
+
Keywords: bioinformatics,genomics,cis-regulatory-elements,promoter-analysis,motif-discovery,transcription-factor,TFBS,gene-expression,co-expression,WGCNA,KEGG,enrichment,PlantTFDB,AnimalTFDB,JASPAR,HOCOMOCO
|
|
15
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
16
|
+
Classifier: Intended Audience :: Science/Research
|
|
17
|
+
Classifier: Intended Audience :: Education
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
20
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
21
|
+
Classifier: Programming Language :: Python :: 3
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
26
|
+
Classifier: Operating System :: OS Independent
|
|
27
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
28
|
+
Classifier: Operating System :: MacOS
|
|
29
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
30
|
+
Classifier: Environment :: X11 Applications :: Qt
|
|
31
|
+
Classifier: Environment :: Console
|
|
32
|
+
Requires-Python: >=3.9
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
License-File: LICENSE
|
|
35
|
+
Requires-Dist: PyQt5>=5.15
|
|
36
|
+
Requires-Dist: numpy>=1.23
|
|
37
|
+
Requires-Dist: pandas>=1.5
|
|
38
|
+
Requires-Dist: scipy>=1.9
|
|
39
|
+
Requires-Dist: scikit-learn>=1.1
|
|
40
|
+
Requires-Dist: matplotlib>=3.6
|
|
41
|
+
Requires-Dist: seaborn>=0.12
|
|
42
|
+
Requires-Dist: matplotlib-venn>=0.11
|
|
43
|
+
Requires-Dist: biopython>=1.79
|
|
44
|
+
Requires-Dist: bcbio-gff>=0.6
|
|
45
|
+
Requires-Dist: logomaker>=0.8
|
|
46
|
+
Requires-Dist: networkx>=3.0
|
|
47
|
+
Requires-Dist: python-louvain>=0.16
|
|
48
|
+
Requires-Dist: Pillow>=9.0
|
|
49
|
+
Requires-Dist: requests>=2.28
|
|
50
|
+
Requires-Dist: tqdm>=4.64
|
|
51
|
+
Requires-Dist: PyMuPDF>=1.22
|
|
52
|
+
Provides-Extra: dev
|
|
53
|
+
Requires-Dist: pyinstaller>=6.0; extra == "dev"
|
|
54
|
+
Requires-Dist: build; extra == "dev"
|
|
55
|
+
Requires-Dist: twine; extra == "dev"
|
|
56
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
57
|
+
Provides-Extra: docs
|
|
58
|
+
Requires-Dist: sphinx>=7.0; extra == "docs"
|
|
59
|
+
Requires-Dist: sphinx-rtd-theme>=2.0; extra == "docs"
|
|
60
|
+
Requires-Dist: myst-parser>=2.0; extra == "docs"
|
|
61
|
+
Requires-Dist: sphinx-copybutton>=0.5; extra == "docs"
|
|
62
|
+
Dynamic: license-file
|
|
63
|
+
|
|
64
|
+
<div align="center">
|
|
65
|
+
|
|
66
|
+
<img src="assets/banner.png" alt="Cis-GS banner" width="780"/>
|
|
67
|
+
|
|
68
|
+
# Cis-GS · Cis-regulatory Element Genome Scanner
|
|
69
|
+
|
|
70
|
+
**A whole-genome pipeline for discovering cis-regulatory elements, coupling them to expression, and finishing with KEGG enrichment — in one PyQt5 desktop app *and* one interactive CLI.**
|
|
71
|
+
|
|
72
|
+
[![PyPI version](https://img.shields.io/pypi/v/cis-gs.svg)](https://pypi.org/project/cis-gs/)
|
|
73
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/cis-gs.svg)](https://pypi.org/project/cis-gs/)
|
|
74
|
+
[![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
|
|
75
|
+
[![Documentation](https://img.shields.io/badge/docs-online-blue.svg)](https://Ayushmania2002.github.io/Cis-GS/)
|
|
76
|
+
[![CI](https://img.shields.io/badge/CI-GitHub%20Actions-blue.svg)](https://github.com/Ayushmania2002/Cis-GS/actions)
|
|
77
|
+
[![Cite](https://img.shields.io/badge/cite-Cis--GS-orange.svg)](#citation)
|
|
78
|
+
|
|
79
|
+
</div>
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## Table of Contents
|
|
84
|
+
|
|
85
|
+
- [What Cis-GS Does](#what-cis-gs-does)
|
|
86
|
+
- [Highlights of v1.1](#highlights-of-v11)
|
|
87
|
+
- [Installation](#installation)
|
|
88
|
+
- [Quick Start](#quick-start)
|
|
89
|
+
- [The 7-Step Workflow](#the-7-step-workflow)
|
|
90
|
+
- [Supported Motif Databases](#supported-motif-databases)
|
|
91
|
+
- [CLI Reference](#cli-reference)
|
|
92
|
+
- [Programmatic API](#programmatic-api)
|
|
93
|
+
- [Screenshots](#screenshots)
|
|
94
|
+
- [Troubleshooting](#troubleshooting)
|
|
95
|
+
- [Contributing](#contributing)
|
|
96
|
+
- [Citation](#citation)
|
|
97
|
+
- [License](#license)
|
|
98
|
+
- [Contact](#contact)
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## What Cis-GS Does
|
|
103
|
+
|
|
104
|
+
Cis-GS automates the full **promoter → motif → expression → function** journey that
|
|
105
|
+
plant- and animal-genomics labs run by hand today:
|
|
106
|
+
|
|
107
|
+
1. **Fetch** a reference genome + annotation directly from NCBI (live Assembly search).
|
|
108
|
+
2. **Extract** promoter sequences (configurable length, strand-aware, intergenic-clipped) from any GFF3.
|
|
109
|
+
3. **Scan** those promoters for transcription-factor binding motifs imported from PlantTFDB, AnimalTFDB, JASPAR 2024, or HOCOMOCO v11 — or any user-supplied IUPAC consensus.
|
|
110
|
+
4. **Render** publication-ready sequence logos and per-gene hit tables with hypergeometric p-values and BH-FDR.
|
|
111
|
+
5. **Couple** the hits to your expression table (RNA-seq, microarray, qPCR) to flag motifs whose presence tracks expression direction.
|
|
112
|
+
6. **Build** a co-expression network (Pearson / Spearman / WGCNA-style soft-thresholding), detect modules via Louvain or hierarchical clustering, and visualise eigengenes.
|
|
113
|
+
7. **Enrich** the top module / cluster against KEGG (live REST queries, 11 700+ organisms) with one-sided hypergeometric ORA + Benjamini-Hochberg FDR.
|
|
114
|
+
|
|
115
|
+
Everything runs **locally**, is **offline-friendly after the first network fetch**, and exports CSV / SVG / PDF at every step.
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## Highlights of v1.1
|
|
120
|
+
|
|
121
|
+
- **Live KEGG dropdown** — every one of the 11 700+ organisms KEGG knows about, fetched on demand. No more stale species tables.
|
|
122
|
+
- **Live NCBI Taxonomy search** — type any common or Latin name; results stream back as you type.
|
|
123
|
+
- **60× faster ID conversion** — MyGene.info batched POST + progress bar (previously 60+ minutes for 10 k genes; now ~60 s).
|
|
124
|
+
- **Interactive CLI wizards** — `cis-gs wizard` walks you through every step with arrow-key menus. Every subcommand also accepts `-i / --interactive`.
|
|
125
|
+
- **Fuzzy "did you mean...?" suggestions** for mistyped CLI commands.
|
|
126
|
+
- **Brand-icon Contact tab** with real-website logos (LinkedIn, GitHub, KEGG, NCBI, PlantTFDB, AnimalTFDB, MyGene).
|
|
127
|
+
- **Modern single-color theme** (teal `#16A085`) with instant light / dark toggle — no more 1-2 s freeze.
|
|
128
|
+
- **First-run NCBI email prompt** — required by the Entrez API, stored only on your machine.
|
|
129
|
+
- **Three Gene-ID-Mapping methods** for the annoying NCBI-LOC vs species-database mismatch (column swap, mapping CSV, GFF3 Dbxref expansion).
|
|
130
|
+
|
|
131
|
+
See the [full release notes](https://github.com/Ayushmania2002/Cis-GS/releases) for the v1.0 → v1.1 diff.
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## Installation
|
|
136
|
+
|
|
137
|
+
### Option 1 — PyPI (Linux / macOS / Windows)
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
pip install cis-gs
|
|
141
|
+
cis-gs --help # CLI
|
|
142
|
+
cis-gs-gui # GUI
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
Python 3.9+ required. The first GUI launch will pop up a one-time NCBI email prompt.
|
|
146
|
+
|
|
147
|
+
### Option 2 — Standalone Windows executable
|
|
148
|
+
|
|
149
|
+
Download `Cis-GS.exe` from the [latest release](https://github.com/Ayushmania2002/Cis-GS/releases) page.
|
|
150
|
+
Double-click. No Python install needed. Roughly 120 MB.
|
|
151
|
+
|
|
152
|
+
### Option 3 — From source
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
git clone https://github.com/Ayushmania2002/Cis-GS.git
|
|
156
|
+
cd Cis-GS
|
|
157
|
+
pip install -e ".[dev,docs]"
|
|
158
|
+
python app_v4_open.py # GUI
|
|
159
|
+
python -m cis_gs --help # CLI
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
Full build details (PyInstaller spec, build scripts for all 3 OSes, PyPI release workflow):
|
|
163
|
+
see [`BUILD.md`](BUILD.md).
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## Quick Start
|
|
168
|
+
|
|
169
|
+
### GUI (one minute)
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
cis-gs-gui
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
1. **Step 1 — Promoters**: drop a FASTA + GFF3, set promoter length (default 2 kb), click *Extract*.
|
|
176
|
+
2. **Step 2 — Motif Search**: click *Import from PlantTFDB* (or AnimalTFDB), pick your species, tick the motifs you want, *Import Selected*.
|
|
177
|
+
3. **Step 7 — KEGG Enrichment**: pick a KEGG organism from the live dropdown, paste your gene list, run.
|
|
178
|
+
|
|
179
|
+
Done. CSVs and SVGs land in `~/CisGS-Workspace/`.
|
|
180
|
+
|
|
181
|
+
### CLI (interactive wizard)
|
|
182
|
+
|
|
183
|
+
```bash
|
|
184
|
+
cis-gs wizard
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
The wizard auto-detects what you've already produced and offers the next sensible step.
|
|
188
|
+
|
|
189
|
+
### CLI (one-liners)
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
# Extract 2 kb promoters from a GFF3 + FASTA
|
|
193
|
+
cis-gs extract --fasta genome.fa --gff annot.gff3 --upstream 2000 --out promoters.fa
|
|
194
|
+
|
|
195
|
+
# Scan promoters with a MEME motif file
|
|
196
|
+
cis-gs search --promoters promoters.fa --motifs motifs.meme --out hits.csv
|
|
197
|
+
|
|
198
|
+
# KEGG enrichment
|
|
199
|
+
cis-gs enrich-kegg --organism ath --genes top_module.txt --out kegg.csv
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
Every command supports `-i / --interactive` if you want to be walked through it.
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
## The 7-Step Workflow
|
|
207
|
+
|
|
208
|
+
| Step | What it does | Output |
|
|
209
|
+
|---|---|---|
|
|
210
|
+
| **1. Promoters** | Strand-aware promoter extraction from any FASTA + GFF3 | `promoters.fa` |
|
|
211
|
+
| **2. Motif Search** | IUPAC / MEME / PlantTFDB / AnimalTFDB scanning with hypergeometric p-values + BH-FDR | `hits.csv`, significance summary |
|
|
212
|
+
| **3. Motif Logos** | logomaker sequence logos with information-content shading | per-motif SVG / PNG |
|
|
213
|
+
| **4. Expression Feeding** | Joins hits with an expression CSV via three Gene-ID-Mapping methods (LOC swap, mapping CSV, GFF3 Dbxref expansion) | `expression_matched.csv` |
|
|
214
|
+
| **5. Coexpression** | Pearson / Spearman / WGCNA-style soft-thresholding, Louvain / hierarchical module detection | `network.gexf`, eigengene plot |
|
|
215
|
+
| **6. K-means** | Elbow + silhouette, deterministic seeding, exportable per-cluster gene lists | `clusters/*.txt` |
|
|
216
|
+
| **7. KEGG Enrichment** | Live REST query against any of 11 700+ KEGG organisms, hypergeometric ORA, BH-FDR, fold-enrichment | `kegg_enrichment.csv` |
|
|
217
|
+
|
|
218
|
+
A full description of each step's algorithm and parameters lives in the
|
|
219
|
+
[online documentation](https://Ayushmania2002.github.io/Cis-GS/).
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
## Supported Motif Databases
|
|
224
|
+
|
|
225
|
+
| Database | Coverage | Access |
|
|
226
|
+
|---|---|---|
|
|
227
|
+
| **PlantTFDB v5** | 157 plant species, ~6 000 motifs | Built-in importer with live species list |
|
|
228
|
+
| **AnimalTFDB v4** | Human, mouse, zebrafish, insects | Built-in importer |
|
|
229
|
+
| **JASPAR 2024 (non-redundant)** | 575 vertebrate + 99 insect motifs | Direct REST download |
|
|
230
|
+
| **HOCOMOCO v11** | ~700 human + ~400 mouse ChIP-Seq motifs | Direct REST download |
|
|
231
|
+
| **Custom IUPAC / MEME** | Anything you can write down | Paste into Step 2 |
|
|
232
|
+
|
|
233
|
+
---
|
|
234
|
+
|
|
235
|
+
## CLI Reference
|
|
236
|
+
|
|
237
|
+
```text
|
|
238
|
+
cis-gs --help
|
|
239
|
+
|
|
240
|
+
usage: cis-gs [-h] {wizard,fetch,extract,search,feed,coexpr,kmeans,enrich-kegg,id-convert} ...
|
|
241
|
+
|
|
242
|
+
wizard Step-by-step wizard (recommended for new users)
|
|
243
|
+
fetch Download a genome + annotation from NCBI
|
|
244
|
+
extract Extract promoter sequences from FASTA + GFF3
|
|
245
|
+
search Scan promoters for motif occurrences
|
|
246
|
+
feed Couple motif hits with an expression table
|
|
247
|
+
coexpr Build a co-expression network
|
|
248
|
+
kmeans K-means clustering with elbow / silhouette
|
|
249
|
+
enrich-kegg KEGG over-representation analysis
|
|
250
|
+
id-convert Convert gene IDs across namespaces (MyGene.info, batched)
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
Every subcommand accepts `-i / --interactive` for a guided run, and `--help` for full flags.
|
|
254
|
+
|
|
255
|
+
---
|
|
256
|
+
|
|
257
|
+
## Programmatic API
|
|
258
|
+
|
|
259
|
+
```python
|
|
260
|
+
from cis_gs.enrichment import KEGGEnricher
|
|
261
|
+
|
|
262
|
+
e = KEGGEnricher(organism="ath") # Arabidopsis
|
|
263
|
+
result = e.enrich(["AT1G01010", "AT2G18790", "AT3G09600"])
|
|
264
|
+
print(result.table.head())
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
```python
|
|
268
|
+
from cis_gs.enrichment.idmap import IDConverter
|
|
269
|
+
|
|
270
|
+
idc = IDConverter(species="human")
|
|
271
|
+
mapping = idc.convert(["TP53", "BRCA1", "MYC"], target="entrez")
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
Full API reference: [`Ayushmania2002.github.io/Cis-GS/api`](https://Ayushmania2002.github.io/Cis-GS/api.html).
|
|
275
|
+
|
|
276
|
+
---
|
|
277
|
+
|
|
278
|
+
## Screenshots
|
|
279
|
+
|
|
280
|
+
<div align="center">
|
|
281
|
+
|
|
282
|
+
| Step 1: Promoter extraction | Step 2: Motif search |
|
|
283
|
+
|---|---|
|
|
284
|
+
| <img src="docs/source/_static/screenshot_step1.png" width="350"/> | <img src="docs/source/_static/screenshot_step2.png" width="350"/> |
|
|
285
|
+
|
|
286
|
+
| Step 5: Coexpression network | Step 7: KEGG enrichment |
|
|
287
|
+
|---|---|
|
|
288
|
+
| <img src="docs/source/_static/screenshot_step5.png" width="350"/> | <img src="docs/source/_static/screenshot_step7.png" width="350"/> |
|
|
289
|
+
|
|
290
|
+
</div>
|
|
291
|
+
|
|
292
|
+
> *Screenshots are placeholder paths; replace with actual PNGs in `docs/source/_static/` before publishing.*
|
|
293
|
+
|
|
294
|
+
---
|
|
295
|
+
|
|
296
|
+
## Troubleshooting
|
|
297
|
+
|
|
298
|
+
| Symptom | Likely cause | Fix |
|
|
299
|
+
|---|---|---|
|
|
300
|
+
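| `cis-gs-gui: command not found` after `pip install` | pip's scripts directory is not on `PATH` | Add pip's scripts directory to `PATH` (for `pip install --user` on Linux/macOS this is usually `~/.local/bin`); in the meantime, `python -m cis_gs` still runs the CLI |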
|
|
301
|
+
| First NCBI Fetch returns 0 results | NCBI email not set | Settings → Set NCBI Email, then retry |
|
|
302
|
+
| `KEGG REST unreachable` | Firewall or VPN | Set `HTTPS_PROXY` env var, or use the *Browse & Import* tab with a manually downloaded MEME |
|
|
303
|
+
| Motif hits CSV has empty `gene_symbol` column | Annotation GFF3 not loaded in Step 2 | Re-run with the same GFF3 from Step 1 in *Gene ID Resolution* |
|
|
304
|
+
| Coexpression freezes on > 30k genes | All-vs-all correlation is O(n²) | Pre-filter to expressed genes (TPM > 1) before Step 5 |
|
|
305
|
+
|
|
306
|
+
If you hit anything else, open an [issue](https://github.com/Ayushmania2002/Cis-GS/issues) and attach the log file `~/CisGS-Workspace/cisgs.log`.
|
|
307
|
+
|
|
308
|
+
---
|
|
309
|
+
|
|
310
|
+
## Contributing
|
|
311
|
+
|
|
312
|
+
Bug reports, feature requests, and pull requests are welcome.
|
|
313
|
+
For substantial contributions please open an issue first to discuss the change.
|
|
314
|
+
|
|
315
|
+
```bash
|
|
316
|
+
git clone https://github.com/Ayushmania2002/Cis-GS.git
|
|
317
|
+
cd Cis-GS
|
|
318
|
+
pip install -e ".[dev]"
|
|
319
|
+
pytest # run the test suite
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
---
|
|
323
|
+
|
|
324
|
+
## Citation
|
|
325
|
+
|
|
326
|
+
If Cis-GS contributes to a publication, please cite:
|
|
327
|
+
|
|
328
|
+
> Mallick A. *Cis-GS: a unified pipeline for whole-genome cis-regulatory element
|
|
329
|
+
> discovery, expression coupling, and KEGG enrichment.* (manuscript in preparation,
|
|
330
|
+
> Plant Signaling Lab, IISER Tirupati, 2026).
|
|
331
|
+
|
|
332
|
+
BibTeX:
|
|
333
|
+
|
|
334
|
+
```bibtex
|
|
335
|
+
@software{mallick_cisgs_2026,
|
|
336
|
+
author = {Mallick, Ayushman},
|
|
337
|
+
title = {{Cis-GS}: Cis-regulatory Element Genome Scanner},
|
|
338
|
+
year = {2026},
|
|
339
|
+
url = {https://github.com/Ayushmania2002/Cis-GS},
|
|
340
|
+
version = {1.1.0}
|
|
341
|
+
}
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
A `CITATION.cff` is included for GitHub's automatic citation widget.
|
|
345
|
+
|
|
346
|
+
---
|
|
347
|
+
|
|
348
|
+
## License
|
|
349
|
+
|
|
350
|
+
Released under the [MIT License](LICENSE). Free for academic and commercial use.
|
|
351
|
+
|
|
352
|
+
---
|
|
353
|
+
|
|
354
|
+
## Contact
|
|
355
|
+
|
|
356
|
+
**Ayushman Mallick** · ayushmania2002@gmail.com
|
|
357
|
+
Plant Signaling Lab, [IISER Tirupati](https://www.iisertirupati.ac.in/)
|
|
358
|
+
|
|
359
|
+
<sub>© 2026 Ayushman Mallick · Plant Signaling Lab · Cis-GS</sub>
|
cis_gs-1.1.0/README.md
ADDED
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
<img src="assets/banner.png" alt="Cis-GS banner" width="780"/>
|
|
4
|
+
|
|
5
|
+
# Cis-GS · Cis-regulatory Element Genome Scanner
|
|
6
|
+
|
|
7
|
+
**A whole-genome pipeline for discovering cis-regulatory elements, coupling them to expression, and finishing with KEGG enrichment — in one PyQt5 desktop app *and* one interactive CLI.**
|
|
8
|
+
|
|
9
|
+
[![PyPI version](https://img.shields.io/pypi/v/cis-gs.svg)](https://pypi.org/project/cis-gs/)
|
|
10
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/cis-gs.svg)](https://pypi.org/project/cis-gs/)
|
|
11
|
+
[![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
|
|
12
|
+
[![Documentation](https://img.shields.io/badge/docs-online-blue.svg)](https://Ayushmania2002.github.io/Cis-GS/)
|
|
13
|
+
[![CI](https://img.shields.io/badge/CI-GitHub%20Actions-blue.svg)](https://github.com/Ayushmania2002/Cis-GS/actions)
|
|
14
|
+
[![Cite](https://img.shields.io/badge/cite-Cis--GS-orange.svg)](#citation)
|
|
15
|
+
|
|
16
|
+
</div>
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Table of Contents
|
|
21
|
+
|
|
22
|
+
- [What Cis-GS Does](#what-cis-gs-does)
|
|
23
|
+
- [Highlights of v1.1](#highlights-of-v11)
|
|
24
|
+
- [Installation](#installation)
|
|
25
|
+
- [Quick Start](#quick-start)
|
|
26
|
+
- [The 7-Step Workflow](#the-7-step-workflow)
|
|
27
|
+
- [Supported Motif Databases](#supported-motif-databases)
|
|
28
|
+
- [CLI Reference](#cli-reference)
|
|
29
|
+
- [Programmatic API](#programmatic-api)
|
|
30
|
+
- [Screenshots](#screenshots)
|
|
31
|
+
- [Troubleshooting](#troubleshooting)
|
|
32
|
+
- [Contributing](#contributing)
|
|
33
|
+
- [Citation](#citation)
|
|
34
|
+
- [License](#license)
|
|
35
|
+
- [Contact](#contact)
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## What Cis-GS Does
|
|
40
|
+
|
|
41
|
+
Cis-GS automates the full **promoter → motif → expression → function** journey that
|
|
42
|
+
plant- and animal-genomics labs run by hand today:
|
|
43
|
+
|
|
44
|
+
1. **Fetch** a reference genome + annotation directly from NCBI (live Assembly search).
|
|
45
|
+
2. **Extract** promoter sequences (configurable length, strand-aware, intergenic-clipped) from any GFF3.
|
|
46
|
+
3. **Scan** those promoters for transcription-factor binding motifs imported from PlantTFDB, AnimalTFDB, JASPAR 2024, or HOCOMOCO v11 — or any user-supplied IUPAC consensus.
|
|
47
|
+
4. **Render** publication-ready sequence logos and per-gene hit tables with hypergeometric p-values and BH-FDR.
|
|
48
|
+
5. **Couple** the hits to your expression table (RNA-seq, microarray, qPCR) to flag motifs whose presence tracks expression direction.
|
|
49
|
+
6. **Build** a co-expression network (Pearson / Spearman / WGCNA-style soft-thresholding), detect modules via Louvain or hierarchical clustering, and visualise eigengenes.
|
|
50
|
+
7. **Enrich** the top module / cluster against KEGG (live REST queries, 11 700+ organisms) with one-sided hypergeometric ORA + Benjamini-Hochberg FDR.
|
|
51
|
+
|
|
52
|
+
Everything runs **locally**, is **offline-friendly after the first network fetch**, and exports CSV / SVG / PDF at every step.
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Highlights of v1.1
|
|
57
|
+
|
|
58
|
+
- **Live KEGG dropdown** — every one of the 11 700+ organisms KEGG knows about, fetched on demand. No more stale species tables.
|
|
59
|
+
- **Live NCBI Taxonomy search** — type any common or Latin name; results stream back as you type.
|
|
60
|
+
- **60× faster ID conversion** — MyGene.info batched POST + progress bar (previously 60+ minutes for 10 k genes; now ~60 s).
|
|
61
|
+
- **Interactive CLI wizards** — `cis-gs wizard` walks you through every step with arrow-key menus. Every subcommand also accepts `-i / --interactive`.
|
|
62
|
+
- **Fuzzy "did you mean...?" suggestions** for mistyped CLI commands.
|
|
63
|
+
- **Brand-icon Contact tab** with real-website logos (LinkedIn, GitHub, KEGG, NCBI, PlantTFDB, AnimalTFDB, MyGene).
|
|
64
|
+
- **Modern single-color theme** (teal `#16A085`) with instant light / dark toggle — no more 1-2 s freeze.
|
|
65
|
+
- **First-run NCBI email prompt** — required by the Entrez API, stored only on your machine.
|
|
66
|
+
- **Three Gene-ID-Mapping methods** for the annoying NCBI-LOC vs species-database mismatch (column swap, mapping CSV, GFF3 Dbxref expansion).
|
|
67
|
+
|
|
68
|
+
See the [full release notes](https://github.com/Ayushmania2002/Cis-GS/releases) for the v1.0 → v1.1 diff.
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## Installation
|
|
73
|
+
|
|
74
|
+
### Option 1 — PyPI (Linux / macOS / Windows)
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
pip install cis-gs
|
|
78
|
+
cis-gs --help # CLI
|
|
79
|
+
cis-gs-gui # GUI
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Python 3.9+ required. The first GUI launch will pop up a one-time NCBI email prompt.
|
|
83
|
+
|
|
84
|
+
### Option 2 — Standalone Windows executable
|
|
85
|
+
|
|
86
|
+
Download `Cis-GS.exe` from the [latest release](https://github.com/Ayushmania2002/Cis-GS/releases) page.
|
|
87
|
+
Double-click. No Python install needed. Roughly 120 MB.
|
|
88
|
+
|
|
89
|
+
### Option 3 — From source
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
git clone https://github.com/Ayushmania2002/Cis-GS.git
|
|
93
|
+
cd Cis-GS
|
|
94
|
+
pip install -e ".[dev,docs]"
|
|
95
|
+
python app_v4_open.py # GUI
|
|
96
|
+
python -m cis_gs --help # CLI
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Full build details (PyInstaller spec, build scripts for all 3 OSes, PyPI release workflow):
|
|
100
|
+
see [`BUILD.md`](BUILD.md).
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Quick Start
|
|
105
|
+
|
|
106
|
+
### GUI (one minute)
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
cis-gs-gui
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
1. **Step 1 — Promoters**: drop a FASTA + GFF3, set promoter length (default 2 kb), click *Extract*.
|
|
113
|
+
2. **Step 2 — Motif Search**: click *Import from PlantTFDB* (or AnimalTFDB), pick your species, tick the motifs you want, *Import Selected*.
|
|
114
|
+
3. **Step 7 — KEGG Enrichment**: pick a KEGG organism from the live dropdown, paste your gene list, run.
|
|
115
|
+
|
|
116
|
+
Done. CSVs and SVGs land in `~/CisGS-Workspace/`.
|
|
117
|
+
|
|
118
|
+
### CLI (interactive wizard)
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
cis-gs wizard
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
The wizard auto-detects what you've already produced and offers the next sensible step.
|
|
125
|
+
|
|
126
|
+
### CLI (one-liners)
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
# Extract 2 kb promoters from a GFF3 + FASTA
|
|
130
|
+
cis-gs extract --fasta genome.fa --gff annot.gff3 --upstream 2000 --out promoters.fa
|
|
131
|
+
|
|
132
|
+
# Scan promoters with a MEME motif file
|
|
133
|
+
cis-gs search --promoters promoters.fa --motifs motifs.meme --out hits.csv
|
|
134
|
+
|
|
135
|
+
# KEGG enrichment
|
|
136
|
+
cis-gs enrich-kegg --organism ath --genes top_module.txt --out kegg.csv
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
Every command supports `-i / --interactive` if you want to be walked through it.
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## The 7-Step Workflow
|
|
144
|
+
|
|
145
|
+
| Step | What it does | Output |
|
|
146
|
+
|---|---|---|
|
|
147
|
+
| **1. Promoters** | Strand-aware promoter extraction from any FASTA + GFF3 | `promoters.fa` |
|
|
148
|
+
| **2. Motif Search** | IUPAC / MEME / PlantTFDB / AnimalTFDB scanning with hypergeometric p-values + BH-FDR | `hits.csv`, significance summary |
|
|
149
|
+
| **3. Motif Logos** | logomaker sequence logos with information-content shading | per-motif SVG / PNG |
|
|
150
|
+
| **4. Expression Feeding** | Joins hits with an expression CSV via three Gene-ID-Mapping methods (LOC swap, mapping CSV, GFF3 Dbxref expansion) | `expression_matched.csv` |
|
|
151
|
+
| **5. Coexpression** | Pearson / Spearman / WGCNA-style soft-thresholding, Louvain / hierarchical module detection | `network.gexf`, eigengene plot |
|
|
152
|
+
| **6. K-means** | Elbow + silhouette, deterministic seeding, exportable per-cluster gene lists | `clusters/*.txt` |
|
|
153
|
+
| **7. KEGG Enrichment** | Live REST query against any of 11 700+ KEGG organisms, hypergeometric ORA, BH-FDR, fold-enrichment | `kegg_enrichment.csv` |
|
|
154
|
+
|
|
155
|
+
A full description of each step's algorithm and parameters lives in the
|
|
156
|
+
[online documentation](https://Ayushmania2002.github.io/Cis-GS/).
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## Supported Motif Databases
|
|
161
|
+
|
|
162
|
+
| Database | Coverage | Access |
|
|
163
|
+
|---|---|---|
|
|
164
|
+
| **PlantTFDB v5** | 157 plant species, ~6 000 motifs | Built-in importer with live species list |
|
|
165
|
+
| **AnimalTFDB v4** | Human, mouse, zebrafish, insects | Built-in importer |
|
|
166
|
+
| **JASPAR 2024 (non-redundant)** | 575 vertebrate + 99 insect motifs | Direct REST download |
|
|
167
|
+
| **HOCOMOCO v11** | ~700 human + ~400 mouse ChIP-Seq motifs | Direct REST download |
|
|
168
|
+
| **Custom IUPAC / MEME** | Anything you can write down | Paste into Step 2 |
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
## CLI Reference
|
|
173
|
+
|
|
174
|
+
```text
|
|
175
|
+
cis-gs --help
|
|
176
|
+
|
|
177
|
+
usage: cis-gs [-h] {wizard,fetch,extract,search,feed,coexpr,kmeans,enrich-kegg,id-convert} ...
|
|
178
|
+
|
|
179
|
+
wizard Step-by-step wizard (recommended for new users)
|
|
180
|
+
fetch Download a genome + annotation from NCBI
|
|
181
|
+
extract Extract promoter sequences from FASTA + GFF3
|
|
182
|
+
search Scan promoters for motif occurrences
|
|
183
|
+
feed Couple motif hits with an expression table
|
|
184
|
+
coexpr Build a co-expression network
|
|
185
|
+
kmeans K-means clustering with elbow / silhouette
|
|
186
|
+
enrich-kegg KEGG over-representation analysis
|
|
187
|
+
id-convert Convert gene IDs across namespaces (MyGene.info, batched)
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
Every subcommand accepts `-i / --interactive` for a guided run, and `--help` for full flags.
|
|
191
|
+
|
|
192
|
+
---
|
|
193
|
+
|
|
194
|
+
## Programmatic API
|
|
195
|
+
|
|
196
|
+
```python
|
|
197
|
+
from cis_gs.enrichment import KEGGEnricher
|
|
198
|
+
|
|
199
|
+
e = KEGGEnricher(organism="ath") # Arabidopsis
|
|
200
|
+
result = e.enrich(["AT1G01010", "AT2G18790", "AT3G09600"])
|
|
201
|
+
print(result.table.head())
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
from cis_gs.enrichment.idmap import IDConverter
|
|
206
|
+
|
|
207
|
+
idc = IDConverter(species="human")
|
|
208
|
+
mapping = idc.convert(["TP53", "BRCA1", "MYC"], target="entrez")
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
Full API reference: [`Ayushmania2002.github.io/Cis-GS/api`](https://Ayushmania2002.github.io/Cis-GS/api.html).
|
|
212
|
+
|
|
213
|
+
---
|
|
214
|
+
|
|
215
|
+
## Screenshots
|
|
216
|
+
|
|
217
|
+
<div align="center">
|
|
218
|
+
|
|
219
|
+
| Step 1: Promoter extraction | Step 2: Motif search |
|
|
220
|
+
|---|---|
|
|
221
|
+
| <img src="docs/source/_static/screenshot_step1.png" width="350"/> | <img src="docs/source/_static/screenshot_step2.png" width="350"/> |
|
|
222
|
+
|
|
223
|
+
| Step 5: Coexpression network | Step 7: KEGG enrichment |
|
|
224
|
+
|---|---|
|
|
225
|
+
| <img src="docs/source/_static/screenshot_step5.png" width="350"/> | <img src="docs/source/_static/screenshot_step7.png" width="350"/> |
|
|
226
|
+
|
|
227
|
+
</div>
|
|
228
|
+
|
|
229
|
+
> *Screenshots are placeholder paths; replace with actual PNGs in `docs/source/_static/` before publishing.*
|
|
230
|
+
|
|
231
|
+
---
|
|
232
|
+
|
|
233
|
+
## Troubleshooting
|
|
234
|
+
|
|
235
|
+
| Symptom | Likely cause | Fix |
|
|
236
|
+
|---|---|---|
|
|
237
|
+
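| `cis-gs-gui: command not found` after `pip install` | pip's scripts directory is not on `PATH` | Add pip's scripts directory to `PATH` (for `pip install --user` on Linux/macOS this is usually `~/.local/bin`); in the meantime, `python -m cis_gs` still runs the CLI |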
|
|
238
|
+
| First NCBI Fetch returns 0 results | NCBI email not set | Settings → Set NCBI Email, then retry |
|
|
239
|
+
| `KEGG REST unreachable` | Firewall or VPN | Set `HTTPS_PROXY` env var, or use the *Browse & Import* tab with a manually downloaded MEME |
|
|
240
|
+
| Motif hits CSV has empty `gene_symbol` column | Annotation GFF3 not loaded in Step 2 | Re-run with the same GFF3 from Step 1 in *Gene ID Resolution* |
|
|
241
|
+
| Coexpression freezes on > 30k genes | All-vs-all correlation is O(n²) | Pre-filter to expressed genes (TPM > 1) before Step 5 |
|
|
242
|
+
|
|
243
|
+
If you hit anything else, open an [issue](https://github.com/Ayushmania2002/Cis-GS/issues) and attach the log file `~/CisGS-Workspace/cisgs.log`.
|
|
244
|
+
|
|
245
|
+
---
|
|
246
|
+
|
|
247
|
+
## Contributing
|
|
248
|
+
|
|
249
|
+
Bug reports, feature requests, and pull requests are welcome.
|
|
250
|
+
For substantial contributions please open an issue first to discuss the change.
|
|
251
|
+
|
|
252
|
+
```bash
|
|
253
|
+
git clone https://github.com/Ayushmania2002/Cis-GS.git
|
|
254
|
+
cd Cis-GS
|
|
255
|
+
pip install -e ".[dev]"
|
|
256
|
+
pytest # run the test suite
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
---
|
|
260
|
+
|
|
261
|
+
## Citation
|
|
262
|
+
|
|
263
|
+
If Cis-GS contributes to a publication, please cite:
|
|
264
|
+
|
|
265
|
+
> Mallick A. *Cis-GS: a unified pipeline for whole-genome cis-regulatory element
|
|
266
|
+
> discovery, expression coupling, and KEGG enrichment.* (manuscript in preparation,
|
|
267
|
+
> Plant Signaling Lab, IISER Tirupati, 2026).
|
|
268
|
+
|
|
269
|
+
BibTeX:
|
|
270
|
+
|
|
271
|
+
```bibtex
|
|
272
|
+
@software{mallick_cisgs_2026,
|
|
273
|
+
author = {Mallick, Ayushman},
|
|
274
|
+
title = {{Cis-GS}: Cis-regulatory Element Genome Scanner},
|
|
275
|
+
year = {2026},
|
|
276
|
+
url = {https://github.com/Ayushmania2002/Cis-GS},
|
|
277
|
+
version = {1.1.0}
|
|
278
|
+
}
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
A `CITATION.cff` is included for GitHub's automatic citation widget.
|
|
282
|
+
|
|
283
|
+
---
|
|
284
|
+
|
|
285
|
+
## License
|
|
286
|
+
|
|
287
|
+
Released under the [MIT License](LICENSE). Free for academic and commercial use.
|
|
288
|
+
|
|
289
|
+
---
|
|
290
|
+
|
|
291
|
+
## Contact
|
|
292
|
+
|
|
293
|
+
**Ayushman Mallick** · ayushmania2002@gmail.com
|
|
294
|
+
Plant Signaling Lab, [IISER Tirupati](https://www.iisertirupati.ac.in/)
|
|
295
|
+
|
|
296
|
+
<sub>© 2026 Ayushman Mallick · Plant Signaling Lab · Cis-GS</sub>
|