pycircdb 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycircdb-0.1.0/PKG-INFO +31 -0
- pycircdb-0.1.0/README.md +16 -0
- pycircdb-0.1.0/config.py +116 -0
- pycircdb-0.1.0/main.py +331 -0
- pycircdb-0.1.0/pycircdb.egg-info/PKG-INFO +31 -0
- pycircdb-0.1.0/pycircdb.egg-info/SOURCES.txt +29 -0
- pycircdb-0.1.0/pycircdb.egg-info/dependency_links.txt +1 -0
- pycircdb-0.1.0/pycircdb.egg-info/entry_points.txt +2 -0
- pycircdb-0.1.0/pycircdb.egg-info/requires.txt +7 -0
- pycircdb-0.1.0/pycircdb.egg-info/top_level.txt +12 -0
- pycircdb-0.1.0/pyproject.toml +47 -0
- pycircdb-0.1.0/sequence_tables/extract_sequences.py +68 -0
- pycircdb-0.1.0/sequence_tables/split_cscd_by_chr.py +31 -0
- pycircdb-0.1.0/setup.cfg +4 -0
- pycircdb-0.1.0/utils/__init__.py +0 -0
- pycircdb-0.1.0/utils/annotate/annotate_driver.py +82 -0
- pycircdb-0.1.0/utils/annotate/annotate_subdag.py +170 -0
- pycircdb-0.1.0/utils/annotate/annotate_subdriver.py +69 -0
- pycircdb-0.1.0/utils/connect_s3/download_annotation_tables.py +171 -0
- pycircdb-0.1.0/utils/connect_s3/download_lookup_tables.py +84 -0
- pycircdb-0.1.0/utils/connect_s3/download_mirna_tables.py +102 -0
- pycircdb-0.1.0/utils/connect_s3/download_rbp_tables.py +102 -0
- pycircdb-0.1.0/utils/connect_s3/download_sequence_tables.py +163 -0
- pycircdb-0.1.0/utils/detect_inputs/detect_inputs_driver.py +33 -0
- pycircdb-0.1.0/utils/detect_inputs/detect_inputs_subdag.py +82 -0
- pycircdb-0.1.0/utils/fasta/sequence_driver.py +74 -0
- pycircdb-0.1.0/utils/fasta/sequence_subdag.py +183 -0
- pycircdb-0.1.0/utils/md5sum_check.py +110 -0
- pycircdb-0.1.0/utils/mirna/mirna_driver.py +101 -0
- pycircdb-0.1.0/utils/output_mapping.py +0 -0
- pycircdb-0.1.0/utils/rbp/rbp_driver.py +94 -0
pycircdb-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pycircdb
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: pycircdb: integrated circRNA database annotation for computational workflows.
|
|
5
|
+
Author-email: Barry Digby <b.digby237@gmail.com>
|
|
6
|
+
Requires-Python: >=3.14
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: apache-hamilton[lsp]>=1.90.0
|
|
9
|
+
Requires-Dist: boto3>=1.42.89
|
|
10
|
+
Requires-Dist: click>=8.3.2
|
|
11
|
+
Requires-Dist: polars>=1.39.3
|
|
12
|
+
Requires-Dist: pyyaml>=6.0.3
|
|
13
|
+
Requires-Dist: rich-click>=1.9.7
|
|
14
|
+
Requires-Dist: sf-hamilton[lsp]>=1.89.0
|
|
15
|
+
|
|
16
|
+
# pycircdb
|
|
17
|
+
|
|
18
|
+
A command-line tool for identifying and annotating circRNA interactions.
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install pycircdb
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Usage
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
pycircdb --help
|
|
30
|
+
```
|
|
31
|
+
|
pycircdb-0.1.0/README.md
ADDED
pycircdb-0.1.0/config.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import shutil
|
|
3
|
+
import rich_click as click
|
|
4
|
+
from rich.console import Console, Group
|
|
5
|
+
from rich.panel import Panel
|
|
6
|
+
from rich.text import Text
|
|
7
|
+
from rich.table import Table
|
|
8
|
+
from rich import box
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import List, Optional, TypedDict, Union
|
|
11
|
+
|
|
12
|
+
console = Console(stderr=True, highlight=False)
|
|
13
|
+
CONFIG_DIR = Path(__file__).parent.absolute()
|
|
14
|
+
|
|
15
|
+
DEFAULT_CONFIG_DATA = {
|
|
16
|
+
"global_parameters": {
|
|
17
|
+
"max_tasks": 1,
|
|
18
|
+
"output_dir": "results/"
|
|
19
|
+
},
|
|
20
|
+
"samples": {
|
|
21
|
+
"sample_1": {
|
|
22
|
+
"file_path": "path/to/sample1.txt",
|
|
23
|
+
"reference": "hg38",
|
|
24
|
+
"zero_based": True
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
class ToolConfig(TypedDict, total=False):
    """Typed view of the workflow configuration dictionary.

    All keys are optional (``total=False``). The first four keys are the
    original declaration; the remaining keys are the ones the code in this
    package actually reads or writes on the config dict.
    """

    # Originally declared keys (kept for backward compatibility).
    input: List[str]
    reference: Union[str, List[str], None]
    zero_based: Union[bool, List[bool], None]
    max_tasks: int

    # Top-level sections present in DEFAULT_CONFIG_DATA.
    global_parameters: dict
    samples: dict

    # Selections read by print_config_panel via config.get(...).
    annotate_databases: List[str]
    fasta_databases: List[str]
    mirna_algorithms: List[str]

    # Verbosity level stored on the config by the CLI entry point.
    verbose: int
|
|
34
|
+
|
|
35
|
+
def create_config(name: str):
    """Write the built-in default configuration to ``<name>.json``.

    The file is created in the current working directory and the resulting
    path is echoed to the user.
    """
    destination = Path.cwd() / f"{name}.json"
    serialized = json.dumps(DEFAULT_CONFIG_DATA, indent=4)
    with open(destination, "w") as handle:
        handle.write(serialized)
    click.echo(f"Created default configuration file at {destination}")
|
|
41
|
+
|
|
42
|
+
def load_config(user_config_path: Optional[str] = None, verbose: int = 1) -> ToolConfig:
    """Load the default config and override it with a user config file.

    Args:
        user_config_path: Optional path to a JSON file. Its top-level keys
            replace the corresponding default keys. A path that is not an
            existing file is ignored and the defaults are used.
        verbose: When >= 1, a status line naming the configuration source
            is printed to the module console.

    Returns:
        The merged configuration dictionary.
    """
    # Function-scope import keeps this block self-contained.
    import copy

    # Deep-copy so callers mutating nested sections (e.g. "samples") never
    # alter the module-level DEFAULT_CONFIG_DATA.
    config: ToolConfig = copy.deepcopy(DEFAULT_CONFIG_DATA)

    # Resolved path of the user file, set only if it was actually read.
    loaded_from: Optional[Path] = None

    if user_config_path:
        user_path = Path(user_config_path)
        if user_path.is_file():
            with user_path.open() as f:
                # An empty/null JSON document is treated as "no overrides".
                user_config = json.load(f) or {}
            config.update(user_config)
            loaded_from = user_path.resolve()

    if verbose >= 1:
        if loaded_from is not None:
            display_path = str(loaded_from)
        elif user_config_path:
            # Do not claim a file was loaded when it was not found.
            display_path = f"defaults ({user_config_path} not found)"
        else:
            display_path = 'defaults'
        console.print(Text(f"✓ Configuration File Loaded: {display_path}", style="bold green"))

    return config
|
|
65
|
+
|
|
66
|
+
def print_config_panel(config: ToolConfig, user_config_path: Optional[str] = None):
    """Print a rich panel summarising the workflow configuration.

    Three tables are rendered: global parameters (one column per key, one
    row of values), samples (name, input file, reference, 0-based flag) and
    the selected annotation databases / FASTA databases / miRNA algorithms.

    Args:
        config: Merged configuration dictionary.
        user_config_path: Accepted for interface compatibility; not used
            when rendering.
    """
    # Function-scope import keeps this block self-contained.
    from itertools import zip_longest

    # --- Global parameters: keys become columns, values a single row. ---
    global_table = Table(show_header=True, header_style="bold cyan", box=box.ROUNDED, expand=True)
    global_params = config.get("global_parameters", {})
    for key in global_params:
        global_table.add_column(str(key), justify="center", style="magenta")
    if global_params:
        global_table.add_row(*[str(val) for val in global_params.values()])

    # --- Samples: one row per configured sample. ---
    sample_table = Table(show_header=True, header_style="bold blue", box=box.ROUNDED, expand=True)
    sample_table.add_column("Sample Name", style="bold green")
    sample_table.add_column("Input File", style="yellow")
    sample_table.add_column("Ref", style="cyan", justify="center")
    sample_table.add_column("0-based", style="magenta", justify="center")

    for sample_name, sample_info in config.get("samples", {}).items():
        sample_table.add_row(
            sample_name,
            # "input" takes precedence; "file_path" is the fallback key.
            str(sample_info.get("input", sample_info.get("file_path", ""))),
            str(sample_info.get("reference", "")),
            str(sample_info.get("zero_based", ""))
        )

    # --- Databases & algorithms: three independent lists side by side. ---
    db_table = Table(show_header=True, header_style="bold yellow", box=box.ROUNDED, expand=True)
    db_table.add_column("Annotation DB", style="cyan")
    db_table.add_column("FASTA DB", style="magenta")
    db_table.add_column("miRNA Algorithms", style="green")

    ann_dbs = config.get("annotate_databases", ["arraystar", "circbank", "circbase", "circpedia", "circrna_db", "cscd", "exorbase"])
    fas_dbs = config.get("fasta_databases", ["arraystar", "circbank", "circbase", "circpedia", "circrna_db", "cscd"])
    mir_algs = config.get("mirna_algorithms", ["miranda", "pita", "targetscan"])

    # Pad the shorter lists with empty cells; str() guards against
    # non-string entries coming from a hand-written JSON config.
    for a, f, m in zip_longest(ann_dbs, fas_dbs, mir_algs, fillvalue=""):
        db_table.add_row(str(a), str(f), str(m))

    panel_group = Group(
        Text("Global Parameters:", style="bold white"),
        global_table,
        Text(""),
        Text("Samples:", style="bold white"),
        sample_table,
        Text(""),
        Text("Databases & Algorithms:", style="bold white"),
        db_table
    )

    console.print(Panel(panel_group, title="[bold white]Workflow Configuration[/bold white]", border_style="green", expand=False))
|
pycircdb-0.1.0/main.py
ADDED
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import shutil
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import rich_click as click
|
|
5
|
+
from typing import Tuple, List, Optional
|
|
6
|
+
from config import create_config, load_config, print_config_panel
|
|
7
|
+
|
|
8
|
+
# Workflow stuff
|
|
9
|
+
from hamilton import driver
|
|
10
|
+
from hamilton.execution import executors
|
|
11
|
+
|
|
12
|
+
from utils.connect_s3.download_annotation_tables import fetch_annotation_tables
|
|
13
|
+
from utils.connect_s3.download_sequence_tables import fetch_sequence_tables
|
|
14
|
+
from utils.connect_s3.download_mirna_tables import fetch_mirna_tables
|
|
15
|
+
from utils.connect_s3.download_rbp_tables import fetch_rbp_tables
|
|
16
|
+
|
|
17
|
+
import utils.detect_inputs.detect_inputs_driver as instantiate_lookup_driver
|
|
18
|
+
import utils.annotate.annotate_driver as annotation_driver
|
|
19
|
+
import utils.fasta.sequence_driver as sequence_driver
|
|
20
|
+
import utils.mirna.mirna_driver as mirna_driver
|
|
21
|
+
import utils.rbp.rbp_driver as rbp_driver
|
|
22
|
+
from rich.console import Console
|
|
23
|
+
|
|
24
|
+
console = Console(stderr=True, highlight=False)
|
|
25
|
+
|
|
26
|
+
@click.group(chain=True, context_settings=dict(help_option_names=['-h', '--help']))
@click.option(
    "-c",
    "--config",
    type=click.Path(exists=True, dir_okay=False, readable=True),
    required=False,
    help="Path to the JSON config file containing workflow parameters."
)
@click.option(
    "-v",
    "--verbose",
    type=click.IntRange(0, 2),
    default=1,
    help="Verbosity level: 0 (silent), 1 (high-level, default), 2 (all outputs)."
)
@click.pass_context
def cli(ctx, config, verbose):
    """Main CLI tool.

    Stores the verbosity and the loaded configuration on ``ctx.obj`` for the
    chained subcommands.

    Raises:
        click.UsageError: If the supplied config file does not itself define
            a non-empty 'samples' entry.
    """
    ctx.ensure_object(dict)

    ctx.obj['verbose'] = verbose

    if not config:
        # No config: subcommands detect the missing cfg and raise.
        ctx.obj['cfg'] = None
        ctx.obj['lookup_dict'] = None
        return

    # Validate the user's file directly. load_config() merges the built-in
    # defaults first, and those include a placeholder sample, so checking
    # the merged result would never catch a file that omits 'samples'.
    with open(config, encoding="utf-8") as handle:
        raw_cfg = json.load(handle)
    if not isinstance(raw_cfg, dict) or not raw_cfg.get('samples'):
        raise click.UsageError("Configuration file must contain a 'samples' dictionary.")

    cfg = load_config(config, verbose=verbose)
    cfg.update({'verbose': verbose})
    ctx.obj['cfg'] = cfg
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@cli.result_callback()
@click.pass_context
def process_pipeline(ctx, processors, config, verbose):
    """Run every processor returned by the chained subcommands.

    Before running, any database/algorithm selection missing from the
    config is filled in so the configuration panel (shown at verbosity
    2 and above) displays complete information.
    """
    fallback_selections = {
        'annotate_databases': ['arraystar', 'circbank', 'circbase', 'circpedia', 'circrna_db', 'cscd', 'exorbase'],
        'fasta_databases': ['arraystar', 'circbank', 'circbase', 'circpedia', 'circrna_db', 'cscd'],
        'mirna_algorithms': ['miranda', 'pita', 'targetscan'],
    }

    active_cfg = ctx.obj.get('cfg')
    if active_cfg:
        # Only keys that are absent receive their fallback value.
        for option_key, option_default in fallback_selections.items():
            active_cfg.setdefault(option_key, option_default)

        if verbose >= 2:
            print_config_panel(active_cfg, config)

    for run_step in processors:
        run_step()
|
|
78
|
+
|
|
79
|
+
@cli.command('annotate')
@click.option(
    "-d",
    "--database",
    type=str,
    required=False,
    default="arraystar,circbank,circbase,circpedia,circRNA_DB,CSCD,exorbase",
    show_default=True,
    help="Comma-separated list of databases to use."
)
@click.pass_context
def annotate(ctx, database):
    """Annotate circRNAs using a JSON configuration file."""
    cfg = ctx.obj.get('cfg')
    if not cfg:
        raise click.UsageError("A config file must be provided via -c/--config before subcommands (e.g., main.py -c config.json annotate)")

    supported = ["arraystar", "circbank", "circbase", "circpedia", "circrna_db", "cscd", "exorbase"]

    if not database:
        # Nothing requested: use every supported database.
        cfg['annotate_databases'] = supported
    else:
        # Names are compared case-insensitively after trimming whitespace.
        requested = []
        rejected = []
        for token in database.split(','):
            db_name = token.strip().lower()
            requested.append(db_name)
            if db_name not in supported:
                rejected.append(db_name)
        if rejected:
            raise click.BadParameter(f"Invalid databases provided: {', '.join(rejected)}. Valid options are: {', '.join(sorted(supported))}")
        cfg['annotate_databases'] = requested

    def processor():
        # Reuse lookup results cached by an earlier chained subcommand and
        # store whatever the annotation run hands back.
        cached = ctx.obj.get('lookup_dict')
        ctx.obj['lookup_dict'] = run_annotation(lookup_dict=cached, **cfg)

    return processor
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@cli.command('fasta')
@click.option(
    "-d",
    "--database",
    type=str,
    required=False,
    default="arraystar,circbank,circbase,circpedia,circRNA_DB,CSCD",
    show_default=True,
    help="Comma-separated list of databases to use."
)
@click.pass_context
def fasta(ctx, database):
    """Output circRNA sequences in FASTA format."""
    cfg = ctx.obj.get('cfg')
    if not cfg:
        raise click.UsageError("A config file must be provided via -c/--config")

    supported = ["arraystar", "circbank", "circbase", "circpedia", "circrna_db", "cscd"]

    if not database:
        # Nothing requested: use every supported database.
        cfg['fasta_databases'] = supported
    else:
        # Names are compared case-insensitively after trimming whitespace.
        requested = []
        rejected = []
        for token in database.split(','):
            db_name = token.strip().lower()
            requested.append(db_name)
            if db_name not in supported:
                rejected.append(db_name)
        if rejected:
            raise click.BadParameter(f"Invalid databases provided: {', '.join(rejected)}. Valid options are: {', '.join(sorted(supported))}")
        cfg['fasta_databases'] = requested

    def processor():
        # Reuse lookup results cached by an earlier chained subcommand and
        # store whatever the FASTA run hands back.
        cached = ctx.obj.get('lookup_dict')
        ctx.obj['lookup_dict'] = run_fasta(lookup_dict=cached, **cfg)

    return processor
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
@cli.command('mirna')
@click.option(
    "-a",
    "--algorithm",
    type=str,
    required=False,
    default="miRanda,PITA,TargetScan",
    show_default=True,
    help="Comma-separated list of algorithms to use."
)
@click.pass_context
def mirna(ctx, algorithm):
    """Output miRNA interactions for identified circRNAs."""
    cfg = ctx.obj.get('cfg')
    if not cfg:
        raise click.UsageError("A config file must be provided via -c/--config")

    if algorithm:
        valid_algs = {'miranda', 'pita', 'targetscan'}
        parsed_algs = [a.strip() for a in algorithm.split(',')]
        # Validation is case-insensitive ...
        invalid_algs = [a for a in parsed_algs if a.lower() not in valid_algs]
        if invalid_algs:
            raise click.BadParameter(f"Invalid algorithms provided: {', '.join(invalid_algs)}. Valid options are: miRanda, PITA, TargetScan")
        # ... but the user's original casing is what gets stored.
        cfg['mirna_algorithms'] = parsed_algs
    else:
        cfg['mirna_algorithms'] = ["miRanda", "PITA", "TargetScan"]

    def processor():
        lookup_dict = ctx.obj.get('lookup_dict')
        if lookup_dict is None:
            # Build the lookup here and cache it on the context so a later
            # chained subcommand does not have to recompute it.
            lookup_dict = instantiate_lookup_driver.instantiate_driver(dict(cfg), verbose=cfg.get('verbose', 1))
            ctx.obj['lookup_dict'] = lookup_dict
        run_mirna(lookup_dict=lookup_dict, **cfg)

    return processor
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
@cli.command('rbp')
@click.pass_context
def rbp(ctx):
    """Output RBP interactions for identified circRNAs."""
    cfg = ctx.obj.get('cfg')
    if not cfg:
        raise click.UsageError("A config file must be provided via -c/--config")

    def processor():
        lookup_dict = ctx.obj.get('lookup_dict')
        if lookup_dict is None:
            # Build the lookup here and cache it on the context so a later
            # chained subcommand does not have to recompute it.
            lookup_dict = instantiate_lookup_driver.instantiate_driver(dict(cfg), verbose=cfg.get('verbose', 1))
            ctx.obj['lookup_dict'] = lookup_dict
        run_rbp(lookup_dict=lookup_dict, **cfg)

    return processor
|
|
191
|
+
|
|
192
|
+
def run_annotation(lookup_dict=None, **kwargs):
    """Run the annotation workflow.

    Args:
        lookup_dict: Optional pre-computed lookup results, iterated as
            ``{sample: {database_name: ...}}``. If None, it is generated
            via the lookup driver.
        **kwargs: Configuration parameters. Keys read here:
            ``verbose``, ``global_parameters`` (``tmp_dir``, ``max_tasks``)
            and ``annotate_databases``.

    Returns:
        The unfiltered lookup dict, so it can be reused by later steps.
    """
    verbose = kwargs.get("verbose", 1)

    # Lookup tables
    if lookup_dict is None:
        lookup_dict = instantiate_lookup_driver.instantiate_driver(kwargs, verbose=verbose)

    global_params = kwargs.get("global_parameters", {})
    tmp_dir = global_params.get("tmp_dir", "tmp")

    # Restrict each sample's entries to the requested databases.
    databases = kwargs.get("annotate_databases")
    if databases:
        if isinstance(databases, str):
            # Accept a comma-separated string as well as a list.
            databases = databases.split(",")
        # Normalise once so the match is case-insensitive on both sides.
        wanted = {str(name).strip().lower() for name in databases}
        filtered_lookup_dict = {
            sample: {k: v for k, v in db_dict.items() if k.lower() in wanted}
            for sample, db_dict in lookup_dict.items()
        }
    else:
        filtered_lookup_dict = lookup_dict

    # Pull annotation tables
    annotation_tables = fetch_annotation_tables(filtered_lookup_dict, tmp_dir_path=tmp_dir, verbose=verbose)

    # Annotate + write to file in parallel
    dr = (
        driver.Builder()
        .enable_dynamic_execution(allow_experimental_mode=True)
        .with_local_executor(executors.SynchronousLocalTaskExecutor())
        .with_remote_executor(executors.MultiThreadingExecutor(max_tasks=global_params.get("max_tasks", 1)))
        .with_modules(annotation_driver)
        .build()
    )

    dr.execute(
        ['close_annotation'],
        inputs={'config': kwargs, 'annotation_tables': annotation_tables, 'lookup_results': filtered_lookup_dict}
    )

    return lookup_dict
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def run_fasta(lookup_dict=None, **kwargs):
    """Generate FASTA output from circRNA sequences.

    Args:
        lookup_dict: Optional pre-computed lookup results, iterated as
            ``{sample: {database_name: ...}}``. If None, will be generated
            from scratch.
        **kwargs: Configuration parameters. Keys read here:
            ``verbose``, ``global_parameters`` (``tmp_dir``, ``max_tasks``)
            and ``fasta_databases``.

    Returns:
        The unfiltered lookup dict, so it can be reused by later steps.
    """
    verbose = kwargs.get("verbose", 1)

    # Generate lookup tables if not provided (i.e., fasta running standalone)
    if lookup_dict is None:
        lookup_dict = instantiate_lookup_driver.instantiate_driver(kwargs, verbose=verbose)

    # Restrict each sample's entries to the requested databases.
    databases = kwargs.get("fasta_databases")
    if databases:
        if isinstance(databases, str):
            # Accept a comma-separated string as well as a list.
            databases = databases.split(",")
        # Normalise once so the match is case-insensitive on both sides.
        wanted = {str(name).strip().lower() for name in databases}
        filtered_lookup_dict = {
            sample: {k: v for k, v in db_dict.items() if k.lower() in wanted}
            for sample, db_dict in lookup_dict.items()
        }
    else:
        filtered_lookup_dict = lookup_dict

    global_params = kwargs.get("global_parameters", {})
    tmp_dir = global_params.get("tmp_dir", "tmp")
    sequence_tables = fetch_sequence_tables(filtered_lookup_dict, tmp_dir_path=tmp_dir, verbose=verbose)

    dr = (
        driver.Builder()
        .enable_dynamic_execution(allow_experimental_mode=True)
        .with_local_executor(executors.SynchronousLocalTaskExecutor())
        .with_remote_executor(executors.MultiThreadingExecutor(max_tasks=global_params.get("max_tasks", 1)))
        .with_modules(sequence_driver)
        .build()
    )

    dr.execute(
        ['close_sequence'],
        inputs={'config': kwargs, 'lookup_dict': filtered_lookup_dict, 'sequence_tables': sequence_tables}
    )

    return lookup_dict
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def run_mirna(lookup_dict=None, **kwargs):
    """Output miRNA interactions for identified circRNAs.

    Args:
        lookup_dict: Optional pre-computed lookup results. If None, will be generated from scratch.
        **kwargs: Configuration parameters. Keys read here: ``verbose`` and
            ``global_parameters`` (``tmp_dir``, ``max_tasks``).

    Returns:
        The lookup dict that was used, matching what run_annotation and
        run_fasta return so callers can reuse it.
    """
    verbose = kwargs.get("verbose", 1)

    if lookup_dict is None:
        lookup_dict = instantiate_lookup_driver.instantiate_driver(kwargs, verbose=verbose)

    global_params = kwargs.get("global_parameters", {})
    tmp_dir = global_params.get("tmp_dir", "tmp")
    mirna_tables = fetch_mirna_tables(lookup_dict, tmp_dir_path=tmp_dir, verbose=verbose)

    dr = (
        driver.Builder()
        .enable_dynamic_execution(allow_experimental_mode=True)
        .with_local_executor(executors.SynchronousLocalTaskExecutor())
        .with_remote_executor(executors.MultiThreadingExecutor(max_tasks=global_params.get("max_tasks", 1)))
        .with_modules(mirna_driver)
        .build()
    )
    dr.execute(
        ['close_mirna'],
        inputs={'config': kwargs, 'lookup_dict': lookup_dict, 'mirna_tables': mirna_tables}
    )

    return lookup_dict
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def run_rbp(lookup_dict=None, **kwargs):
    """Output RBP interactions for identified circRNAs.

    Args:
        lookup_dict: Optional pre-computed lookup results. If None, will be generated from scratch.
        **kwargs: Configuration parameters. Keys read here: ``verbose`` and
            ``global_parameters`` (``tmp_dir``, ``max_tasks``).

    Returns:
        The lookup dict that was used, matching what run_annotation and
        run_fasta return so callers can reuse it.
    """
    verbose = kwargs.get("verbose", 1)

    if lookup_dict is None:
        lookup_dict = instantiate_lookup_driver.instantiate_driver(kwargs, verbose=verbose)

    global_params = kwargs.get("global_parameters", {})
    tmp_dir = global_params.get("tmp_dir", "tmp")
    rbp_tables = fetch_rbp_tables(lookup_dict, tmp_dir_path=tmp_dir, verbose=verbose)

    dr = (
        driver.Builder()
        .enable_dynamic_execution(allow_experimental_mode=True)
        .with_local_executor(executors.SynchronousLocalTaskExecutor())
        .with_remote_executor(executors.MultiThreadingExecutor(max_tasks=global_params.get("max_tasks", 1)))
        .with_modules(rbp_driver)
        .build()
    )
    dr.execute(
        ['close_rbp'],
        inputs={'config': kwargs, 'lookup_dict': lookup_dict, 'rbp_tables': rbp_tables}
    )

    return lookup_dict
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def run_tool(**kwargs):
    """Deprecated: Use run_annotation() instead.

    Forwards all keyword arguments to run_annotation() and returns its
    result. Emits a DeprecationWarning so callers get a runtime signal.
    """
    # Function-scope import keeps this block self-contained.
    import warnings

    warnings.warn(
        "run_tool() is deprecated; use run_annotation() instead.",
        DeprecationWarning,
        stacklevel=2,  # attribute the warning to the caller, not this shim
    )
    return run_annotation(**kwargs)
|
|
329
|
+
|
|
330
|
+
if __name__ == "__main__":
|
|
331
|
+
cli()
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pycircdb
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: pycircdb: integrated circRNA database annotation for computational workflows.
|
|
5
|
+
Author-email: Barry Digby <b.digby237@gmail.com>
|
|
6
|
+
Requires-Python: >=3.14
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: apache-hamilton[lsp]>=1.90.0
|
|
9
|
+
Requires-Dist: boto3>=1.42.89
|
|
10
|
+
Requires-Dist: click>=8.3.2
|
|
11
|
+
Requires-Dist: polars>=1.39.3
|
|
12
|
+
Requires-Dist: pyyaml>=6.0.3
|
|
13
|
+
Requires-Dist: rich-click>=1.9.7
|
|
14
|
+
Requires-Dist: sf-hamilton[lsp]>=1.89.0
|
|
15
|
+
|
|
16
|
+
# pycircdb
|
|
17
|
+
|
|
18
|
+
A command-line tool for identifying and annotating circRNA interactions.
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install pycircdb
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Usage
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
pycircdb --help
|
|
30
|
+
```
|
|
31
|
+
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
config.py
|
|
3
|
+
main.py
|
|
4
|
+
pyproject.toml
|
|
5
|
+
pycircdb.egg-info/PKG-INFO
|
|
6
|
+
pycircdb.egg-info/SOURCES.txt
|
|
7
|
+
pycircdb.egg-info/dependency_links.txt
|
|
8
|
+
pycircdb.egg-info/entry_points.txt
|
|
9
|
+
pycircdb.egg-info/requires.txt
|
|
10
|
+
pycircdb.egg-info/top_level.txt
|
|
11
|
+
sequence_tables/extract_sequences.py
|
|
12
|
+
sequence_tables/split_cscd_by_chr.py
|
|
13
|
+
utils/__init__.py
|
|
14
|
+
utils/md5sum_check.py
|
|
15
|
+
utils/output_mapping.py
|
|
16
|
+
utils/annotate/annotate_driver.py
|
|
17
|
+
utils/annotate/annotate_subdag.py
|
|
18
|
+
utils/annotate/annotate_subdriver.py
|
|
19
|
+
utils/connect_s3/download_annotation_tables.py
|
|
20
|
+
utils/connect_s3/download_lookup_tables.py
|
|
21
|
+
utils/connect_s3/download_mirna_tables.py
|
|
22
|
+
utils/connect_s3/download_rbp_tables.py
|
|
23
|
+
utils/connect_s3/download_sequence_tables.py
|
|
24
|
+
utils/detect_inputs/detect_inputs_driver.py
|
|
25
|
+
utils/detect_inputs/detect_inputs_subdag.py
|
|
26
|
+
utils/fasta/sequence_driver.py
|
|
27
|
+
utils/fasta/sequence_subdag.py
|
|
28
|
+
utils/mirna/mirna_driver.py
|
|
29
|
+
utils/rbp/rbp_driver.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "pycircdb"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "pycircdb: integrated circRNA database annotation for computational workflows."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [{ name = "Barry Digby", email = "b.digby237@gmail.com" }]
|
|
7
|
+
requires-python = ">=3.14"
|
|
8
|
+
dependencies = [
|
|
9
|
+
"apache-hamilton[lsp]>=1.90.0",
|
|
10
|
+
"boto3>=1.42.89",
|
|
11
|
+
"click>=8.3.2",
|
|
12
|
+
"polars>=1.39.3",
|
|
13
|
+
"pyyaml>=6.0.3",
|
|
14
|
+
"rich-click>=1.9.7",
|
|
15
|
+
"sf-hamilton[lsp]>=1.89.0",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[build-system]
|
|
19
|
+
requires = ["setuptools>=61.0"]
|
|
20
|
+
build-backend = "setuptools.build_meta"
|
|
21
|
+
|
|
22
|
+
[tool.setuptools.packages.find]
|
|
23
|
+
where = ["."]
|
|
24
|
+
include = ["*", "assets", "utils", "utils.*"]
|
|
25
|
+
|
|
26
|
+
[tool.setuptools]
|
|
27
|
+
py-modules = ["main", "config"]
|
|
28
|
+
|
|
29
|
+
[tool.setuptools.package-data]
|
|
30
|
+
"*" = ["assets/*.csv"]
|
|
31
|
+
|
|
32
|
+
[dependency-groups]
|
|
33
|
+
dev = [
|
|
34
|
+
"pytest>=9.0.3",
|
|
35
|
+
"pytest-cov>=7.1.0",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[tool.pytest.ini_options]
|
|
39
|
+
addopts = "--cov=utils --cov=config --cov-report=term-missing --cov-report=html"
|
|
40
|
+
testpaths = ["tests"]
|
|
41
|
+
|
|
42
|
+
[tool.coverage.run]
|
|
43
|
+
source = ["utils", "config.py", "main.py"]
|
|
44
|
+
omit = ["tests/*", "venv/*"]
|
|
45
|
+
[project.scripts]
|
|
46
|
+
pycircdb = "main:cli"
|
|
47
|
+
|