pycircdb 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. pycircdb-0.1.0/PKG-INFO +31 -0
  2. pycircdb-0.1.0/README.md +16 -0
  3. pycircdb-0.1.0/config.py +116 -0
  4. pycircdb-0.1.0/main.py +331 -0
  5. pycircdb-0.1.0/pycircdb.egg-info/PKG-INFO +31 -0
  6. pycircdb-0.1.0/pycircdb.egg-info/SOURCES.txt +29 -0
  7. pycircdb-0.1.0/pycircdb.egg-info/dependency_links.txt +1 -0
  8. pycircdb-0.1.0/pycircdb.egg-info/entry_points.txt +2 -0
  9. pycircdb-0.1.0/pycircdb.egg-info/requires.txt +7 -0
  10. pycircdb-0.1.0/pycircdb.egg-info/top_level.txt +12 -0
  11. pycircdb-0.1.0/pyproject.toml +47 -0
  12. pycircdb-0.1.0/sequence_tables/extract_sequences.py +68 -0
  13. pycircdb-0.1.0/sequence_tables/split_cscd_by_chr.py +31 -0
  14. pycircdb-0.1.0/setup.cfg +4 -0
  15. pycircdb-0.1.0/utils/__init__.py +0 -0
  16. pycircdb-0.1.0/utils/annotate/annotate_driver.py +82 -0
  17. pycircdb-0.1.0/utils/annotate/annotate_subdag.py +170 -0
  18. pycircdb-0.1.0/utils/annotate/annotate_subdriver.py +69 -0
  19. pycircdb-0.1.0/utils/connect_s3/download_annotation_tables.py +171 -0
  20. pycircdb-0.1.0/utils/connect_s3/download_lookup_tables.py +84 -0
  21. pycircdb-0.1.0/utils/connect_s3/download_mirna_tables.py +102 -0
  22. pycircdb-0.1.0/utils/connect_s3/download_rbp_tables.py +102 -0
  23. pycircdb-0.1.0/utils/connect_s3/download_sequence_tables.py +163 -0
  24. pycircdb-0.1.0/utils/detect_inputs/detect_inputs_driver.py +33 -0
  25. pycircdb-0.1.0/utils/detect_inputs/detect_inputs_subdag.py +82 -0
  26. pycircdb-0.1.0/utils/fasta/sequence_driver.py +74 -0
  27. pycircdb-0.1.0/utils/fasta/sequence_subdag.py +183 -0
  28. pycircdb-0.1.0/utils/md5sum_check.py +110 -0
  29. pycircdb-0.1.0/utils/mirna/mirna_driver.py +101 -0
  30. pycircdb-0.1.0/utils/output_mapping.py +0 -0
  31. pycircdb-0.1.0/utils/rbp/rbp_driver.py +94 -0
@@ -0,0 +1,31 @@
1
+ Metadata-Version: 2.4
2
+ Name: pycircdb
3
+ Version: 0.1.0
4
+ Summary: pycircdb: integrated circRNA database annotation for computational workflows.
5
+ Author-email: Barry Digby <b.digby237@gmail.com>
6
+ Requires-Python: >=3.14
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: apache-hamilton[lsp]>=1.90.0
9
+ Requires-Dist: boto3>=1.42.89
10
+ Requires-Dist: click>=8.3.2
11
+ Requires-Dist: polars>=1.39.3
12
+ Requires-Dist: pyyaml>=6.0.3
13
+ Requires-Dist: rich-click>=1.9.7
14
+ Requires-Dist: sf-hamilton[lsp]>=1.89.0
15
+
16
+ # pycircdb
17
+
18
+ A command-line tool for identifying and annotating circRNA interactions.
19
+
20
+ ## Installation
21
+
22
+ ```bash
23
+ pip install pycircdb
24
+ ```
25
+
26
+ ## Usage
27
+
28
+ ```bash
29
+ pycircdb --help
30
+ ```
31
+
@@ -0,0 +1,16 @@
1
+ # pycircdb
2
+
3
+ A command-line tool for identifying and annotating circRNA interactions.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install pycircdb
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```bash
14
+ pycircdb --help
15
+ ```
16
+
@@ -0,0 +1,116 @@
1
+ import json
2
+ import shutil
3
+ import rich_click as click
4
+ from rich.console import Console, Group
5
+ from rich.panel import Panel
6
+ from rich.text import Text
7
+ from rich.table import Table
8
+ from rich import box
9
+ from pathlib import Path
10
+ from typing import List, Optional, TypedDict, Union
11
+
12
+ console = Console(stderr=True, highlight=False)
13
+ CONFIG_DIR = Path(__file__).parent.absolute()
14
+
15
# Built-in configuration: written out verbatim by create_config() and used as
# the base layer that load_config() overrides with the user's JSON file.
DEFAULT_CONFIG_DATA = dict(
    # Settings shared by the whole run.
    global_parameters=dict(
        max_tasks=1,
        output_dir="results/",
    ),
    # One entry per sample; "sample_1" is a placeholder for the user to edit.
    samples=dict(
        sample_1=dict(
            file_path="path/to/sample1.txt",
            reference="hg38",
            zero_based=True,
        ),
    ),
)
28
+
29
class ToolConfig(TypedDict, total=False):
    """Shape of the configuration mapping passed around the workflow.

    Every key is optional (``total=False``). The first four keys are the
    original flat declaration and are kept for backward compatibility; the
    remaining keys are the ones the default configuration and the CLI
    actually read and write.
    """

    # Original flat keys.
    input: List[str]
    reference: Union[str, List[str], None]
    zero_based: Union[bool, List[bool], None]
    max_tasks: int

    # Run-wide settings, e.g. "max_tasks", "output_dir" and "tmp_dir".
    global_parameters: dict[str, Union[int, str]]
    # Sample name -> per-sample settings ("file_path", "reference", "zero_based").
    samples: dict[str, dict[str, Union[str, bool]]]
    # Selections filled in by the CLI subcommands.
    annotate_databases: List[str]
    fasta_databases: List[str]
    mirna_algorithms: List[str]
    # Verbosity level stored by the CLI entry point.
    verbose: int
34
+
35
def create_config(name: str):
    """Write the built-in default configuration to ``<name>.json``.

    The file is created in the current working directory, serialised with a
    four-space indent, and its location is echoed to the user afterwards.

    Args:
        name: Base name of the file to create, without the ``.json`` suffix.
    """
    target = Path.cwd() / f"{name}.json"
    serialised = json.dumps(DEFAULT_CONFIG_DATA, indent=4)
    with target.open("w") as handle:
        handle.write(serialised)
    click.echo(f"Created default configuration file at {target}")
41
+
42
def load_config(user_config_path: Optional[str] = None, verbose: int = 1) -> ToolConfig:
    """Build the effective configuration from the defaults and a user file.

    The built-in defaults are copied first; top-level keys found in the user's
    JSON file then replace the corresponding default keys wholesale (there is
    no deep merge).

    Args:
        user_config_path: Path to a JSON configuration file. When omitted, or
            when the path does not point to a regular file, only the defaults
            are returned.
        verbose: Verbosity level; at ``1`` or above a confirmation line is
            printed to the module console.

    Returns:
        The merged configuration mapping.
    """
    import copy

    # Deep copy so callers can mutate nested sections without altering the
    # module-level defaults that later calls start from.
    config: ToolConfig = copy.deepcopy(DEFAULT_CONFIG_DATA)

    loaded_path: Optional[Path] = None
    if user_config_path:
        user_path = Path(user_config_path)
        if user_path.is_file():
            # JSON text is UTF-8; do not depend on the platform locale.
            with user_path.open(encoding="utf-8") as f:
                user_config = json.load(f) or {}
            config.update(user_config)
            loaded_path = user_path

    if verbose >= 1:
        # Report the user path only when it was actually read; otherwise the
        # configuration in effect is the built-in defaults.
        display_path = str(loaded_path.resolve()) if loaded_path is not None else 'defaults'
        console.print(Text(f"✓ Configuration File Loaded: {display_path}", style="bold green"))

    return config
65
+
66
def print_config_panel(config: ToolConfig, user_config_path: Optional[str] = None):
    """Render the workflow configuration as a rich panel on the module console.

    The panel stacks three tables: the global parameters, the configured
    samples, and the selected databases / miRNA algorithms.

    Args:
        config: Configuration mapping to display.
        user_config_path: Accepted for call compatibility; not used here.
    """
    # Global parameters: one column per key, a single row with the values.
    params = config.get("global_parameters", {})
    params_table = Table(show_header=True, header_style="bold cyan", box=box.ROUNDED, expand=True)
    for param_name in params:
        params_table.add_column(str(param_name), justify="center", style="magenta")
    if params:
        params_table.add_row(*map(str, params.values()))

    # Samples: one row per sample; "input" takes precedence over "file_path".
    samples_table = Table(show_header=True, header_style="bold blue", box=box.ROUNDED, expand=True)
    sample_columns = (
        ("Sample Name", {"style": "bold green"}),
        ("Input File", {"style": "yellow"}),
        ("Ref", {"style": "cyan", "justify": "center"}),
        ("0-based", {"style": "magenta", "justify": "center"}),
    )
    for header, options in sample_columns:
        samples_table.add_column(header, **options)
    for label, info in config.get("samples", {}).items():
        source = info.get("input", info.get("file_path", ""))
        samples_table.add_row(
            label,
            str(source),
            str(info.get("reference", "")),
            str(info.get("zero_based", "")),
        )

    # Databases and algorithms: three independent lists shown side by side.
    selection_table = Table(show_header=True, header_style="bold yellow", box=box.ROUNDED, expand=True)
    selection_table.add_column("Annotation DB", style="cyan")
    selection_table.add_column("FASTA DB", style="magenta")
    selection_table.add_column("miRNA Algorithms", style="green")

    columns = (
        config.get("annotate_databases", ["arraystar", "circbank", "circbase", "circpedia", "circrna_db", "cscd", "exorbase"]),
        config.get("fasta_databases", ["arraystar", "circbank", "circbase", "circpedia", "circrna_db", "cscd"]),
        config.get("mirna_algorithms", ["miranda", "pita", "targetscan"]),
    )
    # Pad the shorter lists with empty cells so every row has three entries.
    row_count = max(len(column) for column in columns)
    padded = [list(column) + [""] * (row_count - len(column)) for column in columns]
    for annotation_db, fasta_db, algorithm in zip(*padded):
        selection_table.add_row(annotation_db, fasta_db, algorithm)

    sections = Group(
        Text("Global Parameters:", style="bold white"),
        params_table,
        Text(""),
        Text("Samples:", style="bold white"),
        samples_table,
        Text(""),
        Text("Databases & Algorithms:", style="bold white"),
        selection_table,
    )
    console.print(Panel(sections, title="[bold white]Workflow Configuration[/bold white]", border_style="green", expand=False))
pycircdb-0.1.0/main.py ADDED
@@ -0,0 +1,331 @@
1
+ import json
2
+ import shutil
3
+ from pathlib import Path
4
+ import rich_click as click
5
+ from typing import Tuple, List, Optional
6
+ from config import create_config, load_config, print_config_panel
7
+
8
+ # Workflow stuff
9
+ from hamilton import driver
10
+ from hamilton.execution import executors
11
+
12
+ from utils.connect_s3.download_annotation_tables import fetch_annotation_tables
13
+ from utils.connect_s3.download_sequence_tables import fetch_sequence_tables
14
+ from utils.connect_s3.download_mirna_tables import fetch_mirna_tables
15
+ from utils.connect_s3.download_rbp_tables import fetch_rbp_tables
16
+
17
+ import utils.detect_inputs.detect_inputs_driver as instantiate_lookup_driver
18
+ import utils.annotate.annotate_driver as annotation_driver
19
+ import utils.fasta.sequence_driver as sequence_driver
20
+ import utils.mirna.mirna_driver as mirna_driver
21
+ import utils.rbp.rbp_driver as rbp_driver
22
+ from rich.console import Console
23
+
24
+ console = Console(stderr=True, highlight=False)
25
+
26
@click.group(chain=True, context_settings=dict(help_option_names=['-h', '--help']))
@click.option(
    "-c", "--config",
    type=click.Path(exists=True, dir_okay=False, readable=True),
    required=False,
    help="Path to the JSON config file containing workflow parameters.",
)
@click.option(
    "-v", "--verbose",
    type=click.IntRange(0, 2),
    default=1,
    help="Verbosity level: 0 (silent), 1 (high-level, default), 2 (all outputs).",
)
@click.pass_context
def cli(ctx, config, verbose):
    """Main CLI tool."""
    # NOTE: the docstring above doubles as the --help text, so it is kept as is.
    # Shared state handed to every chained subcommand through ctx.obj.
    ctx.ensure_object(dict)
    ctx.obj['verbose'] = verbose

    cfg = None
    if config:
        cfg = load_config(config, verbose=verbose)
        if not cfg.get('samples'):
            raise click.UsageError("Configuration file must contain a 'samples' dictionary.")
        cfg['verbose'] = verbose
    ctx.obj['cfg'] = cfg

    # Lookup results are produced lazily by the first subcommand that needs them.
    ctx.obj['lookup_dict'] = None
57
+
58
+
59
@cli.result_callback()
@click.pass_context
def process_pipeline(ctx, processors, config, verbose):
    """Run the deferred work of every chained subcommand, in order.

    Each subcommand returns a zero-argument callable instead of doing its work
    immediately; this callback receives those callables once all arguments
    have been parsed. Before running them it fills in any selection list that
    no subcommand set, so the configuration panel (shown at verbosity 2) is
    complete.
    """
    cfg = ctx.obj.get('cfg')
    if cfg:
        fallback_selections = (
            ('annotate_databases', ['arraystar', 'circbank', 'circbase', 'circpedia', 'circrna_db', 'cscd', 'exorbase']),
            ('fasta_databases', ['arraystar', 'circbank', 'circbase', 'circpedia', 'circrna_db', 'cscd']),
            ('mirna_algorithms', ['miranda', 'pita', 'targetscan']),
        )
        for key, names in fallback_selections:
            cfg.setdefault(key, names)

        if verbose >= 2:
            print_config_panel(cfg, config)

    for run_step in processors:
        run_step()
78
+
79
@cli.command('annotate')
@click.option(
    "-d", "--database",
    type=str,
    required=False,
    default="arraystar,circbank,circbase,circpedia,circRNA_DB,CSCD,exorbase",
    show_default=True,
    help="Comma-separated list of databases to use.",
)
@click.pass_context
def annotate(ctx, database):
    """Annotate circRNAs using a JSON configuration file."""
    # NOTE: the docstring above doubles as the subcommand's --help text.
    cfg = ctx.obj.get('cfg')
    if not cfg:
        raise click.UsageError("A config file must be provided via -c/--config before subcommands (e.g., main.py -c config.json annotate)")

    if not database:
        # An empty -d value selects every supported database.
        selected = ["arraystar", "circbank", "circbase", "circpedia", "circrna_db", "cscd", "exorbase"]
    else:
        supported = {'arraystar', 'circbank', 'circbase', 'circpedia', 'circrna_db', 'cscd', 'exorbase'}
        # Names are matched case-insensitively and stored in lower case.
        selected = [entry.strip().lower() for entry in database.split(',')]
        rejected = [entry for entry in selected if entry not in supported]
        if rejected:
            raise click.BadParameter(f"Invalid databases provided: {', '.join(rejected)}. Valid options are: {', '.join(sorted(supported))}")
    cfg['annotate_databases'] = selected

    def processor():
        # Deferred until the group's result callback; reuses lookup results
        # from an earlier step and stores them for later ones.
        cached = ctx.obj.get('lookup_dict')
        ctx.obj['lookup_dict'] = run_annotation(lookup_dict=cached, **cfg)

    return processor
110
+
111
+
112
@cli.command('fasta')
@click.option(
    "-d", "--database",
    type=str,
    required=False,
    default="arraystar,circbank,circbase,circpedia,circRNA_DB,CSCD",
    show_default=True,
    help="Comma-separated list of databases to use.",
)
@click.pass_context
def fasta(ctx, database):
    """Output circRNA sequences in FASTA format."""
    # NOTE: the docstring above doubles as the subcommand's --help text.
    cfg = ctx.obj.get('cfg')
    if not cfg:
        raise click.UsageError("A config file must be provided via -c/--config")

    if not database:
        # An empty -d value selects every database that provides sequences.
        selected = ["arraystar", "circbank", "circbase", "circpedia", "circrna_db", "cscd"]
    else:
        supported = {'arraystar', 'circbank', 'circbase', 'circpedia', 'circrna_db', 'cscd'}
        # Names are matched case-insensitively and stored in lower case.
        selected = [entry.strip().lower() for entry in database.split(',')]
        rejected = [entry for entry in selected if entry not in supported]
        if rejected:
            raise click.BadParameter(f"Invalid databases provided: {', '.join(rejected)}. Valid options are: {', '.join(sorted(supported))}")
    cfg['fasta_databases'] = selected

    def processor():
        # Deferred until the group's result callback; reuses lookup results
        # from an earlier step and stores them for later ones.
        cached = ctx.obj.get('lookup_dict')
        ctx.obj['lookup_dict'] = run_fasta(lookup_dict=cached, **cfg)

    return processor
143
+
144
+
145
@cli.command('mirna')
@click.option(
    "-a",
    "--algorithm",
    type=str,
    required=False,
    default="miRanda,PITA,TargetScan",
    show_default=True,
    help="Comma-separated list of algorithms to use."
)
@click.pass_context
def mirna(ctx, algorithm):
    """Output miRNA interactions for identified circRNAs."""
    # NOTE: the docstring above doubles as the subcommand's --help text.
    cfg = ctx.obj.get('cfg')
    if not cfg:
        raise click.UsageError("A config file must be provided via -c/--config")

    if algorithm:
        valid_algs = {'miranda', 'pita', 'targetscan'}
        parsed_algs = [a.strip() for a in algorithm.split(',')]
        # Validation is case-insensitive, but the user's spelling is stored.
        invalid_algs = [a for a in parsed_algs if a.lower() not in valid_algs]
        if invalid_algs:
            raise click.BadParameter(f"Invalid algorithms provided: {', '.join(invalid_algs)}. Valid options are: miRanda, PITA, TargetScan")
        cfg['mirna_algorithms'] = parsed_algs
    else:
        cfg['mirna_algorithms'] = ["miRanda", "PITA", "TargetScan"]

    def processor():
        lookup_dict = ctx.obj.get('lookup_dict')
        if lookup_dict is None:
            # Build the lookup results here, exactly as run_mirna() would, and
            # cache them in the shared context so later chained steps reuse
            # them instead of rebuilding them (as annotate/fasta already do).
            lookup_dict = instantiate_lookup_driver.instantiate_driver(
                dict(cfg), verbose=cfg.get('verbose', 1)
            )
            ctx.obj['lookup_dict'] = lookup_dict
        run_mirna(lookup_dict=lookup_dict, **cfg)
    return processor
177
+
178
+
179
@cli.command('rbp')
@click.pass_context
def rbp(ctx):
    """Output RBP interactions for identified circRNAs."""
    # NOTE: the docstring above doubles as the subcommand's --help text.
    cfg = ctx.obj.get('cfg')
    if not cfg:
        raise click.UsageError("A config file must be provided via -c/--config")

    def processor():
        lookup_dict = ctx.obj.get('lookup_dict')
        if lookup_dict is None:
            # Build the lookup results here, exactly as run_rbp() would, and
            # cache them in the shared context so later chained steps reuse
            # them instead of rebuilding them (as annotate/fasta already do).
            lookup_dict = instantiate_lookup_driver.instantiate_driver(
                dict(cfg), verbose=cfg.get('verbose', 1)
            )
            ctx.obj['lookup_dict'] = lookup_dict
        run_rbp(lookup_dict=lookup_dict, **cfg)
    return processor
191
+
192
def run_annotation(lookup_dict=None, **kwargs):
    """Run the annotation workflow.

    Args:
        lookup_dict: Optional pre-computed lookup results, keyed by sample and
            then by database. Built from ``kwargs`` when ``None``.
        **kwargs: Configuration parameters (the loaded config mapping).

    Returns:
        The unfiltered lookup results, so later workflow steps can reuse them.
    """
    if lookup_dict is None:
        lookup_dict = instantiate_lookup_driver.instantiate_driver(kwargs, verbose=kwargs.get('verbose', 1))

    global_params = kwargs.get("global_parameters", {})
    scratch_dir = global_params.get("tmp_dir", "tmp")

    # Restrict each sample's results to the requested databases, if any.
    wanted = kwargs.get("annotate_databases")
    if not wanted:
        selected_lookup = lookup_dict
    else:
        selected_lookup = {
            sample: {db: hits for db, hits in per_db.items() if db.lower() in wanted}
            for sample, per_db in lookup_dict.items()
        }

    # Fetch the annotation tables needed for the selected databases.
    annotation_tables = fetch_annotation_tables(selected_lookup, tmp_dir_path=scratch_dir, verbose=kwargs.get("verbose", 1))

    # Annotation and file writing run on a thread pool of max_tasks workers.
    worker_pool = executors.MultiThreadingExecutor(max_tasks=global_params.get("max_tasks", 1))
    builder = driver.Builder()
    builder = builder.enable_dynamic_execution(allow_experimental_mode=True)
    builder = builder.with_local_executor(executors.SynchronousLocalTaskExecutor())
    builder = builder.with_remote_executor(worker_pool)
    builder = builder.with_modules(annotation_driver)
    dr = builder.build()

    workflow_inputs = {
        'config': kwargs,
        'annotation_tables': annotation_tables,
        'lookup_results': selected_lookup,
    }
    dr.execute(['close_annotation'], inputs=workflow_inputs)

    return lookup_dict
229
+
230
+
231
def run_fasta(lookup_dict=None, **kwargs):
    """Generate FASTA output from circRNA sequences.

    Args:
        lookup_dict: Optional pre-computed lookup results, keyed by sample and
            then by database. Built from ``kwargs`` when ``None`` (i.e. when
            this step runs on its own).
        **kwargs: Configuration parameters (the loaded config mapping).

    Returns:
        The unfiltered lookup results, so later workflow steps can reuse them.
    """
    if lookup_dict is None:
        lookup_dict = instantiate_lookup_driver.instantiate_driver(kwargs, verbose=kwargs.get("verbose", 1))

    # Restrict each sample's results to the requested databases, if any.
    wanted = kwargs.get("fasta_databases")
    if not wanted:
        selected_lookup = lookup_dict
    else:
        selected_lookup = {
            sample: {db: hits for db, hits in per_db.items() if db.lower() in wanted}
            for sample, per_db in lookup_dict.items()
        }

    global_params = kwargs.get("global_parameters", {})
    scratch_dir = global_params.get("tmp_dir", "tmp")
    sequence_tables = fetch_sequence_tables(selected_lookup, tmp_dir_path=scratch_dir, verbose=kwargs.get("verbose", 1))

    # Sequence extraction runs on a thread pool of max_tasks workers.
    worker_pool = executors.MultiThreadingExecutor(max_tasks=global_params.get("max_tasks", 1))
    builder = driver.Builder()
    builder = builder.enable_dynamic_execution(allow_experimental_mode=True)
    builder = builder.with_local_executor(executors.SynchronousLocalTaskExecutor())
    builder = builder.with_remote_executor(worker_pool)
    builder = builder.with_modules(sequence_driver)
    dr = builder.build()

    workflow_inputs = {
        'config': kwargs,
        'lookup_dict': selected_lookup,
        'sequence_tables': sequence_tables,
    }
    dr.execute(['close_sequence'], inputs=workflow_inputs)

    return lookup_dict
268
+
269
+
270
def run_mirna(lookup_dict=None, **kwargs):
    """Output miRNA interactions for identified circRNAs.

    Args:
        lookup_dict: Optional pre-computed lookup results. If None, will be generated from scratch.
        **kwargs: Configuration parameters.

    Returns:
        The lookup results that were used, matching run_annotation() and
        run_fasta(), so callers can pass them on to a later step instead of
        regenerating them.
    """
    if lookup_dict is None:
        lookup_dict = instantiate_lookup_driver.instantiate_driver(kwargs, verbose=kwargs.get("verbose", 1))

    global_params = kwargs.get("global_parameters", {})
    tmp_dir = global_params.get("tmp_dir", "tmp")
    mirna_tables = fetch_mirna_tables(lookup_dict, tmp_dir_path=tmp_dir, verbose=kwargs.get("verbose", 1))

    # The workflow's remote tasks run on a thread pool of max_tasks workers.
    dr = (
        driver.Builder()
        .enable_dynamic_execution(allow_experimental_mode=True)
        .with_local_executor(executors.SynchronousLocalTaskExecutor())
        .with_remote_executor(executors.MultiThreadingExecutor(max_tasks=global_params.get("max_tasks", 1)))
        .with_modules(mirna_driver)
        .build()
    )
    dr.execute(
        ['close_mirna'],
        inputs={'config': kwargs, 'lookup_dict': lookup_dict, 'mirna_tables': mirna_tables}
    )

    return lookup_dict
296
+
297
+
298
def run_rbp(lookup_dict=None, **kwargs):
    """Output RBP interactions for identified circRNAs.

    Args:
        lookup_dict: Optional pre-computed lookup results. If None, will be generated from scratch.
        **kwargs: Configuration parameters.

    Returns:
        The lookup results that were used, matching run_annotation() and
        run_fasta(), so callers can pass them on to a later step instead of
        regenerating them.
    """
    if lookup_dict is None:
        lookup_dict = instantiate_lookup_driver.instantiate_driver(kwargs, verbose=kwargs.get("verbose", 1))

    global_params = kwargs.get("global_parameters", {})
    tmp_dir = global_params.get("tmp_dir", "tmp")
    rbp_tables = fetch_rbp_tables(lookup_dict, tmp_dir_path=tmp_dir, verbose=kwargs.get("verbose", 1))

    # The workflow's remote tasks run on a thread pool of max_tasks workers.
    dr = (
        driver.Builder()
        .enable_dynamic_execution(allow_experimental_mode=True)
        .with_local_executor(executors.SynchronousLocalTaskExecutor())
        .with_remote_executor(executors.MultiThreadingExecutor(max_tasks=global_params.get("max_tasks", 1)))
        .with_modules(rbp_driver)
        .build()
    )
    dr.execute(
        ['close_rbp'],
        inputs={'config': kwargs, 'lookup_dict': lookup_dict, 'rbp_tables': rbp_tables}
    )

    return lookup_dict
324
+
325
+
326
def run_tool(**kwargs):
    """Deprecated: Use run_annotation() instead.

    Emits a ``DeprecationWarning`` and forwards all keyword arguments to
    run_annotation(), returning whatever it returns.
    """
    import warnings

    # stacklevel=2 attributes the warning to the caller, not to this wrapper.
    warnings.warn(
        "run_tool() is deprecated; use run_annotation() instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    return run_annotation(**kwargs)
329
+
330
+ if __name__ == "__main__":
331
+ cli()
@@ -0,0 +1,31 @@
1
+ Metadata-Version: 2.4
2
+ Name: pycircdb
3
+ Version: 0.1.0
4
+ Summary: pycircdb: integrated circRNA database annotation for computational workflows.
5
+ Author-email: Barry Digby <b.digby237@gmail.com>
6
+ Requires-Python: >=3.14
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: apache-hamilton[lsp]>=1.90.0
9
+ Requires-Dist: boto3>=1.42.89
10
+ Requires-Dist: click>=8.3.2
11
+ Requires-Dist: polars>=1.39.3
12
+ Requires-Dist: pyyaml>=6.0.3
13
+ Requires-Dist: rich-click>=1.9.7
14
+ Requires-Dist: sf-hamilton[lsp]>=1.89.0
15
+
16
+ # pycircdb
17
+
18
+ A command-line tool for identifying and annotating circRNA interactions.
19
+
20
+ ## Installation
21
+
22
+ ```bash
23
+ pip install pycircdb
24
+ ```
25
+
26
+ ## Usage
27
+
28
+ ```bash
29
+ pycircdb --help
30
+ ```
31
+
@@ -0,0 +1,29 @@
1
+ README.md
2
+ config.py
3
+ main.py
4
+ pyproject.toml
5
+ pycircdb.egg-info/PKG-INFO
6
+ pycircdb.egg-info/SOURCES.txt
7
+ pycircdb.egg-info/dependency_links.txt
8
+ pycircdb.egg-info/entry_points.txt
9
+ pycircdb.egg-info/requires.txt
10
+ pycircdb.egg-info/top_level.txt
11
+ sequence_tables/extract_sequences.py
12
+ sequence_tables/split_cscd_by_chr.py
13
+ utils/__init__.py
14
+ utils/md5sum_check.py
15
+ utils/output_mapping.py
16
+ utils/annotate/annotate_driver.py
17
+ utils/annotate/annotate_subdag.py
18
+ utils/annotate/annotate_subdriver.py
19
+ utils/connect_s3/download_annotation_tables.py
20
+ utils/connect_s3/download_lookup_tables.py
21
+ utils/connect_s3/download_mirna_tables.py
22
+ utils/connect_s3/download_rbp_tables.py
23
+ utils/connect_s3/download_sequence_tables.py
24
+ utils/detect_inputs/detect_inputs_driver.py
25
+ utils/detect_inputs/detect_inputs_subdag.py
26
+ utils/fasta/sequence_driver.py
27
+ utils/fasta/sequence_subdag.py
28
+ utils/mirna/mirna_driver.py
29
+ utils/rbp/rbp_driver.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ pycircdb = main:cli
@@ -0,0 +1,7 @@
1
+ apache-hamilton[lsp]>=1.90.0
2
+ boto3>=1.42.89
3
+ click>=8.3.2
4
+ polars>=1.39.3
5
+ pyyaml>=6.0.3
6
+ rich-click>=1.9.7
7
+ sf-hamilton[lsp]>=1.89.0
@@ -0,0 +1,12 @@
1
+ assets
2
+ build
3
+ config
4
+ db_tables
5
+ dist
6
+ lookup_tables
7
+ main
8
+ results
9
+ sequence_tables
10
+ test
11
+ tmp
12
+ utils
@@ -0,0 +1,47 @@
1
+ [project]
2
+ name = "pycircdb"
3
+ version = "0.1.0"
4
+ description = "pycircdb: integrated circRNA database annotation for computational workflows."
5
+ readme = "README.md"
6
+ authors = [{ name = "Barry Digby", email = "b.digby237@gmail.com" }]
7
+ requires-python = ">=3.14"
8
+ dependencies = [
9
+ "apache-hamilton[lsp]>=1.90.0",
10
+ "boto3>=1.42.89",
11
+ "click>=8.3.2",
12
+ "polars>=1.39.3",
13
+ "pyyaml>=6.0.3",
14
+ "rich-click>=1.9.7",
15
+ "sf-hamilton[lsp]>=1.89.0",
16
+ ]
17
+
18
+ [build-system]
19
+ requires = ["setuptools>=61.0"]
20
+ build-backend = "setuptools.build_meta"
21
+
22
+ [tool.setuptools.packages.find]
23
+ where = ["."]
24
+ include = ["*", "assets", "utils", "utils.*"]
25
+
26
+ [tool.setuptools]
27
+ py-modules = ["main", "config"]
28
+
29
+ [tool.setuptools.package-data]
30
+ "*" = ["assets/*.csv"]
31
+
32
+ [dependency-groups]
33
+ dev = [
34
+ "pytest>=9.0.3",
35
+ "pytest-cov>=7.1.0",
36
+ ]
37
+
38
+ [tool.pytest.ini_options]
39
+ addopts = "--cov=utils --cov=config --cov-report=term-missing --cov-report=html"
40
+ testpaths = ["tests"]
41
+
42
+ [tool.coverage.run]
43
+ source = ["utils", "config.py", "main.py"]
44
+ omit = ["tests/*", "venv/*"]
45
+ [project.scripts]
46
+ pycircdb = "main:cli"
47
+