biotailor 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,93 @@
1
+ # Copilot Instructions — biotailor (Python SDK)
2
+
3
+ ## Project Overview
4
+
5
+ Python SDK for the [Biotailor](https://biotailor.com) bioinformatics pipeline platform. Provides programmatic access to create, run, and monitor bioinformatics pipelines via the Biotailor REST API.
6
+
7
+ ## Architecture
8
+
9
+ ```
10
+ src/biotailor/
11
+ ├── __init__.py # Public API exports (__all__) + __version__
12
+ ├── client.py # BiotailorClient — main entry point, all HTTP methods
13
+ ├── models.py # Dataclasses: ToolConfig, Job, Dataset, JobStatus, etc.
14
+ ├── pipeline.py # Pipeline builder (fluent/chainable API)
15
+ ├── uploader.py # S3 presigned URL uploads (single + multipart, concurrent)
16
+ └── exceptions.py # Exception hierarchy: BiotailorError → API/Validation/Upload
17
+ ```
18
+
19
+ - **`client.py`** — `BiotailorClient` wraps all API calls (tools, jobs, datasets, uploads, downloads, polling). Uses `requests.Session` with Bearer auth.
20
+ - **`models.py`** — Pure dataclasses mirroring backend JSON. Parsing helpers (`parse_tool_config`, `parse_job`, `parse_dataset`) convert camelCase API responses to snake_case fields.
21
+ - **`pipeline.py`** — `Pipeline` is a chainable builder: `set_param()`, `set_input()`, `set_dataset()`, `set_hardware()`. Validates against `ToolConfig` and builds workflow JSON for the run endpoint.
22
+ - **`uploader.py`** — Handles S3 uploads via presigned URLs. Supports single PUT and multipart uploads with retries, concurrent threads, and tqdm progress bars.
23
+ - **`exceptions.py`** — Four exception classes: `BiotailorError` (base), `BiotailorAPIError` (HTTP errors with status_code), `BiotailorValidationError` (client-side), `BiotailorUploadError` (S3 failures).
24
+
25
+ ### Job Execution Flow
26
+
27
+ ```
28
+ client.run(pipeline)
29
+ → POST /jobs (create job)
30
+ → POST /jobs/{id}/run (submit workflow, get upload URLs)
31
+ → PUT presigned URLs (upload files via uploader.py)
32
+ → POST /jobs/{id}/confirm-uploaded
33
+ ```
34
+
35
+ `client.run_and_wait()` adds polling via `wait_for_completion()` + optional `download_outputs()`.
36
+
37
+ ## Dev Workflow
38
+
39
+ ```bash
40
+ pip install -e ".[dev]" # Install with dev deps (pytest, responses, ruff)
41
+ pytest tests/ -v # Run unit tests
42
+ ruff check src/ tests/ # Lint
43
+ ```
44
+
45
+ ### Integration Tests
46
+
47
+ Integration tests live in `tests/integration/` and run against the live Biotailor API. They require the `BIOTAILOR_API_KEY` environment variable and are excluded from CI.
48
+
49
+ ## Conventions
50
+
51
+ ### Python Version & Style
52
+
53
+ - **Target**: Python 3.9+ (`from __future__ import annotations` used throughout)
54
+ - **Build system**: Hatchling
55
+ - **Linter**: Ruff — line length 100, rules: E, F, I, N, W
56
+ - **No type checker** configured (no mypy/pyright in dev deps)
57
+
58
+ ### Code Patterns
59
+
60
+ - **Dataclasses** for all models — no Pydantic, no attrs
61
+ - **`from __future__ import annotations`** at top of every module for PEP 604 style hints
62
+ - **snake_case** for Python fields, **camelCase** for JSON keys sent to/from the API
63
+ - **Parsing helpers** in `models.py` handle the camelCase→snake_case conversion (e.g. `displayName` → `display_name`)
64
+ - **Fluent builder** pattern on `Pipeline` — all setter methods return `self`
65
+ - **`requests`** library for HTTP — no `httpx`, no `aiohttp`
66
+ - **`tqdm`** for progress bars (uploads + downloads)
67
+ - **Logging** via `logging.getLogger("biotailor")` — debug mode controlled by `BiotailorClient(debug=True)`
68
+
69
+ ### Testing Patterns
70
+
71
+ - **`responses`** library to mock HTTP calls (not `requests_mock`)
72
+ - **`pytest-mock`** for general mocking
73
+ - Shared fixtures and sample JSON payloads in `tests/conftest.py`
74
+ - Test classes grouped by feature: `TestClientInit`, `TestListTools`, `TestRunPipeline`, etc.
75
+ - API key for tests: `btk_abcdef123456.sk_0123456789abcdef0123456789abcdef` (from conftest)
76
+ - Use `@responses.activate` decorator on test methods that mock HTTP
77
+
78
+ ### API Key Format
79
+
80
+ Pattern: `btk_<hex>.sk_<hex>` — validated by regex in `BiotailorClient.__init__`.
81
+
82
+ ### Public API
83
+
84
+ All public exports are listed in `__init__.py`'s `__all__`. Keep that list in sync whenever a new model or class is added to the public API.
85
+
86
+ ## Dependencies
87
+
88
+ - **Runtime**: `requests>=2.28.0`, `tqdm>=4.64.0`
89
+ - **Dev**: `pytest>=7.0`, `pytest-mock>=3.10`, `responses>=0.23`, `ruff>=0.1.0`
90
+
91
+ ## Relationship to Biotailor Backend
92
+
93
+ This SDK talks to the same REST API served by the SST v2 backend in the `biotailor` monorepo. The workflow JSON schema, job statuses (`BUILDING`, `UPLOADING`, `STARTING`, `RUNNING`, `SUCCEEDED`, `FAILED`, `CANCELLED`), and tool config shapes must stay in sync with `packages/core/` types in the backend.
@@ -0,0 +1,40 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+
7
+ # Distribution / packaging
8
+ dist/
9
+ build/
10
+ *.egg-info/
11
+ *.egg
12
+
13
+ # Virtual environments
14
+ .venv/
15
+ venv/
16
+ ENV/
17
+
18
+ # Testing
19
+ .pytest_cache/
20
+ .coverage
21
+ htmlcov/
22
+ .tox/
23
+
24
+ # IDE
25
+ .vscode/
26
+ .idea/
27
+ *.swp
28
+ *.swo
29
+
30
+ # OS
31
+ .DS_Store
32
+ Thumbs.db
33
+
34
+ # Ruff
35
+ .ruff_cache/
36
+
37
+ .pypirc
38
+
39
+ # Project-specific
40
+ tests/integration/testdata/reads-large.fq
@@ -0,0 +1,75 @@
1
+ Metadata-Version: 2.4
2
+ Name: biotailor
3
+ Version: 0.1.0
4
+ Summary: Python SDK for the Biotailor bioinformatics pipeline platform
5
+ Author: Biotailor Team
6
+ License-Expression: MIT
7
+ Keywords: bioinformatics,biotailor,nextflow,pipeline
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Science/Research
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
17
+ Requires-Python: >=3.9
18
+ Requires-Dist: requests>=2.28.0
19
+ Requires-Dist: tqdm>=4.64.0
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest-mock>=3.10; extra == 'dev'
22
+ Requires-Dist: pytest>=7.0; extra == 'dev'
23
+ Requires-Dist: responses>=0.23; extra == 'dev'
24
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
25
+ Description-Content-Type: text/markdown
26
+
27
+ # biotailor
28
+
29
+ Python SDK for the [Biotailor](https://biotailor.com) bioinformatics pipeline platform.
30
+
31
+ ## Installation
32
+
33
+ ```bash
34
+ pip install biotailor
35
+ ```
36
+
37
+ ## Quickstart
38
+
39
+ ```python
40
+ from biotailor import BiotailorClient, Pipeline
41
+
42
+ # 1. Create a client (get your API key from the Biotailor website)
43
+ client = BiotailorClient(api_key="btk_xxx.sk_yyy")
44
+
45
+ # 2. Discover available tools
46
+ tools = client.list_tools()
47
+ for t in tools:
48
+ print(f"{t.toolid}: {t.display_name}")
49
+
50
+ # 3. Build a pipeline
51
+ tool = client.get_tool("fastp-single")
52
+
53
+ pipeline = (
54
+ Pipeline(name="QC my reads", tool=tool)
55
+ .set_input("input", "reads.fastq")
56
+ .set_param("qualified_quality_phred", 20)
57
+ )
58
+
59
+ # 4. Run and wait
60
+ job = client.run_and_wait(pipeline, output_dir="./results")
61
+ print(f"Job {job.jobid} finished with status: {job.job_status.value}")
62
+ ```
63
+
64
+ ## Development
65
+
66
+ ```bash
67
+ # Install in dev mode
68
+ pip install -e ".[dev]"
69
+
70
+ # Run tests
71
+ pytest tests/ -v
72
+
73
+ # Lint
74
+ ruff check src/ tests/
75
+ ```
@@ -0,0 +1,49 @@
1
+ # biotailor
2
+
3
+ Python SDK for the [Biotailor](https://biotailor.com) bioinformatics pipeline platform.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install biotailor
9
+ ```
10
+
11
+ ## Quickstart
12
+
13
+ ```python
14
+ from biotailor import BiotailorClient, Pipeline
15
+
16
+ # 1. Create a client (get your API key from the Biotailor website)
17
+ client = BiotailorClient(api_key="btk_xxx.sk_yyy")
18
+
19
+ # 2. Discover available tools
20
+ tools = client.list_tools()
21
+ for t in tools:
22
+ print(f"{t.toolid}: {t.display_name}")
23
+
24
+ # 3. Build a pipeline
25
+ tool = client.get_tool("fastp-single")
26
+
27
+ pipeline = (
28
+ Pipeline(name="QC my reads", tool=tool)
29
+ .set_input("input", "reads.fastq")
30
+ .set_param("qualified_quality_phred", 20)
31
+ )
32
+
33
+ # 4. Run and wait
34
+ job = client.run_and_wait(pipeline, output_dir="./results")
35
+ print(f"Job {job.jobid} finished with status: {job.job_status.value}")
36
+ ```
37
+
38
+ ## Development
39
+
40
+ ```bash
41
+ # Install in dev mode
42
+ pip install -e ".[dev]"
43
+
44
+ # Run tests
45
+ pytest tests/ -v
46
+
47
+ # Lint
48
+ ruff check src/ tests/
49
+ ```
@@ -0,0 +1,36 @@
1
+ @AS500713:64:HFKJJBGXY:1:11101:1675:1101 1:A:0:TATAGCCT+GACCCCCA
2
+
3
+ +
4
+
5
+ @AS500713:64:HFKJJBGXY:1:11101:17113:1101 1:A:0:TATAGCCT+GTTTCTTA
6
+ TACAAAATGCACATCGCTGAAAGGGGTAAAGGAGAGAAATCGCTTTATAAAACCTTGAAAAGGAATATTCAAATATAAGCTGGGAAGGTATAAAAAACTCTGTACATCACAAGTAAACAAATGGAACCTGCAAAATATTAAACAAAGGATT
7
+ +
8
+ AAAAAEEEEE6EEAAAEEEEE6EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEEEEEEEEEECFE####EEEE6EE<AAEEEAEEEEEEEEEEEEAEEEEEEEA<E/AAEEEAEEEEE/EEEEAAEEE
9
+ @AS500713:64:HFKJJBGXY:1:11101:1675:1101 1:A:0:TATAGCCT+GACCCCCA
10
+ TAGGAGGCTTGGAGTACCAATAATAAAGTGAGCCCACCTTCCTGGTACCCAGACATTTCAGGAGGTCGGGAAATTTTTAAACCCAGGCAGCTTCCTGGCAGTGACATTTGGAGCATCAAAGTGGTAAATAAAATTTCATTTACATTAATAT
11
+ +
12
+ 6AAAAAEEEEE/E/EA/E/AEA6EE//AEE66/AAE//EEE/E//E/AA/EEE/A/AEE/EEA//EEEEEEEE6EEAAA/E###6E/6//6<EAAEEE/EEEA/EA/EEEEEE/<<EEEE//A/EE<AEEEEE/</AA</E<AAAE/E<E/
13
+ @AS500713:64:HFKJJBGXY:1:11101:17113:1101 1:A:0:TATAGCCT+GTTTCTTA
14
+ TACAAAATGCACATCGCTGAAAGGGGTAAAGGAGAGAAATCGCTTTATAAAACCTTGAAAAGGAATATTCAAATATAAGCTGGGAAGGTATAAAAAACTCTGTACATCACAAGTAAACAAATGGAACCTGCAAAATATTAAACAAAGGATT
15
+ +
16
+ AAAAAEEEEE6EEAAAEEEEE6EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEEEEEEEEEEEEE/EEEEEEE6EE<AAEEEAEEEEEEEEEEEEAEEEEEEEA<E/AAEEEAEEEEE/EEEEAAEEE
17
+ @AS500713:64:HFKJJBGXY:1:11101:1675:1101 1:A:0:TATAGCCT+GACCCCCA
18
+ TAGGAGGCTTGGAGTACCAATAATAAAGTGAGCCCACCTTCCTGGTACCCAGACATTTCAGGAGGTCGGGAAATTTTTAAACCCAGGCAGCTTCCTGGCAGTGACATTTGGAGCATCAAAGTGGTAAATAAAATTTCATTTACATTAATAT
19
+ +
20
+ 6AAAAAEEEEE/E/EA/E/AEA6EE//AEE66/AAE//EEE/E//E/AA/EEE/A/AEE/EEA//EEEEEEEE6EEAAA/E/A/6E/6//6<EAAEEE/EEEA/EA/EEEEEE/<<EEEE//A/EE<AEEEEE/</AA</E<AAAE/E<E/
21
+ @AS500713:64:HFKJJBGXY:1:11101:17113:1101 1:A:0:TATAGCCT+GTTTCTTA
22
+ TACAAAATGCACATCGCTGAAAGGGGTAAAGGAGAGAAATCGCTTTATAAAACCTTGAAAAGGAATATTCAAATATAAGCTGGGAAGGTATAAAAAACTCTGTACATCACAAGTAAACAAATGGAACCTGCAAAATATTAAACAAAGGATT
23
+ +
24
+ AAAAAEEEEE6EEAAAEEEEE6EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEEEEEEEEEEEEE/EEEEEEE6EE<AAEEEAEEEEEEEEEEEEAEEEEEEEA<E/AAEEEAEEEEE/EEEEAAEEE
25
+ @AS500713:64:HFKJJBGXY:1:11101:1675:1101 1:A:0:TATAGCCT+GACCCCCA
26
+ TAGGAGGCTTGGAGTACCAATAATAAAGTGAGCCCACCTTCCTGGTACCCAGACATTTCAGGAGGTCGGGAAATTTTTAAACCCAGGCAGCTTCCTGGCAGTGACATTTGGAGCATCAAAGTGGTAAATAAAATTTCATTTACATTAATAT
27
+ +
28
+ 6AAAAAEEEEE/E/EA/E/AEA6EE//AEE66/AAE//EEE/E//E/AA/EEE/A/AEE/EEA//EEEEEEEE6EEAAA/E/A/6E/6//6<EAAEEE/EEEA/EA/EEEEEE/<<EEEE//A/EE<AEEEEE/</AA</E<AAAE/E<E/
29
+ @AS500713:64:HFKJJBGXY:1:11101:17113:1101 1:A:0:TATAGCCT+GTTTCTTA
30
+ TACAAAATGCACATCGCTGAAAGGGGTAAAGGAGAGAAATCGCTTTATAAAACCTTGAAAAGGAATATTCAAATATAAGCTGGGAAGGTATAAAAAACTCTGTACATCACAAGTAAACAAATGGAACCTGCAAAATATTAAACAAAGGATT
31
+ +
32
+ AAAAAEEEEE6EEAAAEEEEE6EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEEEEEEEEEEEEE/EEEEEEE6EE<AAEEEAEEEEEEEEEEEEAEEEEEEEA<E/AAEEEAEEEEE/EEEEAAEEE
33
+ @AS500713:64:HFKJJBGXY:1:11101:17113:1101 1:A:0:TATAGCCT+GTTTCTTA
34
+ CCCCCCCCCCACATCGCTGAAAGGGGTAAAGGAGAGAAATCGCTTTATAAAACCTTGAAAAGGAATATTCAAATATAAGCTGGGAAGGTATAAAAAACTCTGTACATCACAAGTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
35
+ +
36
+ AAAAAEEEEE6EEAAAEEEEE6EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEEEEEEEEEEEEE/EEEEEEE6EE<AAEEEAEEEEEEEEEEEEAEEEEEEEA<E/AAEEEAEEEEE/EEEEAAEEE
@@ -0,0 +1,26 @@
1
+ """Run a fastp quality-control pipeline in ~10 lines of code."""
2
+
3
+ import os
4
+ from biotailor import BiotailorClient, Pipeline
5
+
6
+ # 1. Connect
7
+ client = BiotailorClient(
8
+ api_key=os.environ["BIOTAILOR_API_KEY"],
9
+ base_url="https://api.dev-isaac.biotailor.org",
10
+ )
11
+
12
+ # 2. Pick a tool
13
+ fastp = client.get_tool("fastp")
14
+
15
+ # 3. Build the pipeline
16
+ pipeline = (
17
+ Pipeline(name="demo-fastp", tool=fastp)
18
+ .set_pair_end(False)
19
+ .set_input("--in1", "examples/reads.fastq")
20
+ .set_param("--qualified_quality_phred", 20)
21
+ .set_param("--length_required", 30)
22
+ )
23
+
24
+ # 4. Run and wait
25
+ job = client.run_and_wait(pipeline, output_dir="outputs/")
26
+ print(f"Done! Status: {job.job_status.value}")
@@ -0,0 +1,49 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "biotailor"
7
+ version = "0.1.0"
8
+ description = "Python SDK for the Biotailor bioinformatics pipeline platform"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.9"
12
+ authors = [{ name = "Biotailor Team" }]
13
+ keywords = ["bioinformatics", "pipeline", "biotailor", "nextflow"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Science/Research",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.9",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
24
+ ]
25
+ dependencies = [
26
+ "requests>=2.28.0",
27
+ "tqdm>=4.64.0",
28
+ ]
29
+
30
+ [project.optional-dependencies]
31
+ dev = [
32
+ "pytest>=7.0",
33
+ "pytest-mock>=3.10",
34
+ "responses>=0.23",
35
+ "ruff>=0.1.0",
36
+ ]
37
+
38
+ [tool.hatch.build.targets.wheel]
39
+ packages = ["src/biotailor"]
40
+
41
+ [tool.ruff]
42
+ target-version = "py39"
43
+ line-length = 100
44
+
45
+ [tool.ruff.lint]
46
+ select = ["E", "F", "I", "N", "W"]
47
+
48
+ [tool.pytest.ini_options]
49
+ testpaths = ["tests"]
@@ -0,0 +1,41 @@
1
+ """Biotailor Python SDK — programmatic access to the Biotailor bioinformatics platform."""
2
+
3
+ from biotailor.client import BiotailorClient
4
+ from biotailor.exceptions import (
5
+ BiotailorAPIError,
6
+ BiotailorError,
7
+ BiotailorUploadError,
8
+ BiotailorValidationError,
9
+ )
10
+ from biotailor.models import (
11
+ Dataset,
12
+ DefaultHardware,
13
+ Job,
14
+ JobStatus,
15
+ Option,
16
+ Output,
17
+ ToolConfig,
18
+ ToolParameter,
19
+ Validation,
20
+ )
21
+ from biotailor.pipeline import Pipeline
22
+
23
+ __all__ = [
24
+ "BiotailorClient",
25
+ "BiotailorAPIError",
26
+ "BiotailorError",
27
+ "BiotailorUploadError",
28
+ "BiotailorValidationError",
29
+ "Dataset",
30
+ "DefaultHardware",
31
+ "Job",
32
+ "JobStatus",
33
+ "Option",
34
+ "Output",
35
+ "Pipeline",
36
+ "ToolConfig",
37
+ "ToolParameter",
38
+ "Validation",
39
+ ]
40
+
41
+ __version__ = "0.1.0"