alias-mapper 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. alias_mapper-1.0.0/LICENSE +21 -0
  2. alias_mapper-1.0.0/PKG-INFO +217 -0
  3. alias_mapper-1.0.0/README.md +187 -0
  4. alias_mapper-1.0.0/pyproject.toml +68 -0
  5. alias_mapper-1.0.0/setup.cfg +4 -0
  6. alias_mapper-1.0.0/src/alias_mapper/__init__.py +8 -0
  7. alias_mapper-1.0.0/src/alias_mapper/_ssl.py +40 -0
  8. alias_mapper-1.0.0/src/alias_mapper/alias_source.py +358 -0
  9. alias_mapper-1.0.0/src/alias_mapper/bootstrap.py +305 -0
  10. alias_mapper-1.0.0/src/alias_mapper/build_alias_db.py +407 -0
  11. alias_mapper-1.0.0/src/alias_mapper/cli.py +585 -0
  12. alias_mapper-1.0.0/src/alias_mapper/formats/__init__.py +68 -0
  13. alias_mapper-1.0.0/src/alias_mapper/formats/_io.py +73 -0
  14. alias_mapper-1.0.0/src/alias_mapper/formats/_resolve.py +117 -0
  15. alias_mapper-1.0.0/src/alias_mapper/formats/base.py +51 -0
  16. alias_mapper-1.0.0/src/alias_mapper/formats/fasta.py +91 -0
  17. alias_mapper-1.0.0/src/alias_mapper/formats/gff.py +63 -0
  18. alias_mapper-1.0.0/src/alias_mapper.egg-info/PKG-INFO +217 -0
  19. alias_mapper-1.0.0/src/alias_mapper.egg-info/SOURCES.txt +28 -0
  20. alias_mapper-1.0.0/src/alias_mapper.egg-info/dependency_links.txt +1 -0
  21. alias_mapper-1.0.0/src/alias_mapper.egg-info/entry_points.txt +2 -0
  22. alias_mapper-1.0.0/src/alias_mapper.egg-info/requires.txt +8 -0
  23. alias_mapper-1.0.0/src/alias_mapper.egg-info/top_level.txt +1 -0
  24. alias_mapper-1.0.0/tests/test_alias_source.py +115 -0
  25. alias_mapper-1.0.0/tests/test_build_db.py +78 -0
  26. alias_mapper-1.0.0/tests/test_cli_multi.py +213 -0
  27. alias_mapper-1.0.0/tests/test_cli_smoke.py +77 -0
  28. alias_mapper-1.0.0/tests/test_formats.py +115 -0
  29. alias_mapper-1.0.0/tests/test_io.py +75 -0
  30. alias_mapper-1.0.0/tests/test_resolve.py +99 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Max Reese
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,217 @@
1
+ Metadata-Version: 2.4
2
+ Name: alias-mapper
3
+ Version: 1.0.0
4
+ Summary: Translate chromosome/scaffold names in bioinformatics files between naming conventions
5
+ Author: Max Reese
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/guigolab/alias-mapper
8
+ Project-URL: Issues, https://github.com/guigolab/alias-mapper/issues
9
+ Keywords: bioinformatics,genomics,gff,fasta,naming-conventions,ncbi
10
+ Classifier: Development Status :: 5 - Production/Stable
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Operating System :: OS Independent
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: platformdirs>=4.0
24
+ Requires-Dist: certifi
25
+ Provides-Extra: trusted
26
+ Requires-Dist: truststore; extra == "trusted"
27
+ Provides-Extra: test
28
+ Requires-Dist: pytest>=7; extra == "test"
29
+ Dynamic: license-file
30
+
31
+ # alias-mapper
32
+
33
+ Translate chromosome and scaffold names in bioinformatics files
34
+ between naming conventions (GenBank, RefSeq, UCSC, and others).
35
+
36
+ ## What it does
37
+
38
+ Research files from different sources use different names for the same
39
+ sequences: `chr1`, `NC_000001.11`, `CM000663.2`, and `1` can all refer
40
+ to the same human chromosome. Files using different conventions can't
41
+ be combined without translation.
42
+
43
+ `alias-mapper` rewrites the sequence names in GFF, GTF, and FASTA
44
+ files from one convention to another using a precomputed alias table
45
+ built from NCBI assembly reports. Source convention and genome
46
+ assembly are auto-detected from the input by default.
47
+
48
+ ## Install
49
+
50
+ ```bash
51
+ pip install git+https://github.com/guigolab/alias-mapper.git
52
+ ```
53
+
54
+ On networks that perform TLS inspection (corporate / institutional,
55
+ e.g. CRG), also install the `trusted` extra so the tool uses the
56
+ operating system's trust store for certificate verification:
57
+
58
+ ```bash
59
+ pip install "alias-mapper[trusted] @ git+https://github.com/guigolab/alias-mapper.git"
60
+ ```
61
+
62
+ The first time you run `convert`, the tool downloads the latest alias
63
+ data (~100 MB) from GitHub Releases and builds a local SQLite database
64
+ in your platform cache directory:
65
+
66
+ - macOS: `~/Library/Caches/alias-mapper/aliases.db`
67
+ - Linux: `~/.cache/alias-mapper/aliases.db`
68
+ - Windows: `%LOCALAPPDATA%\alias-mapper\Cache\aliases.db`
69
+
70
+ First-run setup takes about a minute. Subsequent runs use the cached
71
+ database directly. If the database schema changes in a newer release,
72
+ the cache is rebuilt automatically.
73
+
74
+ ## Quickstart
75
+
76
+ ```bash
77
+ alias-mapper convert annotations.gff --to ucsc -o annotations.ucsc.gff
78
+ ```
79
+
80
+ A summary on stderr reports how many rows were translated and how many
81
+ had sequence names not in the alias database (those rows are passed
82
+ through unchanged with a warning).
83
+
84
+ ## Usage
85
+
86
+ ```
87
+ # single file
88
+ alias-mapper convert <input> --to <convention> -o <output> [options]
89
+
90
+ # multi-file: conform annotations to a reference FASTA (FASTA untouched)
91
+ alias-mapper convert --fasta <ref> [<ann> ...] --out-dir <dir> [options]
92
+
93
+ # multi-file: force the FASTA and annotations to one convention
94
+ alias-mapper convert --fasta <ref> [<ann> ...] --overwrite-to <convention> --out-dir <dir>
95
+
96
+ alias-mapper update
97
+ ```
98
+
99
+ ### Subcommands
100
+
101
+ - **`convert`** — translate a single file, or a reference FASTA plus
102
+ its annotation files (multi-file mode; see [Multi-file mode](#multi-file-mode)).
103
+ - **`update`** — re-download the latest alias data and rebuild the
104
+ cached database. Run manually when you want newer data.
105
+
106
+ ### Supported file types
107
+
108
+ GFF (`.gff`, `.gff3`), GTF (`.gtf`), and FASTA (`.fa`, `.fasta`,
109
+ `.fna`). The translator is picked by file extension.
110
+
111
+ ### Supported conventions
112
+
113
+ `genbank`, `refseq`, `ucsc`, `sequence-name`, `assigned-molecule`.
114
+
115
+ ### Examples
116
+
117
+ ```bash
118
+ # Translate from RefSeq to UCSC explicitly
119
+ alias-mapper convert annotations.gff \
120
+ --from refseq --to ucsc \
121
+ -o out.gff
122
+
123
+ # Pin the assembly when auto-detection is ambiguous
124
+ alias-mapper convert annotations.gff \
125
+ --to ucsc \
126
+ --assembly GCF_000001405.40 \
127
+ -o out.gff
128
+
129
+ # FASTA — same syntax, different file
130
+ alias-mapper convert reference.fa \
131
+ --from genbank --to sequence-name \
132
+ --assembly GCA_963924405.1 \
133
+ -o reference.renamed.fa
134
+
135
+ # Multi-file conform: rewrite the annotations to match reference.fa's
136
+ # own convention; reference.fa is left untouched
137
+ alias-mapper convert --fasta reference.fa genes.gff peaks.bed.gff \
138
+ --out-dir conformed/
139
+
140
+ # Multi-file overwrite: force reference.fa and its annotations to UCSC
141
+ alias-mapper convert --fasta reference.fa genes.gff \
142
+ --overwrite-to ucsc --out-dir ucsc_out/
143
+
144
+ # Refresh the cached alias data
145
+ alias-mapper update
146
+ ```
147
+
148
+ ### Multi-file mode
149
+
150
+ Pass `--fasta <ref>` to process a reference FASTA together with its
151
+ annotation files in one invocation. The assembly is detected once from
152
+ the FASTA and the alias table is loaded once for the whole batch.
153
+ Outputs go to `--out-dir`, named `<stem>.<convention>.<ext>` (gzip
154
+ preserved).
155
+
156
+ There are two modes:
157
+
158
+ - **Conform** (the default, when `--overwrite-to` is omitted): each
159
+ annotation is rewritten to match the FASTA's *own* convention, and
160
+ the FASTA is left unchanged. Use this to make a set of annotations
161
+ agree with a genome you already have. The FASTA is not copied into
162
+ the output directory, since it is unchanged.
163
+ - **Overwrite** (`--overwrite-to <convention>`): the FASTA and every
164
+ annotation are converted to the named convention.
165
+
166
+ `--to` is single-file only; in `--fasta` mode use `--overwrite-to`
167
+ (or omit it to conform).
168
+
169
+ ### Flags (`convert`)
170
+
171
+ | Flag | Mode | Purpose |
172
+ | ---------------- | ----------- | ------------------------------------------------------------- |
173
+ | `--to` | single-file | Target naming convention (required in single-file mode) |
174
+ | `-o` | single-file | Output path |
175
+ | `--fasta` | multi-file | Reference FASTA; enables multi-file mode |
176
+ | `--overwrite-to` | multi-file | Force the FASTA and all annotations to this convention |
177
+ | `--out-dir` | multi-file | Output directory for the converted files |
178
+ | `--from` | both | Source convention. Auto-detected if absent (not used to conform) |
179
+ | `--assembly` | both | Assembly accession. Auto-detected if absent |
180
+ | `--alias-db` | both | Path to a specific alias SQLite database (overrides cache) |
181
+
182
+ ### Auto-detection
183
+
184
+ When `--from` or `--assembly` is omitted, the tool reads up to 50
185
+ unique sequence names from the input and scores them against the
186
+ database. It commits to a result only when the top candidate has at
187
+ least 5 matches and beats the runner-up by 2× or more. Otherwise it
188
+ errors out and asks for the flag explicitly.
189
+
190
+ ### Unmapped names
191
+
192
+ If a sequence name in the input isn't in the alias database, the line
193
+ is written to the output unchanged and counted in the unmapped total.
194
+ Up to five example names are printed at the end of the run so you can
195
+ see what didn't translate.
196
+
197
+ Before giving up on a name, the tool tries a couple of conservative
198
+ fallbacks: swapping a UCSC-style `vN` version separator for the `.N`
199
+ form (and vice versa), and stripping an `ENA|...|accession` header
200
+ wrapper down to the bare accession. These only run when the exact name
201
+ isn't found, so they never override a direct match.
202
+
203
+ ## Data updates
204
+
205
+ A weekly GitHub Actions workflow rebuilds the alias dataset from
206
+ NCBI's published assembly summaries and publishes it as a
207
+ `data-YYYY-MM-DD` GitHub Release. Each release ships three artifacts:
208
+
209
+ - `aliases.tsv.gz` — the merged-row alias data the CLI consumes.
210
+ - `historical.tsv.gz` — dead-accession lookup with suppression dates
211
+ and best-effort replacements.
212
+ - `failures.tsv` — per-assembly collection failure log.
213
+
214
+ ## More
215
+
216
+ See [`docs/design.md`](docs/design.md) for architecture, design
217
+ decisions, and direction.
@@ -0,0 +1,187 @@
1
+ # alias-mapper
2
+
3
+ Translate chromosome and scaffold names in bioinformatics files
4
+ between naming conventions (GenBank, RefSeq, UCSC, and others).
5
+
6
+ ## What it does
7
+
8
+ Research files from different sources use different names for the same
9
+ sequences: `chr1`, `NC_000001.11`, `CM000663.2`, and `1` can all refer
10
+ to the same human chromosome. Files using different conventions can't
11
+ be combined without translation.
12
+
13
+ `alias-mapper` rewrites the sequence names in GFF, GTF, and FASTA
14
+ files from one convention to another using a precomputed alias table
15
+ built from NCBI assembly reports. Source convention and genome
16
+ assembly are auto-detected from the input by default.
17
+
18
+ ## Install
19
+
20
+ ```bash
21
+ pip install git+https://github.com/guigolab/alias-mapper.git
22
+ ```
23
+
24
+ On networks that perform TLS inspection (corporate / institutional,
25
+ e.g. CRG), also install the `trusted` extra so the tool uses the
26
+ operating system's trust store for certificate verification:
27
+
28
+ ```bash
29
+ pip install "alias-mapper[trusted] @ git+https://github.com/guigolab/alias-mapper.git"
30
+ ```
31
+
32
+ The first time you run `convert`, the tool downloads the latest alias
33
+ data (~100 MB) from GitHub Releases and builds a local SQLite database
34
+ in your platform cache directory:
35
+
36
+ - macOS: `~/Library/Caches/alias-mapper/aliases.db`
37
+ - Linux: `~/.cache/alias-mapper/aliases.db`
38
+ - Windows: `%LOCALAPPDATA%\alias-mapper\Cache\aliases.db`
39
+
40
+ First-run setup takes about a minute. Subsequent runs use the cached
41
+ database directly. If the database schema changes in a newer release,
42
+ the cache is rebuilt automatically.
43
+
44
+ ## Quickstart
45
+
46
+ ```bash
47
+ alias-mapper convert annotations.gff --to ucsc -o annotations.ucsc.gff
48
+ ```
49
+
50
+ A summary on stderr reports how many rows were translated and how many
51
+ had sequence names not in the alias database (those rows are passed
52
+ through unchanged with a warning).
53
+
54
+ ## Usage
55
+
56
+ ```
57
+ # single file
58
+ alias-mapper convert <input> --to <convention> -o <output> [options]
59
+
60
+ # multi-file: conform annotations to a reference FASTA (FASTA untouched)
61
+ alias-mapper convert --fasta <ref> [<ann> ...] --out-dir <dir> [options]
62
+
63
+ # multi-file: force the FASTA and annotations to one convention
64
+ alias-mapper convert --fasta <ref> [<ann> ...] --overwrite-to <convention> --out-dir <dir>
65
+
66
+ alias-mapper update
67
+ ```
68
+
69
+ ### Subcommands
70
+
71
+ - **`convert`** — translate a single file, or a reference FASTA plus
72
+ its annotation files (multi-file mode; see [Multi-file mode](#multi-file-mode)).
73
+ - **`update`** — re-download the latest alias data and rebuild the
74
+ cached database. Run manually when you want newer data.
75
+
76
+ ### Supported file types
77
+
78
+ GFF (`.gff`, `.gff3`), GTF (`.gtf`), and FASTA (`.fa`, `.fasta`,
79
+ `.fna`). The translator is picked by file extension.
80
+
81
+ ### Supported conventions
82
+
83
+ `genbank`, `refseq`, `ucsc`, `sequence-name`, `assigned-molecule`.
84
+
85
+ ### Examples
86
+
87
+ ```bash
88
+ # Translate from RefSeq to UCSC explicitly
89
+ alias-mapper convert annotations.gff \
90
+ --from refseq --to ucsc \
91
+ -o out.gff
92
+
93
+ # Pin the assembly when auto-detection is ambiguous
94
+ alias-mapper convert annotations.gff \
95
+ --to ucsc \
96
+ --assembly GCF_000001405.40 \
97
+ -o out.gff
98
+
99
+ # FASTA — same syntax, different file
100
+ alias-mapper convert reference.fa \
101
+ --from genbank --to sequence-name \
102
+ --assembly GCA_963924405.1 \
103
+ -o reference.renamed.fa
104
+
105
+ # Multi-file conform: rewrite the annotations to match reference.fa's
106
+ # own convention; reference.fa is left untouched
107
+ alias-mapper convert --fasta reference.fa genes.gff peaks.bed.gff \
108
+ --out-dir conformed/
109
+
110
+ # Multi-file overwrite: force reference.fa and its annotations to UCSC
111
+ alias-mapper convert --fasta reference.fa genes.gff \
112
+ --overwrite-to ucsc --out-dir ucsc_out/
113
+
114
+ # Refresh the cached alias data
115
+ alias-mapper update
116
+ ```
117
+
118
+ ### Multi-file mode
119
+
120
+ Pass `--fasta <ref>` to process a reference FASTA together with its
121
+ annotation files in one invocation. The assembly is detected once from
122
+ the FASTA and the alias table is loaded once for the whole batch.
123
+ Outputs go to `--out-dir`, named `<stem>.<convention>.<ext>` (gzip
124
+ preserved).
125
+
126
+ There are two modes:
127
+
128
+ - **Conform** (the default, when `--overwrite-to` is omitted): each
129
+ annotation is rewritten to match the FASTA's *own* convention, and
130
+ the FASTA is left unchanged. Use this to make a set of annotations
131
+ agree with a genome you already have. The FASTA is not copied into
132
+ the output directory, since it is unchanged.
133
+ - **Overwrite** (`--overwrite-to <convention>`): the FASTA and every
134
+ annotation are converted to the named convention.
135
+
136
+ `--to` is single-file only; in `--fasta` mode use `--overwrite-to`
137
+ (or omit it to conform).
138
+
139
+ ### Flags (`convert`)
140
+
141
+ | Flag | Mode | Purpose |
142
+ | ---------------- | ----------- | ------------------------------------------------------------- |
143
+ | `--to` | single-file | Target naming convention (required in single-file mode) |
144
+ | `-o` | single-file | Output path |
145
+ | `--fasta` | multi-file | Reference FASTA; enables multi-file mode |
146
+ | `--overwrite-to` | multi-file | Force the FASTA and all annotations to this convention |
147
+ | `--out-dir` | multi-file | Output directory for the converted files |
148
+ | `--from` | both | Source convention. Auto-detected if absent (not used to conform) |
149
+ | `--assembly` | both | Assembly accession. Auto-detected if absent |
150
+ | `--alias-db` | both | Path to a specific alias SQLite database (overrides cache) |
151
+
152
+ ### Auto-detection
153
+
154
+ When `--from` or `--assembly` is omitted, the tool reads up to 50
155
+ unique sequence names from the input and scores them against the
156
+ database. It commits to a result only when the top candidate has at
157
+ least 5 matches and beats the runner-up by 2× or more. Otherwise it
158
+ errors out and asks for the flag explicitly.
159
+
160
+ ### Unmapped names
161
+
162
+ If a sequence name in the input isn't in the alias database, the line
163
+ is written to the output unchanged and counted in the unmapped total.
164
+ Up to five example names are printed at the end of the run so you can
165
+ see what didn't translate.
166
+
167
+ Before giving up on a name, the tool tries a couple of conservative
168
+ fallbacks: swapping a UCSC-style `vN` version separator for the `.N`
169
+ form (and vice versa), and stripping an `ENA|...|accession` header
170
+ wrapper down to the bare accession. These only run when the exact name
171
+ isn't found, so they never override a direct match.
172
+
173
+ ## Data updates
174
+
175
+ A weekly GitHub Actions workflow rebuilds the alias dataset from
176
+ NCBI's published assembly summaries and publishes it as a
177
+ `data-YYYY-MM-DD` GitHub Release. Each release ships three artifacts:
178
+
179
+ - `aliases.tsv.gz` — the merged-row alias data the CLI consumes.
180
+ - `historical.tsv.gz` — dead-accession lookup with suppression dates
181
+ and best-effort replacements.
182
+ - `failures.tsv` — per-assembly collection failure log.
183
+
184
+ ## More
185
+
186
+ See [`docs/design.md`](docs/design.md) for architecture, design
187
+ decisions, and direction.
@@ -0,0 +1,68 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "alias-mapper"
7
+ dynamic = ["version"]
8
+ description = "Translate chromosome/scaffold names in bioinformatics files between naming conventions"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { text = "MIT" }
12
+ authors = [
13
+ { name = "Max Reese" },
14
+ ]
15
+ keywords = ["bioinformatics", "genomics", "gff", "fasta", "naming-conventions", "ncbi"]
16
+ classifiers = [
17
+ "Development Status :: 5 - Production/Stable",
18
+ "Intended Audience :: Science/Research",
19
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
20
+ "License :: OSI Approved :: MIT License",
21
+ "Programming Language :: Python :: 3",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Programming Language :: Python :: 3.13",
26
+ "Operating System :: OS Independent",
27
+ ]
28
+ dependencies = [
29
+ "platformdirs>=4.0",
30
+ "certifi",
31
+ ]
32
+
33
+ [project.optional-dependencies]
34
+ # truststore makes the package use the system keychain for TLS
35
+ # verification, which is necessary on networks that do TLS inspection
36
+ # (e.g. corporate / institutional networks like CRG's). Harmless
37
+ # elsewhere. Install with: pip install alias-mapper[trusted]
38
+ trusted = [
39
+ "truststore",
40
+ ]
41
+ # Test dependencies. Install with: pip install -e .[test]
42
+ test = [
43
+ "pytest>=7",
44
+ ]
45
+
46
+ [project.urls]
47
+ Homepage = "https://github.com/guigolab/alias-mapper"
48
+ Issues = "https://github.com/guigolab/alias-mapper/issues"
49
+
50
+ [project.scripts]
51
+ alias-mapper = "alias_mapper.cli:main"
52
+
53
+ # Single-source the version from the package so a release is one number to
54
+ # bump (src/alias_mapper/__init__.py). setuptools reads __version__ without
55
+ # importing the package, so this stays cheap and import-safe.
56
+ [tool.setuptools.dynamic]
57
+ version = { attr = "alias_mapper.__version__" }
58
+
59
+ [tool.setuptools.packages.find]
60
+ where = ["src"]
61
+ include = ["alias_mapper*"]
62
+
63
+ [tool.pytest.ini_options]
64
+ testpaths = ["tests"]
65
+ # Put src/ on sys.path so `pytest` finds the package without an install
66
+ # (CI does `pip install -e .[test]`, but this keeps a bare `pytest` working
67
+ # locally too). pythonpath requires pytest >= 7.
68
+ pythonpath = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,8 @@
1
+ """
2
+ alias-mapper: translate chromosome / scaffold names in bioinformatics
3
+ files between naming conventions.
4
+
5
+ See README.md and docs/design.md for usage and architecture.
6
+ """
7
+
8
+ __version__ = "1.0.0"
@@ -0,0 +1,40 @@
1
+ """
2
+ _ssl.py
3
+ -------
4
+ Shared SSL context setup for the installed alias-mapper package.
5
+
6
+ Mirrors scripts/_http.py's setup, but lives inside the package so
7
+ bootstrap.py and any future HTTP-using module (e.g. HttpAliasSource)
8
+ can import it without depending on scripts/.
9
+
10
+ Order of preference: truststore > certifi > stdlib defaults.
11
+
12
+ - truststore: uses the system keychain (necessary on networks with
13
+ TLS inspection like CRG's, which inject a non-Mozilla root cert)
14
+ - certifi: Mozilla's CA bundle, covers most environments including
15
+ GitHub Actions runners
16
+ - stdlib: last fallback, used if neither truststore nor certifi can be imported
17
+
18
+ truststore is an optional extra (`alias-mapper[trusted]`); certifi is a
19
+ declared dependency, but its import is still guarded. Without either, the
20
+ package works on any network where the system already trusts the NCBI/GitHub cert chains.
21
+ """
22
+
23
+ import ssl
24
+
25
+ try:
26
+ import truststore
27
+ truststore.inject_into_ssl()
28
+ SSL_BACKEND = "truststore"
29
+ except ImportError:
30
+ SSL_BACKEND = None
31
+
32
+ try:
33
+ import certifi
34
+ SSL_CONTEXT = ssl.create_default_context(cafile=certifi.where())
35
+ if SSL_BACKEND is None:
36
+ SSL_BACKEND = "certifi"
37
+ except ImportError:
38
+ SSL_CONTEXT = ssl.create_default_context()
39
+ if SSL_BACKEND is None:
40
+ SSL_BACKEND = "stdlib"