cool-seq-tool 0.10.0__tar.gz → 0.12.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/PKG-INFO +2 -2
  2. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/mappers/exon_genomic_coords.py +14 -20
  3. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/schemas.py +10 -2
  4. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/sources/mane_transcript_mappings.py +46 -14
  5. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool.egg-info/PKG-INFO +2 -2
  6. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/tests/conftest.py +12 -3
  7. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/tests/mappers/test_exon_genomic_coords.py +1 -1
  8. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/tests/mappers/test_mane_transcript.py +2 -2
  9. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/tests/sources/test_mane_transcript_mappings.py +72 -10
  10. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/.coveragerc +0 -0
  11. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/.github/ISSUE_TEMPLATE/bug-report.yaml +0 -0
  12. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/.github/ISSUE_TEMPLATE/feature-request.yaml +0 -0
  13. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/.github/workflows/checks.yaml +0 -0
  14. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/.github/workflows/pr-priority-label.yaml +0 -0
  15. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/.github/workflows/release.yml +0 -0
  16. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/.github/workflows/stale.yaml +0 -0
  17. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/.gitignore +0 -0
  18. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/.pre-commit-config.yaml +0 -0
  19. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/.readthedocs.yaml +0 -0
  20. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/CITATION.cff +0 -0
  21. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/LICENSE +0 -0
  22. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/README.md +0 -0
  23. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/docs/Makefile +0 -0
  24. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/docs/make.bat +0 -0
  25. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/docs/source/_static/img/biomart.png +0 -0
  26. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/docs/source/_templates/module_summary.rst +0 -0
  27. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/docs/source/changelog.rst +0 -0
  28. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/docs/source/conf.py +0 -0
  29. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/docs/source/contributing.rst +0 -0
  30. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/docs/source/index.rst +0 -0
  31. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/docs/source/install.rst +0 -0
  32. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/docs/source/license.rst +0 -0
  33. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/docs/source/reference/index.rst +0 -0
  34. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/docs/source/transcript_selection.rst +0 -0
  35. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/docs/source/usage.rst +0 -0
  36. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/pyproject.toml +0 -0
  37. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/setup.cfg +0 -0
  38. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/__init__.py +0 -0
  39. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/app.py +0 -0
  40. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/handlers/__init__.py +0 -0
  41. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/handlers/seqrepo_access.py +0 -0
  42. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/mappers/__init__.py +0 -0
  43. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/mappers/alignment.py +0 -0
  44. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/mappers/liftover.py +0 -0
  45. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/mappers/mane_transcript.py +0 -0
  46. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/resources/__init__.py +0 -0
  47. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/resources/data_files.py +0 -0
  48. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/resources/status.py +0 -0
  49. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/resources/transcript_mapping.tsv +0 -0
  50. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/sources/__init__.py +0 -0
  51. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/sources/transcript_mappings.py +0 -0
  52. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/sources/uta_database.py +0 -0
  53. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool/utils.py +0 -0
  54. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool.egg-info/SOURCES.txt +0 -0
  55. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool.egg-info/dependency_links.txt +0 -0
  56. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool.egg-info/requires.txt +0 -0
  57. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/src/cool_seq_tool.egg-info/top_level.txt +0 -0
  58. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/tests/handlers/test_seqrepo_access.py +0 -0
  59. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/tests/mappers/test_alignment.py +0 -0
  60. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/tests/mappers/test_liftover.py +0 -0
  61. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/tests/sources/test_uta_database.py +0 -0
  62. {cool_seq_tool-0.10.0 → cool_seq_tool-0.12.0}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: cool_seq_tool
3
- Version: 0.10.0
3
+ Version: 0.12.0
4
4
  Summary: Common Operation on Lots of Sequences Tool
5
5
  Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
6
6
  License: MIT License
@@ -865,14 +865,14 @@ class ExonGenomicCoordsMapper:
865
865
  if use_alt_start_i and coordinate_type == CoordinateType.RESIDUE:
866
866
  genomic_pos = genomic_pos - 1 # Convert residue coordinate to inter-residue
867
867
 
868
- # Validate that the breakpoint occurs on a transcript given a gene
869
- coordinate_check = await self._validate_gene_coordinates(
870
- pos=genomic_pos, genomic_ac=genomic_ac, gene=gene
868
+ # Validate that the breakpoint falls between the first and last exon for the selected transcript
869
+ coordinate_check = await self._validate_genomic_breakpoint(
870
+ pos=genomic_pos, genomic_ac=genomic_ac, tx_ac=transcript
871
871
  )
872
872
  if not coordinate_check:
873
873
  return GenomicTxSeg(
874
874
  errors=[
875
- f"{genomic_pos} on {genomic_ac} does not occur within the exons for {gene}"
875
+ f"{genomic_pos} on {genomic_ac} does not occur within the exons for {transcript}"
876
876
  ]
877
877
  )
878
878
 
@@ -943,38 +943,32 @@ class ExonGenomicCoordsMapper:
943
943
  )
944
944
  return liftover_data[1] if liftover_data else None
945
945
 
946
- async def _validate_gene_coordinates(
946
+ async def _validate_genomic_breakpoint(
947
947
  self,
948
948
  pos: int,
949
949
  genomic_ac: str,
950
- gene: str,
950
+ tx_ac: str,
951
951
  ) -> bool:
952
952
  """Validate that a genomic coordinate falls within the first and last exon
953
- given a gene and accession
953
+ for a transcript on a given accession
954
954
 
955
955
  :param pos: Genomic position on ``genomic_ac``
956
956
  :param genomic_ac: RefSeq genomic accession, e.g. ``"NC_000007.14"``
957
- :param gene: A valid, case-sensitive HGNC gene symbol
957
+ :param tx_ac: A transcript accession
958
958
  :return: ``True`` if the coordinate falls within the first and last exon
959
- for the gene, ``False`` if not
959
+ for the transcript, ``False`` if not
960
960
  """
961
961
  query = f"""
962
962
  WITH tx_boundaries AS (
963
- SELECT
964
- tx_ac,
965
- hgnc,
966
- MIN(alt_start_i) as min_start,
967
- MAX(alt_end_i) as max_end
963
+ SELECT
964
+ MIN(alt_start_i) AS min_start,
965
+ MAX(alt_end_i) AS max_end
968
966
  FROM {self.uta_db.schema}.tx_exon_aln_v
969
- WHERE hgnc = '{gene}'
967
+ WHERE tx_ac = '{tx_ac}'
970
968
  AND alt_ac = '{genomic_ac}'
971
- GROUP BY tx_ac, hgnc
972
969
  )
973
- SELECT DISTINCT hgnc
974
- FROM tx_boundaries
970
+ SELECT * FROM tx_boundaries
975
971
  WHERE {pos} between tx_boundaries.min_start and tx_boundaries.max_end
976
- ORDER BY hgnc
977
- LIMIT 1;
978
972
  """ # noqa: S608
979
973
  results = await self.uta_db.execute_query(query)
980
974
  return bool(results)
@@ -43,11 +43,18 @@ class Assembly(str, Enum):
43
43
  return [item.value for item in cls]
44
44
 
45
45
 
46
+ class ManeStatus(str, Enum):
47
+ """Define constraints for mane status"""
48
+
49
+ SELECT = "mane_select"
50
+ PLUS_CLINICAL = "mane_plus_clinical"
51
+
52
+
46
53
  class TranscriptPriority(str, Enum):
47
54
  """Create Enum for Transcript Priority labels"""
48
55
 
49
- MANE_SELECT = "mane_select"
50
- MANE_PLUS_CLINICAL = "mane_plus_clinical"
56
+ MANE_SELECT = ManeStatus.SELECT.value
57
+ MANE_PLUS_CLINICAL = ManeStatus.PLUS_CLINICAL.value
51
58
  LONGEST_COMPATIBLE_REMAINING = "longest_compatible_remaining"
52
59
  GRCH38 = "grch38"
53
60
 
@@ -137,6 +144,7 @@ class ManeGeneData(BaseModel, extra="forbid"):
137
144
  ncbi_gene_id: StrictInt
138
145
  hgnc_id: StrictInt | None
139
146
  symbol: StrictStr
147
+ status: list[ManeStatus]
140
148
 
141
149
 
142
150
  class ServiceMeta(BaseModelForbidExtra):
@@ -117,26 +117,58 @@ class ManeTranscriptMappings:
117
117
  :param end: Genomic end position. Assumes residue coordinates.
118
118
  :return: Unique MANE gene(s) found for a genomic location
119
119
  """
120
+ # Keep only the rows on ``ac`` whose chr_start..chr_end range contains the genomic location
120
121
  mane_rows = self.df.filter(
121
122
  (start >= pl.col("chr_start"))
122
123
  & (end <= pl.col("chr_end"))
123
124
  & (pl.col("GRCh38_chr") == ac)
124
- ).unique(subset=["#NCBI_GeneID"])
125
+ )
125
126
 
126
- if len(mane_rows) == 0:
127
+ if mane_rows.is_empty():
127
128
  return []
128
129
 
129
- mane_rows = mane_rows.with_columns(
130
- pl.col("#NCBI_GeneID")
131
- .str.split_exact(":", 1)
132
- .struct.field("field_1")
133
- .cast(pl.Int32)
134
- .alias("ncbi_gene_id"),
135
- pl.col("HGNC_ID")
136
- .str.split_exact(":", 1)
137
- .struct.field("field_1")
138
- .cast(pl.Int32)
139
- .alias("hgnc_id"),
130
+ # Group rows by NCBI ID, transform values to representation we want, MANE status
131
+ # will be converted to list with DESC order
132
+ mane_rows = mane_rows.group_by("#NCBI_GeneID").agg(
133
+ [
134
+ pl.col("#NCBI_GeneID")
135
+ .first()
136
+ .str.split_exact(":", 1)
137
+ .struct.field("field_1")
138
+ .cast(pl.Int32)
139
+ .alias("ncbi_gene_id"),
140
+ pl.col("HGNC_ID")
141
+ .first()
142
+ .str.split_exact(":", 1)
143
+ .struct.field("field_1")
144
+ .cast(pl.Int32)
145
+ .alias("hgnc_id"),
146
+ pl.col("MANE_status")
147
+ .unique()
148
+ .str.to_lowercase()
149
+ .str.replace_all(" ", "_")
150
+ .alias("status")
151
+ .sort(descending=True),
152
+ pl.col("symbol").first(),
153
+ ]
154
+ )
155
+
156
+ # Sort final rows based on MANE status
157
+ # First by length (which means gene has both select and plus clinical)
158
+ # Then by the joined status string in DESC order
159
+ # Then by NCBI ID ASC order
160
+ mane_rows = (
161
+ mane_rows.with_columns(
162
+ [
163
+ pl.col("status").list.len().alias("status_count"),
164
+ pl.col("status").list.join("_").alias("status_str"),
165
+ pl.col("ncbi_gene_id"),
166
+ ]
167
+ )
168
+ .sort(
169
+ ["status_count", "status_str", "ncbi_gene_id"],
170
+ descending=[True, True, False],
171
+ )
172
+ .drop(["status_count", "status_str", "#NCBI_GeneID"])
140
173
  )
141
- mane_rows = mane_rows.select(["ncbi_gene_id", "hgnc_id", "symbol"])
142
174
  return [ManeGeneData(**mane_gene) for mane_gene in mane_rows.to_dicts()]
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: cool_seq_tool
3
- Version: 0.10.0
3
+ Version: 0.12.0
4
4
  Summary: Common Operation on Lots of Sequences Tool
5
5
  Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
6
6
  License: MIT License
@@ -340,10 +340,19 @@ def genomic_tx_data():
340
340
  @pytest.fixture(scope="session")
341
341
  def egfr_mane_gene():
342
342
  """Create test fixture for EGFR MANE gene"""
343
- return ManeGeneData(ncbi_gene_id=1956, hgnc_id=3236, symbol="EGFR")
343
+ return ManeGeneData(
344
+ ncbi_gene_id=1956, hgnc_id=3236, symbol="EGFR", status=["mane_select"]
345
+ )
344
346
 
345
347
 
346
348
  @pytest.fixture(scope="session")
347
- def braf_mane_gene():
349
+ def braf_mane_genes():
348
350
  """Create test fixture for BRAF MANE gene"""
349
- return ManeGeneData(ncbi_gene_id=673, hgnc_id=1097, symbol="BRAF")
351
+ return [
352
+ ManeGeneData(
353
+ ncbi_gene_id=673,
354
+ hgnc_id=1097,
355
+ symbol="BRAF",
356
+ status=["mane_select", "mane_plus_clinical"],
357
+ ),
358
+ ]
@@ -1516,7 +1516,7 @@ async def test_invalid(test_egc_mapper):
1516
1516
  )
1517
1517
  genomic_tx_seg_service_checks(resp, is_valid=False)
1518
1518
  assert resp.errors == [
1519
- "9999999999998 on NC_000001.11 does not occur within the exons for TPM3"
1519
+ "9999999999998 on NC_000001.11 does not occur within the exons for NM_152263.3"
1520
1520
  ]
1521
1521
 
1522
1522
  # Must supply either gene or transcript
@@ -143,13 +143,13 @@ def grch38_egfr(egfr_mane_gene):
143
143
 
144
144
 
145
145
  @pytest.fixture(scope="module")
146
- def grch38_braf(braf_mane_gene):
146
+ def grch38_braf(braf_mane_genes):
147
147
  """Create a test fixture for grch38 responses BRAF V600E (genomic)."""
148
148
  params = {
149
149
  "pos": (140753335, 140753336),
150
150
  "status": TranscriptPriority.GRCH38.value,
151
151
  "ac": "NC_000007.14",
152
- "mane_genes": [braf_mane_gene],
152
+ "mane_genes": braf_mane_genes,
153
153
  }
154
154
  return GenomicRepresentation(**params)
155
155
 
@@ -209,28 +209,75 @@ def test_get_mane_data_from_chr_pos(
209
209
 
210
210
 
211
211
  def test_get_genomic_mane_genes(
212
- test_mane_transcript_mappings, braf_mane_gene, egfr_mane_gene
212
+ test_mane_transcript_mappings, braf_mane_genes, egfr_mane_gene
213
213
  ):
214
214
  """Test that get_genomic_mane_genes method works correctly"""
215
215
  new_df = pl.DataFrame(
216
216
  {
217
- "#NCBI_GeneID": ["GeneID:673", "GeneID:673", "GeneID:1956", "GeneID:1"],
217
+ "#NCBI_GeneID": [
218
+ "GeneID:673",
219
+ "GeneID:673",
220
+ "GeneID:1956",
221
+ "GeneID:1",
222
+ "GeneID:2",
223
+ "GeneID:2",
224
+ "GeneID:3",
225
+ ],
218
226
  "Ensembl_Gene": [
219
227
  "ENSG00000157764.14",
220
228
  "ENSG00000157764.14",
221
229
  "ENSG00000146648.21",
222
230
  "ENSG1.1",
231
+ "ENSG1.1",
232
+ "ENSG1.1",
233
+ "ENSG1.1",
234
+ ],
235
+ "HGNC_ID": [
236
+ "HGNC:1097",
237
+ "HGNC:1097",
238
+ "HGNC:3236",
239
+ "HGNC:1",
240
+ "HGNC:2",
241
+ "HGNC:2",
242
+ "HGNC:3",
223
243
  ],
224
- "HGNC_ID": ["HGNC:1097", "HGNC:1097", "HGNC:3236", "HGNC:2"],
225
- "symbol": ["BRAF", "BRAF", "EGFR", "Dummy"],
244
+ "symbol": ["BRAF", "BRAF", "EGFR", "Dummy1", "Dummy2", "Dummy2", "Dummy3"],
226
245
  "GRCh38_chr": [
227
246
  "NC_000007.14",
228
247
  "NC_000007.14",
229
248
  "NC_000007.14",
230
249
  "NC_000007.14",
250
+ "NC_000007.14",
251
+ "NC_000007.14",
252
+ "NC_000007.14",
253
+ ],
254
+ "chr_start": [
255
+ 140719337,
256
+ 140730665,
257
+ 55019017,
258
+ 55019017,
259
+ 55019017,
260
+ 55019017,
261
+ 55019017,
262
+ ],
263
+ "chr_end": [
264
+ 140924929,
265
+ 140924929,
266
+ 55211628,
267
+ 55211628,
268
+ 55211628,
269
+ 55211628,
270
+ 55211628,
271
+ ],
272
+ "MANE_status": [
273
+ "MANE Plus Clinical",
274
+ "MANE Select",
275
+ "MANE Select",
276
+ "MANE Plus Clinical",
277
+ "MANE Select",
278
+ "MANE Plus Clinical",
279
+ "MANE Select",
231
280
  ],
232
- "chr_start": [140719337, 140730665, 55019017, 55019017],
233
- "chr_end": [140924929, 140924929, 55211628, 55211628],
234
281
  }
235
282
  )
236
283
 
@@ -238,14 +285,29 @@ def test_get_genomic_mane_genes(
238
285
  mane_genes = test_mane_transcript_mappings.get_genomic_mane_genes(
239
286
  "NC_000007.14", 140753336, 140753336
240
287
  )
241
- assert mane_genes == [braf_mane_gene]
288
+ assert mane_genes == braf_mane_genes
242
289
 
243
290
  mane_genes = test_mane_transcript_mappings.get_genomic_mane_genes(
244
291
  "NC_000007.14", 55191822, 55191822
245
292
  )
246
- assert len(mane_genes) == 2
247
- assert egfr_mane_gene in mane_genes
248
- assert ManeGeneData(ncbi_gene_id=1, hgnc_id=2, symbol="Dummy") in mane_genes
293
+ assert mane_genes == [
294
+ ManeGeneData(
295
+ ncbi_gene_id=2,
296
+ hgnc_id=2,
297
+ symbol="Dummy2",
298
+ status=["mane_select", "mane_plus_clinical"],
299
+ ),
300
+ ManeGeneData(
301
+ ncbi_gene_id=3, hgnc_id=3, symbol="Dummy3", status=["mane_select"]
302
+ ),
303
+ egfr_mane_gene,
304
+ ManeGeneData(
305
+ ncbi_gene_id=1,
306
+ hgnc_id=1,
307
+ symbol="Dummy1",
308
+ status=["mane_plus_clinical"],
309
+ ),
310
+ ]
249
311
 
250
312
  # No MANE genes found for given genomic location
251
313
  mane_genes = test_mane_transcript_mappings.get_genomic_mane_genes(
File without changes
File without changes
File without changes