dayhoff-tools 1.2.5__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dayhoff_tools/__init__.py CHANGED
File without changes
File without changes
File without changes
File without changes
File without changes
dayhoff_tools/cli/main.py CHANGED
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -683,7 +683,7 @@ class Embedder(Processor):
683
683
  sequence_ids, sequences, sequence_lengths = zip(*batch)
684
684
 
685
685
  # Prepare sequences for tokenization
686
- tokenizer_input = self.prepare_tokenizer_input(sequences)
686
+ tokenizer_input = self.prepare_tokenizer_input(list(sequences))
687
687
 
688
688
  # Tokenize sequences
689
689
  encoded_input = self.tokenizer.batch_encode_plus(
dayhoff_tools/fasta.py CHANGED
@@ -25,7 +25,7 @@ def _clean_noncanonical_fasta(
25
25
  ) -> Optional[dict[str, str]]:
26
26
  """
27
27
  Read in a FASTA file containing multiple sequences, replace non-canonical amino acids,
28
- remove empty sequences, and either write the sequences to a new FASTA file or return them as a dictionary.
28
+ remove stop codons, remove empty sequences, and either write the sequences to a new FASTA file or return them as a dictionary.
29
29
 
30
30
  Args:
31
31
  input_path (str): Path to the input FASTA file.
@@ -48,7 +48,11 @@ def _clean_noncanonical_fasta(
48
48
  for line in fasta_file:
49
49
  if line.startswith(">"):
50
50
  if seq_id and seq_lines:
51
- seq = "".join(seq_lines).translate(str.maketrans("OJUZB", "XLCED"))
51
+ seq = (
52
+ "".join(seq_lines)
53
+ .translate(str.maketrans("OJUZB", "XLCED"))
54
+ .replace("*", "")
55
+ )
52
56
  if seq.strip(): # Only process non-empty sequences
53
57
  sequences[seq_id] = seq
54
58
  if output_path:
@@ -61,7 +65,11 @@ def _clean_noncanonical_fasta(
61
65
 
62
66
  # Process the last sequence
63
67
  if seq_id and seq_lines:
64
- seq = "".join(seq_lines).translate(str.maketrans("OJUZB", "XLCED"))
68
+ seq = (
69
+ "".join(seq_lines)
70
+ .translate(str.maketrans("OJUZB", "XLCED"))
71
+ .replace("*", "")
72
+ )
65
73
  if seq.strip(): # Only process non-empty sequences
66
74
  sequences[seq_id] = seq
67
75
  if output_path:
@@ -92,7 +100,7 @@ def clean_noncanonical_fasta(
92
100
  ):
93
101
  """
94
102
  Read in a FASTA file containing multiple sequences and write the sequences to a new FASTA file.
95
- Replace non-canonical amino acids along the way.
103
+ Replace non-canonical amino acids and remove stop codons along the way.
96
104
 
97
105
  Args:
98
106
  input_path (str): Path to the input FASTA file.
@@ -112,7 +120,7 @@ def clean_noncanonical_fasta_to_dict(
112
120
  ) -> dict[str, str]:
113
121
  """
114
122
  Read in a FASTA file containing multiple sequences and return the sequences as a dictionary.
115
- Replace non-canonical amino acids along the way.
123
+ Replace non-canonical amino acids and remove stop codons along the way.
116
124
 
117
125
  Args:
118
126
  input_path (str): Path to the input FASTA file.
dayhoff_tools/file_ops.py CHANGED
File without changes
dayhoff_tools/h5.py CHANGED
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
dayhoff_tools/logs.py CHANGED
File without changes
dayhoff_tools/sqlite.py CHANGED
File without changes
File without changes
@@ -1,11 +1,13 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dayhoff-tools
3
- Version: 1.2.5
3
+ Version: 1.3.1
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
7
- Requires-Python: >=3.12,<4.0
7
+ Requires-Python: >=3.10,<4.0
8
8
  Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
9
11
  Classifier: Programming Language :: Python :: 3.12
10
12
  Classifier: Programming Language :: Python :: 3.13
11
13
  Provides-Extra: embedders
@@ -24,13 +26,13 @@ Requires-Dist: pandas (>=2.2.3) ; extra == "embedders"
24
26
  Requires-Dist: pandas (>=2.2.3) ; extra == "full"
25
27
  Requires-Dist: pyyaml (>=6.0)
26
28
  Requires-Dist: questionary (>=2.0.1)
27
- Requires-Dist: rdkit (>=2025.3.2) ; extra == "full"
29
+ Requires-Dist: rdkit-pypi (>=2022.9.5) ; extra == "full"
28
30
  Requires-Dist: requests (>=2.31.0)
29
31
  Requires-Dist: sentencepiece (>=0.2.0) ; extra == "embedders"
30
32
  Requires-Dist: sentencepiece (>=0.2.0) ; extra == "full"
31
33
  Requires-Dist: sqlalchemy (>=2.0.40,<3.0.0) ; extra == "full"
32
34
  Requires-Dist: toml (>=0.10)
33
- Requires-Dist: torch (>=2.5.0) ; extra == "embedders"
35
+ Requires-Dist: torch (>=2.4.0) ; extra == "embedders"
34
36
  Requires-Dist: tqdm (>=4.67.1) ; extra == "embedders"
35
37
  Requires-Dist: tqdm (>=4.67.1) ; extra == "full"
36
38
  Requires-Dist: transformers (==4.36.2) ; extra == "full"
@@ -13,8 +13,8 @@ dayhoff_tools/deployment/deploy_utils.py,sha256=StFwbqnr2_FWiKVg3xnJF4kagTHzndqq
13
13
  dayhoff_tools/deployment/job_runner.py,sha256=hljvFpH2Bw96uYyUup5Ths72PZRL_X27KxlYzBMgguo,5086
14
14
  dayhoff_tools/deployment/processors.py,sha256=LM0CQbr4XCb3AtLbrcuDQm4tYPXsoNqgVJ4WQYDjzJc,12406
15
15
  dayhoff_tools/deployment/swarm.py,sha256=YJfvVOcAS8cYcIj2fiN4qwC2leh0I9w5A4px8ZWSF6g,22833
16
- dayhoff_tools/embedders.py,sha256=fRkyWjHo8OmbNUBY_FwrgfvyiLqpmrpI57UAb1Szn1Y,36609
17
- dayhoff_tools/fasta.py,sha256=_kA2Cpiy7JAGbBqLrjElkzbcUD_p-nO2d5Aj1LVmOvc,50509
16
+ dayhoff_tools/embedders.py,sha256=1THnmio4FYkBswy_xkIiwT-ZOEMn6ZLbTAa-Uz0-kyE,36615
17
+ dayhoff_tools/fasta.py,sha256=USdemH4c_dNhWXOTAhldvlDi8eLHogsy0YSrOnODB5I,50773
18
18
  dayhoff_tools/file_ops.py,sha256=JlGowvr-CUJFidV-4g_JmhUTN9bsYuaxtqKmnKomm-Q,8506
19
19
  dayhoff_tools/h5.py,sha256=j1nxxaiHsMidVX_XwB33P1Pz9d7K8ZKiDZwJWQUUQSY,21158
20
20
  dayhoff_tools/intake/gcp.py,sha256=uCeEskhbEwJIYpN6ne6siT1dbpTizCjjel-hRe0kReE,3030
@@ -26,7 +26,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
26
26
  dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
27
27
  dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
28
28
  dayhoff_tools/warehouse.py,sha256=8YbnQ--usrEgDQGfvpV4MrMji55A0rq2hZaOgFGh6ag,15896
29
- dayhoff_tools-1.2.5.dist-info/METADATA,sha256=EReORxgYDfQ8QCuuC0Z0z-jg5NxbQr4WNFIbNPQ2T18,2735
30
- dayhoff_tools-1.2.5.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
31
- dayhoff_tools-1.2.5.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
32
- dayhoff_tools-1.2.5.dist-info/RECORD,,
29
+ dayhoff_tools-1.3.1.dist-info/METADATA,sha256=AyP_2vo_5tVylBVzP-EMkI3tPPxJIV-VpBdQxRQFIZU,2842
30
+ dayhoff_tools-1.3.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
31
+ dayhoff_tools-1.3.1.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
32
+ dayhoff_tools-1.3.1.dist-info/RECORD,,