dayhoff-tools 1.14.15__py3-none-any.whl → 1.15.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dayhoff_tools/batch/workers/boltz.py +5 -5
- dayhoff_tools/cli/batch/commands/cancel.py +6 -2
- dayhoff_tools/cli/batch/commands/clean.py +6 -2
- dayhoff_tools/cli/batch/commands/embed_t5.py +0 -1
- dayhoff_tools/cli/batch/commands/finalize.py +39 -20
- dayhoff_tools/cli/batch/commands/status.py +9 -3
- dayhoff_tools/cli/batch/commands/submit.py +0 -1
- dayhoff_tools/cli/main.py +2 -59
- dayhoff_tools/cli/utility_commands.py +0 -890
- dayhoff_tools/embedders.py +2 -2
- dayhoff_tools/fasta.py +2 -4
- dayhoff_tools/intake/kegg.py +1 -3
- dayhoff_tools/intake/structure.py +4 -4
- {dayhoff_tools-1.14.15.dist-info → dayhoff_tools-1.15.1.dist-info}/METADATA +2 -2
- {dayhoff_tools-1.14.15.dist-info → dayhoff_tools-1.15.1.dist-info}/RECORD +17 -17
- {dayhoff_tools-1.14.15.dist-info → dayhoff_tools-1.15.1.dist-info}/WHEEL +1 -1
- {dayhoff_tools-1.14.15.dist-info → dayhoff_tools-1.15.1.dist-info}/entry_points.txt +0 -0
dayhoff_tools/embedders.py
CHANGED
|
@@ -179,8 +179,8 @@ class H5Reformatter(Processor):
|
|
|
179
179
|
def embedding_file_to_df(self, file_name: str) -> pd.DataFrame:
|
|
180
180
|
with h5py.File(file_name, "r") as f:
|
|
181
181
|
gene_names = list(f.keys())
|
|
182
|
-
Xg = [f[key][()] for key in gene_names] # type:ignore
|
|
183
|
-
return pd.DataFrame(np.asmatrix(Xg), index=gene_names) # type:ignore
|
|
182
|
+
Xg = [f[key][()] for key in gene_names] # type: ignore
|
|
183
|
+
return pd.DataFrame(np.asmatrix(Xg), index=gene_names) # type: ignore
|
|
184
184
|
|
|
185
185
|
def write_df_to_h5(self, df: pd.DataFrame, filename: str, description: str) -> None:
|
|
186
186
|
"""
|
dayhoff_tools/fasta.py
CHANGED
|
@@ -857,14 +857,12 @@ def fasta_to_sqlite(fasta_file: str, db_file: str, batch_size: int = 1000) -> No
|
|
|
857
857
|
# Create the SQLite database and table
|
|
858
858
|
print("Creating SQLite database...")
|
|
859
859
|
with sqlite3.connect(db_file) as conn:
|
|
860
|
-
conn.execute(
|
|
861
|
-
"""
|
|
860
|
+
conn.execute("""
|
|
862
861
|
CREATE TABLE IF NOT EXISTS proteins (
|
|
863
862
|
protein_id TEXT PRIMARY KEY,
|
|
864
863
|
sequence TEXT NOT NULL
|
|
865
864
|
)
|
|
866
|
-
"""
|
|
867
|
-
)
|
|
865
|
+
""")
|
|
868
866
|
print("Database created successfully.")
|
|
869
867
|
|
|
870
868
|
# Estimate number of records for progress bar
|
dayhoff_tools/intake/kegg.py
CHANGED
|
@@ -25,9 +25,7 @@ def get_ko2gene_df(db: str, ko: str | list[str] | None = None) -> pd.DataFrame:
|
|
|
25
25
|
query = (
|
|
26
26
|
f"SELECT gene,ko FROM gene_to_ko WHERE ko IN ({','.join('?' * len(ko))})"
|
|
27
27
|
)
|
|
28
|
-
result_df = pd.read_sql_query(
|
|
29
|
-
query, conn, params=ko # type:ignore
|
|
30
|
-
)
|
|
28
|
+
result_df = pd.read_sql_query(query, conn, params=ko) # type: ignore
|
|
31
29
|
else:
|
|
32
30
|
query = f"SELECT gene,ko FROM gene_to_ko"
|
|
33
31
|
result_df = pd.read_sql_query(query, conn)
|
|
dayhoff_tools/intake/structure.py
CHANGED
|
@@ -409,10 +409,10 @@ class PDBFolderProcessor:
|
|
|
409
409
|
def _get_pdb_files(self) -> list[str]:
|
|
410
410
|
"""
|
|
411
411
|
Get a list of PDB files in the specified directory, optionally filtered by ID set.
|
|
412
|
-
Files are sorted
|
|
412
|
+
Files are sorted alphabetically to ensure consistent, reproducible processing order.
|
|
413
413
|
|
|
414
414
|
Returns:
|
|
415
|
-
List of PDB file names sorted
|
|
415
|
+
List of PDB file names sorted alphabetically.
|
|
416
416
|
"""
|
|
417
417
|
print("Scanning directory for PDB files...")
|
|
418
418
|
pdb_files = [
|
|
@@ -424,8 +424,8 @@ class PDBFolderProcessor:
|
|
|
424
424
|
f for f in pdb_files if self._extract_id_from_filename(f) in self.id_set
|
|
425
425
|
]
|
|
426
426
|
|
|
427
|
-
# Sort files
|
|
428
|
-
pdb_files.sort(
|
|
427
|
+
# Sort files alphabetically for deterministic, reproducible order
|
|
428
|
+
pdb_files.sort()
|
|
429
429
|
|
|
430
430
|
print(f"Found {len(pdb_files)} PDB files")
|
|
431
431
|
return pdb_files
|
|
{dayhoff_tools-1.14.15.dist-info → dayhoff_tools-1.15.1.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dayhoff-tools
|
|
3
|
-
Version: 1.14.15
|
|
3
|
+
Version: 1.15.1
|
|
4
4
|
Summary: Common tools for all the repos at Dayhoff Labs
|
|
5
5
|
Author: Daniel Martin-Alarcon
|
|
6
6
|
Author-email: dma@dayhofflabs.com
|
|
@@ -40,8 +40,8 @@ Requires-Dist: sqlalchemy (>=2.0.40,<3.0.0) ; extra == "full"
|
|
|
40
40
|
Requires-Dist: toml (>=0.10)
|
|
41
41
|
Requires-Dist: tqdm (>=4.67.1) ; extra == "embedders"
|
|
42
42
|
Requires-Dist: tqdm (>=4.67.1) ; extra == "full"
|
|
43
|
-
Requires-Dist: transformers (==4.36.2) ; extra == "full"
|
|
44
43
|
Requires-Dist: transformers (>=4.36.2) ; extra == "embedders"
|
|
44
|
+
Requires-Dist: transformers (>=4.36.2) ; extra == "full"
|
|
45
45
|
Requires-Dist: typer (>=0.9.0)
|
|
46
46
|
Requires-Dist: tzdata (>=2025.2)
|
|
47
47
|
Description-Content-Type: text/markdown
|
|
{dayhoff_tools-1.14.15.dist-info → dayhoff_tools-1.15.1.dist-info}/RECORD
CHANGED
|
@@ -2,7 +2,7 @@ dayhoff_tools/__init__.py,sha256=M5zThPyEBRYa5CfwlzKhcqTevWn3OKu62cjV6Zqie2A,469
|
|
|
2
2
|
dayhoff_tools/batch/__init__.py,sha256=N7v1pUg3rp68W6J7cX2Gssxdfp57Z7G0WeFJh7gKeiM,163
|
|
3
3
|
dayhoff_tools/batch/workers/__init__.py,sha256=DJTtiBSE5k9J6qwG_MsXDn8hgz_lvEdaMwqKa6YyqXo,369
|
|
4
4
|
dayhoff_tools/batch/workers/base.py,sha256=ZveUds6GW-aInj9LL0_VEYkXxOyZWRcOeJdacnEb8xo,5346
|
|
5
|
-
dayhoff_tools/batch/workers/boltz.py,sha256
|
|
5
|
+
dayhoff_tools/batch/workers/boltz.py,sha256=-_rgUvKcYfrCgBLpM-eIgWXvne6UJa2MqUFyllNRLUc,18397
|
|
6
6
|
dayhoff_tools/batch/workers/embed_t5.py,sha256=A5WqsQa1WZ7_la5X5wt0XUP-VwOglH04WyEINhwipeY,2750
|
|
7
7
|
dayhoff_tools/chemistry/standardizer.py,sha256=uMn7VwHnx02nc404eO6fRuS4rsl4dvSPf2ElfZDXEpY,11188
|
|
8
8
|
dayhoff_tools/chemistry/utils.py,sha256=jt-7JgF-GeeVC421acX-bobKbLU_X94KNOW24p_P-_M,2257
|
|
@@ -11,16 +11,16 @@ dayhoff_tools/cli/batch/__init__.py,sha256=0QMxkelGUdvdd7-P-WG8YX2h0qjkKKb-0JbEH
|
|
|
11
11
|
dayhoff_tools/cli/batch/aws_batch.py,sha256=DOp8KvmTrie15O61DP1HT13PSr3s5imxM-VZHvaLO7Q,15174
|
|
12
12
|
dayhoff_tools/cli/batch/commands/__init__.py,sha256=1xRzzL_mc1hz1Pv0OWNr-g6fkL5XbEsOTGHzrqddLCA,458
|
|
13
13
|
dayhoff_tools/cli/batch/commands/boltz.py,sha256=N0LksmtOpkvnEsR0SAUHxtksKPAsQjR83h3Cis14nz4,12085
|
|
14
|
-
dayhoff_tools/cli/batch/commands/cancel.py,sha256=
|
|
15
|
-
dayhoff_tools/cli/batch/commands/clean.py,sha256=
|
|
16
|
-
dayhoff_tools/cli/batch/commands/embed_t5.py,sha256=
|
|
17
|
-
dayhoff_tools/cli/batch/commands/finalize.py,sha256=
|
|
14
|
+
dayhoff_tools/cli/batch/commands/cancel.py,sha256=GDrfidzmIB9FFTpi3Jjzgp3NCbVvbIhamo3lCfjQemI,5343
|
|
15
|
+
dayhoff_tools/cli/batch/commands/clean.py,sha256=0EhpgG9ohsegDumUYhgKlWdIcPfKnEJfK2CnfT9azjk,4606
|
|
16
|
+
dayhoff_tools/cli/batch/commands/embed_t5.py,sha256=FfuiMgkw0BMmxD5rw78G-U0Sn_goK4F6E_ItSYp9pr8,11452
|
|
17
|
+
dayhoff_tools/cli/batch/commands/finalize.py,sha256=vSruaD8rMZFd7VRvtNjJ0gXR0239jxZfmwrgC3WglCU,13666
|
|
18
18
|
dayhoff_tools/cli/batch/commands/list_jobs.py,sha256=COfxZddDVUAHeTayNAB3ruYNhgrE3osgFxY2qzf33cg,4284
|
|
19
19
|
dayhoff_tools/cli/batch/commands/local.py,sha256=dZeKhNakaM1jS-EoByAwg1nWspRRoOmYzcwzjEKBaIA,3226
|
|
20
20
|
dayhoff_tools/cli/batch/commands/logs.py,sha256=ctgJksdzFmqBdD18ePPsZe2BpuJYtHz2xAaMPnUplmQ,5293
|
|
21
21
|
dayhoff_tools/cli/batch/commands/retry.py,sha256=JgOMiwuESNG4Wp_fQ_vxy1RwyOZPt_kmVbLYdxYTVBY,9897
|
|
22
|
-
dayhoff_tools/cli/batch/commands/status.py,sha256=
|
|
23
|
-
dayhoff_tools/cli/batch/commands/submit.py,sha256=
|
|
22
|
+
dayhoff_tools/cli/batch/commands/status.py,sha256=Oneci0Wv4dfF5SoThSFyE9BTqaN_u9yhGSecC7x0rd8,13512
|
|
23
|
+
dayhoff_tools/cli/batch/commands/submit.py,sha256=ykbbXniMzgKpY0iRmrGxwOU6e4-lPXkfyJ_HQ7AyTik,6785
|
|
24
24
|
dayhoff_tools/cli/batch/job_id.py,sha256=mqr8SwcPlUWIYLaR_C4kACmL2ZFK8jaddd7B-45-XaQ,4246
|
|
25
25
|
dayhoff_tools/cli/batch/manifest.py,sha256=eLyiOFXonAUh5rfHcXyR2CzmRKXQz9-tTbJcWnVbbmE,8857
|
|
26
26
|
dayhoff_tools/cli/cloud_commands.py,sha256=xrWQZm48e09GTh8RmYh3JqXUzW8mMy07dHk_77LqZx8,41199
|
|
@@ -48,9 +48,9 @@ dayhoff_tools/cli/engines_studios/simulators/studio_status_simulator.py,sha256=6
|
|
|
48
48
|
dayhoff_tools/cli/engines_studios/ssh_config.py,sha256=UCv-jf_zSuk7FUStkCQBAJz1QkxiSEwZbdSrwt_9SMU,2932
|
|
49
49
|
dayhoff_tools/cli/engines_studios/studio_commands.py,sha256=KGSNZQS8MmM_DfQzT9SRZvuR3OK6NdIdOrqI2wJFyes,25984
|
|
50
50
|
dayhoff_tools/cli/github_commands.py,sha256=pfrxI68LObGm_gtPlQN-gHPahHV4l9k9T4GqO99NNL0,8948
|
|
51
|
-
dayhoff_tools/cli/main.py,sha256=
|
|
51
|
+
dayhoff_tools/cli/main.py,sha256=E4UXtOBRRAhcrJ3RgLCQFpJdamFN-p9xViMkX45kNgw,6456
|
|
52
52
|
dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2zQR6o6M,5614
|
|
53
|
-
dayhoff_tools/cli/utility_commands.py,sha256=
|
|
53
|
+
dayhoff_tools/cli/utility_commands.py,sha256=Zaib879sJ8xD-RV42dSaaxdVC_lP6mLSJ-yTF5eU-pQ,9832
|
|
54
54
|
dayhoff_tools/deployment/base.py,sha256=uZnFvnPQx6pH_HmJbdThweAs3BrxMaDohpE3iX_-yk4,18377
|
|
55
55
|
dayhoff_tools/deployment/deploy_aws.py,sha256=1j16aE4hmln4pQVtcSGuIGVWbOBfWwveytvihjofADo,21519
|
|
56
56
|
dayhoff_tools/deployment/deploy_gcp.py,sha256=xgaOVsUDmP6wSEMYNkm1yRNcVskfdz80qJtCulkBIAM,8860
|
|
@@ -58,20 +58,20 @@ dayhoff_tools/deployment/deploy_utils.py,sha256=KyUFZZWn8NGT9QpR0HGqkX-huOFubvYC
|
|
|
58
58
|
dayhoff_tools/deployment/job_runner.py,sha256=hljvFpH2Bw96uYyUup5Ths72PZRL_X27KxlYzBMgguo,5086
|
|
59
59
|
dayhoff_tools/deployment/processors.py,sha256=LM0CQbr4XCb3AtLbrcuDQm4tYPXsoNqgVJ4WQYDjzJc,12406
|
|
60
60
|
dayhoff_tools/deployment/swarm.py,sha256=YJfvVOcAS8cYcIj2fiN4qwC2leh0I9w5A4px8ZWSF6g,22833
|
|
61
|
-
dayhoff_tools/embedders.py,sha256=
|
|
62
|
-
dayhoff_tools/fasta.py,sha256=
|
|
61
|
+
dayhoff_tools/embedders.py,sha256=4o49Zm0gPxGcBcofTCZMC_6IUaOOTbTY2xmVadrX_ss,36617
|
|
62
|
+
dayhoff_tools/fasta.py,sha256=tmuQ8BGpMqaDF9pJCro8roqpQsD9qeTnodNvXJmgH9w,50751
|
|
63
63
|
dayhoff_tools/file_ops.py,sha256=JlGowvr-CUJFidV-4g_JmhUTN9bsYuaxtqKmnKomm-Q,8506
|
|
64
64
|
dayhoff_tools/h5.py,sha256=j1nxxaiHsMidVX_XwB33P1Pz9d7K8ZKiDZwJWQUUQSY,21158
|
|
65
65
|
dayhoff_tools/intake/gcp.py,sha256=uCeEskhbEwJIYpN6ne6siT1dbpTizCjjel-hRe0kReE,3030
|
|
66
66
|
dayhoff_tools/intake/gtdb.py,sha256=58JNLWpr3LkXiIq-ubAcEFeR5DyN9YrA-YEoQI_FzlA,10608
|
|
67
|
-
dayhoff_tools/intake/kegg.py,sha256=
|
|
67
|
+
dayhoff_tools/intake/kegg.py,sha256=Cv1ar9X2aFd25VDqn-3zMZ0epgtMc-uFKUIYgvwxC6U,944
|
|
68
68
|
dayhoff_tools/intake/mmseqs.py,sha256=uEYzRsthJAlUeRYNCfFtJFE73SbuhfUIS1ygYFkhmtw,6435
|
|
69
|
-
dayhoff_tools/intake/structure.py,sha256=
|
|
69
|
+
dayhoff_tools/intake/structure.py,sha256=d-H9UFSJSfdp1zzp_F0lDBrMdCv3yQXl6HOVCE16u3E,27645
|
|
70
70
|
dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJqE4,16456
|
|
71
71
|
dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
|
|
72
72
|
dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
|
|
73
73
|
dayhoff_tools/warehouse.py,sha256=UETBtZD3r7WgvURqfGbyHlT7cxoiVq8isjzMuerKw8I,24475
|
|
74
|
-
dayhoff_tools-1.
|
|
75
|
-
dayhoff_tools-1.
|
|
76
|
-
dayhoff_tools-1.
|
|
77
|
-
dayhoff_tools-1.
|
|
74
|
+
dayhoff_tools-1.15.1.dist-info/METADATA,sha256=QSQwSam7GhLoduhoXHgrPYJnECe74Jym7LntG3tQou4,3135
|
|
75
|
+
dayhoff_tools-1.15.1.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
|
|
76
|
+
dayhoff_tools-1.15.1.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
|
|
77
|
+
dayhoff_tools-1.15.1.dist-info/RECORD,,
|
|
File without changes
|