dayhoff-tools 1.14.15__py3-none-any.whl → 1.15.1__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -179,8 +179,8 @@ class H5Reformatter(Processor):
179
179
  def embedding_file_to_df(self, file_name: str) -> pd.DataFrame:
180
180
  with h5py.File(file_name, "r") as f:
181
181
  gene_names = list(f.keys())
182
- Xg = [f[key][()] for key in gene_names] # type:ignore
183
- return pd.DataFrame(np.asmatrix(Xg), index=gene_names) # type:ignore
182
+ Xg = [f[key][()] for key in gene_names] # type: ignore
183
+ return pd.DataFrame(np.asmatrix(Xg), index=gene_names) # type: ignore
184
184
 
185
185
  def write_df_to_h5(self, df: pd.DataFrame, filename: str, description: str) -> None:
186
186
  """
dayhoff_tools/fasta.py CHANGED
@@ -857,14 +857,12 @@ def fasta_to_sqlite(fasta_file: str, db_file: str, batch_size: int = 1000) -> No
857
857
  # Create the SQLite database and table
858
858
  print("Creating SQLite database...")
859
859
  with sqlite3.connect(db_file) as conn:
860
- conn.execute(
861
- """
860
+ conn.execute("""
862
861
  CREATE TABLE IF NOT EXISTS proteins (
863
862
  protein_id TEXT PRIMARY KEY,
864
863
  sequence TEXT NOT NULL
865
864
  )
866
- """
867
- )
865
+ """)
868
866
  print("Database created successfully.")
869
867
 
870
868
  # Estimate number of records for progress bar
@@ -25,9 +25,7 @@ def get_ko2gene_df(db: str, ko: str | list[str] | None = None) -> pd.DataFrame:
25
25
  query = (
26
26
  f"SELECT gene,ko FROM gene_to_ko WHERE ko IN ({','.join('?' * len(ko))})"
27
27
  )
28
- result_df = pd.read_sql_query(
29
- query, conn, params=ko # type:ignore
30
- )
28
+ result_df = pd.read_sql_query(query, conn, params=ko) # type: ignore
31
29
  else:
32
30
  query = f"SELECT gene,ko FROM gene_to_ko"
33
31
  result_df = pd.read_sql_query(query, conn)
@@ -409,10 +409,10 @@ class PDBFolderProcessor:
409
409
  def _get_pdb_files(self) -> list[str]:
410
410
  """
411
411
  Get a list of PDB files in the specified directory, optionally filtered by ID set.
412
- Files are sorted by creation time to ensure consistent processing order.
412
+ Files are sorted alphabetically to ensure consistent, reproducible processing order.
413
413
 
414
414
  Returns:
415
- List of PDB file names sorted by creation time.
415
+ List of PDB file names sorted alphabetically.
416
416
  """
417
417
  print("Scanning directory for PDB files...")
418
418
  pdb_files = [
@@ -424,8 +424,8 @@ class PDBFolderProcessor:
424
424
  f for f in pdb_files if self._extract_id_from_filename(f) in self.id_set
425
425
  ]
426
426
 
427
- # Sort files by creation time
428
- pdb_files.sort(key=lambda f: os.path.getctime(os.path.join(self.pdb_dir, f)))
427
+ # Sort files alphabetically for deterministic, reproducible order
428
+ pdb_files.sort()
429
429
 
430
430
  print(f"Found {len(pdb_files)} PDB files")
431
431
  return pdb_files
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dayhoff-tools
3
- Version: 1.14.15
3
+ Version: 1.15.1
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
@@ -40,8 +40,8 @@ Requires-Dist: sqlalchemy (>=2.0.40,<3.0.0) ; extra == "full"
40
40
  Requires-Dist: toml (>=0.10)
41
41
  Requires-Dist: tqdm (>=4.67.1) ; extra == "embedders"
42
42
  Requires-Dist: tqdm (>=4.67.1) ; extra == "full"
43
- Requires-Dist: transformers (==4.36.2) ; extra == "full"
44
43
  Requires-Dist: transformers (>=4.36.2) ; extra == "embedders"
44
+ Requires-Dist: transformers (>=4.36.2) ; extra == "full"
45
45
  Requires-Dist: typer (>=0.9.0)
46
46
  Requires-Dist: tzdata (>=2025.2)
47
47
  Description-Content-Type: text/markdown
@@ -2,7 +2,7 @@ dayhoff_tools/__init__.py,sha256=M5zThPyEBRYa5CfwlzKhcqTevWn3OKu62cjV6Zqie2A,469
2
2
  dayhoff_tools/batch/__init__.py,sha256=N7v1pUg3rp68W6J7cX2Gssxdfp57Z7G0WeFJh7gKeiM,163
3
3
  dayhoff_tools/batch/workers/__init__.py,sha256=DJTtiBSE5k9J6qwG_MsXDn8hgz_lvEdaMwqKa6YyqXo,369
4
4
  dayhoff_tools/batch/workers/base.py,sha256=ZveUds6GW-aInj9LL0_VEYkXxOyZWRcOeJdacnEb8xo,5346
5
- dayhoff_tools/batch/workers/boltz.py,sha256=breCGITkX-_Xj9I_C1R-mKkG9RUCzSw3Kcdbbx_S5LA,18405
5
+ dayhoff_tools/batch/workers/boltz.py,sha256=-_rgUvKcYfrCgBLpM-eIgWXvne6UJa2MqUFyllNRLUc,18397
6
6
  dayhoff_tools/batch/workers/embed_t5.py,sha256=A5WqsQa1WZ7_la5X5wt0XUP-VwOglH04WyEINhwipeY,2750
7
7
  dayhoff_tools/chemistry/standardizer.py,sha256=uMn7VwHnx02nc404eO6fRuS4rsl4dvSPf2ElfZDXEpY,11188
8
8
  dayhoff_tools/chemistry/utils.py,sha256=jt-7JgF-GeeVC421acX-bobKbLU_X94KNOW24p_P-_M,2257
@@ -11,16 +11,16 @@ dayhoff_tools/cli/batch/__init__.py,sha256=0QMxkelGUdvdd7-P-WG8YX2h0qjkKKb-0JbEH
11
11
  dayhoff_tools/cli/batch/aws_batch.py,sha256=DOp8KvmTrie15O61DP1HT13PSr3s5imxM-VZHvaLO7Q,15174
12
12
  dayhoff_tools/cli/batch/commands/__init__.py,sha256=1xRzzL_mc1hz1Pv0OWNr-g6fkL5XbEsOTGHzrqddLCA,458
13
13
  dayhoff_tools/cli/batch/commands/boltz.py,sha256=N0LksmtOpkvnEsR0SAUHxtksKPAsQjR83h3Cis14nz4,12085
14
- dayhoff_tools/cli/batch/commands/cancel.py,sha256=kjvmCcFaMShyHfQjvR4WlII4njg4Fm4uffpWcY1qRWg,5299
15
- dayhoff_tools/cli/batch/commands/clean.py,sha256=nWOKbVM2nDuLMpyC038Q9aylOQxk2bq4N0JF65qJg-s,4570
16
- dayhoff_tools/cli/batch/commands/embed_t5.py,sha256=QXFydAw0wndevdzXF1cxikxMmvn1BuQ5p9lwutQFajU,11453
17
- dayhoff_tools/cli/batch/commands/finalize.py,sha256=OQUF9RiO8S55SCeQcFqExLKjYd-leL0Z_FOV0xMg7Dw,13497
14
+ dayhoff_tools/cli/batch/commands/cancel.py,sha256=GDrfidzmIB9FFTpi3Jjzgp3NCbVvbIhamo3lCfjQemI,5343
15
+ dayhoff_tools/cli/batch/commands/clean.py,sha256=0EhpgG9ohsegDumUYhgKlWdIcPfKnEJfK2CnfT9azjk,4606
16
+ dayhoff_tools/cli/batch/commands/embed_t5.py,sha256=FfuiMgkw0BMmxD5rw78G-U0Sn_goK4F6E_ItSYp9pr8,11452
17
+ dayhoff_tools/cli/batch/commands/finalize.py,sha256=vSruaD8rMZFd7VRvtNjJ0gXR0239jxZfmwrgC3WglCU,13666
18
18
  dayhoff_tools/cli/batch/commands/list_jobs.py,sha256=COfxZddDVUAHeTayNAB3ruYNhgrE3osgFxY2qzf33cg,4284
19
19
  dayhoff_tools/cli/batch/commands/local.py,sha256=dZeKhNakaM1jS-EoByAwg1nWspRRoOmYzcwzjEKBaIA,3226
20
20
  dayhoff_tools/cli/batch/commands/logs.py,sha256=ctgJksdzFmqBdD18ePPsZe2BpuJYtHz2xAaMPnUplmQ,5293
21
21
  dayhoff_tools/cli/batch/commands/retry.py,sha256=JgOMiwuESNG4Wp_fQ_vxy1RwyOZPt_kmVbLYdxYTVBY,9897
22
- dayhoff_tools/cli/batch/commands/status.py,sha256=4kfii7mgtJKXWwZDsR4mtb_dQgCdryGkGEjNOpDxTT0,13404
23
- dayhoff_tools/cli/batch/commands/submit.py,sha256=AXbvSReN8fLlzR5swE81pH7yvbCC1GUMCbsDrfoHAws,6786
22
+ dayhoff_tools/cli/batch/commands/status.py,sha256=Oneci0Wv4dfF5SoThSFyE9BTqaN_u9yhGSecC7x0rd8,13512
23
+ dayhoff_tools/cli/batch/commands/submit.py,sha256=ykbbXniMzgKpY0iRmrGxwOU6e4-lPXkfyJ_HQ7AyTik,6785
24
24
  dayhoff_tools/cli/batch/job_id.py,sha256=mqr8SwcPlUWIYLaR_C4kACmL2ZFK8jaddd7B-45-XaQ,4246
25
25
  dayhoff_tools/cli/batch/manifest.py,sha256=eLyiOFXonAUh5rfHcXyR2CzmRKXQz9-tTbJcWnVbbmE,8857
26
26
  dayhoff_tools/cli/cloud_commands.py,sha256=xrWQZm48e09GTh8RmYh3JqXUzW8mMy07dHk_77LqZx8,41199
@@ -48,9 +48,9 @@ dayhoff_tools/cli/engines_studios/simulators/studio_status_simulator.py,sha256=6
48
48
  dayhoff_tools/cli/engines_studios/ssh_config.py,sha256=UCv-jf_zSuk7FUStkCQBAJz1QkxiSEwZbdSrwt_9SMU,2932
49
49
  dayhoff_tools/cli/engines_studios/studio_commands.py,sha256=KGSNZQS8MmM_DfQzT9SRZvuR3OK6NdIdOrqI2wJFyes,25984
50
50
  dayhoff_tools/cli/github_commands.py,sha256=pfrxI68LObGm_gtPlQN-gHPahHV4l9k9T4GqO99NNL0,8948
51
- dayhoff_tools/cli/main.py,sha256=6ffnaFzui-bVd1ME7yThk_ZrMOofwStamEUkkYlminY,8503
51
+ dayhoff_tools/cli/main.py,sha256=E4UXtOBRRAhcrJ3RgLCQFpJdamFN-p9xViMkX45kNgw,6456
52
52
  dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2zQR6o6M,5614
53
- dayhoff_tools/cli/utility_commands.py,sha256=O6vy3rONTeuPYZyhjnFeqf8GxUlyc7i2O11d1s3shH4,45513
53
+ dayhoff_tools/cli/utility_commands.py,sha256=Zaib879sJ8xD-RV42dSaaxdVC_lP6mLSJ-yTF5eU-pQ,9832
54
54
  dayhoff_tools/deployment/base.py,sha256=uZnFvnPQx6pH_HmJbdThweAs3BrxMaDohpE3iX_-yk4,18377
55
55
  dayhoff_tools/deployment/deploy_aws.py,sha256=1j16aE4hmln4pQVtcSGuIGVWbOBfWwveytvihjofADo,21519
56
56
  dayhoff_tools/deployment/deploy_gcp.py,sha256=xgaOVsUDmP6wSEMYNkm1yRNcVskfdz80qJtCulkBIAM,8860
@@ -58,20 +58,20 @@ dayhoff_tools/deployment/deploy_utils.py,sha256=KyUFZZWn8NGT9QpR0HGqkX-huOFubvYC
58
58
  dayhoff_tools/deployment/job_runner.py,sha256=hljvFpH2Bw96uYyUup5Ths72PZRL_X27KxlYzBMgguo,5086
59
59
  dayhoff_tools/deployment/processors.py,sha256=LM0CQbr4XCb3AtLbrcuDQm4tYPXsoNqgVJ4WQYDjzJc,12406
60
60
  dayhoff_tools/deployment/swarm.py,sha256=YJfvVOcAS8cYcIj2fiN4qwC2leh0I9w5A4px8ZWSF6g,22833
61
- dayhoff_tools/embedders.py,sha256=1THnmio4FYkBswy_xkIiwT-ZOEMn6ZLbTAa-Uz0-kyE,36615
62
- dayhoff_tools/fasta.py,sha256=USdemH4c_dNhWXOTAhldvlDi8eLHogsy0YSrOnODB5I,50773
61
+ dayhoff_tools/embedders.py,sha256=4o49Zm0gPxGcBcofTCZMC_6IUaOOTbTY2xmVadrX_ss,36617
62
+ dayhoff_tools/fasta.py,sha256=tmuQ8BGpMqaDF9pJCro8roqpQsD9qeTnodNvXJmgH9w,50751
63
63
  dayhoff_tools/file_ops.py,sha256=JlGowvr-CUJFidV-4g_JmhUTN9bsYuaxtqKmnKomm-Q,8506
64
64
  dayhoff_tools/h5.py,sha256=j1nxxaiHsMidVX_XwB33P1Pz9d7K8ZKiDZwJWQUUQSY,21158
65
65
  dayhoff_tools/intake/gcp.py,sha256=uCeEskhbEwJIYpN6ne6siT1dbpTizCjjel-hRe0kReE,3030
66
66
  dayhoff_tools/intake/gtdb.py,sha256=58JNLWpr3LkXiIq-ubAcEFeR5DyN9YrA-YEoQI_FzlA,10608
67
- dayhoff_tools/intake/kegg.py,sha256=SaVbumB4leNTSevamT29yIqHurejw1wmcCC32D5Qyco,965
67
+ dayhoff_tools/intake/kegg.py,sha256=Cv1ar9X2aFd25VDqn-3zMZ0epgtMc-uFKUIYgvwxC6U,944
68
68
  dayhoff_tools/intake/mmseqs.py,sha256=uEYzRsthJAlUeRYNCfFtJFE73SbuhfUIS1ygYFkhmtw,6435
69
- dayhoff_tools/intake/structure.py,sha256=ufN3gAodQxhnt7psK1VTQeu9rKERmo_PhoxIbB4QKMw,27660
69
+ dayhoff_tools/intake/structure.py,sha256=d-H9UFSJSfdp1zzp_F0lDBrMdCv3yQXl6HOVCE16u3E,27645
70
70
  dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJqE4,16456
71
71
  dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
72
72
  dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
73
73
  dayhoff_tools/warehouse.py,sha256=UETBtZD3r7WgvURqfGbyHlT7cxoiVq8isjzMuerKw8I,24475
74
- dayhoff_tools-1.14.15.dist-info/METADATA,sha256=YBP4lpBDAIhxYuJ65DR_ADrajk6hyLC1e_UCVc_bwx8,3136
75
- dayhoff_tools-1.14.15.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
76
- dayhoff_tools-1.14.15.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
77
- dayhoff_tools-1.14.15.dist-info/RECORD,,
74
+ dayhoff_tools-1.15.1.dist-info/METADATA,sha256=QSQwSam7GhLoduhoXHgrPYJnECe74Jym7LntG3tQou4,3135
75
+ dayhoff_tools-1.15.1.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
76
+ dayhoff_tools-1.15.1.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
77
+ dayhoff_tools-1.15.1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.3.0
2
+ Generator: poetry-core 2.3.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any