dayhoff-tools 1.1.42__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/PKG-INFO +1 -1
  2. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/README.md +0 -0
  3. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/__init__.py +0 -0
  4. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/chemistry/standardizer.py +0 -0
  5. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/chemistry/utils.py +0 -0
  6. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/cli/__init__.py +0 -0
  7. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/cli/cloud_commands.py +0 -0
  8. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/cli/main.py +0 -0
  9. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/cli/swarm_commands.py +0 -0
  10. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/cli/utility_commands.py +0 -0
  11. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/deployment/base.py +2 -2
  12. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/deployment/deploy_aws.py +0 -0
  13. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
  14. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/deployment/job_runner.py +0 -0
  15. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/deployment/processors.py +0 -0
  16. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/deployment/swarm.py +47 -12
  17. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/embedders.py +0 -0
  18. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/fasta.py +0 -0
  19. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/file_ops.py +0 -0
  20. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/h5.py +0 -0
  21. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/intake/gcp.py +0 -0
  22. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/intake/gtdb.py +0 -0
  23. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/intake/kegg.py +0 -0
  24. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/intake/mmseqs.py +0 -0
  25. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/intake/structure.py +0 -0
  26. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/intake/uniprot.py +0 -0
  27. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/logs.py +0 -0
  28. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/sqlite.py +0 -0
  29. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/warehouse.py +0 -0
  30. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/pyproject.toml +1 -1
  31. {dayhoff_tools-1.1.42 → dayhoff_tools-1.2.0}/dayhoff_tools/deployment/deploy_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dayhoff-tools
3
- Version: 1.1.42
3
+ Version: 1.2.0
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
File without changes
@@ -281,7 +281,7 @@ def run_container(config: dict, image_uri: str, mode: str) -> None:
281
281
  4. Handles container logs for detached mode
282
282
 
283
283
  The container name is generated using:
284
- - Username (from LOCAL_USER env var)
284
+ - Username (from LOCAL_USER or USER env var)
285
285
  - Timestamp (YYYYMMDD_HHMMSS format)
286
286
 
287
287
  Args:
@@ -299,7 +299,7 @@ def run_container(config: dict, image_uri: str, mode: str) -> None:
299
299
  )
300
300
 
301
301
  # Generate unique container name
302
- username = os.getenv("LOCAL_USER", "unknown_user")
302
+ username = os.getenv("LOCAL_USER") or os.getenv("USER", "unknown_user")
303
303
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
304
304
  container_name = f"{username}_job_{timestamp}"
305
305
 
@@ -128,23 +128,58 @@ def publish_cards(
128
128
  names: List[str],
129
129
  firestore_collection: str,
130
130
  ):
131
- """Publish cards to Firebase. Expects a list of filenames (not full paths),
132
- which will each be published as a new document in the collection."""
131
+ """Publish cards to Firebase using batch writes for optimal performance.
132
+
133
+ Expects a list of filenames (not full paths), which will each be published
134
+ as a new document in the collection. Uses Firestore batch writes to minimize
135
+ network round-trips and improve performance.
136
+
137
+ Args:
138
+ names: List of packet filenames to publish as cards
139
+ firestore_collection: Name of the Firestore collection to write to
140
+ """
141
+ if not names:
142
+ print("No cards to upload.")
143
+ return
133
144
 
134
145
  initialize_firebase()
135
- collection = firestore.client().collection(firestore_collection)
146
+ db = firestore.client()
147
+ collection = db.collection(firestore_collection)
148
+
149
+ # Firestore batch limit is 500 operations
150
+ BATCH_SIZE = 500
151
+ total_cards = len(names)
152
+ cards_processed = 0
153
+
154
+ # Process names in batches of up to 500
155
+ for i in range(0, total_cards, BATCH_SIZE):
156
+ batch = db.batch()
157
+ batch_names = names[i : i + BATCH_SIZE]
158
+
159
+ # Add all operations for this batch
160
+ for name in batch_names:
161
+ doc_ref = collection.document() # Auto-generate document ID
162
+ batch.set(
163
+ doc_ref,
164
+ {
165
+ "status": "available",
166
+ "packet_filename": name,
167
+ "created": datetime.now(ZoneInfo("America/Los_Angeles")),
168
+ },
169
+ )
136
170
 
137
- for name in names:
138
- collection.document().set(
139
- {
140
- "status": "available",
141
- "packet_filename": name,
142
- "created": datetime.now(ZoneInfo("America/Los_Angeles")),
143
- }
171
+ # Commit the entire batch atomically
172
+ batch.commit()
173
+ cards_processed += len(batch_names)
174
+
175
+ print(
176
+ f"Batch {i // BATCH_SIZE + 1}: Created {len(batch_names)} cards "
177
+ f"({cards_processed}/{total_cards} total)"
144
178
  )
145
- print(f"Creating card {name}")
146
179
 
147
- print(f"Uploaded {len(names)} cards.")
180
+ print(
181
+ f"Successfully uploaded {total_cards} cards in {(total_cards + BATCH_SIZE - 1) // BATCH_SIZE} batch(es)."
182
+ )
148
183
 
149
184
 
150
185
  @transactional
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
5
5
 
6
6
  [project]
7
7
  name = "dayhoff-tools"
8
- version = "1.1.42"
8
+ version = "1.2.0"
9
9
  description = "Common tools for all the repos at Dayhoff Labs"
10
10
  authors = [
11
11
  {name = "Daniel Martin-Alarcon", email = "dma@dayhofflabs.com"}