arbor-ai 0.1.9__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arbor/server/services/grpo_manager.py +15 -0
- {arbor_ai-0.1.9.dist-info → arbor_ai-0.1.10.dist-info}/METADATA +2 -1
- {arbor_ai-0.1.9.dist-info → arbor_ai-0.1.10.dist-info}/RECORD +7 -7
- {arbor_ai-0.1.9.dist-info → arbor_ai-0.1.10.dist-info}/WHEEL +0 -0
- {arbor_ai-0.1.9.dist-info → arbor_ai-0.1.10.dist-info}/entry_points.txt +0 -0
- {arbor_ai-0.1.9.dist-info → arbor_ai-0.1.10.dist-info}/licenses/LICENSE +0 -0
- {arbor_ai-0.1.9.dist-info → arbor_ai-0.1.10.dist-info}/top_level.txt +0 -0
@@ -2,6 +2,7 @@ import json
|
|
2
2
|
import os
|
3
3
|
import random
|
4
4
|
import signal
|
5
|
+
import socket
|
5
6
|
import string
|
6
7
|
import subprocess
|
7
8
|
import sys
|
@@ -120,12 +121,17 @@ class GRPOManager:
|
|
120
121
|
|
121
122
|
num_processes = self.settings.arbor_config.training.gpu_ids.count(",") + 1
|
122
123
|
|
124
|
+
# This is the port for the accelerate main process
|
125
|
+
main_process_port = get_free_port()
|
126
|
+
|
123
127
|
params = [
|
124
128
|
"python",
|
125
129
|
"-m",
|
126
130
|
"accelerate.commands.launch",
|
127
131
|
"--num_processes",
|
128
132
|
str(num_processes),
|
133
|
+
"--main_process_port",
|
134
|
+
str(main_process_port),
|
129
135
|
]
|
130
136
|
if self.settings.arbor_config.training.accelerate_config:
|
131
137
|
params.extend(
|
@@ -328,3 +334,12 @@ class GRPOManager:
|
|
328
334
|
# >= self.train_kwargs["update_interval"]
|
329
335
|
# )
|
330
336
|
return self.model_saved_and_reload_requested
|
337
|
+
|
338
|
+
|
339
|
+
def get_free_port() -> int:
|
340
|
+
"""
|
341
|
+
Return a free TCP port on localhost.
|
342
|
+
"""
|
343
|
+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
344
|
+
s.bind(("localhost", 0))
|
345
|
+
return s.getsockname()[1]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: arbor-ai
|
3
|
-
Version: 0.1.9
|
3
|
+
Version: 0.1.10
|
4
4
|
Summary: A framework for fine-tuning and managing language models
|
5
5
|
Author-email: Noah Ziems <nziems2@nd.edu>
|
6
6
|
Project-URL: Homepage, https://github.com/Ziems/arbor
|
@@ -57,6 +57,7 @@ inference:
|
|
57
57
|
training:
|
58
58
|
gpu_ids: '1, 2'
|
59
59
|
```
|
60
|
+
This will use `GPU:0` for inference, with `GPU:1` and `GPU:2` reserved for training. We generally recommend splitting the GPUs roughly evenly between inference and training.
|
60
61
|
|
61
62
|
### 2️⃣ Start the Server
|
62
63
|
|
@@ -17,7 +17,7 @@ arbor/server/core/logging.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
|
|
17
17
|
arbor/server/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
18
|
arbor/server/services/dependencies.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
19
|
arbor/server/services/file_manager.py,sha256=Z9z4A4EzvPauid_DBfpim401DDtuJy_TbX4twTWDJWI,12119
|
20
|
-
arbor/server/services/grpo_manager.py,sha256=
|
20
|
+
arbor/server/services/grpo_manager.py,sha256=50g90lV8qpol7fQp2SBTXUCrF5eOP8YdxDnMLM0XY0E,13311
|
21
21
|
arbor/server/services/inference_manager.py,sha256=gHI-Biy3TtGkyWxIDKY-uqZZm_fiQJLktkPY8ezRvo8,9660
|
22
22
|
arbor/server/services/job_manager.py,sha256=m_d4UPwN_82f7t7K443DaFpFoyv7JZSZKml8tawt1Bk,2186
|
23
23
|
arbor/server/services/training_manager.py,sha256=oQdhpfxdgp_lCTb_lxhvjupdLrcg6HL3TEbct_q9F6I,21065
|
@@ -26,9 +26,9 @@ arbor/server/services/comms/comms.py,sha256=3KN3mzwPvfW2_L5hq02JdAk6yOMyhY0_pBz-
|
|
26
26
|
arbor/server/services/scripts/grpo_training.py,sha256=V36pCMZDJj2DdzquxScOddi9zP8EVPGWN3HGiftFfrY,21082
|
27
27
|
arbor/server/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
28
28
|
arbor/server/utils/helpers.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
29
|
-
arbor_ai-0.1.
|
30
|
-
arbor_ai-0.1.
|
31
|
-
arbor_ai-0.1.
|
32
|
-
arbor_ai-0.1.
|
33
|
-
arbor_ai-0.1.
|
34
|
-
arbor_ai-0.1.
|
29
|
+
arbor_ai-0.1.10.dist-info/licenses/LICENSE,sha256=5vFGrbOFeXXM83JV9o16w7ohH4WLeu3-57GocJSz8ow,1067
|
30
|
+
arbor_ai-0.1.10.dist-info/METADATA,sha256=qnUBfdKczxenG5kPTcZgQVMnWimEUPExz7nONxBYpDQ,2413
|
31
|
+
arbor_ai-0.1.10.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
|
32
|
+
arbor_ai-0.1.10.dist-info/entry_points.txt,sha256=PGBX-MfNwfIl8UPFgsX3gjtXLqSogRhOktKMpZUysD0,40
|
33
|
+
arbor_ai-0.1.10.dist-info/top_level.txt,sha256=jzWdp3BRYqvZDMFsPajrcftvvlluzVDErkD8IMRfhYs,6
|
34
|
+
arbor_ai-0.1.10.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|