@synapseia-network/node 0.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +105 -0
- package/README.md +232 -0
- package/dist/bid-responder-Q725ZIUC.js +86 -0
- package/dist/bootstrap.js +22 -0
- package/dist/chain-info-lightweight-2UWAQZBF.js +303 -0
- package/dist/chat-stream-handler-BSHSGMFF.js +127 -0
- package/dist/chunk-2X7MSWD4.js +270 -0
- package/dist/chunk-3BHRQWSM.js +531 -0
- package/dist/chunk-5QFTU52A.js +442 -0
- package/dist/chunk-5ZAJBIAV.js +25 -0
- package/dist/chunk-7FLDR5NT.js +186 -0
- package/dist/chunk-C5XRYLYP.js +137 -0
- package/dist/chunk-D7ADMHK2.js +36 -0
- package/dist/chunk-DXUYWRO7.js +23 -0
- package/dist/chunk-F5UDK56Z.js +289 -0
- package/dist/chunk-NEHR6XY7.js +111 -0
- package/dist/chunk-NMJVODKH.js +453 -0
- package/dist/chunk-PRVT22SM.js +324 -0
- package/dist/chunk-T2ZRG5CX.js +1380 -0
- package/dist/chunk-V2L5SXTL.js +88 -0
- package/dist/chunk-XL2NJWFY.js +702 -0
- package/dist/embedding-C6GE3WVM.js +16 -0
- package/dist/hardware-ITQQJ5YI.js +37 -0
- package/dist/index.js +16836 -0
- package/dist/inference-server-CIGRJ36H.js +25 -0
- package/dist/local-cors-J6RWNMMD.js +44 -0
- package/dist/model-catalog-C53SDFMG.js +15 -0
- package/dist/model-discovery-LA6YMT3I.js +10 -0
- package/dist/ollama-XVXA3A37.js +9 -0
- package/dist/rewards-vault-cli-HW7H4EMD.js +147 -0
- package/dist/scripts/create_nodes.sh +6 -0
- package/dist/scripts/diloco_train.py +319 -0
- package/dist/scripts/train_lora.py +237 -0
- package/dist/scripts/train_micro.py +586 -0
- package/dist/trainer-HQMV2ZAR.js +21 -0
- package/package.json +128 -0
- package/scripts/create_nodes.sh +6 -0
- package/scripts/diloco_train.py +319 -0
- package/scripts/train_lora.py +237 -0
- package/scripts/train_micro.py +586 -0
@@ -0,0 +1,25 @@
+import { fileURLToPath as __synFup } from "url";import { dirname as __synDn } from "path";const __filename = __synFup(import.meta.url);const __dirname = __synDn(__filename);
+import {
+  InferenceServerHelper,
+  forwardToOllama,
+  handleChatCompletions,
+  handleHealth,
+  handleState,
+  parseBody,
+  startInferenceServer,
+  transformToOpenAI
+} from "./chunk-F5UDK56Z.js";
+import "./chunk-NEHR6XY7.js";
+import "./chunk-5ZAJBIAV.js";
+import "./chunk-V2L5SXTL.js";
+import "./chunk-D7ADMHK2.js";
+export {
+  InferenceServerHelper,
+  forwardToOllama,
+  handleChatCompletions,
+  handleHealth,
+  handleState,
+  parseBody,
+  startInferenceServer,
+  transformToOpenAI
+};
@@ -0,0 +1,44 @@
+import { fileURLToPath as __synFup } from "url";import { dirname as __synDn } from "path";const __filename = __synFup(import.meta.url);const __dirname = __synDn(__filename);
+import {
+  __name
+} from "./chunk-D7ADMHK2.js";
+
+// src/shared/local-cors.ts
+var ORIGIN_ALLOWLIST = [
+  /^https?:\/\/localhost(:\d+)?$/i,
+  /^https?:\/\/127\.0\.0\.1(:\d+)?$/i,
+  /^https?:\/\/\[::1\](:\d+)?$/i,
+  /^tauri:\/\/localhost$/i
+];
+var ALLOWED_METHODS = "GET, POST, OPTIONS";
+var ALLOWED_HEADERS = "Content-Type, X-Public-Key, X-Peer-Id, X-Signature, X-Timestamp, Authorization";
+function isAllowedLocalOrigin(origin) {
+  if (!origin) return false;
+  return ORIGIN_ALLOWLIST.some((re) => re.test(origin));
+}
+__name(isAllowedLocalOrigin, "isAllowedLocalOrigin");
+function applyLocalCors(req, res) {
+  const origin = req.headers["origin"] ?? void 0;
+  if (origin && isAllowedLocalOrigin(origin)) {
+    res.setHeader("Access-Control-Allow-Origin", origin);
+    res.setHeader("Vary", "Origin");
+    res.setHeader("Access-Control-Allow-Methods", ALLOWED_METHODS);
+    res.setHeader("Access-Control-Allow-Headers", ALLOWED_HEADERS);
+  }
+  if (req.method === "OPTIONS") {
+    if (origin && !isAllowedLocalOrigin(origin)) {
+      res.writeHead(403);
+      res.end("CORS origin not allowed");
+      return true;
+    }
+    res.writeHead(204);
+    res.end();
+    return true;
+  }
+  return false;
+}
+__name(applyLocalCors, "applyLocalCors");
+export {
+  applyLocalCors,
+  isAllowedLocalOrigin
+};
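As context for the hunk above: `applyLocalCors` is written as a guard that either fully answers a request (the OPTIONS preflight, allowed or rejected) or falls through to normal routing. A minimal sketch of how a consumer might wire it into a plain Node HTTP server follows; the import path, port, and response body are assumptions for illustration, only `applyLocalCors` itself comes from this package:

```ts
// Hypothetical wiring sketch (import path, port, and payload invented).
import { createServer } from "node:http";
import { applyLocalCors } from "@synapseia-network/node/dist/local-cors-J6RWNMMD.js";

const server = createServer((req, res) => {
  // applyLocalCors returns true when it has already answered the request
  // (preflight handled or origin rejected); skip routing in that case.
  if (applyLocalCors(req, res)) return;
  res.writeHead(200, { "Content-Type": "application/json" });
  res.end(JSON.stringify({ ok: true }));
});

server.listen(8787);
```

Note the design: non-OPTIONS requests from disallowed origins still reach the handler, they simply get no `Access-Control-Allow-Origin` header, so the browser blocks the response on its side.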
@@ -0,0 +1,15 @@
+import { fileURLToPath as __synFup } from "url";import { dirname as __synDn } from "path";const __filename = __synFup(import.meta.url);const __dirname = __synDn(__filename);
+import {
+  CLOUD_MODELS,
+  FULL_CATALOG,
+  MODEL_CATALOG,
+  ModelCatalogHelper
+} from "./chunk-PRVT22SM.js";
+import "./chunk-V2L5SXTL.js";
+import "./chunk-D7ADMHK2.js";
+export {
+  CLOUD_MODELS,
+  FULL_CATALOG,
+  MODEL_CATALOG,
+  ModelCatalogHelper
+};
@@ -0,0 +1,10 @@
+import { fileURLToPath as __synFup } from "url";import { dirname as __synDn } from "path";const __filename = __synFup(import.meta.url);const __dirname = __synDn(__filename);
+import {
+  ModelDiscovery
+} from "./chunk-7FLDR5NT.js";
+import "./chunk-PRVT22SM.js";
+import "./chunk-V2L5SXTL.js";
+import "./chunk-D7ADMHK2.js";
+export {
+  ModelDiscovery
+};
@@ -0,0 +1,9 @@
+import { fileURLToPath as __synFup } from "url";import { dirname as __synDn } from "path";const __filename = __synFup(import.meta.url);const __dirname = __synDn(__filename);
+import {
+  OllamaHelper
+} from "./chunk-2X7MSWD4.js";
+import "./chunk-V2L5SXTL.js";
+import "./chunk-D7ADMHK2.js";
+export {
+  OllamaHelper
+};
@@ -0,0 +1,147 @@
+import { fileURLToPath as __synFup } from "url";import { dirname as __synDn } from "path";const __filename = __synFup(import.meta.url);const __dirname = __synDn(__filename);
+import {
+  loadWalletWithPassword,
+  sendAndConfirmFresh
+} from "./chunk-XL2NJWFY.js";
+import {
+  init_logger,
+  logger_default
+} from "./chunk-V2L5SXTL.js";
+import {
+  __name
+} from "./chunk-D7ADMHK2.js";
+
+// src/modules/rewards/rewards-vault-cli.ts
+init_logger();
+import { Connection, PublicKey, Transaction, TransactionInstruction, ComputeBudgetProgram } from "@solana/web3.js";
+import { getAssociatedTokenAddress, createAssociatedTokenAccountInstruction, TOKEN_PROGRAM_ID, ASSOCIATED_TOKEN_PROGRAM_ID } from "@solana/spl-token";
+var DEFAULT_CU_LIMIT = 14e5;
+var DEFAULT_CU_PRICE_MICROLAMPORTS = 1e4;
+var REWARDS_VAULT_PROGRAM_ID_DEFAULT = "D9pkzWv2Ak9J8vXDVcMM1P51hDmjRJEwbuYHxCuJKTEN";
+var SYN_TOKEN_MINT_DEFAULT = "DCdWHhoeEwHJ3Fy3DRTk4yvZPXq3mSNZKtbPJzUfpUh8";
+function getRewardsVaultProgramId() {
+  return new PublicKey(process.env.REWARDS_VAULT_PROGRAM_ID ?? REWARDS_VAULT_PROGRAM_ID_DEFAULT);
+}
+__name(getRewardsVaultProgramId, "getRewardsVaultProgramId");
+function getSynMint() {
+  return new PublicKey(process.env.SYN_TOKEN_MINT ?? SYN_TOKEN_MINT_DEFAULT);
+}
+__name(getSynMint, "getSynMint");
+function getSolanaRpcUrl() {
+  return process.env.SOLANA_RPC_URL ?? "https://api.devnet.solana.com";
+}
+__name(getSolanaRpcUrl, "getSolanaRpcUrl");
+function deriveVaultStatePDA(programId) {
+  return PublicKey.findProgramAddressSync([
+    Buffer.from("vault_state")
+  ], programId)[0];
+}
+__name(deriveVaultStatePDA, "deriveVaultStatePDA");
+function deriveRewardAccountPDA(owner, programId) {
+  return PublicKey.findProgramAddressSync([
+    Buffer.from("reward_account"),
+    owner.toBuffer()
+  ], programId)[0];
+}
+__name(deriveRewardAccountPDA, "deriveRewardAccountPDA");
+function deriveTreasuryAuthorityPDA(programId) {
+  return PublicKey.findProgramAddressSync([
+    Buffer.from("rewards_treasury")
+  ], programId)[0];
+}
+__name(deriveTreasuryAuthorityPDA, "deriveTreasuryAuthorityPDA");
+function createClaimRewardsInstructionData() {
+  return Buffer.from([
+    4,
+    144,
+    132,
+    71,
+    116,
+    23,
+    151,
+    80
+  ]);
+}
+__name(createClaimRewardsInstructionData, "createClaimRewardsInstructionData");
+async function claimWorkOrderRewards() {
+  const programId = getRewardsVaultProgramId();
+  const synMint = getSynMint();
+  const connection = new Connection(getSolanaRpcUrl(), "confirmed");
+  const wallet = await loadWalletWithPassword();
+  const owner = wallet.publicKey;
+  const rewardAccount = deriveRewardAccountPDA(owner, programId);
+  const rewardInfo = await connection.getAccountInfo(rewardAccount, "confirmed");
+  if (!rewardInfo || rewardInfo.data.length < 8 + 32 + 8) {
+    throw new Error("No reward account on-chain yet. Run your node and earn rewards first.");
+  }
+  const unclaimed = Number(rewardInfo.data.readBigUInt64LE(8 + 32)) / 1e9;
+  if (unclaimed === 0) {
+    throw new Error("Claimable balance is zero \u2014 nothing to claim.");
+  }
+  logger_default.log(`Claimable balance: ${unclaimed} SYN`);
+  const vaultState = deriveVaultStatePDA(programId);
+  const treasuryAuthority = deriveTreasuryAuthorityPDA(programId);
+  const treasuryTokenAccount = await getAssociatedTokenAddress(synMint, treasuryAuthority, true);
+  const ownerTokenAccount = await getAssociatedTokenAddress(synMint, owner);
+  const instructions = [
+    ComputeBudgetProgram.setComputeUnitPrice({
+      microLamports: DEFAULT_CU_PRICE_MICROLAMPORTS
+    }),
+    ComputeBudgetProgram.setComputeUnitLimit({
+      units: DEFAULT_CU_LIMIT
+    })
+  ];
+  const ownerAtaInfo = await connection.getAccountInfo(ownerTokenAccount);
+  if (!ownerAtaInfo) {
+    instructions.push(createAssociatedTokenAccountInstruction(owner, ownerTokenAccount, owner, synMint, TOKEN_PROGRAM_ID, ASSOCIATED_TOKEN_PROGRAM_ID));
+  }
+  instructions.push(new TransactionInstruction({
+    programId,
+    data: createClaimRewardsInstructionData(),
+    keys: [
+      {
+        pubkey: vaultState,
+        isSigner: false,
+        isWritable: true
+      },
+      {
+        pubkey: rewardAccount,
+        isSigner: false,
+        isWritable: true
+      },
+      {
+        pubkey: owner,
+        isSigner: true,
+        isWritable: true
+      },
+      {
+        pubkey: treasuryAuthority,
+        isSigner: false,
+        isWritable: false
+      },
+      {
+        pubkey: treasuryTokenAccount,
+        isSigner: false,
+        isWritable: true
+      },
+      {
+        pubkey: ownerTokenAccount,
+        isSigner: false,
+        isWritable: true
+      },
+      {
+        pubkey: TOKEN_PROGRAM_ID,
+        isSigner: false,
+        isWritable: false
+      }
+    ]
+  }));
+  const tx = new Transaction().add(...instructions);
+  return sendAndConfirmFresh(connection, tx, [
+    wallet
+  ]);
+}
+__name(claimWorkOrderRewards, "claimWorkOrderRewards");
+export {
+  claimWorkOrderRewards
+};
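Reading the CLI above: the program id, token mint, and RPC endpoint all come from environment variables with devnet defaults, so a caller can retarget the claim without code changes. A sketch of one plausible invocation follows; the import specifier is an assumption (the published entry point may differ), and the return value is simply whatever `sendAndConfirmFresh` resolves to, which this diff does not show:

```ts
// Sketch only: env overrides mirror the process.env reads in the CLI.
process.env.SOLANA_RPC_URL = "https://api.devnet.solana.com"; // the default anyway
process.env.REWARDS_VAULT_PROGRAM_ID = "D9pkzWv2Ak9J8vXDVcMM1P51hDmjRJEwbuYHxCuJKTEN";

// Hypothetical import path for the bundled module.
const { claimWorkOrderRewards } = await import(
  "@synapseia-network/node/dist/rewards-vault-cli-HW7H4EMD.js"
);

try {
  const result = await claimWorkOrderRewards(); // opaque: sendAndConfirmFresh's return
  console.log("claim submitted:", result);
} catch (err) {
  // Thrown when no reward account exists on-chain or the claimable balance is zero.
  console.error((err as Error).message);
}
```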
@@ -0,0 +1,319 @@
+#!/usr/bin/env python3
+"""
+DiLoCo inner-loop training script.
+
+Reads config from stdin as JSON.
+Outputs JSON lines to stdout: progress updates + final result.
+
+Supports testMode=True to use a tiny model (GPT-2) for CI/testing.
+"""
+
+import sys
+import json
+import os
+import tempfile
+import time
+import math
+
+def log(obj: dict) -> None:
+    """Output a JSON line to stdout (flush immediately so TS wrapper sees it)."""
+    print(json.dumps(obj), flush=True)
+
+
+def compress_gradients_svd(gradients: dict, top_k: int = 64) -> dict:
+    """
+    Compress a dict of named gradient tensors using truncated SVD.
+    Returns a dict of {name: {"U": ..., "S": ..., "V": ..., "shape": ...}}.
+    """
+    import torch
+    compressed = {}
+    for name, grad in gradients.items():
+        if grad is None:
+            continue
+        shape = list(grad.shape)
+        # Reshape to 2D for SVD
+        if grad.dim() == 1:
+            # 1-D tensors: treat as row vector
+            mat = grad.unsqueeze(0).float()
+        else:
+            mat = grad.view(grad.shape[0], -1).float()
+
+        try:
+            U, S, Vh = torch.linalg.svd(mat, full_matrices=False)
+            k = min(top_k, S.shape[0])
+            compressed[name] = {
+                "U": U[:, :k].tolist(),
+                "S": S[:k].tolist(),
+                "V": Vh[:k, :].tolist(),
+                "shape": shape,
+                "original_rows": mat.shape[0],
+                "original_cols": mat.shape[1],
+            }
+        except Exception:
+            # Fallback: store as-is (shouldn't happen in practice)
+            compressed[name] = {
+                "raw": grad.tolist(),
+                "shape": shape,
+            }
+    return compressed
+
+
+def run_test_mode(config: dict) -> None:
+    """
+    Test mode: use a tiny randomly-initialized model instead of downloading 7B.
+    Simulates the DiLoCo inner loop with synthetic data.
+    """
+    import torch
+    import torch.nn as nn
+
+    inner_steps = config.get("innerSteps", 10)
+    lr = config.get("hyperparams", {}).get("learningRate", 1e-3)
+    hardware = config.get("hardware", "cpu")
+
+    device = "cpu"
+    if hardware == "mps" and torch.backends.mps.is_available():
+        device = "mps"
+    elif hardware == "cuda" and torch.cuda.is_available():
+        device = "cuda"
+
+    # Tiny 2-layer MLP as stand-in for foundation model + LoRA
+    model = nn.Sequential(
+        nn.Linear(64, 128),
+        nn.ReLU(),
+        nn.Linear(128, 64),
+        nn.ReLU(),
+        nn.Linear(64, 32),
+    ).to(device)
+
+    # Capture initial weights (for pseudo-gradient computation)
+    initial_weights = {}
+    for name, param in model.named_parameters():
+        initial_weights[name] = param.data.clone()
+
+    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
+    loss_val = 5.0
+
+    for step in range(1, inner_steps + 1):
+        optimizer.zero_grad()
+        x = torch.randn(8, 64, device=device)
+        y = torch.randn(8, 32, device=device)
+        out = model(x)
+        loss = nn.functional.mse_loss(out, y)
+        loss.backward()
+        optimizer.step()
+
+        loss_val = float(loss.item())
+
+        # Emit progress every step (or every 10 for larger runs)
+        if step % max(1, inner_steps // 10) == 0 or step == inner_steps:
+            log({"step": step, "loss": round(loss_val, 4), "lr": lr})
+
+    # Compute pseudo-gradients = final_weights - initial_weights
+    pseudo_gradients = {}
+    for name, param in model.named_parameters():
+        pseudo_gradients[name] = param.data - initial_weights[name]
+
+    # Compress with SVD
+    compressed = compress_gradients_svd(pseudo_gradients, top_k=32)
+
+    # Save to temp file
+    import pickle
+    tmp = tempfile.NamedTemporaryFile(
+        suffix="_diloco_gradients.pt", delete=False, mode="wb"
+    )
+    pickle.dump(compressed, tmp)
+    tmp.close()
+    gradient_path = tmp.name
+
+    val_loss = loss_val * 1.05  # Slightly worse than train loss
+    final_loss = loss_val
+
+    log({
+        "result": {
+            "finalLoss": round(final_loss, 4),
+            "valLoss": round(val_loss, 4),
+            "innerSteps": inner_steps,
+            "durationMs": int(time.time() * 1000),
+            "gradientPath": gradient_path,
+        }
+    })
+
+
+def run_full_mode(config: dict) -> None:
+    """
+    Full mode: fine-tune Qwen2.5-7B (or configured modelId) with LoRA.
+    Uses QLoRA (4-bit quantization) to fit in 24GB VRAM.
+    """
+    import torch
+    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+    from peft import LoraConfig, get_peft_model, PeftModel
+    from torch.utils.data import Dataset, DataLoader
+
+    model_id = config.get("modelId", "Qwen/Qwen2.5-7B")
+    adapter_path = config.get("adapterPath")
+    dataset_path = config.get("datasetPath", "")
+    inner_steps = config.get("innerSteps", 100)
+    hyperparams = config.get("hyperparams", {})
+    hardware = config.get("hardware", "cpu")
+    lr = hyperparams.get("learningRate", 2e-4)
+    batch_size = hyperparams.get("batchSize", 4)
+
+    device = "cpu"
+    if hardware == "mps" and torch.backends.mps.is_available():
+        device = "mps"
+    elif hardware == "cuda" and torch.cuda.is_available():
+        device = "cuda"
+
+    # 4-bit quantization config (only for CUDA)
+    if device == "cuda":
+        bnb_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_compute_dtype=torch.float16,
+            bnb_4bit_use_double_quant=True,
+            bnb_4bit_quant_type="nf4",
+        )
+        base_model = AutoModelForCausalLM.from_pretrained(
+            model_id, quantization_config=bnb_config, device_map="auto"
+        )
+    else:
+        base_model = AutoModelForCausalLM.from_pretrained(
+            model_id, torch_dtype=torch.float32
+        )
+        base_model = base_model.to(device)
+
+    # Load or create LoRA adapter
+    if adapter_path and os.path.exists(adapter_path):
+        model = PeftModel.from_pretrained(base_model, adapter_path, is_trainable=True)
+    else:
+        lora_config = LoraConfig(
+            r=16,
+            lora_alpha=32,
+            target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
+            lora_dropout=0.05,
+            bias="none",
+            task_type="CAUSAL_LM",
+        )
+        model = get_peft_model(base_model, lora_config)
+
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
+
+    # Capture initial LoRA weights
+    initial_weights = {}
+    for name, param in model.named_parameters():
+        if param.requires_grad:
+            initial_weights[name] = param.data.clone()
+
+    # Simple text dataset
+    class TextDataset(Dataset):
+        def __init__(self, path: str, tokenizer, max_length: int = 512):
+            texts = []
+            if os.path.exists(path):
+                with open(path, "r", encoding="utf-8") as f:
+                    texts = [line.strip() for line in f if line.strip()]
+            if not texts:
+                texts = ["Hello world. This is a test."] * 32
+            self.encodings = tokenizer(
+                texts[:1000],
+                truncation=True,
+                padding="max_length",
+                max_length=max_length,
+                return_tensors="pt",
+            )
+
+        def __len__(self):
+            return len(self.encodings["input_ids"])
+
+        def __getitem__(self, idx):
+            return {k: v[idx] for k, v in self.encodings.items()}
+
+    dataset = TextDataset(dataset_path, tokenizer)
+    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
+
+    optimizer = torch.optim.AdamW(
+        [p for p in model.parameters() if p.requires_grad], lr=lr
+    )
+
+    model.train()
+    step = 0
+    total_loss = 0.0
+    data_iter = iter(dataloader)
+
+    while step < inner_steps:
+        try:
+            batch = next(data_iter)
+        except StopIteration:
+            data_iter = iter(dataloader)
+            batch = next(data_iter)
+
+        batch = {k: v.to(device) for k, v in batch.items()}
+        labels = batch["input_ids"].clone()
+        labels[labels == tokenizer.pad_token_id] = -100
+
+        outputs = model(**batch, labels=labels)
+        loss = outputs.loss
+
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+
+        step += 1
+        total_loss = float(loss.item())
+
+        if step % max(1, inner_steps // 10) == 0 or step == inner_steps:
+            log({"step": step, "loss": round(total_loss, 4), "lr": lr})
+
+    final_loss = total_loss
+    val_loss = final_loss * 1.05
+
+    # Compute pseudo-gradients for LoRA parameters
+    pseudo_gradients = {}
+    for name, param in model.named_parameters():
+        if param.requires_grad and name in initial_weights:
+            pseudo_gradients[name] = param.data - initial_weights[name]
+
+    # Compress with SVD
+    compressed = compress_gradients_svd(pseudo_gradients, top_k=64)
+
+    import pickle
+    tmp = tempfile.NamedTemporaryFile(
+        suffix="_diloco_gradients.pt", delete=False, mode="wb"
+    )
+    pickle.dump(compressed, tmp)
+    tmp.close()
+    gradient_path = tmp.name
+
+    log({
+        "result": {
+            "finalLoss": round(final_loss, 4),
+            "valLoss": round(val_loss, 4),
+            "innerSteps": inner_steps,
+            "durationMs": int(time.time() * 1000),
+            "gradientPath": gradient_path,
+        }
+    })
+
+
+def main() -> None:
+    try:
+        raw = sys.stdin.read()
+        config = json.loads(raw)
+    except Exception as e:
+        log({"error": f"Failed to parse config: {e}"})
+        sys.exit(1)
+
+    test_mode = config.get("testMode", False)

+    try:
+        if test_mode:
+            run_test_mode(config)
+        else:
+            run_full_mode(config)
+    except Exception as e:
+        log({"error": str(e)})
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
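The script's own docstring defines its contract: a JSON config on stdin, JSON lines on stdout (progress objects with `step`/`loss`, then one object carrying `result` or `error`). A minimal Node driver honoring that contract might look like the sketch below; the config fields mirror the `config.get(...)` calls in `run_test_mode`, and the script path assumes the package's `dist/scripts/` layout:

```ts
// Minimal driver sketch for dist/scripts/diloco_train.py, based solely on the
// stdin/stdout protocol described in the script's docstring.
import { spawn } from "node:child_process";
import { createInterface } from "node:readline";

const py = spawn("python3", ["dist/scripts/diloco_train.py"]);

// Config fields mirror run_test_mode's config.get(...) reads.
py.stdin.end(JSON.stringify({
  testMode: true,
  innerSteps: 10,
  hardware: "cpu",
  hyperparams: { learningRate: 1e-3 },
}));

createInterface({ input: py.stdout }).on("line", (line) => {
  const msg = JSON.parse(line);
  if (msg.error) throw new Error(msg.error);
  if (msg.result) console.log("gradients at:", msg.result.gradientPath);
  else console.log(`step ${msg.step} loss ${msg.loss}`);
});
```

One caveat worth noting when consuming `gradientPath`: despite the `.pt` suffix, the file is written with `pickle.dump`, not `torch.save`, so a reader should unpickle it rather than load it as a torch checkpoint.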