@buley/neural 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.html +12 -0
- package/package.json +20 -0
- package/src/bench/benchmark.ts +107 -0
- package/src/db/repository.test.ts +67 -0
- package/src/db/repository.ts +36 -0
- package/src/db/schema.ts +40 -0
- package/src/engine/gpu.test.ts +120 -0
- package/src/engine/gpu.ts +255 -0
- package/src/engine/shaders/brain.wgsl +59 -0
- package/src/engine/shaders/training.wgsl +102 -0
- package/src/engine/training.test.ts +106 -0
- package/src/engine/translator.test.ts +58 -0
- package/src/engine/translator.ts +54 -0
- package/src/index.ts +118 -0
- package/src/types.ts +13 -0
- package/tsconfig.json +24 -0
- package/vite.config.ts +13 -0
package/index.html
ADDED
@@ -0,0 +1,12 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Neural 2.0: The Transparent Brain</title>
+  </head>
+  <body>
+    <div id="app"></div>
+    <script type="module" src="/src/index.ts"></script>
+  </body>
+</html>

package/package.json
ADDED
@@ -0,0 +1,20 @@
+{
+  "name": "@buley/neural",
+  "version": "2.0.0",
+  "description": "A Transparent, Local-First, WebGPU-Accelerated Neural Graph Database.",
+  "type": "module",
+  "main": "./src/index.ts",
+  "scripts": {
+    "dev": "vite",
+    "build": "tsc && vite build",
+    "bench": "bun src/bench/benchmark.ts"
+  },
+  "dependencies": {
+    "@buley/dash": "^1.1.0"
+  },
+  "devDependencies": {
+    "@webgpu/types": "^0.1.69",
+    "typescript": "^5.3.3",
+    "vite": "^5.0.0"
+  }
+}

package/src/bench/benchmark.ts
ADDED
@@ -0,0 +1,107 @@
+
+import { GPUEngine } from "../engine/gpu";
+import { performance } from "perf_hooks";
+
+// NOTE: Bun does not ship a native WebGPU implementation, so this benchmark cannot
+// drive a real GPU from a terminal. It is still written to be runnable: if no GPU is
+// available, init() fails fast, and the mock adapter installed at the bottom of this
+// file lets the benchmark's structure be verified from the CLI. For real numbers,
+// run it in a browser context (e.g. via the web app or a WebGPU-capable test runner).
+
+async function runBenchmark(label: string, networkSize: number, batchSize: number, iterations: number) {
+  console.log(`\n--- Benchmark: ${label} ---`);
+  console.log(`Network: ${networkSize} Neurons, Batch: ${batchSize}`);
+
+  const gpu = new GPUEngine();
+
+  try {
+    await gpu.init();
+  } catch (e) {
+    console.error("WebGPU Initialize Failed (Expected in non-browser env):", e);
+    return;
+  }
+
+  // Prepare Data
+  const weights = new Float32Array(networkSize * networkSize); // Full connectivity
+  const biases = new Float32Array(networkSize);
+  const inputs = new Float32Array(networkSize * batchSize);
+  const targets = new Float32Array(networkSize * batchSize);
+
+  // Init Buffers
+  const startObj = performance.now();
+  gpu.prepareBuffers(networkSize, weights, biases, batchSize);
+  gpu.prepareTrainingBuffers(targets, 0.01);
+  const initTime = performance.now() - startObj;
+  console.log(`Initialization/Upload: ${initTime.toFixed(2)}ms`);
+
+  // Warmup
+  await gpu.runTick(inputs);
+
+  // Measure Inference
+  const startInf = performance.now();
+  for (let i = 0; i < iterations; i++) {
+    await gpu.runTick(inputs);
+  }
+  const endInf = performance.now();
+  const infTime = endInf - startInf;
+  const infOPS = (iterations * batchSize) / (infTime / 1000);
+  console.log(`Inference: ${infTime.toFixed(2)}ms for ${iterations} ticks`);
+  console.log(`Throughput: ${infOPS.toFixed(0)} samples/sec`);
+
+  // Measure Training
+  const startTrain = performance.now();
+  for (let i = 0; i < iterations; i++) {
+    await gpu.trainTick();
+  }
+  const endTrain = performance.now();
+  const trainTime = endTrain - startTrain;
+  const trainOPS = (iterations * batchSize) / (trainTime / 1000);
+  console.log(`Training: ${trainTime.toFixed(2)}ms for ${iterations} ticks`);
+  console.log(`Throughput: ${trainOPS.toFixed(0)} samples/sec`);
+}
+
+async function main() {
+  // Small
+  await runBenchmark("Small", 100, 1, 100);
+
+  // Medium
+  await runBenchmark("Medium (Batched)", 1000, 32, 50);
+
+  // Large
+  await runBenchmark("Large (Batched)", 5000, 64, 20);
+}
+
+// Check for WebGPU; mock it if running in Node/Bun without GPU support
+if (!global.navigator?.gpu) {
+  console.log("No WebGPU detected in global scope. Mocking for CLI structure verification...");
+  // @ts-ignore
+  global.navigator = {
+    gpu: {
+      requestAdapter: async () => ({
+        requestDevice: async () => ({
+          createShaderModule: () => ({}),
+          createComputePipeline: () => ({ getBindGroupLayout: () => ({}) }),
+          createBuffer: (d: any) => ({ getMappedRange: () => new ArrayBuffer(d.size), unmap: () => {}, mapAsync: async () => {} }),
+          createBindGroup: () => ({}),
+          createCommandEncoder: () => ({
+            beginComputePass: () => ({ setPipeline: () => {}, setBindGroup: () => {}, dispatchWorkgroups: () => {}, end: () => {} }),
+            copyBufferToBuffer: () => {},
+            finish: () => ({})
+          }),
+          queue: { writeBuffer: () => {}, submit: () => {} }
+        })
+      })
+    }
+  };
+  // @ts-ignore
+  global.GPUBufferUsage = { STORAGE: 1, COPY_DST: 2, COPY_SRC: 4, UNIFORM: 8, MAP_READ: 16 };
+  // @ts-ignore
+  global.GPUMapMode = { READ: 1 };
+}
+
+main();

package/src/db/repository.test.ts
ADDED
@@ -0,0 +1,67 @@
+import { expect, test, describe, mock, beforeAll } from "bun:test";
+import { NeuronRepository, SynapseRepository } from "./repository";
+
+// Mock @buley/dash
+const mockDash = {
+  execute: mock((query, params) => {
+    // Simple mock implementation
+    if (query.includes("INSERT")) return Promise.resolve();
+    if (query.includes("SELECT * FROM neurons")) return Promise.resolve([
+      { id: "n1", type: "input", bias: 0.1, activation: "tanh" }
+    ]);
+    if (query.includes("SELECT * FROM synapses")) return Promise.resolve([
+      { id: "s1", from_id: "n1", to_id: "n2", weight: 0.5 }
+    ]);
+    return Promise.resolve([]);
+  }),
+  addWithEmbedding: mock(() => Promise.resolve())
+};
+
+// Mock module
+mock.module("@buley/dash", () => ({
+  dash: mockDash
+}));
+
+describe("NeuronRepository", () => {
+  test("create() executes INSERT query", async () => {
+    const repo = new NeuronRepository();
+    await repo.create({ id: "n1", type: "input", bias: 0.1, activation: "tanh" });
+    expect(mockDash.execute).toHaveBeenCalled();
+    const call = mockDash.execute.mock.calls[0];
+    expect(call[0]).toContain("INSERT INTO neurons");
+    expect(call[1]).toEqual(["n1", "input", 0.1, "tanh"]);
+  });
+
+  test("createWithSemantics() calls addWithEmbedding", async () => {
+    const repo = new NeuronRepository();
+    await repo.createWithSemantics(
+      { id: "n2", type: "hidden", bias: 0, activation: "relu" },
+      "detects curves"
+    );
+    expect(mockDash.addWithEmbedding).toHaveBeenCalledWith("n2", "detects curves");
+  });
+
+  test("getAll() returns neurons", async () => {
+    mockDash.execute.mockClear();
+    const repo = new NeuronRepository();
+    const results = await repo.getAll();
+    expect(results.length).toBe(1);
+    expect(results[0].id).toBe("n1");
+  });
+});
+
+describe("SynapseRepository", () => {
+  test("create() executes INSERT query", async () => {
+    const repo = new SynapseRepository();
+    await repo.create({ id: "s1", from_id: "n1", to_id: "n2", weight: 0.5 });
+    expect(mockDash.execute).toHaveBeenCalled();
+    // Check latest call
+  });
+
+  test("getAll() returns synapses", async () => {
+    const repo = new SynapseRepository();
+    const results = await repo.getAll();
+    expect(results.length).toBe(1);
+    expect(results[0].weight).toBe(0.5);
+  });
+});

package/src/db/repository.ts
ADDED
@@ -0,0 +1,36 @@
+import { dash } from "@buley/dash";
+import { Neuron, Synapse } from "../types";
+
+export class NeuronRepository {
+  async create(neuron: Neuron): Promise<void> {
+    await dash.execute(
+      "INSERT INTO neurons (id, type, bias, activation) VALUES (?, ?, ?, ?)",
+      [neuron.id, neuron.type, neuron.bias, neuron.activation]
+    );
+  }
+
+  // Feature: Add with semantic embedding
+  async createWithSemantics(neuron: Neuron, description: string): Promise<void> {
+    // We store the structured data normally
+    await this.create(neuron);
+    // And we map the ID to a semantic embedding in dash's hidden semantic store
+    await dash.addWithEmbedding(neuron.id, description);
+  }
+
+  async getAll(): Promise<Neuron[]> {
+    return await dash.execute("SELECT * FROM neurons") as Neuron[];
+  }
+}
+
+export class SynapseRepository {
+  async create(synapse: Synapse): Promise<void> {
+    await dash.execute(
+      "INSERT INTO synapses (id, from_id, to_id, weight) VALUES (?, ?, ?, ?)",
+      [synapse.id, synapse.from_id, synapse.to_id, synapse.weight]
+    );
+  }
+
+  async getAll(): Promise<Synapse[]> {
+    return await dash.execute("SELECT * FROM synapses") as Synapse[];
+  }
+}

package/src/db/schema.ts
ADDED
@@ -0,0 +1,40 @@
+
+import { dash } from "@buley/dash";
+
+export async function initializeSchema() {
+  console.log("Initializing Neural Schema...");
+
+  // Neurons Table
+  // id: UUID
+  // type: input, hidden, output
+  // bias: float
+  // activation: string (tanh, relu, sigmoid)
+  await dash.execute(`
+    CREATE TABLE IF NOT EXISTS neurons (
+      id TEXT PRIMARY KEY,
+      type TEXT NOT NULL,
+      bias REAL DEFAULT 0.0,
+      activation TEXT DEFAULT 'tanh',
+      created_at INTEGER DEFAULT (unixepoch())
+    )
+  `);
+
+  // Synapses Table
+  // id: UUID
+  // from_id: neuron UUID
+  // to_id: neuron UUID
+  // weight: float
+  await dash.execute(`
+    CREATE TABLE IF NOT EXISTS synapses (
+      id TEXT PRIMARY KEY,
+      from_id TEXT NOT NULL,
+      to_id TEXT NOT NULL,
+      weight REAL DEFAULT 0.0,
+      created_at INTEGER DEFAULT (unixepoch()),
+      FOREIGN KEY(from_id) REFERENCES neurons(id),
+      FOREIGN KEY(to_id) REFERENCES neurons(id)
+    )
+  `);
+
+  console.log("Schema initialized.");
+}

package/src/engine/gpu.test.ts
ADDED
@@ -0,0 +1,120 @@
+import { expect, test, describe, mock, beforeAll } from "bun:test";
+import { GPUEngine } from "./gpu";
+
+// Mock WebGPU Globals
+const mockDevice = {
+  createShaderModule: mock(() => ({})),
+  createComputePipeline: mock(() => ({
+    getBindGroupLayout: mock(() => ({}))
+  })),
+  createBuffer: mock((desc: any) => ({
+    getMappedRange: () => new ArrayBuffer(desc.size),
+    unmap: () => {},
+    mapAsync: async () => {}
+  })),
+  createBindGroup: mock(() => ({})),
+  createCommandEncoder: mock(() => ({
+    beginComputePass: mock(() => ({
+      setPipeline: mock(() => {}),
+      setBindGroup: mock(() => {}),
+      dispatchWorkgroups: mock(() => {}),
+      end: mock(() => {})
+    })),
+    copyBufferToBuffer: mock(() => {}),
+    finish: mock(() => ({}))
+  })),
+  queue: {
+    writeBuffer: mock(() => {}),
+    submit: mock(() => {})
+  }
+};
+
+const mockAdapter = {
+  requestDevice: mock(async () => mockDevice)
+};
+
+// Polyfill navigator.gpu
+// @ts-ignore
+global.navigator = {
+  gpu: {
+    requestAdapter: mock(async () => mockAdapter)
+  }
+};
+
+// Polyfill Globals
+// @ts-ignore
+global.GPUBufferUsage = {
+  MAP_READ: 1,
+  MAP_WRITE: 2,
+  COPY_SRC: 4,
+  COPY_DST: 8,
+  INDEX: 16,
+  VERTEX: 32,
+  UNIFORM: 64,
+  STORAGE: 128,
+  INDIRECT: 256,
+  QUERY_RESOLVE: 512
+};
+// @ts-ignore
+global.GPUMapMode = {
+  READ: 1,
+  WRITE: 2
+};
+
+describe("GPUEngine", () => {
+  test("init() requests adapter and device", async () => {
+    const gpu = new GPUEngine();
+    await gpu.init();
+    expect(navigator.gpu.requestAdapter).toHaveBeenCalled();
+    expect(mockAdapter.requestDevice).toHaveBeenCalled();
+    expect(gpu.device).toBeDefined();
+  });
+
+  test("prepareBuffers() creates GPU buffers", async () => {
+    const gpu = new GPUEngine();
+    await gpu.init();
+
+    const weights = new Float32Array([1, 2, 3, 4]);
+    const biases = new Float32Array([0, 0]);
+
+    gpu.prepareBuffers(2, weights, biases);
+
+    expect(mockDevice.createBuffer).toHaveBeenCalledTimes(5); // W, I, B, O, Uniforms
+    expect(mockDevice.createBindGroup).toHaveBeenCalled();
+  });
+
+  test("runTick() dispatches compute shader", async () => {
+    const gpu = new GPUEngine();
+    await gpu.init();
+    gpu.prepareBuffers(2, new Float32Array(4), new Float32Array(2));
+
+    const inputs = new Float32Array([1, 0]);
+    await gpu.runTick(inputs);
+
+    expect(mockDevice.queue.writeBuffer).toHaveBeenCalled();
+    expect(mockDevice.createCommandEncoder).toHaveBeenCalled();
+    // The dispatch happens on a nested mock we don't keep a handle to,
+    // so completing without throwing is the effective assertion here.
+  });
+
+  test("prepareBuffers() and runTick() with Batch Size > 1", async () => {
+    const gpu = new GPUEngine();
+    await gpu.init();
+
+    const N = 4;
+    const B = 2; // Batch Size 2
+
+    gpu.prepareBuffers(N, new Float32Array(N), new Float32Array(N), B);
+
+    // Input size = N * B = 8
+    const inputs = new Float32Array(N * B);
+    await gpu.runTick(inputs);
+
+    expect(mockDevice.createBuffer).toHaveBeenCalled();
+    expect(mockDevice.queue.writeBuffer).toHaveBeenCalled();
+
+    // The mock doesn't record buffer sizes, but finishing without an
+    // "Input size mismatch" error shows the batch validation passed.
+  });
+});

package/src/engine/gpu.ts
ADDED
@@ -0,0 +1,255 @@
+import shaderCode from './shaders/brain.wgsl?raw';
+import trainingShaderCode from './shaders/training.wgsl?raw';
+
+export class GPUEngine {
+  device: GPUDevice | null = null;
+  pipeline: GPUComputePipeline | null = null;
+  bindGroup: GPUBindGroup | null = null;
+
+  // Training Buffers
+  deltaBuffer: GPUBuffer | null = null;
+  targetBuffer: GPUBuffer | null = null;
+  paramBuffer: GPUBuffer | null = null;
+
+  trainingPipeline: GPUComputePipeline | null = null;
+  deltaPipeline: GPUComputePipeline | null = null;
+  trainingBindGroup: GPUBindGroup | null = null;
+
+  // Buffers
+  weightBuffer: GPUBuffer | null = null;
+  inputBuffer: GPUBuffer | null = null;
+  biasBuffer: GPUBuffer | null = null;
+  outputBuffer: GPUBuffer | null = null;
+  uniformBuffer: GPUBuffer | null = null;
+
+  networkSize: number = 0;
+  batchSize: number = 1;
+
+  async init() {
+    if (!navigator.gpu) throw new Error("WebGPU not supported");
+    const adapter = await navigator.gpu.requestAdapter();
+    if (!adapter) throw new Error("No GPU adapter found");
+    this.device = await adapter.requestDevice();
+
+    const shaderModule = this.device.createShaderModule({ code: shaderCode });
+    const trainingModule = this.device.createShaderModule({ code: trainingShaderCode });
+
+    this.pipeline = this.device.createComputePipeline({
+      layout: 'auto',
+      compute: { module: shaderModule, entryPoint: 'main' }
+    });
+
+    this.trainingPipeline = this.device.createComputePipeline({
+      layout: 'auto',
+      compute: { module: trainingModule, entryPoint: 'update_weights' }
+    });
+
+    this.deltaPipeline = this.device.createComputePipeline({
+      layout: 'auto',
+      compute: { module: trainingModule, entryPoint: 'calculate_deltas' }
+    });
+
+    console.log("GPUEngine initialized");
+  }
+
+  // Prepare buffers based on network size (N) and Batch Size (B)
+  prepareBuffers(size: number, weights: Float32Array, biases: Float32Array, batchSize: number = 1) {
+    if (!this.device || !this.pipeline) throw new Error("GPUEngine not initialized");
+    this.networkSize = size;
+    this.batchSize = batchSize;
+
+    // Create Buffers
+    // Weights & Biases are shared (Size N or N*N)
+    this.weightBuffer = this.createBuffer(weights, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST);
+    this.biasBuffer = this.createBuffer(biases, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST);
+
+    // Inputs & Outputs are Batched (Size N * B)
+    const batchedSize = size * batchSize;
+    this.inputBuffer = this.createBuffer(new Float32Array(batchedSize), GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST);
+    this.outputBuffer = this.createBuffer(new Float32Array(batchedSize), GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST);
+
+    // Dimensions Uniform: [Size, BatchSize]
+    const dimArray = new Uint32Array([size, batchSize]);
+    this.uniformBuffer = this.createBuffer(dimArray, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST);
+
+    // Bind Group
+    this.bindGroup = this.device.createBindGroup({
+      layout: this.pipeline.getBindGroupLayout(0),
+      entries: [
+        { binding: 0, resource: { buffer: this.weightBuffer } },
+        { binding: 1, resource: { buffer: this.inputBuffer } },
+        { binding: 2, resource: { buffer: this.biasBuffer } },
+        { binding: 3, resource: { buffer: this.outputBuffer } },
+        { binding: 4, resource: { buffer: this.uniformBuffer } },
+      ]
+    });
+  }
+
+  private createBuffer(data: Float32Array | Uint32Array, usage: number): GPUBuffer {
+    if (!this.device) throw new Error("Device null");
+    const buffer = this.device.createBuffer({
+      size: data.byteLength,
+      usage: usage,
+      mappedAtCreation: true
+    });
+    if (data instanceof Float32Array) {
+      new Float32Array(buffer.getMappedRange()).set(data);
+    } else {
+      new Uint32Array(buffer.getMappedRange()).set(data);
+    }
+    buffer.unmap();
+    return buffer;
+  }
+
+  async runTick(inputs: Float32Array): Promise<Float32Array> {
+    if (!this.device || !this.pipeline || !this.bindGroup || !this.inputBuffer || !this.outputBuffer) {
+      throw new Error("GPU buffers not ready");
+    }
+
+    if (inputs.length !== this.networkSize * this.batchSize) {
+      throw new Error(`Input size mismatch. Expected ${this.networkSize * this.batchSize}, got ${inputs.length}`);
+    }
+
+    // Upload Input
+    this.device.queue.writeBuffer(this.inputBuffer, 0, inputs);
+
+    // Encode Command
+    const commandEncoder = this.device.createCommandEncoder();
+    const passEncoder = commandEncoder.beginComputePass();
+    passEncoder.setPipeline(this.pipeline);
+    passEncoder.setBindGroup(0, this.bindGroup);
+
+    // Dispatch (Size / WorkgroupSize, 1, BatchSize)
+    const workgroupSize = 64;
+    const workgroupCount = Math.ceil(this.networkSize / workgroupSize);
+    passEncoder.dispatchWorkgroups(workgroupCount, 1, this.batchSize);
+    passEncoder.end();
+
+    // Read Output via a staging buffer
+    const size = inputs.byteLength;
+    const gpuReadBuffer = this.device.createBuffer({
+      size: size,
+      usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ
+    });
+
+    commandEncoder.copyBufferToBuffer(this.outputBuffer, 0, gpuReadBuffer, 0, size);
+
+    const gpuCommands = commandEncoder.finish();
+    this.device.queue.submit([gpuCommands]);
+
+    await gpuReadBuffer.mapAsync(GPUMapMode.READ);
+    const result = new Float32Array(gpuReadBuffer.getMappedRange());
+    const output = new Float32Array(result); // Copy out before unmapping
+    gpuReadBuffer.unmap();
+
+    return output;
+  }
+
+  prepareTrainingBuffers(targets: Float32Array, learningRate: number) {
+    if (!this.device || !this.trainingPipeline || !this.weightBuffer || !this.outputBuffer || !this.biasBuffer || !this.uniformBuffer) {
+      throw new Error("GPU not ready for training");
+    }
+
+    if (targets.length !== this.networkSize * this.batchSize) {
+      throw new Error(`Target size mismatch. Expected ${this.networkSize * this.batchSize}, got ${targets.length}`);
+    }
+
+    // Deltas & Targets are Batched (Size N * B)
+    const batchedSize = this.networkSize * this.batchSize;
+    this.deltaBuffer = this.createBuffer(new Float32Array(batchedSize), GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC);
+    this.targetBuffer = this.createBuffer(targets, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST);
+    this.paramBuffer = this.createBuffer(new Float32Array([learningRate]), GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST);
+
+    this.trainingBindGroup = this.device.createBindGroup({
+      layout: this.trainingPipeline.getBindGroupLayout(0),
+      entries: [
+        { binding: 0, resource: { buffer: this.weightBuffer } },
+        { binding: 1, resource: { buffer: this.outputBuffer } },
+        { binding: 2, resource: { buffer: this.biasBuffer } },
+        { binding: 3, resource: { buffer: this.deltaBuffer } },
+        { binding: 4, resource: { buffer: this.targetBuffer } },
+        { binding: 5, resource: { buffer: this.uniformBuffer } },
+        { binding: 6, resource: { buffer: this.paramBuffer } }
+      ]
+    });
+  }
+
+  private subscribers: ((event: { type: 'loss' | 'epoch', value: number }) => void)[] = [];
+
+  subscribe(callback: (event: { type: 'loss' | 'epoch', value: number }) => void) {
+    this.subscribers.push(callback);
+    return () => {
+      this.subscribers = this.subscribers.filter(s => s !== callback);
+    };
+  }
+
+  private emit(event: { type: 'loss' | 'epoch', value: number }) {
+    this.subscribers.forEach(cb => cb(event));
+  }
+
+  async train(inputs: Float32Array, targets: Float32Array): Promise<Float32Array> {
+    // 1. Forward Pass
+    const outputs = await this.runTick(inputs);
+
+    // 2. Calculate Loss (MSE) on CPU for UI feedback.
+    // Full-resolution loss is only feasible for small batches; for the demo we
+    // compute the full MSE.
+    let totalLoss = 0;
+    for (let i = 0; i < outputs.length; i++) {
+      // Targets at or below the -998 sentinel are unsupervised and skipped,
+      // matching the shader's behavior.
+      const t = targets[i];
+      if (t > -998) {
+        const diff = outputs[i] - t;
+        totalLoss += 0.5 * diff * diff;
+      }
+    }
+    const meanLoss = totalLoss / this.batchSize; // Approximate per-item mean
+    this.emit({ type: 'loss', value: meanLoss });
+
+    // 3. Backward Pass
+    // Assumes prepareTrainingBuffers() was called once before the training loop;
+    // only the targets buffer needs refreshing per step.
+    if (this.targetBuffer) {
+      this.device?.queue.writeBuffer(this.targetBuffer, 0, targets);
+    }
+
+    // Run Training Shaders
+    await this.trainTick();
+
+    this.emit({ type: 'epoch', value: 1 }); // Effectively a tick counter
+    return outputs;
+  }
+
+  async trainTick(deltas?: Float32Array): Promise<void> {
+    if (!this.device || !this.trainingPipeline || !this.deltaPipeline || !this.trainingBindGroup || !this.deltaBuffer) {
+      throw new Error("Training not ready");
+    }
+
+    if (deltas && deltas.length > 0) {
+      this.device.queue.writeBuffer(this.deltaBuffer, 0, deltas);
+    }
+
+    const commandEncoder = this.device.createCommandEncoder();
+    const passEncoder = commandEncoder.beginComputePass();
+
+    // Pass 1: Calculate Deltas (Batched)
+    passEncoder.setPipeline(this.deltaPipeline);
+    passEncoder.setBindGroup(0, this.trainingBindGroup);
+    const workgroupSize = 64;
+    const workgroupCount = Math.ceil(this.networkSize / workgroupSize);
+    passEncoder.dispatchWorkgroups(workgroupCount, 1, this.batchSize);
+
+    passEncoder.end();
+
+    // Pass 2: Update Weights (accumulates gradients over the batch)
+    const updatePass = commandEncoder.beginComputePass();
+    updatePass.setPipeline(this.trainingPipeline);
+    updatePass.setBindGroup(0, this.trainingBindGroup); // Re-bind for new pass
+    updatePass.dispatchWorkgroups(workgroupCount, 1, 1); // Not batched
+    updatePass.end();
+
+    this.device.queue.submit([commandEncoder.finish()]);
+  }
+}

package/src/engine/shaders/brain.wgsl
ADDED
@@ -0,0 +1,59 @@
+
+// Structure of our compute shader
+// Group 0: Bindings for data
+// Binding 0: Matrix W (Weights) - N x N flattened array
+// Binding 1: Vector X (Current Neuron Values) - N length array
+// Binding 2: Vector B (Biases) - N length array
+// Binding 3: Vector Y (Output Neuron Values) - N length array
+// Binding 4: Dimensions Uniform - Struct { size: u32 }
+
+struct Dimensions {
+  size: u32,
+}
+
+@group(0) @binding(0) var<storage, read> weights: array<f32>;
+@group(0) @binding(1) var<storage, read> input: array<f32>;
+@group(0) @binding(2) var<storage, read> biases: array<f32>;
+@group(0) @binding(3) var<storage, read_write> output: array<f32>;
+@group(0) @binding(4) var<uniform> dims: Dimensions;
+
+// Activation Functions
+fn tanh_approx(x: f32) -> f32 {
+  let e2x = exp(2.0 * x);
+  return (e2x - 1.0) / (e2x + 1.0);
+}
+
+@compute @workgroup_size(64, 1, 1)
+fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
+  let row = global_id.x;
+  let batch = global_id.z;
+  let size = dims.size;
+
+  if (row >= size) {
+    return;
+  }
+
+  // Dot product: Row of W * Vector X
+  var sum: f32 = 0.0;
+
+  // Batch offset for input/output
+  let batch_offset = batch * size;
+
+  for (var col: u32 = 0u; col < size; col = col + 1u) {
+    // W is shared (not batched): weights[row * size + col]
+    let w_idx = row * size + col;
+
+    // Input is batched: input[batch * size + col]
+    let input_idx = batch_offset + col;
+
+    sum = sum + (weights[w_idx] * input[input_idx]);
+  }
+
+  // Add Bias (Shared)
+  sum = sum + biases[row];
+
+  // Activation
+  // Output is batched: output[batch * size + row]
+  let out_idx = batch_offset + row;
+  output[out_idx] = tanh_approx(sum);
+}

package/src/engine/shaders/training.wgsl
ADDED
@@ -0,0 +1,102 @@
+struct Dimensions {
+  size: u32,
+  batchSize: u32,
+}
+
+struct TrainingParams {
+  learningRate: f32,
+}
+
+@group(0) @binding(0) var<storage, read_write> weights: array<f32>;
+@group(0) @binding(1) var<storage, read> values: array<f32>; // Batched Activations (N * B)
+@group(0) @binding(2) var<storage, read> biases: array<f32>;
+@group(0) @binding(3) var<storage, read_write> deltas: array<f32>; // Batched Deltas (N * B)
+@group(0) @binding(4) var<storage, read> targets: array<f32>; // Batched Targets
+@group(0) @binding(5) var<uniform> dims: Dimensions;
+@group(0) @binding(6) var<uniform> params: TrainingParams;
+
+fn tanh_derivative(val: f32) -> f32 {
+  return 1.0 - (val * val);
+}
+
+// 1. Calculate Deltas (Backward Pass) - 3D Dispatched (64, 1, B)
+@compute @workgroup_size(64, 1, 1)
+fn calculate_deltas(@builtin(global_invocation_id) global_id: vec3<u32>) {
+  let index = global_id.x;
+  let batch = global_id.z;
+  let size = dims.size;
+
+  if (index >= size) { return; }
+
+  let batch_offset = batch * size;
+  let neuron_idx = batch_offset + index;
+
+  let activation = values[neuron_idx];
+  let derivative = tanh_derivative(activation);
+
+  var error_sum: f32 = 0.0;
+
+  // Backpropagate error from the "next layer" (all other neurons k).
+  // delta_k is also batched: deltas[batch * size + k]
+  for (var k: u32 = 0u; k < size; k = k + 1u) {
+    // Weight FROM index TO k
+    let w_idx = k * size + index;
+    let weight_ki = weights[w_idx];
+
+    let delta_k_idx = batch_offset + k;
+    let delta_k = deltas[delta_k_idx];
+
+    error_sum = error_sum + (delta_k * weight_ki);
+  }
+
+  // Add immediate error (MSE derivative: y - t)
+  // targets[batch * size + index]
+  let target = targets[neuron_idx];
+  if (target > -998.0) {
+    error_sum = error_sum + (activation - target);
+  }
+
+  deltas[neuron_idx] = error_sum * derivative;
+}
+
+// 2. Update Weights (Optimizer Step) - 1D Dispatched (64, 1, 1) - Accumulates Gradients over Batch
+@compute @workgroup_size(64)
+fn update_weights(@builtin(global_invocation_id) global_id: vec3<u32>) {
+  let row = global_id.x; // Target neuron
+  let size = dims.size;
+  let batch_size = dims.batchSize;
+
+  if (row >= size) { return; }
+
+  let lr = params.learningRate;
+
+  // Update incoming weights to this neuron 'row'
+  // W_ji (row, col)
+  for (var col: u32 = 0u; col < size; col = col + 1u) {
+    let w_idx = row * size + col;
+
+    // Accumulate gradient over batch
+    var gradient_sum: f32 = 0.0;
+
+    for (var b: u32 = 0u; b < batch_size; b = b + 1u) {
+      let batch_offset = b * size;
+
+      // delta_j (for this batch item)
+      let delta_j = deltas[batch_offset + row];
+
+      // input_i (activation of source col for this batch item)
+      let input_val = values[batch_offset + col];
+
+      gradient_sum = gradient_sum + (delta_j * input_val);
+    }
+
+    // SGD update using the mean gradient over the batch (the standard
+    // mini-batch convention).
+    let mean_gradient = gradient_sum / f32(batch_size);
+
+    weights[w_idx] = weights[w_idx] - (lr * mean_gradient);
+  }
+}

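For reference, the two kernels above compute the following update (this is a reading of the shader source; the package ships no such write-up). With activations $y$, targets $t$, batch item $b$, learning rate $\eta$, and batch size $B$:

    \delta_i^{(b)} = \Big( \sum_{k} w_{ki}\, \delta_k^{(b)} \;+\; \big(y_i^{(b)} - t_i^{(b)}\big)\,\mathbf{1}\big[t_i^{(b)} > -998\big] \Big) \cdot \big(1 - (y_i^{(b)})^2\big)

    w_{ji} \leftarrow w_{ji} - \frac{\eta}{B} \sum_{b=1}^{B} \delta_j^{(b)}\, y_i^{(b)}

The indicator reflects the -998 sentinel: neurons whose target is at or below the sentinel contribute no immediate error term, which is how unclamped (hidden) neurons are expressed in this fully connected layout.
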
package/src/engine/training.test.ts
ADDED
@@ -0,0 +1,106 @@
+import { expect, test, describe, mock } from "bun:test";
+import { GPUEngine } from "./gpu";
+
+// Mock GPU globals again (copied from gpu.test.ts; ideally these mocks would
+// be refactored into a shared setup file, but for now they are inlined).
+const mockDevice = {
+  createShaderModule: mock(() => ({})),
+  createComputePipeline: mock(() => ({
+    getBindGroupLayout: mock(() => ({}))
+  })),
+  createBuffer: mock((desc: any) => ({
+    getMappedRange: () => new ArrayBuffer(desc.size),
+    unmap: () => {},
+    mapAsync: async () => {}
+  })),
+  createBindGroup: mock(() => ({})),
+  createCommandEncoder: mock(() => ({
+    beginComputePass: mock(() => ({
+      setPipeline: mock(() => {}),
+      setBindGroup: mock(() => {}),
+      dispatchWorkgroups: mock(() => {}),
+      end: mock(() => {})
+    })),
+    copyBufferToBuffer: mock(() => {}),
+    finish: mock(() => ({}))
+  })),
+  queue: {
+    writeBuffer: mock(() => {}),
+    submit: mock(() => {})
+  }
+};
+
+const mockAdapter = {
+  requestDevice: mock(async () => mockDevice)
+};
+
+// @ts-ignore
+global.navigator = { gpu: { requestAdapter: mock(async () => mockAdapter) } };
+// @ts-ignore
+global.GPUBufferUsage = { STORAGE: 1, COPY_DST: 2, COPY_SRC: 4, UNIFORM: 8, MAP_READ: 16 };
+// @ts-ignore
+global.GPUMapMode = { READ: 1 };
+
+
+describe("Training Loop", () => {
+
+  test("prepareTrainingBuffers allocation (Batch=1)", async () => {
+    const gpu = new GPUEngine();
+    await gpu.init();
+
+    gpu.networkSize = 10;
+    gpu.batchSize = 1;
+    gpu.weightBuffer = {} as any;
+    gpu.outputBuffer = {} as any;
+    gpu.biasBuffer = {} as any;
+    gpu.uniformBuffer = {} as any;
+
+    gpu.prepareTrainingBuffers(new Float32Array(10), 0.01);
+
+    expect(mockDevice.createBuffer).toHaveBeenCalled();
+    expect(gpu.deltaBuffer).toBeDefined();
+    expect(gpu.targetBuffer).toBeDefined();
+  });
+
+  test("trainTick dispatch (Batch=1)", async () => {
+    const gpu = new GPUEngine();
+    await gpu.init();
+    gpu.networkSize = 10;
+    gpu.batchSize = 1;
+    gpu.weightBuffer = {} as any;
+    gpu.outputBuffer = {} as any;
+    gpu.biasBuffer = {} as any;
+    gpu.uniformBuffer = {} as any;
+
+    gpu.prepareTrainingBuffers(new Float32Array(10), 0.01);
+
+    await gpu.trainTick(); // Uses internal buffers
+
+    // trainTick should dispatch twice (delta calc + weight update), but the
+    // mock layer is too simple to count nested calls, so assert on the
+    // top-level submit instead.
+    expect(mockDevice.queue.submit).toHaveBeenCalled();
+  });
+
+  test("prepareTrainingBuffers allocation (Batch=2)", async () => {
+    const gpu = new GPUEngine();
+    await gpu.init();
+
+    gpu.networkSize = 10;
+    gpu.batchSize = 2; // Test Batch > 1
+    gpu.weightBuffer = {} as any;
+    gpu.outputBuffer = {} as any;
+    gpu.biasBuffer = {} as any;
+    gpu.uniformBuffer = {} as any;
+
+    // Target size must be 20
+    gpu.prepareTrainingBuffers(new Float32Array(20), 0.01);
+
+    expect(mockDevice.createBuffer).toHaveBeenCalled();
+    expect(gpu.deltaBuffer).toBeDefined();
+    // The mock doesn't record buffer sizes; not throwing a size-mismatch
+    // error is the effective assertion for the batched allocation.
+  });
+});

package/src/engine/translator.test.ts
ADDED
@@ -0,0 +1,58 @@
+import { expect, test, describe, mock } from "bun:test";
+import { Translator } from "./translator";
+import { Neuron, Synapse } from "../types";
+
+mock.module("@buley/dash", () => ({
+  dash: {}
+}));
+
+describe("Translator", () => {
+  test("flatten() correctly converts graph to matrices", () => {
+    const translator = new Translator();
+
+    const neurons: Neuron[] = [
+      { id: "n1", type: "input", bias: 0.5, activation: "tanh" },
+      { id: "n2", type: "output", bias: -0.2, activation: "tanh" }
+    ];
+
+    const synapses: Synapse[] = [
+      // Connection n1 -> n2 (weight 0.8)
+      { id: "s1", from_id: "n1", to_id: "n2", weight: 0.8 }
+    ];
+
+    const result = translator.flatten(neurons, synapses);
+
+    expect(result.size).toBe(2);
+
+    // Check Biases
+    expect(result.biases[0]).toBe(0.5);
+    expect(result.biases[1]).toBeCloseTo(-0.2);
+
+    // Check Weights Matrix (2x2 = 4 elements).
+    // The matrix is flattened as weights[row * size + col], where
+    // row = target neuron and col = source neuron.
+    // n1 is index 0, n2 is index 1, and s1 connects 0 -> 1, so:
+    // flatIndex = (toIdx * size) + fromIdx = (1 * 2) + 0 = 2.
+    expect(result.weights[2]).toBeCloseTo(0.8);
+
+    // All other cells should be 0
+    expect(result.weights[0]).toBe(0); // n1 -> n1
+    expect(result.weights[1]).toBe(0); // n2 -> n1 (row 0, col 1)
+    expect(result.weights[3]).toBe(0); // n2 -> n2
+  });
+
+  test("handles empty graph", () => {
+    const translator = new Translator();
+    const result = translator.flatten([], []);
+    expect(result.size).toBe(0);
+    expect(result.weights.length).toBe(0);
+  });
+});

package/src/engine/translator.ts
ADDED
@@ -0,0 +1,54 @@
+import { Neuron, Synapse } from "../types";
+
+export class Translator {
+  // Maps Neuron logical IDs (UUIDs) to Matrix Indices (0...N)
+  private idToIndex: Map<string, number> = new Map();
+  private indexToId: string[] = [];
+
+  // Converts Graph -> Dense Matrices
+  flatten(neurons: Neuron[], synapses: Synapse[]): {
+    size: number,
+    weights: Float32Array,
+    biases: Float32Array,
+    initialValues: Float32Array
+  } {
+    const size = neurons.length;
+    this.idToIndex.clear();
+    this.indexToId = new Array(size);
+
+    // 1. Map IDs
+    neurons.forEach((n, i) => {
+      this.idToIndex.set(n.id, i);
+      this.indexToId[i] = n.id;
+    });
+
+    // 2. Prepare Biases & Initial Values
+    const biases = new Float32Array(size);
+    const initialValues = new Float32Array(size); // Default 0
+
+    neurons.forEach((n, i) => {
+      biases[i] = n.bias;
+      // initialValues could be persisted state, but defaulting to 0 for now
+    });
+
+    // 3. Prepare Weights Matrix (N x N)
+    // Flattened row-major to match the shader: weights[row * size + col],
+    // where row = target neuron and col = source neuron
+    const weights = new Float32Array(size * size);
+
+    synapses.forEach(s => {
+      const fromIdx = this.idToIndex.get(s.from_id);
+      const toIdx = this.idToIndex.get(s.to_id);
+
+      if (fromIdx !== undefined && toIdx !== undefined) {
+        // target (row) = toIdx
+        // source (col) = fromIdx
+        const flatIndex = (toIdx * size) + fromIdx;
+        weights[flatIndex] = s.weight;
+      }
+    });
+
+    return { size, weights, biases, initialValues };
+  }
+}

package/src/index.ts
ADDED
@@ -0,0 +1,118 @@
+import { dash } from "@buley/dash";
+import { initializeSchema } from "./db/schema";
+import { NeuronRepository, SynapseRepository } from "./db/repository";
+import { GPUEngine } from "./engine/gpu";
+import { Translator } from "./engine/translator";
+
+export class NeuralEngine {
+  gpu: GPUEngine;
+  neuronRepo: NeuronRepository;
+  synapseRepo: SynapseRepository;
+  private translator: Translator;
+
+  constructor() {
+    this.gpu = new GPUEngine();
+    this.neuronRepo = new NeuronRepository();
+    this.synapseRepo = new SynapseRepository();
+    this.translator = new Translator();
+  }
+
+  // Cache
+  private neurons: any[] = [];
+  private synapses: any[] = [];
+
+  async init() {
+    console.log("Neural 2.0 Engine Initializing...");
+
+    // 1. Persistence
+    await dash.ready();
+    await initializeSchema();
+
+    // 2. Compute
+    await this.gpu.init();
+    this.gpu.batchSize = 2; // Default to mini-batch of 2 for demo
+
+    // 3. Hydration
+    this.neurons = await this.neuronRepo.getAll();
+    this.synapses = await this.synapseRepo.getAll();
+
+    if (this.neurons.length === 0) {
+      console.log("Seeding test network...");
+      const n1 = "n1-" + crypto.randomUUID();
+      const n2 = "n2-" + crypto.randomUUID();
+      await this.neuronRepo.create({ id: n1, type: 'input', bias: 0, activation: 'tanh' });
+      await this.neuronRepo.create({ id: n2, type: 'output', bias: 0.5, activation: 'tanh' });
+      await this.synapseRepo.create({ id: crypto.randomUUID(), from_id: n1, to_id: n2, weight: 0.8 });
+
+      // Seed a random cluster so the visualizer has something to show
+      for (let i = 0; i < 50; i++) {
+        await this.neuronRepo.create({ id: `auto-${i}`, type: 'hidden', bias: 0, activation: 'tanh' });
+      }
+      // Connect them
+      const all = await this.neuronRepo.getAll();
+      for (let i = 0; i < 50; i++) {
+        const s = all[Math.floor(Math.random() * all.length)].id;
+        const t = all[Math.floor(Math.random() * all.length)].id;
+        if (s !== t) await this.synapseRepo.create({ id: crypto.randomUUID(), from_id: s, to_id: t, weight: Math.random() });
+      }
+
+      this.neurons = await this.neuronRepo.getAll();
+      this.synapses = await this.synapseRepo.getAll();
+    }
+
+    // 4. Compile to GPU
+    console.log(`Compiling graph: ${this.neurons.length} neurons, ${this.synapses.length} synapses`);
+    const data = this.translator.flatten(this.neurons, this.synapses);
+
+    this.gpu.prepareBuffers(data.size, data.weights, data.biases, this.gpu.batchSize);
+    // Also prepare training buffers, with the target buffer zero-initialized
+    this.gpu.prepareTrainingBuffers(new Float32Array(data.size * this.gpu.batchSize), 0.1);
+
+    console.log("Engine Ready.");
+    return data;
+  }
+
+  getGraphData() {
+    // Map ID -> Index
+    const map = new Map<string, number>();
+    this.neurons.forEach((n, i) => map.set(n.id, i));
+
+    const edges = this.synapses.map(s => ({
+      id: s.id,
+      source: map.get(s.from_id) || 0,
+      target: map.get(s.to_id) || 0,
+      weight: s.weight
+    }));
+
+    return {
+      nodeCount: this.neurons.length,
+      edges
+    };
+  }
+
+  async deleteSynapse(id: string) {
+    console.log(`Lesioning synapse: ${id}`);
+    await this.synapseRepo.delete(id);
+
+    // Update Cache
+    this.synapses = this.synapses.filter(s => s.id !== id);
+
+    // Recompile (heavy!). In a real app we'd just zero the weight in the
+    // buffer, but for "The Visible Brain" seeing it disappear is cooler.
+    const data = this.translator.flatten(this.neurons, this.synapses);
+    this.gpu.prepareBuffers(data.size, data.weights, data.biases, this.gpu.batchSize);
+    // Reset training buffers too, to keep things safe and simple
+    this.gpu.prepareTrainingBuffers(new Float32Array(data.size * this.gpu.batchSize), 0.1);
+
+    return this.getGraphData();
+  }
+}
+
+// Keep a standalone init for backward compatibility or simple scripts if needed
+export async function init() {
+  const engine = new NeuralEngine();
+  return engine.init();
+}

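For orientation, a minimal consumer sketch of the API exported above — hypothetical code, not part of the package, assuming a WebGPU-capable browser. One caveat visible in the diff itself: deleteSynapse() calls this.synapseRepo.delete(id), but repository.ts as published defines no delete method, so lesioning would throw against this version.

import { NeuralEngine } from "@buley/neural";

// Boot: opens the dash store, initializes the schema, seeds a demo
// network if the database is empty, and compiles the graph onto the GPU.
const engine = new NeuralEngine();
const compiled = await engine.init();

// Graph snapshot for a visualizer
const graph = engine.getGraphData();
console.log(`${graph.nodeCount} neurons, ${graph.edges.length} synapses`);

// One supervised step. batchSize defaults to 2 in init(), so inputs and
// targets are sized N * 2; targets at or below -998 mean "no supervision".
const n = compiled.size * engine.gpu.batchSize;
const inputs = new Float32Array(n).fill(0.1);
const targets = new Float32Array(n).fill(-999);
targets[0] = 0.5; // supervise one neuron in batch item 0

engine.gpu.subscribe(e => { if (e.type === 'loss') console.log('loss:', e.value); });
await engine.gpu.train(inputs, targets);
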
package/src/types.ts
ADDED
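The viewer shows no body for this file (+13 lines per the summary above). From how Neuron and Synapse are consumed in repository.ts and translator.ts, the definitions are presumably close to the following reconstruction — inferred, not copied from the published source:

// Reconstructed from usage elsewhere in the package; not the published file.
export interface Neuron {
  id: string;
  type: "input" | "hidden" | "output";
  bias: number;
  activation: string; // 'tanh' | 'relu' | 'sigmoid' per the schema comments
}

export interface Synapse {
  id: string;
  from_id: string;
  to_id: string;
  weight: number;
}
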
package/tsconfig.json
ADDED
@@ -0,0 +1,24 @@
+{
+  "compilerOptions": {
+    "target": "ESNext",
+    "useDefineForClassFields": true,
+    "module": "ESNext",
+    "lib": ["ESNext", "DOM", "DOM.Iterable"],
+    "skipLibCheck": true,
+
+    /* Bundler mode */
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "resolveJsonModule": true,
+    "isolatedModules": true,
+    "noEmit": true,
+
+    /* Linting */
+    "strict": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "noFallthroughCasesInSwitch": true,
+    "types": ["@webgpu/types", "bun-types"]
+  },
+  "include": ["src"]
+}

package/vite.config.ts
ADDED
@@ -0,0 +1,13 @@
+import { defineConfig } from 'vite';
+
+export default defineConfig({
+  server: {
+    headers: {
+      'Cross-Origin-Opener-Policy': 'same-origin',
+      'Cross-Origin-Embedder-Policy': 'require-corp',
+    },
+  },
+  optimizeDeps: {
+    exclude: ['@buley/dash', '@sqlite.org/sqlite-wasm'],
+  },
+});