effect-gpt 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -0
- package/data/chat_training_data.json +55 -0
- package/data/pretraining_data.json +27 -0
- package/package.json +25 -0
- package/src/cli/errors.ts +51 -0
- package/src/cli/main.ts +163 -0
- package/src/config.ts +3 -0
- package/src/data/Dataset.ts +168 -0
- package/src/errors.ts +73 -0
- package/src/index.ts +88 -0
- package/src/model/Embeddings.ts +108 -0
- package/src/model/FeedForward.ts +121 -0
- package/src/model/LLM.ts +124 -0
- package/src/model/LayerNorm.ts +138 -0
- package/src/model/ModelLayer.ts +10 -0
- package/src/model/OutputProjection.ts +76 -0
- package/src/model/SelfAttention.ts +169 -0
- package/src/model/TransformerBlock.ts +53 -0
- package/src/services/Logger.ts +124 -0
- package/src/services/Metrics.ts +260 -0
- package/src/services/Random.ts +98 -0
- package/src/services/SeedLayer.ts +39 -0
- package/src/services/index.ts +32 -0
- package/src/tensor/Tensor2D.ts +42 -0
- package/src/tensor/ops.ts +371 -0
- package/src/tensor/random.ts +32 -0
- package/src/tokenize/split.ts +27 -0
- package/src/tokenize/tokenize.ts +28 -0
- package/src/training/Adam.ts +61 -0
- package/src/training/clip.ts +16 -0
- package/src/training/loss.ts +35 -0
- package/src/training/train.ts +203 -0
- package/src/vocab/Vocab.ts +79 -0
- package/tests/fixtures/csv_bad.csv +2 -0
- package/tests/fixtures/csv_good.csv +3 -0
- package/tests/ts/cli_error_format.test.ts +26 -0
- package/tests/ts/dataset.test.ts +35 -0
- package/tests/ts/embeddings.test.ts +81 -0
- package/tests/ts/errors.test.ts +36 -0
- package/tests/ts/feed_forward.test.ts +74 -0
- package/tests/ts/initNormal.test.ts +41 -0
- package/tests/ts/layer_norm.test.ts +96 -0
- package/tests/ts/llm_parameters.test.ts +96 -0
- package/tests/ts/llm_predict.test.ts +98 -0
- package/tests/ts/llm_tokenize.test.ts +69 -0
- package/tests/ts/output_projection.test.ts +78 -0
- package/tests/ts/random.test.ts +44 -0
- package/tests/ts/self_attention.test.ts +63 -0
- package/tests/ts/support/factories.ts +126 -0
- package/tests/ts/support/runEffect.ts +29 -0
- package/tests/ts/support/seed.ts +12 -0
- package/tests/ts/support/stubs.ts +58 -0
- package/tests/ts/support/tensorMatchers.ts +96 -0
- package/tests/ts/support.test.ts +165 -0
- package/tests/ts/train_loop.test.ts +229 -0
- package/tests/ts/transformer_block.test.ts +72 -0
- package/tsconfig.json +20 -0
- package/tsconfig.test.json +8 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Custom tensor assertion helpers for deterministic testing.
|
|
3
|
+
*/
|
|
4
|
+
import { expect } from "bun:test"
|
|
5
|
+
import type { Tensor2D } from "../../../src/tensor/Tensor2D"
|
|
6
|
+
|
|
7
|
+
const DEFAULT_EPSILON = 1e-5
|
|
8
|
+
|
|
9
|
+
/**
 * Checks a tensor's dimensions against an expected `[rows, cols]` pair.
 *
 * The row count is asserted before the column count, so a tensor that is
 * wrong in both dimensions reports the row mismatch.
 *
 * @param tensor - Tensor whose `rows` and `cols` fields are inspected.
 * @param shape - Expected dimensions, given as `[rows, cols]`.
 */
export const expectShape = (tensor: Tensor2D, shape: [number, number]): void => {
  const [expectedRows, expectedCols] = shape
  expect(tensor.rows).toBe(expectedRows)
  expect(tensor.cols).toBe(expectedCols)
}
|
|
16
|
+
|
|
17
|
+
/**
 * Asserts that two tensors have the same shape and that every element of
 * `actual` is within `epsilon` of the element at the same index in `expected`.
 *
 * NaN never counts as close: if either element is NaN (or missing), the
 * assertion fails. Strictly equal values, including two infinities of the
 * same sign, count as a difference of zero.
 *
 * @param actual - Tensor produced by the code under test.
 * @param expected - Reference tensor.
 * @param epsilon - Maximum allowed absolute difference per element.
 * @throws Error naming the first `[row, col]` position that is not within
 *   `epsilon`, together with both values and the computed difference.
 */
export const expectClose = (
  actual: Tensor2D,
  expected: Tensor2D,
  epsilon: number = DEFAULT_EPSILON
): void => {
  expectShape(actual, [expected.rows, expected.cols])

  for (let i = 0; i < actual.data.length; i++) {
    const a = actual.data[i]
    const e = expected.data[i]
    // Equal values short-circuit to 0 so that Infinity vs Infinity
    // (whose subtraction yields NaN) is still treated as a match.
    const diff = a === e ? 0 : Math.abs(a - e)
    // Negated `<=` rather than `>`: every comparison with NaN is false, so
    // `diff > epsilon` would let NaN elements pass silently.
    if (!(diff <= epsilon)) {
      const row = Math.floor(i / actual.cols)
      const col = i % actual.cols
      throw new Error(
        `Tensor mismatch at [${row}, ${col}]: got ${a}, expected ${e}, diff ${diff} > epsilon ${epsilon}`
      )
    }
  }
}
|
|
40
|
+
|
|
41
|
+
/**
 * Asserts that every element of a tensor is within `epsilon` of one target
 * value.
 *
 * NaN never counts as close: a NaN element fails the assertion. An element
 * strictly equal to `value` (including a matching infinity) counts as a
 * difference of zero.
 *
 * @param tensor - Tensor whose elements are checked.
 * @param value - Target value every element should be near.
 * @param epsilon - Maximum allowed absolute difference per element.
 * @throws Error naming the first `[row, col]` position that is not within
 *   `epsilon` of `value`.
 */
export const expectAllClose = (
  tensor: Tensor2D,
  value: number,
  epsilon: number = DEFAULT_EPSILON
): void => {
  for (let i = 0; i < tensor.data.length; i++) {
    const v = tensor.data[i]
    // Equal values short-circuit to 0 so that Infinity vs Infinity
    // (whose subtraction yields NaN) is still treated as a match.
    const diff = v === value ? 0 : Math.abs(v - value)
    // Negated `<=` rather than `>`: every comparison with NaN is false, so
    // `diff > epsilon` would let NaN elements pass silently.
    if (!(diff <= epsilon)) {
      const row = Math.floor(i / tensor.cols)
      const col = i % tensor.cols
      throw new Error(
        `Tensor element at [${row}, ${col}]: got ${v}, expected ~${value}, diff ${diff} > epsilon ${epsilon}`
      )
    }
  }
}
|
|
61
|
+
|
|
62
|
+
/**
 * Asserts that two tensors differ: either their shapes are different, or at
 * least one pair of elements is more than `epsilon` apart.
 *
 * A pair whose difference is NaN is not counted as differing, because
 * `NaN > epsilon` evaluates to false.
 *
 * @param actual - Tensor produced by the code under test.
 * @param expected - Tensor that `actual` must not match.
 * @param epsilon - Absolute difference above which two elements count as different.
 * @throws Error when the shapes match and no element pair exceeds `epsilon`.
 */
export const expectNotClose = (
  actual: Tensor2D,
  expected: Tensor2D,
  epsilon: number = DEFAULT_EPSILON
): void => {
  const sameShape = actual.rows === expected.rows && actual.cols === expected.cols
  if (!sameShape) {
    // Tensors of different shapes are different by definition.
    return
  }

  let idx = 0
  while (idx < actual.data.length) {
    const gap = Math.abs(actual.data[idx] - expected.data[idx])
    if (gap > epsilon) {
      // One differing element is enough.
      return
    }
    idx += 1
  }

  throw new Error("Expected tensors to differ, but they are equal within epsilon")
}
|
|
83
|
+
|
|
84
|
+
/**
 * Asserts that every element of a tensor is a finite number.
 *
 * @param tensor - Tensor whose elements are checked with `Number.isFinite`.
 * @throws Error naming the `[row, col]` position and value of the first
 *   element that is NaN, Infinity or -Infinity.
 */
export const expectFinite = (tensor: Tensor2D): void => {
  for (let idx = 0; idx < tensor.data.length; idx += 1) {
    const value = tensor.data[idx]
    if (Number.isFinite(value)) {
      continue
    }
    // Convert the flat index into a [row, col] pair for the error message.
    const row = Math.floor(idx / tensor.cols)
    const col = idx % tensor.cols
    throw new Error(`Non-finite value at [${row}, ${col}]: ${value}`)
  }
}
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for Tier 2 test utilities.
|
|
3
|
+
*/
|
|
4
|
+
import { describe, expect, it } from "bun:test"
|
|
5
|
+
import * as T from "../../src/tensor/Tensor2D"
|
|
6
|
+
import {
|
|
7
|
+
expectShape,
|
|
8
|
+
expectClose,
|
|
9
|
+
expectAllClose,
|
|
10
|
+
expectNotClose,
|
|
11
|
+
expectFinite
|
|
12
|
+
} from "./support/tensorMatchers"
|
|
13
|
+
import { runEffect, runEffectFail } from "./support/runEffect"
|
|
14
|
+
import {
|
|
15
|
+
makeEmbeddings,
|
|
16
|
+
makeSelfAttention,
|
|
17
|
+
makeFeedForward,
|
|
18
|
+
makeLayerNorm,
|
|
19
|
+
makeTransformerBlock,
|
|
20
|
+
makeOutputProjection,
|
|
21
|
+
makeRng
|
|
22
|
+
} from "./support/factories"
|
|
23
|
+
import * as Effect from "effect/Effect"
|
|
24
|
+
import { ShapeError } from "../../src/tensor/ops"
|
|
25
|
+
import { EMBEDDING_DIM, HIDDEN_DIM } from "../../src/config"
|
|
26
|
+
|
|
27
|
+
// Self-tests for the tensor assertion helpers: each helper is run on an input
// it must accept and, where covered, on an input that must make it throw.
describe("tensorMatchers", () => {
  // Builds a 2x2 tensor from the given values via T.fromArray.
  const square = (values: Array<number>) => T.fromArray(2, 2, values)

  it("expectShape passes for correct shape", () => {
    expectShape(T.zeros(3, 4), [3, 4])
  })

  it("expectShape fails for wrong shape", () => {
    const tensor = T.zeros(3, 4)
    const check = () => expectShape(tensor, [3, 5])
    expect(check).toThrow()
  })

  it("expectClose passes for identical tensors", () => {
    const left = square([1, 2, 3, 4])
    const right = square([1, 2, 3, 4])
    expectClose(left, right)
  })

  it("expectClose passes within epsilon", () => {
    // Every element is off by 1e-6, which is below the default tolerance.
    const base = square([1.0, 2.0, 3.0, 4.0])
    const nudged = square([1.000001, 2.000001, 3.000001, 4.000001])
    expectClose(base, nudged)
  })

  it("expectClose fails beyond epsilon", () => {
    const left = square([1, 2, 3, 4])
    const right = square([1, 2, 3, 5])
    const check = () => expectClose(left, right)
    expect(check).toThrow()
  })

  it("expectAllClose passes when all near value", () => {
    const nearOne = square([1.0, 1.000005, 0.999995, 1.0])
    expectAllClose(nearOne, 1.0)
  })

  it("expectNotClose passes when tensors differ", () => {
    const left = square([1, 2, 3, 4])
    const right = square([1, 2, 3, 100])
    expectNotClose(left, right)
  })

  it("expectNotClose fails when tensors are equal", () => {
    const left = square([1, 2, 3, 4])
    const right = square([1, 2, 3, 4])
    const check = () => expectNotClose(left, right)
    expect(check).toThrow()
  })

  it("expectFinite passes for finite values", () => {
    expectFinite(square([1, 2, 3, 4]))
  })

  it("expectFinite fails for NaN", () => {
    const withNaN = square([1, NaN, 3, 4])
    const check = () => expectFinite(withNaN)
    expect(check).toThrow()
  })

  it("expectFinite fails for Infinity", () => {
    const withInfinity = square([1, Infinity, 3, 4])
    const check = () => expectFinite(withInfinity)
    expect(check).toThrow()
  })
})
|
|
88
|
+
|
|
89
|
+
// Self-tests for the synchronous Effect runners used throughout the test suite.
describe("runEffect", () => {
  it("returns value from successful effect", () => {
    const result = runEffect(Effect.succeed(42))
    expect(result).toBe(42)
  })

  it("throws on failed effect", () => {
    const failing = Effect.fail(new ShapeError("test error"))
    const run = () => runEffect(failing)
    expect(run).toThrow()
  })

  it("runEffectFail returns the error", () => {
    // runEffectFail hands back the failure value instead of throwing it.
    const failing = Effect.fail(new ShapeError("test error"))
    const failure = runEffectFail(failing)
    expect(failure).toBeInstanceOf(ShapeError)
    expect(failure.message).toBe("test error")
  })
})
|
|
107
|
+
|
|
108
|
+
// Self-tests for the model factories: building twice with the same seed must
// give matching weights, and a different seed must give different ones.
describe("factories", () => {
  it("makeEmbeddings produces deterministic weights", () => {
    const first = makeEmbeddings(100, { seed: 42 })
    const second = makeEmbeddings(100, { seed: 42 })
    expectClose(first.tokenEmbeddings, second.tokenEmbeddings)
    expectClose(first.positionalEmbeddings, second.positionalEmbeddings)
  })

  it("makeEmbeddings with different seeds produces different weights", () => {
    const seeded42 = makeEmbeddings(100, { seed: 42 })
    const seeded43 = makeEmbeddings(100, { seed: 43 })
    expectNotClose(seeded42.tokenEmbeddings, seeded43.tokenEmbeddings)
  })

  it("makeSelfAttention produces deterministic weights", () => {
    const first = makeSelfAttention({ seed: 42 })
    const second = makeSelfAttention({ seed: 42 })
    expectClose(first.wQ, second.wQ)
    expectClose(first.wK, second.wK)
    expectClose(first.wV, second.wV)
  })

  it("makeFeedForward produces deterministic weights", () => {
    const first = makeFeedForward({ seed: 42 })
    const second = makeFeedForward({ seed: 42 })
    expectClose(first.w1, second.w1)
    expectClose(first.w2, second.w2)
  })

  it("makeLayerNorm initializes correctly", () => {
    const layerNorm = makeLayerNorm()
    const expectedShape: [number, number] = [1, EMBEDDING_DIM]
    expectShape(layerNorm.gamma, expectedShape)
    expectShape(layerNorm.beta, expectedShape)
    // A fresh layer norm is expected to start with gamma at 1 and beta at 0.
    expectAllClose(layerNorm.gamma, 1.0)
    expectAllClose(layerNorm.beta, 0.0)
  })

  it("makeTransformerBlock produces deterministic weights", () => {
    const first = makeTransformerBlock({ seed: 42 })
    const second = makeTransformerBlock({ seed: 42 })
    expectClose(first.attention.wQ, second.attention.wQ)
    expectClose(first.feedForward.w1, second.feedForward.w1)
  })

  it("makeOutputProjection produces deterministic weights", () => {
    const first = makeOutputProjection(100, { seed: 42 })
    const second = makeOutputProjection(100, { seed: 42 })
    expectClose(first.wOut, second.wOut)
  })

  it("makeRng produces deterministic sequences", () => {
    // Draws the first five values from a generator.
    const firstFive = (rng: ReturnType<typeof makeRng>) =>
      Array.from({ length: 5 }, () => rng.next())

    const sequenceA = firstFive(makeRng(42))
    const sequenceB = firstFive(makeRng(42))
    expect(sequenceA).toEqual(sequenceB)
  })
})
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
import { describe, test, expect } from "bun:test"
|
|
2
|
+
import * as Effect from "effect/Effect"
|
|
3
|
+
import * as Layer from "effect/Layer"
|
|
4
|
+
import { makeLLM } from "./support/factories"
|
|
5
|
+
import { expectNotClose } from "./support/tensorMatchers"
|
|
6
|
+
import { CANONICAL_SEED } from "./support/seed"
|
|
7
|
+
import {
|
|
8
|
+
train,
|
|
9
|
+
makeLLMLayer,
|
|
10
|
+
makeTrainingConfigLayer,
|
|
11
|
+
makePreprocessSettingsLayer
|
|
12
|
+
} from "../../src/training/train"
|
|
13
|
+
import { Embeddings } from "../../src/model/Embeddings"
|
|
14
|
+
import { TransformerBlock } from "../../src/model/TransformerBlock"
|
|
15
|
+
import { OutputProjection } from "../../src/model/OutputProjection"
|
|
16
|
+
import * as T from "../../src/tensor/Tensor2D"
|
|
17
|
+
import { SilentLoggerLive, Logger } from "../../src/services/Logger"
|
|
18
|
+
import { NoOpMetricsLive } from "../../src/services/Metrics"
|
|
19
|
+
import { TestServicesLayer as BaseTestServicesLayer } from "./support/stubs"
|
|
20
|
+
|
|
21
|
+
// Shared test services extended with the preprocessing settings used by the
// training tests (unbounded concurrency, batch size 1).
const trainLoopPreprocessLayer = makePreprocessSettingsLayer({ concurrency: "unbounded", batchSize: 1 })
const TestServicesLayer = Layer.mergeAll(BaseTestServicesLayer, trainLoopPreprocessLayer)
|
|
25
|
+
|
|
26
|
+
// End-to-end checks of the training loop on a six-word vocabulary and a
// three-sentence corpus.
describe("Train Loop", () => {
  const tinyVocab = ["hello", "world", "is", "this", "test", "</s>"]
  const tinyCorpus = ["hello world </s>", "this is </s>", "test world </s>"]

  const createTinyLLM = (seed: number = CANONICAL_SEED) =>
    makeLLM({ seed, vocabWords: tinyVocab, numTransformerBlocks: 1 })

  // Configuration used by every test that does not vary epochs or learning rate.
  const singleEpochConfig = { epochs: 1, learningRate: 0.01 }

  /**
   * Runs `train` synchronously against the shared test services.
   *
   * @param llm - Model instance trained in place.
   * @param corpus - Training sentences; defaults to `tinyCorpus`.
   * @param config - Epoch count and learning rate; defaults to one epoch at 0.01.
   */
  const runTraining = (
    llm: ReturnType<typeof createTinyLLM>,
    corpus: Array<string> = tinyCorpus,
    config: { epochs: number; learningRate: number } = singleEpochConfig
  ) =>
    Effect.runSync(
      train(corpus).pipe(
        Effect.provide(makeLLMLayer(llm)),
        Effect.provide(makeTrainingConfigLayer(config)),
        Effect.provide(TestServicesLayer)
      )
    )

  // NOTE(review): the casts below assume the network is laid out as
  // [Embeddings, TransformerBlock, OutputProjection] when numTransformerBlocks
  // is 1 — confirm against makeLLM.

  test("training mutates embeddings weights", () => {
    const llm = createTinyLLM()
    const embeddings = llm.network[0] as Embeddings
    const tokenEmbeddingsBefore = T.clone(embeddings.tokenEmbeddings)

    runTraining(llm)

    expectNotClose(embeddings.tokenEmbeddings, tokenEmbeddingsBefore)
  })

  test("training mutates transformer weights", () => {
    const llm = createTinyLLM()
    const transformer = llm.network[1] as TransformerBlock
    const w1Before = T.clone(transformer.feedForward.w1)
    const wQBefore = T.clone(transformer.attention.wQ)

    runTraining(llm)

    expectNotClose(transformer.feedForward.w1, w1Before)
    expectNotClose(transformer.attention.wQ, wQBefore)
  })

  test("training mutates output projection weights", () => {
    const llm = createTinyLLM()
    const output = llm.network[2] as OutputProjection
    const wOutBefore = T.clone(output.wOut)

    runTraining(llm)

    expectNotClose(output.wOut, wOutBefore)
  })

  test("loss decreases over epochs", async () => {
    const llm = createTinyLLM()
    const losses: Array<number> = []

    // Records the `loss` field of a log payload when one is present.
    const captureLoss = (data?: Record<string, unknown>) => {
      if (data?.loss !== undefined) {
        losses.push(data.loss as number)
      }
      return Effect.void
    }

    // Logger stub that only inspects `log` and `info` payloads; every other
    // level is a no-op.
    const makeCaptureLossLogger = () => {
      const service = {
        log: (_level: unknown, _message: string, data?: Record<string, unknown>) =>
          captureLoss(data),
        debug: () => Effect.void,
        info: (_message: string, data?: Record<string, unknown>) => captureLoss(data),
        warn: () => Effect.void,
        error: () => Effect.void
      }
      return Layer.succeed(Logger, service)
    }

    // This test wires its services by hand so the capturing logger replaces
    // the one bundled in TestServicesLayer.
    const program = train(tinyCorpus).pipe(
      Effect.provide(makeLLMLayer(llm)),
      Effect.provide(makeTrainingConfigLayer({ epochs: 3, learningRate: 0.01 })),
      Effect.provide(makeCaptureLossLogger()),
      Effect.provide(NoOpMetricsLive),
      Effect.provide(makePreprocessSettingsLayer({ concurrency: "unbounded", batchSize: 1 }))
    )

    await Effect.runPromise(program)

    // Exactly three losses are expected for the three configured epochs.
    expect(losses.length).toBe(3)
    expect(losses[2]).toBeLessThan(losses[0]!)
  })

  test("seeded RNG produces deterministic training", () => {
    const llm1 = createTinyLLM(CANONICAL_SEED)
    const llm2 = createTinyLLM(CANONICAL_SEED)

    runTraining(llm1)
    runTraining(llm2)

    const embeddings1 = llm1.network[0] as Embeddings
    const embeddings2 = llm2.network[0] as Embeddings

    // Exact equality, not a tolerance: identical seeds must give identical weights.
    for (let i = 0; i < embeddings1.tokenEmbeddings.data.length; i++) {
      expect(embeddings1.tokenEmbeddings.data[i]).toBe(embeddings2.tokenEmbeddings.data[i])
    }
  })

  test("empty corpus does not crash", () => {
    const llm = createTinyLLM()
    expect(() => runTraining(llm, [])).not.toThrow()
  })

  test("single example corpus trains", () => {
    const llm = createTinyLLM()
    const embeddings = llm.network[0] as Embeddings
    const before = T.clone(embeddings.tokenEmbeddings)

    runTraining(llm, ["hello world </s>"])

    expectNotClose(embeddings.tokenEmbeddings, before)
  })

  test("multiple epochs further mutate weights", () => {
    const llm = createTinyLLM()
    const embeddings = llm.network[0] as Embeddings

    runTraining(llm)
    const afterEpoch1 = T.clone(embeddings.tokenEmbeddings)

    // A second single-epoch run on the same model must move the weights again.
    runTraining(llm)
    expectNotClose(embeddings.tokenEmbeddings, afterEpoch1)
  })

  test("training with higher learning rate causes larger weight changes", () => {
    const llm1 = createTinyLLM(CANONICAL_SEED)
    const llm2 = createTinyLLM(CANONICAL_SEED)

    const embeddings1 = llm1.network[0] as Embeddings
    const embeddings2 = llm2.network[0] as Embeddings
    // Both models share a seed, so one snapshot is the baseline for both.
    const initial = T.clone(embeddings1.tokenEmbeddings)

    runTraining(llm1, tinyCorpus, { epochs: 1, learningRate: 0.001 })
    runTraining(llm2, tinyCorpus, { epochs: 1, learningRate: 0.1 })

    // Total absolute change of the token embeddings for each learning rate.
    let diff1 = 0
    let diff2 = 0
    for (let i = 0; i < initial.data.length; i++) {
      diff1 += Math.abs(embeddings1.tokenEmbeddings.data[i]! - initial.data[i]!)
      diff2 += Math.abs(embeddings2.tokenEmbeddings.data[i]! - initial.data[i]!)
    }

    expect(diff2).toBeGreaterThan(diff1)
  })
})
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { describe, test, expect } from "bun:test"
|
|
2
|
+
import { runEffect } from "./support/runEffect"
|
|
3
|
+
import { expectShape, expectNotClose, expectFinite } from "./support/tensorMatchers"
|
|
4
|
+
import { makeTransformerBlock } from "./support/factories"
|
|
5
|
+
import * as T from "../../src/tensor/Tensor2D"
|
|
6
|
+
import { EMBEDDING_DIM, HIDDEN_DIM } from "../../src/config"
|
|
7
|
+
|
|
8
|
+
// Shape, finiteness, weight-update and parameter-count checks for a
// transformer block built by the test factory.
describe("TransformerBlock", () => {
  test("forward shape preserved [1, EMBEDDING_DIM]", () => {
    const block = makeTransformerBlock()
    const result = runEffect(block.forward(T.ones(1, EMBEDDING_DIM)))
    expectShape(result, [1, EMBEDDING_DIM])
  })

  test("forward shape preserved across sequence lengths", () => {
    const block = makeTransformerBlock()
    // The same block instance is reused for sequence lengths 1 through 4.
    for (let length = 1; length <= 4; length++) {
      const sequence = T.ones(length, EMBEDDING_DIM)
      const result = runEffect(block.forward(sequence))
      expectShape(result, [length, EMBEDDING_DIM])
    }
  })

  test("output contains finite values", () => {
    const block = makeTransformerBlock()
    const result = runEffect(block.forward(T.ones(2, EMBEDDING_DIM)))
    expectFinite(result)
  })

  test("backward shape matches input", () => {
    const block = makeTransformerBlock()
    const sequence = T.ones(2, EMBEDDING_DIM)
    // Forward runs first; its result is not needed here.
    runEffect(block.forward(sequence))
    const upstream = T.ones(2, EMBEDDING_DIM)
    const inputGrad = runEffect(block.backward(upstream, 0.01))
    expectShape(inputGrad, [2, EMBEDDING_DIM])
  })

  test("backward updates sub-layer weights", () => {
    const block = makeTransformerBlock()
    const sequence = T.zeros(2, EMBEDDING_DIM)
    // Fill with a repeating 0.0 .. 0.9 ramp so the input is not constant.
    for (let k = 0; k < sequence.data.length; k++) {
      sequence.data[k] = (k % 10) * 0.1
    }

    const snapshot = {
      wQ: T.clone(block.attention.wQ),
      w1: T.clone(block.feedForward.w1),
      beta: T.clone(block.norm1.beta)
    }

    runEffect(block.forward(sequence))
    const upstream = T.ones(2, EMBEDDING_DIM)
    runEffect(block.backward(upstream, 0.01))

    expectNotClose(block.attention.wQ, snapshot.wQ)
    expectNotClose(block.feedForward.w1, snapshot.w1)
    expectNotClose(block.norm1.beta, snapshot.beta)
  })

  test("parametersCount equals sum of components", () => {
    const block = makeTransformerBlock()

    // Three square EMBEDDING_DIM x EMBEDDING_DIM attention matrices.
    const attentionParams = 3 * EMBEDDING_DIM * EMBEDDING_DIM
    // Two weight matrices plus one HIDDEN_DIM-sized and one EMBEDDING_DIM-sized vector.
    const feedForwardParams =
      EMBEDDING_DIM * HIDDEN_DIM + HIDDEN_DIM + HIDDEN_DIM * EMBEDDING_DIM + EMBEDDING_DIM
    // Two layer norms, each contributing 2 * EMBEDDING_DIM values.
    const normParams = 2 * (2 * EMBEDDING_DIM)

    expect(block.parametersCount).toBe(attentionParams + feedForwardParams + normParams)
  })
})
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "ESNext",
|
|
5
|
+
"moduleResolution": "Bundler",
|
|
6
|
+
"lib": ["ES2022"],
|
|
7
|
+
"types": ["bun-types"],
|
|
8
|
+
"strict": true,
|
|
9
|
+
"esModuleInterop": true,
|
|
10
|
+
"skipLibCheck": true,
|
|
11
|
+
"forceConsistentCasingInFileNames": true,
|
|
12
|
+
"declaration": true,
|
|
13
|
+
"declarationMap": true,
|
|
14
|
+
"sourceMap": true,
|
|
15
|
+
"outDir": "dist",
|
|
16
|
+
"rootDir": "src"
|
|
17
|
+
},
|
|
18
|
+
"include": ["src"],
|
|
19
|
+
"exclude": ["node_modules", "dist"]
|
|
20
|
+
}
|