effect-gpt 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -0
- package/data/chat_training_data.json +55 -0
- package/data/pretraining_data.json +27 -0
- package/package.json +25 -0
- package/src/cli/errors.ts +51 -0
- package/src/cli/main.ts +163 -0
- package/src/config.ts +3 -0
- package/src/data/Dataset.ts +168 -0
- package/src/errors.ts +73 -0
- package/src/index.ts +88 -0
- package/src/model/Embeddings.ts +108 -0
- package/src/model/FeedForward.ts +121 -0
- package/src/model/LLM.ts +124 -0
- package/src/model/LayerNorm.ts +138 -0
- package/src/model/ModelLayer.ts +10 -0
- package/src/model/OutputProjection.ts +76 -0
- package/src/model/SelfAttention.ts +169 -0
- package/src/model/TransformerBlock.ts +53 -0
- package/src/services/Logger.ts +124 -0
- package/src/services/Metrics.ts +260 -0
- package/src/services/Random.ts +98 -0
- package/src/services/SeedLayer.ts +39 -0
- package/src/services/index.ts +32 -0
- package/src/tensor/Tensor2D.ts +42 -0
- package/src/tensor/ops.ts +371 -0
- package/src/tensor/random.ts +32 -0
- package/src/tokenize/split.ts +27 -0
- package/src/tokenize/tokenize.ts +28 -0
- package/src/training/Adam.ts +61 -0
- package/src/training/clip.ts +16 -0
- package/src/training/loss.ts +35 -0
- package/src/training/train.ts +203 -0
- package/src/vocab/Vocab.ts +79 -0
- package/tests/fixtures/csv_bad.csv +2 -0
- package/tests/fixtures/csv_good.csv +3 -0
- package/tests/ts/cli_error_format.test.ts +26 -0
- package/tests/ts/dataset.test.ts +35 -0
- package/tests/ts/embeddings.test.ts +81 -0
- package/tests/ts/errors.test.ts +36 -0
- package/tests/ts/feed_forward.test.ts +74 -0
- package/tests/ts/initNormal.test.ts +41 -0
- package/tests/ts/layer_norm.test.ts +96 -0
- package/tests/ts/llm_parameters.test.ts +96 -0
- package/tests/ts/llm_predict.test.ts +98 -0
- package/tests/ts/llm_tokenize.test.ts +69 -0
- package/tests/ts/output_projection.test.ts +78 -0
- package/tests/ts/random.test.ts +44 -0
- package/tests/ts/self_attention.test.ts +63 -0
- package/tests/ts/support/factories.ts +126 -0
- package/tests/ts/support/runEffect.ts +29 -0
- package/tests/ts/support/seed.ts +12 -0
- package/tests/ts/support/stubs.ts +58 -0
- package/tests/ts/support/tensorMatchers.ts +96 -0
- package/tests/ts/support.test.ts +165 -0
- package/tests/ts/train_loop.test.ts +229 -0
- package/tests/ts/transformer_block.test.ts +72 -0
- package/tsconfig.json +20 -0
- package/tsconfig.test.json +8 -0

package/tests/ts/llm_parameters.test.ts
@@ -0,0 +1,96 @@
import { describe, test, expect } from "bun:test"
import { LLM } from "../../src/model/LLM"
import { Vocab } from "../../src/vocab/Vocab"
import { EMBEDDING_DIM, HIDDEN_DIM, MAX_SEQ_LEN } from "../../src/config"
import { makeLLM } from "./support/factories"
import { seeded } from "../../src/tensor/random"

describe("LLM Parameters", () => {
  const computeExpectedParams = (vocabSize: number, numTransformerBlocks: number): number => {
    const embeddingsParams = vocabSize * EMBEDDING_DIM + MAX_SEQ_LEN * EMBEDDING_DIM

    const selfAttentionParams = 3 * EMBEDDING_DIM * EMBEDDING_DIM
    const feedForwardParams =
      EMBEDDING_DIM * HIDDEN_DIM + HIDDEN_DIM + HIDDEN_DIM * EMBEDDING_DIM + EMBEDDING_DIM
    const layerNormParams = 2 * (2 * EMBEDDING_DIM)
    const transformerBlockParams = selfAttentionParams + feedForwardParams + layerNormParams

    const outputProjectionParams = EMBEDDING_DIM * vocabSize + vocabSize

    return embeddingsParams + numTransformerBlocks * transformerBlockParams + outputProjectionParams
  }

  test("default LLM parameter count matches formula", () => {
    const llm = LLM.default(seeded(1))
    const vocabSize = llm.vocab.words.length
    const expected = computeExpectedParams(vocabSize, 1)
    expect(llm.totalParameters()).toBe(expected)
  })

  test("make LLM parameter count matches formula (3 transformer blocks)", () => {
    const vocab = Vocab.make(Vocab.defaultWords())
    const llm = LLM.make(vocab, seeded(2))
    const vocabSize = vocab.words.length
    const expected = computeExpectedParams(vocabSize, 3)
    expect(llm.totalParameters()).toBe(expected)
  })

  test("factory LLM with 1 block matches formula", () => {
    const vocabWords = Vocab.defaultWords()
    const llm = makeLLM({ vocabWords, numTransformerBlocks: 1 })
    const expected = computeExpectedParams(vocabWords.length, 1)
    expect(llm.totalParameters()).toBe(expected)
  })

  test("factory LLM with 2 blocks matches formula", () => {
    const vocabWords = Vocab.defaultWords()
    const llm = makeLLM({ vocabWords, numTransformerBlocks: 2 })
    const expected = computeExpectedParams(vocabWords.length, 2)
    expect(llm.totalParameters()).toBe(expected)
  })

  test("individual layer params sum to total", () => {
    const llm = LLM.default(seeded(3))
    const sumOfLayers = llm.network.reduce((sum, layer) => sum + layer.parametersCount, 0)
    expect(llm.totalParameters()).toBe(sumOfLayers)
  })

  test("larger vocab increases parameter count", () => {
    const smallVocab = ["a", "b", "</s>"]
    const largeVocab = ["a", "b", "c", "d", "e", "f", "g", "h", "</s>"]

    const smallLLM = makeLLM({ vocabWords: smallVocab, numTransformerBlocks: 1 })
    const largeLLM = makeLLM({ vocabWords: largeVocab, numTransformerBlocks: 1 })

    expect(largeLLM.totalParameters()).toBeGreaterThan(smallLLM.totalParameters())
  })

  test("more transformer blocks increases parameter count", () => {
    const vocabWords = Vocab.defaultWords()
    const llm1Block = makeLLM({ vocabWords, numTransformerBlocks: 1 })
    const llm3Blocks = makeLLM({ vocabWords, numTransformerBlocks: 3 })

    expect(llm3Blocks.totalParameters()).toBeGreaterThan(llm1Block.totalParameters())
  })

  test("parameter count formula components", () => {
    const vocabSize = 6
    const numBlocks = 1

    const embeddingsParams = vocabSize * EMBEDDING_DIM + MAX_SEQ_LEN * EMBEDDING_DIM
    expect(embeddingsParams).toBe(6 * 128 + 80 * 128)

    const selfAttentionParams = 3 * EMBEDDING_DIM * EMBEDDING_DIM
    expect(selfAttentionParams).toBe(3 * 128 * 128)

    const feedForwardParams =
      EMBEDDING_DIM * HIDDEN_DIM + HIDDEN_DIM + HIDDEN_DIM * EMBEDDING_DIM + EMBEDDING_DIM
    expect(feedForwardParams).toBe(128 * 256 + 256 + 256 * 128 + 128)

    const layerNormParams = 2 * (2 * EMBEDDING_DIM)
    expect(layerNormParams).toBe(2 * (2 * 128))

    const outputProjectionParams = EMBEDDING_DIM * vocabSize + vocabSize
    expect(outputProjectionParams).toBe(128 * 6 + 6)
  })
})

package/tests/ts/llm_predict.test.ts
@@ -0,0 +1,98 @@
import { describe, test, expect } from "bun:test"
import * as Option from "effect/Option"
import { runEffect } from "./support/runEffect"
import { makeLLM, makeLLMWithNetwork, makeEmbeddings, makeTransformerBlock } from "./support/factories"
import { StubOutputProjection } from "./support/stubs"
import { Vocab } from "../../src/vocab/Vocab"
import { CANONICAL_SEED } from "./support/seed"

describe("LLM Predict", () => {
  const vocabWords = Vocab.defaultWords()
  const eosTokenId = vocabWords.indexOf("</s>")

  test("predict stops at EOS token", () => {
    const vocab = Vocab.make(vocabWords)
    const stubOutput = new StubOutputProjection(vocabWords.length, eosTokenId, 2)

    const llm = makeLLMWithNetwork({
      vocabWords,
      network: [
        makeEmbeddings(vocabWords.length, { seed: CANONICAL_SEED }),
        makeTransformerBlock({ seed: CANONICAL_SEED }),
        stubOutput
      ]
    })

    const result = runEffect(llm.predict("hello"))
    expect(result).toContain("</s>")
  })

  test("predict output token count respects EOS", () => {
    const stubOutput = new StubOutputProjection(vocabWords.length, eosTokenId, 3)

    const llm = makeLLMWithNetwork({
      vocabWords,
      network: [
        makeEmbeddings(vocabWords.length, { seed: CANONICAL_SEED }),
        makeTransformerBlock({ seed: CANONICAL_SEED }),
        stubOutput
      ]
    })

    const tokens = runEffect(llm.forward("hello"))
    expect(tokens.length).toBeLessThanOrEqual(3)
  })

  test("predict decodes tokens correctly", () => {
    const stubOutput = new StubOutputProjection(vocabWords.length, eosTokenId, 1)

    const llm = makeLLMWithNetwork({
      vocabWords,
      network: [
        makeEmbeddings(vocabWords.length, { seed: CANONICAL_SEED }),
        makeTransformerBlock({ seed: CANONICAL_SEED }),
        stubOutput
      ]
    })

    const tokens = runEffect(llm.forward("hello"))
    for (const tokenId of tokens) {
      const decoded = llm.vocab.decode(tokenId)
      expect(Option.isSome(decoded)).toBe(true)
    }
  })

  test("predict with empty input returns empty string", () => {
    const llm = makeLLM({ vocabWords })
    const result = runEffect(llm.predict(""))
    expect(result).toBe("")
  })

  test("forward with empty input returns empty tokens", () => {
    const llm = makeLLM({ vocabWords })
    const tokens = runEffect(llm.forward(""))
    expect(tokens.length).toBe(0)
  })

  test("predict with seeded RNG is deterministic", () => {
    const createLLM = () => {
      const stubOutput = new StubOutputProjection(vocabWords.length, eosTokenId, 2)
      return makeLLMWithNetwork({
        vocabWords,
        network: [
          makeEmbeddings(vocabWords.length, { seed: CANONICAL_SEED }),
          makeTransformerBlock({ seed: CANONICAL_SEED }),
          stubOutput
        ]
      })
    }

    const llm1 = createLLM()
    const llm2 = createLLM()

    const result1 = runEffect(llm1.predict("hello"))
    const result2 = runEffect(llm2.predict("hello"))

    expect(result1).toBe(result2)
  })
})

package/tests/ts/llm_tokenize.test.ts
@@ -0,0 +1,69 @@
import { describe, test, expect } from "bun:test"
import { Vocab } from "../../src/vocab/Vocab"
import { tokenize } from "../../src/tokenize/tokenize"
import * as Option from "effect/Option"

describe("LLM Tokenize", () => {
  const vocab = Vocab.make(Vocab.defaultWords())

  test("encode single word", () => {
    const tokens = tokenize("hello", vocab)
    expect(tokens.length).toBe(1)
    expect(tokens[0]).toBe(Option.getOrThrow(vocab.encode("hello")))
  })

  test("encode multiple words", () => {
    const tokens = tokenize("hello world", vocab)
    expect(tokens.length).toBe(2)
    expect(tokens[0]).toBe(Option.getOrThrow(vocab.encode("hello")))
    expect(tokens[1]).toBe(Option.getOrThrow(vocab.encode("world")))
  })

  test("</s> token encodes correctly", () => {
    const eosId = vocab.encode("</s>")
    expect(Option.isSome(eosId)).toBe(true)
    const tokens = tokenize("</s>", vocab)
    expect(tokens.length).toBe(1)
    expect(tokens[0]).toBe(Option.getOrThrow(eosId))
  })

  test("decode returns original word", () => {
    const tokens = tokenize("hello", vocab)
    const decoded = vocab.decode(tokens[0]!)
    expect(Option.isSome(decoded)).toBe(true)
    expect(Option.getOrThrow(decoded)).toBe("hello")
  })

  test("roundtrip encode/decode for all default words", () => {
    const words = Vocab.defaultWords()
    for (const word of words) {
      const tokens = tokenize(word, vocab)
      expect(tokens.length).toBeGreaterThanOrEqual(1)
      const decoded = vocab.decode(tokens[0]!)
      expect(Option.isSome(decoded)).toBe(true)
    }
  })

  test("sentence with </s> appended encodes correctly", () => {
    const tokens = tokenize("hello world </s>", vocab)
    expect(tokens.length).toBe(3)
    const lastToken = tokens[tokens.length - 1]!
    const eosId = Option.getOrThrow(vocab.encode("</s>"))
    expect(lastToken).toBe(eosId)
  })

  test("empty string returns empty tokens", () => {
    const tokens = tokenize("", vocab)
    expect(tokens.length).toBe(0)
  })

  test("unknown word returns empty for that word", () => {
    const tokens = tokenize("unknownxyz", vocab)
    expect(tokens.length).toBe(0)
  })

  test("multiple spaces handled correctly", () => {
    const tokens = tokenize("hello  world", vocab)
    expect(tokens.length).toBe(2)
  })
})

package/tests/ts/output_projection.test.ts
@@ -0,0 +1,78 @@
import { describe, test, expect } from "bun:test"
import { runEffect } from "./support/runEffect"
import { expectShape, expectNotClose, expectFinite } from "./support/tensorMatchers"
import { makeOutputProjection } from "./support/factories"
import * as T from "../../src/tensor/Tensor2D"
import { EMBEDDING_DIM } from "../../src/config"

describe("OutputProjection", () => {
  const vocabSize = 10

  test("weight dimensions [EMBEDDING_DIM, vocabSize]", () => {
    const proj = makeOutputProjection(vocabSize)
    expectShape(proj.wOut, [EMBEDDING_DIM, vocabSize])
  })

  test("bias dimensions [1, vocabSize]", () => {
    const proj = makeOutputProjection(vocabSize)
    expectShape(proj.bOut, [1, vocabSize])
  })

  test("forward [seqLen, dim] → [seqLen, vocabSize]", () => {
    const proj = makeOutputProjection(vocabSize)
    const input = T.ones(3, EMBEDDING_DIM)
    const output = runEffect(proj.forward(input))
    expectShape(output, [3, vocabSize])
  })

  test("forward across sequence lengths", () => {
    const proj = makeOutputProjection(vocabSize)
    for (let seqLen = 1; seqLen <= 4; seqLen++) {
      const input = T.ones(seqLen, EMBEDDING_DIM)
      const output = runEffect(proj.forward(input))
      expectShape(output, [seqLen, vocabSize])
    }
  })

  test("output contains finite values", () => {
    const proj = makeOutputProjection(vocabSize)
    const input = T.ones(3, EMBEDDING_DIM)
    const output = runEffect(proj.forward(input))
    expectFinite(output)
  })

  test("backward gradient shape [seqLen, EMBEDDING_DIM]", () => {
    const proj = makeOutputProjection(vocabSize)
    const input = T.ones(3, EMBEDDING_DIM)
    runEffect(proj.forward(input))
    const dOut = T.ones(3, vocabSize)
    const grad = runEffect(proj.backward(dOut, 0.01))
    expectShape(grad, [3, EMBEDDING_DIM])
  })

  test("backward updates wOut", () => {
    const proj = makeOutputProjection(vocabSize)
    const wOutBefore = T.clone(proj.wOut)
    const input = T.ones(3, EMBEDDING_DIM)
    runEffect(proj.forward(input))
    const dOut = T.ones(3, vocabSize)
    runEffect(proj.backward(dOut, 0.01))
    expectNotClose(proj.wOut, wOutBefore)
  })

  test("backward updates bOut", () => {
    const proj = makeOutputProjection(vocabSize)
    const bOutBefore = T.clone(proj.bOut)
    const input = T.ones(3, EMBEDDING_DIM)
    runEffect(proj.forward(input))
    const dOut = T.ones(3, vocabSize)
    runEffect(proj.backward(dOut, 0.01))
    expectNotClose(proj.bOut, bOutBefore)
  })

  test("parametersCount", () => {
    const proj = makeOutputProjection(vocabSize)
    const expected = EMBEDDING_DIM * vocabSize + vocabSize
    expect(proj.parametersCount).toBe(expected)
  })
})

package/tests/ts/random.test.ts
@@ -0,0 +1,44 @@
import { describe, expect, it } from "bun:test"
import { seeded } from "../../src/tensor/random"
import { testRng, CANONICAL_SEED } from "./support/seed"

describe("Rng", () => {
  it("produces deterministic sequence with same seed", () => {
    const rng1 = seeded(42)
    const rng2 = seeded(42)

    const seq1 = Array.from({ length: 10 }, () => rng1.next())
    const seq2 = Array.from({ length: 10 }, () => rng2.next())

    expect(seq1).toEqual(seq2)
  })

  it("produces different sequences with different seeds", () => {
    const rng1 = seeded(42)
    const rng2 = seeded(43)

    const seq1 = Array.from({ length: 10 }, () => rng1.next())
    const seq2 = Array.from({ length: 10 }, () => rng2.next())

    expect(seq1).not.toEqual(seq2)
  })

  it("values are in [0, 1) range", () => {
    const rng = testRng()
    for (let i = 0; i < 1000; i++) {
      const val = rng.next()
      expect(val).toBeGreaterThanOrEqual(0)
      expect(val).toBeLessThan(1)
    }
  })

  it("testRng uses canonical seed", () => {
    const rng1 = testRng()
    const rng2 = seeded(CANONICAL_SEED)

    const seq1 = Array.from({ length: 5 }, () => rng1.next())
    const seq2 = Array.from({ length: 5 }, () => rng2.next())

    expect(seq1).toEqual(seq2)
  })
})

package/tests/ts/self_attention.test.ts
@@ -0,0 +1,63 @@
import { describe, test, expect } from "bun:test"
import { runEffect } from "./support/runEffect"
import { expectShape, expectNotClose, expectFinite } from "./support/tensorMatchers"
import { makeSelfAttention } from "./support/factories"
import * as T from "../../src/tensor/Tensor2D"
import { EMBEDDING_DIM } from "../../src/config"

describe("SelfAttention", () => {
  test("forward shape matches input", () => {
    const attention = makeSelfAttention()
    const input = T.ones(3, EMBEDDING_DIM)
    const output = runEffect(attention.forward(input))
    expectShape(output, [3, EMBEDDING_DIM])
  })

  test("shape across sequence lengths 1..4", () => {
    const attention = makeSelfAttention()
    for (let seqLen = 1; seqLen <= 4; seqLen++) {
      const input = T.ones(seqLen, EMBEDDING_DIM)
      const output = runEffect(attention.forward(input))
      expectShape(output, [seqLen, EMBEDDING_DIM])
    }
  })

  test("output contains finite values", () => {
    const attention = makeSelfAttention()
    const input = T.ones(3, EMBEDDING_DIM)
    const output = runEffect(attention.forward(input))
    expectFinite(output)
  })

  test("backward returns correct shape", () => {
    const attention = makeSelfAttention()
    const seqLen = 3
    const input = T.ones(seqLen, EMBEDDING_DIM)
    runEffect(attention.forward(input))
    const gradOut = T.ones(seqLen, EMBEDDING_DIM)
    const grad = runEffect(attention.backward(gradOut, 0.01))
    expectShape(grad, [seqLen, EMBEDDING_DIM])
  })

  test("backward updates wQ/wK/wV weights", () => {
    const attention = makeSelfAttention()
    const wQBefore = T.clone(attention.wQ)
    const wKBefore = T.clone(attention.wK)
    const wVBefore = T.clone(attention.wV)

    const input = T.ones(3, EMBEDDING_DIM)
    runEffect(attention.forward(input))
    const gradOut = T.ones(3, EMBEDDING_DIM)
    runEffect(attention.backward(gradOut, 0.01))

    expectNotClose(attention.wQ, wQBefore)
    expectNotClose(attention.wK, wKBefore)
    expectNotClose(attention.wV, wVBefore)
  })

  test("parametersCount", () => {
    const attention = makeSelfAttention()
    const expectedCount = 3 * EMBEDDING_DIM * EMBEDDING_DIM
    expect(attention.parametersCount).toBe(expectedCount)
  })
})

package/tests/ts/support/factories.ts
@@ -0,0 +1,126 @@
/**
 * Seeded model factories for reproducible tests.
 * All factories accept an optional seed to produce deterministic weights.
 */
import { seeded, type Rng } from "../../../src/tensor/random"
import { CANONICAL_SEED } from "./seed"
import { EMBEDDING_DIM, HIDDEN_DIM, MAX_SEQ_LEN } from "../../../src/config"
import { Embeddings } from "../../../src/model/Embeddings"
import { SelfAttention } from "../../../src/model/SelfAttention"
import { FeedForward } from "../../../src/model/FeedForward"
import { LayerNorm } from "../../../src/model/LayerNorm"
import { TransformerBlock } from "../../../src/model/TransformerBlock"
import { OutputProjection } from "../../../src/model/OutputProjection"
import { LLM } from "../../../src/model/LLM"
import { Vocab } from "../../../src/vocab/Vocab"
import type { ModelLayer } from "../../../src/model/ModelLayer"

interface EmbeddingsOptions {
  seed?: number
  embeddingDim?: number
  maxSeqLen?: number
}

export const makeEmbeddings = (vocabSize: number, options: EmbeddingsOptions = {}): Embeddings => {
  const { seed = CANONICAL_SEED, embeddingDim = EMBEDDING_DIM, maxSeqLen = MAX_SEQ_LEN } = options
  const rng = seeded(seed)
  return new Embeddings(vocabSize, embeddingDim, maxSeqLen, rng)
}

interface SelfAttentionOptions {
  seed?: number
  embeddingDim?: number
}

export const makeSelfAttention = (options: SelfAttentionOptions = {}): SelfAttention => {
  const { seed = CANONICAL_SEED, embeddingDim = EMBEDDING_DIM } = options
  const rng = seeded(seed)
  return new SelfAttention(embeddingDim, rng)
}

interface FeedForwardOptions {
  seed?: number
  embeddingDim?: number
  hiddenDim?: number
}

export const makeFeedForward = (options: FeedForwardOptions = {}): FeedForward => {
  const { seed = CANONICAL_SEED, embeddingDim = EMBEDDING_DIM, hiddenDim = HIDDEN_DIM } = options
  const rng = seeded(seed)
  return new FeedForward(embeddingDim, hiddenDim, rng)
}

interface LayerNormOptions {
  embeddingDim?: number
}

export const makeLayerNorm = (options: LayerNormOptions = {}): LayerNorm => {
  const { embeddingDim = EMBEDDING_DIM } = options
  return new LayerNorm(embeddingDim)
}

interface TransformerBlockOptions {
  seed?: number
  embeddingDim?: number
  hiddenDim?: number
}

export const makeTransformerBlock = (options: TransformerBlockOptions = {}): TransformerBlock => {
  const { seed = CANONICAL_SEED, embeddingDim = EMBEDDING_DIM, hiddenDim = HIDDEN_DIM } = options
  const rng = seeded(seed)
  return new TransformerBlock(embeddingDim, hiddenDim, rng)
}

interface OutputProjectionOptions {
  seed?: number
  embeddingDim?: number
}

export const makeOutputProjection = (
  vocabSize: number,
  options: OutputProjectionOptions = {}
): OutputProjection => {
  const { seed = CANONICAL_SEED, embeddingDim = EMBEDDING_DIM } = options
  const rng = seeded(seed)
  return new OutputProjection(embeddingDim, vocabSize, rng)
}

/**
 * Creates a fresh RNG instance for custom use in tests.
 */
export const makeRng = (seed: number = CANONICAL_SEED): Rng => seeded(seed)

interface LLMOptions {
  seed?: number
  vocabWords?: ReadonlyArray<string>
  numTransformerBlocks?: number
}

export const makeLLM = (options: LLMOptions = {}): LLM => {
  const {
    seed = CANONICAL_SEED,
    vocabWords = Vocab.defaultWords(),
    numTransformerBlocks = 1
  } = options
  const vocab = Vocab.make(vocabWords)
  const rng = seeded(seed)
  const network: Array<ModelLayer> = [new Embeddings(vocab.words.length, EMBEDDING_DIM, MAX_SEQ_LEN, rng)]

  for (let i = 0; i < numTransformerBlocks; i++) {
    network.push(new TransformerBlock(EMBEDDING_DIM, HIDDEN_DIM, rng))
  }

  network.push(new OutputProjection(EMBEDDING_DIM, vocab.words.length, rng))
  return new LLM(vocab, network)
}

interface LLMWithCustomNetworkOptions {
  vocabWords?: ReadonlyArray<string>
  network: ReadonlyArray<ModelLayer>
}

export const makeLLMWithNetwork = (options: LLMWithCustomNetworkOptions): LLM => {
  const { vocabWords = Vocab.defaultWords(), network } = options
  const vocab = Vocab.make(vocabWords)
  return new LLM(vocab, network)
}

package/tests/ts/support/runEffect.ts
@@ -0,0 +1,29 @@
/**
 * Helpers for running Effect programs in Bun tests.
 */
import * as Effect from "effect/Effect"

/**
 * Runs an Effect synchronously and returns the result.
 * Throws if the effect fails or requires async execution.
 * Use for pure, synchronous effects in tests.
 */
export const runEffect = <A, E>(effect: Effect.Effect<A, E>): A => Effect.runSync(effect)

/**
 * Runs an Effect and expects it to fail with a specific error type.
 * Returns the error for further assertions.
 */
export const runEffectFail = <A, E>(effect: Effect.Effect<A, E>): E => {
  const result = Effect.runSyncExit(effect)
  if (result._tag === "Failure") {
    const cause = result.cause
    if (cause._tag === "Fail") {
      return cause.error
    }
    throw new Error(`Effect failed with unexpected cause: ${cause._tag}`)
  }
  throw new Error("Expected Effect to fail, but it succeeded")
}
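
None of the diffed tests exercise runEffectFail, so as a rough illustration only (the test name and error shape below are assumptions, not code from the package), a failure-path test could look like this sketch, where the error value comes straight from Effect.fail:

// Hypothetical usage sketch for runEffectFail (not part of the published package).
import { expect, test } from "bun:test"
import * as Effect from "effect/Effect"
import { runEffectFail } from "./runEffect"

test("runEffectFail surfaces the typed failure", () => {
  // Effect.fail builds an effect whose error channel carries this value.
  const failing = Effect.fail({ _tag: "BadInput" as const, message: "empty prompt" })
  const error = runEffectFail(failing)
  expect(error._tag).toBe("BadInput")
})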

package/tests/ts/support/seed.ts
@@ -0,0 +1,12 @@
/**
 * Test helper to create seeded RNG with canonical test seed.
 */
import { seeded, type Rng } from "../../../src/tensor/random"

/** Canonical seed for deterministic tests (matches Rust convention). */
export const CANONICAL_SEED = 1337

/** Creates a seeded RNG with the canonical test seed. */
export const testRng = (): Rng => seeded(CANONICAL_SEED)

package/tests/ts/support/stubs.ts
@@ -0,0 +1,58 @@
/**
 * Stub implementations for integration testing.
 */
import * as Effect from "effect/Effect"
import * as Layer from "effect/Layer"
import type { Tensor2D } from "../../../src/tensor/Tensor2D"
import * as T from "../../../src/tensor/Tensor2D"
import type { ShapeError } from "../../../src/tensor/ops"
import type { ModelLayer } from "../../../src/model/ModelLayer"
import { SilentLoggerLive } from "../../../src/services/Logger"
import { NoOpMetricsLive } from "../../../src/services/Metrics"

/**
 * Stub OutputProjection that forces the EOS token after N forward calls.
 * Used to test prediction loop termination.
 */
export class StubOutputProjection implements ModelLayer {
  readonly _tag = "OutputProjection" as const
  private callCount = 0
  private readonly eosAfter: number
  private readonly eosTokenId: number
  private readonly vocabSize: number

  constructor(vocabSize: number, eosTokenId: number, eosAfter: number) {
    this.vocabSize = vocabSize
    this.eosTokenId = eosTokenId
    this.eosAfter = eosAfter
  }

  get parametersCount(): number {
    return 0
  }

  forward(input: Tensor2D): Effect.Effect<Tensor2D, ShapeError> {
    return Effect.sync(() => {
      this.callCount++
      const logits = T.zeros(input.rows, this.vocabSize)
      const targetToken = this.callCount >= this.eosAfter ? this.eosTokenId : 0
      const lastRowStart = (input.rows - 1) * this.vocabSize
      logits.data[lastRowStart + targetToken] = 100.0
      return logits
    })
  }

  backward(_dOut: Tensor2D, _lr: number): Effect.Effect<Tensor2D, ShapeError> {
    return Effect.succeed(T.zeros(1, 1))
  }

  resetCallCount(): void {
    this.callCount = 0
  }
}

/**
 * Shared test services layer with silent logging and no-op metrics.
 * Use this in tests to suppress log output and avoid metrics overhead.
 */
export const TestServicesLayer = Layer.mergeAll(SilentLoggerLive, NoOpMetricsLive)
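
TestServicesLayer is exported but not wired into any of the diffed tests. As a minimal sketch only (an assumption on my part, since the Logger and Metrics service interfaces are not visible in this diff), a test that runs an effect needing those services could provide the layer before handing it to runEffect:

// Hypothetical usage sketch for TestServicesLayer (not part of the published package).
import { expect, test } from "bun:test"
import * as Effect from "effect/Effect"
import { TestServicesLayer } from "./stubs"
import { runEffect } from "./runEffect"

test("runs a program with silent logger and no-op metrics", () => {
  // Stand-in program; a real one would depend on the Logger/Metrics services.
  const program = Effect.succeed(42)
  const result = runEffect(Effect.provide(program, TestServicesLayer))
  expect(result).toBe(42)
})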