effect-gpt 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -0
- package/data/chat_training_data.json +55 -0
- package/data/pretraining_data.json +27 -0
- package/package.json +25 -0
- package/src/cli/errors.ts +51 -0
- package/src/cli/main.ts +163 -0
- package/src/config.ts +3 -0
- package/src/data/Dataset.ts +168 -0
- package/src/errors.ts +73 -0
- package/src/index.ts +88 -0
- package/src/model/Embeddings.ts +108 -0
- package/src/model/FeedForward.ts +121 -0
- package/src/model/LLM.ts +124 -0
- package/src/model/LayerNorm.ts +138 -0
- package/src/model/ModelLayer.ts +10 -0
- package/src/model/OutputProjection.ts +76 -0
- package/src/model/SelfAttention.ts +169 -0
- package/src/model/TransformerBlock.ts +53 -0
- package/src/services/Logger.ts +124 -0
- package/src/services/Metrics.ts +260 -0
- package/src/services/Random.ts +98 -0
- package/src/services/SeedLayer.ts +39 -0
- package/src/services/index.ts +32 -0
- package/src/tensor/Tensor2D.ts +42 -0
- package/src/tensor/ops.ts +371 -0
- package/src/tensor/random.ts +32 -0
- package/src/tokenize/split.ts +27 -0
- package/src/tokenize/tokenize.ts +28 -0
- package/src/training/Adam.ts +61 -0
- package/src/training/clip.ts +16 -0
- package/src/training/loss.ts +35 -0
- package/src/training/train.ts +203 -0
- package/src/vocab/Vocab.ts +79 -0
- package/tests/fixtures/csv_bad.csv +2 -0
- package/tests/fixtures/csv_good.csv +3 -0
- package/tests/ts/cli_error_format.test.ts +26 -0
- package/tests/ts/dataset.test.ts +35 -0
- package/tests/ts/embeddings.test.ts +81 -0
- package/tests/ts/errors.test.ts +36 -0
- package/tests/ts/feed_forward.test.ts +74 -0
- package/tests/ts/initNormal.test.ts +41 -0
- package/tests/ts/layer_norm.test.ts +96 -0
- package/tests/ts/llm_parameters.test.ts +96 -0
- package/tests/ts/llm_predict.test.ts +98 -0
- package/tests/ts/llm_tokenize.test.ts +69 -0
- package/tests/ts/output_projection.test.ts +78 -0
- package/tests/ts/random.test.ts +44 -0
- package/tests/ts/self_attention.test.ts +63 -0
- package/tests/ts/support/factories.ts +126 -0
- package/tests/ts/support/runEffect.ts +29 -0
- package/tests/ts/support/seed.ts +12 -0
- package/tests/ts/support/stubs.ts +58 -0
- package/tests/ts/support/tensorMatchers.ts +96 -0
- package/tests/ts/support.test.ts +165 -0
- package/tests/ts/train_loop.test.ts +229 -0
- package/tests/ts/transformer_block.test.ts +72 -0
- package/tsconfig.json +20 -0
- package/tsconfig.test.json +8 -0
package/src/services/SeedLayer.ts
@@ -0,0 +1,39 @@
+import * as Context from "effect/Context"
+import * as Layer from "effect/Layer"
+import * as Effect from "effect/Effect"
+import { seeded, systemRng, type Rng } from "../tensor/random"
+
+export interface SeedService {
+  /** Shared RNG stream used for deterministic initialization. */
+  readonly rng: Rng
+  /** Creates a new SeedService derived from the current stream (for isolation when desired). */
+  readonly fork: () => SeedService
+}
+
+export interface SeedServiceId {
+  readonly SeedService: unique symbol
+}
+
+export const Seed = Context.GenericTag<SeedServiceId, SeedService>("SeedService")
+
+const makeSeedService = (seed?: number): SeedService => {
+  const rng = seed === undefined ? systemRng() : seeded(seed)
+
+  return {
+    rng,
+    fork: () => {
+      // Advance the stream to produce a derived seed; fall back to system entropy when non-deterministic.
+      const nextSeed = Math.floor(rng.next() * 0xffffffff)
+      return makeSeedService(seed === undefined ? undefined : nextSeed)
+    }
+  }
+}
+
+/**
+ * Layer that provides a shared SeedService. Passing a seed yields deterministic initialization; omitting it uses
+ * nondeterministic Math.random.
+ */
+export const SeedLayer = (seed?: number): Layer.Layer<SeedServiceId> => Layer.succeed(Seed, makeSeedService(seed))
+
+/** Effect helper to grab the current RNG from context. */
+export const useSeedRng = (): Effect.Effect<Rng, never, SeedServiceId> => Effect.map(Seed, (service) => service.rng)
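A minimal usage sketch of the layer above, not part of the published diff; it assumes repo-root-relative import paths and the Effect v3 Effect.gen / Effect.provide / Effect.runSync API:

import * as Effect from "effect/Effect"
import { SeedLayer, useSeedRng } from "./src/services/SeedLayer"

// Grab the shared RNG from context and draw two numbers from it.
const program = Effect.gen(function* () {
  const rng = yield* useSeedRng()
  return [rng.next(), rng.next()]
})

// Providing a fixed seed makes the pair identical on every run;
// SeedLayer() with no argument would fall back to Math.random.
const values = Effect.runSync(Effect.provide(program, SeedLayer(42)))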
package/src/services/index.ts
@@ -0,0 +1,32 @@
+export type { LogLevel, LoggerService, LoggerServiceId } from "./Logger"
+export {
+  Logger,
+  ConsoleLoggerLive,
+  TerminalLoggerLive,
+  NullLoggerLive,
+  SilentLoggerLive,
+  PrettyLoggerLive,
+  log,
+  debug,
+  info,
+  warn,
+  error
+} from "./Logger"
+
+export type { RandomService, RandomServiceId } from "./Random"
+export { Random, SeededRandomLive, SystemRandomLive, next, nextGaussian, nextInt, fork } from "./Random"
+
+export type { SeedService, SeedServiceId } from "./SeedLayer"
+export { Seed, SeedLayer, useSeedRng } from "./SeedLayer"
+
+export type { Counter, Gauge, Histogram, TimingResult, MetricsService, MetricsSnapshot, MetricsServiceId } from "./Metrics"
+export {
+  Metrics,
+  InMemoryMetricsLive,
+  NoOpMetricsLive,
+  counter,
+  gauge,
+  histogram,
+  timed,
+  snapshot
+} from "./Metrics"
package/src/tensor/Tensor2D.ts
@@ -0,0 +1,42 @@
+export interface Tensor2D {
+  readonly rows: number
+  readonly cols: number
+  readonly data: Float32Array
+}
+
+export const make = (rows: number, cols: number, data: Float32Array): Tensor2D => {
+  if (data.length !== rows * cols) {
+    throw new Error(`Data length ${data.length} does not match shape ${rows}x${cols}`)
+  }
+  return { rows, cols, data }
+}
+
+export const zeros = (rows: number, cols: number): Tensor2D => {
+  return { rows, cols, data: new Float32Array(rows * cols) }
+}
+
+export const ones = (rows: number, cols: number): Tensor2D => {
+  const data = new Float32Array(rows * cols)
+  data.fill(1)
+  return { rows, cols, data }
+}
+
+export const clone = (t: Tensor2D): Tensor2D => {
+  return { rows: t.rows, cols: t.cols, data: new Float32Array(t.data) }
+}
+
+export const get = (t: Tensor2D, row: number, col: number): number => {
+  return t.data[row * t.cols + col]
+}
+
+export const set = (t: Tensor2D, row: number, col: number, value: number): void => {
+  t.data[row * t.cols + col] = value
+}
+
+export const fromArray = (rows: number, cols: number, arr: ArrayLike<number>): Tensor2D => {
+  const data = new Float32Array(arr)
+  if (data.length !== rows * cols) {
+    throw new Error(`Array length ${data.length} does not match shape ${rows}x${cols}`)
+  }
+  return { rows, cols, data }
+}
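For orientation, a short sketch (not part of the diff) of the row-major Tensor2D API above, assuming repo-root-relative import paths:

import * as T from "./src/tensor/Tensor2D"

// A 2x3 tensor stored row-major in a single Float32Array.
const t = T.fromArray(2, 3, [1, 2, 3, 4, 5, 6])

T.get(t, 1, 2)    // 6 — element at row 1, col 2
T.set(t, 0, 0, 9) // mutates t in place; t.data[0] is now 9

const copy = T.clone(t) // independent Float32Array, safe to mutate separately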
package/src/tensor/ops.ts
@@ -0,0 +1,371 @@
+import * as Effect from "effect/Effect"
+import type { Tensor2D } from "./Tensor2D"
+import * as T from "./Tensor2D"
+import type { Rng } from "./random"
+import type { RandomServiceId } from "../services/Random"
+import { Random } from "../services/Random"
+
+export class ShapeError extends Error {
+  readonly _tag = "ShapeError"
+  constructor(message: string) {
+    super(message)
+    this.name = "ShapeError"
+  }
+}
+
+export const matMul = (a: Tensor2D, b: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
+  Effect.sync(() => {
+    if (a.cols !== b.rows) {
+      throw new ShapeError(`matMul: a.cols (${a.cols}) !== b.rows (${b.rows})`)
+    }
+    const result = T.zeros(a.rows, b.cols)
+    for (let i = 0; i < a.rows; i++) {
+      for (let j = 0; j < b.cols; j++) {
+        let sum = 0
+        for (let k = 0; k < a.cols; k++) {
+          sum += T.get(a, i, k) * T.get(b, k, j)
+        }
+        T.set(result, i, j, sum)
+      }
+    }
+    return result
+  }).pipe(Effect.catchAllDefect((e) => Effect.fail(e as ShapeError)))
+
+export const add = (a: Tensor2D, b: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
+  Effect.sync(() => {
+    if (a.rows !== b.rows || a.cols !== b.cols) {
+      throw new ShapeError(`add: shapes (${a.rows},${a.cols}) and (${b.rows},${b.cols}) do not match`)
+    }
+    const data = new Float32Array(a.data.length)
+    for (let i = 0; i < data.length; i++) {
+      data[i] = a.data[i] + b.data[i]
+    }
+    return T.make(a.rows, a.cols, data)
+  }).pipe(Effect.catchAllDefect((e) => Effect.fail(e as ShapeError)))
+
+export const sub = (a: Tensor2D, b: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
+  Effect.sync(() => {
+    if (a.rows !== b.rows || a.cols !== b.cols) {
+      throw new ShapeError(`sub: shapes (${a.rows},${a.cols}) and (${b.rows},${b.cols}) do not match`)
+    }
+    const data = new Float32Array(a.data.length)
+    for (let i = 0; i < data.length; i++) {
+      data[i] = a.data[i] - b.data[i]
+    }
+    return T.make(a.rows, a.cols, data)
+  }).pipe(Effect.catchAllDefect((e) => Effect.fail(e as ShapeError)))
+
+export const mul = (a: Tensor2D, b: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
+  Effect.sync(() => {
+    if (a.rows !== b.rows || a.cols !== b.cols) {
+      throw new ShapeError(`mul: shapes (${a.rows},${a.cols}) and (${b.rows},${b.cols}) do not match`)
+    }
+    const data = new Float32Array(a.data.length)
+    for (let i = 0; i < data.length; i++) {
+      data[i] = a.data[i] * b.data[i]
+    }
+    return T.make(a.rows, a.cols, data)
+  }).pipe(Effect.catchAllDefect((e) => Effect.fail(e as ShapeError)))
+
+export const div = (a: Tensor2D, b: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
+  Effect.sync(() => {
+    if (a.rows !== b.rows || a.cols !== b.cols) {
+      throw new ShapeError(`div: shapes (${a.rows},${a.cols}) and (${b.rows},${b.cols}) do not match`)
+    }
+    const data = new Float32Array(a.data.length)
+    for (let i = 0; i < data.length; i++) {
+      data[i] = a.data[i] / b.data[i]
+    }
+    return T.make(a.rows, a.cols, data)
+  }).pipe(Effect.catchAllDefect((e) => Effect.fail(e as ShapeError)))
+
+export const addScalar = (t: Tensor2D, scalar: number): Tensor2D => {
+  const data = new Float32Array(t.data.length)
+  for (let i = 0; i < data.length; i++) {
+    data[i] = t.data[i] + scalar
+  }
+  return T.make(t.rows, t.cols, data)
+}
+
+export const mulScalar = (t: Tensor2D, scalar: number): Tensor2D => {
+  const data = new Float32Array(t.data.length)
+  for (let i = 0; i < data.length; i++) {
+    data[i] = t.data[i] * scalar
+  }
+  return T.make(t.rows, t.cols, data)
+}
+
+export const addRowBias = (matrix: Tensor2D, bias: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
+  Effect.sync(() => {
+    if (bias.rows !== 1 || bias.cols !== matrix.cols) {
+      throw new ShapeError(`addRowBias: bias shape (${bias.rows},${bias.cols}) incompatible with matrix cols ${matrix.cols}`)
+    }
+    const data = new Float32Array(matrix.data.length)
+    for (let i = 0; i < matrix.rows; i++) {
+      for (let j = 0; j < matrix.cols; j++) {
+        data[i * matrix.cols + j] = T.get(matrix, i, j) + bias.data[j]
+      }
+    }
+    return T.make(matrix.rows, matrix.cols, data)
+  }).pipe(Effect.catchAllDefect((e) => Effect.fail(e as ShapeError)))
+
+export const meanRows = (t: Tensor2D): Tensor2D => {
+  const data = new Float32Array(t.rows)
+  for (let i = 0; i < t.rows; i++) {
+    let sum = 0
+    for (let j = 0; j < t.cols; j++) {
+      sum += T.get(t, i, j)
+    }
+    data[i] = sum / t.cols
+  }
+  return T.make(t.rows, 1, data)
+}
+
+export const sumCols = (t: Tensor2D): Tensor2D => {
+  const data = new Float32Array(t.cols)
+  for (let j = 0; j < t.cols; j++) {
+    let sum = 0
+    for (let i = 0; i < t.rows; i++) {
+      sum += T.get(t, i, j)
+    }
+    data[j] = sum
+  }
+  return T.make(1, t.cols, data)
+}
+
+export const meanCols = (t: Tensor2D): Tensor2D => {
+  const data = new Float32Array(t.cols)
+  const scale = 1 / t.rows
+  for (let j = 0; j < t.cols; j++) {
+    let sum = 0
+    for (let i = 0; i < t.rows; i++) {
+      sum += T.get(t, i, j)
+    }
+    data[j] = sum * scale
+  }
+  return T.make(1, t.cols, data)
+}
+
+export const stdRows = (t: Tensor2D): Tensor2D => {
+  const means = meanRows(t)
+  const data = new Float32Array(t.rows)
+  for (let i = 0; i < t.rows; i++) {
+    const mean = means.data[i]
+    let sumSq = 0
+    for (let j = 0; j < t.cols; j++) {
+      const diff = T.get(t, i, j) - mean
+      sumSq += diff * diff
+    }
+    data[i] = Math.sqrt(sumSq / t.cols)
+  }
+  return T.make(t.rows, 1, data)
+}
+
+export const varRows = (t: Tensor2D): Tensor2D => {
+  const means = meanRows(t)
+  const data = new Float32Array(t.rows)
+  for (let i = 0; i < t.rows; i++) {
+    const mean = means.data[i]
+    let sumSq = 0
+    for (let j = 0; j < t.cols; j++) {
+      const diff = T.get(t, i, j) - mean
+      sumSq += diff * diff
+    }
+    data[i] = sumSq / t.cols
+  }
+  return T.make(t.rows, 1, data)
+}
+
+export const mapScalar = (t: Tensor2D, fn: (val: number) => number): Tensor2D => {
+  const data = new Float32Array(t.data.length)
+  for (let i = 0; i < data.length; i++) {
+    data[i] = fn(t.data[i])
+  }
+  return T.make(t.rows, t.cols, data)
+}
+
+export const softmaxRows = (t: Tensor2D): Tensor2D => {
+  const data = new Float32Array(t.data.length)
+  for (let i = 0; i < t.rows; i++) {
+    let maxVal = -Infinity
+    for (let j = 0; j < t.cols; j++) {
+      const val = T.get(t, i, j)
+      if (val > maxVal) maxVal = val
+    }
+    let sumExp = 0
+    for (let j = 0; j < t.cols; j++) {
+      const exp = Math.exp(T.get(t, i, j) - maxVal)
+      data[i * t.cols + j] = exp
+      sumExp += exp
+    }
+    for (let j = 0; j < t.cols; j++) {
+      data[i * t.cols + j] /= sumExp
+    }
+  }
+  return T.make(t.rows, t.cols, data)
+}
+
+export const transpose = (t: Tensor2D): Tensor2D => {
+  const data = new Float32Array(t.rows * t.cols)
+  for (let i = 0; i < t.rows; i++) {
+    for (let j = 0; j < t.cols; j++) {
+      data[j * t.rows + i] = T.get(t, i, j)
+    }
+  }
+  return T.make(t.cols, t.rows, data)
+}
+
+export const gatherRows = (embeddings: Tensor2D, tokenIds: ReadonlyArray<number>): Effect.Effect<Tensor2D, ShapeError> =>
+  Effect.sync(() => {
+    const data = new Float32Array(tokenIds.length * embeddings.cols)
+    for (let i = 0; i < tokenIds.length; i++) {
+      const tokenId = tokenIds[i]
+      if (tokenId < 0 || tokenId >= embeddings.rows) {
+        throw new ShapeError(`gatherRows: tokenId ${tokenId} out of bounds [0, ${embeddings.rows})`)
+      }
+      for (let j = 0; j < embeddings.cols; j++) {
+        data[i * embeddings.cols + j] = T.get(embeddings, tokenId, j)
+      }
+    }
+    return T.make(tokenIds.length, embeddings.cols, data)
+  }).pipe(Effect.catchAllDefect((e) => Effect.fail(e as ShapeError)))
+
+export const sliceRows = (t: Tensor2D, start: number, end: number): Effect.Effect<Tensor2D, ShapeError> =>
+  Effect.sync(() => {
+    if (start < 0 || end > t.rows || start >= end) {
+      throw new ShapeError(`sliceRows: invalid range [${start}, ${end}) for tensor with ${t.rows} rows`)
+    }
+    const numRows = end - start
+    const data = new Float32Array(numRows * t.cols)
+    for (let i = 0; i < numRows; i++) {
+      for (let j = 0; j < t.cols; j++) {
+        data[i * t.cols + j] = T.get(t, start + i, j)
+      }
+    }
+    return T.make(numRows, t.cols, data)
+  }).pipe(Effect.catchAllDefect((e) => Effect.fail(e as ShapeError)))
+
+export const relu = (t: Tensor2D): Tensor2D => {
+  const data = new Float32Array(t.data.length)
+  for (let i = 0; i < data.length; i++) {
+    data[i] = Math.max(0, t.data[i])
+  }
+  return T.make(t.rows, t.cols, data)
+}
+
+export const argmaxRows = (t: Tensor2D): ReadonlyArray<number> => {
+  const result: Array<number> = []
+  for (let i = 0; i < t.rows; i++) {
+    let maxIdx = 0
+    let maxVal = T.get(t, i, 0)
+    for (let j = 1; j < t.cols; j++) {
+      const val = T.get(t, i, j)
+      if (val > maxVal) {
+        maxVal = val
+        maxIdx = j
+      }
+    }
+    result.push(maxIdx)
+  }
+  return result
+}
+
+export const broadcastSubCol = (t: Tensor2D, col: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
+  Effect.sync(() => {
+    if (col.cols !== 1 || col.rows !== t.rows) {
+      throw new ShapeError(`broadcastSubCol: col shape (${col.rows},${col.cols}) incompatible with tensor rows ${t.rows}`)
+    }
+    const data = new Float32Array(t.data.length)
+    for (let i = 0; i < t.rows; i++) {
+      for (let j = 0; j < t.cols; j++) {
+        data[i * t.cols + j] = T.get(t, i, j) - col.data[i]
+      }
+    }
+    return T.make(t.rows, t.cols, data)
+  }).pipe(Effect.catchAllDefect((e) => Effect.fail(e as ShapeError)))
+
+export const broadcastDivCol = (t: Tensor2D, col: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
+  Effect.sync(() => {
+    if (col.cols !== 1 || col.rows !== t.rows) {
+      throw new ShapeError(`broadcastDivCol: col shape (${col.rows},${col.cols}) incompatible with tensor rows ${t.rows}`)
+    }
+    const data = new Float32Array(t.data.length)
+    for (let i = 0; i < t.rows; i++) {
+      for (let j = 0; j < t.cols; j++) {
+        data[i * t.cols + j] = T.get(t, i, j) / col.data[i]
+      }
+    }
+    return T.make(t.rows, t.cols, data)
+  }).pipe(Effect.catchAllDefect((e) => Effect.fail(e as ShapeError)))
+
+export const broadcastMulCol = (t: Tensor2D, col: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
+  Effect.sync(() => {
+    if (col.cols !== 1 || col.rows !== t.rows) {
+      throw new ShapeError(`broadcastMulCol: col shape (${col.rows},${col.cols}) incompatible with tensor rows ${t.rows}`)
+    }
+    const data = new Float32Array(t.data.length)
+    for (let i = 0; i < t.rows; i++) {
+      for (let j = 0; j < t.cols; j++) {
+        data[i * t.cols + j] = T.get(t, i, j) * col.data[i]
+      }
+    }
+    return T.make(t.rows, t.cols, data)
+  }).pipe(Effect.catchAllDefect((e) => Effect.fail(e as ShapeError)))
+
+export const broadcastMulRow = (t: Tensor2D, row: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
+  Effect.sync(() => {
+    if (row.rows !== 1 || row.cols !== t.cols) {
+      throw new ShapeError(`broadcastMulRow: row shape (${row.rows},${row.cols}) incompatible with tensor cols ${t.cols}`)
+    }
+    const data = new Float32Array(t.data.length)
+    for (let i = 0; i < t.rows; i++) {
+      for (let j = 0; j < t.cols; j++) {
+        data[i * t.cols + j] = T.get(t, i, j) * row.data[j]
+      }
+    }
+    return T.make(t.rows, t.cols, data)
+  }).pipe(Effect.catchAllDefect((e) => Effect.fail(e as ShapeError)))
+
+export const broadcastAddRow = (t: Tensor2D, row: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
+  Effect.sync(() => {
+    if (row.rows !== 1 || row.cols !== t.cols) {
+      throw new ShapeError(`broadcastAddRow: row shape (${row.rows},${row.cols}) incompatible with tensor cols ${t.cols}`)
+    }
+    const data = new Float32Array(t.data.length)
+    for (let i = 0; i < t.rows; i++) {
+      for (let j = 0; j < t.cols; j++) {
+        data[i * t.cols + j] = T.get(t, i, j) + row.data[j]
+      }
+    }
+    return T.make(t.rows, t.cols, data)
+  }).pipe(Effect.catchAllDefect((e) => Effect.fail(e as ShapeError)))
+
+export const initNormal = (rows: number, cols: number, mean: number, std: number, rng: Rng): Tensor2D => {
+  const rand = () => rng.next()
+  const data = new Float32Array(rows * cols)
+  for (let i = 0; i < data.length; i++) {
+    let u1 = rand()
+    let u2 = rand()
+    while (u1 === 0) u1 = rand()
+    const z = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2)
+    data[i] = mean + std * z
+  }
+  return T.make(rows, cols, data)
+}
+
+export const initNormalEffect = (
+  rows: number,
+  cols: number,
+  mean: number,
+  std: number
+): Effect.Effect<Tensor2D, never, RandomServiceId> =>
+  Effect.flatMap(Random, (random) =>
+    Effect.gen(function* () {
+      const data = new Float32Array(rows * cols)
+      for (let i = 0; i < data.length; i++) {
+        const value = yield* random.nextGaussian(mean, std)
+        data[i] = value
+      }
+      return T.make(rows, cols, data)
+    })
+  )
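A sketch, not part of the diff, of how the shape-checked ops compose in an Effect pipeline; import paths are assumed repo-root-relative and errors surface through the ShapeError channel:

import * as Effect from "effect/Effect"
import * as Ops from "./src/tensor/ops"
import { seeded } from "./src/tensor/random"

// 2x4 activations times a 4x3 weight matrix, then row-wise softmax.
const program = Effect.gen(function* () {
  const x = Ops.initNormal(2, 4, 0, 0.02, seeded(1))
  const w = Ops.initNormal(4, 3, 0, 0.02, seeded(2))
  const logits = yield* Ops.matMul(x, w) // fails with ShapeError if inner dimensions differ
  return Ops.softmaxRows(logits)         // each row now sums to ~1
})

// Effect<Tensor2D, ShapeError>; runSync throws if a shape check fails.
const probs = Effect.runSync(program)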
package/src/tensor/random.ts
@@ -0,0 +1,32 @@
+/**
+ * Deterministic random number generator for reproducible weight initialization.
+ * Uses mulberry32 algorithm for fast, seedable pseudo-random numbers.
+ */
+
+export interface Rng {
+  /** Returns a random number in [0, 1) */
+  next(): number
+}
+
+/**
+ * Creates a seeded RNG using the mulberry32 algorithm.
+ * Produces deterministic sequences for reproducible tests.
+ */
+export const seeded = (seed: number): Rng => {
+  let state = seed >>> 0
+
+  return {
+    next(): number {
+      state = (state + 0x6d2b79f5) >>> 0
+      let t = state
+      t = Math.imul(t ^ (t >>> 15), t | 1)
+      t ^= t + Math.imul(t ^ (t >>> 7), t | 61)
+      return ((t ^ (t >>> 14)) >>> 0) / 4294967296
+    }
+  }
+}
+
+/** Non-deterministic RNG wrapper around Math.random for convenience. */
+export const systemRng = (): Rng => ({
+  next: () => Math.random()
+})
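The point of the mulberry32 generator is reproducibility; a quick illustration (not part of the diff, import path assumed):

import { seeded, systemRng } from "./src/tensor/random"

// Two generators built from the same seed emit identical sequences.
const a = seeded(123)
const b = seeded(123)
a.next() === b.next() // true, and stays true for every subsequent call

// systemRng() defers to Math.random and is not reproducible.
systemRng().next() // some value in [0, 1)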
package/src/tokenize/split.ts
@@ -0,0 +1,27 @@
+const ASCII_PUNCTUATION = /[!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]/
+
+const isAsciiPunctuation = (ch: string): boolean =>
+  ch.length === 1 && ASCII_PUNCTUATION.test(ch)
+
+export const splitWordToTokens = (word: string): ReadonlyArray<string> => {
+  const tokens: Array<string> = []
+  let current = ""
+
+  for (const ch of word) {
+    if (isAsciiPunctuation(ch)) {
+      if (current.length > 0) {
+        tokens.push(current)
+        current = ""
+      }
+      tokens.push(ch)
+    } else {
+      current += ch
+    }
+  }
+
+  if (current.length > 0) {
+    tokens.push(current)
+  }
+
+  return tokens
+}
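Expected behaviour of the splitter above, shown as a short sketch rather than taken from the package's tests (import path assumed):

import { splitWordToTokens } from "./src/tokenize/split"

splitWordToTokens("don't")        // ["don", "'", "t"]  — punctuation becomes its own token
splitWordToTokens("hello,world!") // ["hello", ",", "world", "!"]
splitWordToTokens("plain")        // ["plain"]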
package/src/tokenize/tokenize.ts
@@ -0,0 +1,28 @@
+import * as Option from "effect/Option"
+import { splitWordToTokens } from "./split"
+import { Vocab } from "../vocab/Vocab"
+
+export const tokenize = (text: string, vocab: Vocab): ReadonlyArray<number> => {
+  const tokens: Array<number> = []
+  const words = text.split(/\s+/).filter((w) => w.length > 0)
+
+  for (const word of words) {
+    if (word === "</s>") {
+      const tokenId = vocab.encode(word)
+      if (Option.isSome(tokenId)) {
+        tokens.push(tokenId.value as number)
+      }
+      continue
+    }
+
+    const split = splitWordToTokens(word)
+    for (const part of split) {
+      const tokenId = vocab.encode(part)
+      if (Option.isSome(tokenId)) {
+        tokens.push(tokenId.value as number)
+      }
+    }
+  }
+
+  return tokens
+}
package/src/training/Adam.ts
@@ -0,0 +1,61 @@
+import type { Tensor2D } from "../tensor/Tensor2D"
+import * as T from "../tensor/Tensor2D"
+import { ShapeError } from "../tensor/ops"
+
+export class Adam {
+  readonly beta1 = 0.9
+  readonly beta2 = 0.999
+  readonly epsilon = 1e-8
+  timestep = 0
+  m: Tensor2D
+  v: Tensor2D
+
+  private constructor(rows: number, cols: number) {
+    this.m = T.zeros(rows, cols)
+    this.v = T.zeros(rows, cols)
+  }
+
+  static make(rows: number, cols: number): Adam {
+    return new Adam(rows, cols)
+  }
+
+  step(params: Tensor2D, grads: Tensor2D, lr: number): void {
+    if (params.rows !== grads.rows || params.cols !== grads.cols) {
+      throw new ShapeError(
+        `Adam.step: params shape (${params.rows},${params.cols}) != grads shape (${grads.rows},${grads.cols})`
+      )
+    }
+    if (this.m.rows !== params.rows || this.m.cols !== params.cols) {
+      throw new ShapeError(
+        `Adam.step: optimizer shape (${this.m.rows},${this.m.cols}) != params shape (${params.rows},${params.cols})`
+      )
+    }
+
+    this.timestep += 1
+    const beta1 = this.beta1
+    const beta2 = this.beta2
+    const oneMinusB1 = 1 - beta1
+    const oneMinusB2 = 1 - beta2
+
+    const mData = this.m.data
+    const vData = this.v.data
+    const pData = params.data
+    const gData = grads.data
+
+    for (let i = 0; i < gData.length; i++) {
+      const g = gData[i]
+      mData[i] = mData[i] * beta1 + g * oneMinusB1
+      vData[i] = vData[i] * beta2 + g * g * oneMinusB2
+    }
+
+    const mHatScale = 1 - Math.pow(beta1, this.timestep)
+    const vHatScale = 1 - Math.pow(beta2, this.timestep)
+
+    for (let i = 0; i < pData.length; i++) {
+      const mHat = mData[i] / mHatScale
+      const vHat = vData[i] / vHatScale
+      const update = mHat / (Math.sqrt(vHat) + this.epsilon)
+      pData[i] -= lr * update
+    }
+  }
+}
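A minimal sketch of driving the optimizer above, not taken from the package's train loop (import paths assumed); note that one Adam instance tracks moments for exactly one parameter tensor of matching shape:

import * as T from "./src/tensor/Tensor2D"
import { Adam } from "./src/training/Adam"

const params = T.fromArray(1, 3, [0.5, -0.2, 0.1])
const grads = T.fromArray(1, 3, [0.1, 0.0, -0.3])

const opt = Adam.make(1, 3)
opt.step(params, grads, 0.001) // mutates params in place using bias-corrected first/second moments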
package/src/training/clip.ts
@@ -0,0 +1,16 @@
+import type { Tensor2D } from "../tensor/Tensor2D"
+
+export const clipGlobalL2 = (grads: Tensor2D, maxNorm: number): void => {
+  let sumSq = 0
+  for (let i = 0; i < grads.data.length; i++) {
+    const v = grads.data[i]
+    sumSq += v * v
+  }
+  const norm = Math.sqrt(sumSq)
+  if (norm > maxNorm) {
+    const scale = maxNorm / norm
+    for (let i = 0; i < grads.data.length; i++) {
+      grads.data[i] *= scale
+    }
+  }
+}
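A worked example of the clipping rule (a sketch, import paths assumed): the gradient is rescaled in place only when its L2 norm exceeds the cap.

import * as T from "./src/tensor/Tensor2D"
import { clipGlobalL2 } from "./src/training/clip"

const grads = T.fromArray(1, 2, [3, 4]) // L2 norm = 5
clipGlobalL2(grads, 1.0)                // scaled by 1/5 in place -> [0.6, 0.8], norm 1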
package/src/training/loss.ts
@@ -0,0 +1,35 @@
+import type { Tensor2D } from "../tensor/Tensor2D"
+import * as T from "../tensor/Tensor2D"
+import * as Ops from "../tensor/ops"
+
+export const softmaxRows = (logits: Tensor2D): Tensor2D => Ops.softmaxRows(logits)
+
+export const crossEntropyLoss = (probs: Tensor2D, targetIds: ReadonlyArray<number>): number => {
+  if (probs.rows !== targetIds.length) {
+    throw new Ops.ShapeError(`crossEntropyLoss: probs.rows (${probs.rows}) !== targetIds.length (${targetIds.length})`)
+  }
+  let loss = 0
+  for (let i = 0; i < probs.rows; i++) {
+    const idx = targetIds[i]
+    const prob = probs.data[i * probs.cols + idx]
+    const clamped = prob < 1e-15 ? 1e-15 : prob
+    loss -= Math.log(clamped)
+  }
+  return loss / targetIds.length
+}
+
+export const dLogits = (probs: Tensor2D, targetIds: ReadonlyArray<number>): Tensor2D => {
+  if (probs.rows !== targetIds.length) {
+    throw new Ops.ShapeError(`dLogits: probs.rows (${probs.rows}) !== targetIds.length (${targetIds.length})`)
+  }
+  const data = new Float32Array(probs.data)
+  for (let i = 0; i < probs.rows; i++) {
+    const idx = targetIds[i]
+    data[i * probs.cols + idx] -= 1
+  }
+  const scale = 1 / targetIds.length
+  for (let i = 0; i < data.length; i++) {
+    data[i] *= scale
+  }
+  return T.make(probs.rows, probs.cols, data)
+}
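How the three loss helpers fit together, as a sketch rather than the package's own training loop (import paths assumed): crossEntropyLoss expects softmax probabilities, and dLogits returns the gradient of that loss with respect to the pre-softmax logits.

import * as T from "./src/tensor/Tensor2D"
import { softmaxRows, crossEntropyLoss, dLogits } from "./src/training/loss"

// Two positions over a vocabulary of 3; targets give the correct token id per row.
const logits = T.fromArray(2, 3, [2, 0, 0, 0, 2, 0])
const targets = [0, 1]

const probs = softmaxRows(logits)
const loss = crossEntropyLoss(probs, targets) // mean negative log-likelihood of the targets
const grad = dLogits(probs, targets)          // (probs - onehot(targets)) / batchSize, same shape as logits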