effect-gpt 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +50 -0
  2. package/data/chat_training_data.json +55 -0
  3. package/data/pretraining_data.json +27 -0
  4. package/package.json +25 -0
  5. package/src/cli/errors.ts +51 -0
  6. package/src/cli/main.ts +163 -0
  7. package/src/config.ts +3 -0
  8. package/src/data/Dataset.ts +168 -0
  9. package/src/errors.ts +73 -0
  10. package/src/index.ts +88 -0
  11. package/src/model/Embeddings.ts +108 -0
  12. package/src/model/FeedForward.ts +121 -0
  13. package/src/model/LLM.ts +124 -0
  14. package/src/model/LayerNorm.ts +138 -0
  15. package/src/model/ModelLayer.ts +10 -0
  16. package/src/model/OutputProjection.ts +76 -0
  17. package/src/model/SelfAttention.ts +169 -0
  18. package/src/model/TransformerBlock.ts +53 -0
  19. package/src/services/Logger.ts +124 -0
  20. package/src/services/Metrics.ts +260 -0
  21. package/src/services/Random.ts +98 -0
  22. package/src/services/SeedLayer.ts +39 -0
  23. package/src/services/index.ts +32 -0
  24. package/src/tensor/Tensor2D.ts +42 -0
  25. package/src/tensor/ops.ts +371 -0
  26. package/src/tensor/random.ts +32 -0
  27. package/src/tokenize/split.ts +27 -0
  28. package/src/tokenize/tokenize.ts +28 -0
  29. package/src/training/Adam.ts +61 -0
  30. package/src/training/clip.ts +16 -0
  31. package/src/training/loss.ts +35 -0
  32. package/src/training/train.ts +203 -0
  33. package/src/vocab/Vocab.ts +79 -0
  34. package/tests/fixtures/csv_bad.csv +2 -0
  35. package/tests/fixtures/csv_good.csv +3 -0
  36. package/tests/ts/cli_error_format.test.ts +26 -0
  37. package/tests/ts/dataset.test.ts +35 -0
  38. package/tests/ts/embeddings.test.ts +81 -0
  39. package/tests/ts/errors.test.ts +36 -0
  40. package/tests/ts/feed_forward.test.ts +74 -0
  41. package/tests/ts/initNormal.test.ts +41 -0
  42. package/tests/ts/layer_norm.test.ts +96 -0
  43. package/tests/ts/llm_parameters.test.ts +96 -0
  44. package/tests/ts/llm_predict.test.ts +98 -0
  45. package/tests/ts/llm_tokenize.test.ts +69 -0
  46. package/tests/ts/output_projection.test.ts +78 -0
  47. package/tests/ts/random.test.ts +44 -0
  48. package/tests/ts/self_attention.test.ts +63 -0
  49. package/tests/ts/support/factories.ts +126 -0
  50. package/tests/ts/support/runEffect.ts +29 -0
  51. package/tests/ts/support/seed.ts +12 -0
  52. package/tests/ts/support/stubs.ts +58 -0
  53. package/tests/ts/support/tensorMatchers.ts +96 -0
  54. package/tests/ts/support.test.ts +165 -0
  55. package/tests/ts/train_loop.test.ts +229 -0
  56. package/tests/ts/transformer_block.test.ts +72 -0
  57. package/tsconfig.json +20 -0
  58. package/tsconfig.test.json +8 -0
@@ -0,0 +1,39 @@
1
+ import * as Context from "effect/Context"
2
+ import * as Layer from "effect/Layer"
3
+ import * as Effect from "effect/Effect"
4
+ import { seeded, systemRng, type Rng } from "../tensor/random"
5
+
6
/**
 * Service contract for seeded randomness: one shared RNG stream plus a way
 * to derive a separate service from it.
 */
export interface SeedService {
  /** The RNG stream shared by consumers; used for deterministic initialization. */
  readonly rng: Rng
  /** Returns a new SeedService derived from the current stream, for callers that want isolation. */
  readonly fork: () => SeedService
}
12
+
13
/** Identifier type used as the key of the SeedService context tag. */
export interface SeedServiceId {
  /** Phantom member that keeps this identifier distinct from other service ids. */
  readonly SeedService: unique symbol
}
16
+
17
/** Context tag under which a SeedService is provided and looked up. */
export const Seed = Context.GenericTag<SeedServiceId, SeedService>(
  "SeedService"
)
18
+
19
/**
 * Builds a SeedService.
 *
 * @param seed When given, the service wraps a seeded (reproducible) stream;
 *             when omitted, it wraps the system RNG.
 * @returns A service exposing the stream and a `fork` that derives a child service.
 */
const makeSeedService = (seed?: number): SeedService => {
  const stream = seed !== undefined ? seeded(seed) : systemRng()

  const fork = (): SeedService => {
    // The stream is always advanced once, even when the drawn value ends up
    // unused because the service is non-deterministic.
    const drawn = stream.next()
    const derivedSeed = Math.floor(drawn * 0xffffffff)
    return seed === undefined ? makeSeedService() : makeSeedService(derivedSeed)
  }

  return { rng: stream, fork }
}
31
+
32
/**
 * Creates a Layer that provides a single shared SeedService under the `Seed` tag.
 *
 * @param seed Optional seed. With a seed, initialization is deterministic;
 *             without one, the service falls back to nondeterministic Math.random.
 */
export const SeedLayer = (seed?: number): Layer.Layer<SeedServiceId> => {
  const service = makeSeedService(seed)
  return Layer.succeed(Seed, service)
}
37
+
38
/** Effect that reads the SeedService from context and yields its shared RNG. */
export const useSeedRng = (): Effect.Effect<Rng, never, SeedServiceId> =>
  Effect.map(Seed, ({ rng }) => rng)
@@ -0,0 +1,32 @@
1
+ export type { LogLevel, LoggerService, LoggerServiceId } from "./Logger"
2
+ export {
3
+ Logger,
4
+ ConsoleLoggerLive,
5
+ TerminalLoggerLive,
6
+ NullLoggerLive,
7
+ SilentLoggerLive,
8
+ PrettyLoggerLive,
9
+ log,
10
+ debug,
11
+ info,
12
+ warn,
13
+ error
14
+ } from "./Logger"
15
+
16
+ export type { RandomService, RandomServiceId } from "./Random"
17
+ export { Random, SeededRandomLive, SystemRandomLive, next, nextGaussian, nextInt, fork } from "./Random"
18
+
19
+ export type { SeedService, SeedServiceId } from "./SeedLayer"
20
+ export { Seed, SeedLayer, useSeedRng } from "./SeedLayer"
21
+
22
+ export type { Counter, Gauge, Histogram, TimingResult, MetricsService, MetricsSnapshot, MetricsServiceId } from "./Metrics"
23
+ export {
24
+ Metrics,
25
+ InMemoryMetricsLive,
26
+ NoOpMetricsLive,
27
+ counter,
28
+ gauge,
29
+ histogram,
30
+ timed,
31
+ snapshot
32
+ } from "./Metrics"
@@ -0,0 +1,42 @@
1
/**
 * Dense 2-D tensor backed by a flat Float32Array.
 * Element (row, col) lives at index `row * cols + col` (see `get` / `set`).
 */
export interface Tensor2D {
  /** Number of rows. */
  readonly rows: number
  /** Number of columns. */
  readonly cols: number
  /** Flat element storage; `make` and `fromArray` enforce length `rows * cols`. */
  readonly data: Float32Array
}
6
+
7
/**
 * Wraps an existing buffer as a tensor without copying it.
 *
 * @throws Error when `data.length` differs from `rows * cols`.
 */
export const make = (rows: number, cols: number, data: Float32Array): Tensor2D => {
  const expectedLength = rows * cols
  if (data.length === expectedLength) {
    return { rows, cols, data }
  }
  throw new Error(`Data length ${data.length} does not match shape ${rows}x${cols}`)
}
13
+
14
/** Allocates a `rows` x `cols` tensor whose elements are all 0. */
export const zeros = (rows: number, cols: number): Tensor2D => {
  const data = new Float32Array(rows * cols)
  return { rows, cols, data }
}
17
+
18
/** Allocates a `rows` x `cols` tensor whose elements are all 1. */
export const ones = (rows: number, cols: number): Tensor2D => ({
  rows,
  cols,
  data: new Float32Array(rows * cols).fill(1)
})
23
+
24
/** Returns a tensor of the same shape whose storage is a fresh copy of `t.data`. */
export const clone = (t: Tensor2D): Tensor2D => {
  const copy = t.data.slice()
  return { rows: t.rows, cols: t.cols, data: copy }
}
27
+
28
/** Reads element (row, col) from the flat storage. No bounds checking is performed. */
export const get = (t: Tensor2D, row: number, col: number): number => {
  const offset = row * t.cols + col
  return t.data[offset]
}
31
+
32
/** Writes `value` at element (row, col), mutating the tensor's storage in place. No bounds checking is performed. */
export const set = (t: Tensor2D, row: number, col: number, value: number): void => {
  const offset = row * t.cols + col
  t.data[offset] = value
}
35
+
36
/**
 * Copies an array-like of numbers into a new Float32Array-backed tensor.
 *
 * @throws Error when the number of elements differs from `rows * cols`.
 */
export const fromArray = (rows: number, cols: number, arr: ArrayLike<number>): Tensor2D => {
  const data = new Float32Array(arr)
  const shapeMatches = data.length === rows * cols
  if (!shapeMatches) {
    throw new Error(`Array length ${data.length} does not match shape ${rows}x${cols}`)
  }
  return { rows, cols, data }
}
@@ -0,0 +1,371 @@
1
+ import * as Effect from "effect/Effect"
2
+ import type { Tensor2D } from "./Tensor2D"
3
+ import * as T from "./Tensor2D"
4
+ import type { Rng } from "./random"
5
+ import type { RandomServiceId } from "../services/Random"
6
+ import { Random } from "../services/Random"
7
+
8
/** Error raised by tensor operations when operand shapes or indices are incompatible. */
export class ShapeError extends Error {
  /** Discriminant that lets callers match on the error kind. */
  readonly _tag = "ShapeError"

  constructor(message: string) {
    super(message)
    // Field initializers have already run here, so the tag doubles as the error name.
    this.name = this._tag
  }
}
15
+
16
/**
 * Matrix product `a · b` as an Effect.
 *
 * Fails with ShapeError when `a.cols !== b.rows`. Any exception thrown inside
 * the computation is re-channelled into the failure channel by `catchAllDefect`.
 */
export const matMul = (a: Tensor2D, b: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
  Effect.sync(() => {
    const inner = a.cols
    if (inner !== b.rows) {
      throw new ShapeError(`matMul: a.cols (${a.cols}) !== b.rows (${b.rows})`)
    }
    const out = T.zeros(a.rows, b.cols)
    for (let row = 0; row < a.rows; row++) {
      const aBase = row * a.cols
      for (let col = 0; col < b.cols; col++) {
        // Accumulate in double precision; rounding to float32 happens on store.
        let acc = 0
        for (let k = 0; k < inner; k++) {
          acc += a.data[aBase + k] * b.data[k * b.cols + col]
        }
        out.data[row * out.cols + col] = acc
      }
    }
    return out
  }).pipe(Effect.catchAllDefect((defect) => Effect.fail(defect as ShapeError)))
33
+
34
/**
 * Element-wise sum of two tensors of identical shape, as an Effect.
 * Fails with ShapeError when the shapes differ.
 */
export const add = (a: Tensor2D, b: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
  Effect.sync(() => {
    const sameShape = a.rows === b.rows && a.cols === b.cols
    if (!sameShape) {
      throw new ShapeError(`add: shapes (${a.rows},${a.cols}) and (${b.rows},${b.cols}) do not match`)
    }
    const summed = a.data.map((value, i) => value + b.data[i])
    return T.make(a.rows, a.cols, summed)
  }).pipe(Effect.catchAllDefect((defect) => Effect.fail(defect as ShapeError)))
45
+
46
/**
 * Element-wise difference `a - b` of two tensors of identical shape, as an Effect.
 * Fails with ShapeError when the shapes differ.
 */
export const sub = (a: Tensor2D, b: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
  Effect.sync(() => {
    const sameShape = a.rows === b.rows && a.cols === b.cols
    if (!sameShape) {
      throw new ShapeError(`sub: shapes (${a.rows},${a.cols}) and (${b.rows},${b.cols}) do not match`)
    }
    const difference = a.data.map((value, i) => value - b.data[i])
    return T.make(a.rows, a.cols, difference)
  }).pipe(Effect.catchAllDefect((defect) => Effect.fail(defect as ShapeError)))
57
+
58
/**
 * Element-wise (Hadamard) product of two tensors of identical shape, as an Effect.
 * Fails with ShapeError when the shapes differ.
 */
export const mul = (a: Tensor2D, b: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
  Effect.sync(() => {
    const sameShape = a.rows === b.rows && a.cols === b.cols
    if (!sameShape) {
      throw new ShapeError(`mul: shapes (${a.rows},${a.cols}) and (${b.rows},${b.cols}) do not match`)
    }
    const product = a.data.map((value, i) => value * b.data[i])
    return T.make(a.rows, a.cols, product)
  }).pipe(Effect.catchAllDefect((defect) => Effect.fail(defect as ShapeError)))
69
+
70
/**
 * Element-wise quotient `a / b` of two tensors of identical shape, as an Effect.
 * Fails with ShapeError when the shapes differ. Division by zero is not
 * guarded; it follows normal floating-point rules.
 */
export const div = (a: Tensor2D, b: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
  Effect.sync(() => {
    const sameShape = a.rows === b.rows && a.cols === b.cols
    if (!sameShape) {
      throw new ShapeError(`div: shapes (${a.rows},${a.cols}) and (${b.rows},${b.cols}) do not match`)
    }
    const quotient = a.data.map((value, i) => value / b.data[i])
    return T.make(a.rows, a.cols, quotient)
  }).pipe(Effect.catchAllDefect((defect) => Effect.fail(defect as ShapeError)))
81
+
82
/** Returns a new tensor with `scalar` added to every element of `t`. */
export const addScalar = (t: Tensor2D, scalar: number): Tensor2D => {
  const shifted = t.data.map((value) => value + scalar)
  return T.make(t.rows, t.cols, shifted)
}
89
+
90
/** Returns a new tensor with every element of `t` multiplied by `scalar`. */
export const mulScalar = (t: Tensor2D, scalar: number): Tensor2D => {
  const scaled = t.data.map((value) => value * scalar)
  return T.make(t.rows, t.cols, scaled)
}
97
+
98
/**
 * Adds a 1 x cols bias row to every row of `matrix`, as an Effect.
 * Fails with ShapeError unless `bias` is a single row with `matrix.cols` columns.
 */
export const addRowBias = (matrix: Tensor2D, bias: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
  Effect.sync(() => {
    const biasFits = bias.rows === 1 && bias.cols === matrix.cols
    if (!biasFits) {
      throw new ShapeError(`addRowBias: bias shape (${bias.rows},${bias.cols}) incompatible with matrix cols ${matrix.cols}`)
    }
    const out = new Float32Array(matrix.data.length)
    for (let r = 0; r < matrix.rows; r++) {
      const base = r * matrix.cols
      for (let c = 0; c < matrix.cols; c++) {
        out[base + c] = matrix.data[base + c] + bias.data[c]
      }
    }
    return T.make(matrix.rows, matrix.cols, out)
  }).pipe(Effect.catchAllDefect((defect) => Effect.fail(defect as ShapeError)))
111
+
112
/** Computes the mean of each row; the result is a `rows` x 1 column tensor. */
export const meanRows = (t: Tensor2D): Tensor2D => {
  const means = new Float32Array(t.rows)
  for (let r = 0; r < t.rows; r++) {
    const base = r * t.cols
    let total = 0
    for (let c = 0; c < t.cols; c++) {
      total += t.data[base + c]
    }
    means[r] = total / t.cols
  }
  return T.make(t.rows, 1, means)
}
123
+
124
/** Sums each column over all rows; the result is a 1 x `cols` row tensor. */
export const sumCols = (t: Tensor2D): Tensor2D => {
  const totals = new Float32Array(t.cols)
  for (let c = 0; c < t.cols; c++) {
    let total = 0
    for (let r = 0; r < t.rows; r++) {
      total += t.data[r * t.cols + c]
    }
    totals[c] = total
  }
  return T.make(1, t.cols, totals)
}
135
+
136
/** Averages each column over all rows; the result is a 1 x `cols` row tensor. */
export const meanCols = (t: Tensor2D): Tensor2D => {
  const means = new Float32Array(t.cols)
  // Multiply by the reciprocal once instead of dividing per column.
  const inverseRowCount = 1 / t.rows
  for (let c = 0; c < t.cols; c++) {
    let total = 0
    for (let r = 0; r < t.rows; r++) {
      total += t.data[r * t.cols + c]
    }
    means[c] = total * inverseRowCount
  }
  return T.make(1, t.cols, means)
}
148
+
149
/**
 * Population standard deviation of each row (divides by `cols`, not `cols - 1`).
 * The result is a `rows` x 1 column tensor.
 */
export const stdRows = (t: Tensor2D): Tensor2D => {
  const rowMeans = meanRows(t)
  const out = new Float32Array(t.rows)
  for (let r = 0; r < t.rows; r++) {
    const mu = rowMeans.data[r]
    const base = r * t.cols
    let squares = 0
    for (let c = 0; c < t.cols; c++) {
      const delta = t.data[base + c] - mu
      squares += delta * delta
    }
    out[r] = Math.sqrt(squares / t.cols)
  }
  return T.make(t.rows, 1, out)
}
163
+
164
/**
 * Population variance of each row (divides by `cols`, not `cols - 1`).
 * The result is a `rows` x 1 column tensor.
 */
export const varRows = (t: Tensor2D): Tensor2D => {
  const rowMeans = meanRows(t)
  const out = new Float32Array(t.rows)
  for (let r = 0; r < t.rows; r++) {
    const mu = rowMeans.data[r]
    const base = r * t.cols
    let squares = 0
    for (let c = 0; c < t.cols; c++) {
      const delta = t.data[base + c] - mu
      squares += delta * delta
    }
    out[r] = squares / t.cols
  }
  return T.make(t.rows, 1, out)
}
178
+
179
/**
 * Applies `fn` to every element and returns the results as a new tensor of the same shape.
 * `fn` receives only the element value.
 */
export const mapScalar = (t: Tensor2D, fn: (val: number) => number): Tensor2D => {
  // Wrap fn so it is never handed the extra (index, array) arguments of TypedArray#map.
  const mapped = t.data.map((value) => fn(value))
  return T.make(t.rows, t.cols, mapped)
}
186
+
187
/**
 * Row-wise softmax. Each row is shifted by its maximum before exponentiation
 * so that `Math.exp` does not overflow for large inputs.
 */
export const softmaxRows = (t: Tensor2D): Tensor2D => {
  const out = new Float32Array(t.data.length)
  for (let r = 0; r < t.rows; r++) {
    const base = r * t.cols

    // Pass 1: find the row maximum.
    let peak = -Infinity
    for (let c = 0; c < t.cols; c++) {
      const value = t.data[base + c]
      if (value > peak) peak = value
    }

    // Pass 2: store shifted exponentials and accumulate their (double precision) sum.
    let denominator = 0
    for (let c = 0; c < t.cols; c++) {
      const shiftedExp = Math.exp(t.data[base + c] - peak)
      out[base + c] = shiftedExp
      denominator += shiftedExp
    }

    // Pass 3: normalize the row.
    for (let c = 0; c < t.cols; c++) {
      out[base + c] /= denominator
    }
  }
  return T.make(t.rows, t.cols, out)
}
207
+
208
/** Returns the transpose of `t` as a new `cols` x `rows` tensor. */
export const transpose = (t: Tensor2D): Tensor2D => {
  const flipped = new Float32Array(t.rows * t.cols)
  for (let r = 0; r < t.rows; r++) {
    const base = r * t.cols
    for (let c = 0; c < t.cols; c++) {
      // Source (r, c) lands at (c, r) in the output, whose row length is t.rows.
      flipped[c * t.rows + r] = t.data[base + c]
    }
  }
  return T.make(t.cols, t.rows, flipped)
}
217
+
218
/**
 * Builds a `tokenIds.length` x `embeddings.cols` tensor whose i-th row is a
 * copy of row `tokenIds[i]` of `embeddings`.
 *
 * Fails with ShapeError when a token id is not an integer (NaN, fractional or
 * missing values would otherwise pass the range check and silently gather
 * NaN rows) or lies outside `[0, embeddings.rows)`.
 */
export const gatherRows = (embeddings: Tensor2D, tokenIds: ReadonlyArray<number>): Effect.Effect<Tensor2D, ShapeError> =>
  Effect.sync(() => {
    const width = embeddings.cols
    const data = new Float32Array(tokenIds.length * width)
    for (let i = 0; i < tokenIds.length; i++) {
      const tokenId = tokenIds[i]
      // NaN compares false against both bounds, so integrality must be checked explicitly.
      if (!Number.isInteger(tokenId)) {
        throw new ShapeError(`gatherRows: tokenId ${tokenId} is not an integer row index`)
      }
      if (tokenId < 0 || tokenId >= embeddings.rows) {
        throw new ShapeError(`gatherRows: tokenId ${tokenId} out of bounds [0, ${embeddings.rows})`)
      }
      const sourceBase = tokenId * width
      const targetBase = i * width
      for (let j = 0; j < width; j++) {
        data[targetBase + j] = embeddings.data[sourceBase + j]
      }
    }
    return T.make(tokenIds.length, width, data)
  }).pipe(Effect.catchAllDefect((e) => Effect.fail(e as ShapeError)))
232
+
233
/**
 * Copies rows `[start, end)` of `t` into a new tensor, as an Effect.
 * Fails with ShapeError when the range is empty, reversed, or outside `[0, t.rows]`.
 */
export const sliceRows = (t: Tensor2D, start: number, end: number): Effect.Effect<Tensor2D, ShapeError> =>
  Effect.sync(() => {
    const invalidRange = start < 0 || end > t.rows || start >= end
    if (invalidRange) {
      throw new ShapeError(`sliceRows: invalid range [${start}, ${end}) for tensor with ${t.rows} rows`)
    }
    const rowCount = end - start
    const out = new Float32Array(rowCount * t.cols)
    for (let r = 0; r < rowCount; r++) {
      const sourceBase = (start + r) * t.cols
      const targetBase = r * t.cols
      for (let c = 0; c < t.cols; c++) {
        out[targetBase + c] = t.data[sourceBase + c]
      }
    }
    return T.make(rowCount, t.cols, out)
  }).pipe(Effect.catchAllDefect((defect) => Effect.fail(defect as ShapeError)))
247
+
248
/** Element-wise rectified linear unit: negative elements become 0, others are kept. */
export const relu = (t: Tensor2D): Tensor2D => {
  const rectified = t.data.map((value) => Math.max(0, value))
  return T.make(t.rows, t.cols, rectified)
}
255
+
256
/**
 * For each row, returns the column index of its largest element.
 * Ties resolve to the lowest index because only strictly greater values replace the current best.
 */
export const argmaxRows = (t: Tensor2D): ReadonlyArray<number> => {
  const winners: Array<number> = []
  for (let r = 0; r < t.rows; r++) {
    const base = r * t.cols
    let bestIndex = 0
    let bestValue = t.data[base]
    for (let c = 1; c < t.cols; c++) {
      const candidate = t.data[base + c]
      if (candidate > bestValue) {
        bestValue = candidate
        bestIndex = c
      }
    }
    winners.push(bestIndex)
  }
  return winners
}
272
+
273
/**
 * Subtracts a `rows` x 1 column from every column of `t` (row i is shifted by `col[i]`), as an Effect.
 * Fails with ShapeError unless `col` is a single column with `t.rows` rows.
 */
export const broadcastSubCol = (t: Tensor2D, col: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
  Effect.sync(() => {
    const colFits = col.cols === 1 && col.rows === t.rows
    if (!colFits) {
      throw new ShapeError(`broadcastSubCol: col shape (${col.rows},${col.cols}) incompatible with tensor rows ${t.rows}`)
    }
    const out = new Float32Array(t.data.length)
    for (let r = 0; r < t.rows; r++) {
      const base = r * t.cols
      for (let c = 0; c < t.cols; c++) {
        out[base + c] = t.data[base + c] - col.data[r]
      }
    }
    return T.make(t.rows, t.cols, out)
  }).pipe(Effect.catchAllDefect((defect) => Effect.fail(defect as ShapeError)))
286
+
287
/**
 * Divides every element of row i of `t` by `col[i]`, as an Effect.
 * Fails with ShapeError unless `col` is a single column with `t.rows` rows.
 * Zero divisors are not guarded.
 */
export const broadcastDivCol = (t: Tensor2D, col: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
  Effect.sync(() => {
    const colFits = col.cols === 1 && col.rows === t.rows
    if (!colFits) {
      throw new ShapeError(`broadcastDivCol: col shape (${col.rows},${col.cols}) incompatible with tensor rows ${t.rows}`)
    }
    const out = new Float32Array(t.data.length)
    for (let r = 0; r < t.rows; r++) {
      const base = r * t.cols
      for (let c = 0; c < t.cols; c++) {
        out[base + c] = t.data[base + c] / col.data[r]
      }
    }
    return T.make(t.rows, t.cols, out)
  }).pipe(Effect.catchAllDefect((defect) => Effect.fail(defect as ShapeError)))
300
+
301
/**
 * Multiplies every element of row i of `t` by `col[i]`, as an Effect.
 * Fails with ShapeError unless `col` is a single column with `t.rows` rows.
 */
export const broadcastMulCol = (t: Tensor2D, col: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
  Effect.sync(() => {
    const colFits = col.cols === 1 && col.rows === t.rows
    if (!colFits) {
      throw new ShapeError(`broadcastMulCol: col shape (${col.rows},${col.cols}) incompatible with tensor rows ${t.rows}`)
    }
    const out = new Float32Array(t.data.length)
    for (let r = 0; r < t.rows; r++) {
      const base = r * t.cols
      for (let c = 0; c < t.cols; c++) {
        out[base + c] = t.data[base + c] * col.data[r]
      }
    }
    return T.make(t.rows, t.cols, out)
  }).pipe(Effect.catchAllDefect((defect) => Effect.fail(defect as ShapeError)))
314
+
315
/**
 * Multiplies every element of column j of `t` by `row[j]`, as an Effect.
 * Fails with ShapeError unless `row` is a single row with `t.cols` columns.
 */
export const broadcastMulRow = (t: Tensor2D, row: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
  Effect.sync(() => {
    const rowFits = row.rows === 1 && row.cols === t.cols
    if (!rowFits) {
      throw new ShapeError(`broadcastMulRow: row shape (${row.rows},${row.cols}) incompatible with tensor cols ${t.cols}`)
    }
    const out = new Float32Array(t.data.length)
    for (let r = 0; r < t.rows; r++) {
      const base = r * t.cols
      for (let c = 0; c < t.cols; c++) {
        out[base + c] = t.data[base + c] * row.data[c]
      }
    }
    return T.make(t.rows, t.cols, out)
  }).pipe(Effect.catchAllDefect((defect) => Effect.fail(defect as ShapeError)))
328
+
329
/**
 * Adds `row[j]` to every element of column j of `t`, as an Effect.
 * Fails with ShapeError unless `row` is a single row with `t.cols` columns.
 */
export const broadcastAddRow = (t: Tensor2D, row: Tensor2D): Effect.Effect<Tensor2D, ShapeError> =>
  Effect.sync(() => {
    const rowFits = row.rows === 1 && row.cols === t.cols
    if (!rowFits) {
      throw new ShapeError(`broadcastAddRow: row shape (${row.rows},${row.cols}) incompatible with tensor cols ${t.cols}`)
    }
    const out = new Float32Array(t.data.length)
    for (let r = 0; r < t.rows; r++) {
      const base = r * t.cols
      for (let c = 0; c < t.cols; c++) {
        out[base + c] = t.data[base + c] + row.data[c]
      }
    }
    return T.make(t.rows, t.cols, out)
  }).pipe(Effect.catchAllDefect((defect) => Effect.fail(defect as ShapeError)))
342
+
343
/**
 * Fills a `rows` x `cols` tensor with normally distributed samples
 * (`mean`, `std`) using the cosine branch of the Box-Muller transform.
 *
 * Two uniforms are drawn from `rng` per element; the first is redrawn while it
 * is exactly 0 so that `Math.log` never receives 0.
 */
export const initNormal = (rows: number, cols: number, mean: number, std: number, rng: Rng): Tensor2D => {
  const samples = new Float32Array(rows * cols)
  for (let i = 0; i < samples.length; i++) {
    // Draw order matters for reproducibility: both uniforms first, then any redraws.
    let radiusSource = rng.next()
    const angleSource = rng.next()
    while (radiusSource === 0) {
      radiusSource = rng.next()
    }
    const standardNormal = Math.sqrt(-2 * Math.log(radiusSource)) * Math.cos(2 * Math.PI * angleSource)
    samples[i] = mean + std * standardNormal
  }
  return T.make(rows, cols, samples)
}
355
+
356
/**
 * Effectful counterpart of `initNormal`: fills a `rows` x `cols` tensor with
 * values obtained one at a time from the Random service's `nextGaussian(mean, std)`.
 */
export const initNormalEffect = (
  rows: number,
  cols: number,
  mean: number,
  std: number
): Effect.Effect<Tensor2D, never, RandomServiceId> =>
  Effect.flatMap(Random, (random) =>
    Effect.gen(function* () {
      const samples = new Float32Array(rows * cols)
      let index = 0
      // Samples are requested sequentially so the service is consumed in element order.
      while (index < samples.length) {
        samples[index] = yield* random.nextGaussian(mean, std)
        index += 1
      }
      return T.make(rows, cols, samples)
    })
  )
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Deterministic random number generator for reproducible weight initialization.
3
+ * Uses mulberry32 algorithm for fast, seedable pseudo-random numbers.
4
+ */
5
+
6
/** Minimal random-number source consumed by the tensor initializers. */
export interface Rng {
  /** Produces the next random number in the half-open interval [0, 1). */
  next(): number
}
10
+
11
/**
 * Builds a deterministic RNG based on the mulberry32 algorithm.
 * The seed is coerced to an unsigned 32-bit integer; equal seeds yield equal
 * sequences, which makes tests reproducible.
 */
export const seeded = (seed: number): Rng => {
  let state = seed >>> 0

  const next = (): number => {
    // Advance the 32-bit counter, then scramble it into the output word.
    state = (state + 0x6d2b79f5) >>> 0
    let mixed = Math.imul(state ^ (state >>> 15), state | 1)
    mixed ^= mixed + Math.imul(mixed ^ (mixed >>> 7), mixed | 61)
    // Map the unsigned 32-bit result onto [0, 1).
    return ((mixed ^ (mixed >>> 14)) >>> 0) / 4294967296
  }

  return { next }
}
28
+
29
/** Convenience Rng that delegates every draw to Math.random (not reproducible). */
export const systemRng = (): Rng => {
  // Look Math.random up on each call rather than capturing it once.
  const next = (): number => Math.random()
  return { next }
}
@@ -0,0 +1,27 @@
1
/** Matches a single ASCII punctuation character. */
const ASCII_PUNCTUATION = /[!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]/

/** True when `ch` is exactly one UTF-16 code unit and that unit is ASCII punctuation. */
const isAsciiPunctuation = (ch: string): boolean => {
  if (ch.length !== 1) return false
  return ASCII_PUNCTUATION.test(ch)
}

/**
 * Splits a word into tokens: every ASCII punctuation character becomes its own
 * token, and each run of other characters between them becomes one token.
 * Order is preserved; the empty string yields no tokens.
 */
export const splitWordToTokens = (word: string): ReadonlyArray<string> => {
  const pieces: Array<string> = []
  let pending = ""

  // Emits the accumulated non-punctuation run, if there is one.
  const flush = (): void => {
    if (pending.length > 0) {
      pieces.push(pending)
      pending = ""
    }
  }

  for (const ch of word) {
    if (!isAsciiPunctuation(ch)) {
      pending += ch
      continue
    }
    flush()
    pieces.push(ch)
  }
  flush()

  return pieces
}
@@ -0,0 +1,28 @@
1
+ import * as Option from "effect/Option"
2
+ import { splitWordToTokens } from "./split"
3
+ import { Vocab } from "../vocab/Vocab"
4
+
5
/**
 * Converts text into vocabulary ids.
 *
 * The text is split on whitespace. The literal word "</s>" is looked up as a
 * whole; every other word is first broken up by `splitWordToTokens`. Pieces the
 * vocabulary cannot encode are skipped.
 */
export const tokenize = (text: string, vocab: Vocab): ReadonlyArray<number> => {
  const ids: Array<number> = []

  // Appends the id for `piece` when the vocabulary knows it.
  const pushIfKnown = (piece: string): void => {
    const encoded = vocab.encode(piece)
    if (Option.isSome(encoded)) {
      ids.push(encoded.value as number)
    }
  }

  for (const word of text.split(/\s+/)) {
    if (word.length === 0) continue
    if (word === "</s>") {
      // Looked up whole so it is not split at its punctuation characters.
      pushIfKnown(word)
      continue
    }
    for (const part of splitWordToTokens(word)) {
      pushIfKnown(part)
    }
  }

  return ids
}
@@ -0,0 +1,61 @@
1
+ import type { Tensor2D } from "../tensor/Tensor2D"
2
+ import * as T from "../tensor/Tensor2D"
3
+ import { ShapeError } from "../tensor/ops"
4
+
5
/**
 * Adam optimizer state for a single parameter tensor.
 *
 * Holds running first- and second-moment estimates (`m`, `v`) shaped like the
 * parameter, plus the step counter used for bias correction.
 */
export class Adam {
  /** Decay rate of the first-moment (mean) estimate. */
  readonly beta1 = 0.9
  /** Decay rate of the second-moment (uncentered variance) estimate. */
  readonly beta2 = 0.999
  /** Added to the denominator to avoid division by zero. */
  readonly epsilon = 1e-8
  /** Number of `step` calls that passed validation so far. */
  timestep = 0
  /** Running first-moment estimate. */
  m: Tensor2D
  /** Running second-moment estimate. */
  v: Tensor2D

  private constructor(rows: number, cols: number) {
    this.m = T.zeros(rows, cols)
    this.v = T.zeros(rows, cols)
  }

  /** Creates optimizer state with zeroed moments for a `rows` x `cols` parameter. */
  static make(rows: number, cols: number): Adam {
    return new Adam(rows, cols)
  }

  /**
   * Applies one Adam update to `params` in place.
   *
   * @param params Parameter tensor; its data is mutated.
   * @param grads  Gradient tensor with the same shape as `params`.
   * @param lr     Learning rate.
   * @throws ShapeError when `grads` or the optimizer state does not match the shape of `params`.
   */
  step(params: Tensor2D, grads: Tensor2D, lr: number): void {
    const gradsMatch = params.rows === grads.rows && params.cols === grads.cols
    if (!gradsMatch) {
      throw new ShapeError(
        `Adam.step: params shape (${params.rows},${params.cols}) != grads shape (${grads.rows},${grads.cols})`
      )
    }
    const stateMatches = this.m.rows === params.rows && this.m.cols === params.cols
    if (!stateMatches) {
      throw new ShapeError(
        `Adam.step: optimizer shape (${this.m.rows},${this.m.cols}) != params shape (${params.rows},${params.cols})`
      )
    }

    this.timestep += 1
    const { beta1, beta2, epsilon } = this
    const gradWeight = 1 - beta1
    const squareWeight = 1 - beta2

    const firstMoment = this.m.data
    const secondMoment = this.v.data
    const weights = params.data
    const gradient = grads.data

    // Phase 1: fold the new gradient into both moment estimates.
    for (let i = 0; i < gradient.length; i++) {
      const g = gradient[i]
      firstMoment[i] = firstMoment[i] * beta1 + g * gradWeight
      secondMoment[i] = secondMoment[i] * beta2 + g * g * squareWeight
    }

    // Bias-correction denominators for the current timestep.
    const firstCorrection = 1 - Math.pow(beta1, this.timestep)
    const secondCorrection = 1 - Math.pow(beta2, this.timestep)

    // Phase 2: apply the bias-corrected update to the parameters.
    for (let i = 0; i < weights.length; i++) {
      const mHat = firstMoment[i] / firstCorrection
      const vHat = secondMoment[i] / secondCorrection
      const update = mHat / (Math.sqrt(vHat) + epsilon)
      weights[i] -= lr * update
    }
  }
}
@@ -0,0 +1,16 @@
1
+ import type { Tensor2D } from "../tensor/Tensor2D"
2
+
3
/**
 * Rescales `grads` in place so that its L2 norm does not exceed `maxNorm`.
 * Gradients whose norm is already within the limit are left untouched.
 */
export const clipGlobalL2 = (grads: Tensor2D, maxNorm: number): void => {
  const values = grads.data

  let squaredSum = 0
  for (const value of values) {
    squaredSum += value * value
  }
  const norm = Math.sqrt(squaredSum)

  // Negated comparison so a NaN norm also takes the "leave untouched" path.
  if (!(norm > maxNorm)) return

  const scale = maxNorm / norm
  for (let i = 0; i < values.length; i++) {
    values[i] *= scale
  }
}
@@ -0,0 +1,35 @@
1
+ import type { Tensor2D } from "../tensor/Tensor2D"
2
+ import * as T from "../tensor/Tensor2D"
3
+ import * as Ops from "../tensor/ops"
4
+
5
/** Row-wise softmax of the logits; delegates to `Ops.softmaxRows`. */
export const softmaxRows = (logits: Tensor2D): Tensor2D => {
  return Ops.softmaxRows(logits)
}
6
+
7
/**
 * Mean cross-entropy of `probs` against integer class targets.
 *
 * @param probs     Row i holds the predicted probabilities for example i.
 * @param targetIds Target column index for each row.
 * @returns The average of `-log(p_target)` over all rows, with each probability
 *          clamped to at least 1e-15. An empty batch yields NaN (0 / 0).
 * @throws Ops.ShapeError when the number of targets differs from `probs.rows`,
 *         or when a target is not an integer in `[0, probs.cols)`. Without that
 *         check such a target would read another row's probability or
 *         `undefined`, producing a wrong or NaN loss without any error.
 */
export const crossEntropyLoss = (probs: Tensor2D, targetIds: ReadonlyArray<number>): number => {
  if (probs.rows !== targetIds.length) {
    throw new Ops.ShapeError(`crossEntropyLoss: probs.rows (${probs.rows}) !== targetIds.length (${targetIds.length})`)
  }
  let loss = 0
  for (let i = 0; i < probs.rows; i++) {
    const idx = targetIds[i]
    if (!Number.isInteger(idx) || idx < 0 || idx >= probs.cols) {
      throw new Ops.ShapeError(`crossEntropyLoss: targetIds[${i}] (${idx}) out of bounds [0, ${probs.cols})`)
    }
    const prob = probs.data[i * probs.cols + idx]
    // Clamp so that log never receives 0.
    const clamped = prob < 1e-15 ? 1e-15 : prob
    loss -= Math.log(clamped)
  }
  return loss / targetIds.length
}
20
+
21
/**
 * Gradient of the mean cross-entropy loss with respect to the logits:
 * `(probs - oneHot(targetIds)) / batchSize`, returned as a new tensor.
 *
 * @param probs     Row i holds the softmax probabilities for example i; not mutated.
 * @param targetIds Target column index for each row.
 * @throws Ops.ShapeError when the number of targets differs from `probs.rows`,
 *         or when a target is not an integer in `[0, probs.cols)`. Without that
 *         check such a target would subtract 1 from an element of another row,
 *         or be dropped silently.
 */
export const dLogits = (probs: Tensor2D, targetIds: ReadonlyArray<number>): Tensor2D => {
  if (probs.rows !== targetIds.length) {
    throw new Ops.ShapeError(`dLogits: probs.rows (${probs.rows}) !== targetIds.length (${targetIds.length})`)
  }
  // Work on a copy so the caller's probabilities stay intact.
  const data = new Float32Array(probs.data)
  for (let i = 0; i < probs.rows; i++) {
    const idx = targetIds[i]
    if (!Number.isInteger(idx) || idx < 0 || idx >= probs.cols) {
      throw new Ops.ShapeError(`dLogits: targetIds[${i}] (${idx}) out of bounds [0, ${probs.cols})`)
    }
    data[i * probs.cols + idx] -= 1
  }
  // Average over the batch.
  const scale = 1 / targetIds.length
  for (let i = 0; i < data.length; i++) {
    data[i] *= scale
  }
  return T.make(probs.rows, probs.cols, data)
}