bun-scikit 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/LICENSE +21 -0
  2. package/README.md +187 -0
  3. package/binding.gyp +21 -0
  4. package/docs/README.md +7 -0
  5. package/docs/native-abi.md +53 -0
  6. package/index.ts +1 -0
  7. package/package.json +76 -0
  8. package/scripts/build-node-addon.ts +26 -0
  9. package/scripts/build-zig-kernels.ts +50 -0
  10. package/scripts/check-api-docs-coverage.ts +52 -0
  11. package/scripts/check-benchmark-health.ts +140 -0
  12. package/scripts/install-native.ts +160 -0
  13. package/scripts/package-native-artifacts.ts +62 -0
  14. package/scripts/sync-benchmark-readme.ts +181 -0
  15. package/scripts/update-benchmark-history.ts +91 -0
  16. package/src/ensemble/RandomForestClassifier.ts +136 -0
  17. package/src/ensemble/RandomForestRegressor.ts +136 -0
  18. package/src/index.ts +32 -0
  19. package/src/linear_model/LinearRegression.ts +136 -0
  20. package/src/linear_model/LogisticRegression.ts +260 -0
  21. package/src/linear_model/SGDClassifier.ts +161 -0
  22. package/src/linear_model/SGDRegressor.ts +104 -0
  23. package/src/metrics/classification.ts +294 -0
  24. package/src/metrics/regression.ts +51 -0
  25. package/src/model_selection/GridSearchCV.ts +244 -0
  26. package/src/model_selection/KFold.ts +82 -0
  27. package/src/model_selection/RepeatedKFold.ts +49 -0
  28. package/src/model_selection/RepeatedStratifiedKFold.ts +50 -0
  29. package/src/model_selection/StratifiedKFold.ts +112 -0
  30. package/src/model_selection/StratifiedShuffleSplit.ts +211 -0
  31. package/src/model_selection/crossValScore.ts +165 -0
  32. package/src/model_selection/trainTestSplit.ts +82 -0
  33. package/src/naive_bayes/GaussianNB.ts +148 -0
  34. package/src/native/node-addon/bun_scikit_addon.cpp +450 -0
  35. package/src/native/zigKernels.ts +576 -0
  36. package/src/neighbors/KNeighborsClassifier.ts +85 -0
  37. package/src/pipeline/ColumnTransformer.ts +203 -0
  38. package/src/pipeline/FeatureUnion.ts +123 -0
  39. package/src/pipeline/Pipeline.ts +168 -0
  40. package/src/preprocessing/MinMaxScaler.ts +113 -0
  41. package/src/preprocessing/OneHotEncoder.ts +91 -0
  42. package/src/preprocessing/PolynomialFeatures.ts +158 -0
  43. package/src/preprocessing/RobustScaler.ts +149 -0
  44. package/src/preprocessing/SimpleImputer.ts +150 -0
  45. package/src/preprocessing/StandardScaler.ts +92 -0
  46. package/src/svm/LinearSVC.ts +117 -0
  47. package/src/tree/DecisionTreeClassifier.ts +394 -0
  48. package/src/tree/DecisionTreeRegressor.ts +407 -0
  49. package/src/types.ts +18 -0
  50. package/src/utils/linalg.ts +209 -0
  51. package/src/utils/validation.ts +78 -0
  52. package/zig/kernels.zig +1327 -0
@@ -0,0 +1,1327 @@
1
const std = @import("std");

// One global allocator for all kernel state; page_allocator needs no setup and
// keeps the exported C ABI free of allocator parameters.
const allocator = std.heap.page_allocator;
// Bumped whenever exported symbols or struct layouts change incompatibly.
const ABI_VERSION: u32 = 1;
5
/// Status codes shared with the host-language bindings via the
/// bun_scikit_status_* getters. Values deliberately start at 1 so that 0 can
/// be used to mean "no status / null handle" across the ABI.
const Status = enum(u32) {
    ok = 1,
    invalid_handle = 2,
    invalid_shape = 3,
    allocation_failed = 4,
    fit_failed = 5,
    symbol_unavailable = 6,
};
13
+
14
/// Reports the kernel ABI version so loaders can reject mismatched binaries.
pub export fn bun_scikit_abi_version() u32 {
    return ABI_VERSION;
}
17
+
18
/// Exposes the numeric value of Status.ok across the C ABI.
pub export fn bun_scikit_status_ok() u32 {
    return @intFromEnum(Status.ok);
}
21
+
22
/// Exposes the numeric value of Status.invalid_handle across the C ABI.
pub export fn bun_scikit_status_invalid_handle() u32 {
    return @intFromEnum(Status.invalid_handle);
}
25
+
26
/// Exposes the numeric value of Status.invalid_shape across the C ABI.
pub export fn bun_scikit_status_invalid_shape() u32 {
    return @intFromEnum(Status.invalid_shape);
}
29
+
30
/// Exposes the numeric value of Status.allocation_failed across the C ABI.
pub export fn bun_scikit_status_allocation_failed() u32 {
    return @intFromEnum(Status.allocation_failed);
}
33
+
34
/// Exposes the numeric value of Status.fit_failed across the C ABI.
pub export fn bun_scikit_status_fit_failed() u32 {
    return @intFromEnum(Status.fit_failed);
}
37
+
38
/// Exposes the numeric value of Status.symbol_unavailable across the C ABI.
pub export fn bun_scikit_status_symbol_unavailable() u32 {
    return @intFromEnum(Status.symbol_unavailable);
}
41
+
42
/// State for a least-squares linear model (see the linear_model_* exports).
const LinearModel = struct {
    n_features: usize,
    fit_intercept: bool,
    // Heap slice of length n_features; owned by this struct, freed in
    // linear_model_destroy.
    coefficients: []f64,
    intercept: f64,
};
48
+
49
/// State for a binary logistic-regression model (see the logistic_model_* exports).
const LogisticModel = struct {
    n_features: usize,
    fit_intercept: bool,
    // Heap slice of length n_features; owned by this struct.
    coefficients: []f64,
    // Scratch buffer (length n_features) reused across fit iterations to avoid
    // per-iteration allocation.
    gradients: []f64,
    intercept: f64,
};
56
+
57
/// One node of a binary classification tree, stored flat in
/// DecisionTreeModel.nodes; children are referenced by index, not pointer.
const TreeNode = struct {
    prediction: u8, // majority class (0/1) of the samples reaching this node
    feature_index: usize, // split feature; 0 (unused) for leaves
    threshold: f64, // go left when x[feature_index] is below this; 0 for leaves
    left_index: usize,
    right_index: usize,
    is_leaf: bool,
};
65
+
66
/// State and hyperparameters for a binary decision-tree classifier.
const DecisionTreeModel = struct {
    n_features: usize,
    max_depth: usize,
    min_samples_split: usize,
    min_samples_leaf: usize,
    // 0 = all features, 1 = sqrt, 2 = log2, 3 = explicit count
    // (interpreted by resolveMaxFeatures).
    max_features_mode: u8,
    // Only meaningful when max_features_mode == 3.
    max_features_value: usize,
    random_state: u32,
    use_random_state: bool,
    root_index: usize,
    has_root: bool,
    // Flat node storage; indices inside TreeNode refer into this list.
    nodes: std.ArrayListUnmanaged(TreeNode),
};
79
+
80
/// A candidate split produced by findBestSplitForFeature. The two index
/// slices are heap-allocated and must be released with freeSplit.
const SplitResult = struct {
    threshold: f64,
    impurity: f64, // weighted Gini impurity of the two children
    left_indices: []usize,
    right_indices: []usize,
};
86
+
87
/// Port of the mulberry32 32-bit PRNG. Deterministic for a given seed, so
/// native feature subsampling is reproducible across runs.
const Mulberry32 = struct {
    state: u32,

    fn init(seed: u32) Mulberry32 {
        return .{ .state = seed };
    }

    /// Next sample in [0, 1). Wrapping ops (+%, *%) and the u64->u32 truncations
    /// reproduce 32-bit unsigned overflow semantics of the reference algorithm.
    fn next(self: *Mulberry32) f64 {
        self.state +%= 0x6d2b79f5;
        var t = self.state ^ (self.state >> 15);
        t = @as(u32, @truncate(@as(u64, t) *% @as(u64, (1 | self.state))));
        t ^= t +% @as(u32, @truncate(@as(u64, (t ^ (t >> 7))) *% @as(u64, (61 | t))));
        return @as(f64, @floatFromInt(t ^ (t >> 14))) / 4294967296.0;
    }

    /// Uniform index in [0, limit). The final clamp guards the float edge case
    /// where rounding could produce idx == limit.
    fn nextIndex(self: *Mulberry32, limit: usize) usize {
        if (limit <= 1) {
            return 0;
        }
        const value = self.next();
        const idx = @as(usize, @intFromFloat(@floor(value * @as(f64, @floatFromInt(limit)))));
        return if (idx >= limit) limit - 1 else idx;
    }
};
111
+
112
/// Numerically stable logistic function: only ever exponentiates a
/// non-positive argument, so large |z| cannot overflow.
inline fn sigmoid(z: f64) f64 {
    const decay = @exp(-@abs(z));
    return if (z >= 0.0)
        1.0 / (1.0 + decay)
    else
        decay / (1.0 + decay);
}
121
+
122
/// Reinterprets an opaque handle as a LinearModel pointer; null for handle 0.
inline fn asLinearModel(handle: usize) ?*LinearModel {
    return if (handle != 0) @as(*LinearModel, @ptrFromInt(handle)) else null;
}
128
+
129
/// Reinterprets an opaque handle as a LogisticModel pointer; null for handle 0.
inline fn asLogisticModel(handle: usize) ?*LogisticModel {
    return if (handle != 0) @as(*LogisticModel, @ptrFromInt(handle)) else null;
}
135
+
136
/// Reinterprets an opaque handle as a DecisionTreeModel pointer; null for handle 0.
inline fn asDecisionTreeModel(handle: usize) ?*DecisionTreeModel {
    return if (handle != 0) @as(*DecisionTreeModel, @ptrFromInt(handle)) else null;
}
142
+
143
/// Gini impurity of a binary label set: 1 - p1^2 - p0^2, where p1 is the
/// positive-class fraction. Empty sets are defined to be pure (0.0).
inline fn giniImpurity(positive_count: usize, sample_count: usize) f64 {
    if (sample_count == 0) {
        return 0.0;
    }
    const total = @as(f64, @floatFromInt(sample_count));
    const p_pos = @as(f64, @floatFromInt(positive_count)) / total;
    const p_neg = 1.0 - p_pos;
    return 1.0 - p_pos * p_pos - p_neg * p_neg;
}
151
+
152
/// Translates the model's max_features hyperparameter into a concrete feature
/// count: mode 1 = floor(sqrt(n)), mode 2 = floor(log2(n)), mode 3 = explicit
/// value clamped to [1, n]; every other mode means "use all features".
/// The sqrt/log2 results are floored at 1 so at least one feature is tried.
fn resolveMaxFeatures(model: *const DecisionTreeModel) usize {
    const n = model.n_features;
    return switch (model.max_features_mode) {
        1 => blk: {
            const k = @as(usize, @intFromFloat(@floor(@sqrt(@as(f64, @floatFromInt(n))))));
            break :blk @max(k, 1);
        },
        2 => blk: {
            const k = @as(usize, @intFromFloat(@floor(std.math.log2(@as(f64, @floatFromInt(n))))));
            break :blk @max(k, 1);
        },
        3 => std.math.clamp(model.max_features_value, 1, n),
        else => n,
    };
}
167
+
168
/// Releases the two index buffers owned by a SplitResult.
fn freeSplit(split: SplitResult) void {
    allocator.free(split.left_indices);
    allocator.free(split.right_indices);
}
172
+
173
/// Returns the feature indices to evaluate at one tree node; caller frees the
/// returned slice. When max_features covers every column, the identity list is
/// returned; otherwise a Fisher-Yates shuffle driven by `rng` picks k of them.
fn selectCandidateFeatures(model: *const DecisionTreeModel, rng: *Mulberry32) ![]usize {
    const k = resolveMaxFeatures(model);
    if (k >= model.n_features) {
        const all_features = try allocator.alloc(usize, model.n_features);
        errdefer allocator.free(all_features);
        for (all_features, 0..) |*entry, idx| {
            entry.* = idx;
        }
        return all_features;
    }

    const shuffled = try allocator.alloc(usize, model.n_features);
    errdefer allocator.free(shuffled);
    for (shuffled, 0..) |*entry, idx| {
        entry.* = idx;
    }

    // Fisher-Yates: after the loop, any prefix of `shuffled` is a uniform
    // sample without replacement.
    var i = model.n_features;
    while (i > 1) {
        i -= 1;
        const j = rng.nextIndex(i + 1);
        const tmp = shuffled[i];
        shuffled[i] = shuffled[j];
        shuffled[j] = tmp;
    }

    const selected = try allocator.alloc(usize, k);
    @memcpy(selected, shuffled[0..k]);
    allocator.free(shuffled);
    return selected;
}
204
+
205
/// Finds the impurity-minimizing threshold for one feature over `indices`.
/// Returns null when no valid split exists (fewer than 2 samples, the
/// min_samples_leaf constraint cannot be honored, or the feature is constant).
/// On success the caller owns the returned left/right index slices
/// (release with freeSplit).
fn findBestSplitForFeature(
    model: *const DecisionTreeModel,
    x_ptr: [*]const f64,
    y_ptr: [*]const u8,
    indices: []const usize,
    feature_index: usize,
) !?SplitResult {
    const sample_count = indices.len;
    if (sample_count < 2) {
        return null;
    }

    // Sort the sample indices by this feature's value so candidate thresholds
    // are midpoints between consecutive distinct values.
    const sorted_indices = try allocator.alloc(usize, sample_count);
    defer allocator.free(sorted_indices);
    @memcpy(sorted_indices, indices);

    const SortContext = struct {
        x_ptr: [*]const f64,
        n_features: usize,
        feature_index: usize,
        fn lessThan(ctx: @This(), a: usize, b: usize) bool {
            return ctx.x_ptr[a * ctx.n_features + ctx.feature_index] <
                ctx.x_ptr[b * ctx.n_features + ctx.feature_index];
        }
    };
    std.sort.heap(usize, sorted_indices, SortContext{
        .x_ptr = x_ptr,
        .n_features = model.n_features,
        .feature_index = feature_index,
    }, SortContext.lessThan);

    var total_positive: usize = 0;
    for (sorted_indices) |sample_index| {
        total_positive += y_ptr[sample_index];
    }

    // Sweep split positions left to right, maintaining running class counts so
    // each candidate's weighted Gini impurity costs O(1).
    var left_count: usize = 0;
    var left_positive: usize = 0;
    var best_impurity = std.math.inf(f64);
    var best_threshold: f64 = 0.0;
    var best_split_index: usize = 0;
    var found = false;

    var i: usize = 1;
    while (i < sample_count) : (i += 1) {
        const previous_index = sorted_indices[i - 1];
        left_count += 1;
        left_positive += y_ptr[previous_index];
        const right_count = sample_count - left_count;

        // Both children must satisfy the leaf-size minimum.
        if (left_count < model.min_samples_leaf or right_count < model.min_samples_leaf) {
            continue;
        }

        // Equal adjacent values cannot be separated by any threshold.
        const left_value = x_ptr[previous_index * model.n_features + feature_index];
        const right_value = x_ptr[sorted_indices[i] * model.n_features + feature_index];
        if (left_value == right_value) {
            continue;
        }

        const right_positive = total_positive - left_positive;
        // Child impurities weighted by their sample fractions.
        const impurity =
            (@as(f64, @floatFromInt(left_count)) / @as(f64, @floatFromInt(sample_count))) *
            giniImpurity(left_positive, left_count) +
            (@as(f64, @floatFromInt(right_count)) / @as(f64, @floatFromInt(sample_count))) *
            giniImpurity(right_positive, right_count);

        if (impurity < best_impurity) {
            best_impurity = impurity;
            // Threshold halfway between the two straddling values.
            best_threshold = (left_value + right_value) / 2.0;
            best_split_index = i;
            found = true;
        }
    }

    if (!found) {
        return null;
    }

    const left_indices = try allocator.alloc(usize, best_split_index);
    errdefer allocator.free(left_indices);
    const right_size = sample_count - best_split_index;
    const right_indices = try allocator.alloc(usize, right_size);
    errdefer allocator.free(right_indices);

    @memcpy(left_indices, sorted_indices[0..best_split_index]);
    @memcpy(right_indices, sorted_indices[best_split_index..]);

    return SplitResult{
        .threshold = best_threshold,
        .impurity = best_impurity,
        .left_indices = left_indices,
        .right_indices = right_indices,
    };
}
300
+
301
/// Appends a node with placeholder split fields and returns its index.
fn appendPlaceholderNode(model: *DecisionTreeModel, prediction: u8, is_leaf: bool) !usize {
    const node_index = model.nodes.items.len;
    try model.nodes.append(allocator, TreeNode{
        .prediction = prediction,
        .feature_index = 0,
        .threshold = 0.0,
        .left_index = 0,
        .right_index = 0,
        .is_leaf = is_leaf,
    });
    return node_index;
}

/// Recursively grows a subtree over `indices`, appending nodes to model.nodes,
/// and returns the index of the subtree root. Emits a leaf when the node is
/// pure, a depth/size limit is hit, or no split reduces the Gini impurity.
fn buildDecisionTreeNode(
    model: *DecisionTreeModel,
    x_ptr: [*]const f64,
    y_ptr: [*]const u8,
    indices: []const usize,
    depth: usize,
    rng: *Mulberry32,
) !usize {
    const sample_count = indices.len;
    var positive_count: usize = 0;
    for (indices) |sample_index| {
        positive_count += y_ptr[sample_index];
    }
    // Majority vote; exact ties resolve to class 1.
    const prediction: u8 = if (positive_count * 2 >= sample_count) 1 else 0;

    const same_class = positive_count == 0 or positive_count == sample_count;
    const depth_stop = depth >= model.max_depth;
    const split_stop = sample_count < model.min_samples_split;
    if (same_class or depth_stop or split_stop) {
        return appendPlaceholderNode(model, prediction, true);
    }

    const parent_impurity = giniImpurity(positive_count, sample_count);
    const candidate_features = try selectCandidateFeatures(model, rng);
    defer allocator.free(candidate_features);

    var best_feature: usize = 0;
    var best_split: ?SplitResult = null;
    // BUGFIX: without this errdefer, the held split's index buffers leaked
    // whenever a later findBestSplitForFeature call (or anything below)
    // returned an error.
    errdefer if (best_split) |held| freeSplit(held);

    for (candidate_features) |feature_index| {
        const split_opt = try findBestSplitForFeature(model, x_ptr, y_ptr, indices, feature_index);
        if (split_opt) |split| {
            if (best_split == null or split.impurity < best_split.?.impurity) {
                if (best_split) |previous| {
                    freeSplit(previous);
                }
                best_split = split;
                best_feature = feature_index;
            } else {
                freeSplit(split);
            }
        }
    }

    // No usable split (e.g. all candidate features constant): emit a leaf.
    const split = best_split orelse return appendPlaceholderNode(model, prediction, true);
    best_split = null; // ownership moved to `split`; keeps the errdefer single-free
    defer freeSplit(split);

    // Require a strict impurity decrease (beyond float noise) to keep splitting.
    if (split.impurity >= parent_impurity - 1e-12) {
        return appendPlaceholderNode(model, prediction, true);
    }

    // Reserve this node's slot before recursing so its index stays valid while
    // the recursion appends more nodes; the slot is patched afterwards.
    const node_index = try appendPlaceholderNode(model, prediction, false);

    const left_index = try buildDecisionTreeNode(
        model,
        x_ptr,
        y_ptr,
        split.left_indices,
        depth + 1,
        rng,
    );
    const right_index = try buildDecisionTreeNode(
        model,
        x_ptr,
        y_ptr,
        split.right_indices,
        depth + 1,
        rng,
    );

    model.nodes.items[node_index] = TreeNode{
        .prediction = prediction,
        .feature_index = best_feature,
        .threshold = split.threshold,
        .left_index = left_index,
        .right_index = right_index,
        .is_leaf = false,
    };

    return node_index;
}
422
+
423
/// Solves gram * solution = rhs for a symmetric positive-definite matrix via
/// Cholesky factorization (gram = L * L^T) followed by forward and back
/// substitution. `lower` (dim*dim) and `forward` (dim) are caller-provided
/// scratch buffers; the answer lands in `solution`. Returns false when the
/// matrix is not numerically positive definite (diagonal pivot <= 1e-12).
fn solveSymmetricPositiveDefinite(
    dim: usize,
    gram: []const f64,
    rhs: []const f64,
    lower: []f64,
    forward: []f64,
    solution: []f64,
) bool {
    const epsilon = 1e-12;
    @memset(lower, 0.0);

    // Cholesky decomposition: fill the lower triangle row by row.
    for (0..dim) |row| {
        const row_base = row * dim;
        for (0..row + 1) |col| {
            const col_base = col * dim;
            var acc = gram[row_base + col];
            for (0..col) |k| {
                acc -= lower[row_base + k] * lower[col_base + k];
            }

            if (row == col) {
                if (acc <= epsilon) {
                    return false;
                }
                lower[row_base + col] = @sqrt(acc);
            } else {
                lower[row_base + col] = acc / lower[col_base + col];
            }
        }
    }

    // Forward substitution: L * forward = rhs.
    for (0..dim) |row| {
        const row_base = row * dim;
        var acc = rhs[row];
        for (0..row) |k| {
            acc -= lower[row_base + k] * forward[k];
        }
        forward[row] = acc / lower[row_base + row];
    }

    // Back substitution: L^T * solution = forward.
    var remaining = dim;
    while (remaining > 0) : (remaining -= 1) {
        const row = remaining - 1;
        var acc = forward[row];
        for (row + 1..dim) |k| {
            acc -= lower[k * dim + row] * solution[k];
        }
        solution[row] = acc / lower[row * dim + row];
    }

    return true;
}
482
+
483
/// Allocates a LinearModel with zeroed coefficients and returns its opaque
/// handle (the pointer as an integer); 0 signals failure.
pub export fn linear_model_create(n_features: usize, fit_intercept: u8) usize {
    if (n_features == 0) {
        return 0;
    }

    const model = allocator.create(LinearModel) catch return 0;
    // BUGFIX: `errdefer` never fires in a function whose return type is a
    // plain usize (a `catch return 0` is a normal return, not an error), so
    // a failed coefficient allocation used to leak `model`. Clean up
    // explicitly instead.
    const coefficients = allocator.alloc(f64, n_features) catch {
        allocator.destroy(model);
        return 0;
    };
    @memset(coefficients, 0.0);

    model.* = .{
        .n_features = n_features,
        .fit_intercept = fit_intercept != 0,
        .coefficients = coefficients,
        .intercept = 0.0,
    };
    return @intFromPtr(model);
}
501
+
502
/// Frees a model previously returned by linear_model_create; ignores handle 0.
pub export fn linear_model_destroy(handle: usize) void {
    const model = asLinearModel(handle) orelse return;
    allocator.free(model.coefficients);
    allocator.destroy(model);
}
507
+
508
/// Fits the model by solving the (ridge-regularized) normal equations
/// (X^T X + l2*I) w = X^T y with a Cholesky solve. When fit_intercept is set,
/// a leading all-ones column is folded in and its diagonal is never
/// regularized. If factorization fails, the regularization is escalated by
/// 10x for up to 4 attempts. Returns 1 on success, 0 on invalid input or an
/// unsolvable system.
pub export fn linear_model_fit(
    handle: usize,
    x_ptr: [*]const f64,
    y_ptr: [*]const f64,
    n_samples: usize,
    l2: f64,
) u8 {
    const model = asLinearModel(handle) orelse return 0;
    if (n_samples == 0 or model.n_features == 0) {
        return 0;
    }

    // dim = n_features plus one leading slot for the intercept column.
    const has_intercept: usize = if (model.fit_intercept) 1 else 0;
    const dim = model.n_features + has_intercept;
    const offset = has_intercept;

    const base_gram = allocator.alloc(f64, dim * dim) catch return 0;
    defer allocator.free(base_gram);
    const gram_attempt = allocator.alloc(f64, dim * dim) catch return 0;
    defer allocator.free(gram_attempt);
    const rhs = allocator.alloc(f64, dim) catch return 0;
    defer allocator.free(rhs);
    const lower = allocator.alloc(f64, dim * dim) catch return 0;
    defer allocator.free(lower);
    const forward = allocator.alloc(f64, dim) catch return 0;
    defer allocator.free(forward);
    const solution = allocator.alloc(f64, dim) catch return 0;
    defer allocator.free(solution);

    @memset(base_gram, 0.0);
    @memset(rhs, 0.0);

    // Accumulate X^T X (lower triangle only) and X^T y in a single pass.
    var sample_index: usize = 0;
    while (sample_index < n_samples) : (sample_index += 1) {
        const row_offset = sample_index * model.n_features;
        const target = y_ptr[sample_index];

        var i: usize = 0;
        while (i < dim) : (i += 1) {
            // Column 0 is the implicit 1.0 intercept column when enabled.
            const xi = if (model.fit_intercept and i == 0) 1.0 else x_ptr[row_offset + (i - offset)];
            rhs[i] += xi * target;

            const gram_row_offset = i * dim;
            var j: usize = 0;
            while (j <= i) : (j += 1) {
                const xj = if (model.fit_intercept and j == 0) 1.0 else x_ptr[row_offset + (j - offset)];
                base_gram[gram_row_offset + j] += xi * xj;
            }
        }
    }

    // Mirror the lower triangle into the upper half.
    var row: usize = 1;
    while (row < dim) : (row += 1) {
        var col: usize = 0;
        while (col < row) : (col += 1) {
            base_gram[col * dim + row] = base_gram[row * dim + col];
        }
    }

    // A tiny jitter (1e-8) stands in for l2 == 0 so the factorization has a
    // chance even on rank-deficient inputs.
    var regularization: f64 = if (l2 > 0.0) l2 else 1e-8;
    var attempt: usize = 0;
    while (attempt < 4) : (attempt += 1) {
        @memcpy(gram_attempt, base_gram);

        var diag: usize = 0;
        while (diag < dim) : (diag += 1) {
            // Never penalize the intercept term.
            if (model.fit_intercept and diag == 0) {
                continue;
            }
            gram_attempt[diag * dim + diag] += regularization;
        }

        if (solveSymmetricPositiveDefinite(dim, gram_attempt, rhs, lower, forward, solution)) {
            if (model.fit_intercept) {
                model.intercept = solution[0];
                var idx: usize = 0;
                while (idx < model.n_features) : (idx += 1) {
                    model.coefficients[idx] = solution[idx + 1];
                }
            } else {
                model.intercept = 0.0;
                @memcpy(model.coefficients, solution[0..model.n_features]);
            }
            return 1;
        }

        regularization *= 10.0;
    }

    return 0;
}
599
+
600
/// Writes intercept + row . coefficients into out_ptr for each of n_samples
/// rows of the row-major matrix at x_ptr. Returns 1 on success, 0 for a null
/// handle or empty input.
pub export fn linear_model_predict(
    handle: usize,
    x_ptr: [*]const f64,
    n_samples: usize,
    out_ptr: [*]f64,
) u8 {
    const model = asLinearModel(handle) orelse return 0;
    if (n_samples == 0 or model.n_features == 0) {
        return 0;
    }

    for (0..n_samples) |sample| {
        const base = sample * model.n_features;
        const row = x_ptr[base .. base + model.n_features];
        var acc = model.intercept;
        for (row, model.coefficients) |feature, weight| {
            acc += feature * weight;
        }
        out_ptr[sample] = acc;
    }

    return 1;
}
624
+
625
/// Copies the fitted weights into out_ptr, which must hold at least
/// n_features values. Returns 1 on success, 0 for a null handle.
pub export fn linear_model_copy_coefficients(handle: usize, out_ptr: [*]f64) u8 {
    const model = asLinearModel(handle) orelse return 0;
    @memcpy(out_ptr[0..model.n_features], model.coefficients);
    return 1;
}
630
+
631
/// Returns the fitted intercept; 0.0 for a null handle (indistinguishable
/// from a genuinely zero intercept — callers should validate the handle).
pub export fn linear_model_get_intercept(handle: usize) f64 {
    const model = asLinearModel(handle) orelse return 0.0;
    return model.intercept;
}
635
+
636
/// Allocates a LogisticModel with zeroed weights and gradient scratch space,
/// returning its opaque handle; 0 signals failure.
pub export fn logistic_model_create(n_features: usize, fit_intercept: u8) usize {
    if (n_features == 0) {
        return 0;
    }

    const model = allocator.create(LogisticModel) catch return 0;

    // BUGFIX: `errdefer` never fires in a function whose return type is a
    // plain usize (`catch return 0` is a normal return, not an error), so
    // failed allocations below used to leak everything allocated so far.
    // Clean up explicitly on each failure path instead.
    const coefficients = allocator.alloc(f64, n_features) catch {
        allocator.destroy(model);
        return 0;
    };
    @memset(coefficients, 0.0);

    const gradients = allocator.alloc(f64, n_features) catch {
        allocator.free(coefficients);
        allocator.destroy(model);
        return 0;
    };
    @memset(gradients, 0.0);

    model.* = .{
        .n_features = n_features,
        .fit_intercept = fit_intercept != 0,
        .coefficients = coefficients,
        .gradients = gradients,
        .intercept = 0.0,
    };
    return @intFromPtr(model);
}
660
+
661
/// Frees a model previously returned by logistic_model_create; ignores handle 0.
pub export fn logistic_model_destroy(handle: usize) void {
    const model = asLogisticModel(handle) orelse return;
    allocator.free(model.coefficients);
    allocator.free(model.gradients);
    allocator.destroy(model);
}
667
+
668
/// Number of optimized parameters: one weight per feature, plus one extra
/// leading slot for the intercept when it is fitted.
fn logisticThetaLength(model: *const LogisticModel) usize {
    if (model.fit_intercept) {
        return model.n_features + 1;
    }
    return model.n_features;
}
671
+
672
/// Packs the model parameters into a flat theta vector
/// (layout: [intercept?, w_0 .. w_{n-1}]).
fn logisticLoadThetaFromModel(model: *const LogisticModel, theta: []f64) void {
    if (!model.fit_intercept) {
        @memcpy(theta[0..model.n_features], model.coefficients);
        return;
    }
    theta[0] = model.intercept;
    @memcpy(theta[1 .. model.n_features + 1], model.coefficients);
}
680
+
681
/// Unpacks a flat theta vector back into the model
/// (layout: [intercept?, w_0 .. w_{n-1}]); zeroes the intercept when unused.
fn logisticStoreThetaToModel(model: *LogisticModel, theta: []const f64) void {
    if (!model.fit_intercept) {
        model.intercept = 0.0;
        @memcpy(model.coefficients, theta[0..model.n_features]);
        return;
    }
    model.intercept = theta[0];
    @memcpy(model.coefficients, theta[1 .. model.n_features + 1]);
}
690
+
691
/// Inner product of two equal-length slices.
fn dotProduct(a: []const f64, b: []const f64) f64 {
    var acc: f64 = 0.0;
    for (a, b) |lhs, rhs| acc += lhs * rhs;
    return acc;
}
698
+
699
/// Largest absolute value in `values`; 0.0 for an empty slice.
fn maxAbs(values: []const f64) f64 {
    var result: f64 = 0.0;
    for (values) |entry| {
        result = @max(result, @abs(entry));
    }
    return result;
}
709
+
710
/// Computes the mean binary cross-entropy loss at `theta` and writes the
/// matching gradient into `gradient` (same layout as theta: optional intercept
/// first, then feature weights). The optional L2 term penalizes weights only
/// — never the intercept — and is scaled by 1/n_samples to match the loss.
fn logisticLossAndGradient(
    model: *const LogisticModel,
    x_ptr: [*]const f64,
    y_ptr: [*]const f64,
    n_samples: usize,
    l2: f64,
    theta: []const f64,
    gradient: []f64,
) f64 {
    @memset(gradient, 0.0);

    const theta_offset: usize = if (model.fit_intercept) 1 else 0;
    var total_loss: f64 = 0.0;
    var i: usize = 0;
    while (i < n_samples) : (i += 1) {
        const y_value = y_ptr[i];
        const row_offset = i * model.n_features;
        // Linear score z = intercept + w . x_i
        var z: f64 = if (model.fit_intercept) theta[0] else 0.0;
        var j: usize = 0;
        while (j < model.n_features) : (j += 1) {
            z += x_ptr[row_offset + j] * theta[theta_offset + j];
        }

        // d(loss)/dz = sigmoid(z) - y; accumulate into the gradient.
        const prediction = sigmoid(z);
        const residual = prediction - y_value;
        if (model.fit_intercept) {
            gradient[0] += residual;
        }
        j = 0;
        while (j < model.n_features) : (j += 1) {
            gradient[theta_offset + j] += residual * x_ptr[row_offset + j];
        }

        // Stable binary cross-entropy evaluation.
        if (z >= 0.0) {
            total_loss += (1.0 - y_value) * z + @log(1.0 + @exp(-z));
        } else {
            total_loss += -y_value * z + @log(1.0 + @exp(z));
        }
    }

    // Average loss and gradient over the sample count.
    const sample_scale = 1.0 / @as(f64, @floatFromInt(n_samples));
    total_loss *= sample_scale;
    for (gradient) |*entry| {
        entry.* *= sample_scale;
    }

    if (l2 > 0.0) {
        var l2_sum: f64 = 0.0;
        var j: usize = 0;
        while (j < model.n_features) : (j += 1) {
            const weight = theta[theta_offset + j];
            l2_sum += weight * weight;
            gradient[theta_offset + j] += sample_scale * l2 * weight;
        }
        total_loss += 0.5 * sample_scale * l2 * l2_sum;
    }

    return total_loss;
}
770
+
771
/// Fits the logistic model with full-batch gradient descent, restarting from
/// all-zero parameters. Stops early when the largest single parameter update
/// falls below `tolerance`. Returns the number of iterations performed
/// (iter + 1 on early convergence, max_iters otherwise) or 0 on invalid input.
pub export fn logistic_model_fit(
    handle: usize,
    x_ptr: [*]const f64,
    y_ptr: [*]const f64,
    n_samples: usize,
    learning_rate: f64,
    l2: f64,
    max_iters: usize,
    tolerance: f64,
) usize {
    const model = asLogisticModel(handle) orelse return 0;
    if (n_samples == 0 or model.n_features == 0 or max_iters == 0) {
        return 0;
    }

    // Always restart from the zero parameter vector.
    @memset(model.coefficients, 0.0);
    @memset(model.gradients, 0.0);
    model.intercept = 0.0;

    const sample_scale = 1.0 / @as(f64, @floatFromInt(n_samples));
    var iter: usize = 0;
    while (iter < max_iters) : (iter += 1) {
        @memset(model.gradients, 0.0);
        var intercept_gradient: f64 = 0.0;

        // Accumulate the full-batch gradient: sum (sigmoid(z_i) - y_i) * x_i.
        var i: usize = 0;
        while (i < n_samples) : (i += 1) {
            const row_offset = i * model.n_features;
            // intercept stays 0 when fit_intercept is false, so z is just w . x.
            var z = model.intercept;
            var j: usize = 0;
            while (j < model.n_features) : (j += 1) {
                z += x_ptr[row_offset + j] * model.coefficients[j];
            }

            const residual = sigmoid(z) - y_ptr[i];
            intercept_gradient += residual;

            j = 0;
            while (j < model.n_features) : (j += 1) {
                model.gradients[j] += residual * x_ptr[row_offset + j];
            }
        }

        // Apply the update; track the largest step for the convergence test.
        var max_update: f64 = 0.0;
        var j: usize = 0;
        while (j < model.n_features) : (j += 1) {
            // L2 penalty applies to weights only, scaled like the data term.
            const l2_term = if (l2 > 0.0) l2 * model.coefficients[j] else 0.0;
            const delta = learning_rate * (sample_scale * model.gradients[j] + sample_scale * l2_term);
            model.coefficients[j] -= delta;
            const abs_delta = @abs(delta);
            if (abs_delta > max_update) {
                max_update = abs_delta;
            }
        }

        if (model.fit_intercept) {
            const intercept_delta = learning_rate * sample_scale * intercept_gradient;
            model.intercept -= intercept_delta;
            const abs_intercept_delta = @abs(intercept_delta);
            if (abs_intercept_delta > max_update) {
                max_update = abs_intercept_delta;
            }
        }

        if (max_update < tolerance) {
            return iter + 1;
        }
    }

    return max_iters;
}
842
+
843
+ pub export fn logistic_model_fit_lbfgs(
844
+ handle: usize,
845
+ x_ptr: [*]const f64,
846
+ y_ptr: [*]const f64,
847
+ n_samples: usize,
848
+ max_iters: usize,
849
+ tolerance: f64,
850
+ l2: f64,
851
+ memory: usize,
852
+ ) usize {
853
+ const model = asLogisticModel(handle) orelse return 0;
854
+ if (n_samples == 0 or model.n_features == 0 or max_iters == 0) {
855
+ return 0;
856
+ }
857
+
858
+ const theta_len = logisticThetaLength(model);
859
+ const history_size = std.math.clamp(memory, 3, 20);
860
+
861
+ const theta = allocator.alloc(f64, theta_len) catch return 0;
862
+ defer allocator.free(theta);
863
+ const theta_next = allocator.alloc(f64, theta_len) catch return 0;
864
+ defer allocator.free(theta_next);
865
+ const gradient = allocator.alloc(f64, theta_len) catch return 0;
866
+ defer allocator.free(gradient);
867
+ const gradient_next = allocator.alloc(f64, theta_len) catch return 0;
868
+ defer allocator.free(gradient_next);
869
+ const direction = allocator.alloc(f64, theta_len) catch return 0;
870
+ defer allocator.free(direction);
871
+ const q = allocator.alloc(f64, theta_len) catch return 0;
872
+ defer allocator.free(q);
873
+ const r = allocator.alloc(f64, theta_len) catch return 0;
874
+ defer allocator.free(r);
875
+ const s_history = allocator.alloc(f64, history_size * theta_len) catch return 0;
876
+ defer allocator.free(s_history);
877
+ const y_history = allocator.alloc(f64, history_size * theta_len) catch return 0;
878
+ defer allocator.free(y_history);
879
+ const rho_history = allocator.alloc(f64, history_size) catch return 0;
880
+ defer allocator.free(rho_history);
881
+ const alpha_history = allocator.alloc(f64, history_size) catch return 0;
882
+ defer allocator.free(alpha_history);
883
+ const index_history = allocator.alloc(usize, history_size) catch return 0;
884
+ defer allocator.free(index_history);
885
+
886
+ logisticLoadThetaFromModel(model, theta);
887
+ @memset(theta_next, 0.0);
888
+ @memset(gradient, 0.0);
889
+ @memset(gradient_next, 0.0);
890
+ @memset(direction, 0.0);
891
+ @memset(q, 0.0);
892
+ @memset(r, 0.0);
893
+
894
+ var loss = logisticLossAndGradient(model, x_ptr, y_ptr, n_samples, l2, theta, gradient);
895
+ if (!std.math.isFinite(loss)) {
896
+ return 0;
897
+ }
898
+
899
+ var history_count: usize = 0;
900
+ var history_head: usize = 0;
901
+
902
+ var iter: usize = 0;
903
+ while (iter < max_iters) : (iter += 1) {
904
+ if (maxAbs(gradient) < tolerance) {
905
+ logisticStoreThetaToModel(model, theta);
906
+ return iter + 1;
907
+ }
908
+
909
+ @memcpy(q, gradient);
910
+ var loop_count: usize = 0;
911
+ while (loop_count < history_count) : (loop_count += 1) {
912
+ const idx = (history_head + history_size - 1 - loop_count) % history_size;
913
+ index_history[loop_count] = idx;
914
+ const s = s_history[idx * theta_len .. (idx + 1) * theta_len];
915
+ const y_vec = y_history[idx * theta_len .. (idx + 1) * theta_len];
916
+ const alpha = rho_history[idx] * dotProduct(s, q);
917
+ alpha_history[loop_count] = alpha;
918
+ var t: usize = 0;
919
+ while (t < theta_len) : (t += 1) {
920
+ q[t] -= alpha * y_vec[t];
921
+ }
922
+ }
923
+
924
+ var gamma: f64 = 1.0;
925
+ if (history_count > 0) {
926
+ const latest_idx = (history_head + history_size - 1) % history_size;
927
+ const s_latest = s_history[latest_idx * theta_len .. (latest_idx + 1) * theta_len];
928
+ const y_latest = y_history[latest_idx * theta_len .. (latest_idx + 1) * theta_len];
929
+ const sy = dotProduct(s_latest, y_latest);
930
+ const yy = dotProduct(y_latest, y_latest);
931
+ if (yy > 1e-20 and sy > 0.0) {
932
+ gamma = sy / yy;
933
+ }
934
+ }
935
+
936
+ for (r, q) |*entry, q_value| {
937
+ entry.* = gamma * q_value;
938
+ }
939
+
940
+ var backward = history_count;
941
+ while (backward > 0) {
942
+ const pos = backward - 1;
943
+ const idx = index_history[pos];
944
+ const s = s_history[idx * theta_len .. (idx + 1) * theta_len];
945
+ const y_vec = y_history[idx * theta_len .. (idx + 1) * theta_len];
946
+ const beta = rho_history[idx] * dotProduct(y_vec, r);
947
+ const alpha = alpha_history[pos];
948
+ var t: usize = 0;
949
+ while (t < theta_len) : (t += 1) {
950
+ r[t] += s[t] * (alpha - beta);
951
+ }
952
+ backward -= 1;
953
+ }
954
+
955
+ for (direction, r) |*entry, r_value| {
956
+ entry.* = -r_value;
957
+ }
958
+
959
+ var directional_derivative = dotProduct(direction, gradient);
960
+ if (directional_derivative >= -1e-20) {
961
+ for (direction, gradient) |*entry, g_value| {
962
+ entry.* = -g_value;
963
+ }
964
+ directional_derivative = -dotProduct(gradient, gradient);
965
+ }
966
+
967
+ var step: f64 = 1.0;
968
+ const c1: f64 = 1e-4;
969
+ const min_step: f64 = 1e-12;
970
+ var candidate_loss: f64 = loss;
971
+ var accepted = false;
972
+ while (step >= min_step) {
973
+ var t: usize = 0;
974
+ while (t < theta_len) : (t += 1) {
975
+ theta_next[t] = theta[t] + step * direction[t];
976
+ }
977
+
978
+ candidate_loss = logisticLossAndGradient(
979
+ model,
980
+ x_ptr,
981
+ y_ptr,
982
+ n_samples,
983
+ l2,
984
+ theta_next,
985
+ gradient_next,
986
+ );
987
+ if (std.math.isFinite(candidate_loss) and
988
+ candidate_loss <= loss + c1 * step * directional_derivative)
989
+ {
990
+ accepted = true;
991
+ break;
992
+ }
993
+ step *= 0.5;
994
+ }
995
+
996
+ if (!accepted) {
997
+ logisticStoreThetaToModel(model, theta);
998
+ return iter + 1;
999
+ }
1000
+
1001
+ var max_step_update: f64 = 0.0;
1002
+ var t: usize = 0;
1003
+ while (t < theta_len) : (t += 1) {
1004
+ const delta = theta_next[t] - theta[t];
1005
+ const abs_delta = @abs(delta);
1006
+ if (abs_delta > max_step_update) {
1007
+ max_step_update = abs_delta;
1008
+ }
1009
+ direction[t] = delta;
1010
+ q[t] = gradient_next[t] - gradient[t];
1011
+ }
1012
+
1013
+ const sy = dotProduct(direction, q);
1014
+ if (sy > 1e-12) {
1015
+ const idx = history_head;
1016
+ @memcpy(s_history[idx * theta_len .. (idx + 1) * theta_len], direction);
1017
+ @memcpy(y_history[idx * theta_len .. (idx + 1) * theta_len], q);
1018
+ rho_history[idx] = 1.0 / sy;
1019
+ history_head = (history_head + 1) % history_size;
1020
+ if (history_count < history_size) {
1021
+ history_count += 1;
1022
+ }
1023
+ }
1024
+
1025
+ @memcpy(theta, theta_next);
1026
+ @memcpy(gradient, gradient_next);
1027
+ loss = candidate_loss;
1028
+
1029
+ if (max_step_update < tolerance) {
1030
+ logisticStoreThetaToModel(model, theta);
1031
+ return iter + 1;
1032
+ }
1033
+ }
1034
+
1035
+ logisticStoreThetaToModel(model, theta);
1036
+ return max_iters;
1037
+ }
1038
+
1039
/// Writes the positive-class probability P(y = 1 | x) for each row of
/// `x_ptr` (row-major, `n_samples` x `n_features`) into `out_positive_ptr`.
/// Returns 1 on success, 0 when the handle is invalid or there is nothing
/// to score (zero samples or an unfitted zero-feature model).
pub export fn logistic_model_predict_proba(
    handle: usize,
    x_ptr: [*]const f64,
    n_samples: usize,
    out_positive_ptr: [*]f64,
) u8 {
    const model = asLogisticModel(handle) orelse return 0;
    if (n_samples == 0 or model.n_features == 0) {
        return 0;
    }

    const n_features = model.n_features;
    for (0..n_samples) |sample| {
        const row = x_ptr[sample * n_features .. (sample + 1) * n_features];
        // Linear score: intercept + <row, coefficients>.
        var linear = model.intercept;
        for (row, model.coefficients) |feature, weight| {
            linear += feature * weight;
        }
        out_positive_ptr[sample] = sigmoid(linear);
    }

    return 1;
}
/// Writes hard 0/1 class labels for each row of `x_ptr` (row-major,
/// `n_samples` x `n_features`) into `out_labels_ptr`, thresholding the
/// sigmoid of the linear score at 0.5. Returns 1 on success, 0 when the
/// handle is invalid or there is nothing to predict.
pub export fn logistic_model_predict(
    handle: usize,
    x_ptr: [*]const f64,
    n_samples: usize,
    out_labels_ptr: [*]u8,
) u8 {
    const model = asLogisticModel(handle) orelse return 0;
    if (n_samples == 0 or model.n_features == 0) {
        return 0;
    }

    const n_features = model.n_features;
    for (0..n_samples) |sample| {
        const row = x_ptr[sample * n_features .. (sample + 1) * n_features];
        var linear = model.intercept;
        for (row, model.coefficients) |feature, weight| {
            linear += feature * weight;
        }
        // u1 from @intFromBool widens to the u8 output element.
        out_labels_ptr[sample] = @intFromBool(sigmoid(linear) >= 0.5);
    }

    return 1;
}
/// Copies the fitted coefficient vector into caller-provided storage.
/// `out_ptr` must reference at least `n_features` doubles (the caller is
/// expected to know the feature count). Returns 1 on success, 0 for an
/// invalid handle.
pub export fn logistic_model_copy_coefficients(handle: usize, out_ptr: [*]f64) u8 {
    const model = asLogisticModel(handle) orelse return 0;
    const destination = out_ptr[0..model.n_features];
    @memcpy(destination, model.coefficients);
    return 1;
}
/// Returns the fitted intercept term, or 0.0 for an invalid handle.
pub export fn logistic_model_get_intercept(handle: usize) f64 {
    return if (asLogisticModel(handle)) |model| model.intercept else 0.0;
}
/// Allocates and initializes a decision-tree model with the given
/// hyper-parameters. Returns an opaque handle (the model's address), or 0
/// when the configuration is invalid or allocation fails. Pair with
/// `decision_tree_model_destroy` to release.
pub export fn decision_tree_model_create(
    max_depth: usize,
    min_samples_split: usize,
    min_samples_leaf: usize,
    max_features_mode: u8,
    max_features_value: usize,
    random_state: u32,
    use_random_state: u8,
    n_features: usize,
) usize {
    // A tree over zero features, or with no depth budget, can never be fit.
    if (n_features == 0 or max_depth == 0) {
        return 0;
    }

    const model = allocator.create(DecisionTreeModel) catch return 0;
    errdefer allocator.destroy(model);
    model.* = .{
        .n_features = n_features,
        .max_depth = max_depth,
        // Clamp to the smallest values that make splitting meaningful.
        .min_samples_split = @max(min_samples_split, 2),
        .min_samples_leaf = @max(min_samples_leaf, 1),
        .max_features_mode = max_features_mode,
        .max_features_value = max_features_value,
        .random_state = random_state,
        .use_random_state = use_random_state != 0,
        .root_index = 0,
        .has_root = false,
        .nodes = .empty,
    };
    return @intFromPtr(model);
}
/// Releases a model previously returned by `decision_tree_model_create`,
/// including its node storage. Invalid handles are ignored.
pub export fn decision_tree_model_destroy(handle: usize) void {
    if (asDecisionTreeModel(handle)) |model| {
        model.nodes.deinit(allocator);
        allocator.destroy(model);
    }
}
/// Fits the tree on row-major training data (`n_samples` x `n_features`,
/// labels in `y_ptr`). When `sample_count` is zero the full dataset is
/// used; otherwise `sample_indices_ptr[0..sample_count]` selects the
/// (possibly repeated) training rows, e.g. for bootstrap sampling.
/// Any previously fitted tree is discarded first. Returns 1 on success,
/// 0 on invalid arguments, an out-of-range sample index, or allocation
/// failure during tree construction.
pub export fn decision_tree_model_fit(
    handle: usize,
    x_ptr: [*]const f64,
    y_ptr: [*]const u8,
    n_samples: usize,
    n_features: usize,
    sample_indices_ptr: [*]const u32,
    sample_count: usize,
) u8 {
    const model = asDecisionTreeModel(handle) orelse return 0;
    if (n_samples == 0 or n_features == 0 or n_features != model.n_features) {
        return 0;
    }

    // Drop any prior fit; the node buffer is reused across fits.
    model.nodes.clearRetainingCapacity();
    model.has_root = false;

    const use_all_rows = sample_count == 0;
    const root_size = if (use_all_rows) n_samples else sample_count;
    // Defensive: unreachable given the guards above, but kept cheap.
    if (root_size == 0) {
        return 0;
    }

    const root_indices = allocator.alloc(usize, root_size) catch return 0;
    defer allocator.free(root_indices);

    if (use_all_rows) {
        for (root_indices, 0..) |*slot, row| {
            slot.* = row;
        }
    } else {
        for (root_indices, sample_indices_ptr[0..sample_count]) |*slot, raw_index| {
            const row = @as(usize, raw_index);
            if (row >= n_samples) {
                return 0;
            }
            slot.* = row;
        }
    }

    // Deterministic seed when requested; otherwise derive one from the clock.
    const seed: u32 = if (model.use_random_state)
        model.random_state
    else
        @truncate(@as(u64, @bitCast(std.time.microTimestamp())));
    var rng = Mulberry32.init(seed);

    const root_index = buildDecisionTreeNode(model, x_ptr, y_ptr, root_indices, 0, &rng) catch {
        // Construction failed partway; leave the model cleanly unfitted.
        model.nodes.clearRetainingCapacity();
        model.has_root = false;
        return 0;
    };
    model.root_index = root_index;
    model.has_root = true;
    return 1;
}
/// Predicts a class label for each row of `x_ptr` (row-major,
/// `n_samples` x `n_features`) by walking the fitted tree from the root,
/// going left when the feature value is <= the node threshold. Writes
/// labels to `out_labels_ptr`. Returns 1 on success, 0 when the model is
/// unfitted, the handle is invalid, or the dimensions do not match.
pub export fn decision_tree_model_predict(
    handle: usize,
    x_ptr: [*]const f64,
    n_samples: usize,
    n_features: usize,
    out_labels_ptr: [*]u8,
) u8 {
    const model = asDecisionTreeModel(handle) orelse return 0;
    if (!model.has_root or n_samples == 0 or n_features != model.n_features) {
        return 0;
    }

    for (0..n_samples) |sample| {
        const row = x_ptr[sample * model.n_features .. (sample + 1) * model.n_features];
        var cursor = model.root_index;
        while (true) {
            const node = model.nodes.items[cursor];
            if (node.is_leaf) {
                out_labels_ptr[sample] = node.prediction;
                break;
            }
            cursor = if (row[node.feature_index] <= node.threshold)
                node.left_index
            else
                node.right_index;
        }
    }

    return 1;
}
/// Runs one full-batch gradient-descent epoch of logistic regression.
/// Accumulates the gradient over all samples into `gradients_ptr`
/// (scratch, overwritten), then applies a single update to `weights_ptr`
/// and — when `fit_intercept` is nonzero — to `intercept_ptr`. Both the
/// data gradient and the optional L2 penalty are averaged over
/// `n_samples`. Returns the largest absolute parameter change (0.0 for
/// empty input), which callers use as a convergence measure.
pub export fn logistic_train_epoch(
    x_ptr: [*]const f64,
    y_ptr: [*]const f64,
    n_samples: usize,
    n_features: usize,
    weights_ptr: [*]f64,
    intercept_ptr: *f64,
    gradients_ptr: [*]f64,
    learning_rate: f64,
    l2: f64,
    fit_intercept: u8,
) f64 {
    if (n_samples == 0 or n_features == 0) {
        return 0.0;
    }

    const weights = weights_ptr[0..n_features];
    const gradients = gradients_ptr[0..n_features];
    @memset(gradients, 0.0);

    // Accumulate d(loss)/d(theta) over the whole batch.
    var intercept_gradient: f64 = 0.0;
    for (0..n_samples) |sample| {
        const row = x_ptr[sample * n_features .. (sample + 1) * n_features];
        var linear = intercept_ptr.*;
        for (row, weights) |feature, weight| {
            linear += feature * weight;
        }

        // For log-loss the per-sample gradient factor is (p - y).
        const residual = sigmoid(linear) - y_ptr[sample];
        intercept_gradient += residual;
        for (gradients, row) |*gradient, feature| {
            gradient.* += residual * feature;
        }
    }

    const inv_n = 1.0 / @as(f64, @floatFromInt(n_samples));
    var max_update: f64 = 0.0;
    for (weights, gradients) |*weight, gradient| {
        const ridge = if (l2 > 0.0) l2 * weight.* else 0.0;
        const delta = learning_rate * (inv_n * gradient + inv_n * ridge);
        weight.* -= delta;
        max_update = @max(max_update, @abs(delta));
    }

    if (fit_intercept != 0) {
        // The intercept is conventionally left unregularized.
        const intercept_delta = learning_rate * inv_n * intercept_gradient;
        intercept_ptr.* -= intercept_delta;
        max_update = @max(max_update, @abs(intercept_delta));
    }

    return max_update;
}
/// Repeatedly calls `logistic_train_epoch` until the largest parameter
/// update drops below `tolerance` or `max_iters` epochs have run.
/// Returns the number of epochs executed (0 when the inputs are empty or
/// no iterations were requested).
pub export fn logistic_train_epochs(
    x_ptr: [*]const f64,
    y_ptr: [*]const f64,
    n_samples: usize,
    n_features: usize,
    weights_ptr: [*]f64,
    intercept_ptr: *f64,
    gradients_ptr: [*]f64,
    learning_rate: f64,
    l2: f64,
    fit_intercept: u8,
    max_iters: usize,
    tolerance: f64,
) usize {
    if (max_iters == 0 or n_samples == 0 or n_features == 0) {
        return 0;
    }

    for (0..max_iters) |epoch| {
        const max_update = logistic_train_epoch(
            x_ptr,
            y_ptr,
            n_samples,
            n_features,
            weights_ptr,
            intercept_ptr,
            gradients_ptr,
            learning_rate,
            l2,
            fit_intercept,
        );
        // Converged: report how many epochs were actually needed.
        if (max_update < tolerance) {
            return epoch + 1;
        }
    }

    return max_iters;
}