goldenflow 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3588 @@
1
+ 'use strict';
2
+
3
+ var fs = require('fs');
4
+ var path = require('path');
5
+ var os = require('os');
6
+ var readline = require('readline');
7
+ var http = require('http');
8
+
9
// Module-loading runtime helpers. The first two are short aliases for Object built-ins.
+ var __defProp = Object.defineProperty;
10
+ var __getOwnPropNames = Object.getOwnPropertyNames;
11
// __require: resolves to the native `require` when one is defined. Otherwise, if Proxy
// exists, it is a Proxy around the throwing stub whose property reads are forwarded to
// `require` should it be defined by then; with neither available it is the stub itself,
// which throws 'Dynamic require of "<name>" is not supported'.
+ var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
12
+ get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
13
+ }) : x)(function(x) {
14
+ if (typeof require !== "undefined") return require.apply(this, arguments);
15
+ throw Error('Dynamic require of "' + x + '" is not supported');
16
+ });
17
// __esm(fn): `fn` is an object whose first own property is a module initializer.
// The returned __init() calls that initializer the first time it is invoked (clearing
// `fn` to 0 as it does so, so it cannot run again) and returns the cached result `res`
// on every call.
+ var __esm = (fn, res) => function __init() {
18
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
19
+ };
20
// __export(target, all): defines every key of `all` on `target` as an enumerable
// getter, so the exported value is read lazily each time the property is accessed.
+ var __export = (target, all) => {
21
+ for (var name in all)
22
+ __defProp(target, name, { get: all[name], enumerable: true });
23
+ };
24
+
25
+ // src/core/types.ts
26
/**
 * Build a transform record. `sampleBefore` and `sampleAfter` default to empty
 * arrays; any property present on `input` overrides the default.
 */
function makeTransformRecord(input) {
  const record = { sampleBefore: [], sampleAfter: [] };
  return Object.assign(record, input);
}
33
/** Create a new MutableManifest (looked up on `exports` at call time) for `source`. */
function makeManifest(source) {
  const ManifestClass = exports.MutableManifest;
  return new ManifestClass(source);
}
36
/**
 * Build a column profile. `sampleValues` defaults to [] and `detectedFormat`
 * to null; properties on `input` take precedence.
 */
function makeColumnProfile(input) {
  const profile = { sampleValues: [], detectedFormat: null };
  return Object.assign(profile, input);
}
43
/**
 * Build a config object. Every known field starts at its empty default
 * (null, [] or {}); properties on `input` override them.
 */
function makeConfig(input) {
  const defaults = {
    source: null,
    output: null,
    transforms: [],
    splits: [],
    renames: {},
    drop: [],
    filters: [],
    dedup: null,
    mappings: []
  };
  return Object.assign(defaults, input);
}
57
+ exports.MutableManifest = void 0;
58
+ var init_types = __esm({
59
+ "src/core/types.ts"() {
60
+ exports.MutableManifest = class {
61
+ source;
62
+ records = [];
63
+ errors = [];
64
+ createdAt;
65
+ constructor(source) {
66
+ this.source = source;
67
+ this.createdAt = (/* @__PURE__ */ new Date()).toISOString();
68
+ }
69
+ addRecord(record) {
70
+ this.records.push(record);
71
+ }
72
+ addError(column, transform, row, error) {
73
+ this.errors.push({ column, transform, row, error });
74
+ }
75
+ toDict() {
76
+ return {
77
+ source: this.source,
78
+ created_at: this.createdAt,
79
+ records: this.records.map((r) => ({
80
+ column: r.column,
81
+ transform: r.transform,
82
+ affected_rows: r.affectedRows,
83
+ total_rows: r.totalRows,
84
+ sample_before: r.sampleBefore,
85
+ sample_after: r.sampleAfter
86
+ })),
87
+ errors: this.errors.map((e) => ({
88
+ column: e.column,
89
+ transform: e.transform,
90
+ row: e.row,
91
+ error: e.error
92
+ })),
93
+ summary: {
94
+ total_transforms: this.records.length,
95
+ total_errors: this.errors.length,
96
+ columns_affected: [...new Set(this.records.map((r) => r.column))]
97
+ }
98
+ };
99
+ }
100
+ };
101
+ }
102
+ });
103
+
104
+ // src/core/transforms/registry.ts
105
/**
 * Store a transform in the registry under `opts.name`, replacing any entry of
 * the same name. Missing options default to autoApply=false, priority=50,
 * mode="series".
 */
function registerTransform(opts, func) {
  const { name, inputTypes } = opts;
  const entry = {
    name,
    func,
    inputTypes,
    autoApply: opts.autoApply ?? false,
    priority: opts.priority ?? 50,
    mode: opts.mode ?? "series"
  };
  _REGISTRY.set(name, entry);
}
115
/** Look up a registered transform by name; yields undefined when it is not registered. */
function getTransform(name) {
  const entry = _REGISTRY.get(name);
  return entry;
}
118
/** All registered transforms as a new array, highest priority first. */
function listTransforms() {
  const byPriorityDesc = (left, right) => right.priority - left.priority;
  const entries = Array.from(_REGISTRY.values());
  return entries.sort(byPriorityDesc);
}
121
/**
 * Split a "name:arg1:arg2" spec on ":" into [name, [arg1, arg2]].
 * A spec without ":" yields an empty argument list.
 */
function parseTransformName(raw) {
  const [name, ...args] = raw.split(":");
  return [name, args];
}
125
/** Expose the live registry Map itself (not a copy). */
function registry() {
  const live = _REGISTRY;
  return live;
}
128
// Registry storage; it stays undefined until init_registry() has been called.
var _REGISTRY;
var init_registry = __esm({
  "src/core/transforms/registry.ts"() {
    // Start with an empty name -> transform-entry map.
    _REGISTRY = new Map();
  }
});
134
+
135
+ // src/core/domains/people-hr.ts
136
// Export table for the people-hr module; PACK is read lazily on each access.
var people_hr_exports = {};
__export(people_hr_exports, {
  PACK() {
    return PACK;
  }
});
140
/**
 * Map each string to true/false depending on whether it matches SSN_RE and has
 * no all-zero area, group or serial part. Non-string values pass through unchanged.
 */
function ssnValidate(values) {
  const checkOne = (v) => {
    if (typeof v !== "string") return v;
    const parts = SSN_RE.exec(v.trim());
    if (parts === null) return false;
    const [, area, group, serial] = parts;
    return !(area === "000" || group === "00" || serial === "0000");
  };
  return values.map((v) => checkOne(v));
}
149
var SSN_RE, PACK;
var init_people_hr = __esm({
  "src/core/domains/people-hr.ts"() {
    init_types();
    init_registry();
    // Three, two and four digits with optional dashes between the groups.
    SSN_RE = /^(\d{3})-?(\d{2})-?(\d{4})$/;
    registerTransform(
      { name: "ssn_validate", inputTypes: ["ssn", "string"], priority: 55, mode: "series" },
      ssnValidate
    );
    // Transform names listed by this pack.
    const packTransforms = [
      "split_name",
      "split_name_reverse",
      "strip_titles",
      "strip_suffixes",
      "name_proper",
      "ssn_mask",
      "ssn_validate",
      "date_iso8601",
      "gender_standardize",
      "boolean_normalize"
    ];
    // Default per-column operations.
    const columnOps = [
      { column: "name", ops: ["strip", "strip_titles", "title_case"] },
      { column: "ssn", ops: ["ssn_validate"] },
      { column: "gender", ops: ["gender_standardize"] },
      { column: "hire_date", ops: ["date_iso8601"] },
      { column: "active", ops: ["boolean_normalize"] }
    ];
    PACK = {
      name: "people_hr",
      description: "Name parsing, SSN formatting, employment dates, gender/boolean standardization",
      transforms: packTransforms,
      defaultConfig: makeConfig({ transforms: columnOps })
    };
  }
});
186
+
187
+ // src/core/domains/healthcare.ts
188
// Export table for the healthcare module; PACK resolves to PACK2 lazily on each access.
var healthcare_exports = {};
__export(healthcare_exports, {
  PACK() {
    return PACK2;
  }
});
192
/**
 * Map each string to a boolean: true when, after removing non-digits, it has
 * exactly 10 digits and "80840" + digits passes the Luhn checksum.
 * Non-string values pass through unchanged.
 */
function npiValidate(values) {
  // Luhn: walking from the right, double every second digit (subtracting 9
  // when the doubled value exceeds 9) and require the sum to be a multiple of 10.
  const passesLuhn = (digitString) => {
    let sum = 0;
    let doubleThis = false;
    for (let idx = digitString.length - 1; idx >= 0; idx -= 1) {
      let digit = parseInt(digitString[idx], 10);
      if (doubleThis) {
        digit *= 2;
        if (digit > 9) digit -= 9;
      }
      sum += digit;
      doubleThis = !doubleThis;
    }
    return sum % 10 === 0;
  };

  return values.map((v) => {
    if (typeof v !== "string") return v;
    const digits = v.replace(/\D/g, "");
    return digits.length === 10 && passesLuhn("80840" + digits);
  });
}
210
/**
 * Trim and upper-case each string, drop any dots, then insert a single dot
 * after the third character when more than three characters remain.
 * Non-string values pass through unchanged.
 */
function icd10Format(values) {
  const formatOne = (raw) => {
    const code = raw.trim().toUpperCase().replace(/\./g, "");
    if (code.length <= 3) return code;
    const category = code.slice(0, 3);
    const detail = code.slice(3);
    return category + "." + detail;
  };
  return values.map((v) => (typeof v === "string" ? formatOne(v) : v));
}
217
var PACK2;
var init_healthcare = __esm({
  "src/core/domains/healthcare.ts"() {
    init_types();
    init_registry();
    registerTransform(
      { name: "npi_validate", inputTypes: ["string"], priority: 50, mode: "series" },
      npiValidate
    );
    registerTransform(
      { name: "icd10_format", inputTypes: ["string"], priority: 50, mode: "series" },
      icd10Format
    );
    // Default per-column operations.
    const columnOps = [
      { column: "npi", ops: ["npi_validate"] },
      { column: "icd10_code", ops: ["icd10_format"] },
      { column: "service_date", ops: ["date_iso8601"] },
      { column: "patient_name", ops: ["strip", "title_case"] }
    ];
    PACK2 = {
      name: "healthcare",
      description: "MRN normalization, ICD-10 formatting, NPI validation, date standardization",
      transforms: ["npi_validate", "icd10_format", "date_iso8601", "null_standardize", "strip"],
      defaultConfig: makeConfig({ transforms: columnOps })
    };
  }
});
245
+
246
+ // src/core/domains/finance.ts
247
// Export table for the finance module; PACK resolves to PACK3 lazily on each access.
var finance_exports = {};
__export(finance_exports, {
  PACK() {
    return PACK3;
  }
});
251
/**
 * Reduce each string to its digits and replace all but the last four with "*".
 * Strings with fewer than four digits, and non-string values, are returned unchanged.
 */
function accountMask(values) {
  const VISIBLE = 4;
  return values.map((v) => {
    if (typeof v !== "string") return v;
    const digits = v.replace(/\D/g, "");
    if (digits.length < VISIBLE) return v;
    // Left-pad the visible tail with "*" back up to the full digit count.
    return digits.slice(-VISIBLE).padStart(digits.length, "*");
  });
}
259
/**
 * Trim and upper-case each string, keeping at most the first nine characters.
 * Non-string values pass through unchanged.
 */
function cusipFormat(values) {
  const MAX_LEN = 9;
  return values.map((v) => {
    if (typeof v !== "string") return v;
    const cleaned = v.trim().toUpperCase();
    return cleaned.substring(0, MAX_LEN);
  });
}
265
var PACK3;
var init_finance = __esm({
  "src/core/domains/finance.ts"() {
    init_types();
    init_registry();
    registerTransform(
      { name: "account_mask", inputTypes: ["string"], priority: 50, mode: "series" },
      accountMask
    );
    registerTransform(
      { name: "cusip_format", inputTypes: ["string"], priority: 50, mode: "series" },
      cusipFormat
    );
    // Default per-column operations.
    const columnOps = [
      { column: "account_number", ops: ["account_mask"] },
      { column: "amount", ops: ["currency_strip"] },
      { column: "transaction_date", ops: ["date_iso8601"] }
    ];
    PACK3 = {
      name: "finance",
      description: "Account masking, currency standardization, CUSIP/ISIN formatting",
      transforms: ["account_mask", "cusip_format", "currency_strip", "date_iso8601"],
      defaultConfig: makeConfig({ transforms: columnOps })
    };
  }
});
292
+
293
+ // src/core/domains/ecommerce.ts
294
// Export table for the ecommerce module; PACK resolves to PACK4 lazily on each access.
var ecommerce_exports = {};
__export(ecommerce_exports, {
  PACK() {
    return PACK4;
  }
});
298
/**
 * Trim and upper-case each string, then delete every character that is not
 * A-Z, 0-9 or "-". Non-string values pass through unchanged.
 */
function skuNormalize(values) {
  const normalizeOne = (raw) => {
    const upper = raw.trim().toUpperCase();
    return upper.replace(/[^A-Z0-9-]/g, "");
  };
  return values.map((v) => (typeof v === "string" ? normalizeOne(v) : v));
}
304
var PACK4;
var init_ecommerce = __esm({
  "src/core/domains/ecommerce.ts"() {
    init_types();
    init_registry();
    registerTransform(
      { name: "sku_normalize", inputTypes: ["string"], priority: 50, mode: "series" },
      skuNormalize
    );
    // Default per-column operations.
    const columnOps = [
      { column: "sku", ops: ["sku_normalize"] },
      { column: "price", ops: ["currency_strip"] },
      { column: "category", ops: ["strip", "title_case"] }
    ];
    PACK4 = {
      name: "ecommerce",
      description: "SKU normalization, price cleaning, category standardization",
      transforms: ["sku_normalize", "currency_strip", "category_auto_correct", "strip"],
      defaultConfig: makeConfig({ transforms: columnOps })
    };
  }
});
327
+
328
+ // src/core/domains/real-estate.ts
329
// Export table for the real-estate module; PACK resolves to PACK5 lazily on each access.
var real_estate_exports = {};
__export(real_estate_exports, {
  PACK() {
    return PACK5;
  }
});
333
/** Trim and upper-case each string; non-string values pass through unchanged. */
function mlsNormalize(values) {
  return values.map((v) => {
    if (typeof v === "string") {
      const trimmed = v.trim();
      return trimmed.toUpperCase();
    }
    return v;
  });
}
339
var PACK5;
var init_real_estate = __esm({
  "src/core/domains/real-estate.ts"() {
    init_types();
    init_registry();
    registerTransform(
      { name: "mls_normalize", inputTypes: ["string"], priority: 50, mode: "series" },
      mlsNormalize
    );
    // Default per-column operations.
    const columnOps = [
      { column: "mls_id", ops: ["mls_normalize"] },
      { column: "address", ops: ["strip", "address_standardize"] },
      { column: "price", ops: ["currency_strip"] },
      { column: "zip", ops: ["zip_normalize"] }
    ];
    PACK5 = {
      name: "real_estate",
      description: "Address parsing (USPS), MLS ID normalization, price cleaning",
      transforms: ["mls_normalize", "address_standardize", "zip_normalize", "currency_strip"],
      defaultConfig: makeConfig({ transforms: columnOps })
    };
  }
});
363
+
364
+ // src/core/index.ts
365
+ init_types();
366
+
367
+ // src/core/data.ts
368
// Lower-case, trimmed spellings that isNullish() treats as a missing value.
var NULL_STRINGS = new Set([
  "", "null", "none", "nan", "n/a",
  "na", "nil", "#n/a", "missing", "undefined"
]);
380
/**
 * True for null, undefined, NaN, and any string whose lower-cased, trimmed form
 * is in NULL_STRINGS. Everything else is false.
 */
function isNullish(v) {
  if (v === null || v === undefined) return true;
  switch (typeof v) {
    case "string":
      return NULL_STRINGS.has(v.toLowerCase().trim());
    case "number":
      return Number.isNaN(v);
    default:
      return false;
  }
}
386
/**
 * Normalise a raw cell: nullish values (per isNullish) become null; strings,
 * numbers and booleans are kept as-is; anything else is converted with String().
 */
function toColumnValue(v) {
  if (isNullish(v)) return null;
  const kind = typeof v;
  const keepAsIs = kind === "string" || kind === "number" || kind === "boolean";
  return keepAsIs ? v : String(v);
}
393
/**
 * Seeded pseudo-random generator. Returns a function that yields a number in
 * [0, 1) on each call; the same seed always produces the same sequence.
 */
function mulberry32(seed) {
  let state = seed | 0;
  return () => {
    // Advance the 32-bit state, then scramble it with multiply/xor-shift rounds.
    state = (state + 1831565813) | 0;
    let mixed = Math.imul(state ^ (state >>> 15), 1 | state);
    mixed = (mixed + Math.imul(mixed ^ (mixed >>> 7), 61 | mixed)) ^ mixed;
    // Reinterpret as unsigned 32-bit and scale by 2^32.
    return ((mixed ^ (mixed >>> 14)) >>> 0) / 4294967296;
  };
}
402
/**
 * In-memory table backed by an array of plain row objects.
 * Column reads are normalised through toColumnValue() and cached per column name.
 */
var TabularData = class _TabularData {
  _rows;
  _columnCache = /* @__PURE__ */ new Map();

  /** @param rows array of row objects; stored by reference, not copied. */
  constructor(rows) {
    this._rows = rows;
  }

  /** The underlying row array. */
  get rows() {
    return this._rows;
  }

  /** Column names, taken from the keys of the first row ([] for an empty table). */
  get columns() {
    if (this._rows.length === 0) return [];
    return Object.keys(this._rows[0]);
  }

  /** Number of rows. */
  get rowCount() {
    return this._rows.length;
  }

  // ---- Column access ----

  /**
   * Values of one column after toColumnValue() normalisation.
   * The array is built once per column name and the same cached array is
   * returned on later calls, so callers should not mutate it.
   */
  column(name) {
    const cached = this._columnCache.get(name);
    if (cached) return cached;
    const values = this._rows.map((r) => toColumnValue(r[name]));
    this._columnCache.set(name, values);
    return values;
  }

  /** Raw column access — preserves original values without null coercion.
   * Use for profiling where "N/A" should remain a string, not become null. */
  rawColumn(name) {
    return this._rows.map((r) => {
      const v = r[name];
      if (v === null || v === void 0) return null;
      if (typeof v === "string" || typeof v === "number" || typeof v === "boolean") return v;
      return String(v);
    });
  }

  // ---- Null handling ----

  /** Count of null entries in the normalised column. */
  nullCount(col) {
    let count = 0;
    for (const v of this.column(col)) {
      if (v === null) count++;
    }
    return count;
  }

  /** Normalised column values with nulls removed (new array). */
  dropNulls(col) {
    return this.column(col).filter((v) => v !== null);
  }

  // ---- Type inference ----

  /**
   * Infer a column type from its non-null values: "null" when there are none,
   * "string" if any value is a string, "boolean" if all are booleans,
   * otherwise "float" if any number is non-integer, else "integer".
   */
  dtype(col) {
    const values = this.dropNulls(col);
    if (values.length === 0) return "null";
    let hasInt = false;
    let hasFloat = false;
    let hasBool = false;
    let hasString = false;
    for (const v of values) {
      if (typeof v === "boolean") {
        hasBool = true;
      } else if (typeof v === "number") {
        if (Number.isInteger(v)) hasInt = true;
        else hasFloat = true;
      } else {
        hasString = true;
      }
    }
    if (hasString) return "string";
    if (hasBool && !hasInt && !hasFloat) return "boolean";
    if (hasFloat) return "float";
    if (hasInt) return "integer";
    return "string";
  }

  // ---- Aggregation ----

  /** Number of distinct non-null values. */
  nUnique(col) {
    const set = /* @__PURE__ */ new Set();
    for (const v of this.dropNulls(col)) set.add(v);
    return set.size;
  }

  /** Map from each non-null value to its occurrence count. */
  valueCounts(col) {
    const map = /* @__PURE__ */ new Map();
    for (const v of this.dropNulls(col)) {
      map.set(v, (map.get(v) ?? 0) + 1);
    }
    return map;
  }

  /** MUST use loop — Math.min(...array) crashes on >65K elements. */
  min(col) {
    const nums = this.numericValues(col);
    if (nums.length === 0) return null;
    let m = nums[0];
    for (let i = 1; i < nums.length; i++) {
      if (nums[i] < m) m = nums[i];
    }
    return m;
  }

  /** MUST use loop — Math.max(...array) crashes on >65K elements. */
  max(col) {
    const nums = this.numericValues(col);
    if (nums.length === 0) return null;
    let m = nums[0];
    for (let i = 1; i < nums.length; i++) {
      if (nums[i] > m) m = nums[i];
    }
    return m;
  }

  /** Arithmetic mean of the finite numeric values, or null when there are none. */
  mean(col) {
    const nums = this.numericValues(col);
    if (nums.length === 0) return null;
    let sum = 0;
    for (const n of nums) sum += n;
    return sum / nums.length;
  }

  /** Sample standard deviation (n - 1 denominator); null with fewer than two numbers. */
  std(col) {
    const nums = this.numericValues(col);
    if (nums.length < 2) return null;
    // Compute the mean from the values already in hand instead of re-scanning the column.
    let sum = 0;
    for (const n of nums) sum += n;
    const avg = sum / nums.length;
    let sumSq = 0;
    for (const n of nums) sumSq += (n - avg) ** 2;
    return Math.sqrt(sumSq / (nums.length - 1));
  }

  // ---- Filtering & sampling ----

  /** New table containing the rows for which `predicate` is truthy. */
  filter(predicate) {
    return new _TabularData(this._rows.filter(predicate));
  }

  /** New table containing the first `n` rows. */
  head(n) {
    return new _TabularData(this._rows.slice(0, n));
  }

  /**
   * Deterministic random sample of `n` rows, seeded via mulberry32(seed).
   * Returns this same instance when `n` is at least the row count.
   */
  sample(n, seed = 42) {
    if (n >= this._rows.length) return this;
    const rng = mulberry32(seed);
    const indices = Array.from({ length: this._rows.length }, (_, i) => i);
    // Partial Fisher–Yates shuffle from the end: only the last n slots are
    // finalised, because only those are taken below.
    for (let i = indices.length - 1; i > 0 && indices.length - 1 - i < n; i--) {
      const j = Math.floor(rng() * (i + 1));
      [indices[i], indices[j]] = [indices[j], indices[i]];
    }
    const sampled = indices.slice(indices.length - n).map((i) => this._rows[i]);
    return new _TabularData(sampled);
  }

  // ---- String operations ----

  /**
   * Per-row boolean: does the string value match `pattern`? Non-strings give false.
   * `pattern` may carry the g or y flag; its lastIndex is reset before each
   * row so one row's match position cannot leak into the next.
   */
  strContains(col, pattern) {
    return this.column(col).map((v) => {
      if (typeof v !== "string") return false;
      pattern.lastIndex = 0;
      return pattern.test(v);
    });
  }

  /** Per-row string length (UTF-16 code units); non-strings give 0. */
  strLengths(col) {
    return this.column(col).map(
      (v) => typeof v === "string" ? v.length : 0
    );
  }

  // ---- Casting ----

  /** Per-row Number(v); null for nulls and for results that are not finite. */
  castFloat(col) {
    return this.column(col).map((v) => {
      if (v === null) return null;
      const n = Number(v);
      return Number.isFinite(n) ? n : null;
    });
  }

  /** Like castFloat, but truncates toward zero. */
  castInt(col) {
    return this.column(col).map((v) => {
      if (v === null) return null;
      const n = Number(v);
      return Number.isFinite(n) ? Math.trunc(n) : null;
    });
  }

  // ---- Helpers ----

  /** Finite numbers in the column, in row order (new array). */
  numericValues(col) {
    const result = [];
    for (const v of this.column(col)) {
      if (typeof v === "number" && Number.isFinite(v)) {
        result.push(v);
      }
    }
    return result;
  }

  /** String values in the column, in row order (new array). */
  stringValues(col) {
    const result = [];
    for (const v of this.column(col)) {
      if (typeof v === "string") result.push(v);
    }
    return result;
  }

  /** Finite numbers in ascending order. */
  sortedNumeric(col) {
    // numericValues() returns a fresh array, so the in-place sort is safe.
    return this.numericValues(col).sort((a, b) => a - b);
  }

  /** True when the finite numbers are non-decreasing (or non-increasing if `descending`). */
  isSorted(col, descending = false) {
    const nums = this.numericValues(col);
    for (let i = 1; i < nums.length; i++) {
      if (descending ? nums[i] > nums[i - 1] : nums[i] < nums[i - 1]) {
        return false;
      }
    }
    return true;
  }
};
593
+
594
+ // src/core/transforms/text.ts
595
+ init_registry();
596
/** Apply `fn` to every string in `values`; all other entries are copied through untouched. */
function mapStrings(values, fn) {
  return values.map((v) => (typeof v === "string" ? fn(v) : v));
}
602
/** Trim leading and trailing whitespace from every string value. */
function strip(values) {
  const trimOne = (text) => text.trim();
  return mapStrings(values, trimOne);
}
registerTransform(
  { name: "strip", inputTypes: ["string"], autoApply: true, priority: 90, mode: "expr" },
  strip
);
609
/** Lower-case every string value. */
function lowercase(values) {
  const lowerOne = (text) => text.toLowerCase();
  return mapStrings(values, lowerOne);
}
registerTransform(
  { name: "lowercase", inputTypes: ["string"], priority: 50, mode: "expr" },
  lowercase
);
616
/** Upper-case every string value. */
function uppercase(values) {
  const upperOne = (text) => text.toUpperCase();
  return mapStrings(values, upperOne);
}
registerTransform(
  { name: "uppercase", inputTypes: ["string"], priority: 50, mode: "expr" },
  uppercase
);
623
/**
 * Lower-case each string, then capitalise the first character of every word.
 *
 * A word start is a letter, digit or underscore that is not preceded by a
 * letter, combining mark, digit or underscore. For ASCII text this is the same
 * as /\b\w/, but it also treats accented and other non-ASCII letters as word
 * characters, so "maría" becomes "María" rather than "MaríA".
 * Non-string values pass through unchanged.
 */
function titleCase(values) {
  const wordStart = /(?<![\p{L}\p{M}\p{N}_])[\p{L}\p{N}_]/gu;
  return mapStrings(
    values,
    (s) => s.toLowerCase().replace(wordStart, (ch) => ch.toUpperCase())
  );
}
registerTransform(
  { name: "title_case", inputTypes: ["string"], priority: 50, mode: "expr" },
  titleCase
);
633
/**
 * Decompose each string with NFKD and delete every combining mark (\p{M}),
 * which strips accents and expands compatibility characters.
 */
function normalizeUnicode(values) {
  const combiningMarks = new RegExp("\\p{M}", "gu");
  const stripMarks = (text) => {
    const decomposed = text.normalize("NFKD");
    return decomposed.replace(combiningMarks, "");
  };
  return mapStrings(values, stripMarks);
}
registerTransform(
  { name: "normalize_unicode", inputTypes: ["string"], autoApply: true, priority: 85, mode: "series" },
  normalizeUnicode
);
643
/**
 * Delete every character that is not a letter, combining mark, digit,
 * underscore or whitespace.
 *
 * The character class is Unicode-aware: the ASCII-only /[^\w\s]/ would also
 * delete accented and non-Latin letters ("café" -> "caf").
 * Non-string values pass through unchanged.
 */
function removePunctuation(values) {
  const notWordOrSpace = /[^\p{L}\p{M}\p{N}_\s]/gu;
  return mapStrings(values, (s) => s.replace(notWordOrSpace, ""));
}
registerTransform(
  { name: "remove_punctuation", inputTypes: ["string"], priority: 40, mode: "series" },
  removePunctuation
);
650
/** Replace each run of whitespace with one space, then trim both ends. */
function collapseWhitespace(values) {
  const collapseOne = (text) => {
    const singleSpaced = text.replace(/\s+/g, " ");
    return singleSpaced.trim();
  };
  return mapStrings(values, collapseOne);
}
registerTransform(
  { name: "collapse_whitespace", inputTypes: ["string"], autoApply: true, priority: 80, mode: "expr" },
  collapseWhitespace
);
657
/**
 * Cut every string to its first `n` characters (default 255).
 * A numeric `n` is used as given; any other `n` is converted with Number(),
 * falling back to 255 when the result is 0 or NaN.
 */
function truncate(values, n = 255) {
  let maxLen;
  if (typeof n === "number") {
    maxLen = n;
  } else {
    maxLen = Number(n) || 255;
  }
  return mapStrings(values, (text) => text.slice(0, maxLen));
}
registerTransform(
  { name: "truncate", inputTypes: ["string"], priority: 30, mode: "series" },
  truncate
);
665
/**
 * Replace typographic single quotes (U+2018–U+201B) with "'" and typographic
 * double quotes (U+201C–U+201F) with '"'.
 */
function normalizeQuotes(values) {
  const straighten = (text) => {
    const singlesFixed = text.replace(/[\u2018\u2019\u201A\u201B]/g, "'");
    return singlesFixed.replace(/[\u201C\u201D\u201E\u201F]/g, '"');
  };
  return mapStrings(values, straighten);
}
registerTransform(
  { name: "normalize_quotes", inputTypes: ["string"], autoApply: true, priority: 84, mode: "series" },
  normalizeQuotes
);
675
/** Delete every "<...>" span (an opening "<" up to the next ">") from each string. */
function removeHtmlTags(values) {
  const dropTags = (text) => text.replace(/<[^>]*>/g, "");
  return mapStrings(values, dropTags);
}
registerTransform(
  { name: "remove_html_tags", inputTypes: ["string"], priority: 45, mode: "series" },
  removeHtmlTags
);
682
/**
 * Delete every http:// or https:// URL (up to the next whitespace) from each
 * string, then trim the result. Interior spacing is left as-is.
 */
function removeUrls(values) {
  const dropUrls = (text) => {
    const withoutUrls = text.replace(/https?:\/\/[^\s]+/g, "");
    return withoutUrls.trim();
  };
  return mapStrings(values, dropUrls);
}
registerTransform(
  { name: "remove_urls", inputTypes: ["string"], priority: 40, mode: "series" },
  removeUrls
);
692
/** Delete every ASCII digit from each string. */
function removeDigits(values) {
  const dropDigits = (text) => text.replace(/\d/g, "");
  return mapStrings(values, dropDigits);
}
registerTransform(
  { name: "remove_digits", inputTypes: ["string"], priority: 35, mode: "series" },
  removeDigits
);
699
/**
 * Left-pad every string to `width` characters using `char`.
 * A non-numeric `width` is converted with Number(), falling back to 10 when
 * the result is 0 or NaN; a non-string `char` falls back to "0".
 */
function padLeft(values, width = 10, char = "0") {
  let targetWidth = width;
  if (typeof width !== "number") targetWidth = Number(width) || 10;
  let filler = char;
  if (typeof char !== "string") filler = "0";
  return mapStrings(values, (text) => text.padStart(targetWidth, filler));
}
registerTransform(
  { name: "pad_left", inputTypes: ["string"], priority: 30, mode: "series" },
  padLeft
);
708
/**
 * Right-pad every string to `width` characters using `char`.
 * A non-numeric `width` is converted with Number(), falling back to 10 when
 * the result is 0 or NaN; a non-string `char` falls back to a space.
 */
function padRight(values, width = 10, char = " ") {
  let targetWidth = width;
  if (typeof width !== "number") targetWidth = Number(width) || 10;
  let filler = char;
  if (typeof char !== "string") filler = " ";
  return mapStrings(values, (text) => text.padEnd(targetWidth, filler));
}
registerTransform(
  { name: "pad_right", inputTypes: ["string"], priority: 30, mode: "series" },
  padRight
);
717
/**
 * Delete every code point in the listed emoji and pictograph ranges (plus
 * variation selectors, the zero-width joiner, the keycap mark and tag
 * characters) from each string.
 */
function removeEmojis(values) {
  const emojiPattern = /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{FE00}-\u{FE0F}\u{1F900}-\u{1F9FF}\u{1FA00}-\u{1FA6F}\u{1FA70}-\u{1FAFF}\u{200D}\u{20E3}\u{E0020}-\u{E007F}]/gu;
  const dropEmojis = (text) => text.replace(emojiPattern, "");
  return mapStrings(values, dropEmojis);
}
registerTransform(
  { name: "remove_emojis", inputTypes: ["string"], priority: 38, mode: "series" },
  removeEmojis
);
725
/**
 * Repair UTF-8 text that was mis-decoded one byte per character.
 *
 * Each string whose characters all have code <= 255 is treated as a byte
 * sequence and re-decoded as strict UTF-8. When that succeeds the decoded text
 * is returned. Strings containing any character above 255, or whose bytes are
 * not valid UTF-8, are returned unchanged. Non-string values pass through.
 */
function fixMojibake(values) {
  // One strict decoder serves the whole column; decode() without the stream
  // option starts from a clean state on every call, including after a failure.
  const decoder = new TextDecoder("utf-8", { fatal: true });
  return mapStrings(values, (s) => {
    const bytes = new Uint8Array(s.length);
    for (let i = 0; i < s.length; i++) {
      const code = s.charCodeAt(i);
      // Not a single byte, so this cannot be byte-per-character mojibake.
      if (code > 255) return s;
      bytes[i] = code;
    }
    try {
      return decoder.decode(bytes);
    } catch {
      // Deliberate best-effort: invalid UTF-8 means the text was not mojibake.
      return s;
    }
  });
}
registerTransform(
  { name: "fix_mojibake", inputTypes: ["string"], priority: 86, mode: "series" },
  fixMojibake
);
746
/** Convert "\r\n" pairs, and then any remaining lone "\r", to "\n". */
function normalizeLineEndings(values) {
  const toUnix = (text) => {
    const crlfFixed = text.replace(/\r\n/g, "\n");
    return crlfFixed.replace(/\r/g, "\n");
  };
  return mapStrings(values, toUnix);
}
registerTransform(
  { name: "normalize_line_endings", inputTypes: ["string"], priority: 82, mode: "series" },
  normalizeLineEndings
);
753
/**
 * Replace each string with the numbers found in it (optional leading "-",
 * optional decimal part), joined by single spaces; "" when none are found.
 */
function extractNumbers(values) {
  const pullNumbers = (text) => {
    const found = text.match(/-?\d+(?:\.\d+)?/g);
    if (found === null) return "";
    return found.join(" ");
  };
  return mapStrings(values, pullNumbers);
}
registerTransform(
  { name: "extract_numbers", inputTypes: ["string"], priority: 30, mode: "series" },
  extractNumbers
);
763
+
764
+ // src/core/transforms/phone.ts
765
+ init_registry();
766
/** Return `s` with every non-digit character removed. */
function extractDigits(s) {
  const digitsOnly = s.replace(/\D/g, "");
  return digitsOnly;
}
769
/**
 * Reduce `s` to a 10-digit US number: 10 digits are returned as-is, 11 digits
 * starting with "1" lose that leading 1, and anything else yields null.
 */
function normalizeUsDigits(s) {
  const digits = extractDigits(s);
  switch (digits.length) {
    case 10:
      return digits;
    case 11:
      return digits[0] === "1" ? digits.slice(1) : null;
    default:
      return null;
  }
}
775
/**
 * Format each recognisable US number as "+1" followed by its 10 digits.
 * Strings that normalizeUsDigits() rejects, and non-string values, are returned unchanged.
 */
function phoneE164(values) {
  const formatOne = (raw) => {
    const national = normalizeUsDigits(raw);
    return national === null ? raw : `+1${national}`;
  };
  return values.map((v) => (typeof v === "string" ? formatOne(v) : v));
}
registerTransform(
  { name: "phone_e164", inputTypes: ["phone"], autoApply: true, priority: 50, mode: "series" },
  phoneE164
);
787
/**
 * Format each recognisable US number as "(AAA) BBB-CCCC".
 * Strings that normalizeUsDigits() rejects, and non-string values, are returned unchanged.
 */
function phoneNational(values) {
  const formatOne = (raw) => {
    const national = normalizeUsDigits(raw);
    if (national === null) return raw;
    const area = national.slice(0, 3);
    const exchange = national.slice(3, 6);
    const line = national.slice(6);
    return `(${area}) ${exchange}-${line}`;
  };
  return values.map((v) => (typeof v === "string" ? formatOne(v) : v));
}
registerTransform(
  { name: "phone_national", inputTypes: ["phone"], priority: 50, mode: "series" },
  phoneNational
);
799
/** Reduce each string to its digits (via extractDigits); non-string values pass through. */
function phoneDigits(values) {
  return values.map((v) => {
    if (typeof v === "string") return extractDigits(v);
    return v;
  });
}
registerTransform(
  { name: "phone_digits", inputTypes: ["phone"], priority: 50, mode: "series" },
  phoneDigits
);
809
/**
 * Map each string to a boolean: true when it contains exactly 10 digits, or
 * 11 digits beginning with "1". Non-string values pass through unchanged.
 */
function phoneValidate(values) {
  const looksValid = (raw) => {
    const digits = extractDigits(raw);
    const count = digits.length;
    if (count === 10) return true;
    return count === 11 && digits[0] === "1";
  };
  return values.map((v) => (typeof v === "string" ? looksValid(v) : v));
}
registerTransform(
  { name: "phone_validate", inputTypes: ["phone"], priority: 60, mode: "series" },
  phoneValidate
);
820
/**
 * Map each string to the number 1 when it holds 10 digits, or 11 digits
 * beginning with "1"; otherwise to null. Non-string values pass through unchanged.
 */
function phoneCountryCode(values) {
  const codeFor = (raw) => {
    const digits = extractDigits(raw);
    const isUs = digits.length === 10 || (digits.length === 11 && digits[0] === "1");
    return isUs ? 1 : null;
  };
  return values.map((v) => (typeof v === "string" ? codeFor(v) : v));
}
registerTransform(
  { name: "phone_country_code", inputTypes: ["phone"], priority: 45, mode: "series" },
  phoneCountryCode
);
833
+
834
+ // src/core/transforms/names.ts
835
+ init_registry();
836
/** Apply `fn` to every string in `values`; all other entries are copied through untouched. */
function mapStrings2(values, fn) {
  return values.map((v) => (typeof v === "string" ? fn(v) : v));
}
842
// Leading honorific (Mr, Mrs, Ms, Miss, Dr, Prof, Rev, Sr, Sra; trailing period optional)
// plus the whitespace after it. Anchored at the start, case-insensitive. Used by stripTitles().
+ var _TITLES = /^(Mr\.?|Mrs\.?|Ms\.?|Miss\.?|Dr\.?|Prof\.?|Rev\.?|Sr\.?|Sra\.?)\s+/i;
843
// Trailing generational or credential suffix and the whitespace before it.
// Anchored at the end, case-insensitive. Used by stripSuffixes().
+ var _SUFFIXES = /\s+(Jr\.?|Sr\.?|II|III|IV|MD|PhD|PharmD|DDS|DVM|Esq\.?|CPA|RN|DO)$/i;
844
// A single capital letter followed by "." and one whitespace character, e.g. "J. ".
// No g flag, so .test() is stateless. Used by initialExpand().
+ var _INITIAL_PATTERN = /\b[A-Z]\.\s/;
845
// "Mc" at a word start, capturing the following word character. Used by nameProper().
+ var _MC_PATTERN = /\bMc(\w)/g;
846
// "O'" at a word start, capturing the following word character. Used by nameProper().
+ var _O_PATTERN = /\bO'(\w)/g;
847
// Lower-case nickname -> formal given name, grouped one formal name (or a few) per line.
var _NICKNAMES = {
  bob: "Robert", rob: "Robert", robby: "Robert", robbie: "Robert", bobby: "Robert",
  bill: "William", billy: "William", will: "William", willy: "William",
  jim: "James", jimmy: "James", jamie: "James",
  mike: "Michael", mikey: "Michael", mick: "Michael",
  dick: "Richard", rick: "Richard", rich: "Richard", ricky: "Richard",
  tom: "Thomas", tommy: "Thomas",
  joe: "Joseph", joey: "Joseph",
  jack: "John", johnny: "John",
  jon: "Jonathan",
  dave: "David", davy: "David",
  steve: "Steven", stevie: "Steven",
  dan: "Daniel", danny: "Daniel",
  pat: "Patrick",
  patty: "Patricia", patsy: "Patricia",
  chris: "Christopher", kit: "Christopher",
  tony: "Anthony",
  ed: "Edward", eddie: "Edward", ted: "Edward", teddy: "Edward",
  al: "Albert", bert: "Albert",
  charlie: "Charles", chuck: "Charles",
  sam: "Samuel", sammy: "Samuel",
  ben: "Benjamin", benny: "Benjamin",
  matt: "Matthew",
  andy: "Andrew", drew: "Andrew",
  nick: "Nicholas",
  alex: "Alexander",
  liz: "Elizabeth", beth: "Elizabeth", betty: "Elizabeth",
  kate: "Katherine", kathy: "Katherine", katie: "Katherine",
  sue: "Susan", susie: "Susan",
  meg: "Margaret", maggie: "Margaret", peggy: "Margaret",
  jenny: "Jennifer", jen: "Jennifer",
  debbie: "Deborah", deb: "Deborah",
  barb: "Barbara",
  cindy: "Cynthia",
  sandy: "Sandra"
};
922
/**
 * Split a full-name column into `first_name` and `last_name` at the last space.
 *
 * Returns new row objects; the input rows are not modified.
 * - Non-string values give first_name = last_name = null.
 * - A name with no space becomes first_name, with last_name "".
 * - Otherwise everything before the last space is first_name and the final
 *   token is last_name. first_name is right-trimmed so extra spaces before the
 *   last token ("John  Smith") do not leave trailing whitespace.
 *
 * @param rows   array of row objects
 * @param column key holding the full name
 */
function splitName(rows, column) {
  return rows.map((row) => {
    const val = row[column];
    if (typeof val !== "string") {
      return { ...row, first_name: null, last_name: null };
    }
    const trimmed = val.trim();
    const lastSpace = trimmed.lastIndexOf(" ");
    if (lastSpace === -1) {
      return { ...row, first_name: trimmed, last_name: "" };
    }
    return {
      ...row,
      first_name: trimmed.slice(0, lastSpace).trimEnd(),
      last_name: trimmed.slice(lastSpace + 1)
    };
  });
}
registerTransform(
  { name: "split_name", inputTypes: ["name"], priority: 50, mode: "dataframe" },
  splitName
);
944
/**
 * Split a "Last, First" column at the first comma into `last_name` and
 * `first_name` (both trimmed). Returns new row objects.
 * Non-string values give nulls; a value without a comma becomes first_name
 * (trimmed) with last_name "".
 */
function splitNameReverse(rows, column) {
  const splitOne = (row) => {
    const raw = row[column];
    if (typeof raw !== "string") {
      return { ...row, first_name: null, last_name: null };
    }
    const commaAt = raw.indexOf(",");
    if (commaAt < 0) {
      return { ...row, first_name: raw.trim(), last_name: "" };
    }
    const family = raw.slice(0, commaAt).trim();
    const given = raw.slice(commaAt + 1).trim();
    return { ...row, last_name: family, first_name: given };
  };
  return rows.map((row) => splitOne(row));
}
registerTransform(
  { name: "split_name_reverse", inputTypes: ["name"], priority: 50, mode: "dataframe" },
  splitNameReverse
);
965
/** Remove one leading honorific matched by _TITLES from each string, then trim. */
function stripTitles(values) {
  const dropTitle = (text) => {
    const withoutTitle = text.replace(_TITLES, "");
    return withoutTitle.trim();
  };
  return mapStrings2(values, dropTitle);
}
registerTransform(
  { name: "strip_titles", inputTypes: ["name"], autoApply: true, priority: 70, mode: "series" },
  stripTitles
);
972
/** Remove one trailing suffix matched by _SUFFIXES from each string, then trim. */
function stripSuffixes(values) {
  const dropSuffix = (text) => {
    const withoutSuffix = text.replace(_SUFFIXES, "");
    return withoutSuffix.trim();
  };
  return mapStrings2(values, dropSuffix);
}
registerTransform(
  { name: "strip_suffixes", inputTypes: ["name"], priority: 60, mode: "series" },
  stripSuffixes
);
979
/**
 * Proper-case personal names: lower-case the string, capitalise the first
 * character of every word, then restore the inner capital in "Mc…" and "O'…"
 * names ("mcdonald" -> "McDonald", "o'brien" -> "O'Brien").
 *
 * Word starts are detected with a Unicode-aware pattern. For ASCII text it is
 * the same as /\b\w/, but accented letters count as part of the word, so
 * "seán" becomes "Seán" rather than "SeáN".
 * Non-string values pass through unchanged.
 */
function nameProper(values) {
  const wordStart = /(?<![\p{L}\p{M}\p{N}_])[\p{L}\p{N}_]/gu;
  return mapStrings2(values, (s) => {
    let result = s.toLowerCase().replace(wordStart, (ch) => ch.toUpperCase());
    result = result.replace(_MC_PATTERN, (_match, letter) => `Mc${letter.toUpperCase()}`);
    result = result.replace(_O_PATTERN, (_match, letter) => `O'${letter.toUpperCase()}`);
    return result;
  });
}
registerTransform(
  { name: "name_proper", inputTypes: ["name"], priority: 45, mode: "series" },
  nameProper
);
991
/**
 * Flag names that contain an initial such as "J. ".
 * Returns a pair [values, flaggedIndexes]: the values are unchanged except
 * that undefined becomes null, and flaggedIndexes lists the positions of
 * strings matching _INITIAL_PATTERN.
 */
function initialExpand(values) {
  const flagged = [];
  const hasInitial = (v) => typeof v === "string" && _INITIAL_PATTERN.test(v);
  const result = values.map((v, idx) => {
    if (hasInitial(v)) flagged.push(idx);
    if (v === undefined) return null;
    return v;
  });
  return [result, flagged];
}
registerTransform(
  { name: "initial_expand", inputTypes: ["name"], priority: 40, mode: "series" },
  initialExpand
);
1005
/**
 * Replace a known nickname with its formal given name.
 *
 * Each string is trimmed and lower-cased for the lookup in _NICKNAMES; the
 * original string is returned when there is no entry. Only the table's own
 * keys are consulted, so inputs such as "constructor" or "toString" are not
 * resolved to members inherited from Object.prototype.
 * Non-string values pass through unchanged.
 */
function nicknameStandardize(values) {
  return mapStrings2(values, (s) => {
    const lookup = s.trim().toLowerCase();
    return Object.prototype.hasOwnProperty.call(_NICKNAMES, lookup) ? _NICKNAMES[lookup] : s;
  });
}
registerTransform(
  { name: "nickname_standardize", inputTypes: ["name"], priority: 42, mode: "series" },
  nicknameStandardize
);
1015
/**
 * Add a `full_name` field to every row by joining `row[column]` and the
 * last-name column with a single space.
 *
 * - A non-string `lastNameCol` falls back to "last_name".
 * - If the first row has no such key, shallow copies of the rows are returned
 *   and no `full_name` is added.
 * - null/undefined/blank parts are skipped; when both are missing,
 *   `full_name` is null.
 *
 * @param {object[]} rows - Row objects.
 * @param {string} column - Key holding the first name.
 * @param {string} [lastNameCol="last_name"] - Key holding the last name.
 * @returns {object[]} New row objects.
 */
function mergeName(rows, column, lastNameCol = "last_name") {
  const surnameKey = typeof lastNameCol !== "string" ? "last_name" : lastNameCol;
  const firstRowLacksSurname = rows.length > 0 && !(surnameKey in rows[0]);
  if (firstRowLacksSurname) {
    return rows.map((record) => ({ ...record }));
  }
  // Missing values collapse to "" so they can be filtered out together with blanks.
  const cleaned = (raw) => (raw === null || raw === undefined ? "" : String(raw).trim());
  return rows.map((record) => {
    const pieces = [cleaned(record[column]), cleaned(record[surnameKey])].filter(
      (piece) => piece !== ""
    );
    return { ...record, full_name: pieces.length === 0 ? null : pieces.join(" ") };
  });
}
1035
+ registerTransform(
1036
+ { name: "merge_name", inputTypes: ["name"], priority: 45, mode: "dataframe" },
1037
+ mergeName
1038
+ );
1039
+
1040
+ // src/core/transforms/address.ts
1041
+ init_registry();
1042
/**
 * Apply `fn` to every string in `values`; anything that is not a string
 * (including null) is passed through unchanged.
 *
 * @param {Array} values - Column values.
 * @param {(s: string) => *} fn - Transform for string entries.
 * @returns {Array} New array of the same length.
 */
function mapStrings3(values, fn) {
  return values.map((item) => (typeof item === "string" ? fn(item) : item));
}
1048
+ var _STREET_ABBREV = {
1049
+ Street: "St",
1050
+ Avenue: "Ave",
1051
+ Boulevard: "Blvd",
1052
+ Drive: "Dr",
1053
+ Lane: "Ln",
1054
+ Road: "Rd",
1055
+ Court: "Ct",
1056
+ Place: "Pl",
1057
+ Circle: "Cir",
1058
+ Trail: "Trl",
1059
+ Way: "Way",
1060
+ Parkway: "Pkwy",
1061
+ Highway: "Hwy",
1062
+ Terrace: "Ter",
1063
+ Square: "Sq"
1064
+ };
1065
+ var _STREET_EXPAND = {};
1066
+ for (const [full, abbr] of Object.entries(_STREET_ABBREV)) {
1067
+ _STREET_EXPAND[abbr] = full;
1068
+ }
1069
+ var _STATES = {
1070
+ Alabama: "AL",
1071
+ Alaska: "AK",
1072
+ Arizona: "AZ",
1073
+ Arkansas: "AR",
1074
+ California: "CA",
1075
+ Colorado: "CO",
1076
+ Connecticut: "CT",
1077
+ Delaware: "DE",
1078
+ Florida: "FL",
1079
+ Georgia: "GA",
1080
+ Hawaii: "HI",
1081
+ Idaho: "ID",
1082
+ Illinois: "IL",
1083
+ Indiana: "IN",
1084
+ Iowa: "IA",
1085
+ Kansas: "KS",
1086
+ Kentucky: "KY",
1087
+ Louisiana: "LA",
1088
+ Maine: "ME",
1089
+ Maryland: "MD",
1090
+ Massachusetts: "MA",
1091
+ Michigan: "MI",
1092
+ Minnesota: "MN",
1093
+ Mississippi: "MS",
1094
+ Missouri: "MO",
1095
+ Montana: "MT",
1096
+ Nebraska: "NE",
1097
+ Nevada: "NV",
1098
+ "New Hampshire": "NH",
1099
+ "New Jersey": "NJ",
1100
+ "New Mexico": "NM",
1101
+ "New York": "NY",
1102
+ "North Carolina": "NC",
1103
+ "North Dakota": "ND",
1104
+ Ohio: "OH",
1105
+ Oklahoma: "OK",
1106
+ Oregon: "OR",
1107
+ Pennsylvania: "PA",
1108
+ "Rhode Island": "RI",
1109
+ "South Carolina": "SC",
1110
+ "South Dakota": "SD",
1111
+ Tennessee: "TN",
1112
+ Texas: "TX",
1113
+ Utah: "UT",
1114
+ Vermont: "VT",
1115
+ Virginia: "VA",
1116
+ Washington: "WA",
1117
+ "West Virginia": "WV",
1118
+ Wisconsin: "WI",
1119
+ Wyoming: "WY",
1120
+ "District Of Columbia": "DC"
1121
+ };
1122
+ var _STATES_REVERSE = {};
1123
+ for (const [name, abbr] of Object.entries(_STATES)) {
1124
+ _STATES_REVERSE[abbr] = name;
1125
+ }
1126
+ var _STATES_LOWER = {};
1127
+ for (const [name, abbr] of Object.entries(_STATES)) {
1128
+ _STATES_LOWER[name.toLowerCase()] = abbr;
1129
+ }
1130
+ var _COUNTRIES = {
1131
+ "united states": "US",
1132
+ "united states of america": "US",
1133
+ usa: "US",
1134
+ us: "US",
1135
+ "u.s.a.": "US",
1136
+ "u.s.": "US",
1137
+ america: "US",
1138
+ "united kingdom": "GB",
1139
+ uk: "GB",
1140
+ "great britain": "GB",
1141
+ england: "GB",
1142
+ scotland: "GB",
1143
+ wales: "GB",
1144
+ "northern ireland": "GB",
1145
+ canada: "CA",
1146
+ ca: "CA",
1147
+ australia: "AU",
1148
+ au: "AU",
1149
+ germany: "DE",
1150
+ deutschland: "DE",
1151
+ de: "DE",
1152
+ france: "FR",
1153
+ fr: "FR",
1154
+ italy: "IT",
1155
+ italia: "IT",
1156
+ it: "IT",
1157
+ spain: "ES",
1158
+ espana: "ES",
1159
+ es: "ES",
1160
+ mexico: "MX",
1161
+ mx: "MX",
1162
+ brazil: "BR",
1163
+ brasil: "BR",
1164
+ br: "BR",
1165
+ japan: "JP",
1166
+ jp: "JP",
1167
+ china: "CN",
1168
+ cn: "CN",
1169
+ india: "IN",
1170
+ in: "IN",
1171
+ "south korea": "KR",
1172
+ korea: "KR",
1173
+ kr: "KR",
1174
+ netherlands: "NL",
1175
+ holland: "NL",
1176
+ nl: "NL",
1177
+ sweden: "SE",
1178
+ se: "SE",
1179
+ norway: "NO",
1180
+ no: "NO",
1181
+ denmark: "DK",
1182
+ dk: "DK",
1183
+ switzerland: "CH",
1184
+ ch: "CH",
1185
+ ireland: "IE",
1186
+ ie: "IE",
1187
+ "new zealand": "NZ",
1188
+ nz: "NZ",
1189
+ singapore: "SG",
1190
+ sg: "SG",
1191
+ portugal: "PT",
1192
+ pt: "PT",
1193
+ argentina: "AR",
1194
+ ar: "AR",
1195
+ colombia: "CO",
1196
+ co: "CO",
1197
+ philippines: "PH",
1198
+ ph: "PH",
1199
+ poland: "PL",
1200
+ pl: "PL",
1201
+ belgium: "BE",
1202
+ be: "BE",
1203
+ austria: "AT",
1204
+ at: "AT"
1205
+ };
1206
+ var _UNIT_PATTERNS = [
1207
+ [/^(?:Apt|Apartment)\.?\s+/i, "Unit "],
1208
+ [/^(?:Ste|Suite)\.?\s+/i, "Ste "],
1209
+ [/^#\s*/i, "Unit "]
1210
+ ];
1211
+ var _ABBREV_PATTERNS = Object.entries(_STREET_ABBREV).map(
1212
+ ([full, abbr]) => [new RegExp(`\\b${full}\\b`, "gi"), abbr]
1213
+ );
1214
+ var _EXPAND_PATTERNS = Object.entries(_STREET_EXPAND).map(
1215
+ ([abbr, full]) => [new RegExp(`\\b${abbr}\\b`, "gi"), full]
1216
+ );
1217
/**
 * Run every `[pattern, abbreviation]` pair of `_ABBREV_PATTERNS` over each
 * string value, in order, replacing matches with the abbreviation.
 *
 * @param {Array} values - Column values.
 * @returns {Array} Result of `mapStrings3` over `values`.
 */
function addressStandardize(values) {
  const abbreviate = (text) =>
    _ABBREV_PATTERNS.reduce((acc, [pattern, abbr]) => acc.replace(pattern, abbr), text);
  return mapStrings3(values, abbreviate);
}
1226
+ registerTransform(
1227
+ { name: "address_standardize", inputTypes: ["address"], priority: 50, mode: "series" },
1228
+ addressStandardize
1229
+ );
1230
/**
 * Run every `[pattern, fullWord]` pair of `_EXPAND_PATTERNS` over each string
 * value, in order, replacing matches with the full word.
 *
 * @param {Array} values - Column values.
 * @returns {Array} Result of `mapStrings3` over `values`.
 */
function addressExpand(values) {
  const expand = (text) =>
    _EXPAND_PATTERNS.reduce((acc, [pattern, full]) => acc.replace(pattern, full), text);
  return mapStrings3(values, expand);
}
1239
+ registerTransform(
1240
+ { name: "address_expand", inputTypes: ["address"], priority: 50, mode: "series" },
1241
+ addressExpand
1242
+ );
1243
/**
 * Convert a state name to its two-letter code.
 *
 * - A two-character value that is a key of `_STATES_REVERSE` is returned
 *   upper-cased.
 * - Otherwise the trimmed, lower-cased value is looked up in `_STATES_LOWER`.
 * - Unknown values are returned unchanged (untrimmed).
 *
 * Lookups are restricted to own keys: both tables are plain objects, so a
 * bare index with user data such as "constructor" or "__proto__" would
 * return an inherited function/object instead of leaving the value alone.
 *
 * @param {Array} values - Column values.
 * @returns {Array} Result of `mapStrings3` over `values`.
 */
function stateAbbreviate(values) {
  const owns = (table, key) => Object.prototype.hasOwnProperty.call(table, key);
  return mapStrings3(values, (s) => {
    const trimmed = s.trim();
    const upper = trimmed.toUpperCase();
    if (trimmed.length === 2 && owns(_STATES_REVERSE, upper)) {
      return upper;
    }
    const lower = trimmed.toLowerCase();
    return owns(_STATES_LOWER, lower) ? _STATES_LOWER[lower] ?? s : s;
  });
}
1253
+ registerTransform(
1254
+ { name: "state_abbreviate", inputTypes: ["state", "string"], priority: 50, mode: "series" },
1255
+ stateAbbreviate
1256
+ );
1257
/**
 * Look each trimmed, upper-cased value up in `_STATES_REVERSE` and return the
 * mapped entry; values with no entry are returned unchanged.
 *
 * @param {Array} values - Column values.
 * @returns {Array} Result of `mapStrings3` over `values`.
 */
function stateExpand(values) {
  return mapStrings3(values, (s) => {
    const code = s.trim().toUpperCase();
    const fullName = _STATES_REVERSE[code];
    return fullName ?? s;
  });
}
1262
+ registerTransform(
1263
+ { name: "state_expand", inputTypes: ["state", "string"], priority: 50, mode: "series" },
1264
+ stateExpand
1265
+ );
1266
/**
 * Normalise ZIP-like strings: trim, keep only the part before the first "-",
 * and left-pad all-digit results with zeros to five characters.
 * Non-numeric results are returned as the trimmed, cut string.
 *
 * @param {Array} values - Column values.
 * @returns {Array} Result of `mapStrings3` over `values`.
 */
function zipNormalize(values) {
  const digitsOnly = /^\d+$/;
  return mapStrings3(values, (s) => {
    const [base] = s.trim().split("-");
    return digitsOnly.test(base) ? base.padStart(5, "0") : base;
  });
}
1276
+ registerTransform(
1277
+ { name: "zip_normalize", inputTypes: ["zip"], autoApply: true, priority: 55, mode: "series" },
1278
+ zipNormalize
1279
+ );
1280
// "<street>, <city>, <2-letter state> <5-digit zip or zip+4>"
var _ADDRESS_PATTERN = /^(.+?),\s*(.+?),\s*([A-Za-z]{2})\s+(\d{5}(?:-\d{4})?)$/;

/**
 * Split `row[column]` into `street`, `city`, `state` and `zip` fields.
 *
 * - Non-string values yield all four fields as null.
 * - Strings that do not match `_ADDRESS_PATTERN` keep the original
 *   (untrimmed) text in `street` and null elsewhere.
 *
 * @param {object[]} rows - Row objects.
 * @param {string} column - Key holding the full address.
 * @returns {object[]} New row objects with the four extra fields.
 */
function splitAddress(rows, column) {
  const blank = { street: null, city: null, state: null, zip: null };
  return rows.map((record) => {
    const raw = record[column];
    if (typeof raw !== "string") {
      return { ...record, ...blank };
    }
    const parsed = _ADDRESS_PATTERN.exec(raw.trim());
    if (parsed === null) {
      return { ...record, ...blank, street: raw };
    }
    const [, street, city, state, zip] = parsed;
    return { ...record, street, city, state, zip };
  });
}
1294
+ registerTransform(
1295
+ { name: "split_address", inputTypes: ["address"], priority: 45, mode: "dataframe" },
1296
+ splitAddress
1297
+ );
1298
/**
 * Replace a value with its `_COUNTRIES` entry, looked up by the trimmed,
 * lower-cased value. Values without an entry are returned unchanged.
 *
 * Only own keys of `_COUNTRIES` are consulted. The table is a plain object,
 * so a bare index with input such as "constructor" or "__proto__" would
 * return an inherited function/object instead of the original string.
 *
 * @param {Array} values - Column values.
 * @returns {Array} Result of `mapStrings3` over `values`.
 */
function countryStandardize(values) {
  return mapStrings3(values, (s) => {
    const lookup = s.trim().toLowerCase();
    if (!Object.prototype.hasOwnProperty.call(_COUNTRIES, lookup)) return s;
    return _COUNTRIES[lookup] ?? s;
  });
}
1304
+ registerTransform(
1305
+ { name: "country_standardize", inputTypes: ["country", "string"], priority: 50, mode: "series" },
1306
+ countryStandardize
1307
+ );
1308
/**
 * Trim each string value and apply every `[pattern, replacement]` pair of
 * `_UNIT_PATTERNS` to it, in order.
 *
 * @param {Array} values - Column values.
 * @returns {Array} Result of `mapStrings3` over `values`.
 */
function unitNormalize(values) {
  const rewriteUnit = (text) =>
    _UNIT_PATTERNS.reduce(
      (acc, [pattern, replacement]) => acc.replace(pattern, replacement),
      text.trim()
    );
  return mapStrings3(values, rewriteUnit);
}
1317
+ registerTransform(
1318
+ { name: "unit_normalize", inputTypes: ["address", "string"], priority: 45, mode: "series" },
1319
+ unitNormalize
1320
+ );
1321
+
1322
+ // src/core/transforms/dates.ts
1323
+ init_registry();
1324
/**
 * Parse a date/time string into a `Date` whose UTC fields carry the
 * calendar values written in the string.
 *
 * `new Date(string)` interprets ISO date-only forms ("2024-01-15") as UTC,
 * but interprets everything else without an explicit zone ("01/15/2024",
 * "January 15, 2024", "2024-01-15T10:00") in the machine's local zone.
 * Every caller in this module reads the result with `getUTC*`, so such
 * inputs came out shifted — a day early in zones east of UTC. Zone-less
 * inputs are therefore re-anchored: their local wall-clock fields are copied
 * into a UTC timestamp, making the result independent of the host zone.
 *
 * @param {string} val - Raw text.
 * @returns {Date|null} Parsed date, or null for blank/unparseable input.
 */
function _parseDate(val) {
  const trimmed = val.trim();
  if (!trimmed) return null;
  const d = new Date(trimmed);
  if (Number.isNaN(d.getTime())) return null;

  // ISO date-only forms (YYYY, YYYY-MM, YYYY-MM-DD) are already UTC by spec.
  if (/^(?:\d{4}|[+-]\d{6})(?:-\d{2}(?:-\d{2})?)?$/.test(trimmed)) return d;

  // An explicit zone (trailing Z, numeric offset after a time, or a zone name)
  // pins an absolute instant; leave it alone.
  const explicitZone =
    /(?:\dZ|:\d{2}(?:\.\d+)?\s*[+-]\d{2}:?\d{2}|\b(?:GMT|UTC?|[ECMP][SD]T)\b.*)$/i;
  if (explicitZone.test(trimmed)) return d;

  // Zone-less input was read as local time: rebuild the same wall-clock
  // reading in UTC. setUTCFullYear avoids Date.UTC's 0-99 -> 19xx remapping.
  const anchored = new Date(0);
  anchored.setUTCFullYear(d.getFullYear(), d.getMonth(), d.getDate());
  anchored.setUTCHours(d.getHours(), d.getMinutes(), d.getSeconds(), d.getMilliseconds());
  return anchored;
}
1331
/**
 * Render a number as text, prefixing "0" when it is below 10.
 *
 * @param {number} n - Value to format.
 * @returns {string} The formatted text.
 */
function pad(n) {
  const text = String(n);
  if (n < 10) {
    return "0" + text;
  }
  return text;
}
1334
+ var DAY_NAMES = [
1335
+ "Sunday",
1336
+ "Monday",
1337
+ "Tuesday",
1338
+ "Wednesday",
1339
+ "Thursday",
1340
+ "Friday",
1341
+ "Saturday"
1342
+ ];
1343
/**
 * Format each parseable value as "YYYY-MM-DD" using the UTC fields of the
 * date returned by `_parseDate`. null stays null; unparseable values are
 * returned unchanged.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function dateIso8601(values) {
  const toIsoDate = (date) =>
    [date.getUTCFullYear(), pad(date.getUTCMonth() + 1), pad(date.getUTCDate())].join("-");
  return values.map((value) => {
    if (value === null) return null;
    const parsed = _parseDate(String(value));
    return parsed ? toIsoDate(parsed) : value;
  });
}
1352
+ registerTransform(
1353
+ { name: "date_iso8601", inputTypes: ["date"], autoApply: true, priority: 50, mode: "series" },
1354
+ dateIso8601
1355
+ );
1356
/**
 * Format each parseable value as "MM/DD/YYYY" using the UTC fields of the
 * date returned by `_parseDate`. null stays null; unparseable values are
 * returned unchanged.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function dateUs(values) {
  return values.map((value) => {
    if (value === null) return null;
    const parsed = _parseDate(String(value));
    if (!parsed) return value;
    const month = pad(parsed.getUTCMonth() + 1);
    const day = pad(parsed.getUTCDate());
    return `${month}/${day}/${parsed.getUTCFullYear()}`;
  });
}
1365
+ registerTransform(
1366
+ { name: "date_us", inputTypes: ["date"], priority: 50, mode: "series" },
1367
+ dateUs
1368
+ );
1369
/**
 * Format each parseable value as "DD/MM/YYYY" using the UTC fields of the
 * date returned by `_parseDate`. null stays null; unparseable values are
 * returned unchanged.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function dateEu(values) {
  return values.map((value) => {
    if (value === null) return null;
    const parsed = _parseDate(String(value));
    if (!parsed) return value;
    const day = pad(parsed.getUTCDate());
    const month = pad(parsed.getUTCMonth() + 1);
    return `${day}/${month}/${parsed.getUTCFullYear()}`;
  });
}
1378
+ registerTransform(
1379
+ { name: "date_eu", inputTypes: ["date"], priority: 50, mode: "series" },
1380
+ dateEu
1381
+ );
1382
+ registerTransform(
1383
+ { name: "date_parse", inputTypes: ["date"], priority: 55, mode: "series" },
1384
+ dateIso8601
1385
+ );
1386
/**
 * Compute the age in whole years for each date of birth, relative to
 * `referenceDate` (or the current time when it is falsy). Comparison uses
 * the UTC year/month/day of both dates.
 *
 * - An unparseable `referenceDate` returns a shallow copy of `values`.
 * - null stays null; unparseable values are returned unchanged.
 *
 * @param {Array} values - Column values.
 * @param {*} [referenceDate=null] - Date text to measure against.
 * @returns {Array} New array of the same length.
 */
function ageFromDob(values, referenceDate = null) {
  const ref = referenceDate ? _parseDate(String(referenceDate)) : /* @__PURE__ */ new Date();
  if (!ref) return values.slice();
  const refYear = ref.getUTCFullYear();
  const refMonth = ref.getUTCMonth();
  const refDay = ref.getUTCDate();
  return values.map((value) => {
    if (value === null) return null;
    const dob = _parseDate(String(value));
    if (!dob) return value;
    const years = refYear - dob.getUTCFullYear();
    const dobMonth = dob.getUTCMonth();
    // The birthday has not yet occurred in the reference year.
    const birthdayPending =
      refMonth < dobMonth || (refMonth === dobMonth && refDay < dob.getUTCDate());
    return birthdayPending ? years - 1 : years;
  });
}
1401
+ registerTransform(
1402
+ { name: "age_from_dob", inputTypes: ["date"], priority: 40, mode: "series" },
1403
+ ageFromDob
1404
+ );
1405
/**
 * Format each parseable value as "YYYY-MM-DDTHH:MM:SS" using the UTC fields
 * of the date returned by `_parseDate`. null stays null; unparseable values
 * are returned unchanged.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function datetimeIso8601(values) {
  return values.map((value) => {
    if (value === null) return null;
    const parsed = _parseDate(String(value));
    if (!parsed) return value;
    const datePart = [
      parsed.getUTCFullYear(),
      pad(parsed.getUTCMonth() + 1),
      pad(parsed.getUTCDate()),
    ].join("-");
    const timePart = [
      pad(parsed.getUTCHours()),
      pad(parsed.getUTCMinutes()),
      pad(parsed.getUTCSeconds()),
    ].join(":");
    return `${datePart}T${timePart}`;
  });
}
1413
+ registerTransform(
1414
+ { name: "datetime_iso8601", inputTypes: ["date"], priority: 50, mode: "series" },
1415
+ datetimeIso8601
1416
+ );
1417
/**
 * Replace each parseable value with its UTC year. null stays null;
 * unparseable values are returned unchanged.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function extractYear(values) {
  return values.map((value) => {
    if (value === null) return null;
    const parsed = _parseDate(String(value));
    if (!parsed) return value;
    return parsed.getUTCFullYear();
  });
}
1424
/**
 * Replace each parseable value with its UTC month number (1-12). null stays
 * null; unparseable values are returned unchanged.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function extractMonth(values) {
  return values.map((value) => {
    if (value === null) return null;
    const parsed = _parseDate(String(value));
    if (!parsed) return value;
    return parsed.getUTCMonth() + 1;
  });
}
1431
/**
 * Replace each parseable value with its UTC day of the month. null stays
 * null; unparseable values are returned unchanged.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function extractDay(values) {
  return values.map((value) => {
    if (value === null) return null;
    const parsed = _parseDate(String(value));
    if (!parsed) return value;
    return parsed.getUTCDate();
  });
}
1438
/**
 * Replace each parseable value with its calendar quarter (1-4), derived from
 * the UTC month. null stays null; unparseable values are returned unchanged.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function extractQuarter(values) {
  return values.map((value) => {
    if (value === null) return null;
    const parsed = _parseDate(String(value));
    if (!parsed) return value;
    const zeroBasedMonth = parsed.getUTCMonth();
    return 1 + Math.floor(zeroBasedMonth / 3);
  });
}
1446
/**
 * Replace each parseable value with the `DAY_NAMES` entry for its UTC
 * weekday index. null stays null; unparseable values are returned unchanged.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function extractDayOfWeek(values) {
  return values.map((value) => {
    if (value === null) return null;
    const parsed = _parseDate(String(value));
    if (!parsed) return value;
    return DAY_NAMES[parsed.getUTCDay()];
  });
}
1453
+ registerTransform({ name: "extract_year", inputTypes: ["date"], priority: 35, mode: "series" }, extractYear);
1454
+ registerTransform({ name: "extract_month", inputTypes: ["date"], priority: 35, mode: "series" }, extractMonth);
1455
+ registerTransform({ name: "extract_day", inputTypes: ["date"], priority: 35, mode: "series" }, extractDay);
1456
+ registerTransform({ name: "extract_quarter", inputTypes: ["date"], priority: 35, mode: "series" }, extractQuarter);
1457
+ registerTransform({ name: "extract_day_of_week", inputTypes: ["date"], priority: 35, mode: "series" }, extractDayOfWeek);
1458
/**
 * Move each parseable date by `days` (may be negative) and format the result
 * as "YYYY-MM-DD" from its UTC fields. A non-number `days` is coerced with
 * `Number`, falling back to 0. null stays null; unparseable values are
 * returned unchanged.
 *
 * @param {Array} values - Column values.
 * @param {number|string} [days=0] - Offset in days.
 * @returns {Array} New array of the same length.
 */
function dateShift(values, days = 0) {
  const MS_PER_DAY = 864e5;
  const offsetDays = typeof days === "number" ? days : Number(days) || 0;
  const offsetMs = offsetDays * MS_PER_DAY;
  return values.map((value) => {
    if (value === null) return null;
    const parsed = _parseDate(String(value));
    if (!parsed) return value;
    const moved = new Date(parsed.getTime() + offsetMs);
    return [moved.getUTCFullYear(), pad(moved.getUTCMonth() + 1), pad(moved.getUTCDate())].join("-");
  });
}
1469
+ registerTransform(
1470
+ { name: "date_shift", inputTypes: ["date"], priority: 30, mode: "series" },
1471
+ dateShift
1472
+ );
1473
/**
 * Report, per value, whether `_parseDate` can parse it. null stays null;
 * blank text is reported as false.
 *
 * @param {Array} values - Column values.
 * @returns {Array<boolean|null>} New array of the same length.
 */
function dateValidate(values) {
  return values.map((value) => {
    if (value === null) return null;
    const text = String(value).trim();
    return text !== "" && _parseDate(text) !== null;
  });
}
1481
+ registerTransform(
1482
+ { name: "date_validate", inputTypes: ["date", "string"], priority: 60, mode: "series" },
1483
+ dateValidate
1484
+ );
1485
+
1486
+ // src/core/transforms/email.ts
1487
+ init_registry();
1488
+ var EMAIL_RE = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
1489
+ var GMAIL_DOMAINS = /* @__PURE__ */ new Set(["gmail.com", "googlemail.com"]);
1490
/**
 * Lower-case every string value; non-strings (including null) pass through.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function emailLowercase(values) {
  return values.map((value) => (typeof value === "string" ? value.toLowerCase() : value));
}
1496
+ registerTransform(
1497
+ { name: "email_lowercase", inputTypes: ["email", "string"], priority: 55, mode: "series" },
1498
+ emailLowercase
1499
+ );
1500
/**
 * Canonicalise e-mail addresses: lower-case and trim, drop everything from
 * the first "+" in the local part, and remove dots from the local part when
 * the domain is in `GMAIL_DOMAINS`. Strings without "@" are returned
 * lower-cased and trimmed; non-strings pass through.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function emailNormalize(values) {
  return values.map((value) => {
    if (typeof value !== "string") return value;
    const address = value.toLowerCase().trim();
    const at = address.lastIndexOf("@");
    if (at < 0) return address;
    const domain = address.slice(at + 1);
    // Everything before the first "+" (the whole local part if there is none).
    const [untagged] = address.slice(0, at).split("+");
    const local = GMAIL_DOMAINS.has(domain) ? untagged.replace(/\./g, "") : untagged;
    return `${local}@${domain}`;
  });
}
1518
+ registerTransform(
1519
+ { name: "email_normalize", inputTypes: ["email"], priority: 50, mode: "series" },
1520
+ emailNormalize
1521
+ );
1522
/**
 * Extract the lower-cased domain (the text after the last "@") from each
 * string value. Non-strings pass through unchanged.
 *
 * The value is trimmed first, matching `emailNormalize` and `emailValidate`;
 * previously trailing whitespace leaked into the domain ("a@b.com " ->
 * "b.com "). A missing "@" or an empty domain ("user@") yields null, so that
 * "no domain" has a single representation.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function emailExtractDomain(values) {
  return values.map((v) => {
    if (v === null || typeof v !== "string") return v;
    const trimmed = v.trim();
    const atIdx = trimmed.lastIndexOf("@");
    if (atIdx === -1) return null;
    const domain = trimmed.slice(atIdx + 1).toLowerCase();
    return domain === "" ? null : domain;
  });
}
1530
+ registerTransform(
1531
+ { name: "email_extract_domain", inputTypes: ["email"], priority: 40, mode: "series" },
1532
+ emailExtractDomain
1533
+ );
1534
/**
 * Test each trimmed string value against `EMAIL_RE` and return the boolean
 * result; non-strings (including null) pass through unchanged.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function emailValidate(values) {
  return values.map((value) => {
    if (typeof value !== "string") return value;
    const candidate = value.trim();
    return EMAIL_RE.test(candidate);
  });
}
1540
+ registerTransform(
1541
+ { name: "email_validate", inputTypes: ["email", "string"], priority: 60, mode: "series" },
1542
+ emailValidate
1543
+ );
1544
+
1545
+ // src/core/transforms/numeric.ts
1546
+ init_registry();
1547
/**
 * Turn currency-like text into a number by discarding every character other
 * than digits, "." and "-". Numbers and null pass through; text that leaves
 * nothing usable (empty, a lone "-", or not a valid number) is returned
 * unchanged.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function currencyStrip(values) {
  return values.map((value) => {
    if (value === null || typeof value === "number") return value;
    const numericText = String(value).replace(/[^0-9.\-]/g, "");
    if (numericText === "" || numericText === "-") return value;
    const amount = Number(numericText);
    return Number.isNaN(amount) ? value : amount;
  });
}
1557
+ registerTransform(
1558
+ { name: "currency_strip", inputTypes: ["string", "numeric"], priority: 50, mode: "series" },
1559
+ currencyStrip
1560
+ );
1561
/**
 * Convert percentages to fractions: numbers are divided by 100; strings have
 * "%" removed, are trimmed, parsed and divided by 100. null stays null and
 * unparseable text is returned unchanged.
 *
 * Text that is blank once "%" is removed is also returned unchanged.
 * `Number("")` is 0, so such values used to be silently turned into 0;
 * `currencyStrip` already treats blank text as "not a number".
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function percentageNormalize(values) {
  return values.map((v) => {
    if (v === null) return null;
    if (typeof v === "number") return v / 100;
    const s = String(v).replace(/%/g, "").trim();
    if (s === "") return v;
    const n = Number(s);
    return Number.isNaN(n) ? v : n / 100;
  });
}
1570
+ registerTransform(
1571
+ { name: "percentage_normalize", inputTypes: ["string", "numeric"], priority: 50, mode: "series" },
1572
+ percentageNormalize
1573
+ );
1574
/**
 * Round numeric values to `n` decimal places. null stays null; values that
 * are not numeric are returned unchanged.
 *
 * Fixes over the previous behaviour:
 * - A string `n` of "0" is honoured. `Number(n) || 2` treated the parsed 0
 *   as "missing" and rounded to 2 places instead.
 * - Blank strings are left alone rather than becoming 0 (`Number("")` is 0).
 *
 * @param {Array} values - Column values.
 * @param {number|string} [n=2] - Decimal places; unparseable input falls back to 2.
 * @returns {Array} New array of the same length.
 */
function roundTransform(values, n = 2) {
  let decimals;
  if (typeof n === "number") {
    decimals = n;
  } else if (typeof n === "string") {
    const parsed = n.trim() === "" ? NaN : Number(n);
    decimals = Number.isNaN(parsed) ? 2 : parsed;
  } else {
    // Other types keep the original coercion.
    decimals = Number(n) || 2;
  }
  const factor = Math.pow(10, decimals);
  return values.map((v) => {
    if (v === null) return null;
    if (typeof v === "string" && v.trim() === "") return v;
    const num = typeof v === "number" ? v : Number(v);
    if (Number.isNaN(num)) return v;
    return Math.round(num * factor) / factor;
  });
}
1584
+ registerTransform(
1585
+ { name: "round", inputTypes: ["numeric"], priority: 40, mode: "series" },
1586
+ roundTransform
1587
+ );
1588
/**
 * Clamp numeric values into the range [minVal, maxVal]. null stays null;
 * values that are not numeric are returned unchanged.
 *
 * Fixes over the previous behaviour:
 * - A string bound of "0" is honoured. `Number(maxVal) || 1` treated a parsed
 *   0 as "missing", so an upper bound of "0" silently became 1.
 * - Blank strings are left alone rather than becoming 0 (`Number("")` is 0).
 *
 * @param {Array} values - Column values.
 * @param {number|string} [minVal=0] - Lower bound; unparseable input falls back to 0.
 * @param {number|string} [maxVal=1] - Upper bound; unparseable input falls back to 1.
 * @returns {Array} New array of the same length.
 */
function clamp(values, minVal = 0, maxVal = 1) {
  const toBound = (raw, fallback) => {
    if (typeof raw === "number") return raw;
    if (typeof raw === "string") {
      const parsed = raw.trim() === "" ? NaN : Number(raw);
      return Number.isNaN(parsed) ? fallback : parsed;
    }
    // Other types keep the original coercion.
    return Number(raw) || fallback;
  };
  const lo = toBound(minVal, 0);
  const hi = toBound(maxVal, 1);
  return values.map((v) => {
    if (v === null) return null;
    if (typeof v === "string" && v.trim() === "") return v;
    const num = typeof v === "number" ? v : Number(v);
    if (Number.isNaN(num)) return v;
    return Math.min(hi, Math.max(lo, num));
  });
}
1598
+ registerTransform(
1599
+ { name: "clamp", inputTypes: ["numeric"], priority: 40, mode: "series" },
1600
+ clamp
1601
+ );
1602
/**
 * Convert each value to a number and truncate it toward zero. null stays
 * null; values that do not convert are returned unchanged.
 *
 * Blank strings are returned unchanged as well: `Number("")` is 0, so they
 * used to be silently turned into 0.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function toInteger(values) {
  return values.map((v) => {
    if (v === null) return null;
    if (typeof v === "string" && v.trim() === "") return v;
    const num = Number(v);
    if (Number.isNaN(num)) return v;
    return Math.trunc(num);
  });
}
1610
+ registerTransform(
1611
+ { name: "to_integer", inputTypes: ["string", "numeric"], priority: 45, mode: "series" },
1612
+ toInteger
1613
+ );
1614
/**
 * Replace each numeric value with its absolute value. null stays null;
 * values that are not numeric are returned unchanged.
 *
 * Blank strings are returned unchanged as well: `Number("")` is 0, so they
 * used to be silently turned into 0.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function absValue(values) {
  return values.map((v) => {
    if (v === null) return null;
    if (typeof v === "string" && v.trim() === "") return v;
    const num = typeof v === "number" ? v : Number(v);
    if (Number.isNaN(num)) return v;
    return Math.abs(num);
  });
}
1622
+ registerTransform(
1623
+ { name: "abs_value", inputTypes: ["numeric"], priority: 40, mode: "series" },
1624
+ absValue
1625
+ );
1626
/**
 * Replace null entries with 0; every other value (including undefined) is
 * kept as is.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function fillZero(values) {
  return values.map((entry) => {
    if (entry !== null) {
      return entry;
    }
    return 0;
  });
}
1629
+ registerTransform(
1630
+ { name: "fill_zero", inputTypes: ["numeric"], priority: 35, mode: "series" },
1631
+ fillZero
1632
+ );
1633
/**
 * Parse numbers written with "." as the thousands separator and "," as the
 * decimal mark ("1.234,56" -> 1234.56). Numbers and null pass through;
 * text that does not parse is returned unchanged.
 *
 * Blank strings are returned unchanged as well: `Number("")` is 0, so they
 * used to be silently turned into 0.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function commaDecimal(values) {
  return values.map((v) => {
    if (v === null) return null;
    if (typeof v === "number") return v;
    const s = String(v);
    if (s.trim() === "") return v;
    // Drop every "." and turn the first "," into the decimal point.
    const converted = s.replace(/\./g, "").replace(",", ".");
    const n = Number(converted);
    return Number.isNaN(n) ? v : n;
  });
}
1643
+ registerTransform(
1644
+ { name: "comma_decimal", inputTypes: ["string", "numeric"], priority: 48, mode: "series" },
1645
+ commaDecimal
1646
+ );
1647
/**
 * Convert each value with `Number`, which accepts scientific notation such
 * as "1.5e3". null stays null; values that do not convert are returned
 * unchanged.
 *
 * Blank strings are returned unchanged as well: `Number("")` is 0, so they
 * used to be silently turned into 0.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function scientificToDecimal(values) {
  return values.map((v) => {
    if (v === null) return null;
    if (typeof v === "string" && v.trim() === "") return v;
    const n = Number(v);
    return Number.isNaN(n) ? v : n;
  });
}
1654
+ registerTransform(
1655
+ { name: "scientific_to_decimal", inputTypes: ["string", "numeric"], priority: 45, mode: "series" },
1656
+ scientificToDecimal
1657
+ );
1658
+
1659
+ // src/core/transforms/categorical.ts
1660
+ init_registry();
1661
// Lower-case tokens recognised as true / false by booleanNormalize.
var TRUTHY = /* @__PURE__ */ new Set(["yes", "y", "1", "true", "t"]);
var FALSY = /* @__PURE__ */ new Set(["no", "n", "0", "false", "f"]);

/**
 * Map recognised truthy/falsy tokens to real booleans. Each value is
 * stringified, trimmed and lower-cased before the lookup. null stays null;
 * unrecognised values are returned unchanged.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function booleanNormalize(values) {
  return values.map((value) => {
    if (value === null) return null;
    const token = String(value).trim().toLowerCase();
    if (TRUTHY.has(token)) return true;
    return FALSY.has(token) ? false : value;
  });
}
1672
+ registerTransform(
1673
+ { name: "boolean_normalize", inputTypes: ["boolean", "string"], priority: 50, mode: "series" },
1674
+ booleanNormalize
1675
+ );
1676
/**
 * Collapse "male"/"m" to "M" and "female"/"f" to "F" (case-insensitive,
 * ignoring surrounding whitespace). Non-strings and unrecognised strings are
 * returned unchanged.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function genderStandardize(values) {
  return values.map((value) => {
    if (typeof value !== "string") return value;
    switch (value.trim().toLowerCase()) {
      case "male":
      case "m":
        return "M";
      case "female":
      case "f":
        return "F";
      default:
        return value;
    }
  });
}
1686
+ registerTransform(
1687
+ { name: "gender_standardize", inputTypes: ["string"], priority: 50, mode: "series" },
1688
+ genderStandardize
1689
+ );
1690
// Lower-case spellings that nullStandardize treats as "no value".
var NULL_VARIANTS = /* @__PURE__ */ new Set([
  "n/a",
  "null",
  "none",
  "na",
  "nil",
  "nan",
  "-",
  ""
]);

/**
 * Replace strings whose trimmed, lower-cased form is in `NULL_VARIANTS` with
 * null. Other strings and all non-strings are returned unchanged.
 *
 * @param {Array} values - Column values.
 * @returns {Array} New array of the same length.
 */
function nullStandardize(values) {
  return values.map((value) => {
    if (typeof value !== "string") return value;
    const token = value.trim().toLowerCase();
    return NULL_VARIANTS.has(token) ? null : value;
  });
}
1709
+ registerTransform(
1710
+ { name: "null_standardize", inputTypes: ["string"], autoApply: true, priority: 80, mode: "series" },
1711
+ nullStandardize
1712
+ );
1713
/**
 * Map category variants to their canonical label.
 *
 * `mapping` is an object of `canonical -> variants[]`. Matching is done on
 * the trimmed, lower-cased value against the lower-cased variants and the
 * lower-cased canonical label itself. When `mapping` is missing or not an
 * object, a shallow copy of `values` is returned. Non-strings and unmatched
 * strings are returned unchanged.
 *
 * @param {Array} values - Column values.
 * @param {Object<string, string[]>|null} [mapping=null] - Canonical labels and their variants.
 * @returns {Array} New array of the same length.
 */
function categoryStandardize(values, mapping = null) {
  if (!mapping || typeof mapping !== "object") return values.slice();
  const canonicalByKey = /* @__PURE__ */ new Map();
  Object.entries(mapping).forEach(([canonical, variants]) => {
    const aliases = Array.isArray(variants) ? variants : [];
    for (const alias of aliases) {
      canonicalByKey.set(String(alias).toLowerCase(), canonical);
    }
    // Registered last so the canonical spelling wins over its own aliases.
    canonicalByKey.set(canonical.toLowerCase(), canonical);
  });
  return values.map((value) => {
    if (typeof value !== "string") return value;
    const hit = canonicalByKey.get(value.trim().toLowerCase());
    return hit ?? value;
  });
}
1733
+ registerTransform(
1734
+ { name: "category_standardize", inputTypes: ["string"], priority: 45, mode: "series" },
1735
+ categoryStandardize
1736
+ );
1737
/**
 * Placeholder transform: returns a shallow copy of `values`. When a
 * `lookupPath` is supplied, a warning is logged that the feature is not
 * implemented.
 *
 * @param {Array} values - Column values.
 * @param {*} [lookupPath=null] - Ignored apart from triggering the warning.
 * @returns {Array} Shallow copy of `values`.
 */
function categoryFromFile(values, lookupPath = null) {
  const wantsLookup = Boolean(lookupPath);
  if (wantsLookup) {
    console.warn("[goldenflow] category_from_file is not yet implemented in the JS port \u2014 returning values unchanged");
  }
  return [...values];
}
1743
+ registerTransform(
1744
+ { name: "category_from_file", inputTypes: ["string"], priority: 45, mode: "series" },
1745
+ categoryFromFile
1746
+ );
1747
+
1748
+ // src/core/transforms/identifiers.ts
1749
+ init_registry();
1750
/**
 * Apply `fn` to every string in `values`; anything that is not a string
 * (including null) is passed through unchanged.
 *
 * @param {Array} values - Column values.
 * @param {(s: string) => *} fn - Transform for string entries.
 * @returns {Array} New array of the same length.
 */
function mapStrings4(values, fn) {
  const applyIfString = (entry) => {
    if (typeof entry === "string") {
      return fn(entry);
    }
    return entry;
  };
  return values.map(applyIfString);
}
1756
/**
 * Return only the ASCII digits of `val`, in order.
 *
 * @param {string} val - Input text.
 * @returns {string} The digits, or "" when there are none.
 */
function extractDigits2(val) {
  const digits = val.match(/\d/g);
  return digits === null ? "" : digits.join("");
}
1759
/**
 * Format strings containing exactly nine digits as "AAA-GG-SSSS". Strings
 * with any other digit count are returned unchanged.
 *
 * @param {Array} values - Column values.
 * @returns {Array} Result of `mapStrings4` over `values`.
 */
function ssnFormat(values) {
  return mapStrings4(values, (s) => {
    const digits = extractDigits2(s);
    if (digits.length !== 9) return s;
    const groups = [digits.slice(0, 3), digits.slice(3, 5), digits.slice(5)];
    return groups.join("-");
  });
}
1766
+ registerTransform(
1767
+ { name: "ssn_format", inputTypes: ["ssn", "string"], priority: 50, mode: "series" },
1768
+ ssnFormat
1769
+ );
1770
/**
 * Mask strings containing exactly nine digits as "***-**-SSSS", keeping only
 * the last four digits. Strings with any other digit count are returned
 * unchanged.
 *
 * @param {Array} values - Column values.
 * @returns {Array} Result of `mapStrings4` over `values`.
 */
function ssnMask(values) {
  return mapStrings4(values, (s) => {
    const digits = extractDigits2(s);
    if (digits.length !== 9) return s;
    const lastFour = digits.slice(5);
    return `***-**-${lastFour}`;
  });
}
1777
+ registerTransform(
1778
+ { name: "ssn_mask", inputTypes: ["ssn", "string"], priority: 50, mode: "series" },
1779
+ ssnMask
1780
+ );
1781
/**
 * Format strings containing exactly nine digits as "XX-XXXXXXX". Strings
 * with any other digit count are returned unchanged.
 *
 * @param {Array} values - Column values.
 * @returns {Array} Result of `mapStrings4` over `values`.
 */
function einFormat(values) {
  return mapStrings4(values, (s) => {
    const digits = extractDigits2(s);
    if (digits.length !== 9) return s;
    const prefix = digits.slice(0, 2);
    const serial = digits.slice(2);
    return `${prefix}-${serial}`;
  });
}
1788
+ registerTransform(
1789
+ { name: "ein_format", inputTypes: ["ein", "string"], priority: 50, mode: "series" },
1790
+ einFormat
1791
+ );
1792
+
1793
+ // src/core/transforms/url.ts
1794
+ init_registry();
1795
/**
 * Apply `fn` to every string in `values`; anything that is not a string
 * (including null) is passed through unchanged.
 *
 * @param {Array} values - Column values.
 * @param {(s: string) => *} fn - Transform for string entries.
 * @returns {Array} New array of the same length.
 */
function mapStrings5(values, fn) {
  return values.map((entry) => {
    const isText = typeof entry === "string";
    return isText ? fn(entry) : entry;
  });
}
1801
var _SCHEME_RE = /^https?:\/\//i;

/**
 * Normalise URL strings: trim, default the scheme to "https://", lower-case
 * the scheme and host, and strip trailing slashes. Blank strings become
 * null; non-strings pass through `mapStrings5` unchanged.
 *
 * Fixes over the previous behaviour:
 * - The host ends at the first "/", "?" or "#". Previously only "/" was
 *   recognised, so for "Example.com?Token=AbC" the case-sensitive query was
 *   lower-cased along with the host.
 * - A value that already carries some other "scheme://" (e.g. "ftp://")
 *   keeps it instead of becoming "https://ftp://...".
 *
 * @param {Array} values - Column values.
 * @returns {Array} Result of `mapStrings5` over `values`.
 */
function urlNormalize(values) {
  // Any "<scheme>://" prefix; http/https (see _SCHEME_RE) is one case of it.
  const anyScheme = /^[a-z][a-z0-9+.-]*:\/\//i;
  return mapStrings5(values, (s) => {
    let val = s.trim();
    if (!val) return null;
    if (!_SCHEME_RE.test(val) && !anyScheme.test(val)) {
      val = "https://" + val;
    }
    const schemeEnd = val.indexOf("://") + 3;
    const scheme = val.slice(0, schemeEnd).toLowerCase();
    const rest = val.slice(schemeEnd);
    const hostEnd = rest.search(/[/?#]/);
    const host = hostEnd === -1 ? rest : rest.slice(0, hostEnd);
    // Path, query and fragment keep their case; only trailing slashes go.
    const tail = hostEnd === -1 ? "" : rest.slice(hostEnd).replace(/\/+$/, "");
    return scheme + host.toLowerCase() + tail;
  });
}
1831
+ registerTransform(
1832
+ { name: "url_normalize", inputTypes: ["url", "string"], priority: 50, mode: "series" },
1833
+ urlNormalize
1834
+ );
1835
/**
 * Extract the lower-cased host part of each URL string: the text after
 * "://" (when present) up to the first "/", "?" or "#". Blank strings and
 * URLs with an empty host yield null; non-strings pass through
 * `mapStrings5` unchanged.
 *
 * Previously the host was cut only at "/", so "example.com?x=1" returned the
 * query string as part of the domain.
 *
 * @param {Array} values - Column values.
 * @returns {Array} Result of `mapStrings5` over `values`.
 */
function urlExtractDomain(values) {
  return mapStrings5(values, (s) => {
    let val = s.trim();
    if (!val) return null;
    if (val.includes("://")) {
      val = val.split("://", 2)[1];
    }
    const domain = val.split(/[/?#]/, 1)[0];
    return domain ? domain.toLowerCase() : null;
  });
}
1846
+ registerTransform(
1847
+ { name: "url_extract_domain", inputTypes: ["url", "string"], priority: 40, mode: "series" },
1848
+ urlExtractDomain
1849
+ );
1850
+
1851
+ // src/core/transforms/auto-correct.ts
1852
+ init_registry();
1853
/**
 * Levenshtein edit distance between two strings, compared by UTF-16 code
 * unit, computed with a single rolling row.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Minimum number of insertions, deletions and substitutions.
 */
function levenshtein(a, b) {
  const rows = a.length;
  const cols = b.length;
  if (rows === 0) return cols;
  if (cols === 0) return rows;
  // costs[j] holds the distance between the current prefix of `a` and b[0..j).
  const costs = Array.from({ length: cols + 1 }, (_unused, j) => j);
  for (let i = 1; i <= rows; i += 1) {
    let diagonal = costs[0];
    costs[0] = i;
    for (let j = 1; j <= cols; j += 1) {
      const above = costs[j];
      costs[j] =
        a[i - 1] === b[j - 1] ? diagonal : 1 + Math.min(diagonal, above, costs[j - 1]);
      diagonal = above;
    }
  }
  return costs[cols];
}
1875
/**
 * Similarity score in the range 0-100 derived from the edit distance:
 * `100 * (1 - distance / longerLength)`. Two empty strings score 100.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Similarity percentage.
 */
function fuzzyRatio(a, b) {
  const longest = Math.max(a.length, b.length);
  if (longest === 0) return 100;
  const distance = levenshtein(a, b);
  return 100 * (1 - distance / longest);
}
1881
/**
 * Fold rare spellings of a categorical column into frequent ones.
 *
 * 1. String values are grouped case-insensitively, counting each exact
 *    spelling.
 * 2. A group whose share of all string values is at least
 *    `frequencyThreshold` becomes canonical; its most common exact spelling
 *    (first seen wins ties) is the canonical form.
 * 3. Every other group is compared against the canonical groups with
 *    `fuzzyRatio`; the best score that reaches `matchThreshold` decides the
 *    correction (first canonical wins ties).
 * 4. Values are replaced by their correction or canonical spelling;
 *    non-strings and unmatched strings are returned unchanged.
 *
 * When there are no strings or no canonical groups, a shallow copy of
 * `values` is returned. Non-number thresholds are coerced with `Number` and
 * fall back to the defaults when that yields 0 or NaN.
 *
 * @param {Array} values - Column values.
 * @param {number|string} [frequencyThreshold=0.05] - Minimum share for a canonical group.
 * @param {number|string} [matchThreshold=85] - Minimum `fuzzyRatio` score for a correction.
 * @returns {Array} New array of the same length.
 */
function categoryAutoCorrect(values, frequencyThreshold = 0.05, matchThreshold = 85) {
  const asNumber = (raw, fallback) => (typeof raw === "number" ? raw : Number(raw) || fallback);
  const minShare = asNumber(frequencyThreshold, 0.05);
  const minScore = asNumber(matchThreshold, 85);

  // lower-cased key -> Map(exact spelling -> occurrences), in first-seen order.
  const spellingsByKey = /* @__PURE__ */ new Map();
  let stringCount = 0;
  for (const value of values) {
    if (typeof value !== "string") continue;
    stringCount += 1;
    const key = value.toLowerCase();
    if (!spellingsByKey.has(key)) {
      spellingsByKey.set(key, /* @__PURE__ */ new Map());
    }
    const spellings = spellingsByKey.get(key);
    spellings.set(value, (spellings.get(value) ?? 0) + 1);
  }
  if (stringCount === 0) return values.slice();

  const canonicalByKey = /* @__PURE__ */ new Map();
  for (const [key, spellings] of spellingsByKey) {
    let groupTotal = 0;
    let favourite = key;
    let favouriteCount = 0;
    for (const [spelling, seen] of spellings) {
      groupTotal += seen;
      if (seen > favouriteCount) {
        favouriteCount = seen;
        favourite = spelling;
      }
    }
    if (groupTotal / stringCount >= minShare) {
      canonicalByKey.set(key, favourite);
    }
  }
  if (canonicalByKey.size === 0) return values.slice();

  const correctionByKey = /* @__PURE__ */ new Map();
  for (const key of spellingsByKey.keys()) {
    if (canonicalByKey.has(key)) continue;
    let winner = null;
    let winnerScore = 0;
    for (const [canonKey, canonSpelling] of canonicalByKey) {
      const score = fuzzyRatio(key, canonKey);
      if (score >= minScore && score > winnerScore) {
        winnerScore = score;
        winner = canonSpelling;
      }
    }
    if (winner !== null) {
      correctionByKey.set(key, winner);
    }
  }

  return values.map((value) => {
    if (typeof value !== "string") return value;
    const key = value.toLowerCase();
    return correctionByKey.get(key) ?? canonicalByKey.get(key) ?? value;
  });
}
1942
+ registerTransform(
1943
+ { name: "category_auto_correct", inputTypes: ["string"], autoApply: true, priority: 35, mode: "series" },
1944
+ categoryAutoCorrect
1945
+ );
1946
+
1947
+ // src/core/transforms/index.ts
1948
+ init_registry();
1949
+
1950
+ // src/core/engine/transformer.ts
1951
+ init_types();
1952
+
1953
+ // src/core/engine/profiler-bridge.ts
1954
+ init_types();
1955
+ var EMAIL_RE2 = /^[^@\s]+@[^@\s]+\.[^@\s]+$/;
1956
+ var PHONE_RE = /^[+(]?\d[\d()\-.\s]{6,18}\d$/;
1957
+ var DATE_RE = /^(\d{4}[-/]\d{1,2}[-/]\d{1,2}|\d{1,2}[-/]\d{1,2}[-/]\d{2,4}|[A-Za-z]{3,9}\s+\d{1,2},?\s+\d{4})$/;
1958
+ var NAME_RE = /^[A-Z][a-z]+(\s+[A-Z][a-z]+)+$/;
1959
+ var ZIP_RE = /^\d{5}(-\d{4})?$/;
1960
// Column-name fragments that hint at a semantic type; checked in this key order.
var NAME_PATTERNS = {
  zip: ["zip", "postal", "zipcode", "zip_code", "postal_code"],
  phone: ["phone", "tel", "mobile", "cell", "fax"],
  email: ["email", "e_mail", "mail"],
  date: ["date", "created", "updated", "timestamp", "dob", "birth"],
  state: ["state", "province", "region"],
  name: ["first_name", "last_name", "fname", "lname", "full_name", "fullname"]
};

/**
 * Refine a generic inferred type using the column name.
 *
 * Only "string" and "numeric" are refined; any other `currentType` is
 * returned as is. The column name is lower-cased with "-" replaced by "_",
 * and the first `NAME_PATTERNS` entry with a fragment contained in it wins.
 *
 * @param {string} columnName - Column header.
 * @param {string} currentType - Type inferred from the values.
 * @returns {string} The refined type, or `currentType` when nothing matches.
 */
function overrideTypeByColumnName(columnName, currentType) {
  const overridable = currentType === "string" || currentType === "numeric";
  if (!overridable) return currentType;
  const normalized = columnName.toLowerCase().replace(/-/g, "_");
  const hit = Object.entries(NAME_PATTERNS).find(([, fragments]) =>
    fragments.some((fragment) => normalized.includes(fragment))
  );
  return hit ? hit[0] : currentType;
}
1978
/**
 * Infer a column type from its values, then let the column name refine it.
 *
 * - no non-null values                -> "string"
 * - numbers present and no booleans   -> "numeric" (name may override)
 * - booleans present and no numbers   -> "boolean"
 * - otherwise the first 100 non-blank trimmed strings are matched against
 *   the email/zip/date/phone/name patterns in that order; the first pattern
 *   whose hit ratio reaches its threshold wins, else "string".
 *
 * @param {Array<*>} values - Raw column values (null marks a missing value).
 * @param {string} columnName - Column header, used for name-based overrides.
 * @returns {string} Inferred type name.
 */
function inferType(values, columnName) {
  const present = values.filter((value) => value !== null);
  if (present.length === 0) return "string";

  const sawNumber = present.some((value) => typeof value === "number");
  const sawBoolean = present.some((value) => typeof value === "boolean");
  if (sawNumber && !sawBoolean) return overrideTypeByColumnName(columnName, "numeric");
  if (sawBoolean && !sawNumber) return "boolean";

  const texts = present
    .filter((value) => typeof value === "string")
    .map((value) => value.trim())
    .filter((text) => text !== "");
  if (texts.length === 0) return "string";

  const sample = texts.slice(0, 100);
  // [type, pattern, minimum share of the sample that must match]
  const detectors = [
    ["email", EMAIL_RE2, 0.7],
    ["zip", ZIP_RE, 0.7],
    ["date", DATE_RE, 0.5],
    ["phone", PHONE_RE, 0.6],
    ["name", NAME_RE, 0.5]
  ];
  for (const [typeName, pattern, threshold] of detectors) {
    const hits = sample.filter((text) => pattern.test(text)).length;
    if (hits / sample.length >= threshold) {
      return overrideTypeByColumnName(columnName, typeName);
    }
  }
  return overrideTypeByColumnName(columnName, "string");
}
2016
/**
 * Build the profile of a single column.
 *
 * @param {{rawColumn: function(string): Array<*>}} data - Table wrapper exposing `rawColumn`.
 * @param {string} columnName - Column to profile.
 * @returns {object} Column profile: row, null and distinct counts (with their
 *   fractions of the row count), the inferred type, and up to five stringified
 *   non-null sample values.
 */
function profileColumn(data, columnName) {
  const values = data.rawColumn(columnName);
  const present = values.filter((value) => value !== null);
  const rowCount = values.length;
  const nullCount = rowCount - present.length;
  const uniqueCount = new Set(present).size;
  const sampleValues = present.slice(0, 5).map((value) => String(value));
  const inferredType = inferType(values, columnName);
  // Guard the ratios so an empty column reports 0 instead of NaN.
  const shareOfRows = (count) => (rowCount > 0 ? count / rowCount : 0);
  return makeColumnProfile({
    name: columnName,
    inferredType,
    rowCount,
    nullCount,
    nullPct: shareOfRows(nullCount),
    uniqueCount,
    uniquePct: shareOfRows(uniqueCount),
    sampleValues
  });
}
2044
/**
 * Profile every column of a row-oriented dataset.
 *
 * @param {Array<object>} rows - Records to profile.
 * @param {string} [filePath=""] - Label stored on the returned profile.
 * @returns {{filePath: string, rowCount: number, columnCount: number, columns: Array<object>}}
 */
function profileDataframe(rows, filePath = "") {
  const table = new TabularData(rows);
  const columns = table.columns.map((name) => profileColumn(table, name));
  const rowCount = table.rowCount;
  const columnCount = table.columns.length;
  return { filePath, rowCount, columnCount, columns };
}
2054
+
2055
+ // src/core/engine/selector.ts
2056
// Maps a finding's `check` name to the transform names suggested for the
// affected column. An empty list means nothing is suggested for that check.
var FINDING_TRANSFORM_MAP = {
  type_inference: ["strip", "to_integer"],
  nullability: ["null_standardize"],
  uniqueness: ["strip", "collapse_whitespace", "email_normalize"],
  format_detection: ["phone_e164", "email_normalize", "date_iso8601", "zip_normalize"],
  range_distribution: ["clamp"],
  cardinality: ["category_auto_correct", "category_standardize"],
  pattern_consistency: ["phone_e164", "date_iso8601", "zip_normalize", "ssn_format"],
  encoding_detection: ["normalize_unicode", "normalize_quotes", "fix_mojibake"],
  sequence_detection: ["pad_left"],
  drift_detection: [],
  temporal_order: ["date_iso8601", "date_validate"],
  null_correlation: [],
  cross_column_validation: ["clamp"],
  cross_column: ["date_validate", "age_from_dob"]
};
2072
// Inferred types that also accept transforms declared for plain "string" input.
var STRING_LIKE_TYPES = /* @__PURE__ */ new Set([
  "string",
  "email",
  "phone",
  "name",
  "address",
  "date"
]);

/**
 * Choose the auto-apply transforms for one profiled column.
 *
 * A transform qualifies when it is flagged `autoApply` and either lists the
 * column's inferred type, or lists "string" while the inferred type is
 * string-like. "category_auto_correct" is dropped when more than 10% of the
 * rows hold distinct values. The result is ordered by descending priority.
 *
 * @param {{inferredType: string, uniquePct: number}} profile - Column profile.
 * @param {number} [_confidenceThreshold=0.8] - Accepted but not used.
 * @returns {Array<object>} Selected transform descriptors.
 */
function selectTransforms(profile, _confidenceThreshold = 0.8) {
  const columnType = profile.inferredType;
  const stringLike = STRING_LIKE_TYPES.has(columnType);
  const tooManyDistinct = profile.uniquePct > 0.1;

  const accepts = (transform) =>
    transform.inputTypes.includes(columnType) ||
    (stringLike && transform.inputTypes.includes("string"));

  return Array.from(listTransforms())
    .filter((transform) => transform.autoApply && accepts(transform))
    .filter((transform) => !(tooManyDistinct && transform.name === "category_auto_correct"))
    .sort((left, right) => right.priority - left.priority);
}
2097
/**
 * Group suggested transform names by column, based on profiler findings.
 *
 * Each finding's `check` is looked up in FINDING_TRANSFORM_MAP. Findings with
 * no column, an unknown check, or a check with no suggestions are ignored.
 * Names are de-duplicated per column, keeping first-seen order.
 *
 * Lookups use own-property checks and a Map accumulator so that checks or
 * columns named like Object.prototype members ("constructor", "__proto__",
 * ...) are treated as ordinary strings instead of throwing.
 *
 * @param {Array<object>} findings - Findings with `check` and `column` fields.
 * @returns {Object<string, string[]>} Column name -> unique transform names.
 */
function selectFromFindings(findings) {
  const hasOwn = Object.prototype.hasOwnProperty;
  const perColumn = /* @__PURE__ */ new Map();
  for (const finding of findings) {
    const check = String(finding["check"] ?? "");
    const column = String(finding["column"] ?? "");
    if (!column) continue;
    if (!hasOwn.call(FINDING_TRANSFORM_MAP, check)) continue;
    const transformNames = FINDING_TRANSFORM_MAP[check];
    if (transformNames.length === 0) continue;
    let bucket = perColumn.get(column);
    if (!bucket) {
      bucket = /* @__PURE__ */ new Set();
      perColumn.set(column, bucket);
    }
    for (const name of transformNames) bucket.add(name);
  }
  // Object.fromEntries defines own data properties, so even a "__proto__"
  // column ends up as a regular key on the result.
  return Object.fromEntries(
    [...perColumn].map(([column, names]) => [column, [...names]])
  );
}
2114
+
2115
+ // src/core/engine/transformer.ts
2116
/**
 * Applies a transform configuration to row-oriented data.
 *
 * Stage order in `transformDf`: column transforms (configured ones, or
 * auto-selected ones when none are configured), splits, renames, drops,
 * filters, then de-duplication. Whether a column "exists" is always decided
 * by an `in` test against the first row.
 */
var TransformEngine = class {
  config;

  /** @param {object} [config] - Partial config; defaults are filled in by makeConfig. */
  constructor(config) {
    this.config = makeConfig(config);
  }

  /**
   * Run the full pipeline over a copy of `rows`.
   *
   * @param {Array<object>} rows - Input records (the array itself is not mutated).
   * @param {string} [source="<dataframe>"] - Source label for the manifest.
   * @returns {{rows: Array<object>, columns: string[], manifest: object}}
   */
  transformDf(rows, source = "<dataframe>") {
    const manifest = new exports.MutableManifest(source);
    let working = [...rows];
    const hasColumn = (name) => working.length > 0 && name in working[0];

    working = this.config.transforms.length > 0
      ? this._applyConfigTransforms(working, manifest)
      : this._applyAutoTransforms(working, manifest, source);

    // Splits: only dataframe-mode transforms are honoured here.
    for (const split of this.config.splits) {
      if (!hasColumn(split.source)) continue;
      const info = getTransform(split.method);
      if (info && info.mode === "dataframe") {
        working = info.func(working, split.source);
      }
    }

    // Renames: rebuild each row so key order is preserved.
    for (const [from, to] of Object.entries(this.config.renames)) {
      if (!hasColumn(from)) continue;
      working = working.map((row) => {
        const renamed = {};
        for (const [key, value] of Object.entries(row)) {
          renamed[key === from ? to : key] = value;
        }
        return renamed;
      });
    }

    // Drops: only columns present on the first row are removed.
    const doomed = new Set([...new Set(this.config.drop)].filter((name) => hasColumn(name)));
    if (doomed.size > 0) {
      working = working.map((row) => {
        const kept = {};
        for (const [key, value] of Object.entries(row)) {
          if (!doomed.has(key)) kept[key] = value;
        }
        return kept;
      });
    }

    for (const filt of this.config.filters) {
      if (!hasColumn(filt.column)) continue;
      working = this._applyFilter(working, filt.column, filt.condition);
    }

    if (this.config.dedup) {
      const keyColumns = this.config.dedup.columns.filter((name) => hasColumn(name));
      if (keyColumns.length > 0) {
        const keepLast = this.config.dedup.keep === "last";
        const countBefore = working.length;
        // For keep="last", scan from the end and restore the order afterwards.
        const scanOrder = keepLast ? [...working].reverse() : working;
        const seenKeys = /* @__PURE__ */ new Set();
        const survivors = scanOrder.filter((row) => {
          const key = keyColumns.map((name) => String(row[name] ?? "")).join("\0");
          if (seenKeys.has(key)) return false;
          seenKeys.add(key);
          return true;
        });
        if (keepLast) survivors.reverse();
        working = survivors;
        const countAfter = working.length;
        if (countBefore !== countAfter) {
          manifest.addRecord(
            makeTransformRecord({
              column: keyColumns.join(","),
              transform: "dedup",
              affectedRows: countBefore - countAfter,
              totalRows: countBefore
            })
          );
        }
      }
    }

    const columns = working.length > 0 ? Object.keys(working[0]) : [];
    return { rows: working, columns, manifest };
  }

  /** Apply every configured op, in order, to each configured column that exists. */
  _applyConfigTransforms(rows, manifest) {
    let working = rows;
    for (const spec of this.config.transforms) {
      if (working.length === 0 || !(spec.column in working[0])) continue;
      for (const rawOp of spec.ops) {
        const [name, params] = parseTransformName(rawOp);
        const info = getTransform(name);
        if (info) {
          working = this._applySingleTransform(working, spec.column, info, params, manifest);
          continue;
        }
        manifest.addError(
          spec.column,
          name,
          -1,
          `Transform '${name}' not found in registry`
        );
      }
    }
    return working;
  }

  /** Profile the data and apply the auto-selected transforms column by column. */
  _applyAutoTransforms(rows, manifest, source) {
    const profile = profileDataframe(rows, source !== "<dataframe>" ? source : "");
    let working = rows;
    for (const columnProfile of profile.columns) {
      for (const info of selectTransforms(columnProfile)) {
        working = this._applySingleTransform(working, columnProfile.name, info, [], manifest);
      }
    }
    return working;
  }

  /**
   * Run one transform on one column and record the outcome in the manifest.
   * Any exception is recorded as a manifest error and the input rows are
   * returned unchanged.
   */
  _applySingleTransform(rows, column, info, params, manifest) {
    const totalRows = rows.length;
    const sampleOf = (list) => list.slice(0, 3).map((row) => String(row[column] ?? ""));
    const sampleBefore = sampleOf(rows);

    const runSeries = () => {
      // Series transforms see primitives only; other values are stringified.
      const values = rows.map((row) => {
        const cell = row[column];
        if (cell === null || cell === void 0) return null;
        const kind = typeof cell;
        return kind === "string" || kind === "number" || kind === "boolean" ? cell : String(cell);
      });
      const args = castParams(params);
      const result = args.length > 0 ? info.func(values, ...args) : info.func(values);
      // A [values, flaggedRowIndexes] pair reports rows flagged for review.
      const isFlaggedPair = Array.isArray(result) && result.length === 2 && Array.isArray(result[1]);
      const newValues = isFlaggedPair ? result[0] : result;
      if (isFlaggedPair) {
        for (const rowIdx of result[1]) {
          manifest.addError(column, info.name, rowIdx, "Flagged for review");
        }
      }
      // Untouched rows keep their identity; changed rows are shallow copies.
      return rows.map((row, i) =>
        newValues[i] === (row[column] ?? null) ? row : { ...row, [column]: newValues[i] }
      );
    };

    try {
      const newRows = info.mode === "dataframe"
        ? info.func(rows, column, ...castParams(params))
        : runSeries();
      const sampleAfter = sampleOf(newRows);
      const comparable = Math.min(rows.length, newRows.length);
      let changed = 0;
      for (let i = 0; i < comparable; i++) {
        const beforeText = String(rows[i][column] ?? "");
        const afterText = String(newRows[i][column] ?? "");
        if (beforeText !== afterText) changed++;
      }
      manifest.addRecord(
        makeTransformRecord({
          column,
          transform: info.name,
          affectedRows: changed,
          totalRows,
          sampleBefore,
          sampleAfter
        })
      );
      return newRows;
    } catch (e) {
      manifest.addError(
        column,
        info.name,
        -1,
        e instanceof Error ? e.message : String(e)
      );
      return rows;
    }
  }

  /**
   * Filter rows by a condition string: "not_null", "after:<value>" or
   * "before:<value>" (compared as strings). Unknown conditions keep all rows.
   */
  _applyFilter(rows, column, condition) {
    const textOf = (row) => String(row[column] ?? "");
    if (condition === "not_null") {
      return rows.filter((row) => !(row[column] === null || row[column] === void 0));
    }
    if (condition.startsWith("after:")) {
      const bound = condition.slice(6);
      return rows.filter((row) => textOf(row) > bound);
    }
    if (condition.startsWith("before:")) {
      const bound = condition.slice(7);
      return rows.filter((row) => textOf(row) < bound);
    }
    return rows;
  }
};
2310
/**
 * Convert textual transform parameters to numbers where they are numeric.
 *
 * A string becomes a number only when the whole (trimmed) text is a decimal
 * number, optionally signed, with optional fraction/exponent, or "Infinity".
 * Previously `parseFloat` accepted any numeric prefix, so values such as
 * "2024-01-01" or "5px" were silently turned into 2024 and 5.
 * Non-string values and non-numeric strings pass through unchanged.
 *
 * @param {Array<*>} params - Raw parameters.
 * @returns {Array<*>} Parameters with fully numeric strings converted.
 */
function castParams(params) {
  const NUMERIC = /^[+-]?(?:Infinity|(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)$/;
  return params.map((param) => {
    if (typeof param !== "string") return param;
    const text = param.trim();
    return NUMERIC.test(text) ? Number(text) : param;
  });
}
2319
+
2320
+ // src/core/engine/differ.ts
2321
/**
 * Summarise the differences between two row sets.
 *
 * Columns are taken from the first row of each set. For columns present in
 * both: when the row counts differ, the column is reported as changed and the
 * row-count gap is added to the total (no per-column detail); otherwise cells
 * are compared position by position as strings (null/undefined count as "").
 *
 * @param {Array<object>} before - Rows before transformation.
 * @param {Array<object>} after - Rows after transformation.
 * @returns {{totalChanges: number, changedColumns: string[], addedColumns: string[],
 *   removedColumns: string[], rowCountBefore: number, rowCountAfter: number,
 *   columnDetails: Object<string, {changedRows: number}>}}
 */
function diffDataframes(before, after) {
  const headerOf = (rows) => (rows.length > 0 ? Object.keys(rows[0]) : []);
  const beforeCols = new Set(headerOf(before));
  const afterCols = new Set(headerOf(after));
  const onlyIn = (own, other) => [...own].filter((name) => !other.has(name)).sort();

  const addedColumns = onlyIn(afterCols, beforeCols);
  const removedColumns = onlyIn(beforeCols, afterCols);
  const sharedColumns = [...beforeCols].filter((name) => afterCols.has(name)).sort();

  const sameRowCount = before.length === after.length;
  const rowGap = Math.abs(before.length - after.length);
  const changedColumns = [];
  const columnDetails = {};
  let totalChanges = 0;

  for (const name of sharedColumns) {
    if (!sameRowCount) {
      changedColumns.push(name);
      totalChanges += rowGap;
      continue;
    }
    let changedRows = 0;
    for (const [index, row] of before.entries()) {
      const oldText = String(row[name] ?? "");
      const newText = String(after[index][name] ?? "");
      if (oldText !== newText) changedRows += 1;
    }
    if (changedRows === 0) continue;
    changedColumns.push(name);
    totalChanges += changedRows;
    columnDetails[name] = { changedRows };
  }

  return {
    totalChanges,
    changedColumns,
    addedColumns,
    removedColumns,
    rowCountBefore: before.length,
    rowCountAfter: after.length,
    columnDetails
  };
}
2358
+
2359
+ // src/core/engine/streaming.ts
2360
/**
 * Thin wrapper around TransformEngine for record, batch, and chunked
 * processing. Counts the batches processed via `transformBatch` and
 * `streamRows` (single-record calls are not counted).
 */
var StreamProcessor = class {
  engine;
  _batchCount = 0;

  /** @param {object} [config] - Configuration forwarded to TransformEngine. */
  constructor(config) {
    this.engine = new TransformEngine(config);
  }

  /** Transform a single record. */
  transformOne(record) {
    return this.engine.transformDf([record]);
  }

  /** Transform a batch of rows. */
  transformBatch(rows) {
    this._batchCount++;
    return this.engine.transformDf(rows);
  }

  /**
   * Process rows in chunks, yielding one TransformResult per chunk.
   *
   * @param {Array<object>} rows - Rows to process.
   * @param {number} [chunkSize=1e4] - Rows per chunk; fractional sizes are floored.
   * @throws {RangeError} When `chunkSize` is not at least 1. Because this is a
   *   generator, the error surfaces on the first iteration step. Without this
   *   guard a size of 0 or less would never advance and loop forever.
   */
  *streamRows(rows, chunkSize = 1e4) {
    const size = Math.floor(chunkSize);
    if (!(size >= 1)) {
      throw new RangeError(`chunkSize must be a number >= 1, got ${chunkSize}`);
    }
    for (let start = 0; start < rows.length; start += size) {
      const batch = rows.slice(start, start + size);
      this._batchCount++;
      yield this.engine.transformDf(batch);
    }
  }

  /** Number of batches handled by `transformBatch` and `streamRows` so far. */
  get batchesProcessed() {
    return this._batchCount;
  }
};
2387
+
2388
+ // src/core/config/schema.ts
2389
+ init_types();
2390
+ init_types();
2391
/**
 * Normalise a raw (parsed) config object into a full config.
 *
 * Every section is coerced to its expected shape: lists default to [],
 * scalar fields are stringified, `dedup.keep` is "last" or "first", and
 * missing `source`/`output` become null. List entries that are not objects
 * (for example the `null` produced by an empty YAML list item) are treated as
 * empty objects instead of raising a TypeError, and a non-object `raw`
 * yields the default config.
 *
 * @param {object} raw - Parsed configuration data.
 * @returns {object} Config produced by makeConfig.
 */
function validateConfig(raw) {
  const asRecord = (value) => (value !== null && typeof value === "object" ? value : {});
  const asList = (value) => (Array.isArray(value) ? value : []);
  const asStrings = (value) => asList(value).map(String);
  const asText = (value) => String(value ?? "");
  const input = asRecord(raw);

  const transforms = asList(input["transforms"]).map((entry) => {
    const t = asRecord(entry);
    return { column: asText(t["column"]), ops: asStrings(t["ops"]) };
  });
  const splits = asList(input["splits"]).map((entry) => {
    const s = asRecord(entry);
    return {
      source: asText(s["source"]),
      target: asStrings(s["target"]),
      method: asText(s["method"])
    };
  });
  const renamesRaw = input["renames"];
  const renames = renamesRaw && typeof renamesRaw === "object"
    ? Object.fromEntries(Object.entries(renamesRaw).map(([k, v]) => [k, String(v)]))
    : {};
  const drop = asStrings(input["drop"]);
  const filters = asList(input["filters"]).map((entry) => {
    const f = asRecord(entry);
    return { column: asText(f["column"]), condition: asText(f["condition"]) };
  });
  const dedupRaw = input["dedup"];
  const dedup = dedupRaw && typeof dedupRaw === "object"
    ? {
        columns: asStrings(dedupRaw["columns"]),
        keep: dedupRaw["keep"] === "last" ? "last" : "first"
      }
    : null;
  const mappings = asList(input["mappings"]).map((entry) => {
    const m = asRecord(entry);
    return {
      source: asText(m["source"]),
      target: m["target"],
      transform: m["transform"] ?? null
    };
  });

  return makeConfig({
    source: input["source"] != null ? String(input["source"]) : null,
    output: input["output"] != null ? String(input["output"]) : null,
    transforms,
    splits,
    renames,
    drop,
    filters,
    dedup,
    mappings
  });
}
2433
+
2434
+ // src/core/config/loader.ts
2435
+ init_types();
2436
// Cached handle to the optional "yaml" package; stays null until a load succeeds.
var yamlModule = null;

/**
 * Lazily load the optional "yaml" dependency.
 *
 * @returns {object|null} The loaded module, or null when it cannot be required.
 */
function getYaml() {
  if (!yamlModule) {
    try {
      yamlModule = __require("yaml");
    } catch {
      // Best effort: the package is optional and callers handle a null result.
    }
  }
  return yamlModule;
}
2445
/**
 * Parse YAML text into a validated config.
 *
 * @param {string} content - YAML document.
 * @returns {object} Default config for an empty document, else the validated config.
 * @throws {Error} When the yaml package is unavailable or the document's
 *   top level is not a mapping.
 */
function loadConfigFromString(content) {
  const yaml = getYaml();
  if (!yaml) {
    throw new Error("yaml package is required for config loading. Install with: npm install yaml");
  }
  const parsed = yaml.parse(content);
  if (parsed === null || parsed === void 0) return makeConfig();
  const isList = Array.isArray(parsed);
  if (isList || typeof parsed !== "object") {
    throw new Error(`Config file is not a valid YAML object (got ${isList ? "array" : typeof parsed})`);
  }
  return validateConfig(parsed);
}
2457
/**
 * Serialise a config to YAML, omitting sections that are empty or unset.
 *
 * @param {object} config - Full config (all list/object sections present).
 * @returns {string} YAML text.
 * @throws {Error} When the yaml package is unavailable.
 */
function saveConfigToString(config) {
  const yaml = getYaml();
  if (!yaml) {
    throw new Error("yaml package is required for config saving. Install with: npm install yaml");
  }
  const hasItems = (list) => list.length > 0;
  // [key, value, include?] in output order.
  const sections = [
    ["source", config.source, Boolean(config.source)],
    ["output", config.output, Boolean(config.output)],
    ["transforms", config.transforms, hasItems(config.transforms)],
    ["splits", config.splits, hasItems(config.splits)],
    ["renames", config.renames, Object.keys(config.renames).length > 0],
    ["drop", config.drop, hasItems(config.drop)],
    ["filters", config.filters, hasItems(config.filters)],
    ["dedup", config.dedup, Boolean(config.dedup)],
    ["mappings", config.mappings, hasItems(config.mappings)]
  ];
  const data = {};
  for (const [key, value, include] of sections) {
    if (include) data[key] = value;
  }
  return yaml.stringify(data);
}
2474
/**
 * Overlay CLI overrides on a file config.
 *
 * Override keys whose value is `undefined` (an option that was not supplied)
 * are ignored so they no longer erase values loaded from the file. Every
 * other value, including null, replaces the file value.
 *
 * @param {object} fileConfig - Config loaded from a file.
 * @param {object} [cliOverrides] - Overrides from the command line.
 * @returns {object} Merged config produced by makeConfig.
 */
function mergeConfigs(fileConfig, cliOverrides) {
  const supplied = Object.fromEntries(
    Object.entries(cliOverrides ?? {}).filter(([, value]) => value !== void 0)
  );
  return makeConfig({ ...fileConfig, ...supplied });
}
2477
+
2478
+ // src/core/config/learner.ts
2479
+ init_types();
2480
/**
 * Derive a config from data: profile the rows and list, per column, the
 * transforms that auto-selection would apply. Columns with no selected
 * transforms are left out.
 *
 * @param {Array<object>} rows - Sample rows.
 * @param {string} [source=""] - Source label; an empty value is stored as null.
 * @returns {object} Config with `source` and `transforms` filled in.
 */
function learnConfig(rows, source = "") {
  const { columns } = profileDataframe(rows, source);
  const transforms = columns
    .map((columnProfile) => ({
      column: columnProfile.name,
      ops: selectTransforms(columnProfile).map((transform) => transform.name)
    }))
    .filter((spec) => spec.ops.length > 0);
  return makeConfig({ source: source || null, transforms });
}
2497
+
2498
+ // src/core/mapping/name-similarity.ts
2499
// Canonical column names and the alternative spellings treated as equivalent.
var ALIASES = {
  first_name: ["fname", "first", "given_name", "first_nm"],
  last_name: ["lname", "last", "surname", "family_name", "last_nm"],
  email: ["email_address", "e_mail", "email_addr", "mail"],
  phone: ["phone_number", "ph", "telephone", "tel", "mobile", "cell"],
  address: ["addr", "street_address", "addr_line_1", "address_line_1"],
  city: ["town", "municipality"],
  state: ["st", "province", "region"],
  zip: ["zipcode", "zip_code", "postal_code", "postal"],
  name: ["full_name", "fullname", "customer_name"],
  created_at: ["signup_date", "signup_dt", "create_date", "date_created"]
};

// Reverse index: lower-cased alias (or canonical name) -> lower-cased canonical name.
var _ALIAS_LOOKUP = /* @__PURE__ */ new Map();
Object.entries(ALIASES).forEach(([canonical, aliases]) => {
  const canonicalKey = canonical.toLowerCase();
  [...aliases, canonical].forEach((spelling) => {
    _ALIAS_LOOKUP.set(spelling.toLowerCase(), canonicalKey);
  });
});
2518
/**
 * Similarity of two strings on a 0-100 scale, derived from edit distance:
 * `100 * (1 - distance / longerLength)`, with unit cost for insertions,
 * deletions and substitutions.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} 100 for identical strings, 0 when exactly one is empty.
 */
function fuzzyWRatio(a, b) {
  if (a === b) return 100;
  if (a.length === 0 || b.length === 0) return 0;

  const longest = Math.max(a.length, b.length);
  // Single rolling row: row[j] holds the distance between the processed
  // prefix of `a` and the first j characters of `b`.
  const row = Array.from({ length: b.length + 1 }, (_, j) => j);
  for (let i = 1; i <= a.length; i++) {
    let diagonal = row[0];
    row[0] = i;
    for (let j = 1; j <= b.length; j++) {
      const above = row[j];
      const substitution = diagonal + (a[i - 1] === b[j - 1] ? 0 : 1);
      row[j] = Math.min(above + 1, row[j - 1] + 1, substitution);
      diagonal = above;
    }
  }
  const distance = row[b.length];
  return 100 * (1 - distance / longest);
}
2536
/**
 * Score how alike two column names are, from 0 to 1.
 *
 * Names are lower-cased and trimmed. Identical names score 1, names that
 * resolve to the same canonical alias score 0.95, and everything else falls
 * back to the edit-distance ratio.
 *
 * @param {string} source - Source column name.
 * @param {string} target - Target column name.
 * @returns {number} Similarity in [0, 1].
 */
function nameSimilarity(source, target) {
  const [left, right] = [source, target].map((name) => name.toLowerCase().trim());
  if (left === right) return 1;
  const leftCanonical = _ALIAS_LOOKUP.get(left);
  if (leftCanonical && leftCanonical === _ALIAS_LOOKUP.get(right)) return 0.95;
  return fuzzyWRatio(left, right) / 100;
}
2545
+
2546
+ // src/core/mapping/profile-similarity.ts
2547
/**
 * Score how alike two column profiles are, from 0 to 1.
 *
 * Weighted components: matching inferred type (0.4), closeness of null
 * fraction (0.2), closeness of unique fraction (0.2), and the ratio of the
 * smaller to the larger distinct count (0.2, contributing 0 unless both
 * counts are positive).
 *
 * @param {object} source - Source column profile.
 * @param {object} target - Target column profile.
 * @returns {number} Earned weight divided by total weight.
 */
function profileSimilarity(source, target) {
  const closeness = (x, y) => Math.max(0, 1 - Math.abs(x - y));
  const bothHaveValues = source.uniqueCount > 0 && target.uniqueCount > 0;
  // [weight, value in 0..1], accumulated in this order.
  const components = [
    [0.4, source.inferredType === target.inferredType ? 1 : 0],
    [0.2, closeness(source.nullPct, target.nullPct)],
    [0.2, closeness(source.uniquePct, target.uniquePct)],
    [
      0.2,
      bothHaveValues
        ? Math.min(source.uniqueCount, target.uniqueCount) / Math.max(source.uniqueCount, target.uniqueCount)
        : 0
    ]
  ];
  let earned = 0;
  let possible = 0;
  for (const [weight, value] of components) {
    earned += weight * value;
    possible += weight;
  }
  return possible > 0 ? earned / possible : 0;
}
2565
+
2566
+ // src/core/mapping/schema-mapper.ts
2567
+ init_types();
2568
/**
 * Greedy column matcher between a source and a target dataset.
 *
 * Source columns are visited in first-row key order; each takes the unclaimed
 * target column with the highest combined score (70% name similarity, 30%
 * profile similarity), provided the score reaches `suggestThreshold`.
 */
var SchemaMapper = class {
  autoThreshold;
  suggestThreshold;

  /**
   * @param {number} [autoThreshold=0.9] - Stored on the instance; not used by `map`.
   * @param {number} [suggestThreshold=0.6] - Minimum combined score for a match.
   */
  constructor(autoThreshold = 0.9, suggestThreshold = 0.6) {
    this.autoThreshold = autoThreshold;
    this.suggestThreshold = suggestThreshold;
  }

  /**
   * @param {Array<object>} sourceRows - Rows of the source dataset.
   * @param {Array<object>} targetRows - Rows of the target dataset.
   * @returns {Array<{source: string, target: string, confidence: number, transform: null}>}
   *   One mapping per matched source column; confidence is rounded to 3 decimals.
   */
  map(sourceRows, targetRows) {
    const profilesByName = (rows) =>
      new Map(profileDataframe(rows).columns.map((column) => [column.name, column]));
    const headerOf = (rows) => (rows.length > 0 ? Object.keys(rows[0]) : []);

    const sourceProfiles = profilesByName(sourceRows);
    const targetProfiles = profilesByName(targetRows);
    const targetCols = headerOf(targetRows);
    const claimed = /* @__PURE__ */ new Set();
    const mappings = [];

    for (const sCol of headerOf(sourceRows)) {
      let bestTarget = null;
      let bestScore = 0;
      for (const tCol of targetCols) {
        if (claimed.has(tCol)) continue;
        const nameScore = nameSimilarity(sCol, tCol);
        const sProfile = sourceProfiles.get(sCol);
        const tProfile = targetProfiles.get(tCol);
        const profileScore = sProfile && tProfile ? profileSimilarity(sProfile, tProfile) : 0;
        const combined = 0.7 * nameScore + 0.3 * profileScore;
        // Strict ">" keeps the earliest target on ties.
        if (combined > bestScore && combined >= this.suggestThreshold) {
          bestScore = combined;
          bestTarget = tCol;
        }
      }
      if (bestTarget === null) continue;
      mappings.push({
        source: sCol,
        target: bestTarget,
        confidence: Math.round(bestScore * 1e3) / 1e3,
        transform: null
      });
      claimed.add(bestTarget);
    }
    return mappings;
  }

  /** Wrap mappings in a config whose `mappings` section carries source/target/transform. */
  toConfig(mappings) {
    const entries = mappings.map(({ source, target, transform }) => ({ source, target, transform }));
    return makeConfig({ mappings: entries });
  }
};
2624
+
2625
+ // src/core/domains/index.ts
2626
// Lazy loaders for the bundled domain packs, keyed by normalised domain name.
var DOMAIN_LOADERS = {
  people_hr: () => Promise.resolve().then(() => (init_people_hr(), people_hr_exports)),
  healthcare: () => Promise.resolve().then(() => (init_healthcare(), healthcare_exports)),
  finance: () => Promise.resolve().then(() => (init_finance(), finance_exports)),
  ecommerce: () => Promise.resolve().then(() => (init_ecommerce(), ecommerce_exports)),
  real_estate: () => Promise.resolve().then(() => (init_real_estate(), real_estate_exports))
};

/**
 * Load a domain pack by name.
 *
 * The name is lower-cased and "-" and "/" become "_" before lookup. Only the
 * loaders declared in DOMAIN_LOADERS are considered: an own-property check
 * keeps names such as "constructor" or "__proto__" from resolving to members
 * inherited from Object.prototype.
 *
 * @param {string} name - Domain name, e.g. "people-hr".
 * @returns {Promise<object|null>} The module's PACK export, or null for an unknown domain.
 */
async function loadDomain(name) {
  const key = name.toLowerCase().replace(/-/g, "_").replace(/\//g, "_");
  if (!Object.prototype.hasOwnProperty.call(DOMAIN_LOADERS, key)) return null;
  const mod = await DOMAIN_LOADERS[key]();
  return mod.PACK;
}

/** @returns {string[]} Names of all available domain packs. */
function listDomains() {
  return Object.keys(DOMAIN_LOADERS);
}
2643
+
2644
+ // src/core/reporters/json-reporter.ts
2645
+ init_types();
2646
/**
 * Render a manifest as pretty-printed (2-space) JSON.
 *
 * @param {object} manifest - A MutableManifest (serialised through `toDict()`)
 *   or any other JSON-serialisable value.
 * @returns {string} JSON text.
 */
function manifestToJson(manifest) {
  const payload = manifest instanceof exports.MutableManifest ? manifest.toDict() : manifest;
  return JSON.stringify(payload, null, 2);
}
2652
+
2653
+ // src/core/reporters/console.ts
2654
// ANSI escape sequences used by the console reporters.
// Text attributes:
var BOLD = "\x1B[1m";
var DIM = "\x1B[2m";
// Foreground colours:
var RED = "\x1B[31m";
var GREEN = "\x1B[32m";
var YELLOW = "\x1B[33m";
var CYAN = "\x1B[36m";
var MAGENTA = "\x1B[35m";
// Clears all attributes set above:
var RESET = "\x1B[0m";
2662
+ function printProfile(profile) { // Print a dataset profile to stdout: title, one table row per column, then row/column totals.
2663
+ console.log(`
2664
+ ${BOLD}Profile: ${profile.filePath || "<dataframe>"}${RESET}
2665
+ `); // Title; an empty filePath is shown as "<dataframe>".
2666
+ console.log(` ${"Column".padEnd(20)} ${"Type".padEnd(12)} ${"Nulls".padEnd(15)} ${"Unique".padEnd(10)} Sample`); // Header row, padded to the same widths as the data rows.
2667
+ console.log(` ${"\u2500".repeat(20)} ${"\u2500".repeat(12)} ${"\u2500".repeat(15)} ${"\u2500".repeat(10)} ${"\u2500".repeat(20)}`); // Rule made of U+2500 box-drawing characters.
2668
+ for (const col of profile.columns) {
2669
+ const pct = (col.nullPct * 100).toFixed(0); // Null fraction as a whole-number percentage.
2670
+ console.log(
2671
+ ` ${CYAN}${col.name.padEnd(20)}${RESET} ${GREEN}${col.inferredType.padEnd(12)}${RESET} ${YELLOW}${`${col.nullCount} (${pct}%)`.padEnd(15)}${RESET} ${MAGENTA}${String(col.uniqueCount).padEnd(10)}${RESET} ${DIM}${col.sampleValues.slice(0, 3).join(", ")}${RESET}`
2672
+ ); // Only the first three sample values are shown.
2673
+ }
2674
+ console.log(`
2675
+ ${BOLD}${profile.rowCount}${RESET} rows, ${BOLD}${profile.columnCount}${RESET} columns`);
2676
+ }
2677
+ function printManifest(manifest) { // Print the manifest's transform records as a table, followed by any errors.
2678
+ if (manifest.records.length === 0 && manifest.errors.length === 0) { // Nothing recorded: print a short notice and stop.
2679
+ console.log(`${DIM}No transforms applied.${RESET}`);
2680
+ return;
2681
+ }
2682
+ console.log(`
2683
+ ${BOLD}Transforms Applied${RESET}
2684
+ `);
2685
+ console.log(` ${"Column".padEnd(20)} ${"Transform".padEnd(22)} ${"Affected".padEnd(12)} ${"Before".padEnd(20)} After`); // Header row, padded to the same widths as the data rows.
2686
+ console.log(` ${"\u2500".repeat(20)} ${"\u2500".repeat(22)} ${"\u2500".repeat(12)} ${"\u2500".repeat(20)} ${"\u2500".repeat(20)}`); // Rule made of U+2500 box-drawing characters.
2687
+ for (const r of manifest.records) {
2688
+ const before = r.sampleBefore.slice(0, 2).join(", "); // At most two before-samples are shown.
2689
+ const after = r.sampleAfter.slice(0, 2).join(", "); // At most two after-samples are shown.
2690
+ console.log(
2691
+ ` ${CYAN}${r.column.padEnd(20)}${RESET} ${GREEN}${r.transform.padEnd(22)}${RESET} ${YELLOW}${`${r.affectedRows}/${r.totalRows}`.padEnd(12)}${RESET} ${DIM}${before.padEnd(20)}${RESET} ${BOLD}${after}${RESET}`
2692
+ ); // "Affected" is rendered as affectedRows/totalRows.
2693
+ }
2694
+ if (manifest.errors.length > 0) { // Error section: a count line, then one line per error.
2695
+ console.log(`
2696
+ ${RED}${BOLD}${manifest.errors.length} errors:${RESET}`);
2697
+ for (const e of manifest.errors) {
2698
+ console.log(` ${RED}${e.column}${RESET} / ${e.transform}: ${e.error}`);
2699
+ }
2700
+ }
2701
+ }
2702
+ function printDiff(diff) { // Print a diff summary to stdout: row counts, total changes, then any added/removed/changed columns.
2703
+ console.log(`Rows: ${diff.rowCountBefore} \u2192 ${diff.rowCountAfter}`); // \u2192 is a rightwards arrow.
2704
+ console.log(`Total changes: ${BOLD}${diff.totalChanges}${RESET}`);
2705
+ if (diff.addedColumns.length) console.log(`Added columns: ${GREEN}${diff.addedColumns.join(", ")}${RESET}`); // Each column list is printed only when non-empty.
2706
+ if (diff.removedColumns.length) console.log(`Removed columns: ${RED}${diff.removedColumns.join(", ")}${RESET}`);
2707
+ if (diff.changedColumns.length) console.log(`Changed columns: ${YELLOW}${diff.changedColumns.join(", ")}${RESET}`);
2708
+ }
2709
+
2710
+ // src/core/llm/corrector.ts
2711
+ init_registry();
2712
+ var _correctionsCache = /* @__PURE__ */ new Map();
2713
/**
 * Count how often each distinct string occurs in a column.
 *
 * Non-string values are ignored, strings are trimmed, and blanks are dropped.
 * The `max` most frequent values are kept (ties keep first-seen order).
 *
 * @param {Array<*>} values - Column values.
 * @param {number} [max=30] - Maximum number of distinct values to return.
 * @returns {Object<string, number>} Trimmed value -> occurrence count.
 */
function getValueSummary(values, max = 30) {
  const tally = /* @__PURE__ */ new Map();
  Array.from(values)
    .filter((value) => typeof value === "string")
    .map((value) => value.trim())
    .filter((text) => text !== "")
    .forEach((text) => tally.set(text, (tally.get(text) ?? 0) + 1));
  const ranked = Array.from(tally).sort(([, left], [, right]) => right - left);
  return Object.fromEntries(ranked.slice(0, max));
}
2724
+ function buildPrompt(columnName, valueSummary) { // Build the LLM prompt text from the column name and the value->count summary (embedded as 2-space-indented JSON).
2725
+ return `You are a data quality expert. Analyze this column and identify values that appear to be misspellings, abbreviations, or variants of other values in the same column.
2726
+
2727
+ Column name: ${columnName}
2728
+ Value frequencies (value: count):
2729
+ ${JSON.stringify(valueSummary, null, 2)}
2730
+
2731
+ For each incorrect value, provide the corrected canonical form. Only include values that need correction. Return JSON object mapping incorrect values to their corrections.
2732
+
2733
+ Example response:
2734
+ {"actve": "active", "ACTIVE": "active", "pendng": "pending"}
2735
+
2736
+ Return ONLY the JSON object, no other text.`; // The prompt asks for a bare JSON object mapping incorrect values to corrections.
2737
+ }
2738
/**
 * Read an environment variable, tolerating runtimes without `process`.
 *
 * @param {string} key - Variable name.
 * @returns {string|undefined} The value, or undefined when unset or unavailable.
 */
function envVar(key) {
  const env = typeof process !== "undefined" ? process.env : void 0;
  return env ? env[key] : void 0;
}
2744
/**
 * Keep only the string-to-string entries of a parsed LLM response.
 *
 * @param {*} parsed - Parsed JSON value.
 * @returns {Object<string, string>} Empty when `parsed` is not a plain
 *   (non-array) object; otherwise its entries whose values are strings.
 */
function validateCorrections(parsed) {
  const isRecord = Boolean(parsed) && typeof parsed === "object" && !Array.isArray(parsed);
  if (!isRecord) return {};
  const accepted = {};
  Object.entries(parsed).forEach(([wrong, right]) => {
    if (typeof wrong === "string" && typeof right === "string") accepted[wrong] = right;
  });
  return accepted;
}
2752
/**
 * Ask an LLM which values of a column are misspellings or variants.
 *
 * Uses the Anthropic Messages API when ANTHROPIC_API_KEY is set, otherwise
 * the OpenAI Chat Completions API when OPENAI_API_KEY is set. This is best
 * effort: HTTP errors, empty replies, network failures and unparsable output
 * are logged with console.warn and produce an empty result.
 *
 * Replies wrapped in a Markdown code fence are unwrapped before parsing so a
 * fenced JSON answer is not discarded.
 *
 * @param {string} columnName - Column being analysed.
 * @param {Object<string, number>} valueSummary - Value -> frequency map.
 * @returns {Promise<Object<string, string>>} Incorrect value -> correction.
 */
async function askLlmForCorrections(columnName, valueSummary) {
  const prompt = buildPrompt(columnName, valueSummary);
  const anthropicKey = envVar("ANTHROPIC_API_KEY");
  const openaiKey = envVar("OPENAI_API_KEY");

  // Parse the model's text, tolerating an optional ``` / ```json fence.
  const parseCorrections = (text) => {
    const trimmed = text.trim();
    const fenced = /^```(?:json)?\s*([\s\S]*?)\s*```$/i.exec(trimmed);
    return validateCorrections(JSON.parse(fenced ? fenced[1] : trimmed));
  };

  try {
    if (anthropicKey) {
      const resp = await fetch("https://api.anthropic.com/v1/messages", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "x-api-key": anthropicKey,
          "anthropic-version": "2023-06-01"
        },
        body: JSON.stringify({
          // Published id for Claude Sonnet 4.5; the previous value
          // ("claude-sonnet-4-5-20250514") is not a published model id.
          model: "claude-sonnet-4-5-20250929",
          max_tokens: 1024,
          messages: [{ role: "user", content: prompt }]
        })
      });
      if (!resp.ok) {
        console.warn(`[goldenflow:llm] Anthropic API error: ${resp.status} ${resp.statusText}`);
        return {};
      }
      const data = await resp.json();
      const text = data.content?.[0]?.text ?? "";
      if (!text) {
        console.warn("[goldenflow:llm] Anthropic returned empty response");
        return {};
      }
      return parseCorrections(text);
    }
    if (openaiKey) {
      const resp = await fetch("https://api.openai.com/v1/chat/completions", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${openaiKey}`
        },
        body: JSON.stringify({
          model: "gpt-4o-mini",
          messages: [{ role: "user", content: prompt }],
          response_format: { type: "json_object" }
        })
      });
      if (!resp.ok) {
        console.warn(`[goldenflow:llm] OpenAI API error: ${resp.status} ${resp.statusText}`);
        return {};
      }
      const data = await resp.json();
      const text = data.choices?.[0]?.message?.content ?? "";
      if (!text) {
        console.warn("[goldenflow:llm] OpenAI returned empty response");
        return {};
      }
      return parseCorrections(text);
    }
  } catch (e) {
    console.warn(
      `[goldenflow:llm] LLM correction failed: ${e instanceof Error ? e.message : String(e)}`
    );
  }
  return {};
}
2815
/**
 * Fetch LLM corrections for a column and remember them in the module cache.
 *
 * @param {string} columnName - Cache key and column name given to the LLM.
 * @param {Array} values - Column values; summarised via getValueSummary.
 * @returns {Promise<object>} The corrections object ({} when the summary is
 *   empty or the LLM produced nothing). Only non-empty results are cached.
 */
async function prepareLlmCorrections(columnName, values) {
  const summary = getValueSummary(values);
  const nothingToAsk = Object.keys(summary).length === 0;
  if (nothingToAsk) {
    return {};
  }
  const corrections = await askLlmForCorrections(columnName, summary);
  const pairs = Object.entries(corrections);
  if (pairs.length !== 0) {
    _correctionsCache.set(columnName, new Map(pairs));
  }
  return corrections;
}
2824
/**
 * Apply cached LLM corrections to a column, fetching them first if the column
 * has no cache entry yet.
 *
 * @param {string} columnName - Column whose corrections should be applied.
 * @param {Array} values - Column values.
 * @returns {Promise<Array>} A new array; string values whose trimmed form has a
 *   correction are replaced, everything else is passed through unchanged.
 */
async function applyLlmCorrections(columnName, values) {
  const alreadyPrepared = _correctionsCache.has(columnName);
  if (!alreadyPrepared) {
    await prepareLlmCorrections(columnName, values);
  }
  const lookup = _correctionsCache.get(columnName);
  if (!lookup || lookup.size === 0) {
    return [...values];
  }
  return values.map((value) =>
    typeof value === "string" ? lookup.get(value.trim()) ?? value : value
  );
}
2836
/**
 * Synchronous transform that applies corrections already present in the cache.
 * It never contacts the LLM; without a cache entry the values are just copied.
 *
 * @param {Array} values - Column values.
 * @param {...*} params - The first parameter, when a string, is the cache key
 *   (column name); otherwise "__default__" is used.
 * @returns {Array} A new array with corrections applied to matching strings.
 */
function categoryLlmCorrect(values, ...params) {
  const [first] = params;
  const columnName = typeof first === "string" ? first : "__default__";
  const lookup = _correctionsCache.get(columnName);
  if (!lookup || lookup.size === 0) {
    return [...values];
  }
  return values.map((value) => {
    if (typeof value !== "string") {
      return value;
    }
    return lookup.get(value.trim()) ?? value;
  });
}
2846
// Register categoryLlmCorrect under the name "category_llm_correct": a
// series-mode transform for string inputs with priority 34. autoApply is false,
// so presumably it only runs when named explicitly - confirm against the
// transform selection logic.
registerTransform(
  {
    name: "category_llm_correct",
    inputTypes: ["string"],
    autoApply: false,
    priority: 34,
    mode: "series"
  },
  categoryLlmCorrect
);
2856
+
2857
+ // src/core/notebook.ts
2858
/**
 * Render a transform result as a small self-contained HTML summary.
 *
 * Column and transform names originate from the processed data, so every
 * interpolated value is HTML-escaped before being placed in the markup.
 *
 * @param {{rows: Array, columns: Array, manifest: {records: Array, errors: Array}}} result
 * @returns {string} HTML fragment with totals and up to ten transform records.
 */
function transformResultToHtml(result) {
  const HTML_ENTITIES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ENTITIES[ch]);

  const records = result.manifest.records;
  const rows = result.rows.length;
  const cols = result.columns.length;
  const transforms = records.length;
  const errors = result.manifest.errors.length;
  let html = `<div style="font-family: monospace; padding: 10px; border: 1px solid #ddd; border-radius: 5px;">
  <h3 style="margin: 0 0 10px 0;">GoldenFlow TransformResult</h3>
  <table style="border-collapse: collapse; width: 100%;">
    <tr><td style="padding: 4px 8px; font-weight: bold;">Rows</td><td>${escapeHtml(rows.toLocaleString())}</td></tr>
    <tr><td style="padding: 4px 8px; font-weight: bold;">Columns</td><td>${cols}</td></tr>
    <tr><td style="padding: 4px 8px; font-weight: bold;">Transforms Applied</td><td>${transforms}</td></tr>
    <tr><td style="padding: 4px 8px; font-weight: bold;">Errors</td><td style="color: ${errors ? "red" : "green"};">${errors}</td></tr>
  </table>`;
  if (records.length > 0) {
    html += `<h4 style="margin: 10px 0 5px 0;">Transforms</h4>
  <table style="border-collapse: collapse; width: 100%; font-size: 0.9em;">
    <tr style="background: #f5f5f5;">
      <th style="padding: 4px 8px; text-align: left;">Column</th>
      <th style="padding: 4px 8px; text-align: left;">Transform</th>
      <th style="padding: 4px 8px; text-align: left;">Affected</th>
    </tr>`;
    // Only the first ten records are listed; the remainder is summarised.
    for (const r of records.slice(0, 10)) {
      html += `<tr>
      <td style="padding: 4px 8px;">${escapeHtml(r.column)}</td>
      <td style="padding: 4px 8px;">${escapeHtml(r.transform)}</td>
      <td style="padding: 4px 8px;">${escapeHtml(r.affectedRows)}/${escapeHtml(r.totalRows)}</td>
    </tr>`;
    }
    if (records.length > 10) {
      html += `<tr><td colspan="3" style="padding: 4px 8px; color: #888;">... and ${records.length - 10} more</td></tr>`;
    }
    html += "</table>";
  }
  html += "</div>";
  return html;
}
2895
/**
 * Render a manifest as an HTML table with one row per transform record.
 *
 * The source name, column/transform names and sample values come from the
 * processed data, so they are HTML-escaped before interpolation.
 *
 * @param {{source: *, records: Array, errors: Array}} manifest
 * @returns {string} HTML fragment; each record shows at most two before/after samples.
 */
function manifestToHtml(manifest) {
  const HTML_ENTITIES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ENTITIES[ch]);

  let html = `<div style="font-family: monospace; padding: 10px; border: 1px solid #ddd; border-radius: 5px;">
  <h3>GoldenFlow Manifest</h3>
  <p>Source: ${escapeHtml(manifest.source)} | Transforms: ${manifest.records.length} | Errors: ${manifest.errors.length}</p>
  <table style="border-collapse: collapse; width: 100%; font-size: 0.9em;">
    <tr style="background: #f5f5f5;">
      <th style="padding: 4px 8px; text-align: left;">Column</th>
      <th style="padding: 4px 8px; text-align: left;">Transform</th>
      <th style="padding: 4px 8px; text-align: left;">Affected</th>
      <th style="padding: 4px 8px; text-align: left;">Before</th>
      <th style="padding: 4px 8px; text-align: left;">After</th>
    </tr>`;
  for (const r of manifest.records) {
    // Join first, escape second: the ", " separator contains nothing to escape.
    const before = escapeHtml(r.sampleBefore.slice(0, 2).join(", "));
    const after = escapeHtml(r.sampleAfter.slice(0, 2).join(", "));
    html += `<tr>
      <td style="padding: 4px 8px;">${escapeHtml(r.column)}</td>
      <td style="padding: 4px 8px;">${escapeHtml(r.transform)}</td>
      <td style="padding: 4px 8px;">${escapeHtml(r.affectedRows)}/${escapeHtml(r.totalRows)}</td>
      <td style="padding: 4px 8px; color: #c00;">${before}</td>
      <td style="padding: 4px 8px; color: #0a0;">${after}</td>
    </tr>`;
  }
  html += "</table></div>";
  return html;
}
2921
/**
 * Render a dataset profile as an HTML table with one row per column.
 *
 * Column names and sample values come from the profiled data, so they are
 * HTML-escaped before interpolation.
 *
 * @param {{rowCount: number, columnCount: number, columns: Array}} profile
 * @returns {string} HTML fragment; each column shows at most three sample values.
 */
function profileToHtml(profile) {
  const HTML_ENTITIES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ENTITIES[ch]);

  let html = `<div style="font-family: monospace; padding: 10px; border: 1px solid #ddd; border-radius: 5px;">
  <h3>GoldenFlow Profile</h3>
  <p>${escapeHtml(profile.rowCount.toLocaleString())} rows, ${escapeHtml(profile.columnCount)} columns</p>
  <table style="border-collapse: collapse; width: 100%; font-size: 0.9em;">
    <tr style="background: #f5f5f5;">
      <th style="padding: 4px 8px; text-align: left;">Column</th>
      <th style="padding: 4px 8px; text-align: left;">Type</th>
      <th style="padding: 4px 8px; text-align: left;">Nulls</th>
      <th style="padding: 4px 8px; text-align: left;">Unique</th>
      <th style="padding: 4px 8px; text-align: left;">Sample</th>
    </tr>`;
  for (const c of profile.columns) {
    // nullPct is multiplied by 100 and shown as a whole-number percentage.
    const pct = (c.nullPct * 100).toFixed(0);
    const samples = escapeHtml(c.sampleValues.slice(0, 3).join(", "));
    html += `<tr>
      <td style="padding: 4px 8px;">${escapeHtml(c.name)}</td>
      <td style="padding: 4px 8px;">${escapeHtml(c.inferredType)}</td>
      <td style="padding: 4px 8px;">${escapeHtml(c.nullCount)} (${pct}%)</td>
      <td style="padding: 4px 8px;">${escapeHtml(c.uniqueCount)}</td>
      <td style="padding: 4px 8px; color: #888;">${samples}</td>
    </tr>`;
  }
  html += "</table></div>";
  return html;
}
2946
/**
 * Convert a raw CSV cell to a boolean or number when that is lossless enough,
 * otherwise return the string unchanged.
 *
 * Kept as strings: values with surrounding whitespace, values with a leading
 * zero not followed by "." (e.g. "007", zip codes), non-finite/non-numeric
 * text, and plain integers too large to be represented exactly as a JS number
 * (long identifiers would otherwise be silently rounded).
 *
 * @param {string} raw - Cell text.
 * @returns {string|number|boolean} The coerced value.
 */
function coerceValue(raw) {
  if (raw === "true" || raw === "True" || raw === "TRUE") return true;
  if (raw === "false" || raw === "False" || raw === "FALSE") return false;
  if (raw.length === 0 || raw !== raw.trim()) return raw;
  // Leading zeros carry meaning (codes, phone numbers); "0.5" is still numeric.
  if (raw.length > 1 && raw[0] === "0" && raw[1] !== ".") return raw;
  const n = Number(raw);
  if (!Number.isFinite(n)) return raw;
  // Integers beyond 2^53 - 1 cannot round-trip through a double.
  if (/^[+-]?\d+$/.test(raw) && !Number.isSafeInteger(n)) return raw;
  return n;
}
2956
/**
 * Split CSV text into records on line breaks that are outside double quotes,
 * so a quoted field may itself contain line breaks.
 *
 * @param {string} content - CSV text.
 * @returns {string[]} One string per record, without the terminating line break.
 */
function splitCsvRecords(content) {
  const records = [];
  let start = 0;
  let inQuotes = false;
  for (let i = 0; i < content.length; i++) {
    const ch = content[i];
    if (ch === '"') {
      // A doubled quote toggles twice, leaving the state unchanged.
      inQuotes = !inQuotes;
    } else if (ch === "\n" && !inQuotes) {
      const end = i > start && content[i - 1] === "\r" ? i - 1 : i;
      records.push(content.slice(start, end));
      start = i + 1;
    }
  }
  if (start < content.length) records.push(content.slice(start));
  return records;
}

/**
 * Parse CSV text into an array of row objects keyed by the header row.
 *
 * Blank records are skipped, a leading UTF-8 byte-order mark is removed so it
 * does not become part of the first header, empty cells become null and all
 * other cells go through coerceValue. Cells missing from a short row are null.
 *
 * Note: an unterminated quote makes the remainder of the input one record.
 *
 * @param {string} content - CSV text.
 * @returns {object[]} One object per data record (empty when there is no header).
 */
function parseCsv(content) {
  const text = content.charCodeAt(0) === 0xfeff ? content.slice(1) : content;
  const lines = splitCsvRecords(text).filter((line) => line.trim());
  if (lines.length === 0) return [];
  const headers = parseCsvLine(lines[0]);
  const rows = [];
  for (let i = 1; i < lines.length; i++) {
    const values = parseCsvLine(lines[i]);
    const row = {};
    for (let j = 0; j < headers.length; j++) {
      const raw = values[j] ?? "";
      row[headers[j]] = raw === "" ? null : coerceValue(raw);
    }
    rows.push(row);
  }
  return rows;
}
2972
/**
 * Split one CSV record into its fields.
 *
 * Commas separate fields unless they appear inside double quotes; inside a
 * quoted section a doubled quote ("") yields a literal quote character.
 *
 * @param {string} line - A single CSV record.
 * @returns {string[]} The field values (always at least one element).
 */
function parseCsvLine(line) {
  const fields = [];
  let buffer = "";
  let quoted = false;
  let pos = 0;
  while (pos < line.length) {
    const ch = line[pos];
    if (ch === '"') {
      if (!quoted) {
        quoted = true;
      } else if (line[pos + 1] === '"') {
        // Escaped quote: emit one quote and skip its partner.
        buffer += '"';
        pos += 1;
      } else {
        quoted = false;
      }
    } else if (ch === "," && !quoted) {
      fields.push(buffer);
      buffer = "";
    } else {
      buffer += ch;
    }
    pos += 1;
  }
  fields.push(buffer);
  return fields;
}
3003
/**
 * Serialise row objects to CSV text.
 *
 * The column set and order are taken from the keys of the first row. Header
 * names and cell values are both quoted when they contain a comma, a double
 * quote, or a line break (\n or \r); embedded quotes are doubled. null and
 * undefined cells are written as empty fields.
 *
 * @param {object[]} rows - Rows to serialise.
 * @returns {string} CSV text terminated by a newline, or "" for no rows.
 */
function rowsToCsv(rows) {
  if (rows.length === 0) return "";
  const escapeField = (value) => {
    if (value === null || value === undefined) return "";
    const s = String(value);
    return /[",\r\n]/.test(s) ? `"${s.replace(/"/g, '""')}"` : s;
  };
  const headers = Object.keys(rows[0]);
  const lines = [headers.map(escapeField).join(",")];
  for (const row of rows) {
    lines.push(headers.map((h) => escapeField(row[h])).join(","));
  }
  return lines.join("\n") + "\n";
}
3021
/**
 * Load rows from a .json or .csv file, chosen by the (case-insensitive)
 * file extension.
 *
 * @param {string} path$1 - File to read.
 * @returns {*} The parsed JSON document, or the rows produced by parseCsv.
 * @throws {Error} When the extension is neither .csv nor .json.
 */
function readFile(path$1) {
  const extension = path.extname(path$1).toLowerCase();
  switch (extension) {
    case ".json":
      return JSON.parse(fs.readFileSync(path$1, "utf-8"));
    case ".csv":
      return parseCsv(fs.readFileSync(path$1, "utf-8"));
    default:
      throw new Error(`Unsupported file format: ${extension}. Supported: .csv, .json`);
  }
}
3033
/**
 * Write rows to a .json or .csv file, chosen by the (case-insensitive)
 * file extension. Missing parent directories are created.
 *
 * The extension is validated before anything touches the file system, so an
 * unsupported format no longer leaves freshly created directories behind.
 *
 * @param {object[]} rows - Rows to write.
 * @param {string} path$1 - Destination file.
 * @throws {Error} When the extension is neither .csv nor .json.
 */
function writeFile(rows, path$1) {
  const ext = path.extname(path$1).toLowerCase();
  if (ext !== ".json" && ext !== ".csv") {
    throw new Error(`Unsupported file format: ${ext}. Supported: .csv, .json`);
  }
  fs.mkdirSync(path.dirname(path$1), { recursive: true });
  if (ext === ".json") {
    fs.writeFileSync(path$1, JSON.stringify(rows, null, 2));
    return;
  }
  fs.writeFileSync(path$1, rowsToCsv(rows));
}
3046
/**
 * Location of the run-history directory: <home>/.goldenflow/history.
 *
 * @returns {string} The directory path (not guaranteed to exist).
 */
function historyDir() {
  const segments = [os.homedir(), ".goldenflow", "history"];
  return path.join(...segments);
}
3049
/**
 * Persist a run record as pretty-printed JSON in the history directory,
 * creating the directory if necessary.
 *
 * @param {{runId: string}} record - Record to store; runId becomes the file name.
 * @returns {string} Path of the file that was written.
 */
function saveRun(record) {
  const targetDir = historyDir();
  fs.mkdirSync(targetDir, { recursive: true });
  const destination = path.join(targetDir, `${record.runId}.json`);
  const serialized = JSON.stringify(record, null, 2);
  fs.writeFileSync(destination, serialized);
  return destination;
}
3056
/**
 * Load the most recently modified run records from the history directory.
 *
 * @param {number} [limit=20] - Maximum number of history files to read.
 * @returns {Array} Parsed records, newest file first. Files that cannot be
 *   read or parsed are skipped with a warning.
 */
function listRuns(limit = 20) {
  const dir = historyDir();
  if (!fs.existsSync(dir)) {
    return [];
  }
  const stamped = [];
  for (const name of fs.readdirSync(dir)) {
    if (!name.endsWith(".json")) continue;
    stamped.push({ name, mtime: fs.statSync(path.join(dir, name)).mtimeMs });
  }
  stamped.sort((a, b) => b.mtime - a.mtime);
  const newest = stamped.slice(0, limit);

  const runs = [];
  for (const { name } of newest) {
    try {
      const raw = fs.readFileSync(path.join(dir, name), "utf-8");
      runs.push(JSON.parse(raw));
    } catch (e) {
      const reason = e instanceof Error ? e.message : String(e);
      console.warn(`[goldenflow:history] Skipping corrupt history file ${name}: ${reason}`);
    }
  }
  return runs;
}
3071
/**
 * Load a single run record by id from the history directory.
 *
 * @param {string} runId - Identifier; the record is read from "<runId>.json".
 * @returns {*} The parsed record, or null when no such file exists.
 */
function getRun(runId) {
  const recordPath = path.join(historyDir(), `${runId}.json`);
  if (fs.existsSync(recordPath)) {
    const raw = fs.readFileSync(recordPath, "utf-8");
    return JSON.parse(raw);
  }
  return null;
}
3076
/**
 * Build a run identifier of the form "YYYYMMDDhhmmss_NNNN" (UTC), where NNNN
 * is the last four digits of the millisecond timestamp, zero-padded.
 *
 * Both parts are derived from one clock reading, and the timestamp part holds
 * digits only (the previous 15-character slice also captured the "." that
 * precedes the milliseconds in the ISO string).
 *
 * @returns {string} The identifier.
 */
function generateRunId() {
  const now = new Date();
  // ISO form is YYYY-MM-DDTHH:mm:ss.sssZ; the first 14 digits are date + time.
  const ts = now.toISOString().replace(/\D/g, "").slice(0, 14);
  const suffix = String(now.getTime() % 1e4).padStart(4, "0");
  return `${ts}_${suffix}`;
}
3082
/**
 * Resolve a caller-supplied path and make sure it stays inside the current
 * working directory.
 *
 * Containment is decided with path.relative rather than a string-prefix test,
 * so a sibling such as "<cwd>-other/file" is rejected. Symbolic links are not
 * resolved here.
 *
 * @param {string} raw - Absolute or cwd-relative path.
 * @returns {string} The resolved absolute path.
 * @throws {Error} When the resolved path lies outside the working directory.
 */
function sanitizePath(raw) {
  const cwd = path.resolve(process.cwd());
  const resolved = path.resolve(cwd, raw);
  const rel = path.relative(cwd, resolved);
  // An absolute result occurs on Windows when the path is on another drive.
  const escapes = rel === ".." || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel);
  if (escapes) {
    throw new Error(`Path '${raw}' is outside the working directory`);
  }
  return resolved;
}
3090
// Descriptors for the tools this module exposes. Each entry carries the tool
// name, a human-readable description and a JSON-Schema-style `inputSchema`
// for its arguments. The names match the cases dispatched in _handleToolInner.
var TOOL_DEFINITIONS = [
  {
    name: "transform",
    description: "Transform a data file. Reads a CSV, applies transforms (auto or from config), and returns the manifest.",
    inputSchema: {
      type: "object",
      properties: {
        path: { type: "string", description: "Path to the data file (CSV)." },
        config: { type: "string", description: "Optional path to a YAML config file." }
      },
      required: ["path"]
    }
  },
  {
    name: "map",
    description: "Auto-map schemas between two data files. Returns column mappings with confidence scores.",
    inputSchema: {
      type: "object",
      properties: {
        source: { type: "string", description: "Path to the source data file." },
        target: { type: "string", description: "Path to the target data file." }
      },
      required: ["source", "target"]
    }
  },
  {
    name: "profile",
    description: "Profile a data file. Returns column types, stats, and quality indicators.",
    inputSchema: {
      type: "object",
      properties: {
        path: { type: "string", description: "Path to the data file (CSV)." }
      },
      required: ["path"]
    }
  },
  {
    name: "learn",
    description: "Generate a YAML config from data patterns. Analyzes the file and returns a recommended config.",
    inputSchema: {
      type: "object",
      properties: {
        path: { type: "string", description: "Path to the data file (CSV)." }
      },
      required: ["path"]
    }
  },
  {
    name: "diff",
    description: "Compare two data files and report differences (added/removed/changed columns and rows).",
    inputSchema: {
      type: "object",
      properties: {
        path_before: { type: "string", description: "Path to the before file." },
        path_after: { type: "string", description: "Path to the after file." }
      },
      required: ["path_before", "path_after"]
    }
  },
  {
    // Dispatched through the same engine call as "transform"; the handler
    // itself writes no output file in either case.
    name: "validate",
    description: "Dry-run transform. Shows what transforms would be applied without writing output.",
    inputSchema: {
      type: "object",
      properties: {
        path: { type: "string", description: "Path to the data file (CSV)." },
        config: { type: "string", description: "Optional path to a YAML config file." }
      },
      required: ["path"]
    }
  },
  {
    name: "list_transforms",
    description: "List all available transforms with their metadata (name, input types, mode, priority).",
    inputSchema: {
      type: "object",
      properties: {}
    }
  },
  {
    name: "explain_transform",
    description: "Describe a specific transform by name. Returns its metadata or an error if not found.",
    inputSchema: {
      type: "object",
      properties: {
        transform_name: { type: "string", description: "Name of the transform to describe." }
      },
      required: ["transform_name"]
    }
  },
  {
    name: "list_domains",
    description: "List all available domain packs (e.g. people_hr, healthcare, finance).",
    inputSchema: {
      type: "object",
      properties: {}
    }
  },
  {
    name: "select_from_findings",
    description: "Map GoldenCheck findings to recommended GoldenFlow transforms.",
    inputSchema: {
      type: "object",
      properties: {
        findings: {
          type: "array",
          items: { type: "object" },
          description: "Array of GoldenCheck finding objects (each with 'check' and 'column' keys)."
        }
      },
      required: ["findings"]
    }
  }
];
3204
/**
 * Run a tool by name and always return a JSON string: the tool's result on
 * success, or `{"error": "<message>"}` when the tool throws.
 *
 * @param {string} name - Tool name (see TOOL_DEFINITIONS).
 * @param {object} arguments_ - Tool arguments keyed by parameter name.
 * @returns {string} JSON-encoded result or error.
 */
function handleTool(name, arguments_) {
  let payload;
  try {
    payload = _handleToolInner(name, arguments_);
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    payload = JSON.stringify({ error: message });
  }
  return payload;
}
3211
/**
 * Dispatch a tool call to its implementation and return the JSON-encoded
 * result. Errors thrown here are turned into error payloads by handleTool.
 *
 * File arguments are coerced with String() and passed through sanitizePath
 * before being read.
 *
 * @param {string} name - Tool name.
 * @param {object} arguments_ - Tool arguments keyed by parameter name.
 * @returns {string} JSON string; unknown tools yield `{"error": "Unknown tool: ..."}`.
 */
function _handleToolInner(name, arguments_) {
  const textArg = (key) => String(arguments_[key]);
  const loadRowsFrom = (key) => readFile(sanitizePath(textArg(key)));

  // Shared by "transform" and "validate": build the engine (optionally from a
  // YAML config) after the data has been read, then run it.
  const runEngineOn = (rows, sourcePath) => {
    let engine;
    if (arguments_["config"]) {
      const configPath = sanitizePath(textArg("config"));
      const yamlText = fs.readFileSync(configPath, "utf-8");
      engine = new TransformEngine(loadConfigFromString(yamlText));
    } else {
      engine = new TransformEngine();
    }
    return engine.transformDf(rows, sourcePath);
  };

  const describeTransform = (t) => ({
    name: t.name,
    input_types: t.inputTypes,
    auto_apply: t.autoApply,
    priority: t.priority,
    mode: t.mode
  });

  if (name === "transform" || name === "validate") {
    const dataPath = sanitizePath(textArg("path"));
    const outcome = runEngineOn(readFile(dataPath), dataPath);
    const summary = {
      rows: outcome.rows.length,
      transforms_applied: outcome.manifest.records.length,
      manifest: outcome.manifest
    };
    return JSON.stringify(name === "validate" ? { mode: "would_apply", ...summary } : summary);
  }

  switch (name) {
    case "map": {
      const sourceRows = loadRowsFrom("source");
      const targetRows = loadRowsFrom("target");
      const found = new SchemaMapper().map(sourceRows, targetRows);
      const mappings = found.map(({ source, target, confidence }) => ({ source, target, confidence }));
      return JSON.stringify({ mappings });
    }
    case "profile": {
      const profPath = sanitizePath(textArg("path"));
      const profile = profileDataframe(readFile(profPath), profPath);
      const columns = profile.columns.map((c) => ({
        name: c.name,
        type: c.inferredType,
        null_count: c.nullCount,
        unique_count: c.uniqueCount,
        sample_values: c.sampleValues
      }));
      return JSON.stringify({
        source: profile.filePath,
        row_count: profile.rowCount,
        columns
      });
    }
    case "learn": {
      const learnPath = sanitizePath(textArg("path"));
      return JSON.stringify(learnConfig(readFile(learnPath), learnPath));
    }
    case "diff": {
      const beforeRows = loadRowsFrom("path_before");
      const afterRows = loadRowsFrom("path_after");
      return JSON.stringify(diffDataframes(beforeRows, afterRows));
    }
    case "list_transforms":
      return JSON.stringify(listTransforms().map((t) => describeTransform(t)));
    case "explain_transform": {
      const transformName = textArg("transform_name");
      const info = getTransform(transformName);
      return info
        ? JSON.stringify(describeTransform(info))
        : JSON.stringify({ error: `Transform '${transformName}' not found.` });
    }
    case "list_domains":
      return JSON.stringify({ domains: listDomains() });
    case "select_from_findings":
      return JSON.stringify(selectFromFindings(arguments_["findings"]));
    default:
      return JSON.stringify({ error: `Unknown tool: ${name}` });
  }
}
3332
+ init_types();
3333
/**
 * Poll a directory for new or modified .csv files and write a transformed
 * copy ("<name>_transformed.csv") for each one.
 *
 * Files whose name contains "_transformed" are ignored so the watcher does not
 * reprocess its own output. Polling continues until SIGINT, at which point the
 * process exits.
 *
 * @param {string} dirPath - Directory to watch.
 * @param {object} [options]
 * @param {number} [options.interval=2] - Polling interval in seconds.
 * @param {string} [options.configPath] - YAML config to build the engine from.
 * @param {string} [options.outputDir] - Output directory (defaults to dirPath).
 */
function watchDirectory(dirPath, options = {}) {
  const interval = options.interval ?? 2;
  const cfg = options.configPath
    ? loadConfigFromString(fs.readFileSync(options.configPath, "utf-8"))
    : makeConfig();
  const engine = new TransformEngine(cfg);
  const outDir = options.outputDir ?? dirPath;
  // Last processed modification time per file path.
  const seen = new Map();
  console.log(`Watching ${dirPath} (interval: ${interval}s)`);
  console.log("Press Ctrl+C to stop\n");
  const poll = () => {
    try {
      const files = fs.readdirSync(dirPath).filter((f) => path.extname(f).toLowerCase() === ".csv");
      for (const file of files) {
        if (file.includes("_transformed")) continue;
        const fullPath = path.join(dirPath, file);
        const mtime = fs.statSync(fullPath).mtimeMs;
        if (!seen.has(fullPath) || seen.get(fullPath) < mtime) {
          console.log(`Detected: ${file}`);
          try {
            const rows = readFile(fullPath);
            const result = engine.transformDf(rows, fullPath);
            // Strip only the trailing extension; a plain string replace would
            // remove the first ".csv" occurring anywhere in the name.
            const stem = path.basename(file, path.extname(file));
            writeFile(result.rows, path.join(outDir, `${stem}_transformed.csv`));
            console.log(`  Transformed: ${result.manifest.records.length} transforms applied`);
          } catch (e) {
            console.error(`  Error: ${e instanceof Error ? e.message : String(e)}`);
          }
          // Recorded even after a failure so a broken file is not retried
          // until it changes again.
          seen.set(fullPath, mtime);
        }
      }
    } catch (e) {
      console.error(`Watch error: ${e instanceof Error ? e.message : String(e)}`);
    }
  };
  poll();
  const timer = setInterval(poll, interval * 1e3);
  process.on("SIGINT", () => {
    clearInterval(timer);
    console.log("\nWatch stopped.");
    process.exit(0);
  });
}
3374
+
3375
+ // src/node/schedule.ts
3376
+ init_types();
3377
/**
 * Parse an interval expression such as "30s", "5m", "1.5h" or "2d" into
 * seconds. Surrounding whitespace and letter case are ignored.
 *
 * The numeric part must be a complete, finite, positive number: trailing
 * garbage ("5xm"), a missing number ("m"), zero and negative values are
 * rejected instead of being partially parsed.
 *
 * @param {string} expr - Interval expression: number followed by s, m, h or d.
 * @returns {number} Interval length in seconds.
 * @throws {Error} When the unit is unknown or the number is invalid.
 */
function parseInterval(expr) {
  const s = expr.trim().toLowerCase();
  const multipliers = { s: 1, m: 60, h: 3600, d: 86400 };
  const unit = s.slice(-1);
  const mult = multipliers[unit];
  if (!mult) throw new Error(`Invalid interval: ${expr}. Use format like '5m', '1h', '30s'`);
  const digits = s.slice(0, -1).trim();
  // Number() rejects partial matches that parseFloat would accept, but maps
  // "" to 0, hence the explicit emptiness check.
  const num = digits === "" ? NaN : Number(digits);
  if (!Number.isFinite(num) || num <= 0) throw new Error(`Invalid interval: ${expr}`);
  return num * mult;
}
3387
/**
 * Transform one file immediately and then repeatedly at a fixed interval,
 * writing "<stem>_transformed<ext>" each time, until SIGINT exits the process.
 *
 * @param {string} filePath - File to transform on every run.
 * @param {object} [options]
 * @param {string} [options.interval="1h"] - Interval expression for parseInterval.
 * @param {string} [options.configPath] - YAML config to build the engine from.
 * @param {string} [options.outputDir] - Output directory (defaults to the file's directory).
 * @throws {Error} When the interval is invalid or cannot be represented by a timer.
 */
function runSchedule(filePath, options = {}) {
  // Largest delay setInterval honours; values outside 1..2^31-1 ms are
  // coerced to 1 ms by the timer, which would turn the schedule into a busy loop.
  const MAX_TIMER_DELAY_MS = 2147483647;
  const intervalStr = options.interval ?? "1h";
  const seconds = parseInterval(intervalStr);
  const delayMs = seconds * 1e3;
  if (!(delayMs >= 1 && delayMs <= MAX_TIMER_DELAY_MS)) {
    throw new Error(`Invalid interval: ${intervalStr}. Must be between 1ms and ${MAX_TIMER_DELAY_MS}ms (about 24 days)`);
  }
  const cfg = options.configPath
    ? loadConfigFromString(fs.readFileSync(options.configPath, "utf-8"))
    : makeConfig();
  const engine = new TransformEngine(cfg);
  const outDir = options.outputDir ?? path.dirname(filePath);
  console.log(`Scheduled: transform ${filePath} every ${intervalStr}`);
  console.log("Press Ctrl+C to stop\n");
  let runCount = 0;
  const run = () => {
    runCount++;
    const ts = new Date().toTimeString().slice(0, 8);
    console.log(`Run #${runCount} at ${ts}`);
    try {
      const rows = readFile(filePath);
      const result = engine.transformDf(rows, filePath);
      const ext = path.extname(filePath);
      const stem = path.basename(filePath, ext);
      writeFile(result.rows, path.join(outDir, `${stem}_transformed${ext}`));
      console.log(`  Done: ${result.manifest.records.length} transforms, ${result.manifest.errors.length} errors`);
    } catch (e) {
      // A failed run is reported but does not cancel the schedule.
      console.error(`  Error: ${e instanceof Error ? e.message : String(e)}`);
    }
  };
  run();
  const timer = setInterval(run, delayMs);
  process.on("SIGINT", () => {
    clearInterval(timer);
    console.log(`\nStopped after ${runCount} runs.`);
    process.exit(0);
  });
}
3420
+ init_types();
3421
/**
 * Promise wrapper around the callback-style `rl.question`.
 *
 * @param {{question: Function}} rl - readline interface (or compatible object).
 * @param {string} question - Prompt text.
 * @returns {Promise<string>} Resolves with the answer passed to the callback.
 */
function ask(rl, question) {
  return new Promise((deliver) => {
    return rl.question(question, deliver);
  });
}
3424
/**
 * Interactive setup wizard: profiles a data file, proposes transforms per
 * column, asks for confirmation on stdin and writes the resulting YAML config.
 *
 * Typed answers are trimmed; "n" or "no" (any case) declines a suggestion and
 * anything else, including an empty answer, accepts it.
 *
 * @param {string} [dataPath] - Data file; prompted for when null/undefined.
 * @param {string} [outputPath="goldenflow.yaml"] - Where to write the config.
 * @returns {Promise<void>}
 */
async function runWizard(dataPath, outputPath = "goldenflow.yaml") {
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
  try {
    console.log("GoldenFlow Setup Wizard\n");
    // A pasted path often carries stray whitespace; only the prompted value is trimmed.
    const filePath = dataPath ?? (await ask(rl, "Path to your data file: ")).trim();
    console.log(`\nProfiling ${filePath}...`);
    const rows = readFile(filePath);
    const profile = profileDataframe(rows, filePath);
    // NOTE(review): leading spaces of this line were not recoverable from the
    // source dump; two spaces are used to match the per-column lines below.
    console.log(`\n  ${profile.rowCount} rows, ${profile.columnCount} columns\n`);
    const columnTransforms = {};
    for (const col of profile.columns) {
      const names = selectTransforms(col).map((t) => t.name);
      columnTransforms[col.name] = names;
      console.log(`  ${col.name}: ${col.inferredType} | suggested: ${names.slice(0, 3).join(", ") || "none"}`);
    }
    console.log("\nConfigure transforms per column:\n");
    const transforms = [];
    for (const [colName, suggested] of Object.entries(columnTransforms)) {
      if (suggested.length === 0) continue;
      const answer = await ask(rl, `  Apply [${suggested.join(", ")}] to ${colName}? (Y/n) `);
      const declined = /^no?$/i.test(answer.trim());
      if (!declined) {
        transforms.push({ column: colName, ops: suggested });
      }
    }
    const config = makeConfig({ source: filePath, transforms });
    const yaml = saveConfigToString(config);
    fs.writeFileSync(outputPath, yaml);
    console.log(`\nConfig saved to ${outputPath}`);
    console.log(`Run: goldenflow-js transform ${filePath} -c ${outputPath}`);
  } finally {
    // Always release stdin, even when profiling or writing fails.
    rl.close();
  }
}
3462
/**
 * Resolve a request-supplied path and make sure it stays inside the current
 * working directory.
 *
 * Containment is decided with path.relative rather than a string-prefix test,
 * so a sibling such as "<cwd>-other/file" is rejected. Symbolic links are not
 * resolved here.
 *
 * @param {string} raw - Absolute or cwd-relative path.
 * @returns {string} The resolved absolute path.
 * @throws {Error} When the resolved path lies outside the working directory.
 */
function sanitizePath2(raw) {
  const cwd = path.resolve(process.cwd());
  const resolved = path.resolve(cwd, raw);
  const rel = path.relative(cwd, resolved);
  // An absolute result occurs on Windows when the path is on another drive.
  const escapes = rel === ".." || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel);
  if (escapes) {
    throw new Error(`Path '${raw}' is outside the working directory`);
  }
  return resolved;
}
3470
// Version string reported by the API server's /health endpoint.
var VERSION = "0.1.0";
3471
/**
 * Send a JSON response: writes the status line with a JSON content type and
 * ends the response with the serialised payload.
 *
 * @param {{writeHead: Function, end: Function}} res - HTTP response object.
 * @param {number} status - HTTP status code.
 * @param {*} data - Payload to serialise with JSON.stringify.
 */
function jsonResponse(res, status, data) {
  const headers = { "Content-Type": "application/json" };
  res.writeHead(status, headers);
  const body = JSON.stringify(data);
  res.end(body);
}
3475
/**
 * Collect every chunk of an async-iterable request and decode the
 * concatenation as UTF-8.
 *
 * @param {AsyncIterable<Buffer>} req - Request stream yielding Buffer chunks.
 * @returns {Promise<string>} The complete body text.
 */
async function readBody(req) {
  const parts = [];
  for await (const part of req) {
    parts.push(part);
  }
  const whole = Buffer.concat(parts);
  return whole.toString("utf-8");
}
3480
/**
 * Create the HTTP API server (not yet listening).
 *
 * Routes:
 *   GET  /health      - `{status: "ok", version}`
 *   GET  /transforms  - metadata for every registered transform
 *   POST /transform   - body `{path}` (file inside the working directory) or
 *                       `{rows: [...]}`; responds with transformed rows and
 *                       the manifest's records and errors
 * Anything else yields 404. Malformed request URLs and bodies yield 400.
 *
 * @returns {http.Server} The configured server.
 */
function createApp() {
  return http.createServer(async (req, res) => {
    // Only the pathname is needed, so a fixed base avoids depending on the
    // client-controlled Host header. Parsing can still throw (e.g. for "//"),
    // and an exception here would otherwise be an unhandled rejection.
    let pathname;
    try {
      pathname = new URL(req.url ?? "/", "http://localhost").pathname;
    } catch {
      return jsonResponse(res, 400, { error: "Invalid request URL" });
    }
    if (pathname === "/health" && req.method === "GET") {
      return jsonResponse(res, 200, { status: "ok", version: VERSION });
    }
    if (pathname === "/transforms" && req.method === "GET") {
      const transforms = listTransforms().map((t) => ({
        name: t.name,
        input_types: [...t.inputTypes],
        auto_apply: t.autoApply,
        priority: t.priority,
        mode: t.mode
      }));
      return jsonResponse(res, 200, transforms);
    }
    if (pathname === "/transform" && req.method === "POST") {
      try {
        const body = await readBody(req);
        let data;
        try {
          data = JSON.parse(body);
        } catch {
          return jsonResponse(res, 400, { error: "Invalid JSON" });
        }
        // JSON.parse may legitimately return null or a primitive.
        const payload = data !== null && typeof data === "object" ? data : {};
        let rows;
        if (typeof payload.path === "string" && payload.path) {
          rows = readFile(sanitizePath2(payload.path));
        } else if (Array.isArray(payload.rows)) {
          rows = payload.rows;
        } else {
          return jsonResponse(res, 400, { error: "Provide 'path' or 'rows'" });
        }
        const engine = new TransformEngine();
        const result = engine.transformDf(rows);
        return jsonResponse(res, 200, {
          rows: result.rows,
          manifest: {
            records: result.manifest.records,
            errors: result.manifest.errors
          }
        });
      } catch (e) {
        return jsonResponse(res, 500, { error: e instanceof Error ? e.message : String(e) });
      }
    }
    jsonResponse(res, 404, { error: "Not found" });
  });
}
3529
/**
 * Create the API server and start listening, logging the address once bound.
 *
 * @param {number} [port=8000] - TCP port.
 * @param {string} [host="0.0.0.0"] - Interface to bind; the default listens on
 *   all interfaces.
 */
function runServer(port = 8e3, host = "0.0.0.0") {
  const announce = () => {
    console.log(`GoldenFlow API server running at http://${host}:${port}`);
  };
  createApp().listen(port, host, announce);
}
3535
+
3536
// Public CommonJS API of the bundle. Two functions are exported under names
// that differ from their internal ones: createApp as `createApiApp` and
// runServer as `runApiServer`.
exports.FINDING_TRANSFORM_MAP = FINDING_TRANSFORM_MAP;
exports.SchemaMapper = SchemaMapper;
exports.StreamProcessor = StreamProcessor;
exports.TOOL_DEFINITIONS = TOOL_DEFINITIONS;
exports.TabularData = TabularData;
exports.TransformEngine = TransformEngine;
exports.applyLlmCorrections = applyLlmCorrections;
exports.createApiApp = createApp;
exports.diffDataframes = diffDataframes;
exports.generateRunId = generateRunId;
exports.getRun = getRun;
exports.getTransform = getTransform;
exports.handleTool = handleTool;
exports.isNullish = isNullish;
exports.learnConfig = learnConfig;
exports.listDomains = listDomains;
exports.listRuns = listRuns;
exports.listTransforms = listTransforms;
exports.loadConfigFromString = loadConfigFromString;
exports.loadDomain = loadDomain;
exports.makeColumnProfile = makeColumnProfile;
exports.makeConfig = makeConfig;
exports.makeManifest = makeManifest;
exports.makeTransformRecord = makeTransformRecord;
exports.manifestToHtml = manifestToHtml;
exports.manifestToJson = manifestToJson;
exports.mergeConfigs = mergeConfigs;
exports.nameSimilarity = nameSimilarity;
exports.parseTransformName = parseTransformName;
exports.prepareLlmCorrections = prepareLlmCorrections;
exports.printDiff = printDiff;
exports.printManifest = printManifest;
exports.printProfile = printProfile;
exports.profileDataframe = profileDataframe;
exports.profileSimilarity = profileSimilarity;
exports.profileToHtml = profileToHtml;
exports.readFile = readFile;
exports.registerTransform = registerTransform;
exports.registry = registry;
exports.runApiServer = runServer;
exports.runSchedule = runSchedule;
exports.runWizard = runWizard;
exports.saveConfigToString = saveConfigToString;
exports.saveRun = saveRun;
exports.selectFromFindings = selectFromFindings;
exports.selectTransforms = selectTransforms;
exports.toColumnValue = toColumnValue;
exports.transformResultToHtml = transformResultToHtml;
exports.validateConfig = validateConfig;
exports.watchDirectory = watchDirectory;
exports.writeFile = writeFile;
3587
+ //# sourceMappingURL=index.cjs.map
3588
+ //# sourceMappingURL=index.cjs.map