goldenflow 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2941 @@
1
// ---- esbuild runtime helpers (generated) ----
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;

// Resolves to the host `require` when one exists; otherwise a Proxy (or the bare
// fallback function) that throws when a dynamic require is attempted.
var __require = /* @__PURE__ */ ((fallback) => {
  if (typeof require !== "undefined") return require;
  if (typeof Proxy === "undefined") return fallback;
  return new Proxy(fallback, {
    get: (target, prop) => (typeof require !== "undefined" ? require : target)[prop]
  });
})(function(x) {
  if (typeof require !== "undefined") return require.apply(this, arguments);
  throw Error('Dynamic require of "' + x + '" is not supported');
});

// Wraps a single-entry `{ "path"() {...} }` module object into a run-once initializer.
// The module body is invoked on the first call only; later calls return the cached result.
var __esm = (fn, res) => function __init() {
  if (fn) {
    const moduleBody = fn[__getOwnPropNames(fn)[0]];
    // Clear `fn` before running the body so the initializer never runs twice.
    fn = 0;
    res = moduleBody(0);
  }
  return res;
};

// Defines an enumerable getter on `target` for every key of `all`.
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { enumerable: true, get: all[name] });
  }
};
16
+
17
+ // src/core/types.ts
18
/**
 * Builds a transform record, defaulting `sampleBefore` and `sampleAfter` to
 * empty arrays. Any own property of `input` overrides the defaults.
 */
function makeTransformRecord(input) {
  const defaults = { sampleBefore: [], sampleAfter: [] };
  return { ...defaults, ...input };
}
25
/** Creates a new, empty `MutableManifest` for the given source. */
function makeManifest(source) {
  const manifest = new MutableManifest(source);
  return manifest;
}
28
/**
 * Builds a column profile, defaulting `sampleValues` to an empty array and
 * `detectedFormat` to null. Any own property of `input` overrides the defaults.
 */
function makeColumnProfile(input) {
  const defaults = { sampleValues: [], detectedFormat: null };
  return { ...defaults, ...input };
}
35
/**
 * Builds a config object with every known section present.
 * Fresh default containers are created on each call, so callers never share
 * arrays/objects; own properties of `input` override the defaults.
 */
function makeConfig(input) {
  const defaults = {
    source: null,
    output: null,
    transforms: [],
    splits: [],
    renames: {},
    drop: [],
    filters: [],
    dedup: null,
    mappings: []
  };
  return { ...defaults, ...input };
}
49
var MutableManifest;
// Lazy initializer for src/core/types.ts: assigns the MutableManifest class on first call.
var init_types = __esm({
  "src/core/types.ts"() {
    /** Accumulates transform records and per-row errors for one source. */
    MutableManifest = class {
      source;
      records = [];
      errors = [];
      createdAt;
      constructor(source) {
        this.source = source;
        // ISO-8601 timestamp captured at construction time.
        this.createdAt = (/* @__PURE__ */ new Date()).toISOString();
      }
      /** Appends a transform record. */
      addRecord(record) {
        this.records.push(record);
      }
      /** Appends an error entry built from the four given fields. */
      addError(column, transform, row, error) {
        this.errors.push({ column, transform, row, error });
      }
      /** Serializes the manifest to a plain object with snake_case keys and a summary. */
      toDict() {
        const records = this.records.map((rec) => ({
          column: rec.column,
          transform: rec.transform,
          affected_rows: rec.affectedRows,
          total_rows: rec.totalRows,
          sample_before: rec.sampleBefore,
          sample_after: rec.sampleAfter
        }));
        const errors = this.errors.map(({ column, transform, row, error }) => ({
          column,
          transform,
          row,
          error
        }));
        // Distinct column names, in first-seen order.
        const columnsAffected = Array.from(new Set(this.records.map((rec) => rec.column)));
        return {
          source: this.source,
          created_at: this.createdAt,
          records,
          errors,
          summary: {
            total_transforms: this.records.length,
            total_errors: this.errors.length,
            columns_affected: columnsAffected
          }
        };
      }
    };
  }
});
95
+
96
+ // src/core/transforms/registry.ts
97
/**
 * Registers (or replaces) a transform in the module registry under `opts.name`.
 * Missing/null options fall back to: autoApply=false, priority=50, mode="series".
 */
function registerTransform(opts, func) {
  const entry = {
    name: opts.name,
    func,
    inputTypes: opts.inputTypes,
    autoApply: opts.autoApply ?? false,
    priority: opts.priority ?? 50,
    mode: opts.mode ?? "series"
  };
  _REGISTRY.set(entry.name, entry);
}
107
/** Looks up a registered transform by name; returns undefined when it is not registered. */
function getTransform(name) {
  const entry = _REGISTRY.get(name);
  return entry;
}
110
/** Returns a new array of all registered transforms, highest priority first. */
function listTransforms() {
  const entries = Array.from(_REGISTRY.values());
  entries.sort((left, right) => right.priority - left.priority);
  return entries;
}
113
/**
 * Splits a raw spec such as "truncate:10" on ":" into `[name, args]`,
 * where `args` is the (possibly empty) array of remaining segments as strings.
 */
function parseTransformName(raw) {
  const [name, ...args] = raw.split(":");
  return [name, args];
}
117
/** Exposes the live registry Map itself (not a copy). */
function registry() {
  const liveRegistry = _REGISTRY;
  return liveRegistry;
}
120
+ var _REGISTRY;
121
+ var init_registry = __esm({
122
+ "src/core/transforms/registry.ts"() {
123
+ _REGISTRY = /* @__PURE__ */ new Map();
124
+ }
125
+ });
126
+
127
+ // src/core/domains/people-hr.ts
128
+ var people_hr_exports = {};
129
+ __export(people_hr_exports, {
130
+ PACK: () => PACK
131
+ });
132
/**
 * Validates US Social Security Numbers.
 *
 * Strings map to a boolean; every non-string value (null, numbers, ...) is
 * passed through unchanged. A value is valid when it matches `SSN_RE` after
 * trimming and none of its parts is a never-issued value:
 * area 000, 666 or 900-999, group 00, serial 0000.
 *
 * @param {Array} values Column values.
 * @returns {Array} Same-length array of booleans / passed-through values.
 */
function ssnValidate(values) {
  return values.map((v) => {
    if (typeof v !== "string") return v;
    const m = v.trim().match(SSN_RE);
    if (!m) return false;
    const [, area, group, serial] = m;
    // The SSA never assigns area numbers 000, 666 or anything in 900-999.
    if (area === "000" || area === "666" || area[0] === "9") return false;
    if (group === "00" || serial === "0000") return false;
    return true;
  });
}
141
var SSN_RE;
var PACK;
// Lazy initializer for the people/HR domain pack: registers `ssn_validate` and builds PACK.
var init_people_hr = __esm({
  "src/core/domains/people-hr.ts"() {
    init_types();
    init_registry();
    // Three capture groups (3-2-4 digits) with optional dashes between them.
    SSN_RE = /^(\d{3})-?(\d{2})-?(\d{4})$/;
    registerTransform(
      { name: "ssn_validate", inputTypes: ["ssn", "string"], priority: 55, mode: "series" },
      ssnValidate
    );
    const op = (column, ...ops) => ({ column, ops });
    const transforms = [
      "split_name",
      "split_name_reverse",
      "strip_titles",
      "strip_suffixes",
      "name_proper",
      "ssn_mask",
      "ssn_validate",
      "date_iso8601",
      "gender_standardize",
      "boolean_normalize"
    ];
    // NOTE(review): several ops named here are not defined in the visible part of
    // this file — presumably registered elsewhere; confirm.
    const defaultConfig = makeConfig({
      transforms: [
        op("name", "strip", "strip_titles", "title_case"),
        op("ssn", "ssn_validate"),
        op("gender", "gender_standardize"),
        op("hire_date", "date_iso8601"),
        op("active", "boolean_normalize")
      ]
    });
    PACK = {
      name: "people_hr",
      description: "Name parsing, SSN formatting, employment dates, gender/boolean standardization",
      transforms,
      defaultConfig
    };
  }
});
178
+
179
+ // src/core/domains/healthcare.ts
180
+ var healthcare_exports = {};
181
+ __export(healthcare_exports, {
182
+ PACK: () => PACK2
183
+ });
184
/**
 * Validates NPI numbers: strips non-digits, requires exactly 10 digits, then
 * runs a Luhn check over the digits prefixed with "80840".
 * Strings map to a boolean; non-string values are passed through unchanged.
 */
function npiValidate(values) {
  const passesLuhn = (digitString) => {
    let checksum = 0;
    // Walking right-to-left, every second digit (starting with the one next to
    // the rightmost) is doubled, with 9 subtracted when the result exceeds 9.
    let doubleThis = false;
    for (let idx = digitString.length - 1; idx >= 0; idx -= 1) {
      let digit = parseInt(digitString[idx], 10);
      if (doubleThis) {
        digit *= 2;
        if (digit > 9) digit -= 9;
      }
      checksum += digit;
      doubleThis = !doubleThis;
    }
    return checksum % 10 === 0;
  };
  return values.map((v) => {
    if (typeof v !== "string") return v;
    const npi = v.replace(/\D/g, "");
    return npi.length === 10 ? passesLuhn("80840" + npi) : false;
  });
}
202
/**
 * Formats ICD-10 style codes: trims, upper-cases, drops existing dots, and
 * re-inserts a single dot after the third character when the code is longer
 * than three characters. Non-string values are passed through unchanged.
 */
function icd10Format(values) {
  const formatCode = (raw) => {
    const compact = raw.trim().toUpperCase().replace(/\./g, "");
    if (compact.length <= 3) return compact;
    return compact.slice(0, 3) + "." + compact.slice(3);
  };
  return values.map((v) => (typeof v === "string" ? formatCode(v) : v));
}
209
var PACK2;
// Lazy initializer for the healthcare domain pack: registers its transforms and builds PACK2.
var init_healthcare = __esm({
  "src/core/domains/healthcare.ts"() {
    init_types();
    init_registry();
    registerTransform(
      { name: "npi_validate", inputTypes: ["string"], priority: 50, mode: "series" },
      npiValidate
    );
    registerTransform(
      { name: "icd10_format", inputTypes: ["string"], priority: 50, mode: "series" },
      icd10Format
    );
    const op = (column, ...ops) => ({ column, ops });
    const transforms = ["npi_validate", "icd10_format", "date_iso8601", "null_standardize", "strip"];
    const defaultConfig = makeConfig({
      transforms: [
        op("npi", "npi_validate"),
        op("icd10_code", "icd10_format"),
        op("service_date", "date_iso8601"),
        op("patient_name", "strip", "title_case")
      ]
    });
    PACK2 = {
      name: "healthcare",
      description: "MRN normalization, ICD-10 formatting, NPI validation, date standardization",
      transforms,
      defaultConfig
    };
  }
});
237
+
238
+ // src/core/domains/finance.ts
239
+ var finance_exports = {};
240
+ __export(finance_exports, {
241
+ PACK: () => PACK3
242
+ });
243
/**
 * Masks account numbers: keeps the last four digits and replaces every other
 * digit with "*"; non-digit characters are dropped. Strings with fewer than
 * four digits and non-string values are returned unchanged.
 */
function accountMask(values) {
  return values.map((v) => {
    if (typeof v !== "string") return v;
    const digits = v.replace(/\D/g, "");
    if (digits.length < 4) return v;
    const visibleTail = digits.slice(-4);
    return visibleTail.padStart(digits.length, "*");
  });
}
251
/**
 * Normalizes CUSIP-like identifiers: trims, upper-cases and keeps at most the
 * first nine characters. Non-string values are passed through unchanged.
 */
function cusipFormat(values) {
  const normalize = (raw) => {
    const upper = raw.trim().toUpperCase();
    return upper.slice(0, 9);
  };
  return values.map((v) => (typeof v === "string" ? normalize(v) : v));
}
257
var PACK3;
// Lazy initializer for the finance domain pack: registers its transforms and builds PACK3.
var init_finance = __esm({
  "src/core/domains/finance.ts"() {
    init_types();
    init_registry();
    registerTransform(
      { name: "account_mask", inputTypes: ["string"], priority: 50, mode: "series" },
      accountMask
    );
    registerTransform(
      { name: "cusip_format", inputTypes: ["string"], priority: 50, mode: "series" },
      cusipFormat
    );
    const op = (column, ...ops) => ({ column, ops });
    const transforms = ["account_mask", "cusip_format", "currency_strip", "date_iso8601"];
    const defaultConfig = makeConfig({
      transforms: [
        op("account_number", "account_mask"),
        op("amount", "currency_strip"),
        op("transaction_date", "date_iso8601")
      ]
    });
    PACK3 = {
      name: "finance",
      description: "Account masking, currency standardization, CUSIP/ISIN formatting",
      transforms,
      defaultConfig
    };
  }
});
284
+
285
+ // src/core/domains/ecommerce.ts
286
+ var ecommerce_exports = {};
287
+ __export(ecommerce_exports, {
288
+ PACK: () => PACK4
289
+ });
290
/**
 * Normalizes SKUs: trims, upper-cases, then removes every character that is
 * not A-Z, 0-9 or "-". Non-string values are passed through unchanged.
 */
function skuNormalize(values) {
  const normalize = (raw) => {
    const upper = raw.trim().toUpperCase();
    return upper.replace(/[^A-Z0-9-]/g, "");
  };
  return values.map((v) => (typeof v === "string" ? normalize(v) : v));
}
296
var PACK4;
// Lazy initializer for the e-commerce domain pack: registers `sku_normalize` and builds PACK4.
var init_ecommerce = __esm({
  "src/core/domains/ecommerce.ts"() {
    init_types();
    init_registry();
    registerTransform(
      { name: "sku_normalize", inputTypes: ["string"], priority: 50, mode: "series" },
      skuNormalize
    );
    const op = (column, ...ops) => ({ column, ops });
    const transforms = ["sku_normalize", "currency_strip", "category_auto_correct", "strip"];
    const defaultConfig = makeConfig({
      transforms: [
        op("sku", "sku_normalize"),
        op("price", "currency_strip"),
        op("category", "strip", "title_case")
      ]
    });
    PACK4 = {
      name: "ecommerce",
      description: "SKU normalization, price cleaning, category standardization",
      transforms,
      defaultConfig
    };
  }
});
319
+
320
+ // src/core/domains/real-estate.ts
321
+ var real_estate_exports = {};
322
+ __export(real_estate_exports, {
323
+ PACK: () => PACK5
324
+ });
325
/** Trims and upper-cases MLS identifiers; non-string values are passed through unchanged. */
function mlsNormalize(values) {
  return values.map((v) => {
    if (typeof v !== "string") return v;
    const trimmed = v.trim();
    return trimmed.toUpperCase();
  });
}
331
var PACK5;
// Lazy initializer for the real-estate domain pack: registers `mls_normalize` and builds PACK5.
var init_real_estate = __esm({
  "src/core/domains/real-estate.ts"() {
    init_types();
    init_registry();
    registerTransform(
      { name: "mls_normalize", inputTypes: ["string"], priority: 50, mode: "series" },
      mlsNormalize
    );
    const op = (column, ...ops) => ({ column, ops });
    const transforms = ["mls_normalize", "address_standardize", "zip_normalize", "currency_strip"];
    const defaultConfig = makeConfig({
      transforms: [
        op("mls_id", "mls_normalize"),
        op("address", "strip", "address_standardize"),
        op("price", "currency_strip"),
        op("zip", "zip_normalize")
      ]
    });
    PACK5 = {
      name: "real_estate",
      description: "Address parsing (USPS), MLS ID normalization, price cleaning",
      transforms,
      defaultConfig
    };
  }
});
355
+
356
+ // src/core/index.ts
357
+ init_types();
358
+
359
+ // src/core/data.ts
360
// String spellings (compared lower-cased and trimmed) that count as missing values.
var NULL_STRINGS = /* @__PURE__ */ new Set([
  "",
  "null",
  "none",
  "nan",
  "n/a",
  "na",
  "nil",
  "#n/a",
  "missing",
  "undefined"
]);

/**
 * True for null, undefined, NaN, and strings whose lower-cased, trimmed form
 * is listed in NULL_STRINGS.
 */
function isNullish(v) {
  if (v === null || v === void 0) return true;
  if (typeof v === "string") return NULL_STRINGS.has(v.toLowerCase().trim());
  if (typeof v === "number") return Number.isNaN(v);
  return false;
}

/**
 * Coerces a raw cell to a column value: nullish cells become null; strings,
 * numbers and booleans are kept as-is; anything else is stringified.
 */
function toColumnValue(v) {
  if (isNullish(v)) return null;
  if (typeof v === "string") return v;
  if (typeof v === "number") return v;
  if (typeof v === "boolean") return v;
  return String(v);
}

/**
 * Small seeded PRNG. Returns a function yielding numbers in [0, 1);
 * the same seed always produces the same sequence.
 */
function mulberry32(seed) {
  let s = seed | 0;
  return () => {
    s = s + 1831565813 | 0;
    let t = Math.imul(s ^ s >>> 15, 1 | s);
    t = t + Math.imul(t ^ t >>> 7, 61 | t) ^ t;
    return ((t ^ t >>> 14) >>> 0) / 4294967296;
  };
}

/** Row-oriented table (array of plain objects) with column-wise helpers. */
var TabularData = class _TabularData {
  _rows;
  _columnCache = /* @__PURE__ */ new Map();
  constructor(rows) {
    this._rows = rows;
  }
  get rows() {
    return this._rows;
  }
  /** Column names, taken from the keys of the first row only ([] when empty). */
  get columns() {
    if (this._rows.length === 0) return [];
    return Object.keys(this._rows[0]);
  }
  get rowCount() {
    return this._rows.length;
  }
  // ---- Column access ----
  /**
   * Null-coerced values of one column (see toColumnValue).
   * The array is cached per column name and the cached instance itself is
   * returned, so callers must not mutate it.
   */
  column(name) {
    const cached = this._columnCache.get(name);
    if (cached) return cached;
    const values = this._rows.map((r) => toColumnValue(r[name]));
    this._columnCache.set(name, values);
    return values;
  }
  /** Raw column access — preserves original values without null coercion.
   * Use for profiling where "N/A" should remain a string, not become null. */
  rawColumn(name) {
    return this._rows.map((r) => {
      const v = r[name];
      if (v === null || v === void 0) return null;
      if (typeof v === "string" || typeof v === "number" || typeof v === "boolean") return v;
      return String(v);
    });
  }
  // ---- Null handling ----
  /** Number of null entries in the coerced column. */
  nullCount(col) {
    let count = 0;
    for (const v of this.column(col)) {
      if (v === null) count++;
    }
    return count;
  }
  /** New array with the non-null values of the coerced column. */
  dropNulls(col) {
    return this.column(col).filter((v) => v !== null);
  }
  // ---- Type inference ----
  /**
   * Infers "null" | "string" | "boolean" | "float" | "integer" from the
   * non-null values. Any string makes the column "string"; booleans mixed with
   * numbers resolve to the numeric type.
   */
  dtype(col) {
    const values = this.dropNulls(col);
    if (values.length === 0) return "null";
    let hasInt = false;
    let hasFloat = false;
    let hasBool = false;
    let hasString = false;
    for (const v of values) {
      if (typeof v === "boolean") {
        hasBool = true;
      } else if (typeof v === "number") {
        if (Number.isInteger(v)) hasInt = true;
        else hasFloat = true;
      } else {
        hasString = true;
      }
    }
    if (hasString) return "string";
    if (hasBool && !hasInt && !hasFloat) return "boolean";
    if (hasFloat) return "float";
    if (hasInt) return "integer";
    return "string";
  }
  // ---- Aggregation ----
  /** Count of distinct non-null values. */
  nUnique(col) {
    const set = /* @__PURE__ */ new Set();
    for (const v of this.dropNulls(col)) set.add(v);
    return set.size;
  }
  /** Map from each distinct non-null value to its number of occurrences. */
  valueCounts(col) {
    const map = /* @__PURE__ */ new Map();
    for (const v of this.dropNulls(col)) {
      map.set(v, (map.get(v) ?? 0) + 1);
    }
    return map;
  }
  /** MUST use loop — Math.min(...array) crashes on >65K elements. */
  min(col) {
    const nums = this.numericValues(col);
    if (nums.length === 0) return null;
    let m = nums[0];
    for (let i = 1; i < nums.length; i++) {
      if (nums[i] < m) m = nums[i];
    }
    return m;
  }
  /** MUST use loop — Math.max(...array) crashes on >65K elements. */
  max(col) {
    const nums = this.numericValues(col);
    if (nums.length === 0) return null;
    let m = nums[0];
    for (let i = 1; i < nums.length; i++) {
      if (nums[i] > m) m = nums[i];
    }
    return m;
  }
  /** Arithmetic mean of the finite numeric values, or null when there are none. */
  mean(col) {
    const nums = this.numericValues(col);
    if (nums.length === 0) return null;
    let sum = 0;
    for (const n of nums) sum += n;
    return sum / nums.length;
  }
  /** Sample standard deviation (n - 1 denominator); null with fewer than two numbers. */
  std(col) {
    const nums = this.numericValues(col);
    if (nums.length < 2) return null;
    // Mean is computed from `nums` directly instead of re-scanning the column via mean().
    let sum = 0;
    for (const n of nums) sum += n;
    const avg = sum / nums.length;
    let sumSq = 0;
    for (const n of nums) sumSq += (n - avg) ** 2;
    return Math.sqrt(sumSq / (nums.length - 1));
  }
  // ---- Filtering & sampling ----
  /** New table containing the rows for which `predicate` returns truthy. */
  filter(predicate) {
    return new _TabularData(this._rows.filter(predicate));
  }
  /** New table with the first `n` rows. */
  head(n) {
    return new _TabularData(this._rows.slice(0, n));
  }
  /**
   * Deterministic sample of `n` rows without replacement, seeded by `seed`.
   * Returns `this` (not a copy) when `n` is at least the row count.
   */
  sample(n, seed = 42) {
    if (n >= this._rows.length) return this;
    const rng = mulberry32(seed);
    const indices = Array.from({ length: this._rows.length }, (_, i) => i);
    // Partial shuffle from the end: only the last `n` slots need to be randomized.
    for (let i = indices.length - 1; i > 0 && indices.length - 1 - i < n; i--) {
      const j = Math.floor(rng() * (i + 1));
      [indices[i], indices[j]] = [indices[j], indices[i]];
    }
    const sampled = indices.slice(indices.length - n).map((i) => this._rows[i]);
    return new _TabularData(sampled);
  }
  // ---- String operations ----
  /**
   * Per-row boolean: does the string value match `pattern`? Non-strings give false.
   * `lastIndex` is reset before every test so that global (/g) and sticky (/y)
   * patterns do not carry match state from one value into the next.
   */
  strContains(col, pattern) {
    const matches = this.column(col).map((v) => {
      if (typeof v !== "string") return false;
      pattern.lastIndex = 0;
      return pattern.test(v);
    });
    // Leave the caller's regex in a clean state.
    pattern.lastIndex = 0;
    return matches;
  }
  /** Per-row string length (UTF-16 code units); non-strings give 0. */
  strLengths(col) {
    return this.column(col).map(
      (v) => typeof v === "string" ? v.length : 0
    );
  }
  // ---- Casting ----
  /** Per-row Number(v); null for nulls and for non-finite results. */
  castFloat(col) {
    return this.column(col).map((v) => {
      if (v === null) return null;
      const n = Number(v);
      return Number.isFinite(n) ? n : null;
    });
  }
  /** Per-row Number(v) truncated toward zero; null for nulls and non-finite results. */
  castInt(col) {
    return this.column(col).map((v) => {
      if (v === null) return null;
      const n = Number(v);
      return Number.isFinite(n) ? Math.trunc(n) : null;
    });
  }
  // ---- Helpers ----
  /** New array of the finite numbers in the column (numeric strings are not converted). */
  numericValues(col) {
    const result = [];
    for (const v of this.column(col)) {
      if (typeof v === "number" && Number.isFinite(v)) {
        result.push(v);
      }
    }
    return result;
  }
  /** New array of the string values in the column. */
  stringValues(col) {
    const result = [];
    for (const v of this.column(col)) {
      if (typeof v === "string") result.push(v);
    }
    return result;
  }
  /** Finite numbers of the column in ascending order (new array). */
  sortedNumeric(col) {
    return this.numericValues(col).sort((a, b) => a - b);
  }
  /** True when the finite numbers are non-decreasing (or non-increasing if `descending`). */
  isSorted(col, descending = false) {
    const nums = this.numericValues(col);
    for (let i = 1; i < nums.length; i++) {
      if (descending ? nums[i] > nums[i - 1] : nums[i] < nums[i - 1]) {
        return false;
      }
    }
    return true;
  }
};
585
+
586
+ // src/core/transforms/text.ts
587
+ init_registry();
588
/** Applies `fn` to every string in `values`; all other entries are passed through unchanged. */
function mapStrings(values, fn) {
  return values.map((value) => (typeof value === "string" ? fn(value) : value));
}
594
/** Removes leading and trailing whitespace from every string value. */
function strip(values) {
  const trimText = (text) => text.trim();
  return mapStrings(values, trimText);
}
597
+ registerTransform(
598
+ { name: "strip", inputTypes: ["string"], autoApply: true, priority: 90, mode: "expr" },
599
+ strip
600
+ );
601
/** Lower-cases every string value. */
function lowercase(values) {
  const toLower = (text) => text.toLowerCase();
  return mapStrings(values, toLower);
}
604
+ registerTransform(
605
+ { name: "lowercase", inputTypes: ["string"], priority: 50, mode: "expr" },
606
+ lowercase
607
+ );
608
/** Upper-cases every string value. */
function uppercase(values) {
  const toUpper = (text) => text.toUpperCase();
  return mapStrings(values, toUpper);
}
611
+ registerTransform(
612
+ { name: "uppercase", inputTypes: ["string"], priority: 50, mode: "expr" },
613
+ uppercase
614
+ );
615
/**
 * Title-cases every string value: lower-cases it, then upper-cases the first
 * character of each word.
 *
 * Word starts are detected with Unicode-aware classes. The ASCII-only `\b\w`
 * treats accented letters as word breaks and would turn "maría" into "MaríA".
 * For pure-ASCII input the result is the same as with `\b\w`.
 */
function titleCase(values) {
  // A letter/digit/underscore that is not preceded by a letter, digit, mark or underscore.
  const wordStart = /(?<![\p{L}\p{N}\p{M}_])[\p{L}\p{N}_]/gu;
  return mapStrings(
    values,
    (s) => s.toLowerCase().replace(wordStart, (ch) => ch.toUpperCase())
  );
}
621
+ registerTransform(
622
+ { name: "title_case", inputTypes: ["string"], priority: 50, mode: "expr" },
623
+ titleCase
624
+ );
625
/**
 * Applies NFKD normalization to every string value and then removes all
 * combining marks (\p{M}), e.g. stripping accents from decomposed letters.
 */
function normalizeUnicode(values) {
  const stripMarks = (text) => {
    const decomposed = text.normalize("NFKD");
    return decomposed.replace(new RegExp("\\p{M}", "gu"), "");
  };
  return mapStrings(values, stripMarks);
}
631
+ registerTransform(
632
+ { name: "normalize_unicode", inputTypes: ["string"], autoApply: true, priority: 85, mode: "series" },
633
+ normalizeUnicode
634
+ );
635
/**
 * Removes punctuation and symbols from every string value, keeping letters,
 * digits, combining marks, underscores and whitespace.
 *
 * Unicode property classes are used because the ASCII-only `\w` would also
 * delete non-ASCII letters (turning "café" into "caf"). ASCII input is
 * handled exactly as with `[^\w\s]`.
 */
function removePunctuation(values) {
  const notWordOrSpace = /[^\p{L}\p{N}\p{M}_\s]/gu;
  return mapStrings(values, (s) => s.replace(notWordOrSpace, ""));
}
638
+ registerTransform(
639
+ { name: "remove_punctuation", inputTypes: ["string"], priority: 40, mode: "series" },
640
+ removePunctuation
641
+ );
642
/** Replaces each run of whitespace with a single space and trims both ends. */
function collapseWhitespace(values) {
  const collapse = (text) => {
    const singleSpaced = text.replace(/\s+/g, " ");
    return singleSpaced.trim();
  };
  return mapStrings(values, collapse);
}
645
+ registerTransform(
646
+ { name: "collapse_whitespace", inputTypes: ["string"], autoApply: true, priority: 80, mode: "expr" },
647
+ collapseWhitespace
648
+ );
649
/**
 * Cuts every string value to at most `n` UTF-16 code units (default 255).
 * A non-number `n` (e.g. a string argument from a transform spec) is converted
 * with Number(); a falsy conversion result falls back to 255.
 */
function truncate(values, n = 255) {
  let limit;
  if (typeof n === "number") {
    limit = n;
  } else {
    limit = Number(n) || 255;
  }
  return mapStrings(values, (text) => text.slice(0, limit));
}
653
+ registerTransform(
654
+ { name: "truncate", inputTypes: ["string"], priority: 30, mode: "series" },
655
+ truncate
656
+ );
657
/**
 * Replaces typographic single quotes (U+2018, U+2019, U+201A, U+201B) with "'"
 * and typographic double quotes (U+201C, U+201D, U+201E, U+201F) with '"'.
 */
function normalizeQuotes(values) {
  const straighten = (text) => {
    const singlesFixed = text.replace(/[\u2018\u2019\u201A\u201B]/g, "'");
    return singlesFixed.replace(/[\u201C\u201D\u201E\u201F]/g, '"');
  };
  return mapStrings(values, straighten);
}
663
+ registerTransform(
664
+ { name: "normalize_quotes", inputTypes: ["string"], autoApply: true, priority: 84, mode: "series" },
665
+ normalizeQuotes
666
+ );
667
/** Deletes every `<...>` span (from "<" up to the next ">") from each string value. */
function removeHtmlTags(values) {
  const dropTags = (text) => text.replace(/<[^>]*>/g, "");
  return mapStrings(values, dropTags);
}
670
+ registerTransform(
671
+ { name: "remove_html_tags", inputTypes: ["string"], priority: 45, mode: "series" },
672
+ removeHtmlTags
673
+ );
674
/**
 * Deletes http:// and https:// URLs (up to the next whitespace) from each
 * string value, then trims both ends. Inner whitespace is left as-is.
 */
function removeUrls(values) {
  const dropUrls = (text) => {
    const withoutUrls = text.replace(/https?:\/\/[^\s]+/g, "");
    return withoutUrls.trim();
  };
  return mapStrings(values, dropUrls);
}
680
+ registerTransform(
681
+ { name: "remove_urls", inputTypes: ["string"], priority: 40, mode: "series" },
682
+ removeUrls
683
+ );
684
/** Deletes every ASCII digit from each string value. */
function removeDigits(values) {
  const dropDigits = (text) => text.replace(/\d/g, "");
  return mapStrings(values, dropDigits);
}
687
+ registerTransform(
688
+ { name: "remove_digits", inputTypes: ["string"], priority: 35, mode: "series" },
689
+ removeDigits
690
+ );
691
/**
 * Left-pads every string value to `width` (default 10) with `char` (default "0").
 * A non-number `width` is converted with Number(), falling back to 10 when the
 * result is falsy; a non-string `char` falls back to "0".
 */
function padLeft(values, width = 10, char = "0") {
  let targetWidth;
  if (typeof width === "number") {
    targetWidth = width;
  } else {
    targetWidth = Number(width) || 10;
  }
  const fill = typeof char === "string" ? char : "0";
  return mapStrings(values, (text) => text.padStart(targetWidth, fill));
}
696
+ registerTransform(
697
+ { name: "pad_left", inputTypes: ["string"], priority: 30, mode: "series" },
698
+ padLeft
699
+ );
700
/**
 * Right-pads every string value to `width` (default 10) with `char` (default " ").
 * A non-number `width` is converted with Number(), falling back to 10 when the
 * result is falsy; a non-string `char` falls back to " ".
 */
function padRight(values, width = 10, char = " ") {
  let targetWidth;
  if (typeof width === "number") {
    targetWidth = width;
  } else {
    targetWidth = Number(width) || 10;
  }
  const fill = typeof char === "string" ? char : " ";
  return mapStrings(values, (text) => text.padEnd(targetWidth, fill));
}
705
+ registerTransform(
706
+ { name: "pad_right", inputTypes: ["string"], priority: 30, mode: "series" },
707
+ padRight
708
+ );
709
/**
 * Deletes characters in a fixed set of emoji-related code point ranges
 * (plus ZWJ, the keycap combiner, variation selectors and tag characters)
 * from every string value.
 */
function removeEmojis(values) {
  const emojiPattern = /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{FE00}-\u{FE0F}\u{1F900}-\u{1F9FF}\u{1FA00}-\u{1FA6F}\u{1FA70}-\u{1FAFF}\u{200D}\u{20E3}\u{E0020}-\u{E007F}]/gu;
  const dropEmojis = (text) => text.replace(emojiPattern, "");
  return mapStrings(values, dropEmojis);
}
713
+ registerTransform(
714
+ { name: "remove_emojis", inputTypes: ["string"], priority: 38, mode: "series" },
715
+ removeEmojis
716
+ );
717
/**
 * Best-effort repair of UTF-8 text that was mis-decoded one byte per character.
 *
 * Each string whose code units are all <= 255 is re-interpreted as a byte
 * sequence and decoded as strict UTF-8. When any code unit is > 255, or the
 * bytes are not valid UTF-8, the original string is returned unchanged.
 * Non-string values are passed through.
 */
function fixMojibake(values) {
  return mapStrings(values, (s) => {
    const bytes = new Uint8Array(s.length);
    for (let i = 0; i < s.length; i++) {
      const code = s.charCodeAt(i);
      // Not representable as a single byte, so this cannot be byte-per-char mojibake.
      if (code > 255) return s;
      bytes[i] = code;
    }
    try {
      return new TextDecoder("utf-8", { fatal: true }).decode(bytes);
    } catch {
      // Deliberate best effort: invalid UTF-8 means the text was not mojibake.
      return s;
    }
  });
}
734
+ registerTransform(
735
+ { name: "fix_mojibake", inputTypes: ["string"], priority: 86, mode: "series" },
736
+ fixMojibake
737
+ );
738
/** Converts CRLF and lone CR line endings to LF in every string value. */
function normalizeLineEndings(values) {
  const toLf = (text) => {
    const withoutCrlf = text.replace(/\r\n/g, "\n");
    return withoutCrlf.replace(/\r/g, "\n");
  };
  return mapStrings(values, toLf);
}
741
+ registerTransform(
742
+ { name: "normalize_line_endings", inputTypes: ["string"], priority: 82, mode: "series" },
743
+ normalizeLineEndings
744
+ );
745
/**
 * Replaces every string value with the numbers found in it (optional leading
 * "-", optional decimal part), joined by single spaces; "" when none are found.
 */
function extractNumbers(values) {
  const pullNumbers = (text) => {
    const found = text.match(/-?\d+(?:\.\d+)?/g);
    if (!found) return "";
    return found.join(" ");
  };
  return mapStrings(values, pullNumbers);
}
751
+ registerTransform(
752
+ { name: "extract_numbers", inputTypes: ["string"], priority: 30, mode: "series" },
753
+ extractNumbers
754
+ );
755
+
756
+ // src/core/transforms/phone.ts
757
+ init_registry();
758
/** Returns `s` with every non-digit character removed. */
function extractDigits(s) {
  const digitsOnly = s.replace(/\D/g, "");
  return digitsOnly;
}
761
/**
 * Reduces a phone string to its 10-digit US number: 10 digits are returned
 * as-is, 11 digits starting with "1" lose the leading "1"; anything else
 * yields null.
 */
function normalizeUsDigits(s) {
  const digits = extractDigits(s);
  switch (digits.length) {
    case 10:
      return digits;
    case 11:
      return digits[0] === "1" ? digits.slice(1) : null;
    default:
      return null;
  }
}
767
/**
 * Formats recognizable US numbers as "+1XXXXXXXXXX". Strings that do not
 * reduce to a 10-digit US number, and non-string values, are returned unchanged.
 */
function phoneE164(values) {
  const format = (raw) => {
    const national = normalizeUsDigits(raw);
    return national === null ? raw : `+1${national}`;
  };
  return values.map((v) => (typeof v === "string" ? format(v) : v));
}
775
+ registerTransform(
776
+ { name: "phone_e164", inputTypes: ["phone"], autoApply: true, priority: 50, mode: "series" },
777
+ phoneE164
778
+ );
779
/**
 * Formats recognizable US numbers as "(AAA) BBB-CCCC". Strings that do not
 * reduce to a 10-digit US number, and non-string values, are returned unchanged.
 */
function phoneNational(values) {
  const format = (raw) => {
    const national = normalizeUsDigits(raw);
    if (national === null) return raw;
    const area = national.slice(0, 3);
    const exchange = national.slice(3, 6);
    const line = national.slice(6);
    return `(${area}) ${exchange}-${line}`;
  };
  return values.map((v) => (typeof v === "string" ? format(v) : v));
}
787
+ registerTransform(
788
+ { name: "phone_national", inputTypes: ["phone"], priority: 50, mode: "series" },
789
+ phoneNational
790
+ );
791
/** Reduces every string value to its digits only; non-string values are passed through. */
function phoneDigits(values) {
  return values.map((v) => (typeof v === "string" ? extractDigits(v) : v));
}
797
+ registerTransform(
798
+ { name: "phone_digits", inputTypes: ["phone"], priority: 50, mode: "series" },
799
+ phoneDigits
800
+ );
801
/**
 * Maps every string value to true when it contains exactly 10 digits, or 11
 * digits starting with "1"; false otherwise. Non-string values are passed through.
 */
function phoneValidate(values) {
  // normalizeUsDigits returns non-null for exactly the digit shapes accepted here.
  const isUsNumber = (raw) => normalizeUsDigits(raw) !== null;
  return values.map((v) => (typeof v === "string" ? isUsNumber(v) : v));
}
808
+ registerTransform(
809
+ { name: "phone_validate", inputTypes: ["phone"], priority: 60, mode: "series" },
810
+ phoneValidate
811
+ );
812
/**
 * Maps every string value to the number 1 when it has the digit shape of a US
 * number (10 digits, or 11 digits starting with "1"), otherwise to null.
 * Non-string values are passed through.
 */
function phoneCountryCode(values) {
  // normalizeUsDigits returns non-null for exactly the digit shapes accepted here.
  const countryCodeOf = (raw) => (normalizeUsDigits(raw) === null ? null : 1);
  return values.map((v) => (typeof v === "string" ? countryCodeOf(v) : v));
}
821
+ registerTransform(
822
+ { name: "phone_country_code", inputTypes: ["phone"], priority: 45, mode: "series" },
823
+ phoneCountryCode
824
+ );
825
+
826
+ // src/core/transforms/names.ts
827
+ init_registry();
828
/** Applies `fn` to every string in `values`; all other entries are passed through unchanged. */
function mapStrings2(values, fn) {
  return values.map((value) => (typeof value === "string" ? fn(value) : value));
}
834
+ var _TITLES = /^(Mr\.?|Mrs\.?|Ms\.?|Miss\.?|Dr\.?|Prof\.?|Rev\.?|Sr\.?|Sra\.?)\s+/i;
835
+ var _SUFFIXES = /\s+(Jr\.?|Sr\.?|II|III|IV|MD|PhD|PharmD|DDS|DVM|Esq\.?|CPA|RN|DO)$/i;
836
+ var _INITIAL_PATTERN = /\b[A-Z]\.\s/;
837
+ var _MC_PATTERN = /\bMc(\w)/g;
838
+ var _O_PATTERN = /\bO'(\w)/g;
839
+ var _NICKNAMES = {
840
+ bob: "Robert",
841
+ rob: "Robert",
842
+ robby: "Robert",
843
+ robbie: "Robert",
844
+ bobby: "Robert",
845
+ bill: "William",
846
+ billy: "William",
847
+ will: "William",
848
+ willy: "William",
849
+ jim: "James",
850
+ jimmy: "James",
851
+ jamie: "James",
852
+ mike: "Michael",
853
+ mikey: "Michael",
854
+ mick: "Michael",
855
+ dick: "Richard",
856
+ rick: "Richard",
857
+ rich: "Richard",
858
+ ricky: "Richard",
859
+ tom: "Thomas",
860
+ tommy: "Thomas",
861
+ joe: "Joseph",
862
+ joey: "Joseph",
863
+ jack: "John",
864
+ johnny: "John",
865
+ jon: "Jonathan",
866
+ dave: "David",
867
+ davy: "David",
868
+ steve: "Steven",
869
+ stevie: "Steven",
870
+ dan: "Daniel",
871
+ danny: "Daniel",
872
+ pat: "Patrick",
873
+ patty: "Patricia",
874
+ patsy: "Patricia",
875
+ chris: "Christopher",
876
+ kit: "Christopher",
877
+ tony: "Anthony",
878
+ ed: "Edward",
879
+ eddie: "Edward",
880
+ ted: "Edward",
881
+ teddy: "Edward",
882
+ al: "Albert",
883
+ bert: "Albert",
884
+ charlie: "Charles",
885
+ chuck: "Charles",
886
+ sam: "Samuel",
887
+ sammy: "Samuel",
888
+ ben: "Benjamin",
889
+ benny: "Benjamin",
890
+ matt: "Matthew",
891
+ andy: "Andrew",
892
+ drew: "Andrew",
893
+ nick: "Nicholas",
894
+ alex: "Alexander",
895
+ liz: "Elizabeth",
896
+ beth: "Elizabeth",
897
+ betty: "Elizabeth",
898
+ kate: "Katherine",
899
+ kathy: "Katherine",
900
+ katie: "Katherine",
901
+ sue: "Susan",
902
+ susie: "Susan",
903
+ meg: "Margaret",
904
+ maggie: "Margaret",
905
+ peggy: "Margaret",
906
+ jenny: "Jennifer",
907
+ jen: "Jennifer",
908
+ debbie: "Deborah",
909
+ deb: "Deborah",
910
+ barb: "Barbara",
911
+ cindy: "Cynthia",
912
+ sandy: "Sandra"
913
+ };
914
/**
 * Splits `row[column]` ("First [Middle] Last") into `first_name` / `last_name`.
 *
 * Returns new row objects; the input rows are not mutated.
 * - non-string value: both fields are null
 * - single token: first_name is the token, last_name is ""
 * - otherwise: last_name is the final whitespace-delimited token and
 *   first_name is everything before it
 *
 * The split point is the last run of any whitespace (not just one U+0020), so
 * "John  Smith" and "John\tSmith" both yield first_name "John".
 *
 * @param {Array<Object>} rows Input rows.
 * @param {string} column Name of the column holding the full name.
 * @returns {Array<Object>} Row copies with first_name and last_name set.
 */
function splitName(rows, column) {
  return rows.map((row) => {
    const val = row[column];
    if (typeof val !== "string") {
      return { ...row, first_name: null, last_name: null };
    }
    const trimmed = val.trim();
    // Index of the last whitespace character (the one directly before the final token).
    const boundary = trimmed.search(/\s\S*$/);
    if (boundary === -1) {
      return { ...row, first_name: trimmed, last_name: "" };
    }
    return {
      ...row,
      first_name: trimmed.slice(0, boundary).trimEnd(),
      last_name: trimmed.slice(boundary + 1)
    };
  });
}
932
+ registerTransform(
933
+ { name: "split_name", inputTypes: ["name"], priority: 50, mode: "dataframe" },
934
+ splitName
935
+ );
936
/**
 * Splits `row[column]` ("Last, First") at the first comma into `last_name` /
 * `first_name`, both trimmed. Returns new row objects.
 * Non-string values give null for both fields; a value without a comma becomes
 * the (trimmed) first_name with last_name "".
 */
function splitNameReverse(rows, column) {
  const splitOne = (val) => {
    if (typeof val !== "string") {
      return { first_name: null, last_name: null };
    }
    const commaIdx = val.indexOf(",");
    if (commaIdx < 0) {
      return { first_name: val.trim(), last_name: "" };
    }
    return {
      last_name: val.slice(0, commaIdx).trim(),
      first_name: val.slice(commaIdx + 1).trim()
    };
  };
  return rows.map((row) => ({ ...row, ...splitOne(row[column]) }));
}
953
+ registerTransform(
954
+ { name: "split_name_reverse", inputTypes: ["name"], priority: 50, mode: "dataframe" },
955
+ splitNameReverse
956
+ );
957
/** Removes one leading title matched by `_TITLES` from every string value, then trims. */
function stripTitles(values) {
  const dropTitle = (text) => {
    const withoutTitle = text.replace(_TITLES, "");
    return withoutTitle.trim();
  };
  return mapStrings2(values, dropTitle);
}
960
+ registerTransform(
961
+ { name: "strip_titles", inputTypes: ["name"], autoApply: true, priority: 70, mode: "series" },
962
+ stripTitles
963
+ );
964
/** Removes one trailing suffix matched by `_SUFFIXES` from every string value, then trims. */
function stripSuffixes(values) {
  const dropSuffix = (text) => {
    const withoutSuffix = text.replace(_SUFFIXES, "");
    return withoutSuffix.trim();
  };
  return mapStrings2(values, dropSuffix);
}
967
+ registerTransform(
968
+ { name: "strip_suffixes", inputTypes: ["name"], priority: 60, mode: "series" },
969
+ stripSuffixes
970
+ );
971
/**
 * Proper-cases names: lower-cases the value, upper-cases the first character
 * of each word, then restores the inner capital after "Mc" and "O'"
 * (McDonald, O'Brien) via `_MC_PATTERN` / `_O_PATTERN`.
 *
 * Word starts are detected with Unicode-aware classes. The ASCII-only `\b\w`
 * treats accented letters as word breaks and would turn "maría" into "MaríA".
 * Pure-ASCII input is handled exactly as with `\b\w`.
 */
function nameProper(values) {
  // A letter/digit/underscore that is not preceded by a letter, digit, mark or underscore.
  const wordStart = /(?<![\p{L}\p{N}\p{M}_])[\p{L}\p{N}_]/gu;
  return mapStrings2(values, (s) => {
    let result = s.toLowerCase().replace(wordStart, (ch) => ch.toUpperCase());
    result = result.replace(_MC_PATTERN, (_match, letter) => `Mc${letter.toUpperCase()}`);
    result = result.replace(_O_PATTERN, (_match, letter) => `O'${letter.toUpperCase()}`);
    return result;
  });
}
979
+ registerTransform(
980
+ { name: "name_proper", inputTypes: ["name"], priority: 45, mode: "series" },
981
+ nameProper
982
+ );
983
/**
 * Flags values that contain an initial (per `_INITIAL_PATTERN`) without
 * changing them. Returns a pair `[result, flagged]`: `result` mirrors `values`
 * with undefined replaced by null, and `flagged` lists the indices of the
 * string values that matched.
 */
function initialExpand(values) {
  const flagged = [];
  const result = [];
  values.forEach((value, index) => {
    if (typeof value === "string" && _INITIAL_PATTERN.test(value)) {
      flagged.push(index);
    }
    result.push(value ?? null);
  });
  return [result, flagged];
}
993
+ registerTransform(
994
+ { name: "initial_expand", inputTypes: ["name"], priority: 40, mode: "series" },
995
+ initialExpand
996
+ );
997
/**
 * Replaces a value that is exactly a known nickname (after trimming and
 * lower-casing) with its formal name from `_NICKNAMES`; any other string is
 * returned unchanged. Non-string values are passed through.
 *
 * Only own keys of the table are consulted. A plain property read would
 * resolve inherited members, so input such as "constructor" or "toString"
 * would come back as a function instead of a string.
 */
function nicknameStandardize(values) {
  return mapStrings2(values, (s) => {
    const lookup = s.trim().toLowerCase();
    return Object.hasOwn(_NICKNAMES, lookup) ? _NICKNAMES[lookup] : s;
  });
}
1003
+ registerTransform(
1004
+ { name: "nickname_standardize", inputTypes: ["name"], priority: 42, mode: "series" },
1005
+ nicknameStandardize
1006
+ );
1007
/**
 * Adds `full_name` to every row by joining the trimmed, non-empty values of
 * `row[column]` and `row[lastNameCol]` with a space (null when both are empty).
 * Returns new row objects. When the first row lacks the last-name column, the
 * rows are only shallow-copied and no `full_name` is added.
 * A non-string `lastNameCol` falls back to "last_name".
 */
function mergeName(rows, column, lastNameCol = "last_name") {
  const lnCol = typeof lastNameCol === "string" ? lastNameCol : "last_name";
  const firstRowLacksColumn = rows.length > 0 && !(lnCol in rows[0]);
  if (firstRowLacksColumn) {
    return rows.map((r) => ({ ...r }));
  }
  // null/undefined contribute nothing; everything else is stringified and trimmed.
  const cleanPart = (value) => (value === null || value === void 0 ? "" : String(value).trim());
  return rows.map((row) => {
    const parts = [cleanPart(row[column]), cleanPart(row[lnCol])].filter((part) => part !== "");
    const fullName = parts.length > 0 ? parts.join(" ") : null;
    return { ...row, full_name: fullName };
  });
}
1027
+ registerTransform(
1028
+ { name: "merge_name", inputTypes: ["name"], priority: 45, mode: "dataframe" },
1029
+ mergeName
1030
+ );
1031
+
1032
+ // src/core/transforms/address.ts
1033
+ init_registry();
1034
/**
 * Applies `fn` to every string entry of `values`; any other entry (including
 * `null`) is passed through untouched.
 *
 * @param {Array<*>} values - Column values.
 * @param {function(string): *} fn - Transformation for string entries.
 * @returns {Array<*>} New array of the same length.
 */
function mapStrings3(values, fn) {
  return values.map((entry) => (typeof entry === "string" ? fn(entry) : entry));
}
1040
// Full street-type word -> abbreviation.
var _STREET_ABBREV = {
  Street: "St",
  Avenue: "Ave",
  Boulevard: "Blvd",
  Drive: "Dr",
  Lane: "Ln",
  Road: "Rd",
  Court: "Ct",
  Place: "Pl",
  Circle: "Cir",
  Trail: "Trl",
  Way: "Way",
  Parkway: "Pkwy",
  Highway: "Hwy",
  Terrace: "Ter",
  Square: "Sq"
};
// Inverse table (abbreviation -> full word), derived so the two cannot drift apart.
var _STREET_EXPAND = Object.fromEntries(
  Object.entries(_STREET_ABBREV).map(([full, abbr]) => [abbr, full])
);
1061
// US state (and DC) name -> two-letter postal abbreviation.
var _STATES = {
  Alabama: "AL",
  Alaska: "AK",
  Arizona: "AZ",
  Arkansas: "AR",
  California: "CA",
  Colorado: "CO",
  Connecticut: "CT",
  Delaware: "DE",
  Florida: "FL",
  Georgia: "GA",
  Hawaii: "HI",
  Idaho: "ID",
  Illinois: "IL",
  Indiana: "IN",
  Iowa: "IA",
  Kansas: "KS",
  Kentucky: "KY",
  Louisiana: "LA",
  Maine: "ME",
  Maryland: "MD",
  Massachusetts: "MA",
  Michigan: "MI",
  Minnesota: "MN",
  Mississippi: "MS",
  Missouri: "MO",
  Montana: "MT",
  Nebraska: "NE",
  Nevada: "NV",
  "New Hampshire": "NH",
  "New Jersey": "NJ",
  "New Mexico": "NM",
  "New York": "NY",
  "North Carolina": "NC",
  "North Dakota": "ND",
  Ohio: "OH",
  Oklahoma: "OK",
  Oregon: "OR",
  Pennsylvania: "PA",
  "Rhode Island": "RI",
  "South Carolina": "SC",
  "South Dakota": "SD",
  Tennessee: "TN",
  Texas: "TX",
  Utah: "UT",
  Vermont: "VT",
  Virginia: "VA",
  Washington: "WA",
  "West Virginia": "WV",
  Wisconsin: "WI",
  Wyoming: "WY",
  "District Of Columbia": "DC"
};
// Derived lookup tables, indexed directly with user-supplied text.
// They are created without a prototype so that inputs such as "constructor"
// or "__proto__" cannot resolve to inherited Object.prototype members.
var _STATES_REVERSE = Object.create(null); // abbreviation -> name
var _STATES_LOWER = Object.create(null); // lower-cased name -> abbreviation
for (const [name, abbr] of Object.entries(_STATES)) {
  _STATES_REVERSE[abbr] = name;
  _STATES_LOWER[name.toLowerCase()] = abbr;
}
1122
+ var _COUNTRIES = {
1123
+ "united states": "US",
1124
+ "united states of america": "US",
1125
+ usa: "US",
1126
+ us: "US",
1127
+ "u.s.a.": "US",
1128
+ "u.s.": "US",
1129
+ america: "US",
1130
+ "united kingdom": "GB",
1131
+ uk: "GB",
1132
+ "great britain": "GB",
1133
+ england: "GB",
1134
+ scotland: "GB",
1135
+ wales: "GB",
1136
+ "northern ireland": "GB",
1137
+ canada: "CA",
1138
+ ca: "CA",
1139
+ australia: "AU",
1140
+ au: "AU",
1141
+ germany: "DE",
1142
+ deutschland: "DE",
1143
+ de: "DE",
1144
+ france: "FR",
1145
+ fr: "FR",
1146
+ italy: "IT",
1147
+ italia: "IT",
1148
+ it: "IT",
1149
+ spain: "ES",
1150
+ espana: "ES",
1151
+ es: "ES",
1152
+ mexico: "MX",
1153
+ mx: "MX",
1154
+ brazil: "BR",
1155
+ brasil: "BR",
1156
+ br: "BR",
1157
+ japan: "JP",
1158
+ jp: "JP",
1159
+ china: "CN",
1160
+ cn: "CN",
1161
+ india: "IN",
1162
+ in: "IN",
1163
+ "south korea": "KR",
1164
+ korea: "KR",
1165
+ kr: "KR",
1166
+ netherlands: "NL",
1167
+ holland: "NL",
1168
+ nl: "NL",
1169
+ sweden: "SE",
1170
+ se: "SE",
1171
+ norway: "NO",
1172
+ no: "NO",
1173
+ denmark: "DK",
1174
+ dk: "DK",
1175
+ switzerland: "CH",
1176
+ ch: "CH",
1177
+ ireland: "IE",
1178
+ ie: "IE",
1179
+ "new zealand": "NZ",
1180
+ nz: "NZ",
1181
+ singapore: "SG",
1182
+ sg: "SG",
1183
+ portugal: "PT",
1184
+ pt: "PT",
1185
+ argentina: "AR",
1186
+ ar: "AR",
1187
+ colombia: "CO",
1188
+ co: "CO",
1189
+ philippines: "PH",
1190
+ ph: "PH",
1191
+ poland: "PL",
1192
+ pl: "PL",
1193
+ belgium: "BE",
1194
+ be: "BE",
1195
+ austria: "AT",
1196
+ at: "AT"
1197
+ };
1198
// [pattern, replacement] pairs for unit designators. Every pattern is anchored
// to the start of the string, so only a leading designator is rewritten.
var _UNIT_PATTERNS = [
  // "Apt 4", "Apt. 4", "Apartment 4" -> "Unit 4"
  [/^(?:Apt|Apartment)\.?\s+/i, "Unit "],
  // "Ste 200", "Suite 200" -> "Ste 200"
  [/^(?:Ste|Suite)\.?\s+/i, "Ste "],
  // "#12", "# 12" -> "Unit 12"
  [/^#\s*/i, "Unit "]
];
1203
// [regex, abbreviation] pairs: each full street word as a whole-word,
// case-insensitive, global pattern.
var _ABBREV_PATTERNS = [];
for (const [full, abbr] of Object.entries(_STREET_ABBREV)) {
  _ABBREV_PATTERNS.push([new RegExp(`\\b${full}\\b`, "gi"), abbr]);
}
// [regex, full word] pairs: the inverse direction, built the same way.
var _EXPAND_PATTERNS = [];
for (const [abbr, full] of Object.entries(_STREET_EXPAND)) {
  _EXPAND_PATTERNS.push([new RegExp(`\\b${abbr}\\b`, "gi"), full]);
}
1209
/**
 * Applies every `_ABBREV_PATTERNS` replacement, in table order, to each string
 * value.
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} New array; non-string entries are untouched.
 */
function addressStandardize(values) {
  const abbreviate = (text) =>
    _ABBREV_PATTERNS.reduce((acc, [pattern, abbr]) => acc.replace(pattern, abbr), text);
  return mapStrings3(values, abbreviate);
}
1218
+ registerTransform(
1219
+ { name: "address_standardize", inputTypes: ["address"], priority: 50, mode: "series" },
1220
+ addressStandardize
1221
+ );
1222
/**
 * Applies every `_EXPAND_PATTERNS` replacement, in table order, to each string
 * value.
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} New array; non-string entries are untouched.
 */
function addressExpand(values) {
  const expand = (text) =>
    _EXPAND_PATTERNS.reduce((acc, [pattern, full]) => acc.replace(pattern, full), text);
  return mapStrings3(values, expand);
}
1231
+ registerTransform(
1232
+ { name: "address_expand", inputTypes: ["address"], priority: 50, mode: "series" },
1233
+ addressExpand
1234
+ );
1235
/**
 * Converts state names to their two-letter abbreviation.
 *
 * A two-character value that is a known abbreviation is returned upper-cased.
 * Otherwise the trimmed, lower-cased value is looked up in `_STATES_LOWER`.
 * Unrecognised values are returned unchanged (not trimmed).
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} New array; non-string entries are untouched.
 */
function stateAbbreviate(values) {
  return mapStrings3(values, (s) => {
    const text = s.trim();
    const upper = text.toUpperCase();
    const isKnownAbbreviation = text.length === 2 && _STATES_REVERSE[upper];
    if (isKnownAbbreviation) {
      return upper;
    }
    return _STATES_LOWER[text.toLowerCase()] ?? s;
  });
}
1245
+ registerTransform(
1246
+ { name: "state_abbreviate", inputTypes: ["state", "string"], priority: 50, mode: "series" },
1247
+ stateAbbreviate
1248
+ );
1249
/**
 * Expands two-letter state abbreviations to the full state name via
 * `_STATES_REVERSE`; the lookup key is the trimmed, upper-cased value.
 * Unrecognised values are returned unchanged.
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} New array; non-string entries are untouched.
 */
function stateExpand(values) {
  const expand = (s) => {
    const key = s.trim().toUpperCase();
    return _STATES_REVERSE[key] ?? s;
  };
  return mapStrings3(values, expand);
}
1254
+ registerTransform(
1255
+ { name: "state_expand", inputTypes: ["state", "string"], priority: 50, mode: "series" },
1256
+ stateExpand
1257
+ );
1258
/**
 * Normalises US ZIP codes to five digits.
 *
 * - All-digit values are left-padded with zeros to five characters.
 * - ZIP+4 values (1-5 digits, a hyphen, exactly 4 digits) lose the +4 part and
 *   are padded the same way.
 * - Anything else is returned trimmed but otherwise intact.
 *
 * The previous implementation cut every value at its first hyphen before
 * checking its shape, which silently corrupted non-ZIP data: "00-950" became
 * "00000" and "SW1A-1AA" became "SW1A".
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} New array; non-string entries are untouched.
 */
function zipNormalize(values) {
  return mapStrings3(values, (s) => {
    const trimmed = s.trim();
    if (/^\d+$/.test(trimmed)) {
      return trimmed.padStart(5, "0");
    }
    const zipPlus4 = /^(\d{1,5})-\d{4}$/.exec(trimmed);
    if (zipPlus4) {
      return zipPlus4[1].padStart(5, "0");
    }
    return trimmed;
  });
}
1268
+ registerTransform(
1269
+ { name: "zip_normalize", inputTypes: ["zip"], autoApply: true, priority: 55, mode: "series" },
1270
+ zipNormalize
1271
+ );
1272
// "<street>, <city>, <2-letter state> <5-digit zip, optional -4>"
var _ADDRESS_PATTERN = /^(.+?),\s*(.+?),\s*([A-Za-z]{2})\s+(\d{5}(?:-\d{4})?)$/;
/**
 * Splits a single-line address column into `street`, `city`, `state` and `zip`
 * fields on a copy of each row.
 *
 * - Non-string cells: all four fields are `null`.
 * - Strings that do not match `_ADDRESS_PATTERN`: `street` holds the original
 *   (untrimmed) value and the other three fields are `null`.
 *
 * @param {Array<Object>} rows - Input rows; never mutated.
 * @param {string} column - Name of the column holding the address.
 * @returns {Array<Object>} New row objects with the four extra fields.
 */
function splitAddress(rows, column) {
  const blankParts = { street: null, city: null, state: null, zip: null };
  return rows.map((row) => {
    const val = row[column];
    if (typeof val !== "string") {
      return { ...row, ...blankParts };
    }
    const m = _ADDRESS_PATTERN.exec(val.trim());
    if (m === null) {
      return { ...row, ...blankParts, street: val };
    }
    const [, street, city, state, zip] = m;
    return { ...row, street, city, state, zip };
  });
}
1286
+ registerTransform(
1287
+ { name: "split_address", inputTypes: ["address"], priority: 45, mode: "dataframe" },
1288
+ splitAddress
1289
+ );
1290
/**
 * Maps country names and aliases to the code stored in `_COUNTRIES`; the
 * lookup key is the trimmed, lower-cased value. Unrecognised values are
 * returned unchanged.
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} New array; non-string entries are untouched.
 */
function countryStandardize(values) {
  const toCode = (s) => {
    const key = s.trim().toLowerCase();
    return _COUNTRIES[key] ?? s;
  };
  return mapStrings3(values, toCode);
}
1296
+ registerTransform(
1297
+ { name: "country_standardize", inputTypes: ["country", "string"], priority: 50, mode: "series" },
1298
+ countryStandardize
1299
+ );
1300
/**
 * Trims each string value and then applies every `_UNIT_PATTERNS` replacement
 * in table order.
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} New array; non-string entries are untouched.
 */
function unitNormalize(values) {
  const normalize = (s) =>
    _UNIT_PATTERNS.reduce(
      (acc, [pattern, replacement]) => acc.replace(pattern, replacement),
      s.trim()
    );
  return mapStrings3(values, normalize);
}
1309
+ registerTransform(
1310
+ { name: "unit_normalize", inputTypes: ["address", "string"], priority: 45, mode: "series" },
1311
+ unitNormalize
1312
+ );
1313
+
1314
+ // src/core/transforms/dates.ts
1315
+ init_registry();
1316
/**
 * Parses a date string into a `Date`, or returns `null` when the string is
 * blank or cannot be parsed.
 *
 * Every caller in this file reads the result through the UTC getters, so
 * date-only inputs must resolve to midnight UTC. `new Date(str)` does that
 * only for the strict ISO form `YYYY-MM-DD`; other date-only spellings are
 * parsed as LOCAL midnight, which shifts the calendar day in any time zone
 * ahead of UTC. The common numeric date-only forms are therefore built
 * explicitly in UTC:
 *
 *   - `YYYY-M-D` / `YYYY/M/D`
 *   - `M-D-YYYY` / `M/D/YYYY` (month first)
 *
 * Everything else (month names, date-times, explicit offsets) still goes
 * through the engine's own parser, so strings without an explicit zone in
 * those forms remain subject to local-time interpretation.
 *
 * @param {string} val - Raw date text.
 * @returns {Date|null} Parsed date, or `null`.
 */
function _parseDate(val) {
  const trimmed = val.trim();
  if (!trimmed) return null;

  // The back-reference forces both separators to be the same character.
  const ymd = /^(\d{4})([-/])(\d{1,2})\2(\d{1,2})$/.exec(trimmed);
  const mdy = ymd ? null : /^(\d{1,2})([-/])(\d{1,2})\2(\d{4})$/.exec(trimmed);
  let parts = null;
  if (ymd) {
    parts = [ymd[1], ymd[3], ymd[4]];
  } else if (mdy) {
    parts = [mdy[4], mdy[1], mdy[3]];
  }
  if (parts !== null) {
    const [year, month, day] = parts.map(Number);
    if (month >= 1 && month <= 12 && day >= 1 && day <= 31) {
      // Start from the epoch (00:00:00.000 UTC) and set the calendar date.
      // setUTCFullYear avoids Date.UTC's remapping of years 0-99 to 19xx.
      const utcDate = new Date(0);
      utcDate.setUTCFullYear(year, month - 1, day);
      return utcDate;
    }
    // Out-of-range components: let the engine decide, as before.
  }

  const d = new Date(trimmed);
  if (Number.isNaN(d.getTime())) return null;
  return d;
}
1323
/**
 * Formats a number with a leading zero when it is below 10.
 *
 * @param {number} n - Value to format.
 * @returns {string} `n` as text, prefixed with "0" if `n < 10`.
 */
function pad(n) {
  if (n < 10) {
    return `0${n}`;
  }
  return String(n);
}
1326
// English weekday names indexed by Date#getUTCDay() (0 = Sunday).
var DAY_NAMES = ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"];
1335
/**
 * Formats each parseable value as `YYYY-MM-DD` using the UTC calendar date.
 *
 * @param {Array<*>} values - Column values; non-null entries are stringified
 *   before parsing.
 * @returns {Array<*>} `null` stays `null`; unparseable values are returned
 *   unchanged.
 */
function dateIso8601(values) {
  return values.map((v) => {
    if (v === null) return null;
    const parsed = _parseDate(String(v));
    if (!parsed) return v;
    const month = pad(parsed.getUTCMonth() + 1);
    const day = pad(parsed.getUTCDate());
    return [parsed.getUTCFullYear(), month, day].join("-");
  });
}
1344
+ registerTransform(
1345
+ { name: "date_iso8601", inputTypes: ["date"], autoApply: true, priority: 50, mode: "series" },
1346
+ dateIso8601
1347
+ );
1348
/**
 * Formats each parseable value as `MM/DD/YYYY` using the UTC calendar date.
 *
 * @param {Array<*>} values - Column values; non-null entries are stringified
 *   before parsing.
 * @returns {Array<*>} `null` stays `null`; unparseable values are returned
 *   unchanged.
 */
function dateUs(values) {
  return values.map((v) => {
    if (v === null) return null;
    const parsed = _parseDate(String(v));
    if (!parsed) return v;
    const month = pad(parsed.getUTCMonth() + 1);
    const day = pad(parsed.getUTCDate());
    return [month, day, parsed.getUTCFullYear()].join("/");
  });
}
1357
+ registerTransform(
1358
+ { name: "date_us", inputTypes: ["date"], priority: 50, mode: "series" },
1359
+ dateUs
1360
+ );
1361
/**
 * Formats each parseable value as `DD/MM/YYYY` using the UTC calendar date.
 *
 * @param {Array<*>} values - Column values; non-null entries are stringified
 *   before parsing.
 * @returns {Array<*>} `null` stays `null`; unparseable values are returned
 *   unchanged.
 */
function dateEu(values) {
  return values.map((v) => {
    if (v === null) return null;
    const parsed = _parseDate(String(v));
    if (!parsed) return v;
    const month = pad(parsed.getUTCMonth() + 1);
    const day = pad(parsed.getUTCDate());
    return [day, month, parsed.getUTCFullYear()].join("/");
  });
}
1370
+ registerTransform(
1371
+ { name: "date_eu", inputTypes: ["date"], priority: 50, mode: "series" },
1372
+ dateEu
1373
+ );
1374
+ registerTransform(
1375
+ { name: "date_parse", inputTypes: ["date"], priority: 55, mode: "series" },
1376
+ dateIso8601
1377
+ );
1378
/**
 * Computes age in whole years from a date of birth, comparing UTC calendar
 * components.
 *
 * @param {Array<*>} values - Dates of birth; non-null entries are stringified
 *   before parsing.
 * @param {*} [referenceDate=null] - Date to measure against; a falsy value
 *   means "now". If it cannot be parsed, a copy of `values` is returned.
 * @returns {Array<*>} Ages; `null` stays `null`, and unparseable values are
 *   returned unchanged.
 */
function ageFromDob(values, referenceDate = null) {
  const ref = referenceDate ? _parseDate(String(referenceDate)) : /* @__PURE__ */ new Date();
  if (!ref) return values.slice();
  const refYear = ref.getUTCFullYear();
  const refMonth = ref.getUTCMonth();
  const refDay = ref.getUTCDate();
  return values.map((v) => {
    if (v === null) return null;
    const dob = _parseDate(String(v));
    if (!dob) return v;
    const years = refYear - dob.getUTCFullYear();
    const dobMonth = dob.getUTCMonth();
    // The birthday has not yet occurred in the reference year.
    const birthdayPending =
      refMonth < dobMonth || (refMonth === dobMonth && refDay < dob.getUTCDate());
    return birthdayPending ? years - 1 : years;
  });
}
1393
+ registerTransform(
1394
+ { name: "age_from_dob", inputTypes: ["date"], priority: 40, mode: "series" },
1395
+ ageFromDob
1396
+ );
1397
/**
 * Formats each parseable value as `YYYY-MM-DDTHH:MM:SS` using UTC components.
 * No zone designator is appended.
 *
 * @param {Array<*>} values - Column values; non-null entries are stringified
 *   before parsing.
 * @returns {Array<*>} `null` stays `null`; unparseable values are returned
 *   unchanged.
 */
function datetimeIso8601(values) {
  return values.map((v) => {
    if (v === null) return null;
    const parsed = _parseDate(String(v));
    if (!parsed) return v;
    const datePart = [
      parsed.getUTCFullYear(),
      pad(parsed.getUTCMonth() + 1),
      pad(parsed.getUTCDate())
    ].join("-");
    const timePart = [
      pad(parsed.getUTCHours()),
      pad(parsed.getUTCMinutes()),
      pad(parsed.getUTCSeconds())
    ].join(":");
    return `${datePart}T${timePart}`;
  });
}
1405
+ registerTransform(
1406
+ { name: "datetime_iso8601", inputTypes: ["date"], priority: 50, mode: "series" },
1407
+ datetimeIso8601
1408
+ );
1409
/**
 * Extracts the UTC year from each parseable value.
 *
 * @param {Array<*>} values - Column values; non-null entries are stringified
 *   before parsing.
 * @returns {Array<*>} Years; `null` stays `null`, and unparseable values are
 *   returned unchanged.
 */
function extractYear(values) {
  return values.map((v) => {
    if (v === null) return null;
    const parsed = _parseDate(String(v));
    if (!parsed) return v;
    return parsed.getUTCFullYear();
  });
}
1416
/**
 * Extracts the UTC month (1-12) from each parseable value.
 *
 * @param {Array<*>} values - Column values; non-null entries are stringified
 *   before parsing.
 * @returns {Array<*>} Months; `null` stays `null`, and unparseable values are
 *   returned unchanged.
 */
function extractMonth(values) {
  return values.map((v) => {
    if (v === null) return null;
    const parsed = _parseDate(String(v));
    if (!parsed) return v;
    return parsed.getUTCMonth() + 1;
  });
}
1423
/**
 * Extracts the UTC day of the month from each parseable value.
 *
 * @param {Array<*>} values - Column values; non-null entries are stringified
 *   before parsing.
 * @returns {Array<*>} Days; `null` stays `null`, and unparseable values are
 *   returned unchanged.
 */
function extractDay(values) {
  return values.map((v) => {
    if (v === null) return null;
    const parsed = _parseDate(String(v));
    if (!parsed) return v;
    return parsed.getUTCDate();
  });
}
1430
/**
 * Extracts the calendar quarter (1-4) from each parseable value, based on the
 * UTC month.
 *
 * @param {Array<*>} values - Column values; non-null entries are stringified
 *   before parsing.
 * @returns {Array<*>} Quarters; `null` stays `null`, and unparseable values
 *   are returned unchanged.
 */
function extractQuarter(values) {
  return values.map((v) => {
    if (v === null) return null;
    const parsed = _parseDate(String(v));
    if (!parsed) return v;
    const zeroBasedQuarter = Math.floor(parsed.getUTCMonth() / 3);
    return zeroBasedQuarter + 1;
  });
}
1438
/**
 * Extracts the English weekday name from each parseable value, based on the
 * UTC day of the week.
 *
 * @param {Array<*>} values - Column values; non-null entries are stringified
 *   before parsing.
 * @returns {Array<*>} Weekday names; `null` stays `null`, and unparseable
 *   values are returned unchanged.
 */
function extractDayOfWeek(values) {
  return values.map((v) => {
    if (v === null) return null;
    const parsed = _parseDate(String(v));
    if (!parsed) return v;
    return DAY_NAMES[parsed.getUTCDay()];
  });
}
1445
+ registerTransform({ name: "extract_year", inputTypes: ["date"], priority: 35, mode: "series" }, extractYear);
1446
+ registerTransform({ name: "extract_month", inputTypes: ["date"], priority: 35, mode: "series" }, extractMonth);
1447
+ registerTransform({ name: "extract_day", inputTypes: ["date"], priority: 35, mode: "series" }, extractDay);
1448
+ registerTransform({ name: "extract_quarter", inputTypes: ["date"], priority: 35, mode: "series" }, extractQuarter);
1449
+ registerTransform({ name: "extract_day_of_week", inputTypes: ["date"], priority: 35, mode: "series" }, extractDayOfWeek);
1450
/**
 * Shifts each parseable date by a number of days and formats the result as
 * `YYYY-MM-DD` (UTC).
 *
 * @param {Array<*>} values - Column values; non-null entries are stringified
 *   before parsing.
 * @param {*} [days=0] - Days to add (negative to subtract). Non-number input
 *   is coerced with `Number`; unusable input counts as 0.
 * @returns {Array<*>} `null` stays `null`; unparseable values are returned
 *   unchanged.
 */
function dateShift(values, days = 0) {
  const shift = typeof days === "number" ? days : Number(days) || 0;
  // 864e5 = milliseconds in one day.
  const offsetMs = shift * 864e5;
  return values.map((v) => {
    if (v === null) return null;
    const parsed = _parseDate(String(v));
    if (!parsed) return v;
    const moved = new Date(parsed.getTime() + offsetMs);
    return [
      moved.getUTCFullYear(),
      pad(moved.getUTCMonth() + 1),
      pad(moved.getUTCDate())
    ].join("-");
  });
}
1461
+ registerTransform(
1462
+ { name: "date_shift", inputTypes: ["date"], priority: 30, mode: "series" },
1463
+ dateShift
1464
+ );
1465
/**
 * Reports, for each value, whether it parses as a date.
 *
 * @param {Array<*>} values - Column values; non-null entries are stringified
 *   and trimmed before parsing.
 * @returns {Array<boolean|null>} `null` for `null` input, `false` for blank
 *   text, otherwise whether `_parseDate` accepted the value.
 */
function dateValidate(values) {
  return values.map((v) => {
    if (v === null) return null;
    const text = String(v).trim();
    return text !== "" && _parseDate(text) !== null;
  });
}
1473
+ registerTransform(
1474
+ { name: "date_validate", inputTypes: ["date", "string"], priority: 60, mode: "series" },
1475
+ dateValidate
1476
+ );
1477
+
1478
+ // src/core/transforms/email.ts
1479
+ init_registry();
1480
// Loose shape check: "<no space/@>@<no space/@>.<no space/@>".
var EMAIL_RE = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
// Domains whose local part has its dots removed by emailNormalize.
var GMAIL_DOMAINS = /* @__PURE__ */ new Set([
  "gmail.com",
  "googlemail.com"
]);
1482
/**
 * Lower-cases every string value. No trimming is performed.
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} New array; non-string entries are untouched.
 */
function emailLowercase(values) {
  return values.map((entry) => (typeof entry === "string" ? entry.toLowerCase() : entry));
}
1488
+ registerTransform(
1489
+ { name: "email_lowercase", inputTypes: ["email", "string"], priority: 55, mode: "series" },
1490
+ emailLowercase
1491
+ );
1492
/**
 * Canonicalises e-mail addresses:
 *
 *  1. lower-case and trim the value;
 *  2. drop everything from the first "+" in the local part (for every domain);
 *  3. for domains in `GMAIL_DOMAINS`, remove dots from the local part.
 *
 * Strings without "@" are returned lower-cased and trimmed.
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} New array; non-string entries are untouched.
 */
function emailNormalize(values) {
  return values.map((v) => {
    if (typeof v !== "string") return v;
    const lowered = v.toLowerCase().trim();
    const atIdx = lowered.lastIndexOf("@");
    if (atIdx < 0) return lowered;
    const domain = lowered.slice(atIdx + 1);
    // Everything before the first "+" (the whole local part if there is none).
    const [untagged] = lowered.slice(0, atIdx).split("+");
    const local = GMAIL_DOMAINS.has(domain) ? untagged.replace(/\./g, "") : untagged;
    return `${local}@${domain}`;
  });
}
1510
+ registerTransform(
1511
+ { name: "email_normalize", inputTypes: ["email"], priority: 50, mode: "series" },
1512
+ emailNormalize
1513
+ );
1514
/**
 * Extracts the lower-cased domain (the text after the last "@") from each
 * string value.
 *
 * The value is trimmed first, so trailing whitespace no longer ends up inside
 * the domain ("a@b.com " used to yield "b.com "). An empty domain ("user@")
 * yields `null` rather than "", matching the "no domain" result returned when
 * "@" is missing.
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} Domains, `null` when no domain is present; non-string
 *   entries are untouched.
 */
function emailExtractDomain(values) {
  return values.map((v) => {
    if (typeof v !== "string") return v;
    const trimmed = v.trim();
    const atIdx = trimmed.lastIndexOf("@");
    if (atIdx === -1) return null;
    const domain = trimmed.slice(atIdx + 1).toLowerCase();
    return domain === "" ? null : domain;
  });
}
1522
+ registerTransform(
1523
+ { name: "email_extract_domain", inputTypes: ["email"], priority: 40, mode: "series" },
1524
+ emailExtractDomain
1525
+ );
1526
/**
 * Tests each trimmed string value against `EMAIL_RE`.
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} Booleans for string entries; non-string entries
 *   (including `null`) are returned unchanged.
 */
function emailValidate(values) {
  return values.map((entry) => {
    if (typeof entry !== "string") return entry;
    const candidate = entry.trim();
    return EMAIL_RE.test(candidate);
  });
}
1532
+ registerTransform(
1533
+ { name: "email_validate", inputTypes: ["email", "string"], priority: 60, mode: "series" },
1534
+ emailValidate
1535
+ );
1536
+
1537
+ // src/core/transforms/numeric.ts
1538
+ init_registry();
1539
/**
 * Converts currency-formatted text to numbers by discarding every character
 * except digits, "." and "-".
 *
 * @param {Array<*>} values - Column values; non-number entries are stringified.
 * @returns {Array<*>} Numbers where conversion succeeds. `null` and numbers
 *   pass through; values that leave nothing numeric, or that do not form a
 *   valid number, are returned unchanged.
 */
function currencyStrip(values) {
  return values.map((v) => {
    if (v === null || typeof v === "number") return v;
    const numericText = String(v).replace(/[^0-9.\-]/g, "");
    const nothingNumeric = numericText === "" || numericText === "-";
    if (nothingNumeric) return v;
    const amount = Number(numericText);
    return Number.isNaN(amount) ? v : amount;
  });
}
1549
+ registerTransform(
1550
+ { name: "currency_strip", inputTypes: ["string", "numeric"], priority: 50, mode: "series" },
1551
+ currencyStrip
1552
+ );
1553
/**
 * Converts percentages to fractions (50 -> 0.5, "50%" -> 0.5).
 *
 * Text that is blank once "%" signs and surrounding whitespace are removed is
 * returned unchanged. `Number("")` is 0, so without this guard "" and "%"
 * would silently turn into 0.
 *
 * @param {Array<*>} values - Column values; non-number entries are stringified.
 * @returns {Array<*>} Fractions where conversion succeeds; `null` stays
 *   `null`, and values that are not numeric are returned unchanged.
 */
function percentageNormalize(values) {
  return values.map((v) => {
    if (v === null) return null;
    if (typeof v === "number") return v / 100;
    const s = String(v).replace(/%/g, "").trim();
    if (s === "") return v;
    const n = Number(s);
    return Number.isNaN(n) ? v : n / 100;
  });
}
1562
+ registerTransform(
1563
+ { name: "percentage_normalize", inputTypes: ["string", "numeric"], priority: 50, mode: "series" },
1564
+ percentageNormalize
1565
+ );
1566
/**
 * Rounds numeric values to `n` decimal places.
 *
 * `n` may be a number or numeric text. The previous `Number(n) || 2` fallback
 * treated the string "0" as "missing" and rounded to 2 decimals; now only
 * input that is absent or not a finite number falls back to 2.
 *
 * @param {Array<*>} values - Column values; non-number entries are coerced
 *   with `Number`.
 * @param {*} [n=2] - Number of decimal places.
 * @returns {Array<*>} Rounded numbers; `null` stays `null`, and values that
 *   are not numeric are returned unchanged.
 */
function roundTransform(values, n = 2) {
  // Only non-blank strings are parsed, so null/""/other types use the default.
  const parsed = typeof n === "string" && n.trim() !== "" ? Number(n) : n;
  const decimals = typeof parsed === "number" && Number.isFinite(parsed) ? parsed : 2;
  const factor = Math.pow(10, decimals);
  return values.map((v) => {
    if (v === null) return null;
    const num = typeof v === "number" ? v : Number(v);
    if (Number.isNaN(num)) return v;
    return Math.round(num * factor) / factor;
  });
}
1576
+ registerTransform(
1577
+ { name: "round", inputTypes: ["numeric"], priority: 40, mode: "series" },
1578
+ roundTransform
1579
+ );
1580
/**
 * Clamps numeric values to the range [minVal, maxVal].
 *
 * Bounds may be numbers or numeric text. The previous `Number(maxVal) || 1`
 * fallback treated the string "0" as "missing" and silently used 1; now only
 * input that is absent or not a number falls back to the default (0 for the
 * lower bound, 1 for the upper bound).
 *
 * @param {Array<*>} values - Column values; non-number entries are coerced
 *   with `Number`.
 * @param {*} [minVal=0] - Lower bound.
 * @param {*} [maxVal=1] - Upper bound.
 * @returns {Array<*>} Clamped numbers; `null` stays `null`, and values that
 *   are not numeric are returned unchanged.
 */
function clamp(values, minVal = 0, maxVal = 1) {
  // Only non-blank strings are parsed, so null/""/other types use the default.
  const toBound = (raw, fallback) => {
    const parsed = typeof raw === "string" && raw.trim() !== "" ? Number(raw) : raw;
    return typeof parsed === "number" && !Number.isNaN(parsed) ? parsed : fallback;
  };
  const lo = toBound(minVal, 0);
  const hi = toBound(maxVal, 1);
  return values.map((v) => {
    if (v === null) return null;
    const num = typeof v === "number" ? v : Number(v);
    if (Number.isNaN(num)) return v;
    return Math.min(hi, Math.max(lo, num));
  });
}
1590
+ registerTransform(
1591
+ { name: "clamp", inputTypes: ["numeric"], priority: 40, mode: "series" },
1592
+ clamp
1593
+ );
1594
/**
 * Converts values to integers by truncating toward zero.
 *
 * Blank strings are returned unchanged. `Number("")` is 0, so without this
 * guard empty cells would silently become 0.
 *
 * @param {Array<*>} values - Column values; entries are coerced with `Number`.
 * @returns {Array<*>} Truncated numbers; `null` stays `null`, and values that
 *   are not numeric are returned unchanged.
 */
function toInteger(values) {
  return values.map((v) => {
    if (v === null) return null;
    if (typeof v === "string" && v.trim() === "") return v;
    const num = Number(v);
    if (Number.isNaN(num)) return v;
    return Math.trunc(num);
  });
}
1602
+ registerTransform(
1603
+ { name: "to_integer", inputTypes: ["string", "numeric"], priority: 45, mode: "series" },
1604
+ toInteger
1605
+ );
1606
/**
 * Replaces numeric values with their absolute value.
 *
 * @param {Array<*>} values - Column values; non-number entries are coerced
 *   with `Number`.
 * @returns {Array<*>} Absolute values; `null` stays `null`, and values that
 *   are not numeric are returned unchanged.
 */
function absValue(values) {
  const toAbsolute = (v) => {
    if (v === null) return null;
    const num = typeof v === "number" ? v : Number(v);
    return Number.isNaN(num) ? v : Math.abs(num);
  };
  return values.map((v) => toAbsolute(v));
}
1614
+ registerTransform(
1615
+ { name: "abs_value", inputTypes: ["numeric"], priority: 40, mode: "series" },
1616
+ absValue
1617
+ );
1618
/**
 * Replaces `null` entries with 0. `undefined` and every other value pass
 * through unchanged.
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} New array with nulls filled.
 */
function fillZero(values) {
  return values.map((entry) => {
    if (entry === null) {
      return 0;
    }
    return entry;
  });
}
1621
+ registerTransform(
1622
+ { name: "fill_zero", inputTypes: ["numeric"], priority: 35, mode: "series" },
1623
+ fillZero
1624
+ );
1625
/**
 * Converts numbers written with "." as thousands separator and "," as decimal
 * separator ("1.234,56") to JavaScript numbers.
 *
 * All dots are removed and the first comma becomes the decimal point, so text
 * that already uses a decimal point ("12.5") is read as 125.
 *
 * Text that is blank after removing the separators is returned unchanged.
 * `Number("")` is 0, so without this guard "" and "." would silently become 0.
 *
 * @param {Array<*>} values - Column values; non-number entries are stringified.
 * @returns {Array<*>} Numbers where conversion succeeds; `null` and numbers
 *   pass through, and values that are not numeric are returned unchanged.
 */
function commaDecimal(values) {
  return values.map((v) => {
    if (v === null) return null;
    if (typeof v === "number") return v;
    const converted = String(v).replace(/\./g, "").replace(",", ".");
    if (converted.trim() === "") return v;
    const n = Number(converted);
    return Number.isNaN(n) ? v : n;
  });
}
1635
+ registerTransform(
1636
+ { name: "comma_decimal", inputTypes: ["string", "numeric"], priority: 48, mode: "series" },
1637
+ commaDecimal
1638
+ );
1639
/**
 * Converts numeric text, including scientific notation such as "1.5e3", to
 * numbers via `Number`.
 *
 * Blank strings are returned unchanged. `Number("")` is 0, so without this
 * guard empty cells would silently become 0.
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} Numbers where conversion succeeds; `null` stays `null`,
 *   and values that are not numeric are returned unchanged.
 */
function scientificToDecimal(values) {
  return values.map((v) => {
    if (v === null) return null;
    if (typeof v === "string" && v.trim() === "") return v;
    const n = Number(v);
    return Number.isNaN(n) ? v : n;
  });
}
1646
+ registerTransform(
1647
+ { name: "scientific_to_decimal", inputTypes: ["string", "numeric"], priority: 45, mode: "series" },
1648
+ scientificToDecimal
1649
+ );
1650
+
1651
+ // src/core/transforms/categorical.ts
1652
+ init_registry();
1653
// Lower-cased spellings recognised as boolean true / false.
var TRUTHY = /* @__PURE__ */ new Set(["yes", "y", "1", "true", "t"]);
var FALSY = /* @__PURE__ */ new Set(["no", "n", "0", "false", "f"]);
/**
 * Converts common yes/no spellings to booleans.
 *
 * Every non-null value is stringified, trimmed and lower-cased before the
 * lookup, so the number 1 and the boolean `true` both map to `true`.
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} Booleans for recognised spellings; `null` stays `null`,
 *   and anything else is returned unchanged.
 */
function booleanNormalize(values) {
  return values.map((v) => {
    if (v === null) return null;
    const token = String(v).trim().toLowerCase();
    if (TRUTHY.has(token)) {
      return true;
    }
    return FALSY.has(token) ? false : v;
  });
}
1664
+ registerTransform(
1665
+ { name: "boolean_normalize", inputTypes: ["boolean", "string"], priority: 50, mode: "series" },
1666
+ booleanNormalize
1667
+ );
1668
/**
 * Maps "male"/"m" to "M" and "female"/"f" to "F", ignoring case and
 * surrounding whitespace.
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} New array; unrecognised strings and non-string entries
 *   are returned unchanged.
 */
function genderStandardize(values) {
  // A Map is used so unexpected input cannot hit inherited object members.
  const codes = new Map([
    ["male", "M"],
    ["m", "M"],
    ["female", "F"],
    ["f", "F"]
  ]);
  return values.map((v) => {
    if (typeof v !== "string") return v;
    const code = codes.get(v.trim().toLowerCase());
    return code === undefined ? v : code;
  });
}
1678
+ registerTransform(
1679
+ { name: "gender_standardize", inputTypes: ["string"], priority: 50, mode: "series" },
1680
+ genderStandardize
1681
+ );
1682
// Lower-cased, trimmed spellings that are treated as "no value".
var NULL_VARIANTS = /* @__PURE__ */ new Set(["n/a", "null", "none", "na", "nil", "nan", "-", ""]);
/**
 * Replaces textual null placeholders ("N/A", "none", "-", blank, ...) with
 * `null`. The comparison ignores case and surrounding whitespace.
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} New array; other strings and non-string entries are
 *   returned unchanged.
 */
function nullStandardize(values) {
  return values.map((v) => {
    if (typeof v !== "string") return v;
    const token = v.trim().toLowerCase();
    return NULL_VARIANTS.has(token) ? null : v;
  });
}
1701
+ registerTransform(
1702
+ { name: "null_standardize", inputTypes: ["string"], autoApply: true, priority: 80, mode: "series" },
1703
+ nullStandardize
1704
+ );
1705
/**
 * Maps category variants to their canonical spelling.
 *
 * `mapping` has the shape `{ canonical: [variant, ...] }`. Matching ignores
 * case; input values are also trimmed. Each canonical name matches itself,
 * and variant lists that are not arrays are ignored.
 *
 * @param {Array<*>} values - Column values.
 * @param {Object|null} [mapping=null] - Canonical name -> list of variants.
 *   Without a usable mapping a copy of `values` is returned.
 * @returns {Array<*>} New array; unmatched strings and non-string entries are
 *   returned unchanged.
 */
function categoryStandardize(values, mapping = null) {
  if (!mapping || typeof mapping !== "object") return values.slice();
  const lookup = /* @__PURE__ */ new Map();
  Object.entries(mapping).forEach(([canonical, variants]) => {
    const aliases = Array.isArray(variants) ? variants : [];
    aliases.forEach((alias) => {
      lookup.set(String(alias).toLowerCase(), canonical);
    });
    // Registered last so the canonical name always maps to itself.
    lookup.set(canonical.toLowerCase(), canonical);
  });
  return values.map((v) => {
    if (typeof v !== "string") return v;
    const key = v.trim().toLowerCase();
    return lookup.get(key) ?? v;
  });
}
1725
+ registerTransform(
1726
+ { name: "category_standardize", inputTypes: ["string"], priority: 45, mode: "series" },
1727
+ categoryStandardize
1728
+ );
1729
/**
 * Placeholder for file-based category lookup. The lookup is not implemented:
 * when a path is supplied a warning is logged, and in every case a shallow
 * copy of the input is returned.
 *
 * @param {Array<*>} values - Column values.
 * @param {*} [lookupPath=null] - Path to the lookup file (currently unused).
 * @returns {Array<*>} Copy of `values`.
 */
function categoryFromFile(values, lookupPath = null) {
  const unchanged = values.slice();
  if (lookupPath) {
    console.warn("[goldenflow] category_from_file is not yet implemented in the JS port \u2014 returning values unchanged");
  }
  return unchanged;
}
1735
+ registerTransform(
1736
+ { name: "category_from_file", inputTypes: ["string"], priority: 45, mode: "series" },
1737
+ categoryFromFile
1738
+ );
1739
+
1740
+ // src/core/transforms/identifiers.ts
1741
+ init_registry();
1742
/**
 * Applies `fn` to every string entry of `values`; any other entry (including
 * `null`) is passed through untouched.
 *
 * @param {Array<*>} values - Column values.
 * @param {function(string): *} fn - Transformation for string entries.
 * @returns {Array<*>} New array of the same length.
 */
function mapStrings4(values, fn) {
  return values.map((entry) => (typeof entry === "string" ? fn(entry) : entry));
}
1748
/**
 * Returns only the digit characters of a string, in their original order.
 *
 * @param {string} val - Input text.
 * @returns {string} `val` with every non-digit character removed.
 */
function extractDigits2(val) {
  const digitsOnly = val.replace(/\D/g, "");
  return digitsOnly;
}
1751
/**
 * Formats values that contain exactly nine digits as `AAA-GG-SSSS`.
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} New array; strings without exactly nine digits and
 *   non-string entries are returned unchanged.
 */
function ssnFormat(values) {
  return mapStrings4(values, (s) => {
    const digits = extractDigits2(s);
    if (digits.length !== 9) return s;
    const groups = [digits.slice(0, 3), digits.slice(3, 5), digits.slice(5)];
    return groups.join("-");
  });
}
1758
+ registerTransform(
1759
+ { name: "ssn_format", inputTypes: ["ssn", "string"], priority: 50, mode: "series" },
1760
+ ssnFormat
1761
+ );
1762
/**
 * Masks values that contain exactly nine digits as `***-**-SSSS`, keeping only
 * the last four digits.
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} New array; strings without exactly nine digits and
 *   non-string entries are returned unchanged.
 */
function ssnMask(values) {
  return mapStrings4(values, (s) => {
    const digits = extractDigits2(s);
    if (digits.length !== 9) return s;
    const lastFour = digits.slice(5);
    return `***-**-${lastFour}`;
  });
}
1769
+ registerTransform(
1770
+ { name: "ssn_mask", inputTypes: ["ssn", "string"], priority: 50, mode: "series" },
1771
+ ssnMask
1772
+ );
1773
/**
 * Formats values that contain exactly nine digits as `NN-NNNNNNN`.
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} New array; strings without exactly nine digits and
 *   non-string entries are returned unchanged.
 */
function einFormat(values) {
  return mapStrings4(values, (s) => {
    const digits = extractDigits2(s);
    if (digits.length !== 9) return s;
    const prefix = digits.slice(0, 2);
    const serial = digits.slice(2);
    return `${prefix}-${serial}`;
  });
}
1780
+ registerTransform(
1781
+ { name: "ein_format", inputTypes: ["ein", "string"], priority: 50, mode: "series" },
1782
+ einFormat
1783
+ );
1784
+
1785
+ // src/core/transforms/url.ts
1786
+ init_registry();
1787
/**
 * Applies `fn` to every string entry of `values`; any other entry (including
 * `null`) is passed through untouched.
 *
 * @param {Array<*>} values - Column values.
 * @param {function(string): *} fn - Transformation for string entries.
 * @returns {Array<*>} New array of the same length.
 */
function mapStrings5(values, fn) {
  return values.map((entry) => (typeof entry === "string" ? fn(entry) : entry));
}
1793
// Matches a leading http:// or https:// scheme. No longer used by
// urlNormalize; kept because it is a file-level name.
var _SCHEME_RE = /^https?:\/\//i;
// Matches any leading "<scheme>://" (letter, then letters/digits/"+"/"."/"-").
var _ANY_SCHEME_RE = /^[a-z][a-z0-9+.-]*:\/\//i;
/**
 * Normalises URLs:
 *
 *  - trims the value; blank strings become `null`;
 *  - prepends "https://" when the value has no scheme of its own;
 *  - lower-cases the scheme and the host part, leaving the path as is;
 *  - removes trailing slashes.
 *
 * Previously any value not starting with http(s):// got "https://" prepended,
 * so "ftp://example.com" turned into "https://ftp://example.com". Values that
 * already carry a scheme now keep it.
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} New array; non-string entries are untouched.
 */
function urlNormalize(values) {
  return mapStrings5(values, (s) => {
    let val = s.trim();
    if (!val) return null;
    if (!_ANY_SCHEME_RE.test(val)) {
      val = `https://${val}`;
    }
    const schemeEnd = val.indexOf("://") + 3;
    const scheme = val.slice(0, schemeEnd).toLowerCase();
    const rest = val.slice(schemeEnd);
    // Everything up to the first "/" is the host part; the remainder is the path.
    const slashIdx = rest.indexOf("/");
    const domain = (slashIdx === -1 ? rest : rest.slice(0, slashIdx)).toLowerCase();
    const path = slashIdx === -1 ? "" : rest.slice(slashIdx);
    let result = scheme + domain + path;
    if (result.endsWith("/") && result.length > schemeEnd + domain.length + 1) {
      // Path longer than "/": strip every trailing slash.
      result = result.replace(/\/+$/, "");
    } else if (result.endsWith("/") && path === "/") {
      // Bare root path: drop just that one slash.
      result = result.slice(0, -1);
    }
    return result;
  });
}
1823
+ registerTransform(
1824
+ { name: "url_normalize", inputTypes: ["url", "string"], priority: 50, mode: "series" },
1825
+ urlNormalize
1826
+ );
1827
/**
 * Extracts the lower-cased host part of a URL: the text between an optional
 * "<scheme>://" prefix and the first following "/".
 *
 * @param {Array<*>} values - Column values.
 * @returns {Array<*>} Host strings, or `null` for blank input or an empty
 *   host; non-string entries are untouched.
 */
function urlExtractDomain(values) {
  return mapStrings5(values, (s) => {
    const trimmed = s.trim();
    if (!trimmed) return null;
    const afterScheme = trimmed.includes("://") ? trimmed.split("://", 2)[1] : trimmed;
    const [host] = afterScheme.split("/", 1);
    return host ? host.toLowerCase() : null;
  });
}
1838
+ registerTransform(
1839
+ { name: "url_extract_domain", inputTypes: ["url", "string"], priority: 40, mode: "series" },
1840
+ urlExtractDomain
1841
+ );
1842
+
1843
+ // src/core/transforms/auto-correct.ts
1844
+ init_registry();
1845
/**
 * Computes the Levenshtein edit distance between two strings: the minimum
 * number of single-element insertions, deletions and substitutions. Elements
 * are compared by index (`a[i]`), i.e. per UTF-16 code unit for strings.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Edit distance.
 */
function levenshtein(a, b) {
  const rows = a.length;
  const cols = b.length;
  if (rows === 0) return cols;
  if (cols === 0) return rows;
  // previous[j] = distance between the first i-1 elements of a and the first j of b.
  let previous = Array.from({ length: cols + 1 }, (_unused, j) => j);
  for (let i = 1; i <= rows; i += 1) {
    const current = [i];
    for (let j = 1; j <= cols; j += 1) {
      if (a[i - 1] === b[j - 1]) {
        current.push(previous[j - 1]);
      } else {
        const substitution = previous[j - 1];
        const deletion = previous[j];
        const insertion = current[j - 1];
        current.push(1 + Math.min(substitution, deletion, insertion));
      }
    }
    previous = current;
  }
  return previous[cols];
}
1867
/**
 * Similarity score between two strings on a 0-100 scale:
 * `100 * (1 - distance / longerLength)`. Two empty strings score 100.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Similarity percentage.
 */
function fuzzyRatio(a, b) {
  const longest = Math.max(a.length, b.length);
  if (longest === 0) return 100;
  const distance = levenshtein(a, b);
  return 100 * (1 - distance / longest);
}
1873
/**
 * Auto-corrects rare category spellings toward frequent ones.
 *
 *  1. Tally string values case-insensitively, remembering how often each
 *     original casing occurs.
 *  2. A lower-cased value is canonical when its share of all string values is
 *     at least `frequencyThreshold`; it is represented by its most frequent
 *     casing (first seen wins ties).
 *  3. Every other value is mapped to the canonical value with the highest
 *     `fuzzyRatio` score, provided that score is at least `matchThreshold`
 *     (first seen wins ties).
 *  4. Values are rewritten to their correction or canonical casing.
 *
 * @param {Array<*>} values - Column values.
 * @param {*} [frequencyThreshold=0.05] - Minimum share (0-1) for a value to be
 *   canonical. Non-number input is coerced; unusable input means 0.05.
 * @param {*} [matchThreshold=85] - Minimum similarity (0-100) for a
 *   correction. Non-number input is coerced; unusable input means 85.
 * @returns {Array<*>} New array; non-string entries and unmatched strings are
 *   returned unchanged.
 */
function categoryAutoCorrect(values, frequencyThreshold = 0.05, matchThreshold = 85) {
  const freqThresh = typeof frequencyThreshold === "number" ? frequencyThreshold : Number(frequencyThreshold) || 0.05;
  const matchThresh = typeof matchThreshold === "number" ? matchThreshold : Number(matchThreshold) || 85;

  // Step 1: lower-cased value -> (original casing -> occurrences).
  const casingsByKey = /* @__PURE__ */ new Map();
  let totalStrings = 0;
  for (const v of values) {
    if (typeof v !== "string") continue;
    totalStrings += 1;
    const key = v.toLowerCase();
    let casings = casingsByKey.get(key);
    if (casings === undefined) {
      casings = /* @__PURE__ */ new Map();
      casingsByKey.set(key, casings);
    }
    casings.set(v, (casings.get(v) ?? 0) + 1);
  }
  if (totalStrings === 0) return values.slice();

  // Step 2: pick canonical values and their dominant casing.
  const canonicals = /* @__PURE__ */ new Map();
  for (const [key, casings] of casingsByKey) {
    let occurrences = 0;
    let bestCasing = key;
    let bestCount = 0;
    for (const [original, count] of casings) {
      occurrences += count;
      if (count > bestCount) {
        bestCount = count;
        bestCasing = original;
      }
    }
    if (occurrences / totalStrings >= freqThresh) {
      canonicals.set(key, bestCasing);
    }
  }
  if (canonicals.size === 0) return values.slice();

  // Step 3: map each non-canonical value to its closest canonical, if close enough.
  const corrections = /* @__PURE__ */ new Map();
  for (const key of casingsByKey.keys()) {
    if (canonicals.has(key)) continue;
    let bestCanonical = null;
    let bestScore = 0;
    for (const [canonKey, canonCasing] of canonicals) {
      const score = fuzzyRatio(key, canonKey);
      if (score >= matchThresh && score > bestScore) {
        bestScore = score;
        bestCanonical = canonCasing;
      }
    }
    if (bestCanonical !== null) {
      corrections.set(key, bestCanonical);
    }
  }

  // Step 4: rewrite. Both maps only ever hold strings, so `??` is safe here.
  return values.map((v) => {
    if (typeof v !== "string") return v;
    const key = v.toLowerCase();
    return corrections.get(key) ?? canonicals.get(key) ?? v;
  });
}
1934
+ registerTransform(
1935
+ { name: "category_auto_correct", inputTypes: ["string"], autoApply: true, priority: 35, mode: "series" },
1936
+ categoryAutoCorrect
1937
+ );
1938
+
1939
+ // src/core/transforms/index.ts
1940
+ init_registry();
1941
+
1942
+ // src/core/engine/transformer.ts
1943
+ init_types();
1944
+
1945
+ // src/core/engine/profiler-bridge.ts
1946
+ init_types();
1947
// Value-shape patterns used by inferType to guess a column's semantic type.
// "<local>@<domain>.<tld>" with no whitespace or extra "@".
var EMAIL_RE2 = /^[^@\s]+@[^@\s]+\.[^@\s]+$/;
// Optional "+" or "(", a digit, 6-18 digits/separators, and a final digit.
var PHONE_RE = /^[+(]?\d[\d()\-.\s]{6,18}\d$/;
// Year-first numeric, year-last numeric, or "<month name> <day>, <year>".
var DATE_RE = /^(\d{4}[-/]\d{1,2}[-/]\d{1,2}|\d{1,2}[-/]\d{1,2}[-/]\d{2,4}|[A-Za-z]{3,9}\s+\d{1,2},?\s+\d{4})$/;
// Two or more capitalised ASCII words.
var NAME_RE = /^[A-Z][a-z]+(\s+[A-Z][a-z]+)+$/;
// Five digits with an optional "-NNNN" extension.
var ZIP_RE = /^\d{5}(-\d{4})?$/;
1952
// Semantic type -> substrings of a column name that suggest that type.
// Types are tried in this order and the first type with a hit wins.
var NAME_PATTERNS = {
  zip: ["zip", "postal", "zipcode", "zip_code", "postal_code"],
  phone: ["phone", "tel", "mobile", "cell", "fax"],
  email: ["email", "e_mail", "mail"],
  date: ["date", "created", "updated", "timestamp", "dob", "birth"],
  state: ["state", "province", "region"],
  name: ["first_name", "last_name", "fname", "lname", "full_name", "fullname"]
};
/**
 * Refines a generic inferred type using hints in the column name.
 *
 * Only "string" and "numeric" can be overridden; any other type is returned
 * as is. The column name is lower-cased and its hyphens are turned into
 * underscores before being searched for each hint as a substring.
 *
 * @param {string} columnName - Name of the column.
 * @param {string} currentType - Type inferred from the values.
 * @returns {string} The overriding semantic type, or `currentType`.
 */
function overrideTypeByColumnName(columnName, currentType) {
  const overridable = currentType === "string" || currentType === "numeric";
  if (!overridable) return currentType;
  const colLower = columnName.toLowerCase().replace(/-/g, "_");
  const hit = Object.entries(NAME_PATTERNS).find(([, hints]) =>
    hints.some((hint) => colLower.includes(hint))
  );
  return hit ? hit[0] : currentType;
}
1970
/**
 * Infer a semantic type for a column from its values and its name.
 *
 * - No non-null values: "string".
 * - Numbers present and no booleans: "numeric" (subject to name override).
 * - Booleans present and no numbers: "boolean".
 * - Otherwise the first 100 non-blank trimmed strings are matched against
 *   the email / zip / date / phone / name patterns, in that order; the
 *   first pattern whose match ratio reaches its threshold decides the type.
 * - Fallback: "string" (subject to name override).
 *
 * @param {Array} values - Raw column values (null marks a missing cell).
 * @param {string} columnName - Column header, used for name-based override.
 * @returns {string} Inferred type name.
 */
function inferType(values, columnName) {
  const present = values.filter((value) => value !== null);
  if (present.length === 0) return "string";

  const sawNumber = present.some((value) => typeof value === "number");
  const sawBoolean = present.some((value) => typeof value === "boolean");
  if (sawNumber && !sawBoolean) return overrideTypeByColumnName(columnName, "numeric");
  if (sawBoolean && !sawNumber) return "boolean";

  const texts = present
    .filter((value) => typeof value === "string")
    .map((value) => value.trim())
    .filter((text) => text !== "");
  if (texts.length === 0) return "string";

  const sample = texts.slice(0, 100);
  // [type, pattern, minimum share of the sample that must match]
  const detectors = [
    ["email", EMAIL_RE2, 0.7],
    ["zip", ZIP_RE, 0.7],
    ["date", DATE_RE, 0.5],
    ["phone", PHONE_RE, 0.6],
    ["name", NAME_RE, 0.5],
  ];
  for (const [typeName, pattern, threshold] of detectors) {
    const hits = sample.filter((text) => pattern.test(text)).length;
    if (hits / sample.length >= threshold) {
      return overrideTypeByColumnName(columnName, typeName);
    }
  }
  return overrideTypeByColumnName(columnName, "string");
}
2008
/**
 * Build the profile of one column: row/null/unique counts, ratios, up to
 * five stringified sample values, and the inferred type.
 *
 * @param {object} data - Table object exposing `rawColumn(name)`.
 * @param {string} columnName - Column to profile.
 * @returns {object} Result of `makeColumnProfile` for this column.
 */
function profileColumn(data, columnName) {
  const values = data.rawColumn(columnName);
  const rowCount = values.length;

  const present = [];
  const distinct = new Set();
  for (const value of values) {
    if (value !== null) {
      present.push(value);
      distinct.add(value);
    }
  }
  const nullCount = rowCount - present.length;
  const uniqueCount = distinct.size;

  // Ratios are defined as 0 for an empty column to avoid dividing by zero.
  const shareOfRows = (count) => (rowCount > 0 ? count / rowCount : 0);

  return makeColumnProfile({
    name: columnName,
    inferredType: inferType(values, columnName),
    rowCount,
    nullCount,
    nullPct: shareOfRows(nullCount),
    uniqueCount,
    uniquePct: shareOfRows(uniqueCount),
    sampleValues: present.slice(0, 5).map((value) => String(value)),
  });
}
2036
/**
 * Profile every column of a row-oriented dataset.
 *
 * @param {Array<object>} rows - Row objects; wrapped in a `TabularData`.
 * @param {string} [filePath=""] - Origin label stored on the profile.
 * @returns {{filePath: string, rowCount: number, columnCount: number, columns: Array<object>}}
 */
function profileDataframe(rows, filePath = "") {
  const table = new TabularData(rows);
  const columnProfiles = [];
  for (const columnName of table.columns) {
    columnProfiles.push(profileColumn(table, columnName));
  }
  return {
    filePath,
    rowCount: table.rowCount,
    columnCount: table.columns.length,
    columns: columnProfiles,
  };
}
2046
+
2047
+ // src/core/engine/selector.ts
2048
// Maps a finding's `check` name to the transform names suggested for the
// affected column (consumed by selectFromFindings). An empty list means the
// check has no automatic remedy.
var FINDING_TRANSFORM_MAP = {
  type_inference: ["strip", "to_integer"],
  nullability: ["null_standardize"],
  uniqueness: ["strip", "collapse_whitespace", "email_normalize"],
  format_detection: ["phone_e164", "email_normalize", "date_iso8601", "zip_normalize"],
  range_distribution: ["clamp"],
  cardinality: ["category_auto_correct", "category_standardize"],
  pattern_consistency: ["phone_e164", "date_iso8601", "zip_normalize", "ssn_format"],
  encoding_detection: ["normalize_unicode", "normalize_quotes", "fix_mojibake"],
  sequence_detection: ["pad_left"],
  drift_detection: [],
  temporal_order: ["date_iso8601", "date_validate"],
  null_correlation: [],
  cross_column_validation: ["clamp"],
  cross_column: ["date_validate", "age_from_dob"],
};

// Inferred types that also receive transforms registered for plain "string"
// input (see selectTransforms).
var STRING_LIKE_TYPES = /* @__PURE__ */ new Set(["string", "email", "phone", "name", "address", "date"]);
2072
/**
 * Choose the auto-apply transforms for a profiled column.
 *
 * A transform qualifies when it is flagged `autoApply` and either lists the
 * column's inferred type, or lists "string" while the inferred type is one
 * of STRING_LIKE_TYPES. "category_auto_correct" is dropped when more than
 * 10% of rows hold distinct values. Results are sorted by descending
 * priority.
 *
 * @param {object} profile - Column profile (`inferredType`, `uniquePct`).
 * @param {number} [_confidenceThreshold=0.8] - Accepted but not used here.
 * @returns {Array<object>} Selected transform descriptors.
 */
function selectTransforms(profile, _confidenceThreshold = 0.8) {
  const columnType = profile.inferredType;
  const isStringLike = STRING_LIKE_TYPES.has(columnType);
  const tooManyDistinct = profile.uniquePct > 0.1;

  const qualifies = (transform) =>
    transform.autoApply &&
    (transform.inputTypes.includes(columnType) ||
      (isStringLike && transform.inputTypes.includes("string")));

  return [...listTransforms()]
    .filter(qualifies)
    .filter((transform) => !(tooManyDistinct && transform.name === "category_auto_correct"))
    .sort((left, right) => right.priority - left.priority);
}
2089
/**
 * Translate findings into per-column lists of suggested transform names.
 *
 * Each finding contributes the FINDING_TRANSFORM_MAP entry for its `check`
 * to its `column`; findings without a column, with an unknown check, or
 * whose check maps to an empty list are ignored. Names are de-duplicated
 * per column, keeping first-seen order.
 *
 * Lookups use own properties only, and results are accumulated in a Map, so
 * a check or column that happens to be called "constructor", "toString",
 * etc. is treated as ordinary data instead of resolving to an inherited
 * Object.prototype member (which previously raised a TypeError).
 *
 * @param {Iterable<object>} findings - Objects with `check` and `column`.
 * @returns {Object<string, string[]>} Column name -> transform names.
 */
function selectFromFindings(findings) {
  const namesByColumn = new Map();
  for (const finding of findings) {
    const check = String(finding["check"] ?? "");
    const column = String(finding["column"] ?? "");
    if (!column) continue;

    const transformNames = Object.hasOwn(FINDING_TRANSFORM_MAP, check)
      ? FINDING_TRANSFORM_MAP[check]
      : [];
    if (transformNames.length === 0) continue;

    let bucket = namesByColumn.get(column);
    if (!bucket) {
      bucket = new Set();
      namesByColumn.set(column, bucket);
    }
    for (const name of transformNames) bucket.add(name);
  }
  return Object.fromEntries(
    [...namesByColumn].map(([column, names]) => [column, [...names]]),
  );
}
2106
+
2107
+ // src/core/engine/transformer.ts
2108
/**
 * Runs a GoldenFlow configuration against an array of row objects.
 *
 * Pipeline order in transformDf(): column transforms (explicit config, or
 * auto-selected when none are configured) -> splits -> renames -> drops ->
 * filters -> dedup. Every step checks column existence against the FIRST
 * row only and is skipped for an empty dataset.
 */
var TransformEngine = class {
  /** Configuration with defaults filled in by `makeConfig`. */
  config;

  /**
   * @param {object} [config] - Partial configuration; passed to `makeConfig`.
   */
  constructor(config) {
    this.config = makeConfig(config);
  }

  /**
   * Apply the configured pipeline to `rows`.
   *
   * @param {Array<object>} rows - Input rows; the array is copied, and rows
   *   are replaced (not mutated) by the steps implemented in this class.
   * @param {string} [source="<dataframe>"] - Label recorded on the manifest.
   * @returns {{rows: Array<object>, columns: string[], manifest: object}}
   *   Transformed rows, the first row's keys, and the audit manifest.
   */
  transformDf(rows, source = "<dataframe>") {
    const manifest = new MutableManifest(source);
    let currentRows = [...rows];

    // Explicit transforms take precedence; auto mode runs only without them.
    if (this.config.transforms.length > 0) {
      currentRows = this._applyConfigTransforms(currentRows, manifest);
    } else {
      currentRows = this._applyAutoTransforms(currentRows, manifest, source);
    }

    // Splits: only registry entries in "dataframe" mode are applied.
    for (const split of this.config.splits) {
      if (currentRows.length === 0 || !(split.source in currentRows[0])) continue;
      const info = getTransform(split.method);
      if (info && info.mode === "dataframe") {
        currentRows = info.func(currentRows, split.source);
      }
    }

    // Renames: rebuild each row so key order is preserved.
    for (const [oldName, newName] of Object.entries(this.config.renames)) {
      if (currentRows.length === 0 || !(oldName in currentRows[0])) continue;
      currentRows = currentRows.map((row) => {
        const newRow = {};
        for (const [k, v] of Object.entries(row)) {
          newRow[k === oldName ? newName : k] = v;
        }
        return newRow;
      });
    }

    // Drops: remove the configured columns that exist on the first row.
    const dropCols = new Set(this.config.drop);
    if (dropCols.size > 0 && currentRows.length > 0) {
      const existingDrops = [...dropCols].filter((c) => c in currentRows[0]);
      if (existingDrops.length > 0) {
        const dropSet = new Set(existingDrops);
        currentRows = currentRows.map((row) => {
          const newRow = {};
          for (const [k, v] of Object.entries(row)) {
            if (!dropSet.has(k)) newRow[k] = v;
          }
          return newRow;
        });
      }
    }

    // Row filters.
    for (const filt of this.config.filters) {
      if (currentRows.length === 0 || !(filt.column in currentRows[0])) continue;
      currentRows = this._applyFilter(currentRows, filt.column, filt.condition);
    }

    // Dedup on the configured key columns, keeping the first or last row.
    if (this.config.dedup) {
      const dedupCols = this.config.dedup.columns.filter(
        (c) => currentRows.length > 0 && c in currentRows[0]
      );
      if (dedupCols.length > 0) {
        const before = currentRows.length;
        const seen = /* @__PURE__ */ new Set();
        const deduped = [];
        // keep === "last": walk backwards so the last occurrence is seen
        // first, then restore the original order afterwards.
        const keepLast = this.config.dedup.keep === "last";
        const iterRows = keepLast ? [...currentRows].reverse() : currentRows;
        for (const row of iterRows) {
          // NUL-joined stringified key; null/undefined count as "".
          const key = dedupCols.map((c) => String(row[c] ?? "")).join("\0");
          if (!seen.has(key)) {
            seen.add(key);
            deduped.push(row);
          }
        }
        if (keepLast) deduped.reverse();
        currentRows = deduped;
        const after = currentRows.length;
        if (before !== after) {
          manifest.addRecord(
            makeTransformRecord({
              column: dedupCols.join(","),
              transform: "dedup",
              affectedRows: before - after,
              totalRows: before
            })
          );
        }
      }
    }

    const columns = currentRows.length > 0 ? Object.keys(currentRows[0]) : [];
    return { rows: currentRows, columns, manifest };
  }

  /**
   * Apply `config.transforms` in order. Each op string is parsed with
   * `parseTransformName`; names missing from the registry are recorded as
   * manifest errors (row index -1) and skipped.
   */
  _applyConfigTransforms(rows, manifest) {
    for (const spec of this.config.transforms) {
      if (rows.length === 0 || !(spec.column in rows[0])) continue;
      for (const opRaw of spec.ops) {
        const [name, params] = parseTransformName(opRaw);
        const info = getTransform(name);
        if (!info) {
          manifest.addError(
            spec.column,
            name,
            -1,
            `Transform '${name}' not found in registry`
          );
          continue;
        }
        rows = this._applySingleTransform(rows, spec.column, info, params, manifest);
      }
    }
    return rows;
  }

  /**
   * Profile the data and apply, per column, whatever `selectTransforms`
   * chooses, with no parameters.
   */
  _applyAutoTransforms(rows, manifest, source) {
    const filePath = source !== "<dataframe>" ? source : "";
    const profile = profileDataframe(rows, filePath);
    for (const colProfile of profile.columns) {
      const selected = selectTransforms(colProfile);
      for (const info of selected) {
        rows = this._applySingleTransform(
          rows,
          colProfile.name,
          info,
          [],
          manifest
        );
      }
    }
    return rows;
  }

  /**
   * Run one registry transform on one column and record it in the manifest.
   *
   * "dataframe" transforms receive `(rows, column, ...params)` and return
   * new rows. Other ("series") transforms receive the column values (null
   * for missing, non-primitives stringified) and return either the new
   * values or a `[newValues, flaggedRowIndexes]` pair; flagged rows are
   * recorded as "Flagged for review" errors. An exception thrown by the
   * transform is recorded as a manifest error and the input rows are
   * returned unchanged.
   */
  _applySingleTransform(rows, column, info, params, manifest) {
    const totalRows = rows.length;
    const beforeSample = rows.slice(0, 3).map((r) => String(r[column] ?? ""));
    try {
      let newRows;
      if (info.mode === "dataframe") {
        newRows = info.func(rows, column, ...castParams(params));
      } else {
        const values = rows.map((r) => {
          const v = r[column];
          if (v === null || v === void 0) return null;
          if (typeof v === "string" || typeof v === "number" || typeof v === "boolean") return v;
          return String(v);
        });
        const typedParams = castParams(params);
        const result = typedParams.length > 0 ? info.func(values, ...typedParams) : info.func(values);
        let newValues;
        // A two-element array whose second element is an array is read as
        // the [values, flagged] pair rather than as two column values.
        if (Array.isArray(result) && result.length === 2 && Array.isArray(result[1])) {
          newValues = result[0];
          const flagged = result[1];
          for (const rowIdx of flagged) {
            manifest.addError(column, info.name, rowIdx, "Flagged for review");
          }
        } else {
          newValues = result;
        }
        // Unchanged rows keep their original object identity.
        newRows = rows.map((row, i) => {
          const oldVal = row[column] ?? null;
          if (newValues[i] === oldVal) return row;
          return { ...row, [column]: newValues[i] };
        });
      }
      const afterSample = newRows.slice(0, 3).map((r) => String(r[column] ?? ""));
      // Changes are counted on the stringified value, over the shared prefix
      // when a dataframe transform altered the row count.
      let changed = 0;
      for (let i = 0; i < Math.min(rows.length, newRows.length); i++) {
        if (String(rows[i][column] ?? "") !== String(newRows[i][column] ?? "")) {
          changed++;
        }
      }
      manifest.addRecord(
        makeTransformRecord({
          column,
          transform: info.name,
          affectedRows: changed,
          totalRows,
          sampleBefore: beforeSample,
          sampleAfter: afterSample
        })
      );
      return newRows;
    } catch (e) {
      manifest.addError(
        column,
        info.name,
        -1,
        e instanceof Error ? e.message : String(e)
      );
      return rows;
    }
  }

  /**
   * Filter rows by a condition string on one column.
   *
   * Supported conditions: "not_null", "after:<bound>" and "before:<bound>".
   * The bound comparisons are lexicographic on the stringified value (so
   * they order dates only when both sides are ISO-like strings). Rows whose
   * value is null/undefined never satisfy a bound comparison: previously a
   * missing value was compared as "" and therefore passed every "before:"
   * filter while failing every "after:" filter. Unrecognised conditions
   * leave the rows untouched.
   *
   * @param {Array<object>} rows
   * @param {string} column
   * @param {string} condition
   * @returns {Array<object>} Rows satisfying the condition.
   */
  _applyFilter(rows, column, condition) {
    const hasValue = (r) => r[column] !== null && r[column] !== void 0;
    if (condition === "not_null") {
      return rows.filter(hasValue);
    }
    if (condition.startsWith("after:")) {
      const bound = condition.slice(6);
      return rows.filter((r) => hasValue(r) && String(r[column]) > bound);
    }
    if (condition.startsWith("before:")) {
      const bound = condition.slice(7);
      return rows.filter((r) => hasValue(r) && String(r[column]) < bound);
    }
    return rows;
  }
};
2302
/**
 * Convert textual transform parameters to numbers where they are numeric.
 *
 * A parameter becomes a number only when the WHOLE string (ignoring
 * surrounding whitespace) is a decimal literal such as "5", "-3.14", ".5",
 * "1e3", or a signed "Infinity". Everything else is returned unchanged.
 *
 * The previous implementation fell back to `parseFloat`, which accepts any
 * numeric prefix, so "2024-01-01" became 2024 and "5abc" became 5, silently
 * corrupting date- and text-valued parameters.
 *
 * @param {Array<*>} params - Raw parameters (normally strings).
 * @returns {Array<*>} Parameters with numeric strings converted to numbers;
 *   non-string entries are passed through as-is.
 */
function castParams(params) {
  const numericLiteral = /^[+-]?(?:Infinity|(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)$/;
  return params.map((param) => {
    if (typeof param !== "string") return param;
    const text = param.trim();
    return numericLiteral.test(text) ? Number(text) : param;
  });
}
2311
+
2312
+ // src/core/engine/differ.ts
2313
/**
 * Summarise the differences between two row sets.
 *
 * Columns are taken from the first row of each side. For columns present on
 * both sides: when the row counts differ, every such column is reported as
 * changed and the absolute row-count difference is added to `totalChanges`
 * once per column; otherwise rows are compared positionally on their
 * stringified values (null/undefined read as "").
 *
 * @param {Array<object>} before
 * @param {Array<object>} after
 * @returns {object} totalChanges, changedColumns, addedColumns,
 *   removedColumns, rowCountBefore, rowCountAfter, columnDetails.
 */
function diffDataframes(before, after) {
  const columnsOf = (rows) => new Set(rows.length > 0 ? Object.keys(rows[0]) : []);
  const beforeCols = columnsOf(before);
  const afterCols = columnsOf(after);

  const addedColumns = [...afterCols].filter((c) => !beforeCols.has(c)).sort();
  const removedColumns = [...beforeCols].filter((c) => !afterCols.has(c)).sort();
  const sharedColumns = [...beforeCols].filter((c) => afterCols.has(c)).sort();

  const sameRowCount = before.length === after.length;
  const rowCountGap = Math.abs(before.length - after.length);
  const asText = (value) => String(value ?? "");

  const changedColumns = [];
  const columnDetails = {};
  let totalChanges = 0;
  for (const col of sharedColumns) {
    if (!sameRowCount) {
      changedColumns.push(col);
      totalChanges += rowCountGap;
      continue;
    }
    let changedRows = 0;
    for (const [index, row] of before.entries()) {
      if (asText(row[col]) !== asText(after[index][col])) changedRows += 1;
    }
    if (changedRows === 0) continue;
    changedColumns.push(col);
    totalChanges += changedRows;
    columnDetails[col] = { changedRows };
  }

  return {
    totalChanges,
    changedColumns,
    addedColumns,
    removedColumns,
    rowCountBefore: before.length,
    rowCountAfter: after.length,
    columnDetails,
  };
}
2350
+
2351
+ // src/core/engine/streaming.ts
2352
/**
 * Applies one TransformEngine configuration to records, batches or chunked
 * row arrays, counting the batches it has processed.
 */
var StreamProcessor = class {
  /** Engine shared by every call on this processor. */
  engine;
  /** Number of batches handled by transformBatch() and streamRows(). */
  _batchCount = 0;

  /**
   * @param {object} [config] - Configuration handed to `TransformEngine`.
   */
  constructor(config) {
    this.engine = new TransformEngine(config);
  }

  /** Transform a single record. Does not count as a batch. */
  transformOne(record) {
    return this.engine.transformDf([record]);
  }

  /** Transform a batch of rows. */
  transformBatch(rows) {
    this._batchCount++;
    return this.engine.transformDf(rows);
  }

  /**
   * Process rows in chunks, yielding TransformResult per chunk.
   *
   * @param {Array<object>} rows - Rows to process.
   * @param {number} [chunkSize=1e4] - Rows per chunk; must be greater than 0.
   * @throws {RangeError} On the first `next()` when `chunkSize` is not a
   *   positive number. Zero or a negative size previously made the loop
   *   index stall or run backwards, so the generator never finished.
   */
  *streamRows(rows, chunkSize = 1e4) {
    if (!(chunkSize > 0)) {
      throw new RangeError(`chunkSize must be a positive number (got ${chunkSize})`);
    }
    for (let start = 0; start < rows.length; start += chunkSize) {
      const batch = rows.slice(start, start + chunkSize);
      this._batchCount++;
      yield this.engine.transformDf(batch);
    }
  }

  /** Total batches processed so far. */
  get batchesProcessed() {
    return this._batchCount;
  }
};
2379
+
2380
+ // src/core/config/schema.ts
2381
+ init_types();
2382
+ init_types();
2383
/**
 * Normalise a raw (parsed YAML/JSON) object into a GoldenFlow config.
 *
 * Missing or wrongly-typed sections fall back to empty defaults and scalar
 * fields are coerced with `String`. List entries that are not objects
 * (for example a `null` produced by an empty YAML list item) are treated
 * as empty objects instead of raising a TypeError on property access; they
 * yield entries with empty names.
 *
 * @param {object} raw - Parsed configuration object.
 * @returns {object} Configuration produced by `makeConfig`.
 */
function validateConfig(raw) {
  const listOf = (value) => (Array.isArray(value) ? value : []);
  const stringsOf = (value) => listOf(value).map(String);
  const recordOf = (value) => (value !== null && typeof value === "object" ? value : {});
  const recordsOf = (value) => listOf(value).map((entry) => recordOf(entry));

  const transforms = recordsOf(raw["transforms"]).map((t) => ({
    column: String(t["column"] ?? ""),
    ops: stringsOf(t["ops"])
  }));

  const splits = recordsOf(raw["splits"]).map((s) => ({
    source: String(s["source"] ?? ""),
    target: stringsOf(s["target"]),
    method: String(s["method"] ?? "")
  }));

  const renamesRaw = raw["renames"];
  const renames = renamesRaw && typeof renamesRaw === "object"
    ? Object.fromEntries(Object.entries(renamesRaw).map(([k, v]) => [k, String(v)]))
    : {};

  const drop = stringsOf(raw["drop"]);

  const filters = recordsOf(raw["filters"]).map((f) => ({
    column: String(f["column"] ?? ""),
    condition: String(f["condition"] ?? "")
  }));

  const dedupRaw = raw["dedup"];
  const dedup = dedupRaw && typeof dedupRaw === "object"
    ? {
        columns: stringsOf(dedupRaw["columns"]),
        // Anything other than "last" means "first".
        keep: dedupRaw["keep"] === "last" ? "last" : "first"
      }
    : null;

  // `target` and `transform` are passed through without coercion.
  const mappings = recordsOf(raw["mappings"]).map((m) => ({
    source: String(m["source"] ?? ""),
    target: m["target"],
    transform: m["transform"] ?? null
  }));

  return makeConfig({
    source: raw["source"] != null ? String(raw["source"]) : null,
    output: raw["output"] != null ? String(raw["output"]) : null,
    transforms,
    splits,
    renames,
    drop,
    filters,
    dedup,
    mappings
  });
}
2425
+
2426
+ // src/core/config/loader.ts
2427
+ init_types();
2428
// Cached "yaml" module; stays null until a load succeeds.
var yamlModule = null;

/**
 * Lazily load the optional "yaml" package.
 *
 * @returns {object|null} The module, or null when it cannot be required.
 *   A failed load is not cached, so the next call tries again.
 */
function getYaml() {
  if (!yamlModule) {
    try {
      yamlModule = __require("yaml");
    } catch {
      // "yaml" is optional: callers handle the null result.
    }
  }
  return yamlModule;
}
2437
/**
 * Parse YAML text into a validated config.
 *
 * @param {string} content - YAML document.
 * @returns {object} Default config for an empty document, otherwise the
 *   result of `validateConfig`.
 * @throws {Error} When the "yaml" package is unavailable, or when the
 *   document's top level is not a mapping.
 */
function loadConfigFromString(content) {
  const yaml = getYaml();
  if (!yaml) {
    throw new Error("yaml package is required for config loading. Install with: npm install yaml");
  }

  const parsed = yaml.parse(content);
  // `== null` covers both null and undefined (an empty document).
  if (parsed == null) return makeConfig();

  const isList = Array.isArray(parsed);
  if (isList || typeof parsed !== "object") {
    throw new Error(`Config file is not a valid YAML object (got ${isList ? "array" : typeof parsed})`);
  }
  return validateConfig(parsed);
}
2449
/**
 * Serialise a config to YAML, omitting sections that are empty or unset.
 *
 * @param {object} config - Full configuration object.
 * @returns {string} YAML text from `yaml.stringify`.
 * @throws {Error} When the "yaml" package is unavailable.
 */
function saveConfigToString(config) {
  const yaml = getYaml();
  if (!yaml) {
    throw new Error("yaml package is required for config saving. Install with: npm install yaml");
  }

  const nonEmpty = (list) => list.length > 0;
  // [key, include?] in output order.
  const sections = [
    ["source", Boolean(config.source)],
    ["output", Boolean(config.output)],
    ["transforms", nonEmpty(config.transforms)],
    ["splits", nonEmpty(config.splits)],
    ["renames", nonEmpty(Object.keys(config.renames))],
    ["drop", nonEmpty(config.drop)],
    ["filters", nonEmpty(config.filters)],
    ["dedup", Boolean(config.dedup)],
    ["mappings", nonEmpty(config.mappings)],
  ];

  const data = {};
  for (const [key, include] of sections) {
    if (include) data[key] = config[key];
  }
  return yaml.stringify(data);
}
2466
/**
 * Overlay CLI overrides on a file-based config.
 *
 * Override keys whose value is `undefined` (typically flags the user did
 * not pass) are ignored, so they no longer erase the file's value or the
 * `makeConfig` default. An explicit `null` still overrides.
 *
 * @param {object} fileConfig - Config loaded from file.
 * @param {object} [cliOverrides] - Partial config from the command line.
 * @returns {object} Merged configuration produced by `makeConfig`.
 */
function mergeConfigs(fileConfig, cliOverrides) {
  const suppliedOverrides = Object.fromEntries(
    Object.entries(cliOverrides ?? {}).filter(([, value]) => value !== void 0),
  );
  return makeConfig({ ...fileConfig, ...suppliedOverrides });
}
2469
+
2470
+ // src/core/config/learner.ts
2471
+ init_types();
2472
/**
 * Derive a starter config from data: profile each column and list the
 * transforms `selectTransforms` would auto-apply to it.
 *
 * @param {Array<object>} rows - Sample rows.
 * @param {string} [source=""] - Source label; stored as null when empty.
 * @returns {object} Config whose `transforms` holds one entry per column
 *   that has at least one selected transform.
 */
function learnConfig(rows, source = "") {
  const { columns } = profileDataframe(rows, source);
  const transforms = columns
    .map((columnProfile) => ({
      column: columnProfile.name,
      ops: selectTransforms(columnProfile).map((transform) => transform.name),
    }))
    .filter((spec) => spec.ops.length > 0);
  return makeConfig({ source: source || null, transforms });
}
2489
+
2490
+ // src/core/mapping/name-similarity.ts
2491
// Canonical column names and the alternative spellings treated as
// equivalent by nameSimilarity().
var ALIASES = {
  first_name: ["fname", "first", "given_name", "first_nm"],
  last_name: ["lname", "last", "surname", "family_name", "last_nm"],
  email: ["email_address", "e_mail", "email_addr", "mail"],
  phone: ["phone_number", "ph", "telephone", "tel", "mobile", "cell"],
  address: ["addr", "street_address", "addr_line_1", "address_line_1"],
  city: ["town", "municipality"],
  state: ["st", "province", "region"],
  zip: ["zipcode", "zip_code", "postal_code", "postal"],
  name: ["full_name", "fullname", "customer_name"],
  created_at: ["signup_date", "signup_dt", "create_date", "date_created"],
};

// Lower-cased alias -> lower-cased canonical name. Every canonical name also
// maps to itself.
var _ALIAS_LOOKUP = /* @__PURE__ */ new Map();
for (const [canonical, aliases] of Object.entries(ALIASES)) {
  const canonicalKey = canonical.toLowerCase();
  for (const spelling of [...aliases, canonical]) {
    _ALIAS_LOOKUP.set(spelling.toLowerCase(), canonicalKey);
  }
}
2510
/**
 * Similarity of two strings on a 0-100 scale, computed as
 * `100 * (1 - editDistance / longerLength)` with the Levenshtein distance
 * over UTF-16 code units.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} 100 for identical strings, 0 when exactly one is empty.
 */
function fuzzyWRatio(a, b) {
  if (a === b) return 100;
  if (a.length === 0 || b.length === 0) return 0;

  // Row-by-row dynamic programming; `previous` is the row for a[0..i-1].
  let previous = Array.from({ length: b.length + 1 }, (_, j) => j);
  for (let i = 1; i <= a.length; i++) {
    const current = [i];
    for (let j = 1; j <= b.length; j++) {
      const substitution = previous[j - 1] + (a[i - 1] === b[j - 1] ? 0 : 1);
      current.push(Math.min(previous[j] + 1, current[j - 1] + 1, substitution));
    }
    previous = current;
  }

  const distance = previous[b.length];
  const longest = Math.max(a.length, b.length);
  return 100 * (1 - distance / longest);
}
2528
/**
 * Score how alike two column names are, from 0 to 1.
 *
 * Names are compared lower-cased and trimmed: identical names score 1,
 * names resolving to the same canonical alias score 0.95, anything else
 * gets `fuzzyWRatio / 100`.
 *
 * @param {string} source
 * @param {string} target
 * @returns {number}
 */
function nameSimilarity(source, target) {
  const [left, right] = [source, target].map((name) => name.toLowerCase().trim());
  if (left === right) return 1;

  const leftCanonical = _ALIAS_LOOKUP.get(left);
  if (leftCanonical && leftCanonical === _ALIAS_LOOKUP.get(right)) return 0.95;

  return fuzzyWRatio(left, right) / 100;
}
2537
+
2538
+ // src/core/mapping/profile-similarity.ts
2539
/**
 * Score how alike two column profiles are, from 0 to 1.
 *
 * Weighted parts: same inferred type (0.4), closeness of null ratio (0.2),
 * closeness of unique ratio (0.2), and the smaller/larger unique-count
 * ratio (0.2, only when both counts are positive).
 *
 * @param {object} source - Column profile.
 * @param {object} target - Column profile.
 * @returns {number}
 */
function profileSimilarity(source, target) {
  // 1 when the ratios are equal, falling linearly to 0 at a gap of 1.
  const closeness = (left, right) => Math.max(0, 1 - Math.abs(left - right));

  let weights = 0;
  let score = 0;

  weights += 0.4;
  if (source.inferredType === target.inferredType) score += 0.4;

  weights += 0.2;
  score += 0.2 * closeness(source.nullPct, target.nullPct);

  weights += 0.2;
  score += 0.2 * closeness(source.uniquePct, target.uniquePct);

  weights += 0.2;
  if (source.uniqueCount > 0 && target.uniqueCount > 0) {
    const smaller = Math.min(source.uniqueCount, target.uniqueCount);
    const larger = Math.max(source.uniqueCount, target.uniqueCount);
    score += 0.2 * (smaller / larger);
  }

  return weights > 0 ? score / weights : 0;
}
2557
+
2558
+ // src/core/mapping/schema-mapper.ts
2559
+ init_types();
2560
/**
 * Suggests source -> target column mappings by combining name similarity
 * (weight 0.7) with profile similarity (weight 0.3).
 */
var SchemaMapper = class {
  /** Stored from the constructor; not consulted by map(). */
  autoThreshold;
  /** Minimum combined score for a pair to be suggested. */
  suggestThreshold;

  /**
   * @param {number} [autoThreshold=0.9]
   * @param {number} [suggestThreshold=0.6]
   */
  constructor(autoThreshold = 0.9, suggestThreshold = 0.6) {
    this.autoThreshold = autoThreshold;
    this.suggestThreshold = suggestThreshold;
  }

  /**
   * Greedily pair each source column (in first-row key order) with its
   * best-scoring unclaimed target column. A target is claimed by the first
   * source column that selects it; ties keep the earlier target.
   *
   * @param {Array<object>} sourceRows
   * @param {Array<object>} targetRows
   * @returns {Array<{source: string, target: string, confidence: number, transform: null}>}
   *   One entry per matched source column; confidence has 3 decimals.
   */
  map(sourceRows, targetRows) {
    const profilesByName = (rows) =>
      new Map(profileDataframe(rows).columns.map((profile) => [profile.name, profile]));
    const firstRowKeys = (rows) => (rows.length > 0 ? Object.keys(rows[0]) : []);

    const sourceProfiles = profilesByName(sourceRows);
    const targetProfiles = profilesByName(targetRows);
    const sourceCols = firstRowKeys(sourceRows);
    const targetCols = firstRowKeys(targetRows);

    const claimedTargets = new Set();
    const mappings = [];
    for (const sCol of sourceCols) {
      let winner = null;
      let winnerScore = 0;
      for (const tCol of targetCols) {
        if (claimedTargets.has(tCol)) continue;

        const nameScore = nameSimilarity(sCol, tCol);
        const sProfile = sourceProfiles.get(sCol);
        const tProfile = targetProfiles.get(tCol);
        // Profile score counts as 0 when either profile is missing.
        const profileScore = sProfile && tProfile ? profileSimilarity(sProfile, tProfile) : 0;

        const combined = 0.7 * nameScore + 0.3 * profileScore;
        if (combined <= winnerScore || combined < this.suggestThreshold) continue;

        winnerScore = combined;
        winner = {
          source: sCol,
          target: tCol,
          confidence: Math.round(combined * 1e3) / 1e3,
          transform: null,
        };
      }
      if (winner) {
        mappings.push(winner);
        claimedTargets.add(winner.target);
      }
    }
    return mappings;
  }

  /**
   * Wrap mappings in a config object.
   *
   * @param {Array<object>} mappings - Output of map().
   * @returns {object} Config from `makeConfig` with only `mappings` set.
   */
  toConfig(mappings) {
    const entries = mappings.map(({ source, target, transform }) => ({ source, target, transform }));
    return makeConfig({ mappings: entries });
  }
};
2616
+
2617
+ // src/core/domains/index.ts
2618
// Lazy loaders for the bundled domain packs. Each loader returns a promise
// that, once resolved, runs the pack's module initialiser and yields its
// exports object.
var DOMAIN_LOADERS = {
  people_hr: () =>
    Promise.resolve().then(() => {
      init_people_hr();
      return people_hr_exports;
    }),
  healthcare: () =>
    Promise.resolve().then(() => {
      init_healthcare();
      return healthcare_exports;
    }),
  finance: () =>
    Promise.resolve().then(() => {
      init_finance();
      return finance_exports;
    }),
  ecommerce: () =>
    Promise.resolve().then(() => {
      init_ecommerce();
      return ecommerce_exports;
    }),
  real_estate: () =>
    Promise.resolve().then(() => {
      init_real_estate();
      return real_estate_exports;
    }),
};
2625
/**
 * Load a domain pack by name.
 *
 * The name is lower-cased and "-" and "/" are replaced by "_" before the
 * lookup. Only DOMAIN_LOADERS' own keys are considered, so names such as
 * "constructor" or "__proto__" resolve to null instead of picking up an
 * inherited Object.prototype member (which previously returned undefined
 * or threw "loader is not a function").
 *
 * @param {string} name - Domain name, e.g. "people-hr".
 * @returns {Promise<*>} The loaded module's `PACK` export, or null when the
 *   domain is unknown.
 */
async function loadDomain(name) {
  const key = name.toLowerCase().replace(/-/g, "_").replace(/\//g, "_");
  if (!Object.hasOwn(DOMAIN_LOADERS, key)) return null;
  const mod = await DOMAIN_LOADERS[key]();
  return mod.PACK;
}
2632
/**
 * List the names of the available domain packs.
 *
 * @returns {string[]} Keys of DOMAIN_LOADERS, in declaration order.
 */
function listDomains() {
  const names = Object.keys(DOMAIN_LOADERS);
  return names;
}
2635
+
2636
+ // src/core/reporters/json-reporter.ts
2637
+ init_types();
2638
/**
 * Serialise a manifest as pretty-printed JSON (2-space indent).
 *
 * @param {object} manifest - A `MutableManifest` (serialised through its
 *   `toDict()`), or any already-plain manifest object.
 * @returns {string}
 */
function manifestToJson(manifest) {
  const payload = manifest instanceof MutableManifest ? manifest.toDict() : manifest;
  return JSON.stringify(payload, null, 2);
}
2644
+
2645
+ // src/core/reporters/console.ts
2646
// ANSI SGR escape sequences used by the console reporter.
var BOLD = "\x1B[1m";
var DIM = "\x1B[2m";
var RED = "\x1B[31m";
var GREEN = "\x1B[32m";
var YELLOW = "\x1B[33m";
var CYAN = "\x1B[36m";
var MAGENTA = "\x1B[35m";
// Clears every attribute set above.
var RESET = "\x1B[0m";
2654
/**
 * Print a dataset profile to the console as a coloured table: a title, a
 * header and rule, one line per column (name, type, null count and
 * percentage, unique count, up to three samples), then a totals line.
 *
 * @param {object} profile - Output of profileDataframe().
 */
function printProfile(profile) {
  const rule = (width) => "\u2500".repeat(width);

  console.log(`\n${BOLD}Profile: ${profile.filePath || "<dataframe>"}${RESET}\n`);
  console.log(` ${"Column".padEnd(20)} ${"Type".padEnd(12)} ${"Nulls".padEnd(15)} ${"Unique".padEnd(10)} Sample`);
  console.log(` ${rule(20)} ${rule(12)} ${rule(15)} ${rule(10)} ${rule(20)}`);

  for (const col of profile.columns) {
    const pct = (col.nullPct * 100).toFixed(0);
    const nameCell = col.name.padEnd(20);
    const typeCell = col.inferredType.padEnd(12);
    const nullCell = `${col.nullCount} (${pct}%)`.padEnd(15);
    const uniqueCell = String(col.uniqueCount).padEnd(10);
    const sampleCell = col.sampleValues.slice(0, 3).join(", ");
    console.log(
      ` ${CYAN}${nameCell}${RESET} ${GREEN}${typeCell}${RESET} ${YELLOW}${nullCell}${RESET} ${MAGENTA}${uniqueCell}${RESET} ${DIM}${sampleCell}${RESET}`
    );
  }

  console.log(`\n${BOLD}${profile.rowCount}${RESET} rows, ${BOLD}${profile.columnCount}${RESET} columns`);
}
2669
/**
 * Print a transform manifest to the console: one line per applied
 * transform (column, transform, affected/total rows, up to two before and
 * after samples), followed by any recorded errors.
 *
 * @param {object} manifest - Object with `records` and `errors` arrays.
 */
function printManifest(manifest) {
  const { records, errors } = manifest;
  if (records.length === 0 && errors.length === 0) {
    console.log(`${DIM}No transforms applied.${RESET}`);
    return;
  }

  const rule = (width) => "\u2500".repeat(width);
  console.log(`\n${BOLD}Transforms Applied${RESET}\n`);
  console.log(` ${"Column".padEnd(20)} ${"Transform".padEnd(22)} ${"Affected".padEnd(12)} ${"Before".padEnd(20)} After`);
  console.log(` ${rule(20)} ${rule(22)} ${rule(12)} ${rule(20)} ${rule(20)}`);

  for (const record of records) {
    const before = record.sampleBefore.slice(0, 2).join(", ");
    const after = record.sampleAfter.slice(0, 2).join(", ");
    const columnCell = record.column.padEnd(20);
    const transformCell = record.transform.padEnd(22);
    const affectedCell = `${record.affectedRows}/${record.totalRows}`.padEnd(12);
    console.log(
      ` ${CYAN}${columnCell}${RESET} ${GREEN}${transformCell}${RESET} ${YELLOW}${affectedCell}${RESET} ${DIM}${before.padEnd(20)}${RESET} ${BOLD}${after}${RESET}`
    );
  }

  if (errors.length === 0) return;
  console.log(`\n${RED}${BOLD}${errors.length} errors:${RESET}`);
  for (const failure of errors) {
    console.log(` ${RED}${failure.column}${RESET} / ${failure.transform}: ${failure.error}`);
  }
}
2694
/**
 * Print a diff summary to the console: row counts, total changes, then the
 * added / removed / changed column lists that are non-empty.
 *
 * @param {object} diff - Output of diffDataframes().
 */
function printDiff(diff) {
  console.log(`Rows: ${diff.rowCountBefore} \u2192 ${diff.rowCountAfter}`);
  console.log(`Total changes: ${BOLD}${diff.totalChanges}${RESET}`);

  const sections = [
    ["Added columns", GREEN, diff.addedColumns],
    ["Removed columns", RED, diff.removedColumns],
    ["Changed columns", YELLOW, diff.changedColumns],
  ];
  for (const [label, color, names] of sections) {
    if (names.length) console.log(`${label}: ${color}${names.join(", ")}${RESET}`);
  }
}
2701
+
2702
+ // src/core/llm/corrector.ts
2703
+ init_registry();
2704
// Column name -> Map(original value -> corrected value), filled by
// prepareLlmCorrections().
var _correctionsCache = /* @__PURE__ */ new Map();

/**
 * Count how often each non-blank string occurs (after trimming) and keep
 * the most frequent ones.
 *
 * @param {Iterable<*>} values - Column values; non-strings are ignored.
 * @param {number} [max=30] - Maximum number of distinct values to keep.
 * @returns {Object<string, number>} Trimmed value -> count, built from the
 *   entries sorted by descending count.
 */
function getValueSummary(values, max = 30) {
  const tally = new Map();
  for (const raw of values) {
    if (typeof raw !== "string") continue;
    const key = raw.trim();
    if (key === "") continue;
    tally.set(key, (tally.get(key) ?? 0) + 1);
  }
  const ranked = Array.from(tally).sort(([, left], [, right]) => right - left);
  return Object.fromEntries(ranked.slice(0, max));
}
2716
/**
 * Build the LLM prompt asking for a JSON map of misspelled or variant
 * values to their canonical form.
 *
 * @param {string} columnName - Column being analysed.
 * @param {object} valueSummary - Value -> count, embedded as indented JSON.
 * @returns {string} Prompt text.
 */
function buildPrompt(columnName, valueSummary) {
  const lines = [
    "You are a data quality expert. Analyze this column and identify values that appear to be misspellings, abbreviations, or variants of other values in the same column.",
    "",
    `Column name: ${columnName}`,
    "Value frequencies (value: count):",
    JSON.stringify(valueSummary, null, 2),
    "",
    "For each incorrect value, provide the corrected canonical form. Only include values that need correction. Return JSON object mapping incorrect values to their corrections.",
    "",
    "Example response:",
    '{"actve": "active", "ACTIVE": "active", "pendng": "pending"}',
    "",
    "Return ONLY the JSON object, no other text.",
  ];
  return lines.join("\n");
}
2730
/**
 * Read an environment variable, tolerating runtimes without `process`.
 *
 * @param {string} key - Variable name.
 * @returns {string|undefined} The value, or undefined when it is unset or
 *   `process.env` is unavailable.
 */
function envVar(key) {
  const hasEnv = typeof process !== "undefined" && process.env;
  return hasEnv ? process.env[key] : void 0;
}
2736
/**
 * Keep only the string -> string entries of a parsed LLM reply.
 *
 * @param {*} parsed - Result of parsing the model's JSON.
 * @returns {Object<string, string>} Empty when `parsed` is not a plain
 *   (non-array) object.
 */
function validateCorrections(parsed) {
  const isRecord = Boolean(parsed) && typeof parsed === "object" && !Array.isArray(parsed);
  if (!isRecord) return {};

  const result = {};
  Object.entries(parsed)
    .filter(([key, value]) => typeof key === "string" && typeof value === "string")
    .forEach(([key, value]) => {
      result[key] = value;
    });
  return result;
}
2744
/**
 * Parse the JSON object contained in an LLM reply.
 *
 * Tries the whole (trimmed) text first; if that is not valid JSON, falls
 * back to the span between the first "{" and the last "}", which covers
 * replies wrapped in a Markdown code fence or a sentence of prose.
 *
 * @param {*} text - Model output.
 * @returns {*} Parsed JSON value.
 * @throws {SyntaxError} When no parsable JSON object is found.
 */
function _parseLlmJson(text) {
  const trimmed = String(text).trim();
  try {
    return JSON.parse(trimmed);
  } catch {
    // Not bare JSON: fall through to the embedded-object extraction.
  }
  const start = trimmed.indexOf("{");
  const end = trimmed.lastIndexOf("}");
  if (start === -1 || end <= start) {
    throw new SyntaxError("LLM response did not contain a JSON object");
  }
  return JSON.parse(trimmed.slice(start, end + 1));
}

/**
 * POST a JSON payload to one provider and turn the reply into corrections.
 *
 * @param {string} provider - Name used in warning messages.
 * @param {string} url - Endpoint.
 * @param {object} headers - Request headers.
 * @param {object} payload - Request body (JSON-serialised here).
 * @param {function(object): string} extractText - Picks the reply text out
 *   of the decoded response body.
 * @returns {Promise<Object<string, string>>} Validated corrections; empty
 *   (after a console warning) on a non-OK status or an empty reply.
 */
async function _requestLlmCorrections(provider, url, headers, payload, extractText) {
  const resp = await fetch(url, {
    method: "POST",
    headers,
    body: JSON.stringify(payload)
  });
  if (!resp.ok) {
    console.warn(`[goldenflow:llm] ${provider} API error: ${resp.status} ${resp.statusText}`);
    return {};
  }
  const text = extractText(await resp.json());
  if (!text) {
    console.warn(`[goldenflow:llm] ${provider} returned empty response`);
    return {};
  }
  return validateCorrections(_parseLlmJson(text));
}

/**
 * Ask an LLM which values of a column are misspellings or variants.
 *
 * Uses Anthropic when ANTHROPIC_API_KEY is set, otherwise OpenAI when
 * OPENAI_API_KEY is set, otherwise does nothing. There is no fallback from
 * one provider to the other. Network, HTTP and parse failures are logged
 * with `console.warn` and produce an empty result; this function does not
 * reject.
 *
 * Replies that wrap the JSON in a code fence or prose are now parsed
 * instead of being discarded as a parse failure.
 *
 * @param {string} columnName - Column being analysed.
 * @param {Object<string, number>} valueSummary - Value -> count.
 * @returns {Promise<Object<string, string>>} Incorrect value -> correction.
 */
async function askLlmForCorrections(columnName, valueSummary) {
  const prompt = buildPrompt(columnName, valueSummary);
  const anthropicKey = envVar("ANTHROPIC_API_KEY");
  const openaiKey = envVar("OPENAI_API_KEY");
  const messages = [{ role: "user", content: prompt }];
  try {
    if (anthropicKey) {
      // `return await` keeps rejections inside this try block.
      return await _requestLlmCorrections(
        "Anthropic",
        "https://api.anthropic.com/v1/messages",
        {
          "Content-Type": "application/json",
          "x-api-key": anthropicKey,
          "anthropic-version": "2023-06-01"
        },
        {
          // NOTE(review): this id does not look like a published Anthropic
          // model name; verify it against the current model list.
          model: "claude-sonnet-4-5-20250514",
          max_tokens: 1024,
          messages
        },
        (data) => data.content?.[0]?.text ?? ""
      );
    }
    if (openaiKey) {
      return await _requestLlmCorrections(
        "OpenAI",
        "https://api.openai.com/v1/chat/completions",
        {
          "Content-Type": "application/json",
          Authorization: `Bearer ${openaiKey}`
        },
        {
          model: "gpt-4o-mini",
          messages,
          response_format: { type: "json_object" }
        },
        (data) => data.choices?.[0]?.message?.content ?? ""
      );
    }
  } catch (e) {
    console.warn(
      `[goldenflow:llm] LLM correction failed: ${e instanceof Error ? e.message : String(e)}`
    );
  }
  return {};
}
2807
/**
 * Fetch LLM corrections for a column and cache them under its name.
 *
 * Nothing is requested when the column has no non-blank string values, and
 * nothing is cached when the model returns no corrections.
 *
 * @param {string} columnName
 * @param {Array<*>} values - Column values.
 * @returns {Promise<Object<string, string>>} The corrections (may be empty).
 */
async function prepareLlmCorrections(columnName, values) {
  const summary = getValueSummary(values);
  const hasValues = Object.keys(summary).length > 0;
  if (!hasValues) return {};

  const corrections = await askLlmForCorrections(columnName, summary);
  const pairs = Object.entries(corrections);
  if (pairs.length > 0) {
    _correctionsCache.set(columnName, new Map(pairs));
  }
  return corrections;
}
2816
/**
 * Apply cached LLM corrections to a column, fetching them first if the cache
 * has no entry for `columnName`.
 *
 * Only string values are candidates: each is trimmed and looked up in the
 * cached map; a hit replaces the value, a miss keeps the original (untrimmed)
 * value. Non-string values pass through unchanged.
 *
 * @param {string} columnName - Cache key for the column.
 * @param {Array} values - Column values to correct.
 * @returns {Promise<Array>} A new array; a shallow copy of `values` when no
 *   corrections are available.
 */
async function applyLlmCorrections(columnName, values) {
  const alreadyPrepared = _correctionsCache.has(columnName);
  if (!alreadyPrepared) {
    await prepareLlmCorrections(columnName, values);
  }

  const lookup = _correctionsCache.get(columnName);
  const nothingToApply = !lookup || lookup.size === 0;
  if (nothingToApply) {
    return [...values];
  }

  return values.map((value) => {
    // `typeof` already rules out null, so a single string check suffices.
    if (typeof value !== "string") {
      return value;
    }
    const key = value.trim();
    const replacement = lookup.get(key);
    return replacement ?? value;
  });
}
2828
/**
 * Synchronous transform: replace string values using corrections that are
 * already in the module-level cache. It never contacts the LLM itself.
 *
 * @param {Array} values - Column values to correct.
 * @param {...*} params - Optional extras; when the first one is a string it is
 *   used as the cache key, otherwise the key "__default__" is used.
 * @returns {Array} A new array; a shallow copy of `values` when the cache holds
 *   no (or an empty) map for the key.
 */
function categoryLlmCorrect(values, ...params) {
  const [firstParam] = params;
  let columnName = "__default__";
  if (typeof firstParam === "string") {
    columnName = firstParam;
  }

  const lookup = _correctionsCache.get(columnName);
  if (!lookup || lookup.size === 0) {
    return [...values];
  }

  return values.map((value) => {
    // Non-strings (including null) are returned untouched.
    if (typeof value !== "string") {
      return value;
    }
    // Lookup uses the trimmed text; on a miss the original value is kept as-is.
    return lookup.get(value.trim()) ?? value;
  });
}
2838
// Registers categoryLlmCorrect in the transform registry under the name
// "category_llm_correct", declared for string inputs with autoApply disabled.
// NOTE(review): presumably autoApply=false means the transform only runs when
// explicitly requested, and mode "series" means it receives the whole column
// at once (categoryLlmCorrect takes an array of values) - confirm against
// registerTransform and the engine.
+ registerTransform(
2839
+ {
2840
+ name: "category_llm_correct",
2841
+ inputTypes: ["string"],
2842
+ autoApply: false,
2843
+ priority: 34,
2844
+ mode: "series"
2845
+ },
2846
+ categoryLlmCorrect
2847
+ );
2848
+
2849
+ // src/core/notebook.ts
2850
/**
 * Render a transform result as an HTML summary card (row/column/transform/error
 * counts plus a table of the first ten applied transforms).
 *
 * Column and transform names originate from user data, so every data-derived
 * value is HTML-escaped before interpolation; otherwise a column named e.g.
 * `<script>...` would be injected verbatim into the rendered output.
 *
 * @param {{rows: Array, columns: Array, manifest: {records: Array, errors: Array}}} result
 *   Result object; each record is read for `column`, `transform`,
 *   `affectedRows` and `totalRows`.
 * @returns {string} HTML markup.
 */
function transformResultToHtml(result) {
  const HTML_ESCAPES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  // Escapes the five HTML-significant characters; non-strings are stringified first.
  const esc = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
  const MAX_SHOWN = 10;

  const { records } = result.manifest;
  const rows = result.rows.length;
  const cols = result.columns.length;
  const transforms = records.length;
  const errors = result.manifest.errors.length;

  let html = `<div style="font-family: monospace; padding: 10px; border: 1px solid #ddd; border-radius: 5px;">
<h3 style="margin: 0 0 10px 0;">GoldenFlow TransformResult</h3>
<table style="border-collapse: collapse; width: 100%;">
<tr><td style="padding: 4px 8px; font-weight: bold;">Rows</td><td>${rows.toLocaleString()}</td></tr>
<tr><td style="padding: 4px 8px; font-weight: bold;">Columns</td><td>${cols}</td></tr>
<tr><td style="padding: 4px 8px; font-weight: bold;">Transforms Applied</td><td>${transforms}</td></tr>
<tr><td style="padding: 4px 8px; font-weight: bold;">Errors</td><td style="color: ${errors ? "red" : "green"};">${errors}</td></tr>
</table>`;

  if (records.length > 0) {
    html += `<h4 style="margin: 10px 0 5px 0;">Transforms</h4>
<table style="border-collapse: collapse; width: 100%; font-size: 0.9em;">
<tr style="background: #f5f5f5;">
<th style="padding: 4px 8px; text-align: left;">Column</th>
<th style="padding: 4px 8px; text-align: left;">Transform</th>
<th style="padding: 4px 8px; text-align: left;">Affected</th>
</tr>`;
    for (const r of records.slice(0, MAX_SHOWN)) {
      html += `<tr>
<td style="padding: 4px 8px;">${esc(r.column)}</td>
<td style="padding: 4px 8px;">${esc(r.transform)}</td>
<td style="padding: 4px 8px;">${esc(r.affectedRows)}/${esc(r.totalRows)}</td>
</tr>`;
    }
    // Summarise whatever was cut off so the table stays short.
    if (records.length > MAX_SHOWN) {
      html += `<tr><td colspan="3" style="padding: 4px 8px; color: #888;">... and ${records.length - MAX_SHOWN} more</td></tr>`;
    }
    html += "</table>";
  }

  html += "</div>";
  return html;
}
2887
/**
 * Render a manifest as an HTML table with one row per transform record,
 * showing up to two before/after sample values for each.
 *
 * The source label, column/transform names and sample values come from user
 * data, so each is HTML-escaped before interpolation to keep markup in the
 * data from being injected into the rendered output.
 *
 * @param {{source: *, records: Array, errors: Array}} manifest
 *   Manifest; each record is read for `column`, `transform`, `affectedRows`,
 *   `totalRows`, `sampleBefore` and `sampleAfter`.
 * @returns {string} HTML markup.
 */
function manifestToHtml(manifest) {
  const HTML_ESCAPES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  // Escapes the five HTML-significant characters; non-strings are stringified first.
  const esc = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
  // Join first, escape after: join() renders null/undefined samples as empty text.
  const samples = (list) => esc((list ?? []).slice(0, 2).join(", "));

  let html = `<div style="font-family: monospace; padding: 10px; border: 1px solid #ddd; border-radius: 5px;">
<h3>GoldenFlow Manifest</h3>
<p>Source: ${esc(manifest.source)} | Transforms: ${manifest.records.length} | Errors: ${manifest.errors.length}</p>
<table style="border-collapse: collapse; width: 100%; font-size: 0.9em;">
<tr style="background: #f5f5f5;">
<th style="padding: 4px 8px; text-align: left;">Column</th>
<th style="padding: 4px 8px; text-align: left;">Transform</th>
<th style="padding: 4px 8px; text-align: left;">Affected</th>
<th style="padding: 4px 8px; text-align: left;">Before</th>
<th style="padding: 4px 8px; text-align: left;">After</th>
</tr>`;

  for (const r of manifest.records) {
    html += `<tr>
<td style="padding: 4px 8px;">${esc(r.column)}</td>
<td style="padding: 4px 8px;">${esc(r.transform)}</td>
<td style="padding: 4px 8px;">${esc(r.affectedRows)}/${esc(r.totalRows)}</td>
<td style="padding: 4px 8px; color: #c00;">${samples(r.sampleBefore)}</td>
<td style="padding: 4px 8px; color: #0a0;">${samples(r.sampleAfter)}</td>
</tr>`;
  }

  html += "</table></div>";
  return html;
}
2913
/**
 * Render a dataset profile as an HTML table with one row per column: name,
 * inferred type, null count with percentage, unique count and up to three
 * sample values.
 *
 * Column names and sample values come from user data, so each data-derived
 * value is HTML-escaped before interpolation to keep markup in the data from
 * being injected into the rendered output.
 *
 * @param {{rowCount: number, columnCount: number, columns: Array}} profile
 *   Profile; each column is read for `name`, `inferredType`, `nullCount`,
 *   `nullPct` (a fraction, multiplied by 100 for display), `uniqueCount` and
 *   `sampleValues`.
 * @returns {string} HTML markup.
 */
function profileToHtml(profile) {
  const HTML_ESCAPES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  // Escapes the five HTML-significant characters; non-strings are stringified first.
  const esc = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

  let html = `<div style="font-family: monospace; padding: 10px; border: 1px solid #ddd; border-radius: 5px;">
<h3>GoldenFlow Profile</h3>
<p>${esc(profile.rowCount.toLocaleString())} rows, ${esc(profile.columnCount)} columns</p>
<table style="border-collapse: collapse; width: 100%; font-size: 0.9em;">
<tr style="background: #f5f5f5;">
<th style="padding: 4px 8px; text-align: left;">Column</th>
<th style="padding: 4px 8px; text-align: left;">Type</th>
<th style="padding: 4px 8px; text-align: left;">Nulls</th>
<th style="padding: 4px 8px; text-align: left;">Unique</th>
<th style="padding: 4px 8px; text-align: left;">Sample</th>
</tr>`;

  for (const c of profile.columns) {
    const pct = (c.nullPct * 100).toFixed(0);
    // Join first, escape after: join() renders null/undefined samples as empty text.
    const sample = esc((c.sampleValues ?? []).slice(0, 3).join(", "));
    html += `<tr>
<td style="padding: 4px 8px;">${esc(c.name)}</td>
<td style="padding: 4px 8px;">${esc(c.inferredType)}</td>
<td style="padding: 4px 8px;">${esc(c.nullCount)} (${pct}%)</td>
<td style="padding: 4px 8px;">${esc(c.uniqueCount)}</td>
<td style="padding: 4px 8px; color: #888;">${sample}</td>
</tr>`;
  }

  html += "</table></div>";
  return html;
}
2938
+
2939
+ export { FINDING_TRANSFORM_MAP, MutableManifest, SchemaMapper, StreamProcessor, TabularData, TransformEngine, applyLlmCorrections, diffDataframes, getTransform, isNullish, learnConfig, listDomains, listTransforms, loadConfigFromString, loadDomain, makeColumnProfile, makeConfig, makeManifest, makeTransformRecord, manifestToHtml, manifestToJson, mergeConfigs, nameSimilarity, parseTransformName, prepareLlmCorrections, printDiff, printManifest, printProfile, profileDataframe, profileSimilarity, profileToHtml, registerTransform, registry, saveConfigToString, selectFromFindings, selectTransforms, toColumnValue, transformResultToHtml, validateConfig };
2940
+ //# sourceMappingURL=index.js.map
2941
+ //# sourceMappingURL=index.js.map