@mostlyrightmd/core 1.1.3 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +1 -1
  2. package/dist/discovery/index.cjs +44 -7
  3. package/dist/discovery/index.cjs.map +1 -1
  4. package/dist/discovery/index.d.cts +34 -6
  5. package/dist/discovery/index.d.ts +34 -6
  6. package/dist/discovery/index.mjs +44 -7
  7. package/dist/discovery/index.mjs.map +1 -1
  8. package/dist/index.cjs +39 -1
  9. package/dist/index.cjs.map +1 -1
  10. package/dist/index.d.cts +16 -2
  11. package/dist/index.d.ts +16 -2
  12. package/dist/index.global.js +37 -1
  13. package/dist/index.global.js.map +1 -1
  14. package/dist/index.mjs +37 -1
  15. package/dist/index.mjs.map +1 -1
  16. package/dist/internal/cache/index.browser.cjs +77 -4
  17. package/dist/internal/cache/index.browser.cjs.map +1 -1
  18. package/dist/internal/cache/index.browser.d.cts +8 -2
  19. package/dist/internal/cache/index.browser.d.ts +8 -2
  20. package/dist/internal/cache/index.browser.mjs +10 -5
  21. package/dist/internal/cache/index.browser.mjs.map +1 -1
  22. package/dist/internal/cache/index.cjs +79 -2
  23. package/dist/internal/cache/index.cjs.map +1 -1
  24. package/dist/internal/cache/index.d.cts +12 -6
  25. package/dist/internal/cache/index.d.ts +12 -6
  26. package/dist/internal/cache/index.mjs +12 -3
  27. package/dist/internal/cache/index.mjs.map +1 -1
  28. package/dist/internal/{chunk-PKJXHY27.mjs → chunk-IPC4XUYW.mjs} +70 -1
  29. package/dist/internal/chunk-IPC4XUYW.mjs.map +1 -0
  30. package/dist/internal/{keys-B7C8C88N.d.cts → versionedCacheStore-DyHDqFIC.d.cts} +23 -1
  31. package/dist/internal/{keys-B7C8C88N.d.ts → versionedCacheStore-DyHDqFIC.d.ts} +23 -1
  32. package/dist/preprocessing/index.cjs +150 -0
  33. package/dist/preprocessing/index.cjs.map +1 -0
  34. package/dist/preprocessing/index.d.cts +111 -0
  35. package/dist/preprocessing/index.d.ts +111 -0
  36. package/dist/preprocessing/index.mjs +121 -0
  37. package/dist/preprocessing/index.mjs.map +1 -0
  38. package/dist/temporal/index.cjs.map +1 -1
  39. package/dist/temporal/index.mjs.map +1 -1
  40. package/dist/validator.cjs.map +1 -1
  41. package/dist/validator.mjs.map +1 -1
  42. package/package.json +25 -2
  43. package/dist/internal/chunk-PKJXHY27.mjs.map +0 -1
@@ -0,0 +1,111 @@
1
+ /**
2
+ * Physics-based clipping defaults for canonical observation columns.
3
+ *
4
+ * Mirrors Python `mostlyright.preprocessing.PHYSICS_BOUNDS` (preprocessing.py:34-46).
5
+ * Values are `[min, max]` tuples in canonical units (°C for temp, m/s and kt
6
+ * for wind speed, degrees for wind direction, hPa for pressure, percent for humidity, mm for precip).
7
+ *
8
+ * Both `dew_point_c`/`dewpoint_c` and `wind_dir_deg`/`wind_dir_degrees` are
9
+ * aliased to support legacy + canonical column names.
10
+ */
11
+ declare const PHYSICS_BOUNDS: ReadonlyMap<string, readonly [number, number]>;
12
+ interface ClipOutliersOptions {
13
+ /** Explicit `[lo, hi]` range. Overrides PHYSICS_BOUNDS and sigma fallback. */
14
+ bounds?: readonly [number, number];
15
+ /** Sigma multiplier for the fallback branch. Default 3.0. Must be > 0. */
16
+ std?: number;
17
+ }
18
+ /**
19
+ * Winsorize a numeric column.
20
+ *
21
+ * Mirrors Python `mostlyright.preprocessing.clip_outliers`. Returns rows with
22
+ * a derived `{col}_clipped` column; the source `col` is preserved unchanged.
23
+ *
24
+ * Decision tree:
25
+ * - `opts.bounds` set → clip to explicit `[lo, hi]`
26
+ * - `PHYSICS_BOUNDS.has(col)` → clip to physics defaults
27
+ * - else → sigma fallback (`mu ± std*sigma`)
28
+ *
29
+ * **Phase 3.5 review-iter fixes:**
30
+ * - Throws `RangeError` if `std ≤ 0` in the sigma fallback (matches Python
31
+ * `ValueError` at preprocessing.py:84-88; silent dataset corruption
32
+ * otherwise).
33
+ * - Pass-through: when fewer than two finite values exist, or all values are
34
+ * identical (sample sigma is zero, so the clamp `[mu, mu]` would collapse
35
+ * the column), finite values are passed through unchanged instead.
36
+ *
37
+ * @param rows input rows (NOT mutated; pure function)
38
+ * @param col column to clip
39
+ * @param opts optional bounds / std overrides; defaults: PHYSICS_BOUNDS or sigma=3
40
+ * @returns new array of rows, each carrying `{col}_clipped` (`null` when the source value is not a finite number)
41
+ * @throws RangeError if sigma fallback would use `std <= 0` or non-finite std
42
+ */
43
+ declare function clipOutliers<Row extends Record<string, unknown>>(rows: ReadonlyArray<Row>, col: string, opts?: ClipOutliersOptions): ReadonlyArray<Row & Record<string, number | null>>;
44
+
45
+ /** Options for {@link crosscheckIemGhcnh}. */
46
+ interface CrosscheckOptions {
47
+ /**
48
+ * Maximum acceptable absolute delta in °C between paired IEM/GHCNh
49
+ * `temp_c` values. Defaults to `2.0` °C (matches Python
50
+ * `crosscheck_iem_ghcnh(tol_c=2.0)`). A delta strictly greater than
51
+ * `tolC` produces a disagreement row; equality does NOT.
52
+ */
53
+ tolC?: number;
54
+ }
55
+ /**
56
+ * Disagreement row emitted by {@link crosscheckIemGhcnh}. Keys are
57
+ * camelCase per the TS-idiom Parity-Ticket; Python's snake_case
58
+ * equivalents are `event_time`, `temp_c_iem`, `temp_c_ghcnh`, `delta_c`.
59
+ */
60
+ interface CrosscheckDisagreement {
61
+ readonly station: string;
62
+ readonly eventTime: string;
63
+ readonly tempCIem: number;
64
+ readonly tempCGhcnh: number;
65
+ readonly deltaC: number;
66
+ }
67
+ /**
68
+ * Minimal row shape consumed by {@link crosscheckIemGhcnh}. Rows MUST
69
+ * carry `station: string`, `eventTime: string`, and `temp_c: number |
70
+ * null` (or `undefined`/non-finite, which are skipped). Additional keys
71
+ * are allowed and ignored.
72
+ */
73
+ interface CrosscheckRowIn {
74
+ station?: unknown;
75
+ eventTime?: unknown;
76
+ temp_c?: unknown;
77
+ }
78
+ /**
79
+ * Cross-check IEM and GHCNh temperatures; return rows where the two
80
+ * sources disagree above `opts.tolC` (default 2.0 °C).
81
+ *
82
+ * Algorithm:
83
+ * 1. If `iemRows.length === 0 || ghcnhRows.length === 0` → return `[]`
84
+ * (matches Python qc.py:212-215).
85
+ * 2. Validate `station` + `eventTime` present (string) on every input
86
+ * row; throw `Error` on first violation (parity with Python
87
+ * `ValueError` at qc.py:217-220).
88
+ * 3. Build `iemMap: Map<string, IemRow>` keyed by
89
+ * `${row.station}|${row.eventTime}`. On duplicate keys, LAST iem row
90
+ * wins — deterministic but a documented deviation from Python's
91
+ * `pd.merge` (which would cartesian-product duplicates).
92
+ * 4. For each GHCNh row, look up the matching IEM row by composite key.
93
+ * If missing → skip. If either `temp_c` is null / non-finite →
94
+ * skip.
95
+ * 5. If `Math.abs(iem.temp_c - ghcnh.temp_c) > tolC` → emit a
96
+ * disagreement row. STRICT `>` (NOT `>=`).
97
+ *
98
+ * Output array order matches the iteration order of `ghcnhRows`
99
+ * (deterministic, independent of `iemRows` order).
100
+ *
101
+ * Pure: input arrays are NOT mutated.
102
+ *
103
+ * @param iemRows IEM observation rows.
104
+ * @param ghcnhRows GHCNh observation rows.
105
+ * @param opts Tolerance options. `tolC` default = 2.0.
106
+ * @throws Error if any iem or ghcnh row is missing `station` or
107
+ * `eventTime` (or they are not strings).
108
+ */
109
+ declare function crosscheckIemGhcnh(iemRows: ReadonlyArray<CrosscheckRowIn>, ghcnhRows: ReadonlyArray<CrosscheckRowIn>, opts?: CrosscheckOptions): ReadonlyArray<CrosscheckDisagreement>;
110
+
111
+ export { type ClipOutliersOptions, type CrosscheckDisagreement, type CrosscheckOptions, PHYSICS_BOUNDS, clipOutliers, crosscheckIemGhcnh as iemCrosscheck };
@@ -0,0 +1,111 @@
1
+ /**
2
+ * Physics-based clipping defaults for canonical observation columns.
3
+ *
4
+ * Mirrors Python `mostlyright.preprocessing.PHYSICS_BOUNDS` (preprocessing.py:34-46).
5
+ * Values are `[min, max]` tuples in canonical units (°C for temp, m/s and kt
6
+ * for wind speed, degrees for wind direction, hPa for pressure, percent for humidity, mm for precip).
7
+ *
8
+ * Both `dew_point_c`/`dewpoint_c` and `wind_dir_deg`/`wind_dir_degrees` are
9
+ * aliased to support legacy + canonical column names.
10
+ */
11
+ declare const PHYSICS_BOUNDS: ReadonlyMap<string, readonly [number, number]>;
12
+ interface ClipOutliersOptions {
13
+ /** Explicit `[lo, hi]` range. Overrides PHYSICS_BOUNDS and sigma fallback. */
14
+ bounds?: readonly [number, number];
15
+ /** Sigma multiplier for the fallback branch. Default 3.0. Must be > 0. */
16
+ std?: number;
17
+ }
18
+ /**
19
+ * Winsorize a numeric column.
20
+ *
21
+ * Mirrors Python `mostlyright.preprocessing.clip_outliers`. Returns rows with
22
+ * a derived `{col}_clipped` column; the source `col` is preserved unchanged.
23
+ *
24
+ * Decision tree:
25
+ * - `opts.bounds` set → clip to explicit `[lo, hi]`
26
+ * - `PHYSICS_BOUNDS.has(col)` → clip to physics defaults
27
+ * - else → sigma fallback (`mu ± std*sigma`)
28
+ *
29
+ * **Phase 3.5 review-iter fixes:**
30
+ * - Throws `RangeError` if `std ≤ 0` in the sigma fallback (matches Python
31
+ * `ValueError` at preprocessing.py:84-88; silent dataset corruption
32
+ * otherwise).
33
+ * - Pass-through: when fewer than two finite values exist, or all values are
34
+ * identical (sample sigma is zero, so the clamp `[mu, mu]` would collapse
35
+ * the column), finite values are passed through unchanged instead.
36
+ *
37
+ * @param rows input rows (NOT mutated; pure function)
38
+ * @param col column to clip
39
+ * @param opts optional bounds / std overrides; defaults: PHYSICS_BOUNDS or sigma=3
40
+ * @returns new array of rows, each carrying `{col}_clipped` (`null` when the source value is not a finite number)
41
+ * @throws RangeError if sigma fallback would use `std <= 0` or non-finite std
42
+ */
43
+ declare function clipOutliers<Row extends Record<string, unknown>>(rows: ReadonlyArray<Row>, col: string, opts?: ClipOutliersOptions): ReadonlyArray<Row & Record<string, number | null>>;
44
+
45
+ /** Options for {@link crosscheckIemGhcnh}. */
46
+ interface CrosscheckOptions {
47
+ /**
48
+ * Maximum acceptable absolute delta in °C between paired IEM/GHCNh
49
+ * `temp_c` values. Defaults to `2.0` °C (matches Python
50
+ * `crosscheck_iem_ghcnh(tol_c=2.0)`). A delta strictly greater than
51
+ * `tolC` produces a disagreement row; equality does NOT.
52
+ */
53
+ tolC?: number;
54
+ }
55
+ /**
56
+ * Disagreement row emitted by {@link crosscheckIemGhcnh}. Keys are
57
+ * camelCase per the TS-idiom Parity-Ticket; Python's snake_case
58
+ * equivalents are `event_time`, `temp_c_iem`, `temp_c_ghcnh`, `delta_c`.
59
+ */
60
+ interface CrosscheckDisagreement {
61
+ readonly station: string;
62
+ readonly eventTime: string;
63
+ readonly tempCIem: number;
64
+ readonly tempCGhcnh: number;
65
+ readonly deltaC: number;
66
+ }
67
+ /**
68
+ * Minimal row shape consumed by {@link crosscheckIemGhcnh}. Rows MUST
69
+ * carry `station: string`, `eventTime: string`, and `temp_c: number |
70
+ * null` (or `undefined`/non-finite, which are skipped). Additional keys
71
+ * are allowed and ignored.
72
+ */
73
+ interface CrosscheckRowIn {
74
+ station?: unknown;
75
+ eventTime?: unknown;
76
+ temp_c?: unknown;
77
+ }
78
+ /**
79
+ * Cross-check IEM and GHCNh temperatures; return rows where the two
80
+ * sources disagree above `opts.tolC` (default 2.0 °C).
81
+ *
82
+ * Algorithm:
83
+ * 1. If `iemRows.length === 0 || ghcnhRows.length === 0` → return `[]`
84
+ * (matches Python qc.py:212-215).
85
+ * 2. Validate `station` + `eventTime` present (string) on every input
86
+ * row; throw `Error` on first violation (parity with Python
87
+ * `ValueError` at qc.py:217-220).
88
+ * 3. Build `iemMap: Map<string, IemRow>` keyed by
89
+ * `${row.station}|${row.eventTime}`. On duplicate keys, LAST iem row
90
+ * wins — deterministic but a documented deviation from Python's
91
+ * `pd.merge` (which would cartesian-product duplicates).
92
+ * 4. For each GHCNh row, look up the matching IEM row by composite key.
93
+ * If missing → skip. If either `temp_c` is null / non-finite →
94
+ * skip.
95
+ * 5. If `Math.abs(iem.temp_c - ghcnh.temp_c) > tolC` → emit a
96
+ * disagreement row. STRICT `>` (NOT `>=`).
97
+ *
98
+ * Output array order matches the iteration order of `ghcnhRows`
99
+ * (deterministic, independent of `iemRows` order).
100
+ *
101
+ * Pure: input arrays are NOT mutated.
102
+ *
103
+ * @param iemRows IEM observation rows.
104
+ * @param ghcnhRows GHCNh observation rows.
105
+ * @param opts Tolerance options. `tolC` default = 2.0.
106
+ * @throws Error if any iem or ghcnh row is missing `station` or
107
+ * `eventTime` (or they are not strings).
108
+ */
109
+ declare function crosscheckIemGhcnh(iemRows: ReadonlyArray<CrosscheckRowIn>, ghcnhRows: ReadonlyArray<CrosscheckRowIn>, opts?: CrosscheckOptions): ReadonlyArray<CrosscheckDisagreement>;
110
+
111
+ export { type ClipOutliersOptions, type CrosscheckDisagreement, type CrosscheckOptions, PHYSICS_BOUNDS, clipOutliers, crosscheckIemGhcnh as iemCrosscheck };
@@ -0,0 +1,121 @@
1
+ // src/transforms/clip.ts
2
/**
 * Default clip ranges keyed by observation column name.
 *
 * Each value is a `[min, max]` pair. `clipOutliers` uses the entry for a
 * column whenever the caller supplies no explicit `bounds`; columns that are
 * absent from this map fall through to the sigma-based range instead.
 *
 * Several keys are aliases that carry the same range under two column names
 * (`dew_point_c`/`dewpoint_c`, `wind_dir_deg`/`wind_dir_degrees`,
 * `slp_hpa`/`sea_level_pressure_mb`). Each alias gets its own array instance.
 */
var PHYSICS_BOUNDS = /* @__PURE__ */ new Map([
  // Air temperature, °C.
  ["temp_c", [-89, 57]],
  // Dew point, °C (two spellings of the same column).
  ["dew_point_c", [-89, 35]],
  ["dewpoint_c", [-89, 35]],
  // Wind speed in m/s and in knots.
  ["wind_speed_ms", [0, 100]],
  ["wind_speed_kt", [0, 200]],
  // Wind direction, degrees (two spellings of the same column).
  ["wind_dir_deg", [0, 360]],
  ["wind_dir_degrees", [0, 360]],
  // Sea-level pressure, hPa / mb (two spellings of the same column).
  ["slp_hpa", [870, 1085]],
  ["sea_level_pressure_mb", [870, 1085]],
  // Relative humidity, percent.
  ["relative_humidity_pct_2m", [0, 100]],
  // Precipitation, mm (presumably accumulated over one hour, per the key name).
  ["precip_mm_1h", [0, 305]]
]);
15
+ function clipOutliers(rows, col, opts = {}) {
16
+ const std = opts.std ?? 3;
17
+ const key = `${col}_clipped`;
18
+ let lo;
19
+ let hi;
20
+ let passThrough = false;
21
+ if (opts.bounds !== void 0) {
22
+ [lo, hi] = opts.bounds;
23
+ } else if (PHYSICS_BOUNDS.has(col)) {
24
+ const b = PHYSICS_BOUNDS.get(col);
25
+ if (b === void 0) {
26
+ throw new Error(`PHYSICS_BOUNDS.get(${col}) unexpectedly undefined`);
27
+ }
28
+ [lo, hi] = b;
29
+ } else {
30
+ if (!Number.isFinite(std) || std <= 0) {
31
+ throw new RangeError(
32
+ `clipOutliers: std must be > 0 for the sigma fallback (got ${std}); pass bounds=[lo, hi] or use a physics-default column`
33
+ );
34
+ }
35
+ const vals = [];
36
+ for (const r of rows) {
37
+ const v = r?.[col];
38
+ if (typeof v === "number" && Number.isFinite(v)) vals.push(v);
39
+ }
40
+ if (vals.length < 2) {
41
+ passThrough = true;
42
+ lo = Number.NEGATIVE_INFINITY;
43
+ hi = Number.POSITIVE_INFINITY;
44
+ } else {
45
+ const mu = vals.reduce((a, b) => a + b, 0) / vals.length;
46
+ const sumSq = vals.reduce((a, b) => a + (b - mu) ** 2, 0);
47
+ const sigma = Math.sqrt(sumSq / (vals.length - 1));
48
+ if (sigma === 0 || !Number.isFinite(sigma)) {
49
+ passThrough = true;
50
+ lo = Number.NEGATIVE_INFINITY;
51
+ hi = Number.POSITIVE_INFINITY;
52
+ } else {
53
+ lo = mu - std * sigma;
54
+ hi = mu + std * sigma;
55
+ }
56
+ }
57
+ }
58
+ const out = [];
59
+ for (const r of rows) {
60
+ const v = r?.[col];
61
+ let clipped;
62
+ if (typeof v === "number" && Number.isFinite(v)) {
63
+ clipped = passThrough ? v : Math.min(Math.max(v, lo), hi);
64
+ } else {
65
+ clipped = null;
66
+ }
67
+ out.push({ ...r, [key]: clipped });
68
+ }
69
+ return out;
70
+ }
71
+
72
+ // src/qc/crosscheck.ts
73
+ function crosscheckIemGhcnh(iemRows, ghcnhRows, opts = {}) {
74
+ const tolC = opts.tolC ?? 2;
75
+ if (iemRows.length === 0 || ghcnhRows.length === 0) return [];
76
+ for (const r of iemRows) {
77
+ if (typeof r?.station !== "string" || typeof r?.eventTime !== "string") {
78
+ throw new Error(
79
+ "crosscheckIemGhcnh: iem rows must carry 'station' (string) and 'eventTime' (string) keys"
80
+ );
81
+ }
82
+ }
83
+ for (const r of ghcnhRows) {
84
+ if (typeof r?.station !== "string" || typeof r?.eventTime !== "string") {
85
+ throw new Error(
86
+ "crosscheckIemGhcnh: ghcnh rows must carry 'station' (string) and 'eventTime' (string) keys"
87
+ );
88
+ }
89
+ }
90
+ const iemMap = /* @__PURE__ */ new Map();
91
+ for (const r of iemRows) {
92
+ const key = `${r.station}|${r.eventTime}`;
93
+ iemMap.set(key, r);
94
+ }
95
+ const out = [];
96
+ for (const g of ghcnhRows) {
97
+ const key = `${g.station}|${g.eventTime}`;
98
+ const i = iemMap.get(key);
99
+ if (i === void 0) continue;
100
+ const iT = typeof i.temp_c === "number" && Number.isFinite(i.temp_c) ? i.temp_c : null;
101
+ const gT = typeof g.temp_c === "number" && Number.isFinite(g.temp_c) ? g.temp_c : null;
102
+ if (iT === null || gT === null) continue;
103
+ const delta = Math.abs(iT - gT);
104
+ if (delta > tolC) {
105
+ out.push({
106
+ station: g.station,
107
+ eventTime: g.eventTime,
108
+ tempCIem: iT,
109
+ tempCGhcnh: gT,
110
+ deltaC: delta
111
+ });
112
+ }
113
+ }
114
+ return out;
115
+ }
116
+ export {
117
+ PHYSICS_BOUNDS,
118
+ clipOutliers,
119
+ crosscheckIemGhcnh as iemCrosscheck
120
+ };
121
+ //# sourceMappingURL=index.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/transforms/clip.ts","../../src/qc/crosscheck.ts"],"sourcesContent":["// TS-W4 Plan 04 Task 2 — clipOutliers (winsorize) + PHYSICS_BOUNDS.\n//\n// Pure row→row port of Python `mostlyright.preprocessing.clip_outliers` at\n// packages/core/src/mostlyright/preprocessing.py:49-91. The v0.1.0 canonical\n// surface (supersedes the older `transforms.clip_outliers`).\n//\n// Decision tree (mirrors Python preprocessing.py:75-91):\n// 1. opts.bounds set → clip to explicit [lo, hi]\n// 2. PHYSICS_BOUNDS.has(col) → clip to physics defaults\n// 3. else → sigma fallback (mu ± std*sigma)\n//\n// Phase 3.5 review-iter HIGH fixes (preserved here):\n// - Architect iter-1 HIGH: std<=0 in the sigma branch silently collapses\n// every row to the mean. Python raises ValueError; we throw RangeError.\n// - Sigma=0 pass-through: when all values are identical, sample sigma is\n// zero and the clamp [mu, mu] would collapse the column. Pass values\n// through unchanged instead (a TS-side improvement on top of Python).\n//\n// Numeric coercion is STRICT: only `typeof v === 'number' && Number.isFinite(v)`\n// passes through. Strings like '5' do NOT auto-parse. 
Matches Wave 2/3/04-task1.\n\n/**\n * Physics-based clipping defaults for canonical observation columns.\n *\n * Mirrors Python `mostlyright.preprocessing.PHYSICS_BOUNDS` (preprocessing.py:34-46).\n * Values are `[min, max]` tuples in canonical units (°C for temp, m/s and kt\n * for wind, hPa for pressure, percent for humidity, mm for precip).\n *\n * Both `dew_point_c`/`dewpoint_c` and `wind_dir_deg`/`wind_dir_degrees` are\n * aliased to support legacy + canonical column names.\n */\nexport const PHYSICS_BOUNDS: ReadonlyMap<string, readonly [number, number]> = new Map([\n [\"temp_c\", [-89.0, 57.0] as const],\n [\"dew_point_c\", [-89.0, 35.0] as const],\n [\"dewpoint_c\", [-89.0, 35.0] as const],\n [\"wind_speed_ms\", [0.0, 100.0] as const],\n [\"wind_speed_kt\", [0.0, 200.0] as const],\n [\"wind_dir_deg\", [0.0, 360.0] as const],\n [\"wind_dir_degrees\", [0.0, 360.0] as const],\n [\"slp_hpa\", [870.0, 1085.0] as const],\n [\"sea_level_pressure_mb\", [870.0, 1085.0] as const],\n [\"relative_humidity_pct_2m\", [0.0, 100.0] as const],\n [\"precip_mm_1h\", [0.0, 305.0] as const],\n]);\n\nexport interface ClipOutliersOptions {\n /** Explicit `[lo, hi]` range. Overrides PHYSICS_BOUNDS and sigma fallback. */\n bounds?: readonly [number, number];\n /** Sigma multiplier for the fallback branch. Default 3.0. Must be > 0. */\n std?: number;\n}\n\n/**\n * Winsorize a numeric column.\n *\n * Mirrors Python `mostlyright.preprocessing.clip_outliers`. 
Returns rows with\n * a derived `{col}_clipped` column; the source `col` is preserved unchanged.\n *\n * Decision tree:\n * - `opts.bounds` set → clip to explicit `[lo, hi]`\n * - `PHYSICS_BOUNDS.has(col)` → clip to physics defaults\n * - else → sigma fallback (`mu ± std*sigma`)\n *\n * **Phase 3.5 review-iter fixes:**\n * - Throws `RangeError` if `std ≤ 0` in the sigma fallback (matches Python\n * `ValueError` at preprocessing.py:84-88; silent dataset corruption\n * otherwise).\n * - Sigma=0 pass-through: when all values are identical, sample sigma is\n * zero and the clamp `[mu, mu]` would collapse the column. Pass values\n * through unchanged instead.\n *\n * @param rows input rows (NOT mutated; pure function)\n * @param col column to clip\n * @param opts optional bounds / std overrides; defaults: PHYSICS_BOUNDS or sigma=3\n * @returns new array of rows, each carrying `{col}_clipped`\n * @throws RangeError if sigma fallback would use `std <= 0` or non-finite std\n */\nexport function clipOutliers<Row extends Record<string, unknown>>(\n rows: ReadonlyArray<Row>,\n col: string,\n opts: ClipOutliersOptions = {},\n): ReadonlyArray<Row & Record<string, number | null>> {\n const std = opts.std ?? 3.0;\n const key = `${col}_clipped`;\n\n // Determine clip range. `passThrough` short-circuits to \"copy value unchanged\"\n // for the sigma=0 / n<2 edge cases (Phase 3.5 review-iter HIGH fix).\n let lo: number;\n let hi: number;\n let passThrough = false;\n\n if (opts.bounds !== undefined) {\n [lo, hi] = opts.bounds;\n } else if (PHYSICS_BOUNDS.has(col)) {\n const b = PHYSICS_BOUNDS.get(col);\n if (b === undefined) {\n // Unreachable (we just checked has()), but the narrowing requires it.\n throw new Error(`PHYSICS_BOUNDS.get(${col}) unexpectedly undefined`);\n }\n [lo, hi] = b;\n } else {\n // Sigma fallback. 
Architect iter-1 HIGH: std<=0 collapses to mu.\n if (!Number.isFinite(std) || std <= 0) {\n throw new RangeError(\n `clipOutliers: std must be > 0 for the sigma fallback (got ${std}); pass bounds=[lo, hi] or use a physics-default column`,\n );\n }\n // Compute mu + sigma over non-null finite values.\n const vals: number[] = [];\n for (const r of rows) {\n const v = r?.[col];\n if (typeof v === \"number\" && Number.isFinite(v)) vals.push(v);\n }\n if (vals.length < 2) {\n // Not enough values to compute sample sigma → pass-through.\n passThrough = true;\n lo = Number.NEGATIVE_INFINITY;\n hi = Number.POSITIVE_INFINITY;\n } else {\n const mu = vals.reduce((a, b) => a + b, 0) / vals.length;\n const sumSq = vals.reduce((a, b) => a + (b - mu) ** 2, 0);\n const sigma = Math.sqrt(sumSq / (vals.length - 1)); // sample stdev (Bessel n-1)\n if (sigma === 0 || !Number.isFinite(sigma)) {\n // Phase 3.5 review-iter HIGH: pass values through unchanged\n // instead of collapsing to [mu, mu] (NOT NaN, NOT mu).\n passThrough = true;\n lo = Number.NEGATIVE_INFINITY;\n hi = Number.POSITIVE_INFINITY;\n } else {\n lo = mu - std * sigma;\n hi = mu + std * sigma;\n }\n }\n }\n\n const out: Array<Row & Record<string, number | null>> = [];\n for (const r of rows) {\n const v = r?.[col];\n let clipped: number | null;\n if (typeof v === \"number\" && Number.isFinite(v)) {\n clipped = passThrough ? v : Math.min(Math.max(v, lo), hi);\n } else {\n clipped = null;\n }\n out.push({ ...(r as Row), [key]: clipped } as Row & Record<string, number | null>);\n }\n return out;\n}\n","// TS-W4 Plan 06 — crosscheckIemGhcnh: disagreement detection between IEM +\n// GHCNh temperature readings. Mirrors Python\n// `mostlyright.qc.crosscheck_iem_ghcnh` at\n// `packages/core/src/mostlyright/qc.py:191-228`.\n//\n// Inner-joins by composite key `(station, eventTime)`. 
For matched pairs\n// where both temp_c values are finite numbers and the absolute delta\n// exceeds `opts.tolC` (default 2.0 °C), emits a disagreement row.\n//\n// Threshold is STRICT `>` (NOT `>=`) per Python qc.py:228 —\n// `merged.loc[merged[\"delta_c\"] > tol_c]`. A delta exactly equal to the\n// tolerance produces NO disagreement.\n//\n// Parity-Ticket: Python returns snake_case keys\n// (event_time, temp_c_iem, temp_c_ghcnh, delta_c); TS returns camelCase\n// (eventTime, tempCIem, tempCGhcnh, deltaC) to match the TS-idiom used\n// elsewhere in the codebase (see `obsQcStatus` from Wave 5). Wire-format\n// conversion to snake_case happens at the JSON serializer boundary\n// (TS-W3 Plan 07 `jsonDumps`).\n//\n// Lives at the `@mostlyrightmd/core/qc` subpath (NOT root barrel) to keep\n// the main `@mostlyrightmd/core` bundle under its 25 KB size-limit gate.\n\n/** Options for {@link crosscheckIemGhcnh}. */\nexport interface CrosscheckOptions {\n /**\n * Maximum acceptable absolute delta in °C between paired IEM/GHCNh\n * `temp_c` values. Defaults to `2.0` °C (matches Python\n * `crosscheck_iem_ghcnh(tol_c=2.0)`). A delta strictly greater than\n * `tolC` produces a disagreement row; equality does NOT.\n */\n tolC?: number;\n}\n\n/**\n * Disagreement row emitted by {@link crosscheckIemGhcnh}. Keys are\n * camelCase per the TS-idiom Parity-Ticket; Python's snake_case\n * equivalents are `event_time`, `temp_c_iem`, `temp_c_ghcnh`, `delta_c`.\n */\nexport interface CrosscheckDisagreement {\n readonly station: string;\n readonly eventTime: string;\n readonly tempCIem: number;\n readonly tempCGhcnh: number;\n readonly deltaC: number;\n}\n\n/**\n * Minimal row shape consumed by {@link crosscheckIemGhcnh}. Rows MUST\n * carry `station: string`, `eventTime: string`, and `temp_c: number |\n * null` (or `undefined`/non-finite, which are skipped). 
Additional keys\n * are allowed and ignored.\n */\ninterface CrosscheckRowIn {\n station?: unknown;\n eventTime?: unknown;\n temp_c?: unknown;\n}\n\n/**\n * Cross-check IEM and GHCNh temperatures; return rows where the two\n * sources disagree above `opts.tolC` (default 2.0 °C).\n *\n * Algorithm:\n * 1. If `iemRows.length === 0 || ghcnhRows.length === 0` → return `[]`\n * (matches Python qc.py:212-215).\n * 2. Validate `station` + `eventTime` present (string) on every input\n * row; throw `Error` on first violation (parity with Python\n * `ValueError` at qc.py:217-220).\n * 3. Build `iemMap: Map<string, IemRow>` keyed by\n * `${row.station}|${row.eventTime}`. On duplicate keys, LAST iem row\n * wins — deterministic but a documented deviation from Python's\n * `pd.merge` (which would cartesian-product duplicates).\n * 4. For each GHCNh row, look up the matching IEM row by composite key.\n * If missing → skip. If either `temp_c` is null / non-finite →\n * skip.\n * 5. If `Math.abs(iem.temp_c - ghcnh.temp_c) > tolC` → emit a\n * disagreement row. STRICT `>` (NOT `>=`).\n *\n * Output array order matches the iteration order of `ghcnhRows`\n * (deterministic, independent of `iemRows` order).\n *\n * Pure: input arrays are NOT mutated.\n *\n * @param iemRows IEM observation rows.\n * @param ghcnhRows GHCNh observation rows.\n * @param opts Tolerance options. `tolC` default = 2.0.\n * @throws Error if any iem or ghcnh row is missing `station` or\n * `eventTime` (or they are not strings).\n */\nexport function crosscheckIemGhcnh(\n iemRows: ReadonlyArray<CrosscheckRowIn>,\n ghcnhRows: ReadonlyArray<CrosscheckRowIn>,\n opts: CrosscheckOptions = {},\n): ReadonlyArray<CrosscheckDisagreement> {\n const tolC = opts.tolC ?? 
2.0;\n\n if (iemRows.length === 0 || ghcnhRows.length === 0) return [];\n\n // Validate column presence upfront (parity with Python ValueError).\n for (const r of iemRows) {\n if (typeof r?.station !== \"string\" || typeof r?.eventTime !== \"string\") {\n throw new Error(\n \"crosscheckIemGhcnh: iem rows must carry 'station' (string) and 'eventTime' (string) keys\",\n );\n }\n }\n for (const r of ghcnhRows) {\n if (typeof r?.station !== \"string\" || typeof r?.eventTime !== \"string\") {\n throw new Error(\n \"crosscheckIemGhcnh: ghcnh rows must carry 'station' (string) and 'eventTime' (string) keys\",\n );\n }\n }\n\n // Build iem lookup map. Last-wins on duplicate (station, eventTime).\n const iemMap = new Map<string, CrosscheckRowIn>();\n for (const r of iemRows) {\n const key = `${r.station as string}|${r.eventTime as string}`;\n iemMap.set(key, r);\n }\n\n const out: CrosscheckDisagreement[] = [];\n for (const g of ghcnhRows) {\n const key = `${g.station as string}|${g.eventTime as string}`;\n const i = iemMap.get(key);\n if (i === undefined) continue;\n const iT = typeof i.temp_c === \"number\" && Number.isFinite(i.temp_c) ? i.temp_c : null;\n const gT = typeof g.temp_c === \"number\" && Number.isFinite(g.temp_c) ? 
g.temp_c : null;\n if (iT === null || gT === null) continue;\n const delta = Math.abs(iT - gT);\n if (delta > tolC) {\n out.push({\n station: g.station as string,\n eventTime: g.eventTime as string,\n tempCIem: iT,\n tempCGhcnh: gT,\n deltaC: delta,\n });\n }\n }\n return out;\n}\n"],"mappings":";AA+BO,IAAM,iBAAiE,oBAAI,IAAI;AAAA,EACpF,CAAC,UAAU,CAAC,KAAO,EAAI,CAAU;AAAA,EACjC,CAAC,eAAe,CAAC,KAAO,EAAI,CAAU;AAAA,EACtC,CAAC,cAAc,CAAC,KAAO,EAAI,CAAU;AAAA,EACrC,CAAC,iBAAiB,CAAC,GAAK,GAAK,CAAU;AAAA,EACvC,CAAC,iBAAiB,CAAC,GAAK,GAAK,CAAU;AAAA,EACvC,CAAC,gBAAgB,CAAC,GAAK,GAAK,CAAU;AAAA,EACtC,CAAC,oBAAoB,CAAC,GAAK,GAAK,CAAU;AAAA,EAC1C,CAAC,WAAW,CAAC,KAAO,IAAM,CAAU;AAAA,EACpC,CAAC,yBAAyB,CAAC,KAAO,IAAM,CAAU;AAAA,EAClD,CAAC,4BAA4B,CAAC,GAAK,GAAK,CAAU;AAAA,EAClD,CAAC,gBAAgB,CAAC,GAAK,GAAK,CAAU;AACxC,CAAC;AAkCM,SAAS,aACd,MACA,KACA,OAA4B,CAAC,GACuB;AACpD,QAAM,MAAM,KAAK,OAAO;AACxB,QAAM,MAAM,GAAG,GAAG;AAIlB,MAAI;AACJ,MAAI;AACJ,MAAI,cAAc;AAElB,MAAI,KAAK,WAAW,QAAW;AAC7B,KAAC,IAAI,EAAE,IAAI,KAAK;AAAA,EAClB,WAAW,eAAe,IAAI,GAAG,GAAG;AAClC,UAAM,IAAI,eAAe,IAAI,GAAG;AAChC,QAAI,MAAM,QAAW;AAEnB,YAAM,IAAI,MAAM,sBAAsB,GAAG,0BAA0B;AAAA,IACrE;AACA,KAAC,IAAI,EAAE,IAAI;AAAA,EACb,OAAO;AAEL,QAAI,CAAC,OAAO,SAAS,GAAG,KAAK,OAAO,GAAG;AACrC,YAAM,IAAI;AAAA,QACR,6DAA6D,GAAG;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,OAAiB,CAAC;AACxB,eAAW,KAAK,MAAM;AACpB,YAAM,IAAI,IAAI,GAAG;AACjB,UAAI,OAAO,MAAM,YAAY,OAAO,SAAS,CAAC,EAAG,MAAK,KAAK,CAAC;AAAA,IAC9D;AACA,QAAI,KAAK,SAAS,GAAG;AAEnB,oBAAc;AACd,WAAK,OAAO;AACZ,WAAK,OAAO;AAAA,IACd,OAAO;AACL,YAAM,KAAK,KAAK,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,KAAK;AAClD,YAAM,QAAQ,KAAK,OAAO,CAAC,GAAG,MAAM,KAAK,IAAI,OAAO,GAAG,CAAC;AACxD,YAAM,QAAQ,KAAK,KAAK,SAAS,KAAK,SAAS,EAAE;AACjD,UAAI,UAAU,KAAK,CAAC,OAAO,SAAS,KAAK,GAAG;AAG1C,sBAAc;AACd,aAAK,OAAO;AACZ,aAAK,OAAO;AAAA,MACd,OAAO;AACL,aAAK,KAAK,MAAM;AAChB,aAAK,KAAK,MAAM;AAAA,MAClB;AAAA,IACF;AAAA,EACF;AAEA,QAAM,MAAkD,CAAC;AACzD,aAAW,KAAK,MAAM;AACpB,UAAM,IAAI,IAAI,GAAG;AACjB,QAAI;AACJ,QAAI,OAAO,MAAM,YAAY,OAAO,SAAS,CAAC,GAAG;AAC/C,gBAAU,cAAc,IAAI,KAAK,I
AAI,KAAK,IAAI,GAAG,EAAE,GAAG,EAAE;AAAA,IAC1D,OAAO;AACL,gBAAU;AAAA,IACZ;AACA,QAAI,KAAK,EAAE,GAAI,GAAW,CAAC,GAAG,GAAG,QAAQ,CAAwC;AAAA,EACnF;AACA,SAAO;AACT;;;ACzDO,SAAS,mBACd,SACA,WACA,OAA0B,CAAC,GACY;AACvC,QAAM,OAAO,KAAK,QAAQ;AAE1B,MAAI,QAAQ,WAAW,KAAK,UAAU,WAAW,EAAG,QAAO,CAAC;AAG5D,aAAW,KAAK,SAAS;AACvB,QAAI,OAAO,GAAG,YAAY,YAAY,OAAO,GAAG,cAAc,UAAU;AACtE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACA,aAAW,KAAK,WAAW;AACzB,QAAI,OAAO,GAAG,YAAY,YAAY,OAAO,GAAG,cAAc,UAAU;AACtE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAGA,QAAM,SAAS,oBAAI,IAA6B;AAChD,aAAW,KAAK,SAAS;AACvB,UAAM,MAAM,GAAG,EAAE,OAAiB,IAAI,EAAE,SAAmB;AAC3D,WAAO,IAAI,KAAK,CAAC;AAAA,EACnB;AAEA,QAAM,MAAgC,CAAC;AACvC,aAAW,KAAK,WAAW;AACzB,UAAM,MAAM,GAAG,EAAE,OAAiB,IAAI,EAAE,SAAmB;AAC3D,UAAM,IAAI,OAAO,IAAI,GAAG;AACxB,QAAI,MAAM,OAAW;AACrB,UAAM,KAAK,OAAO,EAAE,WAAW,YAAY,OAAO,SAAS,EAAE,MAAM,IAAI,EAAE,SAAS;AAClF,UAAM,KAAK,OAAO,EAAE,WAAW,YAAY,OAAO,SAAS,EAAE,MAAM,IAAI,EAAE,SAAS;AAClF,QAAI,OAAO,QAAQ,OAAO,KAAM;AAChC,UAAM,QAAQ,KAAK,IAAI,KAAK,EAAE;AAC9B,QAAI,QAAQ,MAAM;AAChB,UAAI,KAAK;AAAA,QACP,SAAS,EAAE;AAAA,QACX,WAAW,EAAE;AAAA,QACb,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,QAAQ;AAAA,MACV,CAAC;AAAA,IACH;AAAA,EACF;AACA,SAAO;AACT;","names":[]}