@mostlyrightmd/core 0.1.0-rc.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +3 -0
- package/dist/discovery/index.cjs +1646 -0
- package/dist/discovery/index.cjs.map +1 -0
- package/dist/discovery/index.d.cts +313 -0
- package/dist/discovery/index.d.ts +313 -0
- package/dist/discovery/index.mjs +1609 -0
- package/dist/discovery/index.mjs.map +1 -0
- package/dist/formats/index.cjs +498 -0
- package/dist/formats/index.cjs.map +1 -0
- package/dist/formats/index.d.cts +97 -0
- package/dist/formats/index.d.ts +97 -0
- package/dist/formats/index.mjs +465 -0
- package/dist/formats/index.mjs.map +1 -0
- package/dist/index.cjs +1624 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +559 -0
- package/dist/index.d.ts +559 -0
- package/dist/index.global.js +1582 -0
- package/dist/index.global.js.map +1 -0
- package/dist/index.mjs +1557 -0
- package/dist/index.mjs.map +1 -0
- package/dist/internal/bounds.cjs +125 -0
- package/dist/internal/bounds.cjs.map +1 -0
- package/dist/internal/bounds.d.cts +36 -0
- package/dist/internal/bounds.d.ts +36 -0
- package/dist/internal/bounds.mjs +81 -0
- package/dist/internal/bounds.mjs.map +1 -0
- package/dist/internal/cache/fs.cjs +217 -0
- package/dist/internal/cache/fs.cjs.map +1 -0
- package/dist/internal/cache/fs.d.cts +57 -0
- package/dist/internal/cache/fs.d.ts +57 -0
- package/dist/internal/cache/fs.mjs +179 -0
- package/dist/internal/cache/fs.mjs.map +1 -0
- package/dist/internal/cache/index.browser.cjs +1184 -0
- package/dist/internal/cache/index.browser.cjs.map +1 -0
- package/dist/internal/cache/index.browser.d.cts +20 -0
- package/dist/internal/cache/index.browser.d.ts +20 -0
- package/dist/internal/cache/index.browser.mjs +36 -0
- package/dist/internal/cache/index.browser.mjs.map +1 -0
- package/dist/internal/cache/index.cjs +1389 -0
- package/dist/internal/cache/index.cjs.map +1 -0
- package/dist/internal/cache/index.d.cts +16 -0
- package/dist/internal/cache/index.d.ts +16 -0
- package/dist/internal/cache/index.mjs +40 -0
- package/dist/internal/cache/index.mjs.map +1 -0
- package/dist/internal/chunk-PKJXHY27.mjs +1137 -0
- package/dist/internal/chunk-PKJXHY27.mjs.map +1 -0
- package/dist/internal/convert.cjs +161 -0
- package/dist/internal/convert.cjs.map +1 -0
- package/dist/internal/convert.d.cts +44 -0
- package/dist/internal/convert.d.ts +44 -0
- package/dist/internal/convert.mjs +117 -0
- package/dist/internal/convert.mjs.map +1 -0
- package/dist/internal/fs-O6XR4WWW.mjs +183 -0
- package/dist/internal/fs-O6XR4WWW.mjs.map +1 -0
- package/dist/internal/keys-B7C8C88N.d.cts +191 -0
- package/dist/internal/keys-B7C8C88N.d.ts +191 -0
- package/dist/internal/merge/index.cjs +75 -0
- package/dist/internal/merge/index.cjs.map +1 -0
- package/dist/internal/merge/index.d.cts +74 -0
- package/dist/internal/merge/index.d.ts +74 -0
- package/dist/internal/merge/index.mjs +46 -0
- package/dist/internal/merge/index.mjs.map +1 -0
- package/dist/internal/pairs.cjs +328 -0
- package/dist/internal/pairs.cjs.map +1 -0
- package/dist/internal/pairs.d.cts +105 -0
- package/dist/internal/pairs.d.ts +105 -0
- package/dist/internal/pairs.mjs +298 -0
- package/dist/internal/pairs.mjs.map +1 -0
- package/dist/qc/index.cjs +247 -0
- package/dist/qc/index.cjs.map +1 -0
- package/dist/qc/index.d.cts +140 -0
- package/dist/qc/index.d.ts +140 -0
- package/dist/qc/index.mjs +212 -0
- package/dist/qc/index.mjs.map +1 -0
- package/dist/temporal/index.cjs +504 -0
- package/dist/temporal/index.cjs.map +1 -0
- package/dist/temporal/index.d.cts +121 -0
- package/dist/temporal/index.d.ts +121 -0
- package/dist/temporal/index.mjs +474 -0
- package/dist/temporal/index.mjs.map +1 -0
- package/dist/transforms/index.cjs +399 -0
- package/dist/transforms/index.cjs.map +1 -0
- package/dist/transforms/index.d.cts +193 -0
- package/dist/transforms/index.d.ts +193 -0
- package/dist/transforms/index.mjs +362 -0
- package/dist/transforms/index.mjs.map +1 -0
- package/dist/validator.cjs +1870 -0
- package/dist/validator.cjs.map +1 -0
- package/dist/validator.d.cts +30 -0
- package/dist/validator.d.ts +30 -0
- package/dist/validator.mjs +1843 -0
- package/dist/validator.mjs.map +1 -0
- package/package.json +115 -0
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shift a column by `n` rows.
|
|
3
|
+
*
|
|
4
|
+
* Mirrors Python `pd.Series.shift(periods=n)` semantics: at output index `i`,
|
|
5
|
+
* the derived column carries `rows[i-n][col]` if available, else `null`.
|
|
6
|
+
*
|
|
7
|
+
* @param rows input rows (NOT mutated; pure function)
|
|
8
|
+
* @param col column name to lag
|
|
9
|
+
* @param n positive integer (optional in the signature; default not stated here — presumably 1, as for `diff`; TODO confirm); rows at index `< n` get `null` in the derived column
|
|
10
|
+
* @returns new array of rows, each carrying `{col}_lag_{n}` derived column
|
|
11
|
+
* @throws RangeError if `n < 1` or `!Number.isInteger(n)`
|
|
12
|
+
*/
|
|
13
|
+
declare function lag<Row extends Record<string, unknown>>(rows: ReadonlyArray<Row>, col: string, n?: number): ReadonlyArray<Row & Record<string, number | null>>;
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* First (or nth-order) discrete difference of a column.
|
|
17
|
+
*
|
|
18
|
+
* At output index `i`, the derived column carries
|
|
19
|
+
* `rows[i][col] - rows[i-n][col]` if both are finite numbers, else `null`.
|
|
20
|
+
*
|
|
21
|
+
* @param rows input rows (NOT mutated; pure function)
|
|
22
|
+
* @param col column name to difference
|
|
23
|
+
* @param n positive integer step (default 1)
|
|
24
|
+
* @returns new array of rows, each carrying `{col}_diff_{n}` derived column
|
|
25
|
+
* @throws RangeError if `n < 1` or `!Number.isInteger(n)`
|
|
26
|
+
*/
|
|
27
|
+
declare function diff<Row extends Record<string, unknown>>(rows: ReadonlyArray<Row>, col: string, n?: number): ReadonlyArray<Row & Record<string, number | null>>;
|
|
28
|
+
/**
|
|
29
|
+
* Second discrete difference of a column.
|
|
30
|
+
*
|
|
31
|
+
* Equivalent to `diff(diff(col))`. The first two output rows carry `null`
|
|
32
|
+
* (no prior diff available). Mirrors Python `df[column].diff().diff()` which
|
|
33
|
+
* returns a single Series — so the TS output carries ONLY `{col}_diff2`,
|
|
34
|
+
* NOT the intermediate `{col}_diff_1` from the first pass.
|
|
35
|
+
*
|
|
36
|
+
* @param rows input rows (NOT mutated; pure function)
|
|
37
|
+
* @param col column name to second-difference
|
|
38
|
+
* @returns new array of rows, each carrying `{col}_diff2` derived column
|
|
39
|
+
*/
|
|
40
|
+
declare function diff2<Row extends Record<string, unknown>>(rows: ReadonlyArray<Row>, col: string): ReadonlyArray<Row & Record<string, number | null>>;
|
|
41
|
+
|
|
42
|
+
/** The set of reducer names accepted by `rolling`. Ordering is API surface. */
|
|
43
|
+
declare const ROLLING_FNS: readonly ["mean", "median", "min", "max", "std", "count"];
|
|
44
|
+
/** Union of the six reducer-name string literals. */
|
|
45
|
+
type RollingFn = (typeof ROLLING_FNS)[number];
|
|
46
|
+
/**
|
|
47
|
+
* Windowed reduction over a numeric column.
|
|
48
|
+
*
|
|
49
|
+
* At each output row `i`, the window covers
|
|
50
|
+
* `rows[max(0, i-window+1) .. i]` (inclusive both ends), so the first
|
|
51
|
+
* `window-1` rows compute against a partial (still-filling) window —
|
|
52
|
+
* `min_periods=1` semantics from Python.
|
|
53
|
+
*
|
|
54
|
+
* @param rows input rows (NOT mutated; pure function)
|
|
55
|
+
* @param col column name to reduce over
|
|
56
|
+
* @param window positive integer window size (≥ 1)
|
|
57
|
+
* @param fn optional reducer name, one of `'mean' | 'median' | 'min' | 'max' | 'std' | 'count'` (default when omitted is not stated here — TODO confirm)
|
|
58
|
+
* @returns new array of rows, each carrying `{col}_rolling_{window}_{fn}`
|
|
59
|
+
* @throws RangeError if `window < 1`, non-integer, or `fn` is not a `RollingFn`
|
|
60
|
+
*/
|
|
61
|
+
declare function rolling<Row extends Record<string, unknown>>(rows: ReadonlyArray<Row>, col: string, window: number, fn?: RollingFn): ReadonlyArray<Row & Record<string, number | null>>;
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* Add 8 cyclical calendar features (4 sin/cos pairs) to each row.
|
|
65
|
+
*
|
|
66
|
+
* Pairs (sin²+cos² ≈ 1, so a model sees wraparound — Dec→Jan is 1 month
|
|
67
|
+
* apart, not 11):
|
|
68
|
+
*
|
|
69
|
+
* - `month_sin` / `month_cos` (period 12)
|
|
70
|
+
* - `dow_sin` / `dow_cos` (period 7, Monday-first zero-based index: Mon=0..Sun=6; note ISO 8601 itself numbers Mon=1..Sun=7)
|
|
71
|
+
* - `hour_sin` / `hour_cos` (period 24)
|
|
72
|
+
* - `day_of_year_sin` / `day_of_year_cos` (period 365.0, NOT 365.25)
|
|
73
|
+
*
|
|
74
|
+
* TZ handling: when `tz` is an IANA zone name like 'America/New_York',
|
|
75
|
+
* month/dow/hour/dayOfYear come from the LOCAL clock in that tz (via
|
|
76
|
+
* `Intl.DateTimeFormat`). When `tz` is omitted, UTC parts are used.
|
|
77
|
+
*
|
|
78
|
+
* Invalid `tz` throws `RangeError` BEFORE any row processing (fail-fast).
|
|
79
|
+
*
|
|
80
|
+
* @param rows input rows (NOT mutated)
|
|
81
|
+
* @param dateCol column name containing a date — accepted as ISO string,
|
|
82
|
+
* `Date` instance, or finite epoch-ms number
|
|
83
|
+
* @param tz optional IANA timezone name (validated upfront)
|
|
84
|
+
* @returns new array of rows, each carrying 8 new derived columns
|
|
85
|
+
* (each value is a `number` from sin/cos, or `null` when
|
|
86
|
+
* the row's date is non-parseable)
|
|
87
|
+
* @throws RangeError if `tz` is provided but not a valid IANA zone
|
|
88
|
+
*/
|
|
89
|
+
declare function calendarFeatures<Row extends Record<string, unknown>>(rows: ReadonlyArray<Row>, dateCol: string, tz?: string): ReadonlyArray<Row & Record<string, number | null>>;
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* Pairwise difference between two numeric columns.
|
|
93
|
+
*
|
|
94
|
+
* Mirrors Python `transforms.spread(df, col_a, col_b)`. Derived column name
|
|
95
|
+
* is exactly `{colA}_minus_{colB}`. Value at index i is `rows[i][colA] -
|
|
96
|
+
* rows[i][colB]` when both are finite numbers; otherwise `null`.
|
|
97
|
+
*
|
|
98
|
+
* @param rows input rows (NOT mutated; pure function)
|
|
99
|
+
* @param colA minuend column
|
|
100
|
+
* @param colB subtrahend column
|
|
101
|
+
* @returns new array of rows, each carrying `{colA}_minus_{colB}` column
|
|
102
|
+
*/
|
|
103
|
+
declare function spread<Row extends Record<string, unknown>>(rows: ReadonlyArray<Row>, colA: string, colB: string): ReadonlyArray<Row & Record<string, number | null>>;
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* NWS wind-chill formula (°F). Domain: `tempF ≤ 50 AND windMph > 3`.
|
|
107
|
+
*
|
|
108
|
+
* Mirrors Python `transforms.wind_chill` at transforms.py:108-116. The
|
|
109
|
+
* formula is the 2001 NWS standard:
|
|
110
|
+
*
|
|
111
|
+
* wc = 35.74 + 0.6215 * T - 35.75 * V^0.16 + 0.4275 * T * V^0.16
|
|
112
|
+
*
|
|
113
|
+
* where T is air temperature in °F and V is wind speed in mph.
|
|
114
|
+
*
|
|
115
|
+
* **Out-of-domain (NOT null — Python parity):** when the domain bounds are
|
|
116
|
+
* not satisfied, returns `tempF` unchanged — physically "wind chill equals
|
|
117
|
+
* air temperature when wind is calm or air is warm". Null is reserved for
|
|
118
|
+
* null / undefined / non-finite inputs.
|
|
119
|
+
*
|
|
120
|
+
* Reference: https://www.weather.gov/safety/cold-wind-chill-chart
|
|
121
|
+
* Sanity: `windChill(20, 15) ≈ 6 °F` matches the NWS chart.
|
|
122
|
+
*/
|
|
123
|
+
declare function windChill(tempF: number | null | undefined, windMph: number | null | undefined): number | null;
|
|
124
|
+
/**
|
|
125
|
+
* NWS heat index (°F) using the Rothfusz regression. Domain: `tempF ≥ 80`.
|
|
126
|
+
*
|
|
127
|
+
* Mirrors Python `transforms.heat_index` at transforms.py:119-147. Includes:
|
|
128
|
+
*
|
|
129
|
+
* 1. A simple approximation `simple = 0.5*(T + 61 + (T-68)*1.2 + RH*0.094)`
|
|
130
|
+
* used when `(simple + T)/2 < 80` (low-effective-temperature fast path).
|
|
131
|
+
* 2. The Rothfusz 9-term polynomial:
|
|
132
|
+
* hi = -42.379 + 2.04901523*T + 10.14333127*RH - 0.22475541*T*RH
|
|
133
|
+
* - 0.00683783*T² - 0.05481717*RH² + 0.00122874*T²*RH
|
|
134
|
+
* + 0.00085282*T*RH² - 0.00000199*T²*RH²
|
|
135
|
+
* 3. A dry-air adjustment when `RH < 13 && 80 ≤ T ≤ 112`:
|
|
136
|
+
* hi -= ((13 - RH) / 4) * sqrt((17 - |T - 95|) / 17)
|
|
137
|
+
* 4. A humid-air adjustment when `RH > 85 && 80 ≤ T ≤ 87`:
|
|
138
|
+
* hi += ((RH - 85) / 10) * ((87 - T) / 5)
|
|
139
|
+
*
|
|
140
|
+
* **Out-of-domain (NOT null — Python parity):** when `tempF < 80`, returns
|
|
141
|
+
* `tempF` unchanged. Null is reserved for null / undefined / non-finite
|
|
142
|
+
* inputs.
|
|
143
|
+
*
|
|
144
|
+
* Reference: https://www.wpc.ncep.noaa.gov/html/heatindex.shtml
|
|
145
|
+
* Sanity: `heatIndex(90, 70) ≈ 106 °F` matches the NWS Rothfusz table.
|
|
146
|
+
*/
|
|
147
|
+
declare function heatIndex(tempF: number | null | undefined, rhPct: number | null | undefined): number | null;
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* Physics-based clipping defaults for canonical observation columns.
|
|
151
|
+
*
|
|
152
|
+
* Mirrors Python `mostlyright.preprocessing.PHYSICS_BOUNDS` (preprocessing.py:34-46).
|
|
153
|
+
* Values are `[min, max]` tuples in canonical units (°C for temp, m/s and kt
|
|
154
|
+
* for wind, hPa for pressure, percent for humidity, mm for precip).
|
|
155
|
+
*
|
|
156
|
+
* Both `dew_point_c`/`dewpoint_c` and `wind_dir_deg`/`wind_dir_degrees` are
|
|
157
|
+
* aliased to support legacy + canonical column names.
|
|
158
|
+
*/
|
|
159
|
+
declare const PHYSICS_BOUNDS: ReadonlyMap<string, readonly [number, number]>;
|
|
160
|
+
interface ClipOutliersOptions {
|
|
161
|
+
/** Explicit `[lo, hi]` range. Overrides PHYSICS_BOUNDS and sigma fallback. */
|
|
162
|
+
bounds?: readonly [number, number];
|
|
163
|
+
/** Sigma multiplier for the fallback branch. Default 3.0. Must be > 0. */
|
|
164
|
+
std?: number;
|
|
165
|
+
}
|
|
166
|
+
/**
|
|
167
|
+
* Winsorize a numeric column.
|
|
168
|
+
*
|
|
169
|
+
* Mirrors Python `mostlyright.preprocessing.clip_outliers`. Returns rows with
|
|
170
|
+
* a derived `{col}_clipped` column; the source `col` is preserved unchanged.
|
|
171
|
+
*
|
|
172
|
+
* Decision tree:
|
|
173
|
+
* - `opts.bounds` set → clip to explicit `[lo, hi]`
|
|
174
|
+
* - `PHYSICS_BOUNDS.has(col)` → clip to physics defaults
|
|
175
|
+
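* - else → sigma fallback: clip to `mu ± std*sigma`, where `std` is the multiplier from `opts.std` (default 3.0) and `sigma` is the column's sample standard deviation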
|
|
176
|
+
*
|
|
177
|
+
* **Edge-case behavior (fixes from the Phase 3.5 review iteration):**
|
|
178
|
+
* - Throws `RangeError` if `std ≤ 0` in the sigma fallback (matches Python
|
|
179
|
+
* `ValueError` at preprocessing.py:84-88; silent dataset corruption
|
|
180
|
+
* otherwise).
|
|
181
|
+
* - Sigma=0 pass-through: when all values are identical, sample sigma is
|
|
182
|
+
* zero and the clamp `[mu, mu]` would collapse the column. Pass values
|
|
183
|
+
* through unchanged instead.
|
|
184
|
+
*
|
|
185
|
+
* @param rows input rows (NOT mutated; pure function)
|
|
186
|
+
* @param col column to clip
|
|
187
|
+
* @param opts optional bounds / std overrides; defaults: PHYSICS_BOUNDS or sigma=3
|
|
188
|
+
* @returns new array of rows, each carrying `{col}_clipped`
|
|
189
|
+
* @throws RangeError if sigma fallback would use `std <= 0` or non-finite std
|
|
190
|
+
*/
|
|
191
|
+
declare function clipOutliers<Row extends Record<string, unknown>>(rows: ReadonlyArray<Row>, col: string, opts?: ClipOutliersOptions): ReadonlyArray<Row & Record<string, number | null>>;
|
|
192
|
+
|
|
193
|
+
export { type ClipOutliersOptions, PHYSICS_BOUNDS, ROLLING_FNS, type RollingFn, calendarFeatures, clipOutliers, diff, diff2, heatIndex, lag, rolling, spread, windChill };
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shift a column by `n` rows.
|
|
3
|
+
*
|
|
4
|
+
* Mirrors Python `pd.Series.shift(periods=n)` semantics: at output index `i`,
|
|
5
|
+
* the derived column carries `rows[i-n][col]` if available, else `null`.
|
|
6
|
+
*
|
|
7
|
+
* @param rows input rows (NOT mutated; pure function)
|
|
8
|
+
* @param col column name to lag
|
|
9
|
+
* @param n positive integer (optional in the signature; default not stated here — presumably 1, as for `diff`; TODO confirm); rows at index `< n` get `null` in the derived column
|
|
10
|
+
* @returns new array of rows, each carrying `{col}_lag_{n}` derived column
|
|
11
|
+
* @throws RangeError if `n < 1` or `!Number.isInteger(n)`
|
|
12
|
+
*/
|
|
13
|
+
declare function lag<Row extends Record<string, unknown>>(rows: ReadonlyArray<Row>, col: string, n?: number): ReadonlyArray<Row & Record<string, number | null>>;
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* First (or nth-order) discrete difference of a column.
|
|
17
|
+
*
|
|
18
|
+
* At output index `i`, the derived column carries
|
|
19
|
+
* `rows[i][col] - rows[i-n][col]` if both are finite numbers, else `null`.
|
|
20
|
+
*
|
|
21
|
+
* @param rows input rows (NOT mutated; pure function)
|
|
22
|
+
* @param col column name to difference
|
|
23
|
+
* @param n positive integer step (default 1)
|
|
24
|
+
* @returns new array of rows, each carrying `{col}_diff_{n}` derived column
|
|
25
|
+
* @throws RangeError if `n < 1` or `!Number.isInteger(n)`
|
|
26
|
+
*/
|
|
27
|
+
declare function diff<Row extends Record<string, unknown>>(rows: ReadonlyArray<Row>, col: string, n?: number): ReadonlyArray<Row & Record<string, number | null>>;
|
|
28
|
+
/**
|
|
29
|
+
* Second discrete difference of a column.
|
|
30
|
+
*
|
|
31
|
+
* Equivalent to `diff(diff(col))`. The first two output rows carry `null`
|
|
32
|
+
* (no prior diff available). Mirrors Python `df[column].diff().diff()` which
|
|
33
|
+
* returns a single Series — so the TS output carries ONLY `{col}_diff2`,
|
|
34
|
+
* NOT the intermediate `{col}_diff_1` from the first pass.
|
|
35
|
+
*
|
|
36
|
+
* @param rows input rows (NOT mutated; pure function)
|
|
37
|
+
* @param col column name to second-difference
|
|
38
|
+
* @returns new array of rows, each carrying `{col}_diff2` derived column
|
|
39
|
+
*/
|
|
40
|
+
declare function diff2<Row extends Record<string, unknown>>(rows: ReadonlyArray<Row>, col: string): ReadonlyArray<Row & Record<string, number | null>>;
|
|
41
|
+
|
|
42
|
+
/** The set of reducer names accepted by `rolling`. Ordering is API surface. */
|
|
43
|
+
declare const ROLLING_FNS: readonly ["mean", "median", "min", "max", "std", "count"];
|
|
44
|
+
/** Union of the six reducer-name string literals. */
|
|
45
|
+
type RollingFn = (typeof ROLLING_FNS)[number];
|
|
46
|
+
/**
|
|
47
|
+
* Windowed reduction over a numeric column.
|
|
48
|
+
*
|
|
49
|
+
* At each output row `i`, the window covers
|
|
50
|
+
* `rows[max(0, i-window+1) .. i]` (inclusive both ends), so the first
|
|
51
|
+
* `window-1` rows compute against a partial (still-filling) window —
|
|
52
|
+
* `min_periods=1` semantics from Python.
|
|
53
|
+
*
|
|
54
|
+
* @param rows input rows (NOT mutated; pure function)
|
|
55
|
+
* @param col column name to reduce over
|
|
56
|
+
* @param window positive integer window size (≥ 1)
|
|
57
|
+
* @param fn optional reducer name, one of `'mean' | 'median' | 'min' | 'max' | 'std' | 'count'` (default when omitted is not stated here — TODO confirm)
|
|
58
|
+
* @returns new array of rows, each carrying `{col}_rolling_{window}_{fn}`
|
|
59
|
+
* @throws RangeError if `window < 1`, non-integer, or `fn` is not a `RollingFn`
|
|
60
|
+
*/
|
|
61
|
+
declare function rolling<Row extends Record<string, unknown>>(rows: ReadonlyArray<Row>, col: string, window: number, fn?: RollingFn): ReadonlyArray<Row & Record<string, number | null>>;
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* Add 8 cyclical calendar features (4 sin/cos pairs) to each row.
|
|
65
|
+
*
|
|
66
|
+
* Pairs (sin²+cos² ≈ 1, so a model sees wraparound — Dec→Jan is 1 month
|
|
67
|
+
* apart, not 11):
|
|
68
|
+
*
|
|
69
|
+
* - `month_sin` / `month_cos` (period 12)
|
|
70
|
+
* - `dow_sin` / `dow_cos` (period 7, Monday-first zero-based index: Mon=0..Sun=6; note ISO 8601 itself numbers Mon=1..Sun=7)
|
|
71
|
+
* - `hour_sin` / `hour_cos` (period 24)
|
|
72
|
+
* - `day_of_year_sin` / `day_of_year_cos` (period 365.0, NOT 365.25)
|
|
73
|
+
*
|
|
74
|
+
* TZ handling: when `tz` is an IANA zone name like 'America/New_York',
|
|
75
|
+
* month/dow/hour/dayOfYear come from the LOCAL clock in that tz (via
|
|
76
|
+
* `Intl.DateTimeFormat`). When `tz` is omitted, UTC parts are used.
|
|
77
|
+
*
|
|
78
|
+
* Invalid `tz` throws `RangeError` BEFORE any row processing (fail-fast).
|
|
79
|
+
*
|
|
80
|
+
* @param rows input rows (NOT mutated)
|
|
81
|
+
* @param dateCol column name containing a date — accepted as ISO string,
|
|
82
|
+
* `Date` instance, or finite epoch-ms number
|
|
83
|
+
* @param tz optional IANA timezone name (validated upfront)
|
|
84
|
+
* @returns new array of rows, each carrying 8 new derived columns
|
|
85
|
+
* (each value is a `number` from sin/cos, or `null` when
|
|
86
|
+
* the row's date is non-parseable)
|
|
87
|
+
* @throws RangeError if `tz` is provided but not a valid IANA zone
|
|
88
|
+
*/
|
|
89
|
+
declare function calendarFeatures<Row extends Record<string, unknown>>(rows: ReadonlyArray<Row>, dateCol: string, tz?: string): ReadonlyArray<Row & Record<string, number | null>>;
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* Pairwise difference between two numeric columns.
|
|
93
|
+
*
|
|
94
|
+
* Mirrors Python `transforms.spread(df, col_a, col_b)`. Derived column name
|
|
95
|
+
* is exactly `{colA}_minus_{colB}`. Value at index i is `rows[i][colA] -
|
|
96
|
+
* rows[i][colB]` when both are finite numbers; otherwise `null`.
|
|
97
|
+
*
|
|
98
|
+
* @param rows input rows (NOT mutated; pure function)
|
|
99
|
+
* @param colA minuend column
|
|
100
|
+
* @param colB subtrahend column
|
|
101
|
+
* @returns new array of rows, each carrying `{colA}_minus_{colB}` column
|
|
102
|
+
*/
|
|
103
|
+
declare function spread<Row extends Record<string, unknown>>(rows: ReadonlyArray<Row>, colA: string, colB: string): ReadonlyArray<Row & Record<string, number | null>>;
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* NWS wind-chill formula (°F). Domain: `tempF ≤ 50 AND windMph > 3`.
|
|
107
|
+
*
|
|
108
|
+
* Mirrors Python `transforms.wind_chill` at transforms.py:108-116. The
|
|
109
|
+
* formula is the 2001 NWS standard:
|
|
110
|
+
*
|
|
111
|
+
* wc = 35.74 + 0.6215 * T - 35.75 * V^0.16 + 0.4275 * T * V^0.16
|
|
112
|
+
*
|
|
113
|
+
* where T is air temperature in °F and V is wind speed in mph.
|
|
114
|
+
*
|
|
115
|
+
* **Out-of-domain (NOT null — Python parity):** when the domain bounds are
|
|
116
|
+
* not satisfied, returns `tempF` unchanged — physically "wind chill equals
|
|
117
|
+
* air temperature when wind is calm or air is warm". Null is reserved for
|
|
118
|
+
* null / undefined / non-finite inputs.
|
|
119
|
+
*
|
|
120
|
+
* Reference: https://www.weather.gov/safety/cold-wind-chill-chart
|
|
121
|
+
* Sanity: `windChill(20, 15) ≈ 6 °F` matches the NWS chart.
|
|
122
|
+
*/
|
|
123
|
+
declare function windChill(tempF: number | null | undefined, windMph: number | null | undefined): number | null;
|
|
124
|
+
/**
|
|
125
|
+
* NWS heat index (°F) using the Rothfusz regression. Domain: `tempF ≥ 80`.
|
|
126
|
+
*
|
|
127
|
+
* Mirrors Python `transforms.heat_index` at transforms.py:119-147. Includes:
|
|
128
|
+
*
|
|
129
|
+
* 1. A simple approximation `simple = 0.5*(T + 61 + (T-68)*1.2 + RH*0.094)`
|
|
130
|
+
* used when `(simple + T)/2 < 80` (low-effective-temperature fast path).
|
|
131
|
+
* 2. The Rothfusz 9-term polynomial:
|
|
132
|
+
* hi = -42.379 + 2.04901523*T + 10.14333127*RH - 0.22475541*T*RH
|
|
133
|
+
* - 0.00683783*T² - 0.05481717*RH² + 0.00122874*T²*RH
|
|
134
|
+
* + 0.00085282*T*RH² - 0.00000199*T²*RH²
|
|
135
|
+
* 3. A dry-air adjustment when `RH < 13 && 80 ≤ T ≤ 112`:
|
|
136
|
+
* hi -= ((13 - RH) / 4) * sqrt((17 - |T - 95|) / 17)
|
|
137
|
+
* 4. A humid-air adjustment when `RH > 85 && 80 ≤ T ≤ 87`:
|
|
138
|
+
* hi += ((RH - 85) / 10) * ((87 - T) / 5)
|
|
139
|
+
*
|
|
140
|
+
* **Out-of-domain (NOT null — Python parity):** when `tempF < 80`, returns
|
|
141
|
+
* `tempF` unchanged. Null is reserved for null / undefined / non-finite
|
|
142
|
+
* inputs.
|
|
143
|
+
*
|
|
144
|
+
* Reference: https://www.wpc.ncep.noaa.gov/html/heatindex.shtml
|
|
145
|
+
* Sanity: `heatIndex(90, 70) ≈ 106 °F` matches the NWS Rothfusz table.
|
|
146
|
+
*/
|
|
147
|
+
declare function heatIndex(tempF: number | null | undefined, rhPct: number | null | undefined): number | null;
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* Physics-based clipping defaults for canonical observation columns.
|
|
151
|
+
*
|
|
152
|
+
* Mirrors Python `mostlyright.preprocessing.PHYSICS_BOUNDS` (preprocessing.py:34-46).
|
|
153
|
+
* Values are `[min, max]` tuples in canonical units (°C for temp, m/s and kt
|
|
154
|
+
* for wind, hPa for pressure, percent for humidity, mm for precip).
|
|
155
|
+
*
|
|
156
|
+
* Both `dew_point_c`/`dewpoint_c` and `wind_dir_deg`/`wind_dir_degrees` are
|
|
157
|
+
* aliased to support legacy + canonical column names.
|
|
158
|
+
*/
|
|
159
|
+
declare const PHYSICS_BOUNDS: ReadonlyMap<string, readonly [number, number]>;
|
|
160
|
+
interface ClipOutliersOptions {
|
|
161
|
+
/** Explicit `[lo, hi]` range. Overrides PHYSICS_BOUNDS and sigma fallback. */
|
|
162
|
+
bounds?: readonly [number, number];
|
|
163
|
+
/** Sigma multiplier for the fallback branch. Default 3.0. Must be > 0. */
|
|
164
|
+
std?: number;
|
|
165
|
+
}
|
|
166
|
+
/**
|
|
167
|
+
* Winsorize a numeric column.
|
|
168
|
+
*
|
|
169
|
+
* Mirrors Python `mostlyright.preprocessing.clip_outliers`. Returns rows with
|
|
170
|
+
* a derived `{col}_clipped` column; the source `col` is preserved unchanged.
|
|
171
|
+
*
|
|
172
|
+
* Decision tree:
|
|
173
|
+
* - `opts.bounds` set → clip to explicit `[lo, hi]`
|
|
174
|
+
* - `PHYSICS_BOUNDS.has(col)` → clip to physics defaults
|
|
175
|
+
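* - else → sigma fallback: clip to `mu ± std*sigma`, where `std` is the multiplier from `opts.std` (default 3.0) and `sigma` is the column's sample standard deviation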
|
|
176
|
+
*
|
|
177
|
+
* **Edge-case behavior (fixes from the Phase 3.5 review iteration):**
|
|
178
|
+
* - Throws `RangeError` if `std ≤ 0` in the sigma fallback (matches Python
|
|
179
|
+
* `ValueError` at preprocessing.py:84-88; silent dataset corruption
|
|
180
|
+
* otherwise).
|
|
181
|
+
* - Sigma=0 pass-through: when all values are identical, sample sigma is
|
|
182
|
+
* zero and the clamp `[mu, mu]` would collapse the column. Pass values
|
|
183
|
+
* through unchanged instead.
|
|
184
|
+
*
|
|
185
|
+
* @param rows input rows (NOT mutated; pure function)
|
|
186
|
+
* @param col column to clip
|
|
187
|
+
* @param opts optional bounds / std overrides; defaults: PHYSICS_BOUNDS or sigma=3
|
|
188
|
+
* @returns new array of rows, each carrying `{col}_clipped`
|
|
189
|
+
* @throws RangeError if sigma fallback would use `std <= 0` or non-finite std
|
|
190
|
+
*/
|
|
191
|
+
declare function clipOutliers<Row extends Record<string, unknown>>(rows: ReadonlyArray<Row>, col: string, opts?: ClipOutliersOptions): ReadonlyArray<Row & Record<string, number | null>>;
|
|
192
|
+
|
|
193
|
+
export { type ClipOutliersOptions, PHYSICS_BOUNDS, ROLLING_FNS, type RollingFn, calendarFeatures, clipOutliers, diff, diff2, heatIndex, lag, rolling, spread, windChill };
|