climagrid 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
climagrid/__init__.py ADDED
@@ -0,0 +1,17 @@
1
+ """
2
+ climagrid — Climate data, grid-ready.
3
+
4
+ Open-source Python toolkit that converts public NOAA, NASA, USDA, and
5
+ U.S. Forest Service data into standardized environmental stress features
6
+ for electric utility predictive maintenance systems.
7
+
8
+ Designed for rural electric cooperatives and municipal utilities.
9
+
10
+ License: Apache 2.0
11
+ """
12
+
13
+ from climagrid.pipeline.orchestrator import run
14
+ from climagrid.schema import schema_summary, validate_dataframe
15
+
16
+ __version__ = "0.1.0"
17
+ __all__ = ["run", "schema_summary", "validate_dataframe"]
@@ -0,0 +1,4 @@
1
+ from climagrid.assets.joiner import AssetEnvironmentJoiner
2
+ from climagrid.assets.registry import AssetRegistry
3
+
4
+ __all__ = ["AssetRegistry", "AssetEnvironmentJoiner"]
@@ -0,0 +1,177 @@
1
+ """
2
+ AssetEnvironmentJoiner — spatially joins environmental data to asset locations.
3
+
4
+ For each asset in the registry, finds the nearest data point (grid cell or
5
+ station) in the environmental DataFrame and extracts its time series.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import warnings
11
+
12
+ import numpy as np
13
+ import pandas as pd
14
+ from scipy.spatial import cKDTree
15
+
16
+ from climagrid.assets.registry import AssetRegistry
17
+
18
+
19
class AssetEnvironmentJoiner:
    """
    Joins time-series environmental data to utility asset point locations.

    Strategy: nearest-neighbor match in Euclidean lat/lon space (valid for
    small regions, <500 km extents). For large extents consider haversine.

    Parameters
    ----------
    max_distance_km:
        Reject matches farther than this distance. Points beyond this
        threshold will have NaN environmental values. Default 100 km.

    Example
    -------
    >>> registry = AssetRegistry("assets.csv")
    >>> nasa = NasaPowerAdapter()
    >>> env_df = nasa.fetch(bbox, start_dt, end_dt)
    >>> joiner = AssetEnvironmentJoiner()
    >>> result = joiner.join(registry, env_df)
    >>> result.head()
    """

    def __init__(self, max_distance_km: float = 100.0):
        self._max_distance_km = max_distance_km

    def join(
        self,
        registry: AssetRegistry,
        env_df: pd.DataFrame,
        time_col: str = "timestamp",
    ) -> pd.DataFrame:
        """
        Join environmental observations to each asset for every timestamp.

        Parameters
        ----------
        registry:
            Object exposing an ``assets`` table with 'asset_id', 'lat' and
            'lon' columns (normally an AssetRegistry).
        env_df:
            DataFrame returned by any adapter's fetch() method.
            Must have 'lat', 'lon', and at least one timestamp.
        time_col:
            Name of the timestamp column in env_df.

        Returns
        -------
        pd.DataFrame
            One row per (asset_id, timestamp) with index columns and all
            environmental columns present in env_df. The 'lat'/'lon' columns
            hold the asset's own coordinates, not those of the matched grid
            point. If env_df is empty, a frame with only 'asset_id' is
            returned; if there are no assets, an empty frame is returned.

        Raises
        ------
        ValueError
            If env_df is non-empty but lacks a 'lat' or 'lon' column.

        Warns
        -----
        UserWarning
            If any asset is farther than ``max_distance_km`` from every
            environmental data point.
        """
        assets = registry.assets

        if env_df.empty:
            return pd.DataFrame({"asset_id": assets["asset_id"].values})

        if "lat" not in env_df.columns or "lon" not in env_df.columns:
            raise ValueError("env_df must contain 'lat' and 'lon' columns")

        # Build KD-tree from unique environmental grid points
        env_points = env_df[["lat", "lon"]].drop_duplicates().reset_index(drop=True)
        tree = cKDTree(env_points[["lat", "lon"]].values)

        asset_coords = np.column_stack(
            [assets["lat"].values, assets["lon"].values]
        )

        # Query nearest grid point for each asset
        distances_deg, indices = tree.query(asset_coords, k=1)
        # Rough conversion: 1 degree ≈ 111 km
        distances_km = distances_deg * 111.0

        # Warn about far matches
        too_far = distances_km > self._max_distance_km
        if too_far.any():
            n_far = too_far.sum()
            warnings.warn(
                f"{n_far} asset(s) are more than {self._max_distance_km} km "
                "from any environmental data point. Those rows will have NaN values.",
                UserWarning,
                stacklevel=2,
            )

        # Coordinates of the nearest grid point, aligned positionally with
        # the rows of ``assets``.
        nearest_lats = env_points["lat"].values[indices]
        nearest_lons = env_points["lon"].values[indices]

        env_value_cols = [
            c for c in env_df.columns
            if c not in {"lat", "lon", time_col}
        ]
        slice_cols = env_value_cols + (
            [time_col] if time_col in env_df.columns else []
        )

        # Build result: for each asset, extract the env time series at its
        # nearest point.
        result_frames: list[pd.DataFrame] = []

        # ``pos`` is the row position, which is what the query results are
        # aligned to. Index labels must not be used for this lookup: they
        # can be non-contiguous (filtered registry) or even duplicated.
        for pos, (_, row) in enumerate(assets.iterrows()):
            at_nearest = (env_df["lat"] == nearest_lats[pos]) & (
                env_df["lon"] == nearest_lons[pos]
            )
            env_slice = env_df[at_nearest][slice_cols].copy()

            env_slice["asset_id"] = row["asset_id"]
            env_slice["lat"] = row["lat"]
            env_slice["lon"] = row["lon"]

            if too_far[pos]:
                # Keep the timestamps but blank out the untrusted values.
                for col in env_value_cols:
                    env_slice[col] = float("nan")

            result_frames.append(env_slice)

        if not result_frames:
            return pd.DataFrame()

        result = pd.concat(result_frames, ignore_index=True)

        # Reorder columns: asset_id, timestamp, lat, lon, then env columns
        front_cols = ["asset_id"]
        if time_col in result.columns:
            front_cols.append(time_col)
        front_cols += ["lat", "lon"]
        remaining = [c for c in result.columns if c not in front_cols]
        return result[front_cols + remaining].reset_index(drop=True)

    def join_point(
        self,
        asset_lat: float,
        asset_lon: float,
        env_df: pd.DataFrame,
        time_col: str = "timestamp",
    ) -> pd.DataFrame:
        """
        Convenience method: join env data for a single lat/lon point.

        The point is given the asset_id "point" and passed through
        :meth:`join`, so the return value, warnings and errors are the same
        as for a one-asset registry.
        """
        import os
        import tempfile

        from climagrid.assets.registry import AssetRegistry

        tmp_data = pd.DataFrame([{
            "asset_id": "point",
            "lat": asset_lat,
            "lon": asset_lon,
        }])
        # The registry is built from a file path, so the point is
        # round-tripped through a temporary CSV. newline="" lets the csv
        # writer control line endings itself. delete=False because the file
        # is reopened by name after this handle is closed.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".csv", delete=False, newline=""
        ) as f:
            tmp_data.to_csv(f, index=False)
            tmp_path = f.name

        try:
            reg = AssetRegistry(tmp_path)
            return self.join(reg, env_df, time_col)
        finally:
            os.unlink(tmp_path)
@@ -0,0 +1,153 @@
1
+ """
2
+ AssetRegistry — loads utility asset records from CSV or GeoJSON.
3
+
4
+ Each asset must have at minimum: asset_id, lat, lon.
5
+ Optional fields: asset_type, voltage_kv, install_year, manufacturer.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from pathlib import Path
11
+
12
+ import geopandas as gpd
13
+ import pandas as pd
14
+
15
+ REQUIRED_COLUMNS = {"asset_id", "lat", "lon"}
16
+
17
+ ASSET_TYPE_VALUES = {
18
+ "transformer",
19
+ "circuit_breaker",
20
+ "transmission_line",
21
+ "distribution_line",
22
+ "substation",
23
+ "capacitor_bank",
24
+ "recloser",
25
+ "other",
26
+ }
27
+
28
+
29
class AssetRegistry:
    """
    Utility asset registry read from a CSV or GeoJSON file.

    Parameters
    ----------
    path:
        Location of the asset file. A CSV needs asset_id, lat and lon
        columns; a GeoJSON needs asset_id, with lat/lon taken from the
        geometry when those columns are absent.
    asset_type_filter:
        When given (and non-empty), keep only rows whose ``asset_type``
        is one of these values.

    Example
    -------
    >>> registry = AssetRegistry("my_coop_assets.csv")
    >>> registry.assets.head()
    """

    def __init__(
        self,
        path: str | Path,
        asset_type_filter: list[str] | None = None,
    ):
        self._path = Path(path)
        loaded = self._load(self._path)

        if asset_type_filter:
            wanted = loaded["asset_type"].isin(asset_type_filter)
            loaded = loaded[wanted]

        self._gdf = loaded

    @property
    def assets(self) -> gpd.GeoDataFrame:
        """GeoDataFrame with one row per asset, CRS=EPSG:4326."""
        return self._gdf

    @property
    def count(self) -> int:
        """Number of assets currently held."""
        return len(self._gdf)

    @property
    def bounding_box(self) -> tuple[float, float, float, float]:
        """(min_lat, max_lat, min_lon, max_lon) covering all assets."""
        # total_bounds is ordered (minx, miny, maxx, maxy), i.e. lon first.
        min_lon, min_lat, max_lon, max_lat = self._gdf.total_bounds
        return min_lat, max_lat, min_lon, max_lon

    def __len__(self) -> int:
        return self.count

    def __repr__(self) -> str:
        return f"AssetRegistry(n={self.count}, path={self._path.name!r})"

    # ------------------------------------------------------------------
    # Internal loading
    # ------------------------------------------------------------------

    def _load(self, path: Path) -> gpd.GeoDataFrame:
        """Pick a loader from the (case-insensitive) file extension."""
        suffix = path.suffix.lower()

        if suffix == ".csv":
            return self._load_csv(path)
        if suffix in {".geojson", ".json"}:
            return self._load_geojson(path)

        raise ValueError(
            f"Unsupported file type: {suffix!r}. "
            "Use .csv or .geojson."
        )

    def _load_csv(self, path: Path) -> gpd.GeoDataFrame:
        """Read a CSV, coerce lat/lon to numbers and build point geometry."""
        # asset_id is read as text so values are not reinterpreted as numbers.
        df = pd.read_csv(path, dtype={"asset_id": str})
        self._check_required_columns(df, path)

        # Unparseable coordinates become NaN and are dropped just below.
        for coord in ("lat", "lon"):
            df[coord] = pd.to_numeric(df[coord], errors="coerce")

        n_before = len(df)
        df = df.dropna(subset=["lat", "lon"])
        n_dropped = n_before - len(df)
        if n_dropped > 0:
            import warnings
            warnings.warn(
                f"Dropped {n_dropped} rows with null lat/lon",
                UserWarning,
                stacklevel=3,
            )

        points = gpd.points_from_xy(df["lon"], df["lat"])
        return gpd.GeoDataFrame(df, geometry=points, crs="EPSG:4326")

    def _load_geojson(self, path: Path) -> gpd.GeoDataFrame:
        """Read a GeoJSON file and normalise it to EPSG:4326 with lat/lon."""
        gdf = gpd.read_file(path)
        # A file without a CRS is labelled EPSG:4326; any other CRS is
        # reprojected to it.
        if gdf.crs is None:
            gdf = gdf.set_crs("EPSG:4326")
        else:
            gdf = gdf.to_crs("EPSG:4326")

        # Extract lat/lon from geometry if not present
        if "lat" not in gdf.columns:
            gdf["lat"] = gdf.geometry.y
        if "lon" not in gdf.columns:
            gdf["lon"] = gdf.geometry.x

        attributes = pd.DataFrame(gdf.drop(columns="geometry"))
        self._check_required_columns(attributes, path)
        return gdf

    @staticmethod
    def _check_required_columns(df: pd.DataFrame, path: Path) -> None:
        """Raise ValueError naming any REQUIRED_COLUMNS absent from ``df``."""
        missing = REQUIRED_COLUMNS - set(df.columns)
        if not missing:
            return
        raise ValueError(
            f"Asset file {path.name!r} is missing required columns: "
            f"{sorted(missing)}. "
            f"Required: {sorted(REQUIRED_COLUMNS)}"
        )
142
+
143
+
144
def load_sample_assets() -> AssetRegistry:
    """Load the bundled 50-asset sample registry for testing and demos.

    The file is looked up at ``examples/data/sample_assets.csv`` under the
    directory four levels above this module.

    Raises
    ------
    FileNotFoundError
        If the sample CSV is not present at that location.
    """
    root = Path(__file__)
    for _ in range(4):
        root = root.parent

    sample_path = root.joinpath("examples", "data", "sample_assets.csv")
    if sample_path.exists():
        return AssetRegistry(sample_path)

    raise FileNotFoundError(
        f"Sample asset file not found at {sample_path}. "
        "Has the repository been cloned fully?"
    )
climagrid/cli.py ADDED
@@ -0,0 +1,166 @@
1
+ """
2
+ Command-line interface for climagrid.
3
+
4
+ Usage examples
5
+ --------------
6
+ climagrid fetch --assets assets.csv --start 2024-07-01 --end 2024-07-08
7
+ climagrid fetch --assets assets.csv --start 2024-07-01 --end 2024-07-08 \\
8
+ --sources nasa_power,usfs_wfigs --output features.parquet
9
+ climagrid schema
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import sys
15
+ from datetime import datetime, timezone
16
+ from pathlib import Path
17
+
18
+ import click
19
+
20
+
21
# Root command group for the CLI; the `fetch` and `schema` subcommands in
# this module attach to it via @main.command().
@click.group()
@click.version_option(package_name="climagrid")
def main() -> None:
    """climagrid — climate data, grid-ready.

    Fetch NOAA/NASA/USDA/USFS environmental data and compute
    predictive-maintenance stress features for utility assets.
    """
    # Intentionally empty: the group only dispatches to its subcommands.
    # NOTE(review): click shows the docstring above as the --help text, so
    # editing it changes user-facing CLI output.
29
+
30
+
31
def _parse_utc(value: str) -> datetime:
    """Parse an ISO-8601 date/datetime string into an aware UTC datetime.

    Naive values are taken to be UTC already. Values that carry an explicit
    offset are converted to UTC instead of having the offset overwritten.

    Raises
    ------
    ValueError
        If ``value`` is not a valid ISO-8601 string.
    """
    parsed = datetime.fromisoformat(value)
    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)


@main.command()
@click.option(
    "--assets", "-a",
    required=True,
    type=click.Path(exists=True, path_type=Path),
    help="Asset CSV or GeoJSON file (must have asset_id, lat, lon columns).",
)
@click.option(
    "--start", "-s",
    required=True,
    metavar="YYYY-MM-DD",
    help="Start date (UTC).",
)
@click.option(
    "--end", "-e",
    required=True,
    metavar="YYYY-MM-DD",
    help="End date (UTC, inclusive).",
)
@click.option(
    "--sources",
    default="nasa_power",
    show_default=True,
    help="Comma-separated data source names. "
         "Valid: nasa_power, noaa_hrrr, noaa_ncei, usda_nrcs, usfs_wfigs.",
)
@click.option(
    "--features",
    default="all",
    show_default=True,
    help="Comma-separated feature names or 'all'. "
         "Valid: thermal, conductor_sag, freeze_thaw, ice_loading, soil, wildfire.",
)
@click.option(
    "--output", "-o",
    default="climagrid_output.parquet",
    show_default=True,
    type=click.Path(path_type=Path),
    help="Output file path (.parquet or .csv).",
)
@click.option(
    "--long-form",
    is_flag=True,
    default=False,
    help="Write long-form Parquet (feature_name, feature_value rows) instead of wide.",
)
@click.option(
    "--bbox-radius",
    default=50.0,
    show_default=True,
    metavar="KM",
    help="Bounding box radius around asset centroid for data fetch.",
)
def fetch(
    assets: Path,
    start: str,
    end: str,
    sources: str,
    features: str,
    output: Path,
    long_form: bool,
    bbox_radius: float,
) -> None:
    """Fetch environmental data and compute stress features for utility assets."""
    # Heavy package imports are done inside the command rather than at
    # module import time.
    import climagrid
    from climagrid.outputs import to_csv, to_long_parquet, to_parquet

    try:
        start_dt = _parse_utc(start)
        end_dt = _parse_utc(end)
    except ValueError as exc:
        raise click.BadParameter(str(exc), param_hint="--start/--end") from exc

    # Reject an inverted range up front instead of fetching nothing.
    if end_dt < start_dt:
        raise click.BadParameter(
            f"end ({end}) is earlier than start ({start})",
            param_hint="--start/--end",
        )

    source_list = [s.strip() for s in sources.split(",") if s.strip()]
    feature_list: list[str] | str = (
        "all"
        if features.strip().lower() == "all"
        else [f.strip() for f in features.split(",") if f.strip()]
    )

    click.echo(f"Assets: {assets}")
    click.echo(f"Period: {start} → {end}")
    click.echo(f"Sources: {', '.join(source_list)}")
    click.echo(f"Features: {features}")

    # Top-level CLI boundary: report any pipeline failure as a one-line
    # error and a non-zero exit status rather than a traceback.
    try:
        df = climagrid.run(
            assets,
            start_dt=start_dt,
            end_dt=end_dt,
            sources=source_list,
            features=feature_list,
            bbox_radius_km=bbox_radius,
        )
    except Exception as exc:
        click.secho(f"Error: {exc}", fg="red", err=True)
        sys.exit(1)

    if df.empty:
        click.secho("Warning: result is empty — check source availability.", fg="yellow")

    # Choose the writer. CSV is selected by suffix; everything else is
    # Parquet, long-form when requested. Checking --long-form independently
    # of the suffix keeps the flag from being dropped for e.g. ".pq".
    suffix = output.suffix.lower()
    if suffix == ".csv":
        if long_form:
            click.secho(
                "Warning: --long-form applies to Parquet output only; "
                "writing wide CSV.",
                fg="yellow",
                err=True,
            )
        out_path = to_csv(df, output)
        fmt = "CSV"
    elif long_form:
        out_path = to_long_parquet(df, output)
        fmt = "long-form Parquet"
    else:
        out_path = to_parquet(df, output)
        fmt = "Parquet"

    click.secho(
        f"✓ {len(df):,} rows × {df.shape[1]} columns → {out_path} ({fmt})",
        fg="green",
    )
147
+
148
+
149
@main.command()
@click.option(
    "--output", "-o",
    default=None,
    type=click.Path(path_type=Path),
    help="Optional path to write schema JSON file.",
)
def schema(output: Path | None) -> None:
    """Print the climagrid column schema."""
    import climagrid

    # Always print the tabular summary to stdout.
    click.echo(climagrid.schema_summary().to_string(index=False))

    # Writing the JSON schema file is opt-in via --output.
    if not output:
        return

    from climagrid.outputs import to_json_schema

    to_json_schema(output)
    click.secho(f"✓ Schema written to {output}", fg="green")
@@ -0,0 +1,15 @@
1
+ from climagrid.features.conductor_sag import ConductorSagIndex
2
+ from climagrid.features.freeze_thaw import FreezeThawtCycleCounter
3
+ from climagrid.features.ice_loading import IceLoadingRisk
4
+ from climagrid.features.soil import SoilSaturationIndex
5
+ from climagrid.features.thermal import ThermalStressIndex
6
+ from climagrid.features.wildfire import WildfireProximityScore
7
+
8
+ __all__ = [
9
+ "ThermalStressIndex",
10
+ "FreezeThawtCycleCounter",
11
+ "IceLoadingRisk",
12
+ "SoilSaturationIndex",
13
+ "WildfireProximityScore",
14
+ "ConductorSagIndex",
15
+ ]
@@ -0,0 +1,136 @@
1
+ """
2
+ ConductorSagIndex — thermal sag estimation for overhead T&D lines.
3
+
4
+ When conductor temperature rises, the aluminum/ACSR strands expand and
5
+ the conductor sags downward, reducing ground clearance. Excessive sag
6
+ causes regulatory violations and phase-to-ground faults.
7
+
8
+ Thermal sag is governed by the IEEE 738-2012 (Standard for Calculating
9
+ the Current-Temperature Relationship of Bare Overhead Conductors). This
10
+ module implements a simplified version using ambient temperature and solar
11
+ irradiance as primary inputs.
12
+
13
+ The output is a normalized index [0, 1] representing sag relative to the
14
+ maximum allowable sag (configurable), suitable as an ML feature.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import numpy as np
20
+ import pandas as pd
21
+
22
+
23
class ConductorSagIndex:
    """
    Computes normalized conductor thermal sag index from weather inputs.

    Simplified IEEE 738-2012 heat balance:
    T_conductor ≈ T_ambient + (I²R + Q_solar - Q_convective) / (thermal_capacity)

    For stress *feature* purposes (not full current rating), we approximate
    the conductor temperature as ambient + solar heating - convective cooling,
    then compute sag as a fraction of the maximum design sag.

    Parameters
    ----------
    temp_col:
        Ambient temperature column in °C.
    solar_col:
        Global horizontal irradiance column in W/m².
    wind_col:
        Wind speed column in m/s. Wind is the primary cooling mechanism.
    max_sag_temp_c:
        Conductor temperature at which sag reaches the design maximum.
        Default 75°C (typical for ACSR "Drake" conductor per IEEE 738).
    conductor_absorptivity:
        Solar absorptivity of the conductor surface (0–1). Default 0.5.
    conductor_emissivity:
        Emissivity for radiated cooling (0–1). Default 0.5. Stored but not
        used by compute(), which omits the radiation term.
    conductor_diameter_mm:
        Conductor outer diameter for convective heat loss. Default 28.1 mm (Drake ACSR).
    baseline_temp_c:
        Conductor temperature that maps to an index of 0 (design reference
        temperature). Default 25°C. Must be below ``max_sag_temp_c``.

    Raises
    ------
    ValueError
        If ``max_sag_temp_c`` is not strictly greater than
        ``baseline_temp_c`` (the index would divide by zero or invert).

    Example
    -------
    >>> csi = ConductorSagIndex()
    >>> df = csi.compute(env_df)
    >>> df["feat_conductor_sag_index"]
    """

    # Stefan-Boltzmann constant W/(m²·K⁴)
    _SIGMA = 5.6704e-8

    def __init__(
        self,
        temp_col: str = "hrrr_temperature_2m",
        solar_col: str = "hrrr_solar_irradiance_ghi",
        wind_col: str = "hrrr_wind_speed_10m",
        max_sag_temp_c: float = 75.0,
        conductor_absorptivity: float = 0.5,
        conductor_emissivity: float = 0.5,
        conductor_diameter_mm: float = 28.1,
        baseline_temp_c: float = 25.0,
    ):
        if max_sag_temp_c <= baseline_temp_c:
            raise ValueError(
                f"max_sag_temp_c ({max_sag_temp_c}) must be greater than "
                f"baseline_temp_c ({baseline_temp_c})"
            )
        self._temp_col = temp_col
        self._solar_col = solar_col
        self._wind_col = wind_col
        self._max_sag_temp_c = max_sag_temp_c
        self._baseline_temp_c = baseline_temp_c
        self._alpha = conductor_absorptivity
        self._eps = conductor_emissivity
        self._d = conductor_diameter_mm / 1000.0  # convert to metres

    def compute(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Add feat_conductor_sag_index column [0, 1]. Returns a copy.

        Each input is read from its configured column, or from the first
        available fallback column. If no temperature column is found the
        feature is all-NaN. Missing solar or wind data is replaced by a
        constant (400 W/m² and 1.0 m/s respectively).
        """
        df = df.copy()

        temp = self._resolve_col(df, self._temp_col, ["nasa_temperature_2m", "ncei_temperature_max"])
        solar = self._resolve_col(df, self._solar_col, ["nasa_solar_irradiance_ghi"])
        wind = self._resolve_col(df, self._wind_col, ["nasa_wind_speed_10m", "ncei_wind_speed"])

        if temp is None:
            df["feat_conductor_sag_index"] = float("nan")
            return df

        # Defaults when data not available
        if solar is None:
            solar = np.full_like(temp, 400.0)  # moderate irradiance
        if wind is None:
            wind = np.full_like(temp, 1.0)  # near-calm (conservative)

        wind = np.maximum(wind, 0.5)  # prevent divide-by-zero in convection

        # Solar heat gain per unit length (W/m)
        q_solar = self._alpha * solar * self._d

        # Convective cooling (simplified Morgan formula per IEEE 738 Eq. 3a)
        #   q_conv = (1.01 + 1.35 * Re^0.52) * k_f * (T_c - T_a)
        # Simplified for feature purposes: linear approximation
        # Full implementation would require air density, viscosity, thermal conductivity
        # We use: q_conv ≈ h_c * d * delta_T where h_c ≈ 10 * sqrt(wind) W/(m²·K) (typical)
        h_c = 10.0 * np.sqrt(wind)
        q_conv_per_k = h_c * self._d  # W/(m·K) per unit temperature rise

        # Steady-state conductor temperature rise above ambient (°C)
        # Radiation delta omitted for simplicity (dominated by convection at typical loadings)
        # The floor at 0 also neutralises negative irradiance readings.
        delta_t = np.maximum(q_solar / q_conv_per_k, 0.0)
        t_conductor = temp + delta_t

        # Sag index: conductor temperature rescaled so that baseline -> 0 and
        # max allowable -> 1, clamped to [0, 1].
        # Using linear approximation: sag scales approximately linearly with temperature
        # (IEEE 738 Table B.1 confirms near-linear relationship for typical conductors)
        span = self._max_sag_temp_c - self._baseline_temp_c
        sag_index = (t_conductor - self._baseline_temp_c) / span
        df["feat_conductor_sag_index"] = np.clip(sag_index, 0.0, 1.0)

        return df

    @property
    def _sigma_val(self) -> float:
        """Stefan-Boltzmann constant used by this class."""
        return self._SIGMA

    @staticmethod
    def _resolve_col(
        df: pd.DataFrame, primary: str, fallbacks: list[str]
    ) -> np.ndarray | None:
        """Return the first of ``primary``/``fallbacks`` present in ``df`` as a float array, else None."""
        for name in (primary, *fallbacks):
            if name in df.columns:
                return df[name].values.astype(float)
        return None