downsampler 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,80 @@
1
+ """downsampler - Timeseries DataFrame downsampling with LTTB, aggregation methods, and fidelity testing.
2
+
3
+ This package provides tools for downsampling time series data in pandas DataFrames,
4
+ with support for:
5
+ - LTTB (Largest Triangle Three Buckets) algorithm for visual fidelity
6
+ - Multiple aggregation methods (mean, median, min, max)
7
+ - Gap-aware processing
8
+ - Edge handling strategies
9
+ - Deferred/lazy data fetching
10
+ - Fidelity testing and comparison
11
+
12
+ Example:
13
+ >>> import pandas as pd
14
+ >>> from downsampler import downsample, DownsampleConfig, AggregationMethod
15
+ >>>
16
+ >>> # Create sample data
17
+ >>> df = pd.DataFrame(
18
+ ... {'value': [1, 2, 3, 4, 5]},
19
+ ... index=pd.date_range('2024-01-01', periods=5, freq='1min')
20
+ ... )
21
+ >>>
22
+ >>> # Downsample using mean
23
+ >>> result = downsample(df, target_cadence='5min')
24
+ >>>
25
+ >>> # Downsample using LTTB
26
+ >>> config = DownsampleConfig(
27
+ ... method=AggregationMethod.LTTB,
28
+ ... lttb_target_column='value'
29
+ ... )
30
+ >>> result = downsample(df, target_cadence='5min', config=config)
31
+ """
32
+
33
+ from downsampler.config import (
34
+ AggregationMethod,
35
+ EdgeHandling,
36
+ GapHandling,
37
+ DownsampleConfig,
38
+ )
39
+ from downsampler.core import downsample, downsample_multi_aggregate
40
+ from downsampler.gaps import (
41
+ find_gap_indices,
42
+ groupby_gaps,
43
+ wrap_in_nans,
44
+ mark_gaps_in_dataframe,
45
+ )
46
+ from downsampler.lttb import downsample_lttb
47
+ from downsampler.aggregators import (
48
+ downsample_mean,
49
+ downsample_median,
50
+ downsample_min,
51
+ downsample_max,
52
+ )
53
+ from downsampler.deferred import deferred_downsample
54
+
55
+ __version__ = "0.1.0"
56
+
57
+ __all__ = [
58
+ # Config
59
+ "AggregationMethod",
60
+ "EdgeHandling",
61
+ "GapHandling",
62
+ "DownsampleConfig",
63
+ # Core
64
+ "downsample",
65
+ "downsample_multi_aggregate",
66
+ # Gaps
67
+ "find_gap_indices",
68
+ "groupby_gaps",
69
+ "wrap_in_nans",
70
+ "mark_gaps_in_dataframe",
71
+ # LTTB
72
+ "downsample_lttb",
73
+ # Aggregators
74
+ "downsample_mean",
75
+ "downsample_median",
76
+ "downsample_min",
77
+ "downsample_max",
78
+ # Deferred
79
+ "deferred_downsample",
80
+ ]
@@ -0,0 +1,338 @@
1
+ """Aggregation-based downsampling methods."""
2
+
3
+ import pandas as pd
4
+ import numpy as np
5
+
6
+ from downsampler.config import DownsampleConfig, GapHandling, EdgeHandling, AggregationMethod
7
+ from downsampler.gaps import split_at_gaps, mark_gaps_in_dataframe
8
+ from downsampler.edges import apply_edge_handling
9
+ from downsampler.utils import parse_cadence, get_numeric_columns
10
+
11
+
12
+ def _apply_aggregation(
13
+ df: pd.DataFrame,
14
+ target_cadence: pd.Timedelta,
15
+ method: str,
16
+ columns: list[str] | None = None
17
+ ) -> pd.DataFrame:
18
+ """Apply an aggregation method to downsample a DataFrame.
19
+
20
+ Args:
21
+ df: Input DataFrame with DatetimeIndex.
22
+ target_cadence: Target cadence for resampling.
23
+ method: Aggregation method ('mean', 'median', 'min', 'max').
24
+ columns: Columns to aggregate. If None, all numeric columns.
25
+
26
+ Returns:
27
+ Aggregated DataFrame.
28
+ """
29
+ if columns is None:
30
+ columns = get_numeric_columns(df)
31
+
32
+ resampler = df[columns].resample(target_cadence, origin='epoch')
33
+
34
+ if method == 'mean':
35
+ return resampler.mean()
36
+ elif method == 'median':
37
+ return resampler.median()
38
+ elif method == 'min':
39
+ return resampler.min()
40
+ elif method == 'max':
41
+ return resampler.max()
42
+ else:
43
+ raise ValueError(f"Unknown aggregation method: {method}")
44
+
45
+
46
def downsample_mean(
    df: pd.DataFrame,
    target_cadence: str | pd.Timedelta,
    columns: list[str] | None = None,
    gap_threshold: pd.Timedelta | None = None,
    mark_gaps: bool = True
) -> pd.DataFrame:
    """Reduce ``df`` to ``target_cadence`` by averaging each bin.

    Thin wrapper that forwards to ``_downsample_with_aggregation`` with the
    method fixed to ``'mean'``.

    Args:
        df: Input DataFrame with DatetimeIndex.
        target_cadence: Target cadence, as a string or a Timedelta.
        columns: Columns to include. If None, all numeric columns.
        gap_threshold: Gap threshold, forwarded unchanged.
        mark_gaps: Whether the result is post-processed to mark gaps.

    Returns:
        Downsampled DataFrame.

    Example:
        >>> df = pd.DataFrame(
        ...     {'value': range(100)},
        ...     index=pd.date_range('2024-01-01', periods=100, freq='1min')
        ... )
        >>> result = downsample_mean(df, '10min')
        >>> len(result)
        10
    """
    return _downsample_with_aggregation(
        df=df,
        target_cadence=target_cadence,
        method='mean',
        columns=columns,
        gap_threshold=gap_threshold,
        mark_gaps=mark_gaps,
    )
77
+
78
+
79
def downsample_median(
    df: pd.DataFrame,
    target_cadence: str | pd.Timedelta,
    columns: list[str] | None = None,
    gap_threshold: pd.Timedelta | None = None,
    mark_gaps: bool = True
) -> pd.DataFrame:
    """Reduce ``df`` to ``target_cadence`` by taking the median of each bin.

    Thin wrapper that forwards to ``_downsample_with_aggregation`` with the
    method fixed to ``'median'``.

    Args:
        df: Input DataFrame with DatetimeIndex.
        target_cadence: Target cadence, as a string or a Timedelta.
        columns: Columns to include. If None, all numeric columns.
        gap_threshold: Gap threshold, forwarded unchanged.
        mark_gaps: Whether the result is post-processed to mark gaps.

    Returns:
        Downsampled DataFrame.
    """
    return _downsample_with_aggregation(
        df=df,
        target_cadence=target_cadence,
        method='median',
        columns=columns,
        gap_threshold=gap_threshold,
        mark_gaps=mark_gaps,
    )
101
+
102
+
103
def downsample_min(
    df: pd.DataFrame,
    target_cadence: str | pd.Timedelta,
    columns: list[str] | None = None,
    gap_threshold: pd.Timedelta | None = None,
    mark_gaps: bool = True
) -> pd.DataFrame:
    """Reduce ``df`` to ``target_cadence`` by taking the minimum of each bin.

    Thin wrapper that forwards to ``_downsample_with_aggregation`` with the
    method fixed to ``'min'``.

    Args:
        df: Input DataFrame with DatetimeIndex.
        target_cadence: Target cadence, as a string or a Timedelta.
        columns: Columns to include. If None, all numeric columns.
        gap_threshold: Gap threshold, forwarded unchanged.
        mark_gaps: Whether the result is post-processed to mark gaps.

    Returns:
        Downsampled DataFrame.
    """
    return _downsample_with_aggregation(
        df=df,
        target_cadence=target_cadence,
        method='min',
        columns=columns,
        gap_threshold=gap_threshold,
        mark_gaps=mark_gaps,
    )
125
+
126
+
127
def downsample_max(
    df: pd.DataFrame,
    target_cadence: str | pd.Timedelta,
    columns: list[str] | None = None,
    gap_threshold: pd.Timedelta | None = None,
    mark_gaps: bool = True
) -> pd.DataFrame:
    """Reduce ``df`` to ``target_cadence`` by taking the maximum of each bin.

    Thin wrapper that forwards to ``_downsample_with_aggregation`` with the
    method fixed to ``'max'``.

    Args:
        df: Input DataFrame with DatetimeIndex.
        target_cadence: Target cadence, as a string or a Timedelta.
        columns: Columns to include. If None, all numeric columns.
        gap_threshold: Gap threshold, forwarded unchanged.
        mark_gaps: Whether the result is post-processed to mark gaps.

    Returns:
        Downsampled DataFrame.
    """
    return _downsample_with_aggregation(
        df=df,
        target_cadence=target_cadence,
        method='max',
        columns=columns,
        gap_threshold=gap_threshold,
        mark_gaps=mark_gaps,
    )
149
+
150
+
151
def _downsample_with_aggregation(
    df: pd.DataFrame,
    target_cadence: str | pd.Timedelta,
    method: str,
    columns: list[str] | None = None,
    gap_threshold: pd.Timedelta | None = None,
    mark_gaps: bool = True
) -> pd.DataFrame:
    """Shared implementation behind the ``downsample_<method>`` helpers.

    The input is resampled with ``_apply_aggregation`` and, when requested,
    the aggregated frame is passed through ``mark_gaps_in_dataframe`` using
    the span of the input index as the nominal time range.

    Args:
        df: Input DataFrame with DatetimeIndex.
        target_cadence: Target cadence; normalised with ``parse_cadence``.
        method: Aggregation method name understood by ``_apply_aggregation``.
        columns: Columns to include. If None, all numeric columns.
        gap_threshold: Accepted for interface compatibility only.
            NOTE(review): this value is not consulted anywhere in this
            function (the previous code computed a ``2 * target_cadence``
            default and then discarded it); gap marking is driven solely
            by ``target_cadence``. Confirm whether it should be forwarded
            to ``mark_gaps_in_dataframe``.
        mark_gaps: Whether to post-process the result with
            ``mark_gaps_in_dataframe``.

    Returns:
        Downsampled DataFrame.
    """
    target_cadence = parse_cadence(target_cadence)

    result = _apply_aggregation(df, target_cadence, method, columns)
    if not mark_gaps:
        return result

    # An empty input has no first/last timestamp to anchor the nominal range.
    if len(df) > 0:
        nominal_start = df.index[0]
        # Extend by one cadence so the bin holding the last sample is covered.
        nominal_end = df.index[-1] + target_cadence
    else:
        nominal_start = None
        nominal_end = None

    return mark_gaps_in_dataframe(
        result,
        nominal_timedelta=target_cadence,
        nominal_start_time=nominal_start,
        nominal_end_time=nominal_end
    )
190
+
191
+
192
def downsample_multi_aggregate(
    df: pd.DataFrame,
    target_cadence: str | pd.Timedelta,
    variables: list[str],
    aggregations: list[str] | None = None,
    min_completeness: float = 0.9,
    source_cadence: str | pd.Timedelta | None = None
) -> pd.DataFrame:
    """Create multiple aggregation columns for specified variables.

    Produces columns like 'density_min', 'density_mean', 'density_max'
    from a single 'density' column, plus a trailing 'coverage' column.

    Args:
        df: Input DataFrame with DatetimeIndex.
        target_cadence: Target cadence; normalised with ``parse_cadence``.
        variables: List of column names to aggregate.
        aggregations: List of aggregation methods to apply. Defaults to
            ``["min", "mean", "max"]`` when None.
        min_completeness: Minimum fraction of expected points required
            for valid output (0.0 to 1.0). A bin whose sample count is
            at least ``min_completeness * expected`` keeps its statistics;
            otherwise they are replaced by NaN.
        source_cadence: Original cadence of the data for completeness
            calculation. If None, estimated with ``estimate_cadence(df)``.

    Returns:
        DataFrame with aggregated columns named {variable}_{aggregation}
        and a 'coverage' column. The index is shifted to the middle of each
        target-cadence bin.

    Example:
        >>> df = pd.DataFrame(
        ...     {'density': np.random.randn(1000), 'velocity': np.random.randn(1000)},
        ...     index=pd.date_range('2024-01-01', periods=1000, freq='1s')
        ... )
        >>> result = downsample_multi_aggregate(
        ...     df, '1min', ['density', 'velocity'], ['min', 'mean', 'max']
        ... )
        >>> list(result.columns)
        ['density_min', 'density_mean', 'density_max', 'velocity_min', 'velocity_mean', 'velocity_max', 'coverage']
    """
    # Resolve the default here instead of using a shared mutable default list.
    if aggregations is None:
        aggregations = ["min", "mean", "max"]

    target_cadence = parse_cadence(target_cadence)

    # Estimate source cadence if not provided
    if source_cadence is None:
        from downsampler.utils import estimate_cadence
        source_cadence = estimate_cadence(df)
    else:
        source_cadence = parse_cadence(source_cadence)

    # Compute the requested statistics plus a per-bin sample count.
    df_agg = df[variables].resample(
        target_cadence, label='left', origin='epoch'
    ).agg([*aggregations, 'count'])

    # Adjust index to middle of cadence (for proper time representation)
    df_agg.index = df_agg.index + 0.5 * target_cadence

    # Expected number of source samples per bin.
    maxcount = target_cadence / source_cadence

    # Coverage uses the best-populated variable in each bin; the vectorised
    # row maximum replaces a per-row Python ``max`` call.
    counts = df_agg[[(var, 'count') for var in variables]]
    coverage = counts.max(axis=1) / maxcount

    # Set data to NaN if statistics are based on limited observations.
    # ``min_completeness`` is a *minimum*, so a bin that exactly meets it
    # is kept (``>=``); e.g. min_completeness=1.0 accepts full bins.
    required = min_completeness * maxcount
    for var in variables:
        sufficient = df_agg[(var, 'count')] >= required
        for aggstat in aggregations:
            # Replace the whole column rather than writing through ``.loc``:
            # integer min/max columns must become float to hold NaN, and an
            # in-place ``.loc`` write of an incompatible dtype is deprecated.
            df_agg[(var, aggstat)] = df_agg[(var, aggstat)].where(sufficient)

    # Remove count columns
    for var in variables:
        df_agg = df_agg.drop((var, "count"), axis=1)

    # Flatten multi-index columns to single index (e.g., "density_min")
    df_agg.columns = ["_".join(col_name) for col_name in df_agg.columns.to_flat_index()]

    # Add coverage column
    df_agg['coverage'] = coverage

    return df_agg
271
+
272
+
273
def downsample_with_config(
    df: pd.DataFrame,
    target_cadence: str | pd.Timedelta,
    config: DownsampleConfig
) -> pd.DataFrame:
    """Apply aggregation-based downsampling with full configuration.

    Steps, in order: aggregate (per gap-delimited segment when
    ``config.gap_handling`` is ``GapHandling.SEGMENT``, otherwise over the
    whole frame), mark gaps, apply edge handling to non-empty results, and
    finally drop ``config.exclude_columns``.

    NOTE(review): every gap-handling value other than ``SEGMENT`` takes the
    same whole-frame path here, so ``INTERPOLATE`` currently behaves like
    ``IGNORE`` in this function.

    Args:
        df: Input DataFrame with DatetimeIndex.
        target_cadence: Target cadence; normalised with ``parse_cadence``.
        config: Downsampling configuration.

    Returns:
        Downsampled DataFrame. When segmenting leaves nothing to aggregate,
        an empty frame is returned that keeps the input's index type and
        still honours ``config.exclude_columns``.

    Raises:
        ValueError: If ``config.method`` is not an aggregation method
            (for example LTTB).
    """
    target_cadence = parse_cadence(target_cadence)
    gap_threshold = config.get_gap_threshold(target_cadence)

    # An empty include list means "let the aggregator choose the columns".
    columns = config.include_columns or None

    # Map method to the name understood by _apply_aggregation
    method_map = {
        AggregationMethod.MEAN: 'mean',
        AggregationMethod.MEDIAN: 'median',
        AggregationMethod.MIN: 'min',
        AggregationMethod.MAX: 'max',
    }

    method_str = method_map.get(config.method)
    if method_str is None:
        raise ValueError(f"Method {config.method} is not an aggregation method")

    # Process based on gap handling
    if config.gap_handling == GapHandling.SEGMENT:
        # Segments shorter than the configured minimum are skipped.
        results = [
            _apply_aggregation(segment, target_cadence, method_str, columns)
            for segment in split_at_gaps(df, gap_threshold)
            if len(segment) >= config.min_points_per_segment
        ]

        if results:
            result = pd.concat(results).sort_index()
            result = mark_gaps_in_dataframe(result, nominal_timedelta=target_cadence)
        else:
            # Nothing survived segmentation. Build the empty result with the
            # same column selection the aggregator would have used and with
            # the input's index type, then fall through so the exclusion
            # filter below is applied exactly as for non-empty results.
            empty_columns = get_numeric_columns(df) if columns is None else columns
            result = pd.DataFrame(columns=empty_columns, index=df.index[:0])
    else:
        result = _apply_aggregation(df, target_cadence, method_str, columns)
        result = mark_gaps_in_dataframe(result, nominal_timedelta=target_cadence)

    # Apply edge handling
    if len(result) > 0:
        result = apply_edge_handling(
            result,
            config.edge_handling,
            config.edge_window
        )

    # Filter out excluded columns
    if config.exclude_columns:
        cols_to_drop = [c for c in config.exclude_columns if c in result.columns]
        result = result.drop(columns=cols_to_drop)

    return result
downsampler/config.py ADDED
@@ -0,0 +1,72 @@
1
+ """Configuration dataclasses and enums for downsampler."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from enum import Enum
5
+ from typing import Union
6
+
7
+ import pandas as pd
8
+
9
+
10
class AggregationMethod(str, Enum):
    """Names of the downsampling methods a configuration can select.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    # Per-bin statistical reductions.
    MEAN = "mean"
    MEDIAN = "median"
    MIN = "min"
    MAX = "max"

    # Largest Triangle Three Buckets.
    LTTB = "lttb"
17
+
18
+
19
class EdgeHandling(str, Enum):
    """How edge points of a downsampled result are treated.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    # Remove edge points.
    DISCARD = "discard"

    # Keep edges, add '_is_edge' column.
    FLAG = "flag"

    # Keep as-is.
    KEEP = "keep"
24
+
25
+
26
class GapHandling(str, Enum):
    """How gaps in the input time series are treated.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    # Split at gaps, process independently.
    SEGMENT = "segment"

    # Fill gaps first.
    INTERPOLATE = "interpolate"

    # Treat as continuous.
    IGNORE = "ignore"
31
+
32
+
33
@dataclass
class DownsampleConfig:
    """Bundle of options controlling a downsampling run.

    Attributes:
        method: Aggregation method used for downsampling.
        lttb_target_column: For LTTB, the column whose visual fidelity is
            optimised.
        include_columns: Columns to include in the output; an empty list
            means all.
        exclude_columns: Columns to exclude from the output.
        gap_handling: Strategy for handling gaps in the data.
        gap_threshold: Minimum duration to consider as a gap, either as a
            Timedelta or a string. The string "auto" means twice the
            target cadence.
        edge_handling: Strategy for handling edge points.
        edge_window: Number of points at each edge treated as edge points.
        min_points_per_segment: Minimum number of points a segment needs in
            order to be processed.
    """

    method: AggregationMethod = AggregationMethod.MEAN
    lttb_target_column: str | None = None
    include_columns: list[str] = field(default_factory=list)
    exclude_columns: list[str] = field(default_factory=list)
    gap_handling: GapHandling = GapHandling.SEGMENT
    gap_threshold: Union[str, pd.Timedelta] = "auto"
    edge_handling: EdgeHandling = EdgeHandling.FLAG
    edge_window: int = 2
    min_points_per_segment: int = 3

    def get_gap_threshold(self, target_cadence: pd.Timedelta) -> pd.Timedelta:
        """Resolve ``gap_threshold`` to a concrete duration.

        Args:
            target_cadence: The target cadence for downsampling; only used
                when the threshold is "auto".

        Returns:
            ``2 * target_cadence`` for "auto", the parsed duration for any
            other string, and the stored value unchanged otherwise.
        """
        threshold = self.gap_threshold

        if threshold == "auto":
            return 2 * target_cadence

        if isinstance(threshold, str):
            return pd.to_timedelta(threshold)

        return threshold
downsampler/core.py ADDED
@@ -0,0 +1,166 @@
1
+ """Core downsampling functions."""
2
+
3
+ import pandas as pd
4
+
5
+ from downsampler.config import DownsampleConfig, AggregationMethod
6
+ from downsampler.utils import parse_cadence
7
+ from downsampler.lttb import downsample_lttb_with_config
8
+ from downsampler.aggregators import downsample_with_config as aggregate_with_config
9
+ from downsampler.aggregators import downsample_multi_aggregate as _downsample_multi_aggregate
10
+
11
+
12
def downsample(
    df: pd.DataFrame,
    target_cadence: str | pd.Timedelta,
    config: DownsampleConfig | None = None,
    **kwargs
) -> pd.DataFrame:
    """Downsample a DataFrame to a lower cadence.

    Main entry point of the package: it resolves the effective
    configuration, normalises the cadence and dispatches either to the LTTB
    implementation or to the aggregation implementation.

    Args:
        df: Input DataFrame with DatetimeIndex.
        target_cadence: Target cadence as ISO duration string (e.g., "PT1H")
            or pandas Timedelta.
        config: Downsampling configuration. If None, a default
            ``DownsampleConfig()`` (mean aggregation) is used.
        **kwargs: Overrides applied on top of ``config``.
            Supported kwargs:
            - method: AggregationMethod or string ('mean', 'lttb', etc.)
            - lttb_target_column: Column to optimize for LTTB
            - include_columns: Columns to include
            - exclude_columns: Columns to exclude
            - gap_threshold: Gap threshold
            - edge_handling: Edge handling strategy
            - edge_window: Edge window size

    Returns:
        Downsampled DataFrame.

    Examples:
        Basic mean downsampling:
        >>> df = pd.DataFrame(
        ...     {'value': range(100)},
        ...     index=pd.date_range('2024-01-01', periods=100, freq='1min')
        ... )
        >>> result = downsample(df, '10min')
        >>> len(result)
        10

        LTTB downsampling:
        >>> from downsampler import AggregationMethod, DownsampleConfig
        >>> config = DownsampleConfig(
        ...     method=AggregationMethod.LTTB,
        ...     lttb_target_column='value'
        ... )
        >>> result = downsample(df, '10min', config=config)

        Using kwargs:
        >>> result = downsample(df, '10min', method='max')
    """
    # Start from the caller's config, or the defaults when none was given.
    effective = DownsampleConfig() if config is None else config

    # Keyword overrides win over whatever the config carries.
    if kwargs:
        effective = _apply_kwargs_to_config(effective, kwargs)

    cadence = parse_cadence(target_cadence)

    # LTTB has its own implementation; everything else is an aggregation.
    if effective.method == AggregationMethod.LTTB:
        implementation = downsample_lttb_with_config
    else:
        implementation = aggregate_with_config

    return implementation(df, cadence, effective)
78
+
79
+
80
def downsample_multi_aggregate(
    df: pd.DataFrame,
    target_cadence: str | pd.Timedelta,
    variables: list[str],
    aggregations: list[str] | None = None,
    config: DownsampleConfig | None = None,
    **kwargs
) -> pd.DataFrame:
    """Create columns like 'input_min', 'input_mean', 'input_max'.

    This function creates multiple aggregated columns from each input
    variable, useful for showing data ranges in visualizations. It is a
    thin front end that forwards to the implementation in
    ``downsampler.aggregators``.

    Args:
        df: Input DataFrame with DatetimeIndex.
        target_cadence: Target cadence as ISO duration string or Timedelta.
        variables: List of column names to aggregate.
        aggregations: List of aggregation methods to apply.
            Defaults to ["min", "mean", "max"] when None.
        config: Downsampling configuration. Currently not read by this
            function; reserved for future use.
        **kwargs: Additional keyword arguments. Only the following are
            consulted; any other key is ignored:
            - min_completeness: Minimum fraction of expected points
              (0.0-1.0). Defaults to 0.9.
            - source_cadence: Original cadence for completeness
              calculation. Defaults to None.

    Returns:
        DataFrame with aggregated columns named {variable}_{aggregation}.

    Example:
        >>> import numpy as np
        >>> df = pd.DataFrame(
        ...     {'density': np.random.randn(1000), 'velocity': np.random.randn(1000)},
        ...     index=pd.date_range('2024-01-01', periods=1000, freq='1s')
        ... )
        >>> result = downsample_multi_aggregate(
        ...     df, '1min', ['density', 'velocity']
        ... )
        >>> 'density_min' in result.columns
        True
        >>> 'density_mean' in result.columns
        True
        >>> 'density_max' in result.columns
        True
    """
    # Build the default per call instead of sharing one mutable list across
    # calls, and always hand a concrete list to the implementation.
    if aggregations is None:
        aggregations = ["min", "mean", "max"]

    return _downsample_multi_aggregate(
        df=df,
        target_cadence=target_cadence,
        variables=variables,
        aggregations=aggregations,
        min_completeness=kwargs.get('min_completeness', 0.9),
        source_cadence=kwargs.get('source_cadence')
    )
135
+
136
+
137
def _apply_kwargs_to_config(
    config: DownsampleConfig,
    kwargs: dict
) -> DownsampleConfig:
    """Apply keyword arguments to a config, creating a new config.

    Neither ``config`` nor ``kwargs`` is modified: overrides are collected
    in a fresh dict and applied with ``dataclasses.replace``.

    Args:
        config: Base configuration.
        kwargs: Keyword arguments to apply. Keys that are not config
            fields are ignored. A string ``method`` is converted to the
            matching ``AggregationMethod`` member.

    Returns:
        New configuration with kwargs applied.

    Raises:
        ValueError: If ``method`` is a string that does not name an
            ``AggregationMethod`` member.
    """
    from dataclasses import replace

    # Only these keys correspond to DownsampleConfig fields.
    valid_fields = {
        'method', 'lttb_target_column', 'include_columns', 'exclude_columns',
        'gap_handling', 'gap_threshold', 'edge_handling', 'edge_window',
        'min_points_per_segment'
    }

    # Work on a filtered copy so the caller's dict is left untouched.
    overrides = {key: value for key, value in kwargs.items() if key in valid_fields}

    # Map string method names to enum values
    method = overrides.get('method')
    if isinstance(method, str):
        overrides['method'] = AggregationMethod(method)

    return replace(config, **overrides)