featcopilot 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- featcopilot/__init__.py +10 -1
- featcopilot/core/__init__.py +2 -0
- featcopilot/core/feature.py +5 -1
- featcopilot/core/transform_rule.py +276 -0
- featcopilot/engines/relational.py +5 -2
- featcopilot/engines/tabular.py +151 -5
- featcopilot/engines/text.py +352 -11
- featcopilot/engines/timeseries.py +235 -3
- featcopilot/llm/__init__.py +6 -1
- featcopilot/llm/code_generator.py +7 -4
- featcopilot/llm/copilot_client.py +97 -20
- featcopilot/llm/explainer.py +6 -3
- featcopilot/llm/litellm_client.py +595 -0
- featcopilot/llm/semantic_engine.py +717 -26
- featcopilot/llm/transform_rule_generator.py +403 -0
- featcopilot/selection/importance.py +40 -9
- featcopilot/selection/redundancy.py +39 -10
- featcopilot/selection/statistical.py +107 -34
- featcopilot/selection/unified.py +57 -3
- featcopilot/stores/__init__.py +17 -0
- featcopilot/stores/base.py +166 -0
- featcopilot/stores/feast_store.py +541 -0
- featcopilot/stores/rule_store.py +343 -0
- featcopilot/transformers/sklearn_compat.py +18 -6
- featcopilot/utils/__init__.py +14 -0
- featcopilot/utils/logger.py +47 -0
- featcopilot/utils/models.py +287 -0
- featcopilot/utils/parallel.py +5 -1
- {featcopilot-0.1.0.dist-info → featcopilot-0.3.0.dist-info}/METADATA +56 -25
- featcopilot-0.3.0.dist-info/RECORD +38 -0
- featcopilot-0.1.0.dist-info/RECORD +0 -29
- {featcopilot-0.1.0.dist-info → featcopilot-0.3.0.dist-info}/WHEEL +0 -0
- {featcopilot-0.1.0.dist-info → featcopilot-0.3.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,541 @@
|
|
|
1
|
+
"""Feast feature store integration.
|
|
2
|
+
|
|
3
|
+
Provides integration with Feast (https://feast.dev) for saving
|
|
4
|
+
and retrieving engineered features.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import tempfile
|
|
8
|
+
from datetime import datetime, timedelta
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Optional, Union
|
|
11
|
+
|
|
12
|
+
import pandas as pd
|
|
13
|
+
from pydantic import Field
|
|
14
|
+
|
|
15
|
+
from featcopilot.core.feature import FeatureSet, FeatureType
|
|
16
|
+
from featcopilot.stores.base import BaseFeatureStore, FeatureStoreConfig
|
|
17
|
+
from featcopilot.utils.logger import get_logger
|
|
18
|
+
|
|
19
|
+
logger = get_logger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class FeastConfig(FeatureStoreConfig):
    """Settings used by ``FeastFeatureStore``.

    Extends ``FeatureStoreConfig`` with the Feast-specific options declared
    below. Every field has a default, so the model can be built with no
    arguments.
    """

    # Identifier of this backend.
    name: str = "feast"

    # Directory of the Feast repository; ``None`` means "no path configured".
    repo_path: Optional[str] = Field(
        default=None,
        description="Path to Feast repo directory",
    )

    # Written as ``project:`` in the generated feature_store.yaml.
    project_name: str = Field(
        default="featcopilot",
        description="Feast project name",
    )

    # Written as ``provider:`` in the generated feature_store.yaml.
    provider: str = Field(
        default="local",
        description="Feast provider (local, gcp, aws)",
    )

    # Backend types for the ``online_store`` / ``offline_store`` sections.
    online_store_type: str = Field(
        default="sqlite",
        description="Online store type",
    )
    offline_store_type: str = Field(
        default="file",
        description="Offline store type",
    )

    # Used both as the feature-view TTL and as the materialization look-back.
    ttl_days: int = Field(
        default=365,
        description="Feature TTL in days",
    )

    # When true, saving features also materializes them to the online store.
    auto_materialize: bool = Field(
        default=True,
        description="Auto-materialize to online store",
    )
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class FeastFeatureStore(BaseFeatureStore):
|
|
36
|
+
"""
|
|
37
|
+
Feast feature store integration.
|
|
38
|
+
|
|
39
|
+
Enables saving FeatCopilot-generated features to Feast for:
|
|
40
|
+
- Historical feature retrieval (training)
|
|
41
|
+
- Online feature serving (inference)
|
|
42
|
+
- Feature discovery and reuse
|
|
43
|
+
|
|
44
|
+
Parameters
|
|
45
|
+
----------
|
|
46
|
+
repo_path : str, optional
|
|
47
|
+
Path to Feast repository. If None, creates a temporary repo.
|
|
48
|
+
project_name : str, default='featcopilot'
|
|
49
|
+
Name of the Feast project
|
|
50
|
+
entity_columns : list, optional
|
|
51
|
+
Columns to use as entity keys
|
|
52
|
+
timestamp_column : str, optional
|
|
53
|
+
Column containing event timestamps
|
|
54
|
+
provider : str, default='local'
|
|
55
|
+
Feast provider (local, gcp, aws)
|
|
56
|
+
auto_materialize : bool, default=True
|
|
57
|
+
Whether to automatically materialize features to online store
|
|
58
|
+
|
|
59
|
+
Examples
|
|
60
|
+
--------
|
|
61
|
+
Basic usage with FeatCopilot:
|
|
62
|
+
|
|
63
|
+
>>> from featcopilot import AutoFeatureEngineer
|
|
64
|
+
>>> from featcopilot.stores import FeastFeatureStore
|
|
65
|
+
>>>
|
|
66
|
+
>>> # Generate features
|
|
67
|
+
>>> engineer = AutoFeatureEngineer(engines=['tabular'])
|
|
68
|
+
>>> X_transformed = engineer.fit_transform(X, y)
|
|
69
|
+
>>>
|
|
70
|
+
>>> # Save to Feast
|
|
71
|
+
>>> store = FeastFeatureStore(
|
|
72
|
+
... repo_path='./feature_repo',
|
|
73
|
+
... entity_columns=['customer_id'],
|
|
74
|
+
... timestamp_column='event_timestamp'
|
|
75
|
+
... )
|
|
76
|
+
>>> store.initialize()
|
|
77
|
+
>>> store.save_features(
|
|
78
|
+
... X_transformed,
|
|
79
|
+
... feature_view_name='customer_features',
|
|
80
|
+
... description='Customer churn prediction features'
|
|
81
|
+
... )
|
|
82
|
+
|
|
83
|
+
Retrieve features for training:
|
|
84
|
+
|
|
85
|
+
>>> entity_df = pd.DataFrame({
|
|
86
|
+
... 'customer_id': [1, 2, 3],
|
|
87
|
+
... 'event_timestamp': [datetime.now()] * 3
|
|
88
|
+
... })
|
|
89
|
+
>>> features = store.get_features(
|
|
90
|
+
... entity_df,
|
|
91
|
+
... feature_names=['age_income_ratio', 'tenure_months'],
|
|
92
|
+
... feature_view_name='customer_features'
|
|
93
|
+
... )
|
|
94
|
+
"""
|
|
95
|
+
|
|
96
|
+
def __init__(
    self,
    repo_path: Optional[str] = None,
    project_name: str = "featcopilot",
    entity_columns: Optional[list[str]] = None,
    timestamp_column: Optional[str] = None,
    provider: str = "local",
    online_store_type: str = "sqlite",
    offline_store_type: str = "file",
    ttl_days: int = 365,
    auto_materialize: bool = True,
    **kwargs,
):
    """
    Build the ``FeastConfig`` for this store and reset its runtime state.

    No Feast objects are created here; that happens in ``initialize()``.

    Parameters
    ----------
    repo_path : str, optional
        Path to the Feast repository
    project_name : str, default='featcopilot'
        Name of the Feast project
    entity_columns : list, optional
        Columns to use as entity keys (``None`` is stored as an empty list)
    timestamp_column : str, optional
        Column containing event timestamps
    provider : str, default='local'
        Feast provider
    online_store_type : str, default='sqlite'
        Online store type
    offline_store_type : str, default='file'
        Offline store type
    ttl_days : int, default=365
        Feature TTL in days
    auto_materialize : bool, default=True
        Whether saving features also materializes them to the online store
    **kwargs
        Extra settings forwarded unchanged to ``FeastConfig``
    """
    settings = {
        "repo_path": repo_path,
        "project_name": project_name,
        "entity_columns": entity_columns or [],
        "timestamp_column": timestamp_column,
        "provider": provider,
        "online_store_type": online_store_type,
        "offline_store_type": offline_store_type,
        "ttl_days": ttl_days,
        "auto_materialize": auto_materialize,
    }
    config = FeastConfig(**settings, **kwargs)
    super().__init__(config)
    self.config: FeastConfig = config

    # Handles that are filled in by initialize().
    self._feast_store = None
    self._repo_path: Optional[Path] = None
    self._temp_dir: Optional[tempfile.TemporaryDirectory] = None

    # Per-instance caches of the Feast objects created by save_features().
    self._entities: dict[str, Any] = {}
    self._feature_views: dict[str, Any] = {}
|
|
128
|
+
|
|
129
|
+
def initialize(self) -> None:
    """
    Initialize the Feast feature store.

    Picks the repository directory (the configured ``repo_path``, created
    if missing, or a temporary directory when none is configured), writes
    ``feature_store.yaml`` if the repository does not have one yet, and
    opens the Feast ``FeatureStore`` object.

    Raises
    ------
    ImportError
        If the ``feast`` package is not installed.
    """
    try:
        from feast import FeatureStore
    except ImportError as err:
        raise ImportError(
            "Feast is not installed. Install with: pip install feast\n"
            "Or install FeatCopilot with Feast support: pip install featcopilot[feast]"
        ) from err

    # Set up repo path
    if self.config.repo_path:
        # Store an absolute path. The generated feature_store.yaml embeds
        # this path in its registry / sqlite locations, and Feast resolves
        # relative locations against the repo directory, so a relative
        # repo path would end up nested as <repo>/<repo>/registry.db.
        self._repo_path = Path(self.config.repo_path).resolve()
        self._repo_path.mkdir(parents=True, exist_ok=True)
    else:
        # Reuse a temporary repo that is still alive (e.g. initialize()
        # called twice) instead of orphaning it and the data saved in it.
        if self._temp_dir is None:
            self._temp_dir = tempfile.TemporaryDirectory()
        self._repo_path = Path(self._temp_dir.name)

    # Create feature_store.yaml if not exists
    config_path = self._repo_path / "feature_store.yaml"
    if not config_path.exists():
        self._create_feast_config(config_path)

    # Initialize Feast store
    self._feast_store = FeatureStore(repo_path=str(self._repo_path))
    self._is_initialized = True

    logger.info(f"Feast feature store initialized at {self._repo_path}")
|
|
162
|
+
|
|
163
|
+
def _create_feast_config(self, config_path: Path) -> None:
    """Write a ``feature_store.yaml`` for this store to ``config_path``.

    The file holds the project name, registry location, provider, the
    online/offline store sections produced by the two ``_get_*_store_config``
    helpers, and the entity key serialization version.
    """
    online_section = self._get_online_store_config()
    offline_section = self._get_offline_store_config()

    yaml_lines = [
        f"project: {self.config.project_name}",
        f"registry: {self._repo_path}/registry.db",
        f"provider: {self.config.provider}",
        "",
        "online_store:",
        online_section,
        "",
        "offline_store:",
        offline_section,
        "",
        "entity_key_serialization_version: 2",
        "",  # keeps the trailing newline at end of file
    ]
    config_path.write_text("\n".join(yaml_lines))
|
|
181
|
+
|
|
182
|
+
def _get_online_store_config(self) -> str:
    """Return the YAML lines that go under ``online_store:``.

    ``sqlite`` adds a database path inside the repo, ``redis`` adds a fixed
    ``localhost:6379`` connection string, and any other type is emitted as
    a bare ``type:`` entry.
    """
    store_type = self.config.online_store_type

    if store_type == "sqlite":
        return f" type: sqlite\n path: {self._repo_path}/online_store.db"

    if store_type == "redis":
        return " type: redis\n connection_string: localhost:6379"

    return f" type: {store_type}"
|
|
190
|
+
|
|
191
|
+
def _get_offline_store_config(self) -> str:
    """Return the YAML line that goes under ``offline_store:``.

    Every offline store type (``file``, ``bigquery``, ``redshift`` or
    anything else) is emitted the same way: a single ``type:`` entry with
    the configured value.
    """
    store_type = self.config.offline_store_type
    return f" type: {store_type}"
|
|
201
|
+
|
|
202
|
+
def _infer_feast_dtype(self, pandas_dtype: str, feat_type: Optional[FeatureType] = None) -> Any:
    """
    Infer the Feast ``ValueType`` for a pandas dtype.

    Parameters
    ----------
    pandas_dtype : str
        pandas dtype (or its string form); matched case-insensitively
    feat_type : FeatureType, optional
        FeatCopilot feature type; ``FeatureType.BOOLEAN`` forces ``BOOL``

    Returns
    -------
    feast.ValueType
        ``BOOL``, ``INT64``, ``DOUBLE``, ``STRING`` or ``UNIX_TIMESTAMP``;
        unrecognised dtypes fall back to ``DOUBLE``
    """
    from feast import ValueType

    dtype_str = str(pandas_dtype).lower()

    if feat_type == FeatureType.BOOLEAN or "bool" in dtype_str:
        return ValueType.BOOL
    # Any signed/unsigned integer width (int8 ... uint64, nullable Int64),
    # not only int32/int64, is an integer feature.
    if dtype_str.startswith(("int", "uint")):
        return ValueType.INT64
    if "float" in dtype_str or "double" in dtype_str:
        return ValueType.DOUBLE
    if "object" in dtype_str or "string" in dtype_str:
        return ValueType.STRING
    if "datetime" in dtype_str:
        return ValueType.UNIX_TIMESTAMP
    return ValueType.DOUBLE  # Default to double for numeric
|
|
220
|
+
|
|
221
|
+
def save_features(
    self,
    df: pd.DataFrame,
    feature_set: Optional[FeatureSet] = None,
    feature_view_name: str = "featcopilot_features",
    description: Optional[str] = None,
    entity_columns: Optional[list[str]] = None,
    timestamp_column: Optional[str] = None,
    **kwargs,
) -> None:
    """
    Save features to Feast.

    Writes ``df`` to ``<repo>/<feature_view_name>.parquet``, registers one
    Feast entity per entity column plus a feature view over the remaining
    columns, and materializes the view to the online store when
    ``auto_materialize`` is enabled. The caller's DataFrame is not
    modified; a copy is made if a timestamp column has to be added.

    Parameters
    ----------
    df : DataFrame
        DataFrame containing features to save
    feature_set : FeatureSet, optional
        FeatCopilot FeatureSet with metadata (currently not used here)
    feature_view_name : str
        Name for the Feast feature view
    description : str, optional
        Description of the feature view
    entity_columns : list, optional
        Override entity columns from config
    timestamp_column : str, optional
        Override timestamp column from config; defaults to
        ``"event_timestamp"`` when neither is set

    Raises
    ------
    ValueError
        If no entity columns are given, or one is missing from ``df``.
    """
    # Determine entity and timestamp columns (validate before imports)
    entity_cols = entity_columns or self.config.entity_columns
    ts_col = timestamp_column or self.config.timestamp_column

    # Validate columns exist
    if not entity_cols:
        raise ValueError(
            "entity_columns must be specified either in config or save_features(). "
            "These are the key columns that identify each row (e.g., 'customer_id')."
        )

    for col in entity_cols:
        if col not in df.columns:
            raise ValueError(f"Entity column '{col}' not found in DataFrame")

    if not self._is_initialized:
        self.initialize()

    from feast import Entity, FeatureView, Field, FileSource, ValueType
    from feast.types import Bool, Float64, Int64, String, UnixTimestamp

    # Add the timestamp column only when it is missing, so timestamps the
    # caller already supplied (including a pre-existing "event_timestamp"
    # column) are never overwritten.
    if not ts_col:
        ts_col = "event_timestamp"
    if ts_col not in df.columns:
        df = df.copy()
        df[ts_col] = datetime.now()

    # Save DataFrame to parquet (use absolute path for Feast compatibility)
    data_path = (self._repo_path / f"{feature_view_name}.parquet").resolve()
    df.to_parquet(data_path, index=False)

    # Create entities (cached per normalized name across calls)
    entities = []
    for entity_col in entity_cols:
        entity_name = entity_col.replace(" ", "_").lower()
        if entity_name not in self._entities:
            # Lower-case so pandas nullable dtypes ("Int64") match too.
            col_dtype = str(df[entity_col].dtype).lower()
            if "int" in col_dtype:
                value_type = ValueType.INT64
            elif "float" in col_dtype:
                value_type = ValueType.DOUBLE
            else:
                value_type = ValueType.STRING

            self._entities[entity_name] = Entity(
                name=entity_name,
                # Join on the real column name: without this Feast joins on
                # the normalized entity name, which is not a column of the
                # data when the original name has spaces or upper case.
                join_keys=[entity_col],
                value_type=value_type,
                description=f"Entity key: {entity_col}",
            )
        entities.append(self._entities[entity_name])

    # Determine feature columns (exclude entity and timestamp)
    exclude_cols = set(entity_cols) | {ts_col}
    feature_cols = [c for c in df.columns if c not in exclude_cols]

    # Create schema; unrecognised dtypes are declared as Float64
    schema = []
    for col in feature_cols:
        dtype = str(df[col].dtype).lower()
        if "bool" in dtype:
            feast_type = Bool
        elif "int" in dtype:
            feast_type = Int64
        elif "datetime" in dtype:
            feast_type = UnixTimestamp
        elif "object" in dtype or "string" in dtype:
            feast_type = String
        else:
            feast_type = Float64
        schema.append(Field(name=col, dtype=feast_type))

    # Create file source
    source = FileSource(
        path=str(data_path),
        timestamp_field=ts_col,
    )

    # Create feature view
    feature_view = FeatureView(
        name=feature_view_name,
        entities=entities,  # Pass Entity objects, not strings
        ttl=timedelta(days=self.config.ttl_days),
        schema=schema,
        source=source,
        description=description or "Features generated by FeatCopilot",
        tags=self.config.tags,
    )

    self._feature_views[feature_view_name] = feature_view

    # Apply to Feast
    self._feast_store.apply([*entities, feature_view])

    logger.info(f"Saved {len(feature_cols)} features to Feast view '{feature_view_name}'")

    # Materialize to online store if enabled
    if self.config.auto_materialize:
        self._materialize(feature_view_name)
|
|
355
|
+
|
|
356
|
+
def _materialize(self, feature_view_name: str) -> None:
    """Load one feature view into the online store, best effort.

    The materialization window ends now and reaches back ``ttl_days`` days.
    Any failure is logged as a warning and not re-raised.
    """
    try:
        window_end = datetime.now()
        window_start = window_end - timedelta(days=self.config.ttl_days)

        self._feast_store.materialize(
            start_date=window_start,
            end_date=window_end,
            feature_views=[feature_view_name],
        )
        logger.info(f"Materialized '{feature_view_name}' to online store")
    except Exception as exc:
        logger.warning(f"Could not materialize to online store: {exc}")
|
|
370
|
+
|
|
371
|
+
def get_features(
    self,
    entity_df: pd.DataFrame,
    feature_names: list[str],
    feature_view_name: str = "featcopilot_features",
    online: bool = False,
    **kwargs,
) -> pd.DataFrame:
    """
    Fetch feature values from Feast as a DataFrame.

    The store is initialized on first use. Each requested name is turned
    into a ``"<feature_view_name>:<name>"`` reference.

    Parameters
    ----------
    entity_df : DataFrame
        DataFrame with entity keys and timestamps
    feature_names : list
        Names of features to retrieve
    feature_view_name : str
        Name of the feature view
    online : bool, default=False
        If True, read from the online store (one request row per row of
        ``entity_df``); otherwise run a historical (offline) retrieval
    **kwargs
        Accepted but not used by this method

    Returns
    -------
    DataFrame
        DataFrame with requested features
    """
    if not self._is_initialized:
        self.initialize()

    refs = [f"{feature_view_name}:{name}" for name in feature_names]
    store = self._feast_store

    if not online:
        # Offline / historical retrieval.
        historical = store.get_historical_features(
            entity_df=entity_df,
            features=refs,
        )
        return historical.to_df()

    # Online retrieval.
    rows = entity_df.to_dict("records")
    response = store.get_online_features(
        features=refs,
        entity_rows=rows,
    )
    return pd.DataFrame(response.to_dict())
|
|
419
|
+
|
|
420
|
+
def get_online_features(
    self,
    entity_dict: Union[dict[str, list], pd.DataFrame],
    feature_names: list[str],
    feature_view_name: str = "featcopilot_features",
) -> dict[str, Any]:
    """
    Get features from online store for real-time inference.

    Parameters
    ----------
    entity_dict : dict or DataFrame
        Entity keys, either as a DataFrame (one request row per row) or as
        a dict mapping each key column to a list of values; all lists must
        have the same length
    feature_names : list
        Names of features to retrieve
    feature_view_name : str
        Name of the feature view

    Returns
    -------
    dict
        Features as dictionary

    Raises
    ------
    ValueError
        If ``entity_dict`` is an empty dict or its value lists differ in
        length.
    """
    if isinstance(entity_dict, pd.DataFrame):
        entity_rows = entity_dict.to_dict("records")
    else:
        # Reject malformed input up front with a clear message instead of
        # an IndexError (empty dict / short column) or silently dropping
        # the extra values of a longer column.
        if not entity_dict:
            raise ValueError("entity_dict must contain at least one entity key column")
        columns = list(entity_dict.values())
        if len({len(values) for values in columns}) > 1:
            raise ValueError("All entity_dict value lists must have the same length")

        # Convert dict of lists to list of dicts
        keys = list(entity_dict)
        entity_rows = [dict(zip(keys, row)) for row in zip(*columns)]

    if not self._is_initialized:
        self.initialize()

    feature_refs = [f"{feature_view_name}:{name}" for name in feature_names]

    result = self._feast_store.get_online_features(
        features=feature_refs,
        entity_rows=entity_rows,
    )
    return result.to_dict()
|
|
461
|
+
|
|
462
|
+
def push_features(
    self,
    df: pd.DataFrame,
    feature_view_name: str = "featcopilot_features",
) -> None:
    """
    Push features to online store (streaming/real-time update).

    The store is initialized on first use.

    Parameters
    ----------
    df : DataFrame
        DataFrame with entity keys and feature values
    feature_view_name : str
        Name of the feature view
    """
    if not self._is_initialized:
        self.initialize()

    # FeatureStore.push() takes the name of a registered PushSource, and
    # this class never registers one, so it cannot be addressed with a
    # feature view name. write_to_online_store() writes rows for a feature
    # view identified by name, which is what this method documents.
    self._feast_store.write_to_online_store(feature_view_name, df)
    logger.info(f"Pushed {len(df)} rows to '{feature_view_name}'")
|
|
482
|
+
|
|
483
|
+
def list_feature_views(self) -> list[str]:
    """Return the names of the feature views Feast reports for this repo.

    The store is initialized on first use.
    """
    if not self._is_initialized:
        self.initialize()

    return [view.name for view in self._feast_store.list_feature_views()]
|
|
490
|
+
|
|
491
|
+
def get_feature_view_schema(self, feature_view_name: str) -> dict[str, Any]:
    """Describe a feature view as a plain dictionary.

    The result has the keys ``name``, ``entities``, ``features`` (a list of
    ``{"name", "dtype"}`` dicts), ``ttl``, ``description`` and ``tags``.
    If the view cannot be read for any reason the error is logged and an
    empty dict is returned. The store is initialized on first use.
    """
    if not self._is_initialized:
        self.initialize()

    try:
        view = self._feast_store.get_feature_view(feature_view_name)

        summary: dict[str, Any] = {"name": view.name}
        summary["entities"] = list(view.entities)
        summary["features"] = [
            {"name": field.name, "dtype": str(field.dtype)} for field in view.schema
        ]
        summary["ttl"] = str(view.ttl)
        summary["description"] = view.description
        summary["tags"] = view.tags
        return summary
    except Exception as err:
        logger.error(f"Could not get schema for '{feature_view_name}': {err}")
        return {}
|
|
509
|
+
|
|
510
|
+
def delete_feature_view(self, feature_view_name: str) -> bool:
    """Remove a feature view and its parquet data file.

    Returns ``True`` when the view was deleted. Any failure, including the
    view not existing, is logged and reported as ``False`` instead of being
    raised. The store is initialized on first use.
    """
    if not self._is_initialized:
        self.initialize()

    store = self._feast_store
    try:
        # Look the view up first so that a missing view fails here.
        store.get_feature_view(feature_view_name)
        store.delete_feature_view(feature_view_name)
        self._feature_views.pop(feature_view_name, None)

        # Remove the parquet file that save_features() wrote for this view.
        parquet_file = self._repo_path / f"{feature_view_name}.parquet"
        if parquet_file.exists():
            parquet_file.unlink()

        logger.info(f"Deleted feature view '{feature_view_name}'")
        return True
    except Exception as err:
        logger.error(f"Could not delete '{feature_view_name}': {err}")
        return False
|
|
530
|
+
|
|
531
|
+
def close(self) -> None:
    """Drop the Feast handle and remove the temporary repo, if one exists.

    After this call the store is marked as uninitialized.
    """
    self._is_initialized = False
    self._feast_store = None

    temp_dir = self._temp_dir
    if not temp_dir:
        return

    temp_dir.cleanup()
    self._temp_dir = None
|
|
539
|
+
|
|
540
|
+
def __repr__(self) -> str:
    """Return a short description showing the repo path and project name."""
    repo = self._repo_path
    project = self.config.project_name
    return f"FeastFeatureStore(repo_path='{repo}', project='{project}')"
|