featcopilot-0.2.0-py3-none-any.whl → featcopilot-0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,6 +28,10 @@ class TimeSeriesEngineConfig(EngineConfig):
             "autocorrelation",
             "peaks",
             "trends",
+            "entropy",
+            "energy",
+            "complexity",
+            "counts",
         ],
         description="Feature groups to extract",
     )
@@ -36,6 +40,7 @@ class TimeSeriesEngineConfig(EngineConfig):
     )
     n_fft_coefficients: int = Field(default=10, description="Number of FFT coefficients")
     n_autocorr_lags: int = Field(default=10, description="Number of autocorrelation lags")
+    entropy_bins: int = Field(default=10, description="Number of bins for binned entropy")
 
 
 class TimeSeriesEngine(BaseEngine):
@@ -66,7 +71,7 @@ class TimeSeriesEngine(BaseEngine):
     >>> X_features = engine.fit_transform(time_series_df)
     """
 
-    # Feature extraction functions
+    # Feature extraction functions (tsfresh-inspired)
     FEATURE_EXTRACTORS = {
         "basic_stats": "_extract_basic_stats",
         "distribution": "_extract_distribution",
@@ -75,6 +80,10 @@ class TimeSeriesEngine(BaseEngine):
         "trends": "_extract_trends",
         "rolling": "_extract_rolling",
         "fft": "_extract_fft",
+        "entropy": "_extract_entropy",
+        "energy": "_extract_energy",
+        "complexity": "_extract_complexity",
+        "counts": "_extract_counts",
     }
 
     def __init__(
@@ -400,6 +409,226 @@ class TimeSeriesEngine(BaseEngine):
 
         return features
 
+    def _extract_entropy(self, series: np.ndarray, col: str) -> dict[str, float]:
+        """Extract entropy-based features (tsfresh-inspired)."""
+        features = {}
+        prefix = col
+
+        series_clean = series[~np.isnan(series)]
+        if len(series_clean) < 4:
+            return features
+
+        # Binned entropy
+        try:
+            hist, _ = np.histogram(series_clean, bins=self.config.entropy_bins)
+            hist = hist[hist > 0]
+            probs = hist / hist.sum()
+            features[f"{prefix}_binned_entropy"] = -np.sum(probs * np.log(probs + 1e-10))
+        except Exception:
+            features[f"{prefix}_binned_entropy"] = 0
+
+        # Sample entropy (simplified implementation)
+        try:
+            features[f"{prefix}_sample_entropy"] = self._sample_entropy(series_clean, m=2, r=0.2)
+        except Exception:
+            features[f"{prefix}_sample_entropy"] = 0
+
+        # Approximate entropy
+        try:
+            features[f"{prefix}_approximate_entropy"] = self._approximate_entropy(series_clean, m=2, r=0.2)
+        except Exception:
+            features[f"{prefix}_approximate_entropy"] = 0
+
+        return features
+
+    def _sample_entropy(self, series: np.ndarray, m: int = 2, r: float = 0.2) -> float:
+        """Compute sample entropy of a time series."""
+        n = len(series)
+        if n < m + 2:
+            return 0
+
+        # Normalize r by std
+        r = r * np.std(series)
+        if r == 0:
+            return 0
+
+        def _count_matches(template_length):
+            count = 0
+            templates = np.array([series[i : i + template_length] for i in range(n - template_length)])
+            for i in range(len(templates)):
+                for j in range(i + 1, len(templates)):
+                    if np.max(np.abs(templates[i] - templates[j])) < r:
+                        count += 1
+            return count
+
+        a = _count_matches(m)
+        b = _count_matches(m + 1)
+
+        if a == 0 or b == 0:
+            return 0
+
+        return -np.log(b / a)
+
+    def _approximate_entropy(self, series: np.ndarray, m: int = 2, r: float = 0.2) -> float:
+        """Compute approximate entropy of a time series."""
+        n = len(series)
+        if n < m + 2:
+            return 0
+
+        r = r * np.std(series)
+        if r == 0:
+            return 0
+
+        def _phi(m_val):
+            patterns = np.array([series[i : i + m_val] for i in range(n - m_val + 1)])
+            counts = np.zeros(len(patterns))
+            for i, pattern in enumerate(patterns):
+                for other in patterns:
+                    if np.max(np.abs(pattern - other)) < r:
+                        counts[i] += 1
+            counts = counts / len(patterns)
+            return np.sum(np.log(counts + 1e-10)) / len(patterns)
+
+        return _phi(m) - _phi(m + 1)
+
+    def _extract_energy(self, series: np.ndarray, col: str) -> dict[str, float]:
+        """Extract energy-based features (tsfresh-inspired)."""
+        features = {}
+        prefix = col
+
+        series_clean = series[~np.isnan(series)]
+        if len(series_clean) < 2:
+            return features
+
+        # Absolute energy: sum of squared values
+        features[f"{prefix}_abs_energy"] = np.sum(series_clean**2)
+
+        # Mean absolute change
+        features[f"{prefix}_mean_abs_change"] = np.mean(np.abs(np.diff(series_clean)))
+
+        # Mean second derivative central
+        if len(series_clean) >= 3:
+            second_deriv = series_clean[2:] - 2 * series_clean[1:-1] + series_clean[:-2]
+            features[f"{prefix}_mean_second_deriv_central"] = np.mean(second_deriv)
+
+        # Root mean square
+        features[f"{prefix}_rms"] = np.sqrt(np.mean(series_clean**2))
+
+        # Crest factor (peak/rms)
+        rms = features[f"{prefix}_rms"]
+        if rms > 0:
+            features[f"{prefix}_crest_factor"] = np.max(np.abs(series_clean)) / rms
+
+        return features
+
+    def _extract_complexity(self, series: np.ndarray, col: str) -> dict[str, float]:
+        """Extract complexity features (tsfresh-inspired)."""
+        features = {}
+        prefix = col
+
+        series_clean = series[~np.isnan(series)]
+        if len(series_clean) < 3:
+            return features
+
+        # CID_CE: Complexity-invariant distance
+        diff = np.diff(series_clean)
+        features[f"{prefix}_cid_ce"] = np.sqrt(np.sum(diff**2))
+
+        # C3: Time series complexity (lag 1)
+        if len(series_clean) >= 3:
+            n = len(series_clean)
+            c3 = np.sum(series_clean[2:n] * series_clean[1 : n - 1] * series_clean[0 : n - 2]) / (n - 2)
+            features[f"{prefix}_c3"] = c3
+
+        # Ratio of unique values to length
+        features[f"{prefix}_ratio_unique_values"] = len(np.unique(series_clean)) / len(series_clean)
+
+        # Has duplicate
+        features[f"{prefix}_has_duplicate"] = 1 if len(np.unique(series_clean)) < len(series_clean) else 0
+
+        # Has duplicate max
+        max_val = np.max(series_clean)
+        features[f"{prefix}_has_duplicate_max"] = 1 if np.sum(series_clean == max_val) > 1 else 0
+
+        # Has duplicate min
+        min_val = np.min(series_clean)
+        features[f"{prefix}_has_duplicate_min"] = 1 if np.sum(series_clean == min_val) > 1 else 0
+
+        # Sum of reoccurring values
+        unique, counts = np.unique(series_clean, return_counts=True)
+        reoccurring_mask = counts > 1
+        features[f"{prefix}_sum_reoccurring_values"] = np.sum(unique[reoccurring_mask] * counts[reoccurring_mask])
+
+        # Sum of reoccurring data points
+        features[f"{prefix}_sum_reoccurring_data_points"] = np.sum(counts[reoccurring_mask])
+
+        # Percentage of reoccurring data points
+        features[f"{prefix}_pct_reoccurring_data_points"] = np.sum(counts[reoccurring_mask]) / len(series_clean)
+
+        return features
+
+    def _extract_counts(self, series: np.ndarray, col: str) -> dict[str, float]:
+        """Extract count-based features (tsfresh-inspired)."""
+        features = {}
+        prefix = col
+
+        series_clean = series[~np.isnan(series)]
+        if len(series_clean) < 2:
+            return features
+
+        mean_val = np.mean(series_clean)
+
+        # Count above mean
+        features[f"{prefix}_count_above_mean"] = np.sum(series_clean > mean_val)
+
+        # Count below mean
+        features[f"{prefix}_count_below_mean"] = np.sum(series_clean < mean_val)
+
+        # First location of maximum
+        features[f"{prefix}_first_loc_max"] = np.argmax(series_clean) / len(series_clean)
+
+        # First location of minimum
+        features[f"{prefix}_first_loc_min"] = np.argmin(series_clean) / len(series_clean)
+
+        # Last location of maximum
+        features[f"{prefix}_last_loc_max"] = (len(series_clean) - 1 - np.argmax(series_clean[::-1])) / len(series_clean)
+
+        # Last location of minimum
+        features[f"{prefix}_last_loc_min"] = (len(series_clean) - 1 - np.argmin(series_clean[::-1])) / len(series_clean)
+
+        # Longest strike above mean
+        above_mean = series_clean > mean_val
+        features[f"{prefix}_longest_strike_above_mean"] = self._longest_consecutive(above_mean)
+
+        # Longest strike below mean
+        below_mean = series_clean < mean_val
+        features[f"{prefix}_longest_strike_below_mean"] = self._longest_consecutive(below_mean)
+
+        # Number of crossings (mean)
+        crossings = np.sum(np.diff(np.sign(series_clean - mean_val)) != 0)
+        features[f"{prefix}_number_crossings_mean"] = crossings
+
+        # Number of zero crossings
+        zero_crossings = np.sum(np.diff(np.sign(series_clean)) != 0)
+        features[f"{prefix}_number_zero_crossings"] = zero_crossings
+
+        # Absolute sum of changes
+        features[f"{prefix}_abs_sum_changes"] = np.sum(np.abs(np.diff(series_clean)))
+
+        return features
+
+    def _longest_consecutive(self, bool_array: np.ndarray) -> int:
+        """Find longest consecutive True values in boolean array."""
+        max_len = 0
+        current_len = 0
+        for val in bool_array:
+            if val:
+                current_len += 1
+                max_len = max(max_len, current_len)
+            else:
+                current_len = 0
+        return max_len
+
     def get_feature_set(self) -> FeatureSet:
         """Get the feature set with metadata."""
         return self._feature_set
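
Note (not part of the package diff): a minimal usage sketch of the feature groups added in 0.3.0, assuming TimeSeriesEngine forwards keyword arguments such as feature_groups and entropy_bins to TimeSeriesEngineConfig and lives under featcopilot.engines.time_series; neither detail is confirmed by the hunks above.

    # Hypothetical sketch; import path, constructor kwargs, and the input frame layout are assumptions.
    import numpy as np
    import pandas as pd
    from featcopilot.engines.time_series import TimeSeriesEngine

    time_series_df = pd.DataFrame(
        {"signal": np.sin(np.linspace(0, 20, 200)) + 0.1 * np.random.randn(200)}
    )

    engine = TimeSeriesEngine(
        feature_groups=["entropy", "energy", "complexity", "counts"],  # groups introduced in 0.3.0
        entropy_bins=10,  # new config field used by the binned-entropy feature
    )
    X_features = engine.fit_transform(time_series_df)
    # Expected columns include signal_binned_entropy, signal_abs_energy,
    # signal_cid_ce, and signal_count_above_mean, per the extractors above.
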
@@ -8,6 +8,7 @@ from featcopilot.llm.copilot_client import CopilotFeatureClient
 from featcopilot.llm.explainer import FeatureExplainer
 from featcopilot.llm.litellm_client import LiteLLMFeatureClient, SyncLiteLLMFeatureClient
 from featcopilot.llm.semantic_engine import SemanticEngine
+from featcopilot.llm.transform_rule_generator import TransformRuleGenerator
 
 __all__ = [
     "CopilotFeatureClient",
@@ -16,4 +17,5 @@ __all__ = [
     "SemanticEngine",
     "FeatureExplainer",
     "FeatureCodeGenerator",
+    "TransformRuleGenerator",
 ]
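
Note (not part of the package diff): with the export added above, the new class should be importable from the package namespace; its constructor and methods are not shown anywhere in this diff.

    # Only the import path and the __all__ entry come from the diff; the class API is not visible here.
    from featcopilot.llm import TransformRuleGenerator
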
@@ -523,38 +523,71 @@ class SyncCopilotFeatureClient:
 
     def __init__(self, **kwargs):
         self._async_client = CopilotFeatureClient(**kwargs)
+        self._loop = None
+
+    def _get_or_create_loop(self):
+        """Get or create a persistent event loop for this client."""
+        if self._loop is None or self._loop.is_closed():
+            self._loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(self._loop)
+        return self._loop
 
     def _run_async(self, coro):
-        """Run an async coroutine, handling nested event loops (e.g., Jupyter)."""
+        """Run an async coroutine, handling various event loop scenarios."""
         try:
-            # Check if we're in a running event loop (e.g., Jupyter)
-            loop = asyncio.get_running_loop()
-            # We're in a running loop - use nest_asyncio if available
+            # First, try to get the running loop
             try:
-                import nest_asyncio
-
-                nest_asyncio.apply()
+                loop = asyncio.get_running_loop()
+                # We're in a running loop - use nest_asyncio if available
+                try:
+                    import nest_asyncio
+
+                    nest_asyncio.apply()
+                    return loop.run_until_complete(coro)
+                except ImportError:
+                    # nest_asyncio not available, use thread pool
+                    import concurrent.futures
+
+                    with concurrent.futures.ThreadPoolExecutor() as executor:
+                        future = executor.submit(self._run_in_new_loop, coro)
+                        return future.result(timeout=120)
+            except RuntimeError:
+                # No running event loop - use our persistent loop
+                loop = self._get_or_create_loop()
                 return loop.run_until_complete(coro)
-            except ImportError:
-                # nest_asyncio not available, try alternative approach
-                import concurrent.futures
+        except Exception as e:
+            # Last resort - create a completely fresh loop
+            try:
+                return self._run_in_new_loop(coro)
+            except Exception:
+                raise e from None
 
-                with concurrent.futures.ThreadPoolExecutor() as executor:
-                    future = executor.submit(asyncio.run, coro)
-                    return future.result()
-        except RuntimeError:
-            # No running event loop - safe to use asyncio.run
-            return asyncio.run(coro)
+    def _run_in_new_loop(self, coro):
+        """Run coroutine in a fresh event loop."""
+        loop = asyncio.new_event_loop()
+        try:
+            asyncio.set_event_loop(loop)
+            return loop.run_until_complete(coro)
+        finally:
+            loop.close()
 
     def start(self):
         return self._run_async(self._async_client.start())
 
     def stop(self):
-        return self._run_async(self._async_client.stop())
+        result = self._run_async(self._async_client.stop())
+        # Close our loop if it exists
+        if self._loop is not None and not self._loop.is_closed():
+            self._loop.close()
+            self._loop = None
+        return result
 
     def suggest_features(self, **kwargs):
         return self._run_async(self._async_client.suggest_features(**kwargs))
 
+    def send_prompt(self, prompt: str):
+        return self._run_async(self._async_client.send_prompt(prompt))
+
     def explain_feature(self, **kwargs):
         return self._run_async(self._async_client.explain_feature(**kwargs))
 
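
Note (not part of the package diff): a minimal sketch of driving the reworked synchronous wrapper from a plain, non-async script, using only the methods visible in the hunk above; the constructor arguments and the prompt text are illustrative assumptions.

    # Hypothetical sketch; only the method names and the stop()/loop behaviour come from the diff.
    from featcopilot.llm.copilot_client import SyncCopilotFeatureClient

    client = SyncCopilotFeatureClient()  # kwargs, if any, are forwarded to the async CopilotFeatureClient
    client.start()
    try:
        # send_prompt() is new in 0.3.0; _run_async() blocks until the coroutine finishes,
        # reusing a persistent event loop (or nest_asyncio / a worker thread inside Jupyter).
        reply = client.send_prompt("Suggest ratio features for columns age, tenure, monthly_spend")
        print(reply)
    finally:
        client.stop()  # stop() now also closes the persistent loop created by _get_or_create_loop()
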