mcli-framework 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic; see the advisory details below.

Files changed (186) hide show
  1. mcli/app/chat_cmd.py +42 -0
  2. mcli/app/commands_cmd.py +226 -0
  3. mcli/app/completion_cmd.py +216 -0
  4. mcli/app/completion_helpers.py +288 -0
  5. mcli/app/cron_test_cmd.py +697 -0
  6. mcli/app/logs_cmd.py +419 -0
  7. mcli/app/main.py +492 -0
  8. mcli/app/model/model.py +1060 -0
  9. mcli/app/model_cmd.py +227 -0
  10. mcli/app/redis_cmd.py +269 -0
  11. mcli/app/video/video.py +1114 -0
  12. mcli/app/visual_cmd.py +303 -0
  13. mcli/chat/chat.py +2409 -0
  14. mcli/chat/command_rag.py +514 -0
  15. mcli/chat/enhanced_chat.py +652 -0
  16. mcli/chat/system_controller.py +1010 -0
  17. mcli/chat/system_integration.py +1016 -0
  18. mcli/cli.py +25 -0
  19. mcli/config.toml +20 -0
  20. mcli/lib/api/api.py +586 -0
  21. mcli/lib/api/daemon_client.py +203 -0
  22. mcli/lib/api/daemon_client_local.py +44 -0
  23. mcli/lib/api/daemon_decorator.py +217 -0
  24. mcli/lib/api/mcli_decorators.py +1032 -0
  25. mcli/lib/auth/auth.py +85 -0
  26. mcli/lib/auth/aws_manager.py +85 -0
  27. mcli/lib/auth/azure_manager.py +91 -0
  28. mcli/lib/auth/credential_manager.py +192 -0
  29. mcli/lib/auth/gcp_manager.py +93 -0
  30. mcli/lib/auth/key_manager.py +117 -0
  31. mcli/lib/auth/mcli_manager.py +93 -0
  32. mcli/lib/auth/token_manager.py +75 -0
  33. mcli/lib/auth/token_util.py +1011 -0
  34. mcli/lib/config/config.py +47 -0
  35. mcli/lib/discovery/__init__.py +1 -0
  36. mcli/lib/discovery/command_discovery.py +274 -0
  37. mcli/lib/erd/erd.py +1345 -0
  38. mcli/lib/erd/generate_graph.py +453 -0
  39. mcli/lib/files/files.py +76 -0
  40. mcli/lib/fs/fs.py +109 -0
  41. mcli/lib/lib.py +29 -0
  42. mcli/lib/logger/logger.py +611 -0
  43. mcli/lib/performance/optimizer.py +409 -0
  44. mcli/lib/performance/rust_bridge.py +502 -0
  45. mcli/lib/performance/uvloop_config.py +154 -0
  46. mcli/lib/pickles/pickles.py +50 -0
  47. mcli/lib/search/cached_vectorizer.py +479 -0
  48. mcli/lib/services/data_pipeline.py +460 -0
  49. mcli/lib/services/lsh_client.py +441 -0
  50. mcli/lib/services/redis_service.py +387 -0
  51. mcli/lib/shell/shell.py +137 -0
  52. mcli/lib/toml/toml.py +33 -0
  53. mcli/lib/ui/styling.py +47 -0
  54. mcli/lib/ui/visual_effects.py +634 -0
  55. mcli/lib/watcher/watcher.py +185 -0
  56. mcli/ml/api/app.py +215 -0
  57. mcli/ml/api/middleware.py +224 -0
  58. mcli/ml/api/routers/admin_router.py +12 -0
  59. mcli/ml/api/routers/auth_router.py +244 -0
  60. mcli/ml/api/routers/backtest_router.py +12 -0
  61. mcli/ml/api/routers/data_router.py +12 -0
  62. mcli/ml/api/routers/model_router.py +302 -0
  63. mcli/ml/api/routers/monitoring_router.py +12 -0
  64. mcli/ml/api/routers/portfolio_router.py +12 -0
  65. mcli/ml/api/routers/prediction_router.py +267 -0
  66. mcli/ml/api/routers/trade_router.py +12 -0
  67. mcli/ml/api/routers/websocket_router.py +76 -0
  68. mcli/ml/api/schemas.py +64 -0
  69. mcli/ml/auth/auth_manager.py +425 -0
  70. mcli/ml/auth/models.py +154 -0
  71. mcli/ml/auth/permissions.py +302 -0
  72. mcli/ml/backtesting/backtest_engine.py +502 -0
  73. mcli/ml/backtesting/performance_metrics.py +393 -0
  74. mcli/ml/cache.py +400 -0
  75. mcli/ml/cli/main.py +398 -0
  76. mcli/ml/config/settings.py +394 -0
  77. mcli/ml/configs/dvc_config.py +230 -0
  78. mcli/ml/configs/mlflow_config.py +131 -0
  79. mcli/ml/configs/mlops_manager.py +293 -0
  80. mcli/ml/dashboard/app.py +532 -0
  81. mcli/ml/dashboard/app_integrated.py +738 -0
  82. mcli/ml/dashboard/app_supabase.py +560 -0
  83. mcli/ml/dashboard/app_training.py +615 -0
  84. mcli/ml/dashboard/cli.py +51 -0
  85. mcli/ml/data_ingestion/api_connectors.py +501 -0
  86. mcli/ml/data_ingestion/data_pipeline.py +567 -0
  87. mcli/ml/data_ingestion/stream_processor.py +512 -0
  88. mcli/ml/database/migrations/env.py +94 -0
  89. mcli/ml/database/models.py +667 -0
  90. mcli/ml/database/session.py +200 -0
  91. mcli/ml/experimentation/ab_testing.py +845 -0
  92. mcli/ml/features/ensemble_features.py +607 -0
  93. mcli/ml/features/political_features.py +676 -0
  94. mcli/ml/features/recommendation_engine.py +809 -0
  95. mcli/ml/features/stock_features.py +573 -0
  96. mcli/ml/features/test_feature_engineering.py +346 -0
  97. mcli/ml/logging.py +85 -0
  98. mcli/ml/mlops/data_versioning.py +518 -0
  99. mcli/ml/mlops/experiment_tracker.py +377 -0
  100. mcli/ml/mlops/model_serving.py +481 -0
  101. mcli/ml/mlops/pipeline_orchestrator.py +614 -0
  102. mcli/ml/models/base_models.py +324 -0
  103. mcli/ml/models/ensemble_models.py +675 -0
  104. mcli/ml/models/recommendation_models.py +474 -0
  105. mcli/ml/models/test_models.py +487 -0
  106. mcli/ml/monitoring/drift_detection.py +676 -0
  107. mcli/ml/monitoring/metrics.py +45 -0
  108. mcli/ml/optimization/portfolio_optimizer.py +834 -0
  109. mcli/ml/preprocessing/data_cleaners.py +451 -0
  110. mcli/ml/preprocessing/feature_extractors.py +491 -0
  111. mcli/ml/preprocessing/ml_pipeline.py +382 -0
  112. mcli/ml/preprocessing/politician_trading_preprocessor.py +569 -0
  113. mcli/ml/preprocessing/test_preprocessing.py +294 -0
  114. mcli/ml/scripts/populate_sample_data.py +200 -0
  115. mcli/ml/tasks.py +400 -0
  116. mcli/ml/tests/test_integration.py +429 -0
  117. mcli/ml/tests/test_training_dashboard.py +387 -0
  118. mcli/public/oi/oi.py +15 -0
  119. mcli/public/public.py +4 -0
  120. mcli/self/self_cmd.py +1246 -0
  121. mcli/workflow/daemon/api_daemon.py +800 -0
  122. mcli/workflow/daemon/async_command_database.py +681 -0
  123. mcli/workflow/daemon/async_process_manager.py +591 -0
  124. mcli/workflow/daemon/client.py +530 -0
  125. mcli/workflow/daemon/commands.py +1196 -0
  126. mcli/workflow/daemon/daemon.py +905 -0
  127. mcli/workflow/daemon/daemon_api.py +59 -0
  128. mcli/workflow/daemon/enhanced_daemon.py +571 -0
  129. mcli/workflow/daemon/process_cli.py +244 -0
  130. mcli/workflow/daemon/process_manager.py +439 -0
  131. mcli/workflow/daemon/test_daemon.py +275 -0
  132. mcli/workflow/dashboard/dashboard_cmd.py +113 -0
  133. mcli/workflow/docker/docker.py +0 -0
  134. mcli/workflow/file/file.py +100 -0
  135. mcli/workflow/gcloud/config.toml +21 -0
  136. mcli/workflow/gcloud/gcloud.py +58 -0
  137. mcli/workflow/git_commit/ai_service.py +328 -0
  138. mcli/workflow/git_commit/commands.py +430 -0
  139. mcli/workflow/lsh_integration.py +355 -0
  140. mcli/workflow/model_service/client.py +594 -0
  141. mcli/workflow/model_service/download_and_run_efficient_models.py +288 -0
  142. mcli/workflow/model_service/lightweight_embedder.py +397 -0
  143. mcli/workflow/model_service/lightweight_model_server.py +714 -0
  144. mcli/workflow/model_service/lightweight_test.py +241 -0
  145. mcli/workflow/model_service/model_service.py +1955 -0
  146. mcli/workflow/model_service/ollama_efficient_runner.py +425 -0
  147. mcli/workflow/model_service/pdf_processor.py +386 -0
  148. mcli/workflow/model_service/test_efficient_runner.py +234 -0
  149. mcli/workflow/model_service/test_example.py +315 -0
  150. mcli/workflow/model_service/test_integration.py +131 -0
  151. mcli/workflow/model_service/test_new_features.py +149 -0
  152. mcli/workflow/openai/openai.py +99 -0
  153. mcli/workflow/politician_trading/commands.py +1790 -0
  154. mcli/workflow/politician_trading/config.py +134 -0
  155. mcli/workflow/politician_trading/connectivity.py +490 -0
  156. mcli/workflow/politician_trading/data_sources.py +395 -0
  157. mcli/workflow/politician_trading/database.py +410 -0
  158. mcli/workflow/politician_trading/demo.py +248 -0
  159. mcli/workflow/politician_trading/models.py +165 -0
  160. mcli/workflow/politician_trading/monitoring.py +413 -0
  161. mcli/workflow/politician_trading/scrapers.py +966 -0
  162. mcli/workflow/politician_trading/scrapers_california.py +412 -0
  163. mcli/workflow/politician_trading/scrapers_eu.py +377 -0
  164. mcli/workflow/politician_trading/scrapers_uk.py +350 -0
  165. mcli/workflow/politician_trading/scrapers_us_states.py +438 -0
  166. mcli/workflow/politician_trading/supabase_functions.py +354 -0
  167. mcli/workflow/politician_trading/workflow.py +852 -0
  168. mcli/workflow/registry/registry.py +180 -0
  169. mcli/workflow/repo/repo.py +223 -0
  170. mcli/workflow/scheduler/commands.py +493 -0
  171. mcli/workflow/scheduler/cron_parser.py +238 -0
  172. mcli/workflow/scheduler/job.py +182 -0
  173. mcli/workflow/scheduler/monitor.py +139 -0
  174. mcli/workflow/scheduler/persistence.py +324 -0
  175. mcli/workflow/scheduler/scheduler.py +679 -0
  176. mcli/workflow/sync/sync_cmd.py +437 -0
  177. mcli/workflow/sync/test_cmd.py +314 -0
  178. mcli/workflow/videos/videos.py +242 -0
  179. mcli/workflow/wakatime/wakatime.py +11 -0
  180. mcli/workflow/workflow.py +37 -0
  181. mcli_framework-7.0.0.dist-info/METADATA +479 -0
  182. mcli_framework-7.0.0.dist-info/RECORD +186 -0
  183. mcli_framework-7.0.0.dist-info/WHEEL +5 -0
  184. mcli_framework-7.0.0.dist-info/entry_points.txt +7 -0
  185. mcli_framework-7.0.0.dist-info/licenses/LICENSE +21 -0
  186. mcli_framework-7.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,676 @@
1
+ """Political influence features for stock recommendation models"""
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ from datetime import datetime, timedelta
6
+ from typing import Any, Dict, List, Optional, Tuple, Union
7
+ from dataclasses import dataclass
8
+ import logging
9
+ from collections import defaultdict, Counter
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
@dataclass
class PoliticalFeatureConfig:
    """Configuration for political feature extraction.

    All mapping attributes default to ``None`` and are replaced with the
    built-in defaults in ``__post_init__`` (the standard dataclass idiom for
    mutable defaults).  Passing an explicit mapping overrides the defaults
    entirely.  The annotations are ``Optional[...]`` to match the ``None``
    sentinel defaults (previously annotated as plain ``Dict``, which was
    type-incorrect).
    """

    # Politician influence scoring: committee-name fragment -> weight,
    # party/leadership role -> multiplier, chamber position -> weight.
    committee_weights: Optional[Dict[str, float]] = None
    party_influence_weights: Optional[Dict[str, float]] = None
    position_weights: Optional[Dict[str, float]] = None

    # Trading pattern analysis windows (days).
    influence_lookback_days: int = 180
    insider_threshold_days: int = 30
    cluster_analysis_window: int = 60

    # Policy impact modeling: sector -> relevant policy topic keywords.
    sector_policy_mapping: Optional[Dict[str, List[str]]] = None
    policy_announcement_window: int = 7

    def __post_init__(self) -> None:
        """Fill any mapping left as ``None`` with the built-in defaults."""
        if self.committee_weights is None:
            self.committee_weights = {
                "financial_services": 3.0,
                "energy_commerce": 2.5,
                "judiciary": 2.0,
                "appropriations": 2.5,
                "ways_means": 3.0,
                "defense": 2.0,
                "foreign_affairs": 1.5,
                "healthcare": 2.0,
                "technology": 2.5,
            }

        if self.party_influence_weights is None:
            self.party_influence_weights = {
                "majority_party": 1.2,
                "minority_party": 0.8,
                "leadership": 2.0,
                "committee_chair": 1.8,
                "ranking_member": 1.4,
            }

        if self.position_weights is None:
            self.position_weights = {
                "speaker": 3.0,
                "majority_leader": 2.5,
                "minority_leader": 2.0,
                "committee_chair": 2.0,
                "subcommittee_chair": 1.5,
                "ranking_member": 1.3,
                "member": 1.0,
            }

        if self.sector_policy_mapping is None:
            self.sector_policy_mapping = {
                "technology": ["tech_regulation", "data_privacy", "antitrust"],
                "healthcare": ["medicare", "drug_pricing", "healthcare_reform"],
                "energy": ["climate_policy", "renewable_energy", "oil_regulation"],
                "financial": ["banking_regulation", "fintech", "cryptocurrency"],
                "defense": ["defense_spending", "military_contracts", "cybersecurity"],
            }
74
+
75
+
76
class PoliticalInfluenceFeatures:
    """Extract features based on political influence and power.

    Operates on a trading DataFrame; the column names used below imply it
    expects at least ``politician_name_cleaned``,
    ``transaction_amount_cleaned`` and ``asset_name_cleaned``, with timing
    features additionally using ``transaction_date_cleaned`` — assumed from
    usage; confirm against the upstream preprocessor.
    """

    def __init__(self, config: Optional[PoliticalFeatureConfig] = None):
        # Fall back to the default configuration when none is supplied.
        self.config = config or PoliticalFeatureConfig()
        # NOTE(review): this cache is never read or written anywhere in this
        # class — presumably reserved for future memoization.
        self.politician_influence_cache = {}

    def extract_influence_features(
        self, trading_data: pd.DataFrame, politician_metadata: Optional[pd.DataFrame] = None
    ) -> pd.DataFrame:
        """Extract political influence features from trading data.

        Runs the sub-extractors in a fixed order; later steps depend on
        columns created by earlier ones (e.g. ``transaction_date_dt`` and
        ``sector_classification``).  Returns a copy of ``trading_data``
        with the feature columns appended.
        """
        df = trading_data.copy()

        # Calculate politician influence scores
        df = self._calculate_politician_influence(df, politician_metadata)

        # Trading timing analysis
        df = self._analyze_trading_timing(df)

        # Committee and sector alignment
        df = self._analyze_committee_sector_alignment(df, politician_metadata)

        # Party clustering analysis
        df = self._analyze_party_clustering(df)

        # Seniority and experience features
        df = self._extract_seniority_features(df, politician_metadata)

        return df

    def _calculate_politician_influence(
        self, df: pd.DataFrame, metadata: Optional[pd.DataFrame]
    ) -> pd.DataFrame:
        """Calculate comprehensive politician influence scores.

        Adds ``total_influence`` and ``trade_influence`` columns keyed by
        politician name.
        """

        # Base influence score from trading frequency and volume
        politician_stats = (
            df.groupby("politician_name_cleaned")
            .agg(
                {
                    "transaction_amount_cleaned": ["count", "sum", "mean", "std"],
                    "asset_name_cleaned": "nunique",
                }
            )
            .round(2)
        )

        # Flatten the MultiIndex columns produced by the multi-agg above.
        politician_stats.columns = [
            "trade_count",
            "total_volume",
            "avg_trade_size",
            "trade_size_std",
            "unique_assets",
        ]

        # Calculate base influence from trading metrics
        # More trades, higher volumes, and diverse assets = higher influence
        politician_stats["trade_influence"] = (
            np.log1p(politician_stats["trade_count"])
            + np.log1p(politician_stats["total_volume"]) / 10
            + np.log1p(politician_stats["unique_assets"]) * 2
        )

        # Normalize to 0-1 scale
        politician_stats["trade_influence"] = (
            politician_stats["trade_influence"] / politician_stats["trade_influence"].max()
        )

        # Add metadata-based influence if available
        if metadata is not None:
            # NOTE(review): _add_metadata_influence can replace the
            # politician-name index with a RangeIndex (see its merge); the
            # right_index merge below would then no longer key on names —
            # verify with metadata-bearing input.
            politician_stats = self._add_metadata_influence(politician_stats, metadata)
        else:
            # Use default influence based on trading patterns
            politician_stats["position_influence"] = 1.0
            politician_stats["committee_influence"] = 1.0
            politician_stats["party_influence"] = 1.0

        # Combined influence score (weighted sum; weights total 1.0)
        politician_stats["total_influence"] = (
            politician_stats["trade_influence"] * 0.4
            + politician_stats["position_influence"] * 0.3
            + politician_stats["committee_influence"] * 0.2
            + politician_stats["party_influence"] * 0.1
        )

        # Merge back to main dataframe
        df = df.merge(
            politician_stats[["total_influence", "trade_influence"]],
            left_on="politician_name_cleaned",
            right_index=True,
            how="left",
        )

        return df

    def _add_metadata_influence(
        self, stats_df: pd.DataFrame, metadata: pd.DataFrame
    ) -> pd.DataFrame:
        """Add influence scores based on politician metadata.

        NOTE(review): writes the influence columns onto the caller's
        ``metadata`` frame in place — confirm callers do not rely on it
        staying unmodified.
        """

        # Position-based influence
        if "position" in metadata.columns:
            position_influence = metadata["position"].map(self.config.position_weights)
            metadata["position_influence"] = position_influence.fillna(1.0)
        else:
            metadata["position_influence"] = 1.0

        # Committee-based influence
        if "committees" in metadata.columns:

            def calculate_committee_influence(committees_str):
                # Highest configured weight among all committees whose name
                # contains a configured keyword; 1.0 (neutral) otherwise.
                if pd.isna(committees_str):
                    return 1.0
                committees = str(committees_str).lower().split(",")
                influence = 1.0
                for committee in committees:
                    committee = committee.strip()
                    for key, weight in self.config.committee_weights.items():
                        if key in committee:
                            influence = max(influence, weight)
                return influence

            metadata["committee_influence"] = metadata["committees"].apply(
                calculate_committee_influence
            )
        else:
            metadata["committee_influence"] = 1.0

        # Party-based influence (simplified)
        if "party" in metadata.columns:
            # Assume majority party has more influence (would need current data)
            party_influence = metadata["party"].map({"Republican": 1.1, "Democrat": 1.0})
            metadata["party_influence"] = party_influence.fillna(1.0)
        else:
            metadata["party_influence"] = 1.0

        # Merge metadata influence scores
        influence_cols = ["position_influence", "committee_influence", "party_influence"]
        available_cols = [col for col in influence_cols if col in metadata.columns]

        if available_cols:
            # NOTE(review): this merge moves the politician name out of the
            # index; the caller later merges stats with right_index=True —
            # verify the join still keys on politician names after this.
            stats_df = stats_df.merge(
                metadata[["politician_name_cleaned"] + available_cols],
                left_index=True,
                right_on="politician_name_cleaned",
                how="left",
            )

        # Fill missing values
        for col in influence_cols:
            if col not in stats_df.columns:
                stats_df[col] = 1.0
            else:
                stats_df[col] = stats_df[col].fillna(1.0)

        return stats_df

    def _analyze_trading_timing(self, df: pd.DataFrame) -> pd.DataFrame:
        """Analyze timing patterns in political trading.

        All derived columns depend on ``transaction_date_cleaned``; frames
        without that column pass through unchanged.  Note this sorts the
        frame by politician and date as a side effect.
        """

        # Convert date to datetime if not already
        if "transaction_date_cleaned" in df.columns:
            df["transaction_date_dt"] = pd.to_datetime(df["transaction_date_cleaned"])

            # Days since last trade by politician (NaN for each politician's
            # first recorded trade)
            df = df.sort_values(["politician_name_cleaned", "transaction_date_dt"])
            df["days_since_last_trade"] = (
                df.groupby("politician_name_cleaned")["transaction_date_dt"].diff().dt.days
            )

            # Trading frequency score (more frequent = higher score); first
            # trades score a neutral 1.0, and the score is capped at 2.0.
            df["trading_frequency_score"] = np.where(
                df["days_since_last_trade"].isna(),
                1.0,
                np.clip(30 / (df["days_since_last_trade"] + 1), 0, 2.0),
            )

            # Cluster trading detection (multiple trades in short timeframe):
            # count of week-or-less gaps among the trailing 5 trades.
            df["cluster_trades"] = (
                df.groupby("politician_name_cleaned")["days_since_last_trade"]
                .rolling(window=5, min_periods=1)
                .apply(lambda x: (x <= 7).sum())
                .values
            )

            # Quarterly timing (end of quarter trading patterns)
            df["quarter_end_trade"] = (
                df["transaction_date_dt"].dt.month.isin([3, 6, 9, 12])
                & (df["transaction_date_dt"].dt.day >= 25)
            ).astype(int)

            # Year-end trading (December 20th onwards)
            df["year_end_trade"] = (
                (df["transaction_date_dt"].dt.month == 12)
                & (df["transaction_date_dt"].dt.day >= 20)
            ).astype(int)

            # Pre-earnings timing (approximate - would need earnings calendar)
            df["potential_insider_timing"] = (df["days_since_last_trade"] <= 5).astype(int)

        return df

    def _analyze_committee_sector_alignment(
        self, df: pd.DataFrame, metadata: Optional[pd.DataFrame]
    ) -> pd.DataFrame:
        """Analyze alignment between committee assignments and traded sectors.

        Adds ``sector_classification`` (keyword-based) and
        ``committee_sector_alignment`` (0.5 neutral, 0.9 when a relevant
        committee membership is found).
        """

        # Simplified sector classification based on asset names
        def classify_sector(asset_name):
            if pd.isna(asset_name):
                return "unknown"

            asset_lower = str(asset_name).lower()

            # Technology sector
            tech_keywords = [
                "tech",
                "software",
                "microsoft",
                "apple",
                "google",
                "meta",
                "facebook",
                "amazon",
                "netflix",
                "tesla",
                "nvidia",
                "intel",
            ]
            if any(keyword in asset_lower for keyword in tech_keywords):
                return "technology"

            # Healthcare sector
            health_keywords = [
                "health",
                "pharma",
                "medical",
                "bio",
                "johnson",
                "pfizer",
                "merck",
                "abbott",
                "healthcare",
            ]
            if any(keyword in asset_lower for keyword in health_keywords):
                return "healthcare"

            # Financial sector
            finance_keywords = [
                "bank",
                "financial",
                "capital",
                "credit",
                "jpmorgan",
                "bank of america",
                "wells fargo",
                "goldman",
                "morgan stanley",
            ]
            if any(keyword in asset_lower for keyword in finance_keywords):
                return "financial"

            # Energy sector
            energy_keywords = [
                "energy",
                "oil",
                "gas",
                "exxon",
                "chevron",
                "renewable",
                "solar",
                "wind",
                "petroleum",
            ]
            if any(keyword in asset_lower for keyword in energy_keywords):
                return "energy"

            # Defense sector
            defense_keywords = [
                "defense",
                "aerospace",
                "boeing",
                "lockheed",
                "raytheon",
                "general dynamics",
                "northrop",
            ]
            if any(keyword in asset_lower for keyword in defense_keywords):
                return "defense"

            return "other"

        df["sector_classification"] = df["asset_name_cleaned"].apply(classify_sector)

        # Committee-sector alignment score
        if metadata is not None and "committees" in metadata.columns:

            def calculate_alignment_score(politician, sector):
                # O(len(metadata)) scan per trade — fine for small frames,
                # could be precomputed per politician if this becomes hot.
                politician_metadata = metadata[metadata["politician_name_cleaned"] == politician]
                if politician_metadata.empty:
                    return 0.5  # Neutral alignment

                committees = str(politician_metadata.iloc[0]["committees"]).lower()

                # Check for relevant committee memberships
                alignment_score = 0.5  # Base neutral score

                if sector == "technology" and any(
                    keyword in committees for keyword in ["technology", "commerce", "judiciary"]
                ):
                    alignment_score = 0.9
                elif sector == "healthcare" and "health" in committees:
                    alignment_score = 0.9
                elif sector == "financial" and "financial" in committees:
                    alignment_score = 0.9
                elif sector == "energy" and any(
                    keyword in committees for keyword in ["energy", "environment"]
                ):
                    alignment_score = 0.9
                elif sector == "defense" and any(
                    keyword in committees for keyword in ["defense", "armed services"]
                ):
                    alignment_score = 0.9

                return alignment_score

            df["committee_sector_alignment"] = df.apply(
                lambda row: calculate_alignment_score(
                    row["politician_name_cleaned"], row["sector_classification"]
                ),
                axis=1,
            )
        else:
            df["committee_sector_alignment"] = 0.5  # Neutral when no metadata

        return df

    def _analyze_party_clustering(self, df: pd.DataFrame) -> pd.DataFrame:
        """Analyze clustering of trades by party affiliation.

        Depends on ``sector_classification`` created by
        ``_analyze_committee_sector_alignment``.
        """

        # Mock party assignment based on politician name patterns
        # In real implementation, this would come from metadata
        def assign_party(name):
            # This is a simplified mock assignment
            # In practice, this would come from politician metadata
            republican_indicators = ["mitch", "mcconnell", "cruz", "rubio", "romney"]
            democrat_indicators = ["pelosi", "schumer", "warren", "sanders"]

            name_lower = str(name).lower()
            if any(indicator in name_lower for indicator in republican_indicators):
                return "Republican"
            elif any(indicator in name_lower for indicator in democrat_indicators):
                return "Democrat"
            else:
                return "Independent"  # Default

        df["estimated_party"] = df["politician_name_cleaned"].apply(assign_party)

        # Party-based trading patterns
        party_stats = (
            df.groupby(["estimated_party", "sector_classification"])
            .agg(
                {
                    "transaction_amount_cleaned": ["count", "mean"],
                    "transaction_type_cleaned": lambda x: (x == "buy").mean(),
                }
            )
            .round(3)
        )

        # Flatten the MultiIndex columns from the multi-agg.
        party_stats.columns = ["party_sector_trades", "party_avg_amount", "party_buy_ratio"]

        # Calculate party consensus score for each trade
        df = df.merge(
            party_stats,
            left_on=["estimated_party", "sector_classification"],
            right_index=True,
            how="left",
        )

        # Party divergence score (how much this trade differs from party norm)
        df["party_divergence"] = abs(
            (df["transaction_type_cleaned"] == "buy").astype(int) - df["party_buy_ratio"]
        )

        return df

    def _extract_seniority_features(
        self, df: pd.DataFrame, metadata: Optional[pd.DataFrame]
    ) -> pd.DataFrame:
        """Extract features related to politician seniority and experience.

        Requires ``transaction_date_dt`` created by
        ``_analyze_trading_timing``.  NOTE(review): ``metadata`` is accepted
        but currently unused here.
        """

        # Estimate seniority based on trading patterns (mock implementation)
        politician_first_trade = df.groupby("politician_name_cleaned")["transaction_date_dt"].min()

        # Calculate trading experience (days since first recorded trade)
        df = df.merge(
            politician_first_trade.rename("first_trade_date"),
            left_on="politician_name_cleaned",
            right_index=True,
            how="left",
        )

        df["trading_experience_days"] = (df["transaction_date_dt"] - df["first_trade_date"]).dt.days

        # Experience categories (right-inclusive bins; 0-day experience —
        # a politician's first trade — falls outside the first bin and
        # becomes NaN under pd.cut's default include_lowest=False)
        df["experience_category"] = pd.cut(
            df["trading_experience_days"],
            bins=[0, 90, 365, 1095, float("inf")],
            labels=["novice", "intermediate", "experienced", "veteran"],
        )

        # Seniority influence score (log-scaled, capped at 2.0)
        df["seniority_influence"] = np.clip(np.log1p(df["trading_experience_days"]) / 10, 0, 2.0)

        return df
491
+
492
+
493
class CongressionalTrackingFeatures:
    """Features based on congressional trading disclosure tracking.

    Works on a trading DataFrame with ``transaction_amount_cleaned`` and,
    for the full feature set, ``disclosure_date`` plus
    ``transaction_date_cleaned`` columns.
    """

    def __init__(self, config: Optional["PoliticalFeatureConfig"] = None):
        # Fall back to the default configuration when none is supplied.
        self.config = config or PoliticalFeatureConfig()

    def extract_disclosure_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Extract features related to disclosure timing and patterns.

        Adds ``disclosure_delay_days``, ``timely_disclosure``,
        ``late_disclosure``, ``very_late_disclosure``,
        ``disclosure_compliance_score`` and ``large_trade_late_disclosure``.
        When disclosure dates are not available, neutral defaults are used so
        the output schema is identical on both paths.
        """

        # Disclosure delay analysis
        if "disclosure_date" in df.columns and "transaction_date_cleaned" in df.columns:
            df["disclosure_date_dt"] = pd.to_datetime(df["disclosure_date"])
            # Derive the transaction datetime if an upstream step has not
            # already added it; previously this read transaction_date_dt
            # unconditionally and raised KeyError when run standalone.
            if "transaction_date_dt" not in df.columns:
                df["transaction_date_dt"] = pd.to_datetime(df["transaction_date_cleaned"])
            df["disclosure_delay_days"] = (
                df["disclosure_date_dt"] - df["transaction_date_dt"]
            ).dt.days

            # Disclosure compliance scoring; the 45-day threshold mirrors the
            # STOCK Act filing window — TODO confirm the intended cutoff.
            df["timely_disclosure"] = (df["disclosure_delay_days"] <= 45).astype(int)
            df["late_disclosure"] = (df["disclosure_delay_days"] > 45).astype(int)
            df["very_late_disclosure"] = (df["disclosure_delay_days"] > 90).astype(int)

            # Linearly decaying score: 1.0 at zero delay, 0.0 at >= 90 days.
            df["disclosure_compliance_score"] = np.clip(
                1.0 - (df["disclosure_delay_days"] / 90), 0, 1
            )
        else:
            # Default values when disclosure dates not available.
            # BUGFIX: also emit the late-disclosure flags so both branches
            # produce the same columns (previously only set in the if-branch).
            df["disclosure_delay_days"] = 30
            df["timely_disclosure"] = 1
            df["late_disclosure"] = 0
            df["very_late_disclosure"] = 0
            df["disclosure_compliance_score"] = 0.8

        # Transaction size vs disclosure timing: top-decile trades disclosed
        # past the 45-day window are the most suspicious combination.
        df["large_trade_late_disclosure"] = (
            (df["transaction_amount_cleaned"] > df["transaction_amount_cleaned"].quantile(0.9))
            & (df["disclosure_delay_days"] > 45)
        ).astype(int)

        return df

    def extract_reporting_patterns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Extract patterns in reporting behavior.

        Expects the columns added by :meth:`extract_disclosure_features`.
        Adds per-politician ``reporting_reliability`` and
        ``avg_disclosure_delay`` columns.
        """

        # Reporting frequency by politician
        politician_reporting = df.groupby("politician_name_cleaned").agg(
            {
                "disclosure_delay_days": ["mean", "std", "max"],
                "timely_disclosure": "mean",
                "transaction_amount_cleaned": "count",
            }
        )

        # Flatten the MultiIndex columns produced by the multi-agg.
        politician_reporting.columns = [
            "avg_disclosure_delay",
            "disclosure_delay_std",
            "max_disclosure_delay",
            "timely_disclosure_rate",
            "total_disclosures",
        ]

        # Reporting reliability: mostly the timely-filing rate, with a small
        # bonus for a low average delay (clipped to [0, 1]).
        politician_reporting["reporting_reliability"] = (
            politician_reporting["timely_disclosure_rate"] * 0.7
            + np.clip(1.0 - politician_reporting["avg_disclosure_delay"] / 90, 0, 1) * 0.3
        )

        # Merge back to main dataframe
        df = df.merge(
            politician_reporting[["reporting_reliability", "avg_disclosure_delay"]],
            left_on="politician_name_cleaned",
            right_index=True,
            how="left",
        )

        return df
567
+
568
+
569
class PolicyImpactFeatures:
    """Features related to policy announcements and their market impact.

    Policy events are currently *mocked* via NumPy's global RNG (no external
    policy/news feed is wired in), so outputs are nondeterministic unless
    the caller seeds ``np.random`` first.
    """

    def __init__(self, config: Optional["PoliticalFeatureConfig"] = None):
        # Fall back to the default configuration when none is supplied.
        self.config = config or PoliticalFeatureConfig()

    def extract_policy_timing_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Extract features related to policy announcement timing.

        Expects ``transaction_date_dt`` and ``sector_classification``
        columns (added upstream).  Adds ``days_to_policy_event``,
        ``policy_impact_score``, ``pre_policy_trade``, ``post_policy_trade``,
        ``policy_relevant_trade`` and ``potential_insider_policy``.
        """

        # Mock policy events (in practice, this would come from news/policy databases)
        policy_events = self._generate_mock_policy_events(df)

        if policy_events:
            df = self._analyze_policy_trade_timing(df, policy_events)
        else:
            # Default values when no policy data available.
            # BUGFIX: also emit policy_impact_score and
            # potential_insider_policy so the output schema matches the
            # event-bearing branch (previously missing here).
            df["days_to_policy_event"] = 999
            df["policy_impact_score"] = 0.0
            df["pre_policy_trade"] = 0
            df["post_policy_trade"] = 0
            df["policy_relevant_trade"] = 0
            df["potential_insider_policy"] = 0

        return df

    def _generate_mock_policy_events(self, df: pd.DataFrame) -> List[Dict]:
        """Generate mock policy events for demonstration.

        One candidate slot every 30 days per sector over the trading date
        span; each slot materializes with 10% probability.  Uses the global
        NumPy RNG — seed it for reproducibility.
        """
        # In practice, this would be loaded from external policy/news data

        date_range = pd.date_range(
            start=df["transaction_date_dt"].min(),
            end=df["transaction_date_dt"].max(),
            freq="30D",
        )

        policy_events = []
        sectors = ["technology", "healthcare", "financial", "energy"]

        for date in date_range:
            for sector in sectors:
                if np.random.random() < 0.1:  # 10% chance of policy event
                    policy_events.append(
                        {
                            "date": date,
                            "sector": sector,
                            "event_type": np.random.choice(
                                ["regulation", "legislation", "hearing"]
                            ),
                            "impact_score": np.random.uniform(0.1, 1.0),
                        }
                    )

        return policy_events

    def _analyze_policy_trade_timing(
        self, df: pd.DataFrame, policy_events: List[Dict]
    ) -> pd.DataFrame:
        """Analyze timing of trades relative to policy events."""

        # Convert policy events to DataFrame
        policy_df = pd.DataFrame(policy_events)
        policy_df["date"] = pd.to_datetime(policy_df["date"])

        # For each trade, find the nearest policy event in the same sector
        def find_nearest_policy_event(row):
            sector = row["sector_classification"]
            trade_date = row["transaction_date_dt"]

            # Filter policy events for the same sector
            sector_events = policy_df[policy_df["sector"] == sector]

            if sector_events.empty:
                return 999, 0  # No relevant events

            # Signed day offsets: positive = event is in the future.
            days_diff = (sector_events["date"] - trade_date).dt.days

            # Find nearest event (past or future)
            abs_days = days_diff.abs()
            nearest_idx = abs_days.idxmin()

            nearest_days = days_diff.loc[nearest_idx]
            impact_score = sector_events.loc[nearest_idx, "impact_score"]

            return nearest_days, impact_score

        # Apply to all trades
        policy_analysis = df.apply(find_nearest_policy_event, axis=1, result_type="expand")
        df["days_to_policy_event"] = policy_analysis[0]
        df["policy_impact_score"] = policy_analysis[1]

        # Policy-related trade flags (one-week windows either side of event)
        df["pre_policy_trade"] = (
            (df["days_to_policy_event"] > 0) & (df["days_to_policy_event"] <= 7)
        ).astype(int)

        df["post_policy_trade"] = (
            (df["days_to_policy_event"] < 0) & (df["days_to_policy_event"] >= -7)
        ).astype(int)

        df["policy_relevant_trade"] = (abs(df["days_to_policy_event"]) <= 7).astype(int)

        # Potential insider trading indicator: a top-quintile trade shortly
        # before a high-impact event.
        df["potential_insider_policy"] = (
            (df["pre_policy_trade"] == 1)
            & (df["policy_impact_score"] > 0.7)
            & (df["transaction_amount_cleaned"] > df["transaction_amount_cleaned"].quantile(0.8))
        ).astype(int)

        return df