mcli-framework 7.10.1__py3-none-any.whl → 7.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (99)
  1. mcli/lib/custom_commands.py +10 -0
  2. mcli/lib/optional_deps.py +240 -0
  3. mcli/workflow/git_commit/ai_service.py +13 -2
  4. mcli/workflow/notebook/converter.py +375 -0
  5. mcli/workflow/notebook/notebook_cmd.py +441 -0
  6. mcli/workflow/notebook/schema.py +402 -0
  7. mcli/workflow/notebook/validator.py +313 -0
  8. mcli/workflow/workflow.py +14 -0
  9. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/METADATA +36 -2
  10. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/RECORD +14 -94
  11. mcli/__init__.py +0 -160
  12. mcli/__main__.py +0 -14
  13. mcli/app/__init__.py +0 -23
  14. mcli/app/model/__init__.py +0 -0
  15. mcli/app/video/__init__.py +0 -5
  16. mcli/chat/__init__.py +0 -34
  17. mcli/lib/__init__.py +0 -0
  18. mcli/lib/api/__init__.py +0 -0
  19. mcli/lib/auth/__init__.py +0 -1
  20. mcli/lib/config/__init__.py +0 -1
  21. mcli/lib/erd/__init__.py +0 -25
  22. mcli/lib/files/__init__.py +0 -0
  23. mcli/lib/fs/__init__.py +0 -1
  24. mcli/lib/logger/__init__.py +0 -3
  25. mcli/lib/performance/__init__.py +0 -17
  26. mcli/lib/pickles/__init__.py +0 -1
  27. mcli/lib/secrets/__init__.py +0 -10
  28. mcli/lib/shell/__init__.py +0 -0
  29. mcli/lib/toml/__init__.py +0 -1
  30. mcli/lib/watcher/__init__.py +0 -0
  31. mcli/ml/__init__.py +0 -16
  32. mcli/ml/api/__init__.py +0 -30
  33. mcli/ml/api/routers/__init__.py +0 -27
  34. mcli/ml/auth/__init__.py +0 -41
  35. mcli/ml/backtesting/__init__.py +0 -33
  36. mcli/ml/cli/__init__.py +0 -5
  37. mcli/ml/config/__init__.py +0 -33
  38. mcli/ml/configs/__init__.py +0 -16
  39. mcli/ml/dashboard/__init__.py +0 -12
  40. mcli/ml/dashboard/components/__init__.py +0 -7
  41. mcli/ml/dashboard/pages/__init__.py +0 -6
  42. mcli/ml/data_ingestion/__init__.py +0 -29
  43. mcli/ml/database/__init__.py +0 -40
  44. mcli/ml/experimentation/__init__.py +0 -29
  45. mcli/ml/features/__init__.py +0 -39
  46. mcli/ml/features/political_features.py +0 -677
  47. mcli/ml/mlops/__init__.py +0 -19
  48. mcli/ml/models/__init__.py +0 -90
  49. mcli/ml/monitoring/__init__.py +0 -25
  50. mcli/ml/optimization/__init__.py +0 -27
  51. mcli/ml/predictions/__init__.py +0 -5
  52. mcli/ml/preprocessing/__init__.py +0 -24
  53. mcli/ml/preprocessing/politician_trading_preprocessor.py +0 -570
  54. mcli/ml/scripts/__init__.py +0 -1
  55. mcli/ml/serving/__init__.py +0 -1
  56. mcli/ml/trading/__init__.py +0 -63
  57. mcli/ml/training/__init__.py +0 -7
  58. mcli/mygroup/__init__.py +0 -3
  59. mcli/public/__init__.py +0 -1
  60. mcli/public/commands/__init__.py +0 -2
  61. mcli/self/__init__.py +0 -3
  62. mcli/workflow/__init__.py +0 -0
  63. mcli/workflow/daemon/__init__.py +0 -15
  64. mcli/workflow/dashboard/__init__.py +0 -5
  65. mcli/workflow/docker/__init__.py +0 -0
  66. mcli/workflow/file/__init__.py +0 -0
  67. mcli/workflow/gcloud/__init__.py +0 -1
  68. mcli/workflow/git_commit/__init__.py +0 -0
  69. mcli/workflow/interview/__init__.py +0 -0
  70. mcli/workflow/politician_trading/__init__.py +0 -4
  71. mcli/workflow/politician_trading/config.py +0 -134
  72. mcli/workflow/politician_trading/connectivity.py +0 -492
  73. mcli/workflow/politician_trading/data_sources.py +0 -654
  74. mcli/workflow/politician_trading/database.py +0 -412
  75. mcli/workflow/politician_trading/demo.py +0 -249
  76. mcli/workflow/politician_trading/models.py +0 -327
  77. mcli/workflow/politician_trading/monitoring.py +0 -413
  78. mcli/workflow/politician_trading/scrapers.py +0 -1074
  79. mcli/workflow/politician_trading/scrapers_california.py +0 -434
  80. mcli/workflow/politician_trading/scrapers_corporate_registry.py +0 -797
  81. mcli/workflow/politician_trading/scrapers_eu.py +0 -376
  82. mcli/workflow/politician_trading/scrapers_free_sources.py +0 -509
  83. mcli/workflow/politician_trading/scrapers_third_party.py +0 -373
  84. mcli/workflow/politician_trading/scrapers_uk.py +0 -378
  85. mcli/workflow/politician_trading/scrapers_us_states.py +0 -471
  86. mcli/workflow/politician_trading/seed_database.py +0 -520
  87. mcli/workflow/politician_trading/supabase_functions.py +0 -354
  88. mcli/workflow/politician_trading/workflow.py +0 -879
  89. mcli/workflow/registry/__init__.py +0 -0
  90. mcli/workflow/repo/__init__.py +0 -0
  91. mcli/workflow/scheduler/__init__.py +0 -25
  92. mcli/workflow/search/__init__.py +0 -0
  93. mcli/workflow/sync/__init__.py +0 -5
  94. mcli/workflow/videos/__init__.py +0 -1
  95. mcli/workflow/wakatime/__init__.py +0 -80
  96. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/WHEEL +0 -0
  97. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/entry_points.txt +0 -0
  98. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/licenses/LICENSE +0 -0
  99. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/top_level.txt +0 -0
@@ -1,677 +0,0 @@
1
- """Political influence features for stock recommendation models"""
2
-
3
- import logging
4
- from collections import Counter, defaultdict
5
- from dataclasses import dataclass
6
- from datetime import datetime, timedelta
7
- from typing import Any, Dict, List, Optional, Tuple, Union
8
-
9
- import numpy as np
10
- import pandas as pd
11
-
12
- logger = logging.getLogger(__name__)
13
-
14
-
15
- @dataclass
16
- class PoliticalFeatureConfig:
17
- """Configuration for political feature extraction"""
18
-
19
- # Politician influence scoring
20
- committee_weights: Dict[str, float] = None
21
- party_influence_weights: Dict[str, float] = None
22
- position_weights: Dict[str, float] = None
23
-
24
- # Trading pattern analysis
25
- influence_lookback_days: int = 180
26
- insider_threshold_days: int = 30
27
- cluster_analysis_window: int = 60
28
-
29
- # Policy impact modeling
30
- sector_policy_mapping: Dict[str, List[str]] = None
31
- policy_announcement_window: int = 7
32
-
33
- def __post_init__(self):
34
- if self.committee_weights is None:
35
- self.committee_weights = {
36
- "financial_services": 3.0,
37
- "energy_commerce": 2.5,
38
- "judiciary": 2.0,
39
- "appropriations": 2.5,
40
- "ways_means": 3.0,
41
- "defense": 2.0,
42
- "foreign_affairs": 1.5,
43
- "healthcare": 2.0,
44
- "technology": 2.5,
45
- }
46
-
47
- if self.party_influence_weights is None:
48
- self.party_influence_weights = {
49
- "majority_party": 1.2,
50
- "minority_party": 0.8,
51
- "leadership": 2.0,
52
- "committee_chair": 1.8,
53
- "ranking_member": 1.4,
54
- }
55
-
56
- if self.position_weights is None:
57
- self.position_weights = {
58
- "speaker": 3.0,
59
- "majority_leader": 2.5,
60
- "minority_leader": 2.0,
61
- "committee_chair": 2.0,
62
- "subcommittee_chair": 1.5,
63
- "ranking_member": 1.3,
64
- "member": 1.0,
65
- }
66
-
67
- if self.sector_policy_mapping is None:
68
- self.sector_policy_mapping = {
69
- "technology": ["tech_regulation", "data_privacy", "antitrust"],
70
- "healthcare": ["medicare", "drug_pricing", "healthcare_reform"],
71
- "energy": ["climate_policy", "renewable_energy", "oil_regulation"],
72
- "financial": ["banking_regulation", "fintech", "cryptocurrency"],
73
- "defense": ["defense_spending", "military_contracts", "cybersecurity"],
74
- }
75
-
76
-
77
- class PoliticalInfluenceFeatures:
78
- """Extract features based on political influence and power"""
79
-
80
- def __init__(self, config: Optional[PoliticalFeatureConfig] = None):
81
- self.config = config or PoliticalFeatureConfig()
82
- self.politician_influence_cache = {}
83
-
84
- def extract_influence_features(
85
- self, trading_data: pd.DataFrame, politician_metadata: Optional[pd.DataFrame] = None
86
- ) -> pd.DataFrame:
87
- """Extract political influence features from trading data"""
88
- df = trading_data.copy()
89
-
90
- # Calculate politician influence scores
91
- df = self._calculate_politician_influence(df, politician_metadata)
92
-
93
- # Trading timing analysis
94
- df = self._analyze_trading_timing(df)
95
-
96
- # Committee and sector alignment
97
- df = self._analyze_committee_sector_alignment(df, politician_metadata)
98
-
99
- # Party clustering analysis
100
- df = self._analyze_party_clustering(df)
101
-
102
- # Seniority and experience features
103
- df = self._extract_seniority_features(df, politician_metadata)
104
-
105
- return df
106
-
107
- def _calculate_politician_influence(
108
- self, df: pd.DataFrame, metadata: Optional[pd.DataFrame]
109
- ) -> pd.DataFrame:
110
- """Calculate comprehensive politician influence scores"""
111
-
112
- # Base influence score from trading frequency and volume
113
- politician_stats = (
114
- df.groupby("politician_name_cleaned")
115
- .agg(
116
- {
117
- "transaction_amount_cleaned": ["count", "sum", "mean", "std"],
118
- "asset_name_cleaned": "nunique",
119
- }
120
- )
121
- .round(2)
122
- )
123
-
124
- politician_stats.columns = [
125
- "trade_count",
126
- "total_volume",
127
- "avg_trade_size",
128
- "trade_size_std",
129
- "unique_assets",
130
- ]
131
-
132
- # Calculate base influence from trading metrics
133
- # More trades, higher volumes, and diverse assets = higher influence
134
- politician_stats["trade_influence"] = (
135
- np.log1p(politician_stats["trade_count"])
136
- + np.log1p(politician_stats["total_volume"]) / 10
137
- + np.log1p(politician_stats["unique_assets"]) * 2
138
- )
139
-
140
- # Normalize to 0-1 scale
141
- politician_stats["trade_influence"] = (
142
- politician_stats["trade_influence"] / politician_stats["trade_influence"].max()
143
- )
144
-
145
- # Add metadata-based influence if available
146
- if metadata is not None:
147
- politician_stats = self._add_metadata_influence(politician_stats, metadata)
148
- else:
149
- # Use default influence based on trading patterns
150
- politician_stats["position_influence"] = 1.0
151
- politician_stats["committee_influence"] = 1.0
152
- politician_stats["party_influence"] = 1.0
153
-
154
- # Combined influence score
155
- politician_stats["total_influence"] = (
156
- politician_stats["trade_influence"] * 0.4
157
- + politician_stats["position_influence"] * 0.3
158
- + politician_stats["committee_influence"] * 0.2
159
- + politician_stats["party_influence"] * 0.1
160
- )
161
-
162
- # Merge back to main dataframe
163
- df = df.merge(
164
- politician_stats[["total_influence", "trade_influence"]],
165
- left_on="politician_name_cleaned",
166
- right_index=True,
167
- how="left",
168
- )
169
-
170
- return df
171
-
172
- def _add_metadata_influence(
173
- self, stats_df: pd.DataFrame, metadata: pd.DataFrame
174
- ) -> pd.DataFrame:
175
- """Add influence scores based on politician metadata"""
176
-
177
- # Position-based influence
178
- if "position" in metadata.columns:
179
- position_influence = metadata["position"].map(self.config.position_weights)
180
- metadata["position_influence"] = position_influence.fillna(1.0)
181
- else:
182
- metadata["position_influence"] = 1.0
183
-
184
- # Committee-based influence
185
- if "committees" in metadata.columns:
186
-
187
- def calculate_committee_influence(committees_str):
188
- if pd.isna(committees_str):
189
- return 1.0
190
- committees = str(committees_str).lower().split(",")
191
- influence = 1.0
192
- for committee in committees:
193
- committee = committee.strip()
194
- for key, weight in self.config.committee_weights.items():
195
- if key in committee:
196
- influence = max(influence, weight)
197
- return influence
198
-
199
- metadata["committee_influence"] = metadata["committees"].apply(
200
- calculate_committee_influence
201
- )
202
- else:
203
- metadata["committee_influence"] = 1.0
204
-
205
- # Party-based influence (simplified)
206
- if "party" in metadata.columns:
207
- # Assume majority party has more influence (would need current data)
208
- party_influence = metadata["party"].map({"Republican": 1.1, "Democrat": 1.0})
209
- metadata["party_influence"] = party_influence.fillna(1.0)
210
- else:
211
- metadata["party_influence"] = 1.0
212
-
213
- # Merge metadata influence scores
214
- influence_cols = ["position_influence", "committee_influence", "party_influence"]
215
- available_cols = [col for col in influence_cols if col in metadata.columns]
216
-
217
- if available_cols:
218
- stats_df = stats_df.merge(
219
- metadata[["politician_name_cleaned"] + available_cols],
220
- left_index=True,
221
- right_on="politician_name_cleaned",
222
- how="left",
223
- )
224
-
225
- # Fill missing values
226
- for col in influence_cols:
227
- if col not in stats_df.columns:
228
- stats_df[col] = 1.0
229
- else:
230
- stats_df[col] = stats_df[col].fillna(1.0)
231
-
232
- return stats_df
233
-
234
- def _analyze_trading_timing(self, df: pd.DataFrame) -> pd.DataFrame:
235
- """Analyze timing patterns in political trading"""
236
-
237
- # Convert date to datetime if not already
238
- if "transaction_date_cleaned" in df.columns:
239
- df["transaction_date_dt"] = pd.to_datetime(df["transaction_date_cleaned"])
240
-
241
- # Days since last trade by politician
242
- df = df.sort_values(["politician_name_cleaned", "transaction_date_dt"])
243
- df["days_since_last_trade"] = (
244
- df.groupby("politician_name_cleaned")["transaction_date_dt"].diff().dt.days
245
- )
246
-
247
- # Trading frequency score (more frequent = higher score)
248
- df["trading_frequency_score"] = np.where(
249
- df["days_since_last_trade"].isna(),
250
- 1.0,
251
- np.clip(30 / (df["days_since_last_trade"] + 1), 0, 2.0),
252
- )
253
-
254
- # Cluster trading detection (multiple trades in short timeframe)
255
- df["cluster_trades"] = (
256
- df.groupby("politician_name_cleaned")["days_since_last_trade"]
257
- .rolling(window=5, min_periods=1)
258
- .apply(lambda x: (x <= 7).sum())
259
- .values
260
- )
261
-
262
- # Quarterly timing (end of quarter trading patterns)
263
- df["quarter_end_trade"] = (
264
- df["transaction_date_dt"].dt.month.isin([3, 6, 9, 12])
265
- & (df["transaction_date_dt"].dt.day >= 25)
266
- ).astype(int)
267
-
268
- # Year-end trading
269
- df["year_end_trade"] = (
270
- (df["transaction_date_dt"].dt.month == 12) & (df["transaction_date_dt"].dt.day >= 20)
271
- ).astype(int)
272
-
273
- # Pre-earnings timing (approximate - would need earnings calendar)
274
- df["potential_insider_timing"] = (df["days_since_last_trade"] <= 5).astype(int)
275
-
276
- return df
277
-
278
- def _analyze_committee_sector_alignment(
279
- self, df: pd.DataFrame, metadata: Optional[pd.DataFrame]
280
- ) -> pd.DataFrame:
281
- """Analyze alignment between committee assignments and traded sectors"""
282
-
283
- # Simplified sector classification based on asset names
284
- def classify_sector(asset_name):
285
- if pd.isna(asset_name):
286
- return "unknown"
287
-
288
- asset_lower = str(asset_name).lower()
289
-
290
- # Technology sector
291
- tech_keywords = [
292
- "tech",
293
- "software",
294
- "microsoft",
295
- "apple",
296
- "google",
297
- "meta",
298
- "facebook",
299
- "amazon",
300
- "netflix",
301
- "tesla",
302
- "nvidia",
303
- "intel",
304
- ]
305
- if any(keyword in asset_lower for keyword in tech_keywords):
306
- return "technology"
307
-
308
- # Healthcare sector
309
- health_keywords = [
310
- "health",
311
- "pharma",
312
- "medical",
313
- "bio",
314
- "johnson",
315
- "pfizer",
316
- "merck",
317
- "abbott",
318
- "healthcare",
319
- ]
320
- if any(keyword in asset_lower for keyword in health_keywords):
321
- return "healthcare"
322
-
323
- # Financial sector
324
- finance_keywords = [
325
- "bank",
326
- "financial",
327
- "capital",
328
- "credit",
329
- "jpmorgan",
330
- "bank of america",
331
- "wells fargo",
332
- "goldman",
333
- "morgan stanley",
334
- ]
335
- if any(keyword in asset_lower for keyword in finance_keywords):
336
- return "financial"
337
-
338
- # Energy sector
339
- energy_keywords = [
340
- "energy",
341
- "oil",
342
- "gas",
343
- "exxon",
344
- "chevron",
345
- "renewable",
346
- "solar",
347
- "wind",
348
- "petroleum",
349
- ]
350
- if any(keyword in asset_lower for keyword in energy_keywords):
351
- return "energy"
352
-
353
- # Defense sector
354
- defense_keywords = [
355
- "defense",
356
- "aerospace",
357
- "boeing",
358
- "lockheed",
359
- "raytheon",
360
- "general dynamics",
361
- "northrop",
362
- ]
363
- if any(keyword in asset_lower for keyword in defense_keywords):
364
- return "defense"
365
-
366
- return "other"
367
-
368
- df["sector_classification"] = df["asset_name_cleaned"].apply(classify_sector)
369
-
370
- # Committee-sector alignment score
371
- if metadata is not None and "committees" in metadata.columns:
372
-
373
- def calculate_alignment_score(politician, sector):
374
- politician_metadata = metadata[metadata["politician_name_cleaned"] == politician]
375
- if politician_metadata.empty:
376
- return 0.5 # Neutral alignment
377
-
378
- committees = str(politician_metadata.iloc[0]["committees"]).lower()
379
-
380
- # Check for relevant committee memberships
381
- alignment_score = 0.5 # Base neutral score
382
-
383
- if sector == "technology" and any(
384
- keyword in committees for keyword in ["technology", "commerce", "judiciary"]
385
- ):
386
- alignment_score = 0.9
387
- elif sector == "healthcare" and "health" in committees:
388
- alignment_score = 0.9
389
- elif sector == "financial" and "financial" in committees:
390
- alignment_score = 0.9
391
- elif sector == "energy" and any(
392
- keyword in committees for keyword in ["energy", "environment"]
393
- ):
394
- alignment_score = 0.9
395
- elif sector == "defense" and any(
396
- keyword in committees for keyword in ["defense", "armed services"]
397
- ):
398
- alignment_score = 0.9
399
-
400
- return alignment_score
401
-
402
- df["committee_sector_alignment"] = df.apply(
403
- lambda row: calculate_alignment_score(
404
- row["politician_name_cleaned"], row["sector_classification"]
405
- ),
406
- axis=1,
407
- )
408
- else:
409
- df["committee_sector_alignment"] = 0.5 # Neutral when no metadata
410
-
411
- return df
412
-
413
- def _analyze_party_clustering(self, df: pd.DataFrame) -> pd.DataFrame:
414
- """Analyze clustering of trades by party affiliation"""
415
-
416
- # Mock party assignment based on politician name patterns
417
- # In real implementation, this would come from metadata
418
- def assign_party(name):
419
- # This is a simplified mock assignment
420
- # In practice, this would come from politician metadata
421
- republican_indicators = ["mitch", "mcconnell", "cruz", "rubio", "romney"]
422
- democrat_indicators = ["pelosi", "schumer", "warren", "sanders"]
423
-
424
- name_lower = str(name).lower()
425
- if any(indicator in name_lower for indicator in republican_indicators):
426
- return "Republican"
427
- elif any(indicator in name_lower for indicator in democrat_indicators):
428
- return "Democrat"
429
- else:
430
- return "Independent" # Default
431
-
432
- df["estimated_party"] = df["politician_name_cleaned"].apply(assign_party)
433
-
434
- # Party-based trading patterns
435
- party_stats = (
436
- df.groupby(["estimated_party", "sector_classification"])
437
- .agg(
438
- {
439
- "transaction_amount_cleaned": ["count", "mean"],
440
- "transaction_type_cleaned": lambda x: (x == "buy").mean(),
441
- }
442
- )
443
- .round(3)
444
- )
445
-
446
- party_stats.columns = ["party_sector_trades", "party_avg_amount", "party_buy_ratio"]
447
-
448
- # Calculate party consensus score for each trade
449
- df = df.merge(
450
- party_stats,
451
- left_on=["estimated_party", "sector_classification"],
452
- right_index=True,
453
- how="left",
454
- )
455
-
456
- # Party divergence score (how much this trade differs from party norm)
457
- df["party_divergence"] = abs(
458
- (df["transaction_type_cleaned"] == "buy").astype(int) - df["party_buy_ratio"]
459
- )
460
-
461
- return df
462
-
463
- def _extract_seniority_features(
464
- self, df: pd.DataFrame, metadata: Optional[pd.DataFrame]
465
- ) -> pd.DataFrame:
466
- """Extract features related to politician seniority and experience"""
467
-
468
- # Estimate seniority based on trading patterns (mock implementation)
469
- politician_first_trade = df.groupby("politician_name_cleaned")["transaction_date_dt"].min()
470
-
471
- # Calculate trading experience (days since first recorded trade)
472
- df = df.merge(
473
- politician_first_trade.rename("first_trade_date"),
474
- left_on="politician_name_cleaned",
475
- right_index=True,
476
- how="left",
477
- )
478
-
479
- df["trading_experience_days"] = (df["transaction_date_dt"] - df["first_trade_date"]).dt.days
480
-
481
- # Experience categories
482
- df["experience_category"] = pd.cut(
483
- df["trading_experience_days"],
484
- bins=[0, 90, 365, 1095, float("inf")],
485
- labels=["novice", "intermediate", "experienced", "veteran"],
486
- )
487
-
488
- # Seniority influence score
489
- df["seniority_influence"] = np.clip(np.log1p(df["trading_experience_days"]) / 10, 0, 2.0)
490
-
491
- return df
492
-
493
-
494
- class CongressionalTrackingFeatures:
495
- """Features based on congressional trading disclosure tracking"""
496
-
497
- def __init__(self, config: Optional[PoliticalFeatureConfig] = None):
498
- self.config = config or PoliticalFeatureConfig()
499
-
500
- def extract_disclosure_features(self, df: pd.DataFrame) -> pd.DataFrame:
501
- """Extract features related to disclosure timing and patterns"""
502
-
503
- # Disclosure delay analysis
504
- if "disclosure_date" in df.columns and "transaction_date_cleaned" in df.columns:
505
- df["disclosure_date_dt"] = pd.to_datetime(df["disclosure_date"])
506
- df["disclosure_delay_days"] = (
507
- df["disclosure_date_dt"] - df["transaction_date_dt"]
508
- ).dt.days
509
-
510
- # Disclosure compliance scoring
511
- df["timely_disclosure"] = (df["disclosure_delay_days"] <= 45).astype(int)
512
- df["late_disclosure"] = (df["disclosure_delay_days"] > 45).astype(int)
513
- df["very_late_disclosure"] = (df["disclosure_delay_days"] > 90).astype(int)
514
-
515
- # Disclosure pattern analysis
516
- df["disclosure_compliance_score"] = np.clip(
517
- 1.0 - (df["disclosure_delay_days"] / 90), 0, 1
518
- )
519
- else:
520
- # Default values when disclosure dates not available
521
- df["disclosure_delay_days"] = 30
522
- df["timely_disclosure"] = 1
523
- df["disclosure_compliance_score"] = 0.8
524
-
525
- # Transaction size vs disclosure timing
526
- df["large_trade_late_disclosure"] = (
527
- (df["transaction_amount_cleaned"] > df["transaction_amount_cleaned"].quantile(0.9))
528
- & (df["disclosure_delay_days"] > 45)
529
- ).astype(int)
530
-
531
- return df
532
-
533
- def extract_reporting_patterns(self, df: pd.DataFrame) -> pd.DataFrame:
534
- """Extract patterns in reporting behavior"""
535
-
536
- # Reporting frequency by politician
537
- politician_reporting = df.groupby("politician_name_cleaned").agg(
538
- {
539
- "disclosure_delay_days": ["mean", "std", "max"],
540
- "timely_disclosure": "mean",
541
- "transaction_amount_cleaned": "count",
542
- }
543
- )
544
-
545
- politician_reporting.columns = [
546
- "avg_disclosure_delay",
547
- "disclosure_delay_std",
548
- "max_disclosure_delay",
549
- "timely_disclosure_rate",
550
- "total_disclosures",
551
- ]
552
-
553
- # Reporting reliability score
554
- politician_reporting["reporting_reliability"] = (
555
- politician_reporting["timely_disclosure_rate"] * 0.7
556
- + np.clip(1.0 - politician_reporting["avg_disclosure_delay"] / 90, 0, 1) * 0.3
557
- )
558
-
559
- # Merge back to main dataframe
560
- df = df.merge(
561
- politician_reporting[["reporting_reliability", "avg_disclosure_delay"]],
562
- left_on="politician_name_cleaned",
563
- right_index=True,
564
- how="left",
565
- )
566
-
567
- return df
568
-
569
-
570
- class PolicyImpactFeatures:
571
- """Features related to policy announcements and their market impact"""
572
-
573
- def __init__(self, config: Optional[PoliticalFeatureConfig] = None):
574
- self.config = config or PoliticalFeatureConfig()
575
-
576
- def extract_policy_timing_features(self, df: pd.DataFrame) -> pd.DataFrame:
577
- """Extract features related to policy announcement timing"""
578
-
579
- # Mock policy events (in practice, this would come from news/policy databases)
580
- policy_events = self._generate_mock_policy_events(df)
581
-
582
- if policy_events:
583
- df = self._analyze_policy_trade_timing(df, policy_events)
584
- else:
585
- # Default values when no policy data available
586
- df["days_to_policy_event"] = 999
587
- df["pre_policy_trade"] = 0
588
- df["post_policy_trade"] = 0
589
- df["policy_relevant_trade"] = 0
590
-
591
- return df
592
-
593
- def _generate_mock_policy_events(self, df: pd.DataFrame) -> List[Dict]:
594
- """Generate mock policy events for demonstration"""
595
- # In practice, this would be loaded from external policy/news data
596
-
597
- date_range = pd.date_range(
598
- start=df["transaction_date_dt"].min(),
599
- end=df["transaction_date_dt"].max(),
600
- freq="30D",
601
- )
602
-
603
- policy_events = []
604
- sectors = ["technology", "healthcare", "financial", "energy"]
605
-
606
- for date in date_range:
607
- for sector in sectors:
608
- if np.random.random() < 0.1: # 10% chance of policy event
609
- policy_events.append(
610
- {
611
- "date": date,
612
- "sector": sector,
613
- "event_type": np.random.choice(
614
- ["regulation", "legislation", "hearing"]
615
- ),
616
- "impact_score": np.random.uniform(0.1, 1.0),
617
- }
618
- )
619
-
620
- return policy_events
621
-
622
- def _analyze_policy_trade_timing(
623
- self, df: pd.DataFrame, policy_events: List[Dict]
624
- ) -> pd.DataFrame:
625
- """Analyze timing of trades relative to policy events"""
626
-
627
- # Convert policy events to DataFrame
628
- policy_df = pd.DataFrame(policy_events)
629
- policy_df["date"] = pd.to_datetime(policy_df["date"])
630
-
631
- # For each trade, find the nearest policy event in the same sector
632
- def find_nearest_policy_event(row):
633
- sector = row["sector_classification"]
634
- trade_date = row["transaction_date_dt"]
635
-
636
- # Filter policy events for the same sector
637
- sector_events = policy_df[policy_df["sector"] == sector]
638
-
639
- if sector_events.empty:
640
- return 999, 0 # No relevant events
641
-
642
- # Calculate days to each event
643
- days_diff = (sector_events["date"] - trade_date).dt.days
644
-
645
- # Find nearest event (past or future)
646
- abs_days = days_diff.abs()
647
- nearest_idx = abs_days.idxmin()
648
-
649
- nearest_days = days_diff.loc[nearest_idx]
650
- impact_score = sector_events.loc[nearest_idx, "impact_score"]
651
-
652
- return nearest_days, impact_score
653
-
654
- # Apply to all trades
655
- policy_analysis = df.apply(find_nearest_policy_event, axis=1, result_type="expand")
656
- df["days_to_policy_event"] = policy_analysis[0]
657
- df["policy_impact_score"] = policy_analysis[1]
658
-
659
- # Policy-related trade flags
660
- df["pre_policy_trade"] = (
661
- (df["days_to_policy_event"] > 0) & (df["days_to_policy_event"] <= 7)
662
- ).astype(int)
663
-
664
- df["post_policy_trade"] = (
665
- (df["days_to_policy_event"] < 0) & (df["days_to_policy_event"] >= -7)
666
- ).astype(int)
667
-
668
- df["policy_relevant_trade"] = (abs(df["days_to_policy_event"]) <= 7).astype(int)
669
-
670
- # Potential insider trading indicator
671
- df["potential_insider_policy"] = (
672
- (df["pre_policy_trade"] == 1)
673
- & (df["policy_impact_score"] > 0.7)
674
- & (df["transaction_amount_cleaned"] > df["transaction_amount_cleaned"].quantile(0.8))
675
- ).astype(int)
676
-
677
- return df
mcli/ml/mlops/__init__.py DELETED
@@ -1,19 +0,0 @@
1
- """MLOps components for ML pipeline management"""
2
-
3
- from .experiment_tracker import ExperimentRun, ExperimentTracker, MLflowConfig, ModelRegistry
4
- from .model_serving import ModelEndpoint, ModelServer, PredictionService
5
- from .pipeline_orchestrator import MLPipeline, PipelineConfig, PipelineExecutor, PipelineStep
6
-
7
- __all__ = [
8
- "ExperimentTracker",
9
- "ModelRegistry",
10
- "MLflowConfig",
11
- "ExperimentRun",
12
- "ModelServer",
13
- "PredictionService",
14
- "ModelEndpoint",
15
- "MLPipeline",
16
- "PipelineStep",
17
- "PipelineConfig",
18
- "PipelineExecutor",
19
- ]