mcli-framework 7.1.0__py3-none-any.whl → 7.1.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (94)
  1. mcli/app/completion_cmd.py +59 -49
  2. mcli/app/completion_helpers.py +60 -138
  3. mcli/app/logs_cmd.py +46 -13
  4. mcli/app/main.py +17 -14
  5. mcli/app/model_cmd.py +19 -4
  6. mcli/chat/chat.py +3 -2
  7. mcli/lib/search/cached_vectorizer.py +1 -0
  8. mcli/lib/services/data_pipeline.py +12 -5
  9. mcli/lib/services/lsh_client.py +69 -58
  10. mcli/ml/api/app.py +28 -36
  11. mcli/ml/api/middleware.py +8 -16
  12. mcli/ml/api/routers/admin_router.py +3 -1
  13. mcli/ml/api/routers/auth_router.py +32 -56
  14. mcli/ml/api/routers/backtest_router.py +3 -1
  15. mcli/ml/api/routers/data_router.py +3 -1
  16. mcli/ml/api/routers/model_router.py +35 -74
  17. mcli/ml/api/routers/monitoring_router.py +3 -1
  18. mcli/ml/api/routers/portfolio_router.py +3 -1
  19. mcli/ml/api/routers/prediction_router.py +60 -65
  20. mcli/ml/api/routers/trade_router.py +6 -2
  21. mcli/ml/api/routers/websocket_router.py +12 -9
  22. mcli/ml/api/schemas.py +10 -2
  23. mcli/ml/auth/auth_manager.py +49 -114
  24. mcli/ml/auth/models.py +30 -15
  25. mcli/ml/auth/permissions.py +12 -19
  26. mcli/ml/backtesting/backtest_engine.py +134 -108
  27. mcli/ml/backtesting/performance_metrics.py +142 -108
  28. mcli/ml/cache.py +12 -18
  29. mcli/ml/cli/main.py +37 -23
  30. mcli/ml/config/settings.py +29 -12
  31. mcli/ml/dashboard/app.py +122 -130
  32. mcli/ml/dashboard/app_integrated.py +283 -152
  33. mcli/ml/dashboard/app_supabase.py +176 -108
  34. mcli/ml/dashboard/app_training.py +212 -206
  35. mcli/ml/dashboard/cli.py +14 -5
  36. mcli/ml/data_ingestion/api_connectors.py +51 -81
  37. mcli/ml/data_ingestion/data_pipeline.py +127 -125
  38. mcli/ml/data_ingestion/stream_processor.py +72 -80
  39. mcli/ml/database/migrations/env.py +3 -2
  40. mcli/ml/database/models.py +112 -79
  41. mcli/ml/database/session.py +6 -5
  42. mcli/ml/experimentation/ab_testing.py +149 -99
  43. mcli/ml/features/ensemble_features.py +9 -8
  44. mcli/ml/features/political_features.py +6 -5
  45. mcli/ml/features/recommendation_engine.py +15 -14
  46. mcli/ml/features/stock_features.py +7 -6
  47. mcli/ml/features/test_feature_engineering.py +8 -7
  48. mcli/ml/logging.py +10 -15
  49. mcli/ml/mlops/data_versioning.py +57 -64
  50. mcli/ml/mlops/experiment_tracker.py +49 -41
  51. mcli/ml/mlops/model_serving.py +59 -62
  52. mcli/ml/mlops/pipeline_orchestrator.py +203 -149
  53. mcli/ml/models/base_models.py +8 -7
  54. mcli/ml/models/ensemble_models.py +6 -5
  55. mcli/ml/models/recommendation_models.py +7 -6
  56. mcli/ml/models/test_models.py +18 -14
  57. mcli/ml/monitoring/drift_detection.py +95 -74
  58. mcli/ml/monitoring/metrics.py +10 -22
  59. mcli/ml/optimization/portfolio_optimizer.py +172 -132
  60. mcli/ml/predictions/prediction_engine.py +235 -0
  61. mcli/ml/preprocessing/data_cleaners.py +6 -5
  62. mcli/ml/preprocessing/feature_extractors.py +7 -6
  63. mcli/ml/preprocessing/ml_pipeline.py +3 -2
  64. mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
  65. mcli/ml/preprocessing/test_preprocessing.py +4 -4
  66. mcli/ml/scripts/populate_sample_data.py +36 -16
  67. mcli/ml/tasks.py +82 -83
  68. mcli/ml/tests/test_integration.py +86 -76
  69. mcli/ml/tests/test_training_dashboard.py +169 -142
  70. mcli/mygroup/test_cmd.py +2 -1
  71. mcli/self/self_cmd.py +38 -18
  72. mcli/self/test_cmd.py +2 -1
  73. mcli/workflow/dashboard/dashboard_cmd.py +13 -6
  74. mcli/workflow/lsh_integration.py +46 -58
  75. mcli/workflow/politician_trading/commands.py +576 -427
  76. mcli/workflow/politician_trading/config.py +7 -7
  77. mcli/workflow/politician_trading/connectivity.py +35 -33
  78. mcli/workflow/politician_trading/data_sources.py +72 -71
  79. mcli/workflow/politician_trading/database.py +18 -16
  80. mcli/workflow/politician_trading/demo.py +4 -3
  81. mcli/workflow/politician_trading/models.py +5 -5
  82. mcli/workflow/politician_trading/monitoring.py +13 -13
  83. mcli/workflow/politician_trading/scrapers.py +332 -224
  84. mcli/workflow/politician_trading/scrapers_california.py +116 -94
  85. mcli/workflow/politician_trading/scrapers_eu.py +70 -71
  86. mcli/workflow/politician_trading/scrapers_uk.py +118 -90
  87. mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
  88. mcli/workflow/politician_trading/workflow.py +98 -71
  89. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +2 -2
  90. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -93
  91. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
  92. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
  93. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
  94. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
@@ -8,169 +8,186 @@ to collect MP financial disclosure data.
8
8
  import asyncio
9
9
  import logging
10
10
  from datetime import datetime
11
- from typing import List, Dict, Any, Optional
11
+ from typing import Any, Dict, List, Optional
12
+
12
13
  import aiohttp
13
14
 
15
+ from .models import Politician, PoliticianRole, TradingDisclosure, TransactionType
14
16
  from .scrapers import BaseScraper
15
- from .models import TradingDisclosure, Politician, PoliticianRole, TransactionType
16
17
 
17
18
  logger = logging.getLogger(__name__)
18
19
 
19
20
 
20
21
  class UKParliamentScraper(BaseScraper):
21
22
  """Scraper for UK Parliament Register of Interests API"""
22
-
23
+
23
24
  def __init__(self, config):
24
25
  super().__init__(config)
25
26
  self.base_url = "https://interests-api.parliament.uk/api/v1"
26
27
  self.session: Optional[aiohttp.ClientSession] = None
27
-
28
+
28
29
  async def __aenter__(self):
29
30
  """Async context manager entry"""
30
31
  self.session = aiohttp.ClientSession(
31
32
  timeout=aiohttp.ClientTimeout(total=self.config.timeout),
32
- headers={'User-Agent': self.config.user_agent}
33
+ headers={"User-Agent": self.config.user_agent},
33
34
  )
34
35
  return self
35
-
36
+
36
37
  async def __aexit__(self, exc_type, exc_val, exc_tb):
37
38
  """Async context manager exit"""
38
39
  if self.session:
39
40
  await self.session.close()
40
-
41
+
41
42
  async def fetch_members_interests(self) -> List[TradingDisclosure]:
42
43
  """Fetch all MP financial interests from the API"""
43
44
  logger.info("Starting UK Parliament financial interests collection")
44
-
45
+
45
46
  if not self.session:
46
47
  raise RuntimeError("Session not initialized. Use async context manager.")
47
-
48
+
48
49
  disclosures = []
49
-
50
+
50
51
  try:
51
52
  # First, get all interest categories to understand what types of interests exist
52
53
  categories = await self._fetch_categories()
53
54
  logger.info(f"Found {len(categories)} interest categories")
54
-
55
+
55
56
  # Get all interests for financial/investment categories
56
57
  financial_categories = self._filter_financial_categories(categories)
57
-
58
+
58
59
  for category in financial_categories:
59
60
  category_disclosures = await self._fetch_interests_by_category(category)
60
61
  disclosures.extend(category_disclosures)
61
-
62
+
62
63
  # Rate limiting
63
64
  await asyncio.sleep(self.config.request_delay)
64
-
65
+
65
66
  logger.info(f"Collected {len(disclosures)} UK Parliament financial interests")
66
67
  return disclosures
67
-
68
+
68
69
  except Exception as e:
69
70
  logger.error(f"Failed to fetch UK Parliament interests: {e}")
70
71
  raise
71
-
72
+
72
73
  async def _fetch_categories(self) -> List[Dict[str, Any]]:
73
74
  """Fetch all interest categories from the API"""
74
75
  url = f"{self.base_url}/Categories"
75
76
  params = {"Take": 100} # Get up to 100 categories
76
-
77
+
77
78
  async with self.session.get(url, params=params) as response:
78
79
  response.raise_for_status()
79
80
  data = await response.json()
80
81
  return data.get("items", [])
81
-
82
- def _filter_financial_categories(self, categories: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
82
+
83
+ def _filter_financial_categories(
84
+ self, categories: List[Dict[str, Any]]
85
+ ) -> List[Dict[str, Any]]:
83
86
  """Filter categories to include only financial/investment related ones"""
84
87
  financial_keywords = [
85
- "shareholding", "share", "investment", "financial", "company",
86
- "directorship", "employment", "remuneration", "sponsorship",
87
- "gift", "benefit", "land", "property"
88
+ "shareholding",
89
+ "share",
90
+ "investment",
91
+ "financial",
92
+ "company",
93
+ "directorship",
94
+ "employment",
95
+ "remuneration",
96
+ "sponsorship",
97
+ "gift",
98
+ "benefit",
99
+ "land",
100
+ "property",
88
101
  ]
89
-
102
+
90
103
  financial_categories = []
91
104
  for category in categories:
92
105
  category_name = category.get("name", "").lower()
93
106
  if any(keyword in category_name for keyword in financial_keywords):
94
107
  financial_categories.append(category)
95
108
  logger.debug(f"Including financial category: {category.get('name')}")
96
-
109
+
97
110
  return financial_categories
98
-
99
- async def _fetch_interests_by_category(self, category: Dict[str, Any]) -> List[TradingDisclosure]:
111
+
112
+ async def _fetch_interests_by_category(
113
+ self, category: Dict[str, Any]
114
+ ) -> List[TradingDisclosure]:
100
115
  """Fetch interests for a specific category"""
101
116
  category_id = category.get("id")
102
117
  category_name = category.get("name")
103
-
118
+
104
119
  logger.debug(f"Fetching interests for category: {category_name} (ID: {category_id})")
105
-
120
+
106
121
  disclosures = []
107
122
  skip = 0
108
123
  take = 50
109
-
124
+
110
125
  while True:
111
126
  url = f"{self.base_url}/Interests"
112
- params = {
113
- "categoryId": category_id,
114
- "Skip": skip,
115
- "Take": take
116
- }
117
-
127
+ params = {"categoryId": category_id, "Skip": skip, "Take": take}
128
+
118
129
  try:
119
130
  async with self.session.get(url, params=params) as response:
120
131
  response.raise_for_status()
121
132
  data = await response.json()
122
-
133
+
123
134
  interests = data.get("items", [])
124
135
  if not interests:
125
136
  break
126
-
137
+
127
138
  for interest in interests:
128
139
  disclosure = await self._parse_uk_interest(interest, category_name)
129
140
  if disclosure:
130
141
  disclosures.append(disclosure)
131
-
142
+
132
143
  skip += take
133
-
144
+
134
145
  # If we got fewer results than requested, we're done
135
146
  if len(interests) < take:
136
147
  break
137
-
148
+
138
149
  except Exception as e:
139
150
  logger.error(f"Failed to fetch interests for category {category_name}: {e}")
140
151
  break
141
-
152
+
142
153
  logger.debug(f"Found {len(disclosures)} interests in category: {category_name}")
143
154
  return disclosures
144
-
145
- async def _parse_uk_interest(self, interest: Dict[str, Any], category_name: str) -> Optional[TradingDisclosure]:
155
+
156
+ async def _parse_uk_interest(
157
+ self, interest: Dict[str, Any], category_name: str
158
+ ) -> Optional[TradingDisclosure]:
146
159
  """Parse a UK Parliament interest into a TradingDisclosure"""
147
160
  try:
148
161
  # Extract member information from the new API structure
149
162
  member_data = interest.get("member")
150
163
  if not member_data:
151
164
  return None
152
-
165
+
153
166
  member_id = member_data.get("id")
154
167
  politician_name = member_data.get("nameDisplayAs", "")
155
-
168
+
156
169
  # Get interest details
157
170
  interest_id = interest.get("id")
158
171
  description = interest.get("summary", "")
159
172
  registered_date = interest.get("registrationDate")
160
-
173
+
161
174
  # Parse dates
162
- transaction_date = self._parse_date(registered_date) if registered_date else datetime.now()
175
+ transaction_date = (
176
+ self._parse_date(registered_date) if registered_date else datetime.now()
177
+ )
163
178
  disclosure_date = transaction_date # UK system doesn't separate these
164
-
179
+
165
180
  # Determine transaction type from description
166
181
  transaction_type = self._infer_transaction_type(description, category_name)
167
-
182
+
168
183
  # Extract asset information from fields and description
169
- asset_name, asset_ticker = self._extract_asset_info_from_fields(interest, description, category_name)
170
-
184
+ asset_name, asset_ticker = self._extract_asset_info_from_fields(
185
+ interest, description, category_name
186
+ )
187
+
171
188
  # Extract amount information (if available)
172
189
  amount_min, amount_max, amount_exact = self._extract_amount_info(description)
173
-
190
+
174
191
  disclosure = TradingDisclosure(
175
192
  id=f"uk_parliament_{interest_id}",
176
193
  politician_id="", # Will be filled during politician matching
@@ -191,29 +208,29 @@ class UKParliamentScraper(BaseScraper):
191
208
  "category_name": category_name,
192
209
  "registered_date": registered_date,
193
210
  "source": "uk_parliament_api",
194
- "politician_name": politician_name
195
- }
211
+ "politician_name": politician_name,
212
+ },
196
213
  )
197
-
214
+
198
215
  return disclosure
199
-
216
+
200
217
  except Exception as e:
201
218
  logger.error(f"Failed to parse UK interest: {e}")
202
219
  return None
203
-
220
+
204
221
  async def _fetch_mp_name(self, member_id: int) -> str:
205
222
  """Fetch MP name from the Parliament API using member ID"""
206
223
  if not self.session:
207
224
  return ""
208
-
225
+
209
226
  try:
210
227
  # Try the Members endpoint to get MP details
211
228
  member_url = f"{self.base_url}/Members/{member_id}"
212
-
229
+
213
230
  async with self.session.get(member_url) as response:
214
231
  if response.status == 200:
215
232
  data = await response.json()
216
-
233
+
217
234
  # Extract name from the response
218
235
  name = data.get("name", "")
219
236
  if not name:
@@ -225,19 +242,21 @@ class UKParliamentScraper(BaseScraper):
225
242
  last_name = data.get("nameFull", "") or data.get("nameFamily", "")
226
243
  if first_name and last_name:
227
244
  name = f"{first_name} {last_name}"
228
-
245
+
229
246
  if name:
230
247
  logger.debug(f"Found MP name for ID {member_id}: {name}")
231
248
  return name.strip()
232
-
249
+
233
250
  else:
234
- logger.debug(f"Could not fetch MP details for ID {member_id}: HTTP {response.status}")
235
-
251
+ logger.debug(
252
+ f"Could not fetch MP details for ID {member_id}: HTTP {response.status}"
253
+ )
254
+
236
255
  except Exception as e:
237
256
  logger.debug(f"Failed to fetch MP name for ID {member_id}: {e}")
238
-
257
+
239
258
  return ""
240
-
259
+
241
260
  def _parse_date(self, date_str: str) -> datetime:
242
261
  """Parse UK Parliament API date format"""
243
262
  try:
@@ -245,12 +264,12 @@ class UKParliamentScraper(BaseScraper):
245
264
  return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
246
265
  except Exception:
247
266
  return datetime.now()
248
-
267
+
249
268
  def _infer_transaction_type(self, description: str, category_name: str) -> TransactionType:
250
269
  """Infer transaction type from description and category"""
251
270
  description_lower = description.lower()
252
271
  category_lower = category_name.lower()
253
-
272
+
254
273
  # UK Parliament disclosures are mostly about holdings, not transactions
255
274
  # But we can infer some information
256
275
  if any(word in description_lower for word in ["sold", "disposed", "divested"]):
@@ -261,29 +280,33 @@ class UKParliamentScraper(BaseScraper):
261
280
  return TransactionType.PURCHASE # Assume shareholding disclosure is a purchase
262
281
  else:
263
282
  return TransactionType.PURCHASE # Default assumption
264
-
265
- def _extract_asset_info_from_fields(self, interest: Dict[str, Any], description: str, category_name: str) -> tuple[str, Optional[str]]:
283
+
284
+ def _extract_asset_info_from_fields(
285
+ self, interest: Dict[str, Any], description: str, category_name: str
286
+ ) -> tuple[str, Optional[str]]:
266
287
  """Extract asset name and ticker from interest fields"""
267
288
  # Look for OrganisationName in fields
268
289
  fields = interest.get("fields", [])
269
290
  organization_name = None
270
-
291
+
271
292
  for field in fields:
272
293
  if field.get("name") == "OrganisationName":
273
294
  organization_name = field.get("value")
274
295
  break
275
-
296
+
276
297
  # Use organization name if available, otherwise fall back to description
277
298
  if organization_name:
278
299
  return organization_name, None
279
300
  else:
280
301
  return self._extract_asset_info(description, category_name)
281
-
282
- def _extract_asset_info(self, description: str, category_name: str) -> tuple[str, Optional[str]]:
302
+
303
+ def _extract_asset_info(
304
+ self, description: str, category_name: str
305
+ ) -> tuple[str, Optional[str]]:
283
306
  """Extract asset name and ticker from description"""
284
307
  # UK descriptions often contain company names
285
308
  # This is a simplified extraction - could be enhanced with NLP
286
-
309
+
287
310
  if "shareholding" in category_name.lower():
288
311
  # Try to extract company name from shareholding descriptions
289
312
  # Format often like: "Shareholding in [Company Name] Ltd"
@@ -292,38 +315,41 @@ class UKParliamentScraper(BaseScraper):
292
315
  if len(parts) > 1:
293
316
  asset_name = parts[1].strip().rstrip(".")
294
317
  return asset_name, None
295
-
318
+
296
319
  # Fallback: use description as asset name
297
320
  return description[:100], None # Truncate to reasonable length
298
-
299
- def _extract_amount_info(self, description: str) -> tuple[Optional[float], Optional[float], Optional[float]]:
321
+
322
+ def _extract_amount_info(
323
+ self, description: str
324
+ ) -> tuple[Optional[float], Optional[float], Optional[float]]:
300
325
  """Extract amount information from description"""
301
326
  # UK Parliament disclosures often don't include specific amounts
302
327
  # They use threshold categories (£70,000+, etc.)
303
-
328
+
304
329
  description_lower = description.lower()
305
-
330
+
306
331
  # Look for UK threshold amounts
307
332
  if "£70,000" in description_lower or "70000" in description_lower:
308
333
  return 70000.0, None, None
309
334
  elif "£" in description_lower:
310
335
  # Try to extract specific amounts
311
336
  import re
312
- amount_pattern = r'£([\d,]+)'
337
+
338
+ amount_pattern = r"£([\d,]+)"
313
339
  matches = re.findall(amount_pattern, description)
314
340
  if matches:
315
341
  try:
316
- amount = float(matches[0].replace(',', ''))
342
+ amount = float(matches[0].replace(",", ""))
317
343
  return amount, None, amount
318
344
  except ValueError:
319
345
  pass
320
-
346
+
321
347
  return None, None, None
322
-
348
+
323
349
  async def get_politicians(self) -> List[Politician]:
324
350
  """Fetch current MPs from the Members API"""
325
351
  logger.info("Fetching current UK MPs")
326
-
352
+
327
353
  # For now, return empty list - would need Members API integration
328
354
  # This would require calling https://members-api.parliament.uk/
329
355
  return []
@@ -338,13 +364,15 @@ async def run_uk_parliament_collection(config) -> List[TradingDisclosure]:
338
364
  # Example usage for testing
339
365
  if __name__ == "__main__":
340
366
  from .config import WorkflowConfig
341
-
367
+
342
368
  async def main():
343
369
  config = WorkflowConfig.default()
344
370
  disclosures = await run_uk_parliament_collection(config.scraping)
345
371
  print(f"Collected {len(disclosures)} UK Parliament financial interests")
346
-
372
+
347
373
  for disclosure in disclosures[:3]: # Show first 3
348
- print(f"- {disclosure.asset_name} by {disclosure.raw_data.get('politician_name', 'Unknown')}")
349
-
350
- asyncio.run(main())
374
+ print(
375
+ f"- {disclosure.asset_name} by {disclosure.raw_data.get('politician_name', 'Unknown')}"
376
+ )
377
+
378
+ asyncio.run(main())