mcli-framework 7.10.0__py3-none-any.whl → 7.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (42):
  1. mcli/lib/custom_commands.py +10 -0
  2. mcli/lib/optional_deps.py +240 -0
  3. mcli/ml/backtesting/run.py +5 -3
  4. mcli/ml/models/ensemble_models.py +1 -0
  5. mcli/ml/models/recommendation_models.py +1 -0
  6. mcli/ml/optimization/optimize.py +6 -4
  7. mcli/ml/serving/serve.py +2 -2
  8. mcli/ml/training/train.py +14 -7
  9. mcli/self/completion_cmd.py +2 -2
  10. mcli/workflow/doc_convert.py +82 -112
  11. mcli/workflow/git_commit/ai_service.py +13 -2
  12. mcli/workflow/notebook/converter.py +375 -0
  13. mcli/workflow/notebook/notebook_cmd.py +441 -0
  14. mcli/workflow/notebook/schema.py +402 -0
  15. mcli/workflow/notebook/validator.py +313 -0
  16. mcli/workflow/workflow.py +14 -0
  17. {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/METADATA +37 -3
  18. {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/RECORD +22 -37
  19. mcli/ml/features/political_features.py +0 -677
  20. mcli/ml/preprocessing/politician_trading_preprocessor.py +0 -570
  21. mcli/workflow/politician_trading/config.py +0 -134
  22. mcli/workflow/politician_trading/connectivity.py +0 -492
  23. mcli/workflow/politician_trading/data_sources.py +0 -654
  24. mcli/workflow/politician_trading/database.py +0 -412
  25. mcli/workflow/politician_trading/demo.py +0 -249
  26. mcli/workflow/politician_trading/models.py +0 -327
  27. mcli/workflow/politician_trading/monitoring.py +0 -413
  28. mcli/workflow/politician_trading/scrapers.py +0 -1074
  29. mcli/workflow/politician_trading/scrapers_california.py +0 -434
  30. mcli/workflow/politician_trading/scrapers_corporate_registry.py +0 -797
  31. mcli/workflow/politician_trading/scrapers_eu.py +0 -376
  32. mcli/workflow/politician_trading/scrapers_free_sources.py +0 -509
  33. mcli/workflow/politician_trading/scrapers_third_party.py +0 -373
  34. mcli/workflow/politician_trading/scrapers_uk.py +0 -378
  35. mcli/workflow/politician_trading/scrapers_us_states.py +0 -471
  36. mcli/workflow/politician_trading/seed_database.py +0 -520
  37. mcli/workflow/politician_trading/supabase_functions.py +0 -354
  38. mcli/workflow/politician_trading/workflow.py +0 -879
  39. {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/WHEEL +0 -0
  40. {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/entry_points.txt +0 -0
  41. {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/licenses/LICENSE +0 -0
  42. {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/top_level.txt +0 -0
@@ -1,378 +0,0 @@
1
- """
2
- UK Parliament API scraper for financial interests register data
3
-
4
- This module implements scrapers for the UK Parliament's Register of Interests API
5
- to collect MP financial disclosure data.
6
- """
7
-
8
- import asyncio
9
- import logging
10
- from datetime import datetime
11
- from typing import Any, Dict, List, Optional
12
-
13
- import aiohttp
14
-
15
- from .models import Politician, PoliticianRole, TradingDisclosure, TransactionType
16
- from .scrapers import BaseScraper
17
-
18
- logger = logging.getLogger(__name__)
19
-
20
-
21
class UKParliamentScraper(BaseScraper):
    """Scraper for the UK Parliament Register of Interests API.

    Must be used as an async context manager: the aiohttp session is created in
    ``__aenter__`` and closed in ``__aexit__``.
    """

    # Substrings (lower-case) that mark a category name as financial/investment related.
    _FINANCIAL_KEYWORDS = (
        "shareholding",
        "share",
        "investment",
        "financial",
        "company",
        "directorship",
        "employment",
        "remuneration",
        "sponsorship",
        "gift",
        "benefit",
        "land",
        "property",
    )

    def __init__(self, config):
        """Store the scraping config and prepare an (unopened) session slot."""
        super().__init__(config)
        self.base_url = "https://interests-api.parliament.uk/api/v1"
        self.session: Optional[aiohttp.ClientSession] = None

    async def __aenter__(self):
        """Open the HTTP session using the configured timeout and user agent."""
        self.session = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=self.config.timeout),
            headers={"User-Agent": self.config.user_agent},
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the HTTP session and forget it so later calls fail fast."""
        if self.session:
            try:
                await self.session.close()
            finally:
                # A closed session must not look "initialized" to
                # fetch_members_interests() if the scraper is reused.
                self.session = None

    async def fetch_members_interests(self) -> List[TradingDisclosure]:
        """Fetch all MP financial interests from the API.

        Returns:
            Disclosures parsed from every category that passes the financial filter.

        Raises:
            RuntimeError: If called outside the async context manager.
            Exception: Any error from fetching the category list is logged and re-raised.
        """
        logger.info("Starting UK Parliament financial interests collection")

        if not self.session:
            raise RuntimeError("Session not initialized. Use async context manager.")

        disclosures = []

        try:
            # First, get all interest categories to understand what types of interests exist
            categories = await self._fetch_categories()
            logger.info(f"Found {len(categories)} interest categories")

            # Get all interests for financial/investment categories
            financial_categories = self._filter_financial_categories(categories)

            for category in financial_categories:
                category_disclosures = await self._fetch_interests_by_category(category)
                disclosures.extend(category_disclosures)

                # Rate limiting
                await asyncio.sleep(self.config.request_delay)

            logger.info(f"Collected {len(disclosures)} UK Parliament financial interests")
            return disclosures

        except Exception as e:
            logger.error(f"Failed to fetch UK Parliament interests: {e}")
            raise

    async def _fetch_categories(self) -> List[Dict[str, Any]]:
        """Fetch all interest categories from the API (first 100 only)."""
        url = f"{self.base_url}/Categories"
        params = {"Take": 100}  # Get up to 100 categories

        async with self.session.get(url, params=params) as response:
            response.raise_for_status()
            data = await response.json()
            # "items" may be present but null; always hand back a list.
            return data.get("items") or []

    def _filter_financial_categories(
        self, categories: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Keep only categories whose name contains a financial keyword.

        Matching is a case-insensitive substring test against
        ``_FINANCIAL_KEYWORDS``; categories with a missing or null name are skipped.
        """
        financial_categories = []
        for category in categories:
            # `or ""` also covers an explicit null name, which .get's default does not.
            category_name = (category.get("name") or "").lower()
            if any(keyword in category_name for keyword in self._FINANCIAL_KEYWORDS):
                financial_categories.append(category)
                logger.debug(f"Including financial category: {category.get('name')}")

        return financial_categories

    async def _fetch_interests_by_category(
        self, category: Dict[str, Any]
    ) -> List[TradingDisclosure]:
        """Fetch and parse every interest in one category, paging 50 at a time.

        Best effort: a failure on any page is logged and ends paging for this
        category, returning whatever was collected so far.
        """
        category_id = category.get("id")
        category_name = category.get("name")

        logger.debug(f"Fetching interests for category: {category_name} (ID: {category_id})")

        disclosures = []
        skip = 0
        take = 50

        while True:
            url = f"{self.base_url}/Interests"
            params = {"categoryId": category_id, "Skip": skip, "Take": take}

            try:
                async with self.session.get(url, params=params) as response:
                    response.raise_for_status()
                    data = await response.json()

                    interests = data.get("items") or []
                    if not interests:
                        break

                    for interest in interests:
                        disclosure = await self._parse_uk_interest(interest, category_name)
                        if disclosure:
                            disclosures.append(disclosure)

                    skip += take

                    # If we got fewer results than requested, we're done
                    if len(interests) < take:
                        break

            except Exception as e:
                logger.error(f"Failed to fetch interests for category {category_name}: {e}")
                break

        logger.debug(f"Found {len(disclosures)} interests in category: {category_name}")
        return disclosures

    async def _parse_uk_interest(
        self, interest: Dict[str, Any], category_name: str
    ) -> Optional[TradingDisclosure]:
        """Parse a UK Parliament interest into a TradingDisclosure.

        Returns:
            The disclosure, or None when the record has no member data or
            cannot be parsed (the error is logged).
        """
        try:
            # Extract member information from the new API structure
            member_data = interest.get("member")
            if not member_data:
                return None

            member_id = member_data.get("id")
            politician_name = member_data.get("nameDisplayAs", "")

            # Get interest details; a null summary is treated as empty text so
            # the record is not discarded by the string helpers below.
            interest_id = interest.get("id")
            description = interest.get("summary") or ""
            registered_date = interest.get("registrationDate")

            # Parse dates
            transaction_date = (
                self._parse_date(registered_date) if registered_date else datetime.now()
            )
            disclosure_date = transaction_date  # UK system doesn't separate these

            # Determine transaction type from description
            transaction_type = self._infer_transaction_type(description, category_name)

            # Extract asset information from fields and description
            asset_name, asset_ticker = self._extract_asset_info_from_fields(
                interest, description, category_name
            )

            # Extract amount information (if available)
            amount_min, amount_max, amount_exact = self._extract_amount_info(description)

            disclosure = TradingDisclosure(
                id=f"uk_parliament_{interest_id}",
                politician_id="",  # Will be filled during politician matching
                transaction_date=transaction_date,
                disclosure_date=disclosure_date,
                transaction_type=transaction_type,
                asset_name=asset_name,
                asset_ticker=asset_ticker,
                asset_type="shareholding",  # Most UK disclosures are shareholdings
                amount_range_min=amount_min,
                amount_range_max=amount_max,
                amount_exact=amount_exact,
                source_url="https://www.parliament.uk/mps-lords-and-offices/standards-and-financial-interests/",
                raw_data={
                    "uk_interest_id": interest_id,
                    "uk_member_id": member_id,
                    "description": description,
                    "category_name": category_name,
                    "registered_date": registered_date,
                    "source": "uk_parliament_api",
                    "politician_name": politician_name,
                },
            )

            return disclosure

        except Exception as e:
            logger.error(f"Failed to parse UK interest: {e}")
            return None

    async def _fetch_mp_name(self, member_id: int) -> str:
        """Fetch an MP's display name by member ID.

        Returns:
            The stripped name, or "" when there is no session, the request
            fails, or the response carries no usable name field.
        """
        if not self.session:
            return ""

        try:
            # Try the Members endpoint to get MP details
            member_url = f"{self.base_url}/Members/{member_id}"

            async with self.session.get(member_url) as response:
                if response.status == 200:
                    data = await response.json()

                    # Prefer a ready-made name field. "nameFull" is presumably
                    # already a complete name (TODO confirm against the API),
                    # so it is used as-is rather than appended to the given name.
                    name = (
                        data.get("name")
                        or data.get("displayAs")
                        or data.get("nameFull")
                        or ""
                    )
                    if not name:
                        # Combine given and family name if both are available
                        first_name = data.get("nameGiven") or ""
                        last_name = data.get("nameFamily") or ""
                        if first_name and last_name:
                            name = f"{first_name} {last_name}"

                    if name:
                        logger.debug(f"Found MP name for ID {member_id}: {name}")
                        return name.strip()

                else:
                    logger.debug(
                        f"Could not fetch MP details for ID {member_id}: HTTP {response.status}"
                    )

        except Exception as e:
            logger.debug(f"Failed to fetch MP name for ID {member_id}: {e}")

        return ""

    def _parse_date(self, date_str: str) -> datetime:
        """Parse an ISO-format date string, accepting a trailing "Z".

        Returns:
            The parsed datetime, or the current local time when the value
            cannot be parsed (a warning is logged in that case).
        """
        try:
            # fromisoformat() on older Pythons rejects "Z"; spell it as an offset.
            return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
        except (AttributeError, TypeError, ValueError):
            logger.warning(f"Could not parse UK Parliament date {date_str!r}; using current time")
            return datetime.now()

    def _infer_transaction_type(self, description: str, category_name: str) -> TransactionType:
        """Infer the transaction type from the free-text description.

        Whole-word matching is used so that e.g. "soldier" is not read as
        "sold". ``category_name`` is accepted for interface compatibility; every
        category currently defaults to PURCHASE.
        """
        import re

        description_lower = (description or "").lower()

        # UK Parliament disclosures are mostly about holdings, not transactions,
        # so anything that is not explicitly a disposal is treated as a purchase.
        if re.search(r"\b(?:sold|disposed|divested)\b", description_lower):
            return TransactionType.SALE
        return TransactionType.PURCHASE

    def _extract_asset_info_from_fields(
        self, interest: Dict[str, Any], description: str, category_name: str
    ) -> tuple[str, Optional[str]]:
        """Extract asset name and ticker, preferring the OrganisationName field.

        Falls back to parsing the description when no non-empty
        OrganisationName is found. The ticker is always None.
        """
        # "fields" may be missing or null
        fields = interest.get("fields") or []
        organization_name = None

        for field in fields:
            if field.get("name") == "OrganisationName":
                organization_name = field.get("value")
                break

        # Use organization name if available, otherwise fall back to description
        if organization_name:
            return organization_name, None
        return self._extract_asset_info(description, category_name)

    def _extract_asset_info(
        self, description: str, category_name: str
    ) -> tuple[str, Optional[str]]:
        """Extract asset name and ticker from the description.

        For shareholding categories the text after the first " in " is used
        (e.g. "Shareholding in Acme Ltd." -> "Acme Ltd"). Otherwise, or when that
        yields nothing, the first 100 characters of the description are returned.
        The ticker is always None.
        """
        # This is a simplified extraction - could be enhanced with NLP
        description = description or ""

        if "shareholding" in (category_name or "").lower():
            _, separator, remainder = description.partition(" in ")
            if separator:
                asset_name = remainder.strip().rstrip(".")
                # Do not return an empty name for text like "Shareholding in ."
                if asset_name:
                    return asset_name, None

        # Fallback: use description as asset name
        return description[:100], None  # Truncate to reasonable length

    def _extract_amount_info(
        self, description: str
    ) -> tuple[Optional[float], Optional[float], Optional[float]]:
        """Extract (min, max, exact) amount information from the description.

        Returns:
            ``(70000.0, None, None)`` when the £70,000 threshold is mentioned,
            ``(amount, None, amount)`` for the first other "£" amount found,
            or ``(None, None, None)`` when no amount is present.
        """
        import re

        if not description:
            return None, None, None

        # Digits with optional thousands separators and an optional decimal part.
        amounts = [
            float(raw.replace(",", ""))
            for raw in re.findall(r"£\s*(\d[\d,]*(?:\.\d+)?)", description)
        ]

        # UK Parliament disclosures often use threshold categories (£70,000+)
        # instead of exact amounts. Compare parsed values / whole numbers so that
        # "£70,000,000" or "170000" are not mistaken for the threshold.
        if 70000.0 in amounts or re.search(r"(?<!\d)70000(?!\d)", description):
            return 70000.0, None, None

        if amounts:
            return amounts[0], None, amounts[0]

        return None, None, None

    async def get_politicians(self) -> List[Politician]:
        """Fetch current MPs from the Members API (not implemented; returns [])."""
        logger.info("Fetching current UK MPs")

        # For now, return empty list - would need Members API integration
        # This would require calling https://members-api.parliament.uk/
        return []
356
-
357
-
358
async def run_uk_parliament_collection(config) -> List[TradingDisclosure]:
    """Run one UK Parliament collection pass inside a managed scraper session."""
    scraper = UKParliamentScraper(config)
    async with scraper as active_scraper:
        collected = await active_scraper.fetch_members_interests()
    return collected
362
-
363
-
364
# Manual smoke test: run a collection with the default config and print a sample
if __name__ == "__main__":
    from .config import WorkflowConfig

    async def main():
        scraping_config = WorkflowConfig.default().scraping
        results = await run_uk_parliament_collection(scraping_config)
        print(f"Collected {len(results)} UK Parliament financial interests")

        # Preview only the first three disclosures
        for item in results[:3]:
            print(
                f"- {item.asset_name} by {item.raw_data.get('politician_name', 'Unknown')}"
            )

    asyncio.run(main())