borsapy 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,534 @@
1
+ """KAP (Kamuyu Aydınlatma Platformu) provider for disclosures and calendar."""
2
+
3
+ import io
4
+ import re
5
+ import time
6
+ from datetime import datetime, timedelta
7
+
8
+ import pandas as pd
9
+
10
+ from borsapy._providers.base import BaseProvider
11
+ from borsapy.exceptions import APIError
12
+
13
+
14
class KAPProvider(BaseProvider):
    """
    Provider for KAP (Kamuyu Aydınlatma Platformu) data.

    KAP is the official disclosure platform for publicly traded
    companies in Turkey, similar to SEC EDGAR in the US.

    Provides:
    - List of all BIST companies with ticker codes
    - Company search functionality
    - Company disclosures (bildirimler)
    - Expected disclosure calendar (beklenen bildirimler)
    - Company details (sector, market, website, business summary)
    - Raw disclosure page content

    Results that are expensive to fetch (company list, member OID map,
    company details) are cached in memory for ``CACHE_DURATION`` seconds.
    """

    EXCEL_URL = "https://www.kap.org.tr/tr/api/company/generic/excel/IGS/A"
    BIST_COMPANIES_URL = "https://www.kap.org.tr/tr/bist-sirketler"
    DISCLOSURE_URL = "https://www.kap.org.tr/tr/bildirim-sorgu-sonuc"
    DISCLOSURE_DETAIL_URL = "https://www.kap.org.tr/tr/Bildirim"
    CALENDAR_API_URL = "https://kap.org.tr/tr/api/expected-disclosure-inquiry/company"
    COMPANY_INFO_URL = "https://kap.org.tr/tr/sirket-bilgileri/ozet"
    COMPANY_GENERAL_URL = "https://kap.org.tr/tr/sirket-bilgileri/genel"
    CACHE_DURATION = 86400  # 24 hours

    # Column layouts shared by the "no data" and "data" return paths so that
    # callers always receive the same schema.
    _COMPANY_COLUMNS = ("ticker", "name", "city")
    _DISCLOSURE_COLUMNS = ("Date", "Title", "URL")
    _CALENDAR_COLUMNS = ("StartDate", "EndDate", "Subject", "Period", "Year")

    def __init__(self):
        super().__init__()
        # Company list cache (see get_companies).
        self._company_cache: pd.DataFrame | None = None
        self._cache_time: float = 0
        # stockCode -> mkkMemberOid cache (see get_member_oid).
        self._oid_map: dict[str, str] | None = None
        self._oid_cache_time: float = 0
        # Per-symbol company detail cache (see get_company_details).
        self._company_details_cache: dict[str, dict] = {}
        self._company_details_cache_time: dict[str, float] = {}

    @staticmethod
    def _clean_symbol(symbol: str) -> str:
        """Upper-case a symbol and drop the ".IS" / ".E" market markers."""
        return symbol.strip().upper().replace(".IS", "").replace(".E", "")

    @staticmethod
    def _cell_text(value) -> str:
        """Return a spreadsheet cell as stripped text, or "" when it is missing."""
        return str(value).strip() if pd.notna(value) else ""

    def get_companies(self) -> pd.DataFrame:
        """
        Get list of all BIST companies.

        The list is downloaded from the KAP Excel export and cached for
        ``CACHE_DURATION`` seconds. A row listing several tickers
        (e.g., "GARAN, TGB") is expanded into one row per ticker.

        Returns:
            DataFrame with columns: ticker, name, city. The columns are
            present even when no company could be parsed.

        Raises:
            APIError: If the download or the parsing fails.
        """
        current_time = time.time()

        # Serve from cache while it is still fresh.
        if (
            self._company_cache is not None
            and (current_time - self._cache_time) < self.CACHE_DURATION
        ):
            return self._company_cache

        try:
            headers = {
                "Accept": "*/*",
                "Accept-Language": "tr",
                "User-Agent": self.DEFAULT_HEADERS["User-Agent"],
                "Referer": "https://www.kap.org.tr/tr/bist-sirketler",
            }

            response = self._client.get(self.EXCEL_URL, headers=headers)
            response.raise_for_status()

            df = pd.read_excel(io.BytesIO(response.content))

            companies = []
            for _, row in df.iterrows():
                if len(row) < 3:
                    continue

                ticker_field = self._cell_text(row.iloc[0])
                name = self._cell_text(row.iloc[1])
                city = self._cell_text(row.iloc[2])

                # Skip header or empty rows.
                if not ticker_field or not name:
                    continue
                if ticker_field in ("BIST KODU", "Kod"):
                    continue

                # split(",") also covers the single-ticker case, so one loop
                # handles both "THYAO" and "GARAN, TGB".
                for ticker in ticker_field.split(","):
                    ticker = ticker.strip()
                    if ticker:
                        companies.append(
                            {"ticker": ticker, "name": name, "city": city}
                        )

            # Passing columns keeps the schema stable when nothing was parsed.
            result = pd.DataFrame(companies, columns=list(self._COMPANY_COLUMNS))
            self._company_cache = result
            self._cache_time = current_time
            return result

        except Exception as e:
            raise APIError(f"Failed to fetch company list: {e}") from e

    def search(self, query: str) -> pd.DataFrame:
        """
        Search companies by name or ticker.

        Matches are ranked: exact ticker match first, then tickers that
        start with the query, then companies whose normalized name contains
        the normalized query.

        Args:
            query: Search query (ticker code or company name). Surrounding
                whitespace is ignored.

        Returns:
            DataFrame with matching companies (columns: ticker, name, city),
            best matches first. Empty when nothing matches.

        Raises:
            APIError: If the company list cannot be fetched.
        """
        query = query.strip() if query else ""
        if not query:
            return pd.DataFrame(columns=list(self._COMPANY_COLUMNS))

        companies = self.get_companies()
        if companies.empty:
            return pd.DataFrame(columns=list(self._COMPANY_COLUMNS))

        query_normalized = self._normalize_text(query)
        query_upper = query.upper()

        # Score and filter results.
        results = []
        for _, row in companies.iterrows():
            ticker_upper = row["ticker"].upper()

            if ticker_upper == query_upper:
                score = 1000
            elif ticker_upper.startswith(query_upper):
                score = 500
            # An empty normalized query (e.g. ".") is a substring of every
            # name, so it must not count as a name match.
            elif query_normalized and query_normalized in self._normalize_text(
                row["name"]
            ):
                score = 100
            else:
                continue

            results.append((score, row))

        if not results:
            return pd.DataFrame(columns=list(self._COMPANY_COLUMNS))

        # Stable sort: equal scores keep the company-list order.
        results.sort(key=lambda x: x[0], reverse=True)
        return pd.DataFrame([r[1] for r in results])

    def _normalize_text(self, text: str) -> str:
        """
        Normalize Turkish text for comparison.

        Turkish-specific letters are mapped to ASCII, the text is
        lower-cased, and punctuation plus the " a.s." / " anonim sirketi"
        company suffixes are removed.
        """
        tr_map = str.maketrans("İıÖöÜüŞşÇçĞğ", "iioouussccgg")
        normalized = text.translate(tr_map).lower()
        # Remove punctuation and common company suffixes.
        normalized = re.sub(r"[\.,']|\s+a\.s\.?|\s+anonim sirketi", "", normalized)
        return normalized.strip()

    def get_member_oid(self, symbol: str) -> str | None:
        """
        Get KAP member OID (mkkMemberOid) for a stock symbol.

        The member OID is required to query disclosures from KAP. The full
        symbol -> OID map is scraped from the BIST companies page and cached
        for ``CACHE_DURATION`` seconds.

        Args:
            symbol: Stock symbol (e.g., "THYAO").

        Returns:
            Member OID string or None if not found. Lookup is best-effort:
            network and parsing failures never raise; a previously fetched
            map is used as a fallback when a refresh fails.
        """
        symbol = self._clean_symbol(symbol)
        current_time = time.time()

        # Serve from cache while it is still fresh.
        if (
            self._oid_map is not None
            and (current_time - self._oid_cache_time) < self.CACHE_DURATION
        ):
            return self._oid_map.get(symbol)

        try:
            response = self._client.get(self.BIST_COMPANIES_URL, timeout=20)
            response.raise_for_status()

            # Parse mkkMemberOid and stockCode pairs from Next.js data.
            # Format: \"mkkMemberOid\":\"xxx\",\"kapMemberTitle\":\"...\",
            #         \"relatedMemberTitle\":\"...\",\"stockCode\":\"THYAO\",...
            # Note: stockCode may contain multiple codes like "GARAN, TGB".
            pattern = (
                r'\\"mkkMemberOid\\":\\"([^\\"]+)\\",'
                r'\\"kapMemberTitle\\":\\"[^\\"]+\\",'
                r'\\"relatedMemberTitle\\":\\"[^\\"]*\\",'
                r'\\"stockCode\\":\\"([^\\"]+)\\"'
            )

            # Build the mapping locally so a failure cannot leave a
            # half-filled map on the instance.
            fresh_map: dict[str, str] = {}
            for oid, codes_str in re.findall(pattern, response.text):
                for code in codes_str.split(","):
                    code = code.strip()
                    if code:
                        fresh_map[code] = oid
        except Exception:
            fresh_map = {}

        if fresh_map:
            self._oid_map = fresh_map
            self._oid_cache_time = current_time
            return fresh_map.get(symbol)

        # Nothing usable was fetched. Do not cache an empty map (that would
        # hide every symbol for a full cache period); fall back to the last
        # known map, if any, and retry on the next call.
        if self._oid_map:
            return self._oid_map.get(symbol)
        return None

    def get_disclosures(self, symbol: str, limit: int = 20) -> pd.DataFrame:
        """
        Get KAP disclosures (bildirimler) for a stock.

        Args:
            symbol: Stock symbol (e.g., "THYAO").
            limit: Maximum number of disclosures to return (default: 20).
                Zero or a negative value returns no rows; None returns all.

        Returns:
            DataFrame with columns: Date, Title, URL. Empty (with the same
            columns) when the symbol is unknown or has no disclosures.

        Raises:
            APIError: If the disclosure page cannot be fetched or parsed.
        """
        symbol = self._clean_symbol(symbol)
        columns = list(self._DISCLOSURE_COLUMNS)

        # Get KAP member OID for the symbol.
        member_oid = self.get_member_oid(symbol)
        if not member_oid:
            return pd.DataFrame(columns=columns)

        disc_url = f"{self.DISCLOSURE_URL}?member={member_oid}"

        # A negative slice bound would drop items from the end instead of
        # limiting the result, so clamp it.
        if limit is not None:
            limit = max(limit, 0)

        try:
            response = self._client.get(disc_url, timeout=15)
            response.raise_for_status()

            # Parse disclosures from Next.js embedded data.
            # Format: publishDate\\":\\"29.12.2025 19:21:18\\",\\"disclosureIndex\\":1530826...
            pattern = (
                r'publishDate\\":\\"([^\\"]+)\\".*?'
                r'disclosureIndex\\":(\d+).*?'
                r'title\\":\\"([^\\"]+)\\"'
            )
            matches = re.findall(pattern, response.text, re.DOTALL)

            records = [
                {
                    "Date": date,
                    "Title": title,
                    "URL": f"{self.DISCLOSURE_DETAIL_URL}/{idx}",
                }
                for date, idx, title in matches[:limit]
            ]

            return pd.DataFrame(records, columns=columns)

        except Exception as e:
            raise APIError(f"Failed to fetch disclosures for {symbol}: {e}") from e

    def get_calendar(self, symbol: str) -> pd.DataFrame:
        """
        Get expected disclosure calendar for a stock from KAP.

        Returns upcoming expected disclosures like financial reports,
        annual reports, sustainability reports, and corporate governance
        reports. The query window runs from today to 180 days ahead.

        Args:
            symbol: Stock symbol (e.g., "THYAO").

        Returns:
            DataFrame with columns:
            - StartDate: Expected disclosure window start
            - EndDate: Expected disclosure window end
            - Subject: Type of disclosure (e.g., "Finansal Rapor")
            - Period: Report period (e.g., "Yıllık", "3 Aylık")
            - Year: Fiscal year
            Empty (with the same columns) when the symbol is unknown or
            nothing is expected.

        Raises:
            APIError: If the calendar request fails or returns an
                unexpected payload.
        """
        symbol = self._clean_symbol(symbol)
        columns = list(self._CALENDAR_COLUMNS)

        # Get KAP member OID for the symbol.
        member_oid = self.get_member_oid(symbol)
        if not member_oid:
            return pd.DataFrame(columns=columns)

        # Date range: today to roughly 6 months from now.
        now = datetime.now()
        start_date = now.strftime("%Y-%m-%d")
        end_date = (now + timedelta(days=180)).strftime("%Y-%m-%d")

        headers = {
            "Accept": "*/*",
            "Content-Type": "application/json",
            "Origin": "https://kap.org.tr",
            "Referer": "https://kap.org.tr/tr/beklenen-bildirim-sorgu",
        }
        payload = {
            "startDate": start_date,
            "endDate": end_date,
            "memberTypes": ["IGS"],
            "mkkMemberOidList": [member_oid],
            "disclosureClass": "",
            "subjects": [],
            "mainSector": "",
            "sector": "",
            "subSector": "",
            "market": "",
            "index": "",
            "year": "",
            "term": "",
            "ruleType": "",
        }

        try:
            response = self._client.post(
                self.CALENDAR_API_URL,
                json=payload,
                headers=headers,
                timeout=15,
            )
            response.raise_for_status()
            data = response.json()

            # A JSON null body means "nothing expected", not an error.
            records = [
                {
                    "StartDate": item.get("startDate", ""),
                    "EndDate": item.get("endDate", ""),
                    "Subject": item.get("subject", ""),
                    "Period": item.get("ruleTypeTerm", ""),
                    "Year": item.get("year", ""),
                }
                for item in (data or [])
            ]

            return pd.DataFrame(records, columns=columns)

        except Exception as e:
            raise APIError(f"Failed to fetch calendar for {symbol}: {e}") from e

    def get_company_details(self, symbol: str) -> dict:
        """
        Get company details from KAP company info page.

        Scrapes the KAP company page for sector, market, and website
        information, and the general-information page for the business
        summary. Results are cached per symbol for ``CACHE_DURATION``
        seconds.

        Args:
            symbol: Stock symbol (e.g., "THYAO").

        Returns:
            Dict with the keys that could be extracted:
            - sector: Company sector (e.g., "ULAŞTIRMA VE DEPOLAMA")
            - market: Stock market (e.g., "YILDIZ PAZAR")
            - website: Company website URL(s)
            - businessSummary: Company field of activity (Faaliyet Konusu)
            An empty dict is returned when the symbol is unknown or the
            page cannot be fetched; this method never raises. The returned
            dict is a copy, so callers may modify it freely.
        """
        symbol = self._clean_symbol(symbol)
        current_time = time.time()

        # Serve from cache while it is still fresh.
        if symbol in self._company_details_cache:
            cache_time = self._company_details_cache_time.get(symbol, 0)
            if (current_time - cache_time) < self.CACHE_DURATION:
                # Copy so caller-side mutation cannot corrupt the cache.
                return dict(self._company_details_cache[symbol])

        # Get KAP member OID for the symbol.
        member_oid = self.get_member_oid(symbol)
        if not member_oid:
            return {}

        url = f"{self.COMPANY_INFO_URL}/{member_oid}"

        try:
            response = self._client.get(url, timeout=15)
            response.raise_for_status()
            html = response.text

            result = {}

            # Extract sector: href="/tr/Sektorler?sector=...">SECTOR_NAME</a>
            sector_match = re.search(
                r'href="/tr/Sektorler\?sector=[^"]*">([^<]+)</a>',
                html
            )
            if sector_match:
                result["sector"] = sector_match.group(1).strip()

            # Extract market: href="/tr/Pazarlar?market=...">MARKET_NAME</a>
            market_match = re.search(
                r'href="/tr/Pazarlar\?market=[^"]*">([^<]+)</a>',
                html
            )
            if market_match:
                result["market"] = market_match.group(1).strip()

            # Extract website: after "İnternet Adresi" label.
            # Pattern: <h3...>İnternet Adresi</h3><p class="...">WEBSITE</p>
            website_match = re.search(
                r'İnternet Adresi</h3><p[^>]*>([^<]+)</p>',
                html
            )
            if website_match:
                result["website"] = website_match.group(1).strip()

            # Business summary lives on the separate "genel" page.
            business_summary = self._get_business_summary(member_oid)
            if business_summary:
                result["businessSummary"] = business_summary

            self._company_details_cache[symbol] = result
            self._company_details_cache_time[symbol] = current_time

            return dict(result)

        except Exception:
            # Best-effort scrape: details are optional for callers.
            return {}

    def _get_business_summary(self, member_oid: str) -> str | None:
        """
        Get business summary (Faaliyet Konusu) from KAP genel page.

        The page embeds the text in one of two shapes inside its Next.js
        payload; both are tried in order.

        Args:
            member_oid: KAP member OID of the company.

        Returns:
            The summary text, or None when the page cannot be fetched or no
            text longer than 10 characters can be extracted. Never raises.
        """
        url = f"{self.COMPANY_GENERAL_URL}/{member_oid}"

        try:
            response = self._client.get(url, timeout=15)
            if response.status_code != 200:
                return None

            text = response.text

            # Find the faaliyet_konu section.
            key_idx = text.find("kpy41_acc2_faaliyet_konu")
            if key_idx < 0:
                return None

            # Extract chunk - stop at next section (kpy41_acc2_sure or similar).
            remaining = text[key_idx:]
            next_section = re.search(r'kpy41_acc2_(?!faaliyet)', remaining)
            if next_section:
                chunk = remaining[: next_section.start()]
            else:
                # No following section marker: bound the search window.
                chunk = remaining[:5000]

            # Format 1: HTML-encoded value field (e.g., AKBNK, THYAO)
            # Pattern: value\":\"...\",\"disclosureIndex
            value_match = re.search(
                r'value\\":\\"(.*?)\\",\\"disclosureIndex', chunk, re.DOTALL
            )
            if value_match:
                value = value_match.group(1)
                if "u003e" in value:
                    # Turn the escaped angle brackets back into < and >;
                    # the second pass catches escapes without a backslash.
                    decoded = value.replace("\\u003e", ">").replace("\\u003c", "<")
                    decoded = decoded.replace("u003e", ">").replace("u003c", "<")
                    # Extract text between > and <.
                    text_parts = re.findall(r">([^<>]+)<", decoded)
                    meaningful = [
                        t.replace("\\n", " ").replace("\\\\n", " ").strip()
                        for t in text_parts
                        if len(t.strip()) > 10
                    ]
                    if meaningful:
                        summary = " ".join(meaningful)
                        # Clean up leftover escape sequences.
                        summary = summary.replace("\\ ", " ").replace("\\", "")
                        summary = re.sub(r"\s+", " ", summary).strip()
                        if len(summary) > 10:
                            return summary

            # Format 2: React children patterns.
            skip_texts = ("Bulunmamaktadır.", "-", "Şirketin Süresi")

            # Children array: \"children\":[\"text\"]
            array_matches = re.findall(r'\\"children\\":\[\\"([^\\"]{10,})\\"', chunk)
            # Children string: \"children\":\"text\"
            string_matches = re.findall(r'\\"children\\":\\"([^\\"]{10,})\\"', chunk)

            # Entries starting with "$" are skipped along with placeholders.
            texts = [
                m
                for m in array_matches + string_matches
                if not m.startswith("$") and m not in skip_texts
            ]

            if texts:
                summary = " ".join(texts)
                summary = summary.replace("\\n", " ").strip()
                summary = re.sub(r"\s+", " ", summary)
                if len(summary) > 10:
                    return summary

            return None
        except Exception:
            # Best-effort scrape: the summary is optional.
            return None

    def get_disclosure_content(self, disclosure_id: int | str) -> str | None:
        """
        Get disclosure HTML content by ID.

        Args:
            disclosure_id: KAP disclosure ID (e.g., 1530826).

        Returns:
            Raw HTML body content or None if the request failed.
        """
        url = f"{self.DISCLOSURE_DETAIL_URL}/{disclosure_id}"

        try:
            response = self._client.get(url, timeout=15)
            response.raise_for_status()
            return response.text
        except Exception:
            return None
523
+
524
+
525
# Shared provider instance, created lazily on the first get_kap_provider() call.
_provider: KAPProvider | None = None


def get_kap_provider() -> KAPProvider:
    """Return the shared KAPProvider, constructing it on first use."""
    global _provider
    instance = _provider
    if instance is None:
        instance = KAPProvider()
        _provider = instance
    return instance