borsapy 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- borsapy/__init__.py +134 -0
- borsapy/_models/__init__.py +1 -0
- borsapy/_providers/__init__.py +5 -0
- borsapy/_providers/base.py +94 -0
- borsapy/_providers/bist_index.py +150 -0
- borsapy/_providers/btcturk.py +230 -0
- borsapy/_providers/canlidoviz.py +773 -0
- borsapy/_providers/dovizcom.py +869 -0
- borsapy/_providers/dovizcom_calendar.py +276 -0
- borsapy/_providers/dovizcom_tahvil.py +172 -0
- borsapy/_providers/hedeffiyat.py +376 -0
- borsapy/_providers/isin.py +247 -0
- borsapy/_providers/isyatirim.py +943 -0
- borsapy/_providers/isyatirim_screener.py +468 -0
- borsapy/_providers/kap.py +534 -0
- borsapy/_providers/paratic.py +278 -0
- borsapy/_providers/tcmb.py +317 -0
- borsapy/_providers/tefas.py +802 -0
- borsapy/_providers/viop.py +204 -0
- borsapy/bond.py +162 -0
- borsapy/cache.py +86 -0
- borsapy/calendar.py +272 -0
- borsapy/crypto.py +153 -0
- borsapy/exceptions.py +64 -0
- borsapy/fund.py +471 -0
- borsapy/fx.py +388 -0
- borsapy/index.py +285 -0
- borsapy/inflation.py +166 -0
- borsapy/market.py +53 -0
- borsapy/multi.py +227 -0
- borsapy/screener.py +365 -0
- borsapy/ticker.py +1196 -0
- borsapy/viop.py +162 -0
- borsapy-0.4.0.dist-info/METADATA +969 -0
- borsapy-0.4.0.dist-info/RECORD +37 -0
- borsapy-0.4.0.dist-info/WHEEL +4 -0
- borsapy-0.4.0.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,534 @@
|
|
|
1
|
+
"""KAP (Kamuyu Aydınlatma Platformu) provider for disclosures and calendar."""
|
|
2
|
+
|
|
3
|
+
import io
|
|
4
|
+
import re
|
|
5
|
+
import time
|
|
6
|
+
from datetime import datetime, timedelta
|
|
7
|
+
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
from borsapy._providers.base import BaseProvider
|
|
11
|
+
from borsapy.exceptions import APIError
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class KAPProvider(BaseProvider):
    """
    Provider for KAP (Kamuyu Aydınlatma Platformu) data.

    KAP is the official disclosure platform for publicly traded
    companies in Turkey, similar to SEC EDGAR in the US.

    Provides:
    - List of all BIST companies with ticker codes
    - Company search functionality
    - Company disclosures (bildirimler)
    - Expected disclosure calendar (beklenen bildirimler)
    - Company details (sector, market, website, business summary)
    - Raw disclosure page content by disclosure ID
    """

    # Excel export of the company list; downloaded by get_companies().
    EXCEL_URL = "https://www.kap.org.tr/tr/api/company/generic/excel/IGS/A"
    # Page whose embedded data is scanned for mkkMemberOid/stockCode pairs
    # by get_member_oid().
    BIST_COMPANIES_URL = "https://www.kap.org.tr/tr/bist-sirketler"
    # Disclosure query page; get_disclosures() appends "?member=<oid>".
    DISCLOSURE_URL = "https://www.kap.org.tr/tr/bildirim-sorgu-sonuc"
    # JSON endpoint that get_calendar() POSTs its query payload to.
    CALENDAR_API_URL = "https://kap.org.tr/tr/api/expected-disclosure-inquiry/company"
    # Company summary page; get_company_details() appends "/<oid>".
    COMPANY_INFO_URL = "https://kap.org.tr/tr/sirket-bilgileri/ozet"
    # Company general-info page; _get_business_summary() appends "/<oid>".
    COMPANY_GENERAL_URL = "https://kap.org.tr/tr/sirket-bilgileri/genel"
    # Lifetime, in seconds, of every in-memory cache kept by this provider.
    CACHE_DURATION = 86400  # 24 hours
|
|
35
|
+
|
|
36
|
+
def __init__(self):
    """Initialise the base provider and start with every cache empty."""
    super().__init__()

    # Per-symbol company details, plus the time.time() stamp of each entry.
    self._company_details_cache: dict[str, dict] = {}
    self._company_details_cache_time: dict[str, float] = {}

    # Stock code -> mkkMemberOid mapping and the time it was built.
    # None means "never fetched".
    self._oid_map: dict[str, str] | None = None
    self._oid_cache_time: float = 0

    # Company list DataFrame and the time it was downloaded.
    # None means "never fetched".
    self._company_cache: pd.DataFrame | None = None
    self._cache_time: float = 0
|
|
44
|
+
|
|
45
|
+
def get_companies(self) -> pd.DataFrame:
    """
    Get list of all BIST companies.

    The list is downloaded from ``EXCEL_URL`` as an Excel sheet and kept
    in memory for ``CACHE_DURATION`` seconds; a cached frame is returned
    as-is (the same object) while it is still fresh.

    Returns:
        DataFrame with columns: ticker, name, city. A company listed
        under several codes (e.g., "GARAN, TGB") yields one row per code.
        The three columns are present even when no row could be parsed.

    Raises:
        APIError: If the download or the Excel parsing fails.
    """
    columns = ["ticker", "name", "city"]
    current_time = time.time()

    # Check cache
    if (
        self._company_cache is not None
        and (current_time - self._cache_time) < self.CACHE_DURATION
    ):
        return self._company_cache

    def cell(value) -> str:
        """Return the cell as a stripped string, or "" for a missing value."""
        return str(value).strip() if pd.notna(value) else ""

    try:
        headers = {
            "Accept": "*/*",
            "Accept-Language": "tr",
            "User-Agent": self.DEFAULT_HEADERS["User-Agent"],
            "Referer": "https://www.kap.org.tr/tr/bist-sirketler",
        }

        response = self._client.get(self.EXCEL_URL, headers=headers)
        response.raise_for_status()

        # Read Excel data
        df = pd.read_excel(io.BytesIO(response.content))

        companies = []
        # Every row has the same width, so the "at least three columns"
        # requirement is checked once instead of per row.
        if df.shape[1] >= 3:
            for _, row in df.iterrows():
                ticker_field = cell(row.iloc[0])
                name = cell(row.iloc[1])
                city = cell(row.iloc[2])

                # Skip header or empty rows
                if (
                    not ticker_field
                    or not name
                    or ticker_field in ("BIST KODU", "Kod")
                ):
                    continue

                # Handle multiple tickers (e.g., "GARAN, TGB"); a field
                # without a comma simply yields a single code.
                for ticker in ticker_field.split(","):
                    ticker = ticker.strip()
                    if ticker:
                        companies.append(
                            {"ticker": ticker, "name": name, "city": city}
                        )

        # Passing the columns explicitly keeps the documented schema even
        # when nothing was parsed, so callers can always index by name.
        result = pd.DataFrame(companies, columns=columns)
        self._company_cache = result
        self._cache_time = current_time
        return result

    except Exception as e:
        raise APIError(f"Failed to fetch company list: {e}") from e
|
|
112
|
+
|
|
113
|
+
def search(self, query: str) -> pd.DataFrame:
    """
    Search companies by name or ticker.

    Matches are ranked: exact ticker match first, then tickers that start
    with the query, then companies whose normalized name contains the
    normalized query. Within one rank the order of ``get_companies()`` is
    kept.

    Args:
        query: Search query (ticker code or company name). Surrounding
            whitespace is ignored.

    Returns:
        DataFrame with matching companies (columns: ticker, name, city),
        best match first. Empty when the query is blank or nothing matches.
    """
    columns = ["ticker", "name", "city"]

    # Trim first: " THYAO " must still hit the ticker rules, and a
    # whitespace-only query must not be treated as a real search.
    query = query.strip() if query else ""
    if not query:
        return pd.DataFrame(columns=columns)

    companies = self.get_companies()
    if companies.empty:
        return companies

    query_normalized = self._normalize_text(query)
    query_upper = query.upper()

    # Score and filter results; rows are remembered by position.
    scored = []
    for position, (ticker, name) in enumerate(
        zip(companies["ticker"], companies["name"])
    ):
        ticker_upper = str(ticker).upper()

        if ticker_upper == query_upper:
            # Exact ticker match
            score = 1000
        elif ticker_upper.startswith(query_upper):
            # Ticker starts with query
            score = 500
        elif query_normalized and query_normalized in self._normalize_text(str(name)):
            # Name contains query. The emptiness guard matters: a query
            # such as "." normalizes to "", and "" is a substring of every
            # name, which would otherwise match the whole company list.
            score = 100
        else:
            continue

        scored.append((score, position))

    if not scored:
        return pd.DataFrame(columns=columns)

    # Sort by score descending (stable, so ties keep list order).
    scored.sort(key=lambda item: item[0], reverse=True)

    return companies.iloc[[position for _, position in scored]]
|
|
160
|
+
|
|
161
|
+
def _normalize_text(self, text: str) -> str:
    """
    Fold Turkish text into a lowercase, ASCII-letter form for comparison.

    Turkish-specific letters are mapped to their plain Latin counterparts
    before lowercasing, then dots, commas, apostrophes and the company
    suffixes " a.s." / " anonim sirketi" are removed.
    """
    ascii_letters = str.maketrans("İıÖöÜüŞşÇçĞğ", "iioouussccgg")
    folded = text.translate(ascii_letters)
    folded = folded.lower()
    # Punctuation and common legal-form suffixes carry no search value.
    noise = re.compile(r"[\.,']|\s+a\.s\.?|\s+anonim sirketi")
    return noise.sub("", folded).strip()
|
|
168
|
+
|
|
169
|
+
def get_member_oid(self, symbol: str) -> str | None:
|
|
170
|
+
"""
|
|
171
|
+
Get KAP member OID (mkkMemberOid) for a stock symbol.
|
|
172
|
+
|
|
173
|
+
The member OID is required to query disclosures from KAP.
|
|
174
|
+
|
|
175
|
+
Args:
|
|
176
|
+
symbol: Stock symbol (e.g., "THYAO").
|
|
177
|
+
|
|
178
|
+
Returns:
|
|
179
|
+
Member OID string or None if not found.
|
|
180
|
+
"""
|
|
181
|
+
symbol = symbol.upper().replace(".IS", "").replace(".E", "")
|
|
182
|
+
current_time = time.time()
|
|
183
|
+
|
|
184
|
+
# Check cache
|
|
185
|
+
if (
|
|
186
|
+
self._oid_map is not None
|
|
187
|
+
and (current_time - self._oid_cache_time) < self.CACHE_DURATION
|
|
188
|
+
):
|
|
189
|
+
return self._oid_map.get(symbol)
|
|
190
|
+
|
|
191
|
+
# Fetch BIST companies list from KAP
|
|
192
|
+
try:
|
|
193
|
+
response = self._client.get(self.BIST_COMPANIES_URL, timeout=20)
|
|
194
|
+
response.raise_for_status()
|
|
195
|
+
|
|
196
|
+
# Parse mkkMemberOid and stockCode pairs from Next.js data
|
|
197
|
+
# Format: \"mkkMemberOid\":\"xxx\",\"kapMemberTitle\":\"...\",
|
|
198
|
+
# \"relatedMemberTitle\":\"...\",\"stockCode\":\"THYAO\",...
|
|
199
|
+
# Note: stockCode may contain multiple codes like "GARAN, TGB"
|
|
200
|
+
pattern = (
|
|
201
|
+
r'\\"mkkMemberOid\\":\\"([^\\"]+)\\",'
|
|
202
|
+
r'\\"kapMemberTitle\\":\\"[^\\"]+\\",'
|
|
203
|
+
r'\\"relatedMemberTitle\\":\\"[^\\"]*\\",'
|
|
204
|
+
r'\\"stockCode\\":\\"([^\\"]+)\\"'
|
|
205
|
+
)
|
|
206
|
+
matches = re.findall(pattern, response.text)
|
|
207
|
+
|
|
208
|
+
# Build mapping: stockCode -> mkkMemberOid
|
|
209
|
+
# Handle multiple codes per company (e.g., "GARAN, TGB")
|
|
210
|
+
self._oid_map = {}
|
|
211
|
+
for oid, codes_str in matches:
|
|
212
|
+
for code in codes_str.split(","):
|
|
213
|
+
code = code.strip()
|
|
214
|
+
if code:
|
|
215
|
+
self._oid_map[code] = oid
|
|
216
|
+
|
|
217
|
+
self._oid_cache_time = current_time
|
|
218
|
+
return self._oid_map.get(symbol)
|
|
219
|
+
|
|
220
|
+
except Exception:
|
|
221
|
+
return None
|
|
222
|
+
|
|
223
|
+
def get_disclosures(self, symbol: str, limit: int = 20) -> pd.DataFrame:
    """
    Get KAP disclosures (bildirimler) for a stock.

    Args:
        symbol: Stock symbol (e.g., "THYAO").
        limit: Maximum number of disclosures to return (default: 20).

    Returns:
        DataFrame with columns: Date, Title, URL. ``Date`` is the publish
        date string exactly as it appears in the page data (e.g.,
        "29.12.2025 19:21:18"). The columns are present even when the
        symbol is unknown or no disclosure was found.

    Raises:
        APIError: If the request fails or the response cannot be processed.
    """
    columns = ["Date", "Title", "URL"]
    symbol = symbol.upper().replace(".IS", "").replace(".E", "")

    # Get KAP member OID for the symbol
    member_oid = self.get_member_oid(symbol)
    if not member_oid:
        return pd.DataFrame(columns=columns)

    # Fetch disclosures from KAP
    disc_url = f"{self.DISCLOSURE_URL}?member={member_oid}"

    try:
        response = self._client.get(disc_url, timeout=15)
        response.raise_for_status()

        # Parse disclosures from Next.js embedded data
        # Format: publishDate\":\"29.12.2025 19:21:18\",\"disclosureIndex\":1530826...
        pattern = (
            r'publishDate\\":\\"([^\\"]+)\\".*?'
            r'disclosureIndex\\":(\d+).*?'
            r'title\\":\\"([^\\"]+)\\"'
        )
        matches = re.findall(pattern, response.text, re.DOTALL)

        records = [
            {
                "Date": date,
                "Title": title,
                "URL": f"https://www.kap.org.tr/tr/Bildirim/{idx}",
            }
            for date, idx, title in matches[:limit]
        ]

        # Explicit columns keep the schema identical to the "unknown
        # symbol" path above when there are no records.
        return pd.DataFrame(records, columns=columns)

    except Exception as e:
        raise APIError(f"Failed to fetch disclosures for {symbol}: {e}") from e
|
|
270
|
+
|
|
271
|
+
def get_calendar(self, symbol: str) -> pd.DataFrame:
    """
    Get expected disclosure calendar for a stock from KAP.

    Returns upcoming expected disclosures like financial reports,
    annual reports, sustainability reports, and corporate governance reports.
    The query window runs from today to 180 days ahead.

    Args:
        symbol: Stock symbol (e.g., "THYAO").

    Returns:
        DataFrame with columns:
        - StartDate: Expected disclosure window start
        - EndDate: Expected disclosure window end
        - Subject: Type of disclosure (e.g., "Finansal Rapor")
        - Period: Report period (e.g., "Yıllık", "3 Aylık")
        - Year: Fiscal year

        A field missing from an item becomes "". The columns are present
        even when the symbol is unknown or the API returns no items.

    Raises:
        APIError: If the request fails or the response cannot be processed.
    """
    columns = ["StartDate", "EndDate", "Subject", "Period", "Year"]
    symbol = symbol.upper().replace(".IS", "").replace(".E", "")

    # Get KAP member OID for the symbol
    member_oid = self.get_member_oid(symbol)
    if not member_oid:
        return pd.DataFrame(columns=columns)

    # Calculate date range: today to 6 months from now
    now = datetime.now()
    start_date = now.strftime("%Y-%m-%d")
    end_date = (now + timedelta(days=180)).strftime("%Y-%m-%d")

    # Fetch expected disclosures from KAP API
    headers = {
        "Accept": "*/*",
        "Content-Type": "application/json",
        "Origin": "https://kap.org.tr",
        "Referer": "https://kap.org.tr/tr/beklenen-bildirim-sorgu",
    }
    payload = {
        "startDate": start_date,
        "endDate": end_date,
        "memberTypes": ["IGS"],
        "mkkMemberOidList": [member_oid],
        "disclosureClass": "",
        "subjects": [],
        "mainSector": "",
        "sector": "",
        "subSector": "",
        "market": "",
        "index": "",
        "year": "",
        "term": "",
        "ruleType": "",
    }

    try:
        response = self._client.post(
            self.CALENDAR_API_URL,
            json=payload,
            headers=headers,
            timeout=15,
        )
        response.raise_for_status()
        data = response.json()

        records = [
            {
                "StartDate": item.get("startDate", ""),
                "EndDate": item.get("endDate", ""),
                "Subject": item.get("subject", ""),
                "Period": item.get("ruleTypeTerm", ""),
                "Year": item.get("year", ""),
            }
            for item in data
        ]

        # Explicit columns keep the schema identical to the "unknown
        # symbol" path above when the API returns an empty list.
        return pd.DataFrame(records, columns=columns)

    except Exception as e:
        raise APIError(f"Failed to fetch calendar for {symbol}: {e}") from e
|
|
349
|
+
|
|
350
|
+
def get_company_details(self, symbol: str) -> dict:
    """
    Get company details from KAP company info page.

    Scrapes the KAP company summary page with regular expressions and adds
    the business summary obtained from ``_get_business_summary``. Results
    are cached per symbol for ``CACHE_DURATION`` seconds. Any failure while
    fetching or parsing yields an empty dict instead of an exception.

    Args:
        symbol: Stock symbol (e.g., "THYAO").

    Returns:
        Dict containing whichever of these keys could be extracted:
        - sector: Company sector (e.g., "ULAŞTIRMA VE DEPOLAMA")
        - market: Stock market (e.g., "YILDIZ PAZAR")
        - website: Company website URL(s)
        - businessSummary: Business summary text
        An empty dict is returned when the symbol has no member OID.
    """
    ticker = symbol.upper().replace(".IS", "").replace(".E", "")
    now = time.time()

    # Serve from cache while the entry is still fresh.
    if ticker in self._company_details_cache:
        stored_at = self._company_details_cache_time.get(ticker, 0)
        if (now - stored_at) < self.CACHE_DURATION:
            return self._company_details_cache[ticker]

    # The company page is addressed by member OID, not by ticker.
    member_oid = self.get_member_oid(ticker)
    if not member_oid:
        return {}

    page_url = f"{self.COMPANY_INFO_URL}/{member_oid}"

    # Result key -> regex whose first group holds the value:
    #   sector:  href="/tr/Sektorler?sector=...">SECTOR_NAME</a>
    #   market:  href="/tr/Pazarlar?market=...">MARKET_NAME</a>
    #   website: <h3...>İnternet Adresi</h3><p class="...">WEBSITE</p>
    field_patterns = (
        ("sector", r'href="/tr/Sektorler\?sector=[^"]*">([^<]+)</a>'),
        ("market", r'href="/tr/Pazarlar\?market=[^"]*">([^<]+)</a>'),
        ("website", r'İnternet Adresi</h3><p[^>]*>([^<]+)</p>'),
    )

    try:
        response = self._client.get(page_url, timeout=15)
        response.raise_for_status()
        html = response.text

        details = {}
        for key, pattern in field_patterns:
            found = re.search(pattern, html)
            if found:
                details[key] = found.group(1).strip()

        # The business summary lives on the separate "genel" page.
        summary = self._get_business_summary(member_oid)
        if summary:
            details["businessSummary"] = summary

        self._company_details_cache[ticker] = details
        self._company_details_cache_time[ticker] = now

        return details

    except Exception:
        return {}
|
|
427
|
+
|
|
428
|
+
def _get_business_summary(self, member_oid: str) -> str | None:
    """
    Get business summary (Faaliyet Konusu) from KAP genel page.

    The page text is searched for the ``kpy41_acc2_faaliyet_konu`` key and
    the text that follows it is parsed with two strategies, tried in order:
    an escaped ``value`` field containing HTML markup, then escaped
    ``children`` entries.

    Args:
        member_oid: KAP member OID appended to ``COMPANY_GENERAL_URL``.

    Returns:
        The summary with whitespace collapsed, or None when the page does
        not answer with status 200, the section is missing, no text longer
        than 10 characters is found, or any exception occurs.
    """
    url = f"{self.COMPANY_GENERAL_URL}/{member_oid}"

    try:
        response = self._client.get(url, timeout=15)
        if response.status_code != 200:
            return None

        text = response.text

        # Find the faaliyet_konu section
        key_idx = text.find("kpy41_acc2_faaliyet_konu")
        if key_idx < 0:
            return None

        # Extract chunk - stop at next section (kpy41_acc2_sure or similar).
        # The negative lookahead keeps the search from matching the
        # faaliyet key that `remaining` itself starts with.
        remaining = text[key_idx:]
        next_section = re.search(r'kpy41_acc2_(?!faaliyet)', remaining)
        if next_section:
            chunk = remaining[: next_section.start()]
        else:
            # No following section key: limit parsing to the first 5000
            # characters after the key.
            chunk = remaining[:5000]

        # Format 1: HTML-encoded value field (e.g., AKBNK, THYAO)
        # Pattern: value\":\"...\",\"disclosureIndex
        value_match = re.search(
            r'value\\":\\"(.*?)\\",\\"disclosureIndex', chunk, re.DOTALL
        )
        if value_match:
            value = value_match.group(1)
            # Only values carrying an escaped ">" are handled here;
            # anything else falls through to Format 2.
            if "u003e" in value:
                # Turn the escaped angle brackets back into "<" and ">":
                # first the form with a leading backslash, then the bare
                # "u003e"/"u003c" form.
                decoded = value.replace("\\u003e", ">").replace("\\u003c", "<")
                decoded = decoded.replace("u003e", ">").replace("u003c", "<")
                # Extract text between > and <
                text_parts = re.findall(r">([^<>]+)<", decoded)
                # Keep fragments longer than 10 characters and replace
                # literal backslash-n sequences with spaces.
                meaningful = [
                    t.replace("\\n", " ").replace("\\\\n", " ").strip()
                    for t in text_parts
                    if len(t.strip()) > 10
                ]
                if meaningful:
                    summary = " ".join(meaningful)
                    # Clean up escape sequences: backslash-space becomes a
                    # space, every remaining backslash is dropped.
                    summary = summary.replace("\\ ", " ").replace("\\", "")
                    summary = re.sub(r"\s+", " ", summary).strip()
                    if len(summary) > 10:
                        return summary

        # Format 2: React children patterns
        texts = []
        # Texts that are never accepted as summary content.
        skip_texts = ("Bulunmamaktadır.", "-", "Şirketin Süresi")

        # Children array: \"children\":[\"text\"]
        # Only the first element, and only when it has 10+ characters.
        array_matches = re.findall(r'\\"children\\":\[\\"([^\\"]{10,})\\"', chunk)
        for m in array_matches:
            # Entries starting with "$" are skipped.
            if not m.startswith("$") and m not in skip_texts:
                texts.append(m)

        # Children string: \"children\":\"text\"
        string_matches = re.findall(r'\\"children\\":\\"([^\\"]{10,})\\"', chunk)
        for m in string_matches:
            if not m.startswith("$") and m not in skip_texts:
                texts.append(m)

        if texts:
            # Array matches come first, then string matches.
            summary = " ".join(texts)
            summary = summary.replace("\\n", " ").strip()
            summary = re.sub(r"\s+", " ", summary)
            if len(summary) > 10:
                return summary

        return None
    except Exception:
        # Best-effort: every failure is reported as "no summary".
        return None
|
|
504
|
+
|
|
505
|
+
def get_disclosure_content(self, disclosure_id: int | str) -> str | None:
|
|
506
|
+
"""
|
|
507
|
+
Get disclosure HTML content by ID.
|
|
508
|
+
|
|
509
|
+
Args:
|
|
510
|
+
disclosure_id: KAP disclosure ID (e.g., 1530826).
|
|
511
|
+
|
|
512
|
+
Returns:
|
|
513
|
+
Raw HTML body content or None if failed.
|
|
514
|
+
"""
|
|
515
|
+
url = f"https://www.kap.org.tr/tr/Bildirim/{disclosure_id}"
|
|
516
|
+
|
|
517
|
+
try:
|
|
518
|
+
response = self._client.get(url, timeout=15)
|
|
519
|
+
response.raise_for_status()
|
|
520
|
+
return response.text
|
|
521
|
+
except Exception:
|
|
522
|
+
return None
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
# Module-wide provider instance, created on first use by get_kap_provider().
_provider: KAPProvider | None = None


def get_kap_provider() -> KAPProvider:
    """Return the shared KAPProvider, creating it on the first call."""
    global _provider
    if _provider is not None:
        return _provider
    _provider = KAPProvider()
    return _provider
|