financechatbotkit 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- financechatbotkit-2.0.0.dist-info/METADATA +11 -0
- financechatbotkit-2.0.0.dist-info/RECORD +39 -0
- financechatbotkit-2.0.0.dist-info/WHEEL +5 -0
- financechatbotkit-2.0.0.dist-info/entry_points.txt +2 -0
- financechatbotkit-2.0.0.dist-info/top_level.txt +2 -0
- orchestrator/__init__.py +29 -0
- orchestrator/bond/__init__.py +8 -0
- orchestrator/bond/base_reader.py +139 -0
- orchestrator/bond/getBondBasiInfo.py +84 -0
- orchestrator/bond/getBondWithOptiCallRede.py +83 -0
- orchestrator/bond/getEarlExerOpti.py +90 -0
- orchestrator/bond/getIssuIssuItemStat.py +85 -0
- orchestrator/bond/getOptiExer.py +83 -0
- orchestrator/bond/getOptiExerPricAdju.py +84 -0
- orchestrator/bond/workflow.py +252 -0
- orchestrator/exceptions.py +17 -0
- orchestrator/fnguide/__init__.py +21 -0
- orchestrator/fnguide/workflow.py +391 -0
- orchestrator/mapping/__init__.py +22 -0
- orchestrator/mapping/data/__init__.py +1 -0
- orchestrator/mapping/data/corp_codes_raw.json +693170 -0
- orchestrator/mapping/update_raw_data.py +96 -0
- orchestrator/mapping/workflow.py +303 -0
- orchestrator/price/__init__.py +15 -0
- orchestrator/price/workflow.py +250 -0
- telebotkit/__init__.py +51 -0
- telebotkit/bot/__init__.py +38 -0
- telebotkit/bot/client.py +217 -0
- telebotkit/bot/reply.py +36 -0
- telebotkit/bot/router.py +125 -0
- telebotkit/bot/safety.py +28 -0
- telebotkit/bot/telegram.py +41 -0
- telebotkit/firestore/__init__.py +45 -0
- telebotkit/firestore/client.py +141 -0
- telebotkit/firestore/documents.py +164 -0
- telebotkit/firestore/fetch.py +228 -0
- telebotkit/firestore/locks.py +74 -0
- telebotkit/firestore/upload.py +75 -0
- telebotkit/sheets.py +219 -0
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
"""FnGuide workflows for financial statements and ratios.
|
|
2
|
+
- Follow the policies of the target website and use appropriate delays between requests if needed.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from html import unescape
|
|
8
|
+
import logging
|
|
9
|
+
import re
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import httpx
|
|
13
|
+
|
|
14
|
+
from ..exceptions import DownloadError, InvalidInputError
|
|
15
|
+
|
|
16
|
+
# FnGuide endpoints requested by this module.
FN_GUIDE_FINANCE_RATIO_URL = "https://comp.fnguide.com/SVO2/ASP/SVD_FinanceRatio.asp"
FN_GUIDE_FINANCE_STATEMENT_URL = "https://comp.fnguide.com/SVO2/ASP/SVD_Finance.asp"

# Matches one HTML tag of any kind.
_TAG_RE = re.compile(r"<[^>]+>", flags=re.DOTALL)
# Matches a run of whitespace so it can be collapsed to a single space.
_WHITESPACE_RE = re.compile(r"\s+")
# Matches one titled table section. The four groups are, in order: the <h2>
# heading, the "stxt" span, the "txt1" span and the first <table> element
# that follows them inside a "ul_col2wrap pd_t25" div.
_TABLE_SECTION_RE = re.compile(
    r'<div class="ul_col2wrap pd_t25">.*?<h2>(.*?)</h2>\s*'
    r'<span class="stxt">(.*?)</span>.*?'
    r'<span class="txt1">(.*?)</span>.*?'
    r"(<table\b.*?</table>)",
    flags=re.DOTALL,
)
# Matches a complete <table> element (non-greedy, so nested tables are not supported).
_TABLE_RE = re.compile(r"<table\b.*?</table>", flags=re.DOTALL)
# Groups: raw attribute text of the <tr>, inner HTML of the row.
_ROW_RE = re.compile(r"<tr\b([^>]*)>(.*?)</tr>", flags=re.DOTALL)
# Groups: tag name (th/td), raw attribute text, inner HTML of the cell.
_CELL_RE = re.compile(r"<(th|td)\b([^>]*)>(.*?)</(?:th|td)>", flags=re.DOTALL)

logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def build_fnguide_params(
    *,
    gicode: str,
    new_menu_id: str,
    pgb: str = "1",
    cid: str = "",
    menu_yn: str = "Y",
    report_gb: str = "",
    stk_gb: str = "701",
) -> dict[str, Any]:
    """Build the query-parameter mapping for an FnGuide page request.

    Each keyword argument is copied verbatim under the parameter name the
    site expects (``pGB``, ``gicode``, ``cID``, ``MenuYn``, ``ReportGB``,
    ``NewMenuID``, ``stkGb``), in that order.

    Returns:
        A new dict holding the seven query parameters.
    """
    # Pairs are listed in the order the parameters should appear in the dict.
    pairs = (
        ("pGB", pgb),
        ("gicode", gicode),
        ("cID", cid),
        ("MenuYn", menu_yn),
        ("ReportGB", report_gb),
        ("NewMenuID", new_menu_id),
        ("stkGb", stk_gb),
    )
    return dict(pairs)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def build_finance_ratio_params(
    *,
    gicode: str,
    report_gb: str = "",
) -> dict[str, Any]:
    """Build query parameters for the finance-ratio page.

    Delegates to ``build_fnguide_params`` with ``NewMenuID`` fixed to
    ``"104"``; every other parameter keeps that helper's default.
    """
    ratio_menu_id = "104"
    return build_fnguide_params(
        gicode=gicode,
        report_gb=report_gb,
        new_menu_id=ratio_menu_id,
    )
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def build_finance_statement_params(
    *,
    gicode: str,
    report_gb: str = "D",
) -> dict[str, Any]:
    """Build query parameters for the financial-statement page.

    Delegates to ``build_fnguide_params`` with ``NewMenuID`` fixed to
    ``"103"``. ``report_gb`` defaults to ``"D"`` here.
    """
    statement_menu_id = "103"
    return build_fnguide_params(
        gicode=gicode,
        report_gb=report_gb,
        new_menu_id=statement_menu_id,
    )
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def fetch_fnguide_page(
    *,
    url: str,
    params: dict[str, Any],
    timeout: float = 30.0,
    user_agent: str | None = None,
    headers: dict[str, str] | None = None,
) -> str:
    """Download one FnGuide page and return its body as text.

    Args:
        url: Page URL to request with GET.
        params: Query parameters sent with the request.
        timeout: Timeout handed to ``httpx.get``.
        user_agent: Optional ``User-Agent`` header value.
        headers: Optional extra headers; applied after ``user_agent`` so an
            explicit ``User-Agent`` entry here takes precedence.

    Returns:
        The response body (``response.text``).

    Raises:
        DownloadError: If httpx raises ``HTTPError``, including the error
            raised by ``raise_for_status``. The original exception is
            chained as the cause.
    """
    merged_headers: dict[str, str] = {}
    if user_agent is not None:
        merged_headers["User-Agent"] = user_agent
    if headers:
        merged_headers.update(headers)

    try:
        reply = httpx.get(
            url,
            params=params,
            # Pass None rather than an empty dict when no header was supplied.
            headers=merged_headers or None,
            timeout=timeout,
            follow_redirects=True,
        )
        reply.raise_for_status()
    except httpx.HTTPError as exc:
        logger.warning("FnGuide request failed for %s: %s", url, exc)
        raise DownloadError(f"Failed to fetch FnGuide page: {url}") from exc

    return reply.text
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _normalize_text(text: str) -> str:
    """Decode HTML entities and collapse all whitespace to single spaces.

    Non-breaking spaces (``\\xa0``) are turned into ordinary spaces, and
    leading/trailing whitespace is removed.
    """
    decoded = unescape(text)
    spaced = decoded.replace("\xa0", " ")
    collapsed = _WHITESPACE_RE.sub(" ", spaced)
    return collapsed.strip()
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _strip_tags(html: str) -> str:
    """Return the visible text of an HTML fragment.

    ``<br>`` tags become newlines and every other tag becomes a space; the
    result is then passed through ``_normalize_text``.
    """
    with_breaks = re.sub(r"<br\s*/?>", "\n", html, flags=re.I)
    without_tags = _TAG_RE.sub(" ", with_breaks)
    return _normalize_text(without_tags)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _extract_attr(tag_attrs: str, name: str) -> str | None:
|
|
115
|
+
match = re.search(rf'{name}=(["\'])(.*?)\1', tag_attrs, re.S)
|
|
116
|
+
if not match:
|
|
117
|
+
return None
|
|
118
|
+
return unescape(match.group(2)).strip()
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _clean_label_html(cell_html: str) -> str:
    """Return a row label's text with auxiliary markup removed.

    Before the tags are stripped, three kinds of element are replaced by a
    space, in this order: ``<dl>`` blocks, ``<a class="btn_acdopen">``
    links and ``<span class="blind">`` spans.
    """
    noise_patterns = (
        r"<dl[^>]*>.*?</dl>",
        r'<a\b[^>]*class=["\']btn_acdopen["\'][^>]*>.*?</a>',
        r'<span\b[^>]*class=["\']blind["\'][^>]*>.*?</span>',
    )
    label_html = cell_html
    for pattern in noise_patterns:
        label_html = re.sub(pattern, " ", label_html, flags=re.S)
    return _strip_tags(label_html)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _parse_value_cells(cells: list[dict[str, Any]], columns: list[str]) -> list[dict[str, str | None]]:
|
|
139
|
+
values: list[dict[str, str | None]] = []
|
|
140
|
+
for column, cell in zip(columns, cells[1:]):
|
|
141
|
+
values.append(
|
|
142
|
+
{
|
|
143
|
+
"period": column,
|
|
144
|
+
"text": cell["text"] or None,
|
|
145
|
+
"title": cell["title"] or None,
|
|
146
|
+
}
|
|
147
|
+
)
|
|
148
|
+
return values
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _parse_table_row(row_attrs: str, row_html: str, columns: list[str]) -> dict[str, Any]:
    """Parse one ``<tr>`` into a label plus per-column values.

    Args:
        row_attrs: Raw attribute text of the ``<tr>`` tag.
        row_html: Inner HTML of the row.
        columns: Column headers to pair with the value cells.

    Returns:
        A dict with ``_row_class`` (``""`` when absent), ``_row_id``
        (``None`` when absent), ``label`` and ``values``.
    """
    parsed_cells = [
        {
            "attrs": attrs,
            "html": inner_html,
            "text": _strip_tags(inner_html),
            "title": _extract_attr(attrs, "title"),
        }
        for _tag, attrs, inner_html in _CELL_RE.findall(row_html)
    ]

    # A row without cells still yields a (blank) label.
    if parsed_cells:
        first_cell = parsed_cells[0]
    else:
        first_cell = {"html": "", "text": ""}

    # Prefer the cleaned label; fall back to the plain cell text when
    # cleaning leaves nothing.
    label = _clean_label_html(first_cell["html"]) or first_cell["text"]

    return {
        "_row_class": _extract_attr(row_attrs, "class") or "",
        "_row_id": _extract_attr(row_attrs, "id"),
        "label": label,
        "values": _parse_value_cells(parsed_cells, columns),
    }
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _infer_table_period_kind(subtitle: str, columns: list[str]) -> str:
    """Classify a table's reporting period from its subtitle or columns.

    The subtitle is checked first for these keywords, in order:
    "연간" (annual) -> ``"annual"``, "분기" (quarter) -> ``"quarterly"``,
    "누적" (cumulative) -> ``"annual_cumulative"``, "3개월" (three months)
    -> ``"quarter_3m"``.

    If none is present, columns shaped like ``YYYY/MM`` are inspected: a
    year that occurs more than once means ``"quarterly"``; otherwise,
    including when there are no such columns, the result is ``"annual"``.
    """
    # Order matters: the first keyword found in the subtitle wins.
    keyword_kinds = (
        ("연간", "annual"),
        ("분기", "quarterly"),
        ("누적", "annual_cumulative"),
        ("3개월", "quarter_3m"),
    )
    for keyword, kind in keyword_kinds:
        if keyword in subtitle:
            return kind

    years = [
        str(column).split("/")[0]
        for column in columns
        if re.match(r"^\d{4}/\d{2}$", str(column))
    ]
    # Several periods within the same year implies sub-annual data.
    if len(set(years)) < len(years):
        return "quarterly"
    return "annual"
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _parse_finance_table(
    *,
    title: str,
    subtitle: str,
    unit: str,
    table_html: str,
) -> dict[str, Any]:
    """Parse one FnGuide ``<table>`` into a nested, JSON-friendly dict.

    Args:
        title: Section heading associated with the table.
        subtitle: Section subtitle; also used to infer the period kind.
        unit: Unit text displayed with the table.
        table_html: The full ``<table>...</table>`` markup.

    Returns:
        A dict with ``title``, ``subtitle``, ``period_kind``, ``unit``,
        ``caption``, ``basis`` (text of the first header cell),
        ``columns`` (texts of the remaining header cells) and
        ``sections``. Each section is ``{"title", "rows"}``; a row may
        carry a ``details`` list holding its sub-rows.
    """
    caption_match = re.search(r"<caption[^>]*>(.*?)</caption>", table_html, re.S)
    caption = _strip_tags(caption_match.group(1)) if caption_match else None

    # Accept attributes on <thead>/<tbody>; a bare-tag pattern would
    # silently produce an empty table as soon as the markup gains one.
    header_match = re.search(r"<thead\b[^>]*>(.*?)</thead>", table_html, re.S)
    header_cells = _CELL_RE.findall(header_match.group(1)) if header_match else []
    header_texts = [_strip_tags(cell_html) for _, _, cell_html in header_cells]
    basis = header_texts[0] if header_texts else None
    columns = header_texts[1:]

    body_match = re.search(r"<tbody\b[^>]*>(.*?)</tbody>", table_html, re.S)
    body_html = body_match.group(1) if body_match else ""

    sections: list[dict[str, Any]] = []
    current_section: dict[str, Any] | None = None
    # Maps a parent row key (row id without its "p_" prefix) to the parsed
    # row so that later sub-rows can be attached to it.
    row_lookup: dict[str, dict[str, Any]] = {}

    for row_attrs, row_html in _ROW_RE.findall(body_html):
        row_class = _extract_attr(row_attrs, "class") or ""

        # A "tbody_tit" row starts a new titled section and holds no values.
        if "tbody_tit" in row_class:
            title_cells = _CELL_RE.findall(row_html)
            section_title = _strip_tags(title_cells[0][2]) if title_cells else ""
            current_section = {"title": section_title, "rows": []}
            sections.append(current_section)
            continue

        # Rows that appear before any title row go into an untitled section.
        if current_section is None:
            current_section = {"title": None, "rows": []}
            sections.append(current_section)

        parsed_row = _parse_table_row(row_attrs, row_html, columns)
        # The underscore-prefixed fields are parsing aids only and are
        # never part of the returned row.
        row_id = parsed_row.pop("_row_id", None)
        parsed_row.pop("_row_class", None)

        if "acd_dep2_sub" in row_class:
            # A sub-row names its parent through a "c_grid..." class whose
            # suffix equals the parent's row key.
            parent_keys = [
                cls.removeprefix("c_")
                for cls in row_class.split()
                if cls.startswith("c_grid")
            ]
            parent = next((row_lookup[key] for key in parent_keys if key in row_lookup), None)
            if parent is not None:
                parent.setdefault("details", []).append(parsed_row)
            else:
                # No known parent: keep the sub-row as an ordinary row.
                current_section["rows"].append(parsed_row)
            continue

        current_section["rows"].append(parsed_row)
        key = row_id.removeprefix("p_") if row_id else None
        if key:
            row_lookup[key] = parsed_row

    normalized_subtitle = _normalize_text(subtitle)
    return {
        "title": _normalize_text(title),
        "subtitle": normalized_subtitle,
        "period_kind": _infer_table_period_kind(normalized_subtitle, columns),
        "unit": _normalize_text(unit),
        "caption": caption,
        "basis": basis,
        "columns": columns,
        "sections": sections,
    }
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def parse_fnguide_html_tables(html: str) -> dict[str, Any]:
    """Parse a whole FnGuide page into title, metadata and tables.

    Args:
        html: Full page markup.

    Returns:
        A dict with:

        * ``page_title``: text of the ``<title>`` element, or ``None``.
        * ``page_meta``: non-empty ``value`` attributes of the ``<input>``
          elements whose ``id`` is one of ``gicode``, ``giname``,
          ``stkGb``, ``NewMenuID``, ``ReportGb`` or ``MenuYn``.
        * ``tables``: one parsed table per titled section; when the page
          has no such section, every ``<table>`` is parsed instead and
          titled ``table_<n>`` (1-based) with empty subtitle and unit.
    """
    title_match = re.search(r"<title>(.*?)</title>", html, re.S)
    page_title = _strip_tags(title_match.group(1)) if title_match else None

    hidden_keys = ("gicode", "giname", "stkGb", "NewMenuID", "ReportGb", "MenuYn")
    hidden_inputs: dict[str, str] = {}
    for key in hidden_keys:
        # Search the page once per key and reuse the match.
        input_match = re.search(rf'<input[^>]*id=["\']{key}["\'][^>]*', html)
        if input_match is None:
            continue
        value = _extract_attr(input_match.group(0), "value")
        # Inputs with a missing or empty value are left out of the result.
        if value:
            hidden_inputs[key] = value

    tables = [
        _parse_finance_table(
            title=title,
            subtitle=subtitle,
            unit=unit,
            table_html=table_html,
        )
        for title, subtitle, unit, table_html in _TABLE_SECTION_RE.findall(html)
    ]

    if not tables:
        # Fallback for pages without the expected section wrapper.
        tables = [
            _parse_finance_table(
                title=f"table_{index}",
                subtitle="",
                unit="",
                table_html=table_html,
            )
            for index, table_html in enumerate(_TABLE_RE.findall(html), start=1)
        ]

    return {
        "page_title": page_title,
        "page_meta": hidden_inputs,
        "tables": tables,
    }
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def parse_fnguide_finance_ratio_html(html: str) -> dict[str, Any]:
    """Parse a finance-ratio page by delegating to ``parse_fnguide_html_tables``."""
    parsed_page = parse_fnguide_html_tables(html)
    return parsed_page
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def parse_fnguide_finance_statement_html(html: str) -> dict[str, Any]:
    """Parse a financial-statement page by delegating to ``parse_fnguide_html_tables``."""
    parsed_page = parse_fnguide_html_tables(html)
    return parsed_page
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def _normalize_stock_code(stock_code: str) -> str:
    """Return ``stock_code`` as a stripped string.

    Raises:
        InvalidInputError: If the value is falsy or blank after stripping.
    """
    code = str(stock_code or "").strip()
    if code:
        return code
    raise InvalidInputError("stock_code is required.")
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def _normalize_statement_type(statement_type: str) -> tuple[str, str]:
    """Map a statement-type alias to ``(canonical name, ReportGB code)``.

    Matching ignores case and surrounding whitespace. ``consolidated``,
    ``connected`` and ``d`` give ``("consolidated", "D")``; ``separate``,
    ``standalone``, ``single`` and ``b`` give ``("separate", "B")``.

    Raises:
        InvalidInputError: For any other value.
    """
    consolidated = ("consolidated", "D")
    separate = ("separate", "B")
    alias_table = {
        "consolidated": consolidated,
        "connected": consolidated,
        "d": consolidated,
        "separate": separate,
        "standalone": separate,
        "single": separate,
        "b": separate,
    }

    lookup_key = str(statement_type or "").strip().lower()
    resolved = alias_table.get(lookup_key)
    if resolved is None:
        raise InvalidInputError("statement_type must be 'consolidated' or 'separate'.")
    return resolved
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
class FnGuideWorkflow:
    """Fetch FnGuide ratio and financial statement pages as normalized JSON."""

    def run(
        self,
        *,
        stock_code: str,
        statement_type: str = "consolidated",
        timeout: float = 30.0,
        user_agent: str | None = None,
    ) -> dict[str, Any]:
        """Download and parse both FnGuide pages for one stock.

        Args:
            stock_code: Stock code, with or without a leading ``"A"``.
            statement_type: Alias accepted by ``_normalize_statement_type``.
            timeout: Timeout forwarded to each page request.
            user_agent: Optional ``User-Agent`` header for each request.

        Returns:
            ``{"input": {...}, "data": {"financials": ..., "ratios": ...}}``
            where ``input`` echoes the normalized arguments (the stock code
            without its leading ``"A"``).
        """
        code = _normalize_stock_code(stock_code)
        statement_kind, report_gb = _normalize_statement_type(statement_type)

        # The site's gicode is the stock code prefixed with "A".
        gicode = f"A{code}" if not code.startswith("A") else code

        request_options = {"timeout": timeout, "user_agent": user_agent}

        ratio_html = fetch_fnguide_page(
            url=FN_GUIDE_FINANCE_RATIO_URL,
            params=build_finance_ratio_params(gicode=gicode, report_gb=report_gb),
            **request_options,
        )
        statement_html = fetch_fnguide_page(
            url=FN_GUIDE_FINANCE_STATEMENT_URL,
            params=build_finance_statement_params(gicode=gicode, report_gb=report_gb),
            **request_options,
        )

        echoed_input = {
            "stock_code": code.removeprefix("A"),
            "statement_type": statement_kind,
            "timeout": timeout,
        }
        parsed_data = {
            "financials": parse_fnguide_finance_statement_html(statement_html),
            "ratios": parse_fnguide_finance_ratio_html(ratio_html),
        }
        return {"input": echoed_input, "data": parsed_data}
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
# Shared module-level instance used by run_fnguide_workflow.
_DEFAULT_WORKFLOW = FnGuideWorkflow()


def run_fnguide_workflow(
    *,
    stock_code: str,
    statement_type: str = "consolidated",
    timeout: float = 30.0,
    user_agent: str | None = None,
) -> dict[str, Any]:
    """Run the FnGuide workflow on the shared module-level instance.

    All arguments are forwarded unchanged to ``FnGuideWorkflow.run`` and
    its result is returned as is.
    """
    forwarded = {
        "stock_code": stock_code,
        "statement_type": statement_type,
        "timeout": timeout,
        "user_agent": user_agent,
    }
    return _DEFAULT_WORKFLOW.run(**forwarded)
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Mapping workflows for FinanceChatbot.
|
|
2
|
+
|
|
3
|
+
For development:
|
|
4
|
+
run `orchestrator-update-corp-codes --api-key "$DART_API_KEY"` to update the raw corp codes file
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .workflow import MappingWorkflow, download_latest_raw_entries, run_mapping_workflow
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"MappingWorkflow",
|
|
11
|
+
"download_latest_raw_entries",
|
|
12
|
+
"run_mapping_workflow",
|
|
13
|
+
"update_raw_corp_codes_file",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def __getattr__(name: str):
    """Resolve ``update_raw_corp_codes_file`` lazily on first attribute access.

    The ``update_raw_data`` submodule is imported only when that name is
    requested; any other unknown name raises ``AttributeError``.
    """
    if name != "update_raw_corp_codes_file":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    from .update_raw_data import update_raw_corp_codes_file as lazy_export

    return lazy_export
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Bundled corp-code data for mapping workflows."""
|