financechatbotkit 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. financechatbotkit-2.0.0.dist-info/METADATA +11 -0
  2. financechatbotkit-2.0.0.dist-info/RECORD +39 -0
  3. financechatbotkit-2.0.0.dist-info/WHEEL +5 -0
  4. financechatbotkit-2.0.0.dist-info/entry_points.txt +2 -0
  5. financechatbotkit-2.0.0.dist-info/top_level.txt +2 -0
  6. orchestrator/__init__.py +29 -0
  7. orchestrator/bond/__init__.py +8 -0
  8. orchestrator/bond/base_reader.py +139 -0
  9. orchestrator/bond/getBondBasiInfo.py +84 -0
  10. orchestrator/bond/getBondWithOptiCallRede.py +83 -0
  11. orchestrator/bond/getEarlExerOpti.py +90 -0
  12. orchestrator/bond/getIssuIssuItemStat.py +85 -0
  13. orchestrator/bond/getOptiExer.py +83 -0
  14. orchestrator/bond/getOptiExerPricAdju.py +84 -0
  15. orchestrator/bond/workflow.py +252 -0
  16. orchestrator/exceptions.py +17 -0
  17. orchestrator/fnguide/__init__.py +21 -0
  18. orchestrator/fnguide/workflow.py +391 -0
  19. orchestrator/mapping/__init__.py +22 -0
  20. orchestrator/mapping/data/__init__.py +1 -0
  21. orchestrator/mapping/data/corp_codes_raw.json +693170 -0
  22. orchestrator/mapping/update_raw_data.py +96 -0
  23. orchestrator/mapping/workflow.py +303 -0
  24. orchestrator/price/__init__.py +15 -0
  25. orchestrator/price/workflow.py +250 -0
  26. telebotkit/__init__.py +51 -0
  27. telebotkit/bot/__init__.py +38 -0
  28. telebotkit/bot/client.py +217 -0
  29. telebotkit/bot/reply.py +36 -0
  30. telebotkit/bot/router.py +125 -0
  31. telebotkit/bot/safety.py +28 -0
  32. telebotkit/bot/telegram.py +41 -0
  33. telebotkit/firestore/__init__.py +45 -0
  34. telebotkit/firestore/client.py +141 -0
  35. telebotkit/firestore/documents.py +164 -0
  36. telebotkit/firestore/fetch.py +228 -0
  37. telebotkit/firestore/locks.py +74 -0
  38. telebotkit/firestore/upload.py +75 -0
  39. telebotkit/sheets.py +219 -0
@@ -0,0 +1,391 @@
1
+ """FnGuide workflows for financial statements and ratios.
2
+ - Follow the policies of the target website and use appropriate delays between requests if needed.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from html import unescape
8
+ import logging
9
+ import re
10
+ from typing import Any
11
+
12
+ import httpx
13
+
14
+ from ..exceptions import DownloadError, InvalidInputError
15
+
16
# FnGuide endpoints requested by this module.
FN_GUIDE_FINANCE_RATIO_URL = "https://comp.fnguide.com/SVO2/ASP/SVD_FinanceRatio.asp"
FN_GUIDE_FINANCE_STATEMENT_URL = "https://comp.fnguide.com/SVO2/ASP/SVD_Finance.asp"

# Any single HTML tag; DOTALL lets a tag span several lines.
_TAG_RE = re.compile(r"<[^>]+>", re.S)
# One or more whitespace characters, collapsed to a single space by callers.
_WHITESPACE_RE = re.compile(r"\s+")
# A titled table section. Captures, in order: the <h2> content, the "stxt"
# span content, the "txt1" span content, and the complete <table> markup.
_TABLE_SECTION_RE = re.compile(
    r'<div class="ul_col2wrap pd_t25">.*?<h2>(.*?)</h2>\s*'
    r'<span class="stxt">(.*?)</span>.*?'
    r'<span class="txt1">(.*?)</span>.*?'
    r"(<table\b.*?</table>)",
    re.S,
)
# Any complete <table> element (used when no titled section is found).
_TABLE_RE = re.compile(r"<table\b.*?</table>", re.S)
# A table row. Captures the raw attribute text and the inner HTML.
_ROW_RE = re.compile(r"<tr\b([^>]*)>(.*?)</tr>", re.S)
# A header or data cell. Captures the tag name, raw attribute text, inner HTML.
_CELL_RE = re.compile(r"<(th|td)\b([^>]*)>(.*?)</(?:th|td)>", re.S)

logger = logging.getLogger(__name__)
33
+
34
+
35
def build_fnguide_params(
    *,
    gicode: str,
    new_menu_id: str,
    pgb: str = "1",
    cid: str = "",
    menu_yn: str = "Y",
    report_gb: str = "",
    stk_gb: str = "701",
) -> dict[str, Any]:
    """Assemble the query parameters for an FnGuide page request.

    Each keyword argument is placed under the query key the site uses
    (``pGB``, ``gicode``, ``cID``, ``MenuYn``, ``ReportGB``, ``NewMenuID``,
    ``stkGb``). The values are passed through unchanged.

    Returns:
        A new dict with the seven query parameters, in the order listed above.
    """
    # Insertion order is kept so the generated query string stays stable.
    query_items = (
        ("pGB", pgb),
        ("gicode", gicode),
        ("cID", cid),
        ("MenuYn", menu_yn),
        ("ReportGB", report_gb),
        ("NewMenuID", new_menu_id),
        ("stkGb", stk_gb),
    )
    return dict(query_items)
54
+
55
+
56
def build_finance_ratio_params(
    *,
    gicode: str,
    report_gb: str = "",
) -> dict[str, Any]:
    """Build the query parameters for the finance-ratio page.

    Delegates to ``build_fnguide_params`` with ``new_menu_id`` fixed to
    ``"104"``; every other parameter keeps that function's default.
    """
    ratio_menu_id = "104"
    return build_fnguide_params(
        gicode=gicode,
        new_menu_id=ratio_menu_id,
        report_gb=report_gb,
    )
62
+
63
+
64
def build_finance_statement_params(
    *,
    gicode: str,
    report_gb: str = "D",
) -> dict[str, Any]:
    """Build the query parameters for the financial-statement page.

    Delegates to ``build_fnguide_params`` with ``new_menu_id`` fixed to
    ``"103"``. ``report_gb`` defaults to ``"D"`` here.
    """
    statement_menu_id = "103"
    return build_fnguide_params(
        gicode=gicode,
        new_menu_id=statement_menu_id,
        report_gb=report_gb,
    )
70
+
71
+
72
def fetch_fnguide_page(
    *,
    url: str,
    params: dict[str, Any],
    timeout: float = 30.0,
    user_agent: str | None = None,
    headers: dict[str, str] | None = None,
) -> str:
    """Download one page with an HTTP GET and return the response text.

    Args:
        url: Address to request.
        params: Query parameters sent with the request.
        timeout: Timeout value handed to ``httpx.get``.
        user_agent: Optional ``User-Agent`` header value.
        headers: Optional extra headers; applied after ``user_agent`` so an
            explicit ``User-Agent`` entry here takes precedence.

    Returns:
        ``response.text`` of the successful response.

    Raises:
        DownloadError: When the request fails or the response has an error
            status (any ``httpx.HTTPError``); the original error is chained.
    """
    outgoing_headers: dict[str, str] = {}
    if user_agent is not None:
        outgoing_headers["User-Agent"] = user_agent
    outgoing_headers.update(headers or {})

    try:
        response = httpx.get(
            url,
            params=params,
            # Pass None rather than an empty mapping when nothing was set.
            headers=outgoing_headers or None,
            timeout=timeout,
            follow_redirects=True,
        )
        response.raise_for_status()
    except httpx.HTTPError as exc:
        logger.warning("FnGuide request failed for %s: %s", url, exc)
        raise DownloadError(f"Failed to fetch FnGuide page: {url}") from exc
    else:
        return response.text
100
+
101
+
102
def _normalize_text(text: str) -> str:
    """Decode HTML entities and collapse whitespace in ``text``.

    Non-breaking spaces become regular spaces, every whitespace run becomes a
    single space, and leading/trailing whitespace is removed.
    """
    decoded = unescape(text)
    without_nbsp = decoded.replace("\xa0", " ")
    return _WHITESPACE_RE.sub(" ", without_nbsp).strip()
106
+
107
+
108
def _strip_tags(html: str) -> str:
    """Return the normalized text content of an HTML fragment.

    ``<br>`` variants are turned into newlines first, then every remaining tag
    is replaced by a space, and the result goes through ``_normalize_text``.
    """
    with_line_breaks = re.sub(r"<br\s*/?>", "\n", html, flags=re.I)
    without_tags = _TAG_RE.sub(" ", with_line_breaks)
    return _normalize_text(without_tags)
112
+
113
+
114
def _extract_attr(tag_attrs: str, name: str) -> str | None:
    """Return the value of the quoted attribute ``name`` in ``tag_attrs``.

    Args:
        tag_attrs: Raw attribute text of a tag, e.g. ``' class="a" id="b"'``.
        name: Attribute name to look up; matched case-insensitively.

    Returns:
        The attribute value with HTML entities decoded and surrounding
        whitespace stripped, or ``None`` when no quoted attribute with exactly
        that name is present.
    """
    # The lookbehind stops "id" from matching inside "rowid"/"data-id" and
    # "title" inside "subtitle"; re.escape keeps the name literal. Whitespace
    # around "=" is tolerated.
    pattern = rf'(?<![\w:.-]){re.escape(name)}\s*=\s*(["\'])(.*?)\1'
    match = re.search(pattern, tag_attrs, re.S | re.I)
    if match is None:
        return None
    return unescape(match.group(2)).strip()
119
+
120
+
121
def _clean_label_html(cell_html: str) -> str:
    """Extract a row label from a cell's HTML, dropping auxiliary markup.

    ``<dl>`` blocks, ``<a class="btn_acdopen">`` links and
    ``<span class="blind">`` elements are each replaced by a space before the
    remaining HTML is reduced to text with ``_strip_tags``.
    """
    removable_patterns = (
        r"<dl[^>]*>.*?</dl>",
        r'<a\b[^>]*class=["\']btn_acdopen["\'][^>]*>.*?</a>',
        r'<span\b[^>]*class=["\']blind["\'][^>]*>.*?</span>',
    )
    label_html = cell_html
    for pattern in removable_patterns:
        label_html = re.sub(pattern, " ", label_html, flags=re.S)
    return _strip_tags(label_html)
136
+
137
+
138
def _parse_value_cells(cells: list[dict[str, Any]], columns: list[str]) -> list[dict[str, str | None]]:
    """Pair each column header with the matching value cell of a row.

    The first cell is skipped (it holds the row label). Pairing stops at the
    shorter of ``columns`` and the remaining cells. Falsy ``text``/``title``
    values are reported as ``None``.
    """
    value_cells = cells[1:]
    return [
        {
            "period": period,
            "text": value_cell["text"] or None,
            "title": value_cell["title"] or None,
        }
        for period, value_cell in zip(columns, value_cells)
    ]
149
+
150
+
151
def _parse_table_row(row_attrs: str, row_html: str, columns: list[str]) -> dict[str, Any]:
    """Convert one ``<tr>`` into a dict with its label and per-column values.

    Args:
        row_attrs: Raw attribute text of the ``<tr>`` tag.
        row_html: Inner HTML of the row.
        columns: Column headers used to label the value cells.

    Returns:
        A dict with ``_row_class`` (class attribute or ``""``), ``_row_id``
        (id attribute or ``None``), ``label`` and ``values``.
    """
    parsed_cells = [
        {
            "attrs": attrs,
            "html": inner_html,
            "text": _strip_tags(inner_html),
            "title": _extract_attr(attrs, "title"),
        }
        for _tag, attrs, inner_html in _CELL_RE.findall(row_html)
    ]

    if parsed_cells:
        first_cell = parsed_cells[0]
    else:
        # A row without cells still yields a well-formed (empty) label.
        first_cell = {"html": "", "text": ""}

    # Prefer the label with helper markup removed; fall back to the plain text.
    label = _clean_label_html(first_cell["html"]) or first_cell["text"]

    return {
        "_row_class": _extract_attr(row_attrs, "class") or "",
        "_row_id": _extract_attr(row_attrs, "id"),
        "label": label,
        "values": _parse_value_cells(parsed_cells, columns),
    }
172
+
173
+
174
def _infer_table_period_kind(subtitle: str, columns: list[str]) -> str:
    """Classify a table's reporting period from its subtitle or columns.

    The subtitle is checked for Korean keywords in a fixed order and the first
    hit decides the result. When no keyword is present, the ``YYYY/MM`` column
    headers decide: a repeated year means ``"quarterly"``, otherwise
    ``"annual"``.

    Returns:
        One of ``"annual"``, ``"quarterly"``, ``"annual_cumulative"`` or
        ``"quarter_3m"``.
    """
    # Order matters: the first keyword found in the subtitle wins.
    keyword_kinds = (
        ("연간", "annual"),
        ("분기", "quarterly"),
        ("누적", "annual_cumulative"),
        ("3개월", "quarter_3m"),
    )
    for keyword, kind in keyword_kinds:
        if keyword in subtitle:
            return kind

    # Only headers shaped like "2023/12" take part in the year comparison.
    years = [
        str(column).split("/")[0]
        for column in columns
        if re.match(r"^\d{4}/\d{2}$", str(column))
    ]
    if len(set(years)) < len(years):
        return "quarterly"
    return "annual"
190
+
191
+
192
def _parse_finance_table(
    *,
    title: str,
    subtitle: str,
    unit: str,
    table_html: str,
) -> dict[str, Any]:
    """Parse one ``<table>`` element into a nested, JSON-friendly dict.

    Args:
        title: Table title (normalized before being returned).
        subtitle: Table subtitle; also used to infer the period kind.
        unit: Unit text (normalized before being returned).
        table_html: Complete ``<table>...</table>`` markup.

    Returns:
        A dict with ``title``, ``subtitle``, ``period_kind``, ``unit``,
        ``caption``, ``basis`` (first header cell), ``columns`` (remaining
        header cells) and ``sections``. Each section has a ``title`` and
        ``rows``; rows whose class contains ``acd_dep2_sub`` are attached to
        their parent row under ``details`` when the parent can be resolved.
    """
    caption_match = re.search(r"<caption[^>]*>(.*?)</caption>", table_html, re.S)
    caption = _strip_tags(caption_match.group(1)) if caption_match else None

    # Accept attributes on <thead>/<tbody>, as the table, row and cell
    # patterns already do; otherwise such tables would parse as empty.
    header_match = re.search(r"<thead\b[^>]*>(.*?)</thead>", table_html, re.S)
    header_cells = _CELL_RE.findall(header_match.group(1)) if header_match else []
    header_texts = [_strip_tags(cell_html) for _, _, cell_html in header_cells]
    basis = header_texts[0] if header_texts else None
    columns = header_texts[1:]

    body_match = re.search(r"<tbody\b[^>]*>(.*?)</tbody>", table_html, re.S)
    body_html = body_match.group(1) if body_match else ""

    sections: list[dict[str, Any]] = []
    current_section: dict[str, Any] | None = None
    # Maps a row id (without its "p_" prefix) to the parsed parent row.
    row_lookup: dict[str, dict[str, Any]] = {}

    for row_attrs, row_html in _ROW_RE.findall(body_html):
        row_class = _extract_attr(row_attrs, "class") or ""

        # A "tbody_tit" row opens a new section and carries no values.
        if "tbody_tit" in row_class:
            title_cells = _CELL_RE.findall(row_html)
            section_title = _strip_tags(title_cells[0][2]) if title_cells else ""
            current_section = {"title": section_title, "rows": []}
            sections.append(current_section)
            continue

        # Rows that appear before any section title go into an untitled section.
        if current_section is None:
            current_section = {"title": None, "rows": []}
            sections.append(current_section)

        parsed_row = _parse_table_row(row_attrs, row_html, columns)
        # The bookkeeping keys are internal and never part of the output.
        row_id = parsed_row.pop("_row_id", None)
        parsed_row.pop("_row_class", None)

        if "acd_dep2_sub" in row_class:
            # A "c_grid..." class names the parent row once "c_" is removed.
            parent_keys = [
                cls[len("c_"):]
                for cls in row_class.split()
                if cls.startswith("c_grid")
            ]
            parent = next((row_lookup[key] for key in parent_keys if key in row_lookup), None)
            if parent is not None:
                parent.setdefault("details", []).append(parsed_row)
            else:
                # No known parent: keep the row at section level.
                current_section["rows"].append(parsed_row)
            continue

        current_section["rows"].append(parsed_row)
        key = row_id.removeprefix("p_") if row_id else row_id
        if key:
            row_lookup[key] = parsed_row

    normalized_subtitle = _normalize_text(subtitle)
    return {
        "title": _normalize_text(title),
        "subtitle": normalized_subtitle,
        "period_kind": _infer_table_period_kind(normalized_subtitle, columns),
        "unit": _normalize_text(unit),
        "caption": caption,
        "basis": basis,
        "columns": columns,
        "sections": sections,
    }
263
+
264
+
265
def parse_fnguide_html_tables(html: str) -> dict[str, Any]:
    """Parse an FnGuide page into its title, hidden-input metadata and tables.

    Args:
        html: Full HTML text of the page.

    Returns:
        A dict with ``page_title`` (text of ``<title>`` or ``None``),
        ``page_meta`` (non-empty values of selected ``<input>`` elements,
        keyed by their id) and ``tables`` (one parsed dict per table).
    """
    title_match = re.search(r"<title>(.*?)</title>", html, re.S)
    page_title = _strip_tags(title_match.group(1)) if title_match else None

    hidden_keys = ("gicode", "giname", "stkGb", "NewMenuID", "ReportGb", "MenuYn")
    hidden_inputs: dict[str, str] = {}
    for key in hidden_keys:
        # One search per key; inputs that are missing or empty are left out.
        input_match = re.search(rf'<input[^>]*id=["\']{key}["\'][^>]*', html)
        if input_match is None:
            continue
        value = _extract_attr(input_match.group(0), "value")
        if value:
            hidden_inputs[key] = value

    tables = [
        _parse_finance_table(
            title=title,
            subtitle=subtitle,
            unit=unit,
            table_html=table_html,
        )
        for title, subtitle, unit, table_html in _TABLE_SECTION_RE.findall(html)
    ]

    if not tables:
        # No titled sections matched: parse every bare <table> and give it a
        # positional placeholder title.
        tables = [
            _parse_finance_table(
                title=f"table_{index}",
                subtitle="",
                unit="",
                table_html=table_html,
            )
            for index, table_html in enumerate(_TABLE_RE.findall(html), start=1)
        ]

    return {
        "page_title": page_title,
        "page_meta": hidden_inputs,
        "tables": tables,
    }
309
+
310
+
311
def parse_fnguide_finance_ratio_html(html: str) -> dict[str, Any]:
    """Parse the finance-ratio page; a named alias of ``parse_fnguide_html_tables``."""
    parsed_page = parse_fnguide_html_tables(html)
    return parsed_page
313
+
314
+
315
def parse_fnguide_finance_statement_html(html: str) -> dict[str, Any]:
    """Parse the financial-statement page; a named alias of ``parse_fnguide_html_tables``."""
    parsed_page = parse_fnguide_html_tables(html)
    return parsed_page
317
+
318
+
319
def _normalize_stock_code(stock_code: str) -> str:
    """Return ``stock_code`` as a stripped string.

    Raises:
        InvalidInputError: When the value is falsy or blank after stripping.
    """
    code = str(stock_code or "").strip()
    if code:
        return code
    raise InvalidInputError("stock_code is required.")
324
+
325
+
326
def _normalize_statement_type(statement_type: str) -> tuple[str, str]:
    """Map a statement-type alias to ``(canonical_name, report_gb_code)``.

    Matching ignores case and surrounding whitespace. Consolidated aliases
    resolve to ``("consolidated", "D")`` and separate aliases to
    ``("separate", "B")``.

    Raises:
        InvalidInputError: When the value is not a recognised alias.
    """
    consolidated = ("consolidated", "D")
    separate = ("separate", "B")
    alias_table = {
        "consolidated": consolidated,
        "connected": consolidated,
        "d": consolidated,
        "separate": separate,
        "standalone": separate,
        "single": separate,
        "b": separate,
    }

    alias = str(statement_type or "").strip().lower()
    resolved = alias_table.get(alias)
    if resolved is None:
        raise InvalidInputError("statement_type must be 'consolidated' or 'separate'.")
    return resolved
333
+
334
+
335
class FnGuideWorkflow:
    """Fetch FnGuide ratio and financial statement pages as normalized JSON."""

    def run(
        self,
        *,
        stock_code: str,
        statement_type: str = "consolidated",
        timeout: float = 30.0,
        user_agent: str | None = None,
    ) -> dict[str, Any]:
        """Fetch and parse both FnGuide pages for one stock.

        Args:
            stock_code: Stock code, with or without a leading ``"A"``.
            statement_type: Statement-type alias accepted by
                ``_normalize_statement_type``.
            timeout: Timeout passed to each page request.
            user_agent: Optional ``User-Agent`` header for each request.

        Returns:
            A dict with ``input`` (the normalized request values) and ``data``
            (parsed ``financials`` and ``ratios``).
        """
        code = _normalize_stock_code(stock_code)
        statement_kind, report_gb = _normalize_statement_type(statement_type)

        # The gicode always starts with "A"; add it only when it is missing.
        if code.startswith("A"):
            gicode = code
        else:
            gicode = f"A{code}"

        # The ratio page is requested first, then the statement page.
        ratio_page = fetch_fnguide_page(
            url=FN_GUIDE_FINANCE_RATIO_URL,
            params=build_finance_ratio_params(gicode=gicode, report_gb=report_gb),
            timeout=timeout,
            user_agent=user_agent,
        )
        statement_page = fetch_fnguide_page(
            url=FN_GUIDE_FINANCE_STATEMENT_URL,
            params=build_finance_statement_params(gicode=gicode, report_gb=report_gb),
            timeout=timeout,
            user_agent=user_agent,
        )

        request_summary = {
            "stock_code": code.removeprefix("A"),
            "statement_type": statement_kind,
            "timeout": timeout,
        }
        financials = parse_fnguide_finance_statement_html(statement_page)
        ratios = parse_fnguide_finance_ratio_html(ratio_page)

        return {
            "input": request_summary,
            "data": {
                "financials": financials,
                "ratios": ratios,
            },
        }
374
+
375
+
376
# Module-level instance shared by ``run_fnguide_workflow``.
_DEFAULT_WORKFLOW = FnGuideWorkflow()


def run_fnguide_workflow(
    *,
    stock_code: str,
    statement_type: str = "consolidated",
    timeout: float = 30.0,
    user_agent: str | None = None,
) -> dict[str, Any]:
    """Run the shared ``FnGuideWorkflow`` instance with the given arguments.

    All arguments are forwarded unchanged to ``FnGuideWorkflow.run`` and its
    result is returned as-is.
    """
    forwarded_options = {
        "stock_code": stock_code,
        "statement_type": statement_type,
        "timeout": timeout,
        "user_agent": user_agent,
    }
    return _DEFAULT_WORKFLOW.run(**forwarded_options)
@@ -0,0 +1,22 @@
1
+ """Mapping workflows for FinanceChatbot.
2
+
3
+ For development:
4
+ run `orchestrator-update-corp-codes --api-key "$DART_API_KEY"` to update the raw corp codes file
5
+ """
6
+
7
+ from .workflow import MappingWorkflow, download_latest_raw_entries, run_mapping_workflow
8
+
9
+ __all__ = [
10
+ "MappingWorkflow",
11
+ "download_latest_raw_entries",
12
+ "run_mapping_workflow",
13
+ "update_raw_corp_codes_file",
14
+ ]
15
+
16
+
17
def __getattr__(name: str):
    """Resolve ``update_raw_corp_codes_file`` on first attribute access.

    The ``.update_raw_data`` import runs only when that attribute is requested.

    Raises:
        AttributeError: For any other attribute name.
    """
    if name != "update_raw_corp_codes_file":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    from .update_raw_data import update_raw_corp_codes_file

    return update_raw_corp_codes_file
@@ -0,0 +1 @@
1
+ """Bundled corp-code data for mapping workflows."""